Import upstream 5.1.3.

author Junfeng Dong <junfeng.dong@intel.com>

Mon, 16 Dec 2013 10:29:55 +0000 (18:29 +0800)

committer Junfeng Dong <junfeng.dong@intel.com>

Mon, 16 Dec 2013 10:29:55 +0000 (18:29 +0800)
author Junfeng Dong <junfeng.dong@intel.com>
Mon, 16 Dec 2013 10:29:55 +0000 (18:29 +0800)
committer Junfeng Dong <junfeng.dong@intel.com>
Mon, 16 Dec 2013 10:29:55 +0000 (18:29 +0800)
diff --git a/AUTHORS b/AUTHORS

index 79bfa8101df0bededc6c12d8688315aca1ba3a9d..e403b8316de1549f939a1811575af7282f98aef6 100644 (file)
--- a/AUTHORS
+++ b/AUTHORS
@@ -4,20 +4,21 @@ Torbj
  
  John Amanatides                Original version of mpz/pprime_p.c
  
-Paul Zimmermann                mpn/generic/mul_fft.c, dc_divrem_n.c, rootrem.c,
-                       old mpz/powm.c, old toom3 code.
+Paul Zimmermann                mpn/generic/mul_fft.c, now defunct dc_divrem_n.c,
+                       rootrem.c, old mpz/powm.c, old toom3 code.
  
-Ken Weber              mpn/generic/bdivmod.c, old mpn/generic/gcd.c
+Ken Weber              Now defunct mpn/generic/bdivmod.c, old mpn/generic/gcd.c
  
-Bennet Yee             mpz/jacobi.c mpz/legendre.c
+Bennet Yee             Previous versions of mpz/jacobi.c mpz/legendre.c
  
  Andreas Schwab         mpn/m68k/lshift.asm, mpn/m68k/rshift.asm
  
-Robert Harley          Old mpn/generic/mul_n.c, many files in mpn/arm
+Robert Harley          Old mpn/generic/mul_n.c, previous versions of files in
+                       mpn/arm
  
  Linus Nordberg         Random number framework, original autoconfery
  
-Kent Boortz            MacOS 9 port
+Kent Boortz            MacOS 9 port, now defunct.
  
  Kevin Ryde             Most x86 assembly, new autoconfery, and countless other
                         things (please see the GMP manual for complete list)
@@ -27,29 +28,52 @@ Gerardo Ballabio    gmpxx.h and C++ istream input
  Pedro Gimeno           Mersenne Twister random generator, other random number
                         revisions
  
-Jason Moxham           mpz/fac_ui.c and gen-fac_ui.c
+Jason Moxham           Previous versions of mpz/fac_ui.c and gen-fac_ui.c
  
-Niels Möller           mpn/generic/hgcd2.c, gcd.c, gcdext.c, matrix22_mul.c,
-                       hgcd.c, gcdext_1.c, gcd_subdiv_step.c, gcd_lehmer.c,
+Niels Möller           gen-jacobitab.c,
+                       mpn/generic/hgcd2.c, hgcd.c, hgcd_step.c,
+                       hgcd_appr.c, hgcd_matrix.c, hgcd_reduce.c,
+                       gcd.c, gcdext.c, matrix22_mul.c,
+                       gcdext_1.c, gcd_subdiv_step.c, gcd_lehmer.c,
                         gcdext_subdiv_step.c, gcdext_lehmer.c,
+                       jacobi_2.c, jacbase.c, hgcd_jacobi.c, hgcd2_jacobi.c,
+                       matrix22_mul1_inverse_vector.c,
                         toom_interpolate_7pts, mulmod_bnm1.c, dcpi1_bdiv_qr.c,
                         dcpi1_bdiv_q.c, sbpi1_bdiv_qr.c, sbpi1_bdiv_q.c,
                         toom_eval_dgr3_pm1.c, toom_eval_dgr3_pm2.c,
                         toom_eval_pm1.c, toom_eval_pm2.c, toom_eval_pm2exp.c,
-                       divexact.c, mpn/x86/invert_limb.asm,
-                       mpn/x86_64/invert_limb.asm, mpz/nextprime.c,
-                       mpz/divexact.c.
+                       divexact.c, mod_1_1.c, div_qr_2.c,
+                       div_qr_2n_pi1.c, div_qr_2u_pi1.c, broot.c,
+                       brootinv.c,
+                       mpn/x86/k7/invert_limb.asm, mod_1_1.asm,
+                       mpn/x86_64/invert_limb.asm,
+                       invert_limb_table.asm, mod_1_1.asm,
+                       div_qr_2n_pi1.asm, div_qr_2u_pi1.asm,
+                       mpn/x86_64/core2/aorsmul_1.asm,
+                       mpz/nextprime.c, divexact.c, gcd.c, gcdext.c,
+                       jacobi.c, combit.c, mini-gmp/mini-gmp.c.
  
  Marco Bodrato          mpn/generic/toom44_mul.c, toom4_sqr.c, toom53_mul.c,
-                       toom62_mul.c, toom43_mul.c, toom52_mul.c,
+                       toom62_mul.c, toom43_mul.c, toom52_mul.c, toom54_mul.c,
                         toom_interpolate_6pts.c, toom_couple_handling.c,
                         toom63_mul.c, toom_interpolate_8pts.c,
                         toom6h_mul.c, toom6_sqr.c, toom_interpolate_12pts.c,
                         toom8h_mul.c, toom8_sqr.c, toom_interpolate_16pts.c,
                         mulmod_bnm1.c, sqrmod_bnm1.c, nussbaumer_mul.c,
                         toom_eval_pm2.c, toom_eval_pm2rexp.c,
-                       mullo_n.c, invert.c, invertappr.c.
-
-David Harvey           mpn/x86_64/mul_basecase.asm
+                       mullo_n.c, invert.c, invertappr.c;
+                       mpz/fac_ui.c, 2fac_ui.c, mfac_uiui.c, oddfac_1.c,
+                       primorial_ui.c, prodlimbs.c, goetgheluck_bin_uiui.c.
+
+David Harvey           mpn/generic/add_err1_n.c, add_err2_n.c,
+                       add_err3_n.c, sub_err1_n.c, sub_err2_n.c,
+                       sub_err3_n.c, mulmid_basecase.c, mulmid_n.c,
+                       toom42_mulmid.c,
+                       mpn/x86_64/mul_basecase.asm, aors_err1_n.asm,
+                       aors_err2_n.asm, aors_err3_n.asm,
+                       mulmid_basecase.asm,
+                       mpn/x86_64/core2/aors_err1_n.asm.
  
  Martin Boij            mpn/generic/perfpow.c
+
+Marc Glisse            gmpxx.h improvements
diff --git a/ChangeLog b/ChangeLog

index 01b90c3a73ab123f07324d38124fbad7143fef72..47a0df3ea9266b6f53cc0bcca9cc3b0f3f4aa4e2 100644 (file)
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,501 +1,4482 @@
-2012-05-06  Torbjorn Granlund  <tege@gmplib.org>
+2013-09-29  Torbjorn Granlund  <tege@gmplib.org>
  
-       * Version 5.0.5 released.
+       * Version 5.1.3 released.
  
-       * mpn/Makefile.am (TARG_DIST): Remove thumb, since directory now empty.
+       * Makefile.am (LIBGMP_LT_*, LIBGMPXX_LT_*): Bump version info.
+       * gmp-h.in: Bump version.
  
-2012-04-28  Torbjorn Granlund  <tege@gmplib.org>
+2013-09-27  Niels Möller  <nisse@lysator.liu.se>
  
-       * mpn/thumb/add_n.s: Remove broken code.
-       * mpn/thumb/sub_n.s: Likewise.
+       * NEWS: Mention the ia64 mpn_divrem_2 bugfix.
  
-2012-04-02  Torbjorn Granlund  <tege@gmplib.org>
+2013-07-16  Torbjorn Granlund  <tege@gmplib.org>
  
-       * gmp-h.in (__GNU_MP_VERSION_PATCHLEVEL): Bump.
+       * doc/gmp.texi: Declare countless function arguments as 'const'.
  
-       * Makefile.am (LIBGMP_LT_*, LIBGMPXX_LT_*, LIBMP_LT_*):
-       Bump version info.
+2013-07-15  Torbjorn Granlund  <tege@gmplib.org>
  
-       * gmp-h.in (_GMP_H_HAVE_FILE): Test also __STDIO_LOADED (for VMS).
+       * mpn/generic/sb_div_sec.c: Compute inverse as floor(B^2/(dh+1)), per
+       Niels' suggestion.
+       * mpn/generic/sbpi1_div_sec.c: Remove inverse rounding-up code.
  
-2012-03-27  Torbjorn Granlund  <tege@gmplib.org>
+2013-07-12  Torbjorn Granlund  <tege@gmplib.org>
  
-       * config.guess: Fix typo in coreisbr recognition.
+       * mpn/generic/sbpi1_div_sec.c: Partial rewrite.
  
-2012-03-07  Torbjorn Granlund  <tege@gmplib.org>
+2013-06-19  Torbjorn Granlund  <tege@gmplib.org>
  
-       * config.guess: Handle AMD 11h correctly.
+       * mpn/powerpc64/p6/lshift.asm: Rewrite switching-into-loop code.
+       * mpn/powerpc64/p6/rshift.asm: Likewise.
+       * mpn/powerpc64/p6/lshiftc.asm: Likewise.
  
-2012-03-04 Marco Bodrato <bodrato@mail.dm.unipi.it>
+2013-06-17  Torbjorn Granlund  <tege@gmplib.org>
  
-       * tests/mpz/t-invert.c: Avoid testing mod 0.
-       * doc/gmp.texi (mpz_invert): Specify mod 0 is not handled.
+       * mpn/powerpc64/p6/lshift.asm: Fix typo in label reference.
+       For 32-bit mode, zero extend `n' argument and split retval.
+       * mpn/powerpc64/p6/rshift.asm: Likewise.
+       * mpn/powerpc64/p6/lshiftc.asm: Likewise.
  
-2012-02-24  Torbjorn Granlund  <tege@gmplib.org>
+2013-06-09  Marc Glisse  <marc.glisse@inria.fr>
  
-       * tests/mpn/logic.c: New file.
-       * tests/mpn/Makefile.am (check_PROGRAMS): Add logic.
+       * mpn/generic/get_d.c (mpn_get_d): Avoid signed overflow.
+       * mpz/kronzs.c (mpz_kronecker_si): Use ABS_CAST.
  
-       * tests/mpz/t-invert.c: New file.
-       * tests/mpz/Makefile.am (check_PROGRAMS): Add t-invert.
+2013-05-22  Torbjorn Granlund  <tege@gmplib.org>
  
-2012-02-11 Marco Bodrato <bodrato@mail.dm.unipi.it>
+       * doc/gmp.texi (Reporting Bugs): Ask for configure's output.
  
-       * doc/gmp.texi (Multiplication Algorithms): Add Toom[68]'n'half.
+       * mpn/ia64/divrem_2.asm: Don't clobber f16-f18.
  
-2012-02-10  Torbjorn Granlund  <tege@gmplib.org>
+2013-05-20  Torbjorn Granlund  <tege@gmplib.org>
  
-       * Version 5.0.4 released.
+       * Version 5.1.2 released.
  
-2012-02-09 Marco Bodrato <bodrato@mail.dm.unipi.it>
+       * mpn/arm/udiv.asm: Change spacing to work around binutils bug.
  
-       * gmp-impl.h (mpn_toom3*_itch): Support any recursion depth.
-       * tests/refmpn.c (refmpn_mul): Restore tight allocations.
+2013-05-16  Torbjorn Granlund  <tege@gmplib.org>
  
-2012-02-09  Marc Glisse  <marc.glisse@inria.fr>
+       * Makefile.am (LIBGMP_LT_*, LIBGMPXX_LT_*): Bump version info.
+       * gmp-h.in: Bump version.
  
-       * gmp-impl.h (ABS_CAST): New macro.
-       * mpf/cmp_si.c: Use ABS_CAST.
-       * mpf/get_si.c: Use ABS_CAST.
-       * mpf/iset_si.c: Use ABS_CAST.
-       * mpf/set_si.c: Use ABS_CAST.
-       * mpq/set_si.c: Use ABS_CAST.
-       * mpz/cmp_si.c: Use ABS_CAST.
-       * mpz/get_si.c: Use ABS_CAST.
-       * mpz/iset_si.c: Use ABS_CAST.
-       * mpz/mul_i.h: Use ABS_CAST.
-       * mpz/set_si.c: Use ABS_CAST.
+       * tests/misc.c (tests_hardware_getround, tests_hardware_setround):
+       Avoid assembly dependency unless WANT_ASSEMBLY.
  
-2012-02-09  Torbjorn Granlund  <tege@gmplib.org>
+       * configure.ac (WANT_ASSEMBLY): Conditionally define.
  
-       * gmp-h.in (__GNU_MP_VERSION_PATCHLEVEL): Bump.
+2013-05-14  Torbjorn Granlund  <tege@gmplib.org>
  
-       * Makefile.am (LIBGMP_LT_*, LIBGMPXX_LT_*, LIBMP_LT_*):
-       Bump version info.
+       * configure.ac (arm1156): Don't fall back to plain v6 compiler option.
  
-2012-02-08  Torbjorn Granlund  <tege@gmplib.org>
+2013-05-10  Torbjorn Granlund  <tege@gmplib.org>
  
-       * mpn/powerpc32/divrem_2.asm: Fix off-by-one condition in invert_limb
-       code.
+       * mpn/x86/p6/mmx/gmp-mparam.h: Set down SQR_TOOM2_THRESHOLD to parent
+       directory value.
  
-2012-02-08  Niels Möller  <nisse@lysator.liu.se>
+2013-05-09  Torbjorn Granlund  <tege@gmplib.org>
  
-       * doc/gmp.texi (mpz_gcdext): Describe cofactor canonicalization.
-       (mpn_gcdext): Copied doc updates from main repo.
+       * mpn/x86_64/bd1/mul_1.asm: Fix typo.
  
-2012-02-07  Niels Möller  <nisse@lysator.liu.se>
+2013-04-29  Torbjorn Granlund  <tege@gmplib.org>
  
-       * mpn/generic/gcdext.c (mpn_gcdext): Fixed assert, related to the
-       special case A = (2k+1) G, B = 2 G.
+       * configure.ac (sparc-*-*): Recognise t5 along with t3 and t4.
+       Remove sparc64/ultrasparct1 from path_64 for T3, T4, and T5.
  
-2012-02-06  Niels Möller  <nisse@lysator.liu.se>
+2013-04-27  Mike Frysinger  <vapier@gentoo.org>
  
-       * mpn/generic/hgcd.c (hgcd_matrix_update_q): Fixed carry handling
-       bug.
+       * configure.ac (arm*-*-*): Set up path also for plainest CPU variants.
  
-       * tests/mpz/t-gcd.c (main): Omit tests with urandomb operands.
-       * tests/mpn/t-hgcd.c (main): Likewise.
+2013-03-19  Torbjorn Granlund  <tege@gmplib.org>
  
-2012-02-05  Niels Möller  <nisse@lysator.liu.se>
+       * tests/arm32check.c: Get printing of clobbered register right.
  
-       * tests/mpz/t-gcd.c (main): Add tests with rrandomb operands.
-       * tests/mpn/t-hgcd.c (main): Likewise.
+       * tests/Makefile.am (EXTRA_libtests_la_SOURCES): Add arm32call.asm and
+       arm32check.c.
  
-       * mpn/generic/gcdext_subdiv_step.c (mpn_gcdext_subdiv_step):
-       Bugfix, in u1 += q * u0, handle carry in all cases. Also normalize
-       the product q * u0.
+2013-03-18  Torbjorn Granlund  <tege@gmplib.org>
  
-2012-02-04 Marco Bodrato <bodrato@mail.dm.unipi.it>
+       * configure.ac (arm*-*-*): Define CALLING_CONVENTIONS_OBJS.
  
-       * tests/refmpn.c (refmpn_mul): More conservative allocations.
+       * tests/arm32call.asm: New file.
+       * tests/arm32check.c: New file.
  
-2012-02-03  Torbjorn Granlund  <tege@gmplib.org>
+       * mpn/arm/arm-defs.m4 (LEA): Rewrite to properly handle repeated use.
+       (EPILOGUE_cpu): Define.
  
-       * mpn/x86_64/bd1/gmp-mparam.h: New file.
+       * mpn/x86/darwin.m4 (m4append): Move definition from here...
+       * mpn/asm-defs.m4: ...to here.
  
-       * longlong.h (udiv_qrnnd from sdiv_qrnnd): Declare udiv_w_sdiv.
+2013-03-17  Marc Glisse  <marc.glisse@inria.fr>
  
-       * mpn/generic/udiv_w_sdiv.c: Use c89 function header.
+       * tests/cxx/t-do-exceptions-work-at-all-with-this-compiler.cc: New file.
+       * tests/cxx/Makefile.am: Add new file. Reorder the tests.
  
-2012-02-02 Marco Bodrato <bodrato@mail.dm.unipi.it>
+2013-03-11  Torbjorn Granlund  <tege@gmplib.org>
  
-       * mpn/generic/toom_interpolate_16pts.c: Correct an unlikely 32-bit bug.
+       * tests/mpz/t-powm_ui.c: Test larger arguments.
  
-2012-02-02  Torbjorn Granlund  <tege@gmplib.org>
+       * mpz/powm_ui.c (mod): Adhere to mpn_mu_div_qr's overlap requirements.
  
-       * mpn/generic/toom63_mul.c: Allow s+t==n by adjusting an ASSERT.
-       * mpn/generic/toom_interpolate_8pts.c: Perform final incr iff s+t!=n.
+2013-02-25  Niels Möller  <nisse@lysator.liu.se>
  
-       * tests/mpn/t-toom6h.c (MIN_BN): Make more consistent with ASSERT in
-       tested function.
+       * mini-gmp/tests/t-double.c (testmain): Declare double variables
+       as volatile, to drop extended precision.
  
-2012-02-01  Torbjorn Granlund  <tege@gmplib.org>
+       * mini-gmp/tests/testutils.c (testfree): New function. Use it
+       everywhere where test programs deallocate storage allocated via
+       the mini-gmp allocation functions, including uses of mpz_get_str
+       for various test failure messages.
  
-       * tests/mpn/t-mul.c: New file.
-       * tests/mpn/Makefile.am: Compile it.
+2013-02-20  Niels Möller  <nisse@lysator.liu.se>
  
-2012-01-31  Torbjorn Granlund  <tege@gmplib.org>
+       * tests/mpq/t-get_d.c (check_random): Rewrote to make test less
+       dependent on float operations. Fixes problem with m68k-linux and
+       extended float precision.
  
-       * mpn/generic/powm_sec.c (SQR_BASECASE_LIM): New name for
-       SQR_BASECASE_MAX.
-       (SQR_BASECASE_LIM, fat variant): Define to read __gmpn_cpuvec.
-       (SQR_BASECASE_LIM, native variant): Define to SQR_TOOM2_THRESHOLD
-       straight, without arithmetic.
-       (mpn_local_sqr): Use BELOW_THRESHOLD as per Marco's suggestion.
+2013-02-19 Marco Bodrato <bodrato@mail.dm.unipi.it>
  
-2012-01-30  Torbjorn Granlund  <tege@gmplib.org>
+       * mini-gmp/mini-gmp.c: Move asserts to work-around a compiler bug.
  
-       * tests/mpz/t-powm.c: Ensure all sizes are seen.
+       * mini-gmp/tests/t-reuse.c: Fix typo causing the same negation
+       condition to be applied to all operands. (See 2013-02-03, Torbjorn)
  
-2012-01-27  Torbjorn Granlund  <tege@gmplib.org>
+2013-02-17  Marc Glisse  <marc.glisse@inria.fr>
  
-       * Version 5.0.3 released.
+       * cxx/osdoprnti.cc: Use <stdarg.h> and <string.h> rather than <cstdarg>
+       and <cstring> (revert 2002-12-21).
  
-       * Upgrade to libtool 2.4.2.
+       * tests/cxx/Makefile.am: Link with libm.
+       * tests/cxx/t-ops2.cc: Comment about more tests. Use <math.h> rather
+       than <cmath> and using namespace. Don't include <iostream>.
  
-2012-01-25  Torbjorn Granlund  <tege@gmplib.org>
+2013-02-16  Marc Glisse  <marc.glisse@inria.fr>
  
-       * tune/tuneup.c: Remove unused tuneup variables.
+       * gmpxx.h: Include <algorithm>.
  
-2012-01-23  Torbjorn Granlund  <tege@gmplib.org>
+2013-02-16  Torbjorn Granlund  <tege@gmplib.org>
  
-       * mpn/powerpc64/mode64/p6/gmp-mparam.h: New file.
-       * mpn/powerpc64/mode64/p7/gmp-mparam.h: New file.
-       * mpn/x86_64/bobcat/gmp-mparam.h: New file.
+       * mpn/x86_64/x86_64-defs.m4 (PROTECT): Emit '.hidden' instead of
+       '.protected' to please Sun's assembler, but also for semantic reasons.
  
-2012-01-18  Marc Glisse  <marc.glisse@inria.fr>
+2013-02-10  Torbjorn Granlund  <tege@gmplib.org>
  
-       * doc/gmp.texi (mpf_class::mpf_class): Use mp_bitcnt_t.
+       * Version 5.1.1 released.
  
-2012-01-17  Torbjorn Granlund  <tege@gmplib.org>
+2013-02-07 Marco Bodrato <bodrato@mail.dm.unipi.it>
  
-       * gmp-h.in (__GNU_MP_VERSION_PATCHLEVEL): Bump.
+       * tune/speed.h (SPEED_ROUTINE_MPN_MUL): Use operands from struct s.
+       * tune/README: Document new parameter syntax mpn_mul.<#> .
  
-       * Makefile.am (LIBGMP_LT_*, LIBGMPXX_LT_*, LIBMP_LT_*):
-       Bump version info.
+2013-02-06  Niels Möller  <nisse@lysator.liu.se>
  
-       * configure.in: Add ultrasparc T4 support.
+       * tests/mpz/t-jac.c (check_large_quotients): Rewrote. Now uses a
+       more efficient method for generating the test inputs.
  
-       * demos/isprime.c (main): Run 25 millerrabin tests.
+2013-02-05  Torbjorn Granlund  <tege@gmplib.org>
  
-2012-01-15  Niels Möller  <nisse@lysator.liu.se>
+       * tests/mpn/t-div.c: Limit random dbits to avoid an infinite loop.
  
-       * mpz/scan0.c (mpz_scan0): Use ~(mp_bitcnt_t) 0, rather than
-       ULONG_MAX, when returning "infinity".
-       * mpz/scan1.c (mpz_scan1): Likewise.
+2013-02-03  Torbjorn Granlund  <tege@gmplib.org>
  
-2011-12-30  Torbjorn Granlund  <tege@gmplib.org>
+       * tests/mpz/reuse.c: Fix typo causing the same negation condition to be
+       applied to all operands.  Fix condition for when to invoke mpz_remove.
+       Make different-size random operands.
  
-       * mpz/hamdist.c: Fix typo in a return statement.
+2013-02-02 Marco Bodrato <bodrato@mail.dm.unipi.it>
  
-2011-12-08  Torbjorn Granlund  <tege@gmplib.org>
+       * mpz/remove.c: Correct the sign in case of reuse.
  
-       * mpn/generic/powm_sec.c: Handle fat binaries better.
+2013-02-01  Torbjorn Granlund  <tege@gmplib.org>
  
-2011-12-07  Torbjorn Granlund  <tege@gmplib.org>
+       * gmp-impl.h (DIGITS_IN_BASE_PER_LIMB): Add a cast.
+       (LIMBS_PER_DIGIT_IN_BASE): Likewise.
  
-       * configure.in: Fix typo making HAVE_NATIVE_mpn_X fail for fat
-       functions.
+       * tests/refmpn.c (refmpn_mul): Use toom6h instead of toom44 for the
+       largest operands.
  
-       * mpn/x86_64/fat/fat.c (__gmpn_cpuvec_init): Add a missing break.
+2013-01-31  Torbjorn Granlund  <tege@gmplib.org>
  
-2011-12-01  Torbjorn Granlund  <tege@gmplib.org>
+       * mpn/generic/toom44_mul.c: Revert last change in favour of a simple
+       change (thanks Marco!).
+       * mpn/generic/toom4_sqr.c: Likewise.
  
-       * mpn/x86_64/fat/fat.c: Copy fake cpuid code from x86/fat/fat.c.
+2013-01-30  Torbjorn Granlund  <tege@gmplib.org>
  
-       * gmp-impl.h (DECL_divexact_1): Fix typo in return type.
+       * mpn/generic/toom44_mul.c (MAYBE_mul_toom44): Take toom6h and toom8h
+       into account, using new macro MUL_NEXTALG_THRESHOLD.
+       * mpn/generic/toom4_sqr.c (MAYBE_sqr_toom4): Likewise.
  
-2011-11-28  Torbjorn Granlund  <tege@gmplib.org>
+2013-01-26 Marco Bodrato <bodrato@mail.dm.unipi.it>
  
-       * mpn/generic/udiv_w_sdiv.c: Use CNST_LIMB for some constants.
+       * mpz/remove.c: init+set=init_set, cast before shifting.
  
-2011-11-25  Torbjorn Granlund  <tege@gmplib.org>
+       * mpz/cmp_si.c: Use ABS_CAST.
  
-       * configure.in: Overhaul x86/x86_64 support, merging three case
-       statements into one.
+2013-01-26  Torbjorn Granlund  <tege@gmplib.org>
  
-2011-11-24  Torbjorn Granlund  <tege@gmplib.org>
+       * tests/mpn/logic.c: Set things up to always test library logops, not
+       gmp-impl.h's inlined variants.  Test also mpn_com.
  
-       * doc/gmp.texi (Formatted Output Strings): Clarify rules for mpf_t
-       precision.
+       * tests/mpn/t-mod_1.c: Test also mpn_mod_1s_3p.
  
-2011-11-21  Torbjorn Granlund  <tege@gmplib.org>
+       * mpn/generic/mod_1_3.c: Swap some lines to make it similar to mod_4.c.
  
-       * gmp-h.in (__GNU_MP_RELEASE): Renamed from typo name.
+       * tests/mpz/reuse.c: Fix typo in last change.
  
-2011-11-20  Torbjorn Granlund  <tege@gmplib.org>
+2013-01-23 Marco Bodrato <bodrato@mail.dm.unipi.it>
  
-       * configure.in: Split x86 CPUs into more subtypes for more accurate
-       passing of gcc flags.
+       * mini-gmp/mini-gmp.c (mpz_cmpabs_d, mpz_cmp_d): Simplify.
+       (mpz_set_str): Behaviour more adherent to the real GMP.
  
-       * configure.in: Pass -m32 for powerpc64 with abi=32, using via _maybe
-       mechanism.
+       * mini-gmp/tests/t-str.c: Cast size_t to unsigned long, for printf.
+       * mini-gmp/tests/t-import.c: Likewise.
+       * mini-gmp/tests/t-comb.c: Remove an unused var.
+       * mini-gmp/tests/t-div.c: Remove unused args passed to fprintf.
+       * mini-gmp/tests/t-double.c: Use float immediates with float vars.
  
-2011-11-15  Torbjorn Granlund  <tege@gmplib.org>
+2013-01-22  Torbjorn Granlund  <tege@gmplib.org>
  
-       * mpn/generic/powm_sec.c (mpn_local_sqr): Remove forgotten TMP_* calls.
-       (redcify): Likewise.
-       (mpn_powm_sec): Likewise.
+       * Makefile.am (LIBGMP_LT_*, LIBGMPXX_LT_*): Bump version info.
+       * gmp-h.in: Bump version.
  
-       * mpn/generic/powm_sec.c (mpn_powm_sec): Use mpn_tabselect also in
-       initialisation.
+       * tests/mpz/reuse.c: Delete always zero 'failures' and code depending
+       on it.  Replace rotating progress with real measure.
  
-2011-10-15  Torbjorn Granlund  <tege@gmplib.org>
+       * Makefile.am (check-mini-gmp): Fix typo in last change.
  
-       * configure.in (s390): Rewrite support to handle known CPUs.
-       * config.guess: Recognise s390 CPUs.
-       * config.sub: Match s390 CPUs.
-       * acinclude.m4 (S390_PATTERN, S390X_PATTERN): New defines.
+2013-01-22  Niels Möller  <nisse@lysator.liu.se>
  
-2011-10-14  Torbjorn Granlund  <tege@gmplib.org>
+       * mini-gmp/mini-gmp.c (mpz_cmp_d): Simplified, just sort out
+       signs, then call mpz_cmpabs_d.
  
-       From Per Olofsson:
-       * mpn/generic/popham.c: Add __GMP_NOTHROW to make it match gmp.h.
+       * mini-gmp/tests/testutils.h: Include stdio.h and stdlib.h.
+       (numberof): New define.
  
-       * configure.in: AC_DEFINE HAVE_HOST_CPU_s390_zarch.
-       * longlong.h (s390): Use it.
-       (s390 umul_ppmm): Fix typo in pure C variant.
+       * mini-gmp/tests/t-cmp_d.c: New file, copied from
+       tests/mpz/t-cmp_d.c with minor changes.
+       * mini-gmp/tests/Makefile (CHECK_PROGRAMS): Added t-cmp_d.
  
-2011-10-13  Torbjorn Granlund  <tege@gmplib.org>
+       * mini-gmp/mini-gmp.c (mpz_cmpabs_d): New function.
+       * mini-gmp/mini-gmp.h: Declare it.
  
-       * longlong.h (s390): Put back an accidentally deleted #else.
+2013-01-21  Niels Möller  <nisse@lysator.liu.se>
  
-       * configure.in (s390): Unset extra_functions for s390x.
+       * mini-gmp/tests/t-str.c (testmain): Test mpz_out_str, using
+       the tmpfile function for i/o.
  
-2011-10-12  Torbjorn Granlund  <tege@gmplib.org>
+2013-01-20  Torbjorn Granlund  <tege@gmplib.org>
  
-       * longlong.h (s390 umul_ppmm): With new-enough gcc, avoid asm.
+       * Makefile.am (check-mini-gmp): Set also DYLD_LIBRARY_PATH for the
+       benefit of Darwin.
  
-       From Andreas Krebbel:
-       * longlong.h (s390 umul_ppmm): Support 32-bit limbs with gcc using
-       64-bit registers.
-       (s390 udiv_qrnnd): Likewise.
+       * tests/mpn/t-div.c: Test mpn_sb_div_qr_sec and mpn_sb_div_r_sec.
+       (main): Separate divisor into normalised (dnp) and unnormalised (dup),
+       pass appropriate variant to each function.
+       (main): Make negative `test' index value mean divisor bits, for better
+       small operands coverage.
+       (main): Put random junk at qp[] instead of zeroing.
  
-2011-10-11  Torbjorn Granlund  <tege@gmplib.org>
+       * tests/mpz/t-remove.c: Back out last change which left `divisor_size'
+       uninitialised; achieve change's aim with a parameter tweak.
  
-       * configure.in (s390x): Pass -mzarch to gcc in 32-bit mode.
+2013-01-20 Marco Bodrato <bodrato@mail.dm.unipi.it>
  
-       * longlong.h (s390x): Add __CLOBBER_CC for relevant asm patterns.
+       * mini-gmp/tests/testutils.c (testhalves): New function, test default
+       memory functions.
+       * mini-gmp/tests/testutils.h (testhalves): Declare it.
+       * mini-gmp/tests/t-logops.c: Use testhalves.
  
-2011-10-10  Torbjorn Granlund  <tege@gmplib.org>
+       * mini-gmp/mini-gmp.c (mpz_init_set_str): New function.
+       * mini-gmp/mini-gmp.h (mpz_init_set_str): Declare it.
+       * mini-gmp/tests/t-str.c: Test mpz_init_set_str.
  
-       From Marco Trudel:
-       * tests/mpz/t-scan.c (check_ref): Fix loop end bound.
+2013-01-20  Torbjorn Granlund  <tege@gmplib.org>
  
-2011-10-09  Torbjorn Granlund  <tege@gmplib.org>
+       * tests/memory.c (PTRLIMB): New macro, used for conformant casting.
  
-       * longlong.h (s390x): Put back UDItype casts to make gcc reloading use
-       right more for constants.
-       (s390x count_leading_zeros): Disable until we support z10 specifically.
-       (s390x add_ssaaaa): Remove algsi/slgsi until we support z10.
+2013-01-19 Marco Bodrato <bodrato@mail.dm.unipi.it>
  
-2011-10-07  Torbjorn Granlund  <tege@gmplib.org>
+       * mini-gmp/tests/t-double.c (testmain): Get the current free
+       function using mp_get_memory_functions.
+       * mini-gmp/tests/t-str.c (testmain): Likewise.
  
-       * longlong.h (s390): Add 32-bit zarch umul_ppmm and udiv_qrnnd.
-       (s390): Overhaul 32-bit and 64-bit code.
+       * mini-gmp/tests/testutils.h (tu_free): Remove declaration.
  
-2011-10-04  Torbjorn Granlund  <tege@gmplib.org>
+       * mini-gmp/tests/testutils.c (block_check, tu_free): Mark static.
  
-       * mpn/Makefile.am (TARG_DIST): Add s390_32 and s390_64, remove s390.
+       * tests/mpz/t-set_str.c: Check also failing conditions.
  
-       * doc/gmp.texi (Custom Allocation): Rephrase a paragraph.
+       * tests/mpz/t-remove.c: Test removal of 1.
  
-       * demos/factorize.c: Run 25 Miller-Rabin tests.
+2013-01-18  Niels Möller  <nisse@lysator.liu.se>
  
-       * mpz/nextprime.c: Run 25 mpz_millerrabin tests (was 10).
+       * mini-gmp/tests/t-str.c (test_small): New function, exercising
+       parsing of whitespace and base prefixes.
+       (testmain): Call it.
  
-2011-10-03  Torbjorn Granlund  <tege@gmplib.org>
+       * mini-gmp/tests/t-gcd.c (gcdext_valid_p): Fixed memory leak.
  
-       * configure.in: Support s390x.
+       * mini-gmp/tests/t-double.c (testmain): Call tu_free rather than
+       free, for storage allocated by mpz_get_str.
+       * mini-gmp/tests/t-str.c (testmain): Likewise.
  
-       * longlong.h: Add support for 64-bit s390x.
+       * mini-gmp/tests/testutils.c (block_init, block_check): New
+       functions.
+       (tu_alloc, tu_realloc, tu_free): New functions.
+       (main): Use mp_set_memory_functions.
+       * mini-gmp/tests/testutils.h (tu_free): Declare.
  
-       * mpn/s390_64: New directory.
-       * mpn/s390_32: Directory renamed from mpn/s390.
+       * mini-gmp/tests/testutils.h: New file, declarations for test
+       programs.
  
-2011-09-26  Torbjorn Granlund  <tege@gmplib.org>
+       * mini-gmp/tests/testutils.c (main): New file, with shared main
+       function for all the test programs. Also includes mini-gmp.c.
+       Calls testmain after initialization. All other test programs
+       updated to define testmain rather than main.
  
-       * mpn/sh/sh2/submul_1.s: Make this old submul_1 implementation
-       actually compute intended function.
+2013-01-18 Marco Bodrato <bodrato@mail.dm.unipi.it>
  
-2011-09-25  Torbjorn Granlund  <tege@gmplib.org>
+       * mini-gmp/tests/t-signed.c: Slightly larger coverage.
+       * mini-gmp/tests/t-double.c: Test also mpz_init_set_d.
  
-       * mpn/sh: Migrate files to '.asm'.
-       * configure.in: Recognise sh3 and sh4.
+2013-01-18  Torbjorn Granlund  <tege@gmplib.org>
  
-2011-08-18  Torbjorn Granlund  <tege@gmplib.org>
+       * mpn/generic/set_str.c (normalization_steps): Eliminate set-but-unused
+       variable.
  
-       * printf/doprntf.c (__gmp_doprnt_mpf): For DOPRNT_CONV_FIXED, ask for
-       one more digit.
+       * mini-gmp/tests/t-div.c: Test mpz_divisible_p and mpz_divisible_ui_p.
  
-2011-08-17  Torbjorn Granlund  <tege@gmplib.org>
+       * tests/tests.h (TESTS_REPS): Fix printf argument type clashes.
  
-       * mpf/sub.c: Fix typo in copy condition.  Delay an allocation.
+       * mini-gmp/tests/t-div.c: Test also mpz_mod, mpz_mod_ui.  Compare
+       mpz_divisible_p just to ceil, to save time.
  
-2011-08-10  Torbjorn Granlund  <tege@gmplib.org>
+       * mini-gmp/mini-gmp.c: Prefix some names with GMP_.
  
-       * tests/rand/t-lc2exp.c (check_bigc): Call abort after reporting error.
+2013-01-16 Marco Bodrato <bodrato@mail.dm.unipi.it>
  
-2011-07-15  Torbjorn Granlund  <tege@gmplib.org>
+       * mini-gmp/tests/t-double.c: Test mpz_cmp_d.
+       * mini-gmp/mini-gmp.c (mpz_cmp_d): Correct multiword comparison.
  
-       * mpn/arm/invert_limb.asm: Swap around some registers to silence 'as'
-       warnings.
+       * mini-gmp/mini-gmp.c (mpz_set_str): Handle the empty string.
+       * mini-gmp/tests/t-str.c: Test base <= 0.
  
-2011-07-14  Torbjorn Granlund  <tege@gmplib.org>
+2013-01-15  Niels Möller  <nisse@lysator.liu.se>
  
-       * mpn/generic/dcpi1_bdiv_q.c (mpn_dcpi1_bdiv_q): Get mpn_sub_1 size
-       argument right.
+       * mini-gmp/tests/t-str.c (main): Use x->_mp_d rather than x[0]._mp_d.
+       * mini-gmp/tests/t-invert.c (main): Likewise.
  
-2011-07-04  Torbjorn Granlund  <tege@gmplib.org>
+       * mini-gmp/tests/t-mul.c (main): Test mpn_mul_n and mpn_sqr.
  
-       * tests/misc/t-locale.c: Disable test for mingw.
+       * mini-gmp/tests/hex-random.h (enum hex_random_op): New value
+       OP_SQR.
  
-       * configure.in (x86_64 *-*-mingw*): Handle also cygwin here; clear out
-       extra_functions_64.
+       * mini-gmp/tests/mini-random.c (mini_random_op3): Renamed, from...
+       (mini_random_op): ... old name. Updated callers.
+       (mini_random_op2): New function.
  
-2011-07-02  Torbjorn Granlund  <tege@gmplib.org>
+       * mini-gmp/tests/hex-random.c (hex_random_op3): Renamed, from...
+       (hex_random_op): ... old name. Updated callers.
+       (hex_random_op2): New function.
  
-       * config.guess: Don't print newline in x86 cpuid function.
-       Rewrite x86-64 cpu recognition asm code to work under Windoze.
+2013-01-15 Marco Bodrato <bodrato@mail.dm.unipi.it>
  
-2011-06-16  Torbjorn Granlund  <tege@gmplib.org>
+       * mini-gmp/tests/t-logops.c: Improve popcount/hamdist testing.
+       * mini-gmp/tests/t-signed.c: Test more cases.
  
-       * acinclude.m4 (GMP_ASM_RODATA): Fix typo in 2011-04-10 change.
+2013-01-15  Torbjorn Granlund  <tege@gmplib.org>
  
-       * configure.in: Surround tr ranges with [] for portability.
+       From Mike Frysinger:
+       * configure.ac: Add x32 ABI for x86_64.
  
-2011-05-08  Marc Glisse  <marc.glisse@inria.fr>
+2013-01-14  Niels Möller  <nisse@lysator.liu.se>
  
-       * doc/gmp.texi (gmp_randclass::get_f): Replace unsigned long
-       with mp_bitcnt_t.
+       * mini-gmp/tests/t-str.c (main): Added tests for mpn_get_str and
+       mpn_set_str.
  
-2011-05-07  Torbjorn Granlund  <tege@gmplib.org>
+2013-01-14 Marco Bodrato <bodrato@mail.dm.unipi.it>
  
-       * Version 5.0.2 released.
+       * doc/gmp.texi (gmp_version): Remove "was used" repetition.
+       (Upward compatibility): Mention mpn_bdivmod, GMP 4 -> GMP 5.
  
-       * gmp-h.in (__GNU_MP_VERSION_PATCHLEVEL): Bump.
+2013-01-13  Marc Glisse  <marc.glisse@inria.fr>
  
-       * Makefile.am (LIBGMP_LT_*, LIBGMPXX_LT_*, LIBMP_LT_*):
-       Bump version info.
+       * doc/gmp.texi: Let mpn_sqrtrem reference mpn_perfect_square_p instead
+       of mpz_perfect_square_p.
  
-2011-05-05  Marc Glisse  <marc.glisse@inria.fr>
+2013-01-11 Marco Bodrato <bodrato@mail.dm.unipi.it>
  
-       [These changes were made after the 5.0.2 release, but inserted here to
-       match the change chronology of the main repository.]
+       * mini-gmp/tests/t-comb.c: New test program, testing both
+       mpz_fac_ui and mpz_bin_uiui.
+       * mini-gmp/tests/Makefile (CHECK_PROGRAMS): Added t-comb.
  
-       * mpn/x86_64/fat/fat.c: Update for Sandy Bridge.
-       * config.guess: warning to keep it in sync with fat.c.
+       * mini-gmp/mini-gmp.c (mpz_mul_si): Simplify.
+       (mpz_mul_ui, mpz_mul, mpz_div_qr): Replace init+REALLOC with init2.
  
-2011-05-05  Torbjorn Granlund  <tege@gmplib.org>
+       * mini-gmp/mini-gmp.c (NEG_CAST): New macro.
+       (mpz_mul_si, mpz_set_si, mpz_cmp_si): Use NEG_CAST.
  
-       * mpn/x86_64/fat/fat_entry.asm: (PIC_OR_DARWIN): New symbol.  Use it to
-       work around Darwin problems.
+       * mini-gmp/mini-gmp.c (mpz_set_si, mpz_cmp_si): Simplify by using
+       the _ui variant.
  
-2011-05-02  Marc Glisse  <marc.glisse@inria.fr>
+       * mini-gmp/tests/t-root.c: Use mpz_ui_pow_ui, when base fits an ui.
  
-       * configfsf.guess: Update to version of 2011-02-02.
-       * configfsf.sub: Update to version of 2011-03-23.
+       * mini-gmp/tests/t-mul.c: Test also mpz_mul_si.
+       * mini-gmp/tests/t-sub.c: Test also mpz_ui_sub.
  
-2011-04-30  Marc Glisse  <marc.glisse@inria.fr>
+       * mini-gmp/mini-gmp.c (mpz_fits_slong_p): Correct range.
+       * mini-gmp/tests/t-signed.c: New test program, for get/set/cmp_si.
+       * mini-gmp/tests/Makefile (CHECK_PROGRAMS): Added t-signed.
  
-       * gmp-h.in (mpz_cdiv_q_2exp): Use mp_bitcnt_t to match the definition
-       and the documentation.
-       (mpz_remove): Likewise.
-       (mpf_eq): Likewise.
+       * mini-gmp/mini-gmp.c (mpz_hamdist): Handle different sizes.
+       * mini-gmp/tests/t-logops.c: Test also popcount and hamdist.
  
-       * ltmain.sh: Remove.
-       * .bootstrap: Let libtoolize generate ltmain.sh.
+2013-01-10 Marco Bodrato <bodrato@mail.dm.unipi.it>
  
-       * doc/gmp.texi (mpf_urandomb): Explicit the fact that it does not
-       change the precision.
+       * mpz/export.c: Less restrictive ASSERTs.
+       * mini-gmp/mini-gmp.c (mpz_export, mpz_import): Likewise.
+       * mini-gmp/tests/t-import.c: Test also size=0 or count=0.
  
-2011-04-28  Torbjorn Granlund  <tege@gmplib.org>
+2013-01-10  Torbjorn Granlund  <tege@gmplib.org>
  
-       [This change was made after the 5.0.2 release, but inserted here to
-       match the change chronology of the main repository.]
+       * mini-gmp/tests/t-import.c (main): Don't drop off function end.
  
-       * configure.in (x86_64): Support bobcat specifically.
-       (x86): Match bobcat and bulldozer, handle like k10.
+       * Makefile.am (check-mini-gmp): Set LD_LIBRARY_PATH to allow testing
+       with dynamic main GMP build.
  
-2011-04-27  Torbjorn Granlund  <tege@gmplib.org>
+2013-01-09 Marco Bodrato <bodrato@mail.dm.unipi.it>
  
-       * tune/speed.h (speed_cyclecounter): Always use PIC variant when
-       compiled with Apple's GCC.
+       * mini-gmp/mini-gmp.c (mpz_export): Support op=0 countp=NULL.
  
-2011-04-26  Torbjorn Granlund  <tege@gmplib.org>
+2013-01-08  Niels Möller  <nisse@lysator.liu.se>
  
-       * mpn/sparc32/sparc-defs.m4 (changecom): Don't redefine '!' as it
-       interferes with expressions.
+       * mini-gmp/tests/t-import.c: New test program, testing both
+       mpz_import and mpz_export.
+       * mini-gmp/tests/Makefile (CHECK_PROGRAMS): Added t-import.
  
-2011-04-10  Niels Möller  <nisse@lysator.liu.se>
+       * mini-gmp/tests/mini-random.c (mini_rrandomb_export): New
+       function.
+       * mini-gmp/tests/mini-random.h: Declare it.
+       * mini-gmp/tests/hex-random.c (hex_rrandomb_export): New function.
+       * mini-gmp/tests/hex-random.h: Declare it.
  
-       [This change was made after the 5.0.2 release, but inserted here to
-       match the change chronology of the main repository.]
+       * mini-gmp/mini-gmp.c (mpz_export): Compute accurate word count up
+       front, to avoid generating any high zero words.
  
-       * configure.in: Add invert_limb_table to extra_functions_64 on
-       x86_64.
+2013-01-07 Marco Bodrato <bodrato@mail.dm.unipi.it>
  
-2011-04-10  Torbjorn Granlund  <tege@gmplib.org>
+       * mini-gmp/README: Document base limitation for conversions.
+       * mini-gmp/mini-gmp.c (mpz_set_str): Remove goto.
+       (mpz_import, mpz_export): Correctly use order/endianness.
  
-       * acinclude.m4 (GMP_ASM_RODATA): Make 'foo' larger to avoid clang
-       problems.
+2013-01-05  Torbjorn Granlund  <tege@gmplib.org>
  
-2011-03-28  Torbjorn Granlund  <tege@gmplib.org>
+       * longlong.h (aarch64): Make add_ssaaaa and sub_ddmmss actually work.
  
-       * mpn/x86/invert_limb.asm: Protect movzwl register parameters from
-       being interpreted as m4 macro parameters.
+2013-01-04 Marco Bodrato <bodrato@mail.dm.unipi.it>
  
-2011-03-21  Torbjorn Granlund  <tege@gmplib.org>
+       From shuax:
+       * mini-gmp/mini-gmp.c (mpz_import): Reset limb after storing it.
  
-       * configure.in (hppa): Under linux, treat 64-bit processors as if they
-       were 32-bit processors.
+2013-01-04  Torbjorn Granlund  <tege@gmplib.org>
  
-2011-03-15 Marco Bodrato <bodrato@mail.dm.unipi.it>
+       From Marko Lindqvist:
+       * configure.ac: Use AC_CONFIG_HEADERS instead of the obsolete
+       AM_CONFIG_HEADER.
  
-       * mpn/generic/toom_interpolate_16pts.c: Remove ambiguity.
+2013-01-02 Marco Bodrato <bodrato@mail.dm.unipi.it>
  
-2011-03-12  Torbjorn Granlund  <tege@gmplib.org>
+       * tests/mpz/bit.c: Wider testing for mpz_combit.
+       * tests/mpz/logic.c: Check the -2^n case.
  
-       * tune/powerpc.asm: Use powerpc syntax, not power syntax.
+       * mpz/ior.c: Fixed an allocation bug in the -2^n case.
  
-2011-03-09  Marc Glisse  <marc.glisse@inria.fr>
+2012-12-31  Torbjorn Granlund  <tege@gmplib.org>
  
-       * doc/gmp.texi: Remove void return type from constructors. Document
-       explicit constructors. Document mpf_class::mpf_class(mpf_t).
+       * mpn/generic/get_d.c: Minor reorg, add vax D code.
  
-2011-02-24  Torbjorn Granlund  <tege@gmplib.org>
+       * gmp-impl.h (double_extract): New union type for vax D floats.
  
-       * mpn/x86/p6/sse2/mod_1_4.asm: Fix typo in MULFUNC_PROLOGUE.
+       * tests/mpq/t-get_d.c (check_random): Limit exponents on vax.
  
-2011-02-04  Torbjorn Granlund  <tege@gmplib.org>
+2012-12-30 Marco Bodrato <bodrato@mail.dm.unipi.it>
  
-       * mpn/x86_64/core2/popcount.asm: Add a MULFUNC_PROLOGUE.
-       * mpn/x86_64/pentium4/popcount.asm: Likewise.
+       * tests/mpz/bit.c (check_clr_extend): Check _set shrink.
  
-2011-01-31  Torbjorn Granlund  <tege@gmplib.org>
+2012-12-29  Torbjorn Granlund  <tege@gmplib.org>
  
-       [These changes were made after the 5.0.2 release, but inserted here to
-       match the change chronology of the main repository.]
+       * demos/calc/calc.c: Remove generated file from repo.
+       * demos/calc/calc.h: Likewise.
+       * demos/calc/calclex.c: Likewise.
  
-       * config.guess: Recognise new Intel processors.
+2012-12-27  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/generic/get_d.c: Complete rewrite of non-IEEE code.
+
+       * tests/mpq/t-get_d.c (main): Suppress check_random for vax.
+
+2012-12-25  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/x86_64/bdiv_q_1.asm: Use LEA for binvert_limb_table.
+
+2012-12-23  Torbjorn Granlund  <tege@gmplib.org>
+
+       * tests/mpz/t-get_d.c (check_onebit): Decrease vax limit to avoid
+       overflow in last, unused 'want' value.
+
+       * config.guess: Recognise AMD family 22 as a future bobcat.
+
+2012-12-21  Torbjorn Granlund  <tege@gmplib.org>
+
+       * configure.ac: Rename configure.in.
+
+2012-12-17  Torbjorn Granlund  <tege@gmplib.org>
+
+       * Version 5.1.0 released.
+
+       * configure.in (none-*-*): Allow this again, but print a warning.
+
+2012-12-17 Marco Bodrato <bodrato@mail.dm.unipi.it>
+
+       * mpz/n_pow_ui.c: Fix typos in an ASSERT.
+
+2012-12-16  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/generic/mu_div_qr.c (mpn_preinv_mu_div_qr): Explicitly use
+       MPN_COPY_INCR for slightly overlapping copy.
+
+2012-12-15 Marco Bodrato <bodrato@mail.dm.unipi.it>
+
+       * tests/mpn/toom-sqr-shared.h: Skip ALLOCs if the test is skipped.
+
+2012-12-13  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/x86_64/dos64.m4 (PIC): Move definition early.
+       (JMPENT): Remove PIC variant.
+
+       * mpn/x86_64/darwin.m4 (JUMPTABSECT): Define to .text, instead of
+       something sensible.
+
+2012-12-12  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/x86_64/x86_64-defs.m4 (JMPENT): New macro.
+       * mpn/x86_64/dos64.m4: Likewise.
+       * mpn/x86_64/darwin.m4: Likewise.
+       * mpn/x86_64/mod_34lsub1.asm: Use JMPENT to properly support PIC.
+       * mpn/x86_64/mullo_basecase.asm: Likewise.
+       * mpn/x86_64/sqr_basecase.asm: Likewise.
+
+2012-12-11  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/x86_64/mod_34lsub1.asm: Try different jump table for the benefit
+       of broken Apple linkers.
+
+2012-12-09  Torbjorn Granlund  <tege@gmplib.org>
+
+       * configure.in: Make GMP_NONSTD_ABI ABI specific.
+
+2012-12-08  Torbjorn Granlund  <tege@gmplib.org>
+
+       * Makefile.am (LIBGMP_LT_*, LIBGMPXX_LT_*): Bump version info.
+       * gmp-h.in: Bump version.
+
+2012-12-06 Marco Bodrato <bodrato@mail.dm.unipi.it>
+
+       * tests/mpq/reuse.c: New test (adapted from mpf/reuse.c).
+       * tests/mpq/Makefile.am (check_PROGRAMS): Add reuse.
+
+       * mpz/abs.c: Use NEWALLOC.
+       * mpz/neg.c: Likewise.
+       * mpz/com.c: Reduce branches.
+
+2012-12-05  Niels Möller  <nisse@lysator.liu.se>
+
+       * mpn/generic/brootinv.c (mpn_brootinv): Make valgrind happier, at
+       the cost of a redundant MPN_ZERO.
+
+       * mpz/jacobi.c (mpz_jacobi): Check for asize == 0 or bsize == 0
+       before using the low limbs.
+
+2012-12-05  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/generic/set_str.c (mpn_dc_set_str): Work around a valgrind issue.
+
+       * mpz/powm_ui.c: Don't assume >= 2 limbs in mod argument.
+
+       * tests/tests.h (TESTS_REPS): Handle float GMP_CHECK_REPFACTOR.
+
+       * longlong.h: Refine cpp test for vax.
+       * tests/mpn/t-get_d.c: Likewise.
+       * tests/mpz/t-get_d.c: Likewise.
+       * tests/mpz/t-cmp_d.c: Likewise.
+       * tests/mpz/t-get_d.c: Likewise.
+       * tests/mpq/t-get_d.c: Likewise.
+       * tests/mpf/t-get_d.c: Likewise.
+
+2012-11-30  Torbjorn Granlund  <tege@gmplib.org>
+
+       * gen-fac.c (gen_consts): Correct printf types.
+
+       * mpn/arm/v7a/cora15/gmp-mparam.h: New file.
+
+       * configure.in (arm*-*-*): New compiler optional "tune".  Pass value for
+       selected processors.  Add more specific path components.
+
+2012-11-29  Torbjorn Granlund  <tege@gmplib.org>
+
+       From Andoni Morales Alastruey:
+       * longlong.h: Conditionalise ARM asm on !__thumb__.
+
+2012-11-28  Torbjorn Granlund  <tege@gmplib.org>
+
+       * config.guess (arm*-*-*): Support specific ARM processors.
+       * config.sub: Match arm CPUs.
+       * configure.in (arm*-*-*): Likewise.
+
+       * mpz/powm.c: Move new_b out since it lives on through b.
+
+       * configure.in (arm*-*-*): Pass -marm to deal with compilers defaulting
+       to thumb code.
+
+2012-11-26  Torbjorn Granlund  <tege@gmplib.org>
+
+       * tests/cxx/t-ops2.cc (checkz): Reduce huge numbers to avoid vax
+       overflow.
+
+2012-11-25  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/generic/get_d.c: Reinsert non-IEEE code.
+
+       * mpn/vax/add_n.asm: New file.
+       * mpn/vax/add_n.s: Remove.
+       * mpn/vax/addmul_1.asm: New file.
+       * mpn/vax/addmul_1.s: Remove.
+       * mpn/vax/lshift.asm: New file.
+       * mpn/vax/lshift.s: Remove.
+       * mpn/vax/mul_1.asm: New file.
+       * mpn/vax/mul_1.s: Remove.
+       * mpn/vax/rshift.asm: New file.
+       * mpn/vax/rshift.s: Remove.
+       * mpn/vax/sub_n.asm: New file.
+       * mpn/vax/sub_n.s: Remove.
+       * mpn/vax/submul_1.asm: New file.
+       * mpn/vax/submul_1.s: Remove.
+
+       * mpn/vax/elf.m4: New file.
+       * configure.in (vax*-*-*elf*): New case, grabbing vax/elf.m4.
+
+       * tests/mpn/t-get_d.c (check_onebit): Get vax bounds right.
+       (main): Switch off check_rand for vax.
+
+2012-11-22  Niels Möller  <nisse@lysator.liu.se>
+
+       * mini-gmp/tests/run-tests: Copied latest version from GNU Nettle.
+       Minor fix to the use of $EMULATOR, and proper copyright notice.
+
+2012-11-16  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/generic/powm_sec.c (redcify): Use mpn_sb_div_r_sec.
+
+       * mpn/generic/sb_div_sec.c: New file.
+       * mpn/generic/sbpi1_div_sec.c: New file.
+       * configure.in (gmp_mpn_functions): Add new files.
+       * gmp-impl.h: Declare new functions.
+
+2012-11-12  Torbjorn Granlund  <tege@gmplib.org>
+
+       * longlong.h: Add ARM64 support.
+       * longlong.h: Add AVR support.
+
+       * mpn/powerpc64/mode64/divrem_1.asm: Tune, simplify.
+
+       * mpq/md_2exp.c: Use MPN_COPY_INCR, not MPN_COPY_DECR.
+       * tests/mpq/t-md_2exp.c (check_random): New function.
+
+2012-11-10  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/generic/remove.c (mpn_bdiv_qr_wrap): Make static.
+
+2012-11-04  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpz/powm_ui.c: Rewrite.
+
+2012-11-01  Niels Möller  <nisse@lysator.liu.se>
+
+       * mpn/generic/brootinv.c (mpn_brootinv): Input size in limbs
+       rather than bits. Use single-precision iterations for the first
+       limb.
+       * mpn/generic/perfpow.c (is_kth_power): Update mpn_brootinv call.
+       * tests/mpn/t-brootinv.c (main): Likewise.
+       * tune/speed.h (SPEED_ROUTINE_MPN_BROOTINV): Likewise.
+       * gmp-impl.h (mpn_brootinv): Updated prototype.
+
+       * mpn/generic/hgcd2.c (mpn_hgcd2): Removed redundant loop exit
+       tests in the single-precision loop.
+
+       * mpz/combit.c (mpz_combit): Rewrite, optimizing for the common
+       case.
+
+2012-10-31  Niels Möller  <nisse@lysator.liu.se>
+
+       * tests/mpn/Makefile.am (check_PROGRAMS): Added t-brootinv.
+       * tests/mpn/t-brootinv.c: New file.
+
+       * mpn/generic/broot.c (mpn_broot_invm1): Avoid a mullo_n in the
+       loop, and do powering as a plain mpn_sqr followed by mpn_powlo.
+
+       * tune/speed.c (routine): Added mpn_broot, mpn_broot_invm1,
+       mpn_brootinv.
+
+       * tune/common.c (speed_mpn_broot, speed_mpn_broot_invm1)
+       (speed_mpn_brootinv): New functions.
+       * tune/speed.h (SPEED_ROUTINE_MPN_BROOT)
+       (SPEED_ROUTINE_MPN_BROOTINV): New macros.
+
+       * mpn/generic/broot.c (mpn_broot_invm1): Made non-static (mainly
+       for benchmarking).
+       * gmp-impl.h (mpn_broot_invm1): Declare it.
+
+2012-10-28  Torbjorn Granlund  <tege@gmplib.org>
+
+       * configure.in (gmp_mpn_functions): Add new files.
+       * gmp-impl.h: Declare new functions.
+       * mpn/generic/perfpow.c: Overhaul.
+       (binv_root, binv_sqroot): Remove.
+       * mpn/generic/brootinv.c: New file, code from overhauled binv_root.
+       * mpn/generic/bsqrtinv.c: New file, code from overhauled binv_sqroot.
+       * mpn/generic/bsqrt.c: New file.
+
+       * tests/mpn/t-broot.c: Add a forgotten TMP_MARK.
+
+2012-10-28  Niels Möller  <nisse@lysator.liu.se>
+
+       * mpn/generic/broot.c (mpn_broot): New file and function.
+       * configure.in (gmp_mpn_functions): Add broot.
+       * gmp-impl.h (mpn_broot): Declare.
+       * tests/mpn/t-broot.c: New testcase.
+       * tests/mpn/Makefile.am (check_PROGRAMS): Added t-broot.
+
+2012-10-27  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/generic/remove.c: Get remainder allocation right.
+
+2012-10-25  Torbjorn Granlund  <tege@gmplib.org>
+
+       * longlong.h: De-support old POWER asm syntax.
+
+       * tests/mpz/t-remove.c: Run more tests, but use a tad smaller operands.
+
+       * mpn/generic/remove.c (mpn_bdiv_qr_wrap): New function.
+       (mpn_remove): Call mpn_bdiv_qr_wrap.
+       * mpz/remove.c: Enable suppressed mpn_remove call.
+
+2012-10-17  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpz/powm_ui.c (mpz_powm_ui): Deflect to mpz_powm for large exponent.
+
+2012-09-10  Torbjorn Granlund  <tege@gmplib.org>
+
+       * demos/factorize.c: Rewrite to more current form.  Implement Lucas
+       prime proving, and make its use the default.
+       * demos/primes.h: New file.
+
+2012-08-24  Torbjorn Granlund  <tege@gmplib.org>
+
+       * demos/factorize.c: Overhaul.
+
+2012-08-06 Marco Bodrato <bodrato@mail.dm.unipi.it>
+
+       * doc/gmp.texi (mpn_neg): Correctly document returned type.
+
+       * gmp-impl.h (_mpz_newalloc, log_n_max): mark with inline (spotted by Niels).
+
+2012-07-28  Marc Glisse  <marc.glisse@inria.fr>
+
+       * gmpxx.h (std::common_type): New partial specializations with builtin
+       types.
+       * tests/cxx/t-cxx11.cc: Test it.
+
+2012-07-21  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/powerpc32/vmx/mod_34lsub1.asm: Fix r0 clobbering issue with
+       "large" code affecting elf+darwin PIC.
+
+2012-07-21  Marc Glisse  <marc.glisse@inria.fr>
+
+       * gmpxx.h (__GMPXX_CONSTANT): Disable for g++-3.4.
+
+2012-06-26  Torbjorn Granlund  <tege@gmplib.org>
+
+       * Makefile.am (LIBMP_LT_*): Remove these.
+
+2012-06-26  Marc Glisse  <marc.glisse@inria.fr>
+
+       * Makefile.am (LIBGMP_LT_*, LIBGMPXX_LT_*): Update comment for 5.1.0.
+
+2012-06-24 Marco Bodrato <bodrato@mail.dm.unipi.it>
+
+       * configure.in (CALLING_CONVENTIONS_OBJS): Disable any use of
+       assembly code with the --disable-assembly option.
+       * mpz/oddfac_1.c: Use the ASSERT_CODE macro.
+       * gen-trialdivtab.c (mpz_log2): Use mpz_sizeinbase (., 2).
+
+       * gmp-impl.h (MPN_SIZEINBASE_16): Replace with MPN_SIZEINBASE_2EXP
+       from mpz/export.c .
+       * mpz/export.c (MPN_SIZEINBASE_2EXP): Removed.
+       * mpn/generic/sizeinbase.c: Use MPN_SIZEINBASE.
+
+       * mpz/nextprime.c: Use MPN_SIZEINBASE_2EXP to count bits.
+       * mpn/generic/perfpow.c: Likewise.
+       * mpn/generic/rootrem.c: Likewise.
+       * mpz/get_d_2exp.c: Likewise.
+       * mpn/generic/powm_sec.c: Likewise, nailify.
+       * mpn/generic/powlo.c: Likewise.
+       * mpn/generic/powm.c: Likewise.
+
+       * mini-gmp/mini-gmp.c (mpz_div_r_2exp, mpz_div_q_2exp): Improve
+       adjustment condition.
+
+2012-06-23  Marc Glisse  <marc.glisse@inria.fr>
+
+       * gmpxx.h (numeric_limits): Make content public.
+       * cxx/limits.cc: New file, proper declarations.
+       * Makefile.am: List new file.
+       * cxx/Makefile.am: Likewise.
+       * cxx/t-misc.cc: Add minimal test for numeric_limits.
+
+2012-06-09  Marc Glisse  <marc.glisse@inria.fr>
+
+       * gmpxx.h (__gmp_resolve_expr::srcptr_type): New typedef.
+       (__gmp_temp): Wrapper for mp*_class, the constructor copies the
+       precision of its second argument for mpf_t.
+       (__gmp_expr::eval(p, prec)): Remove.
+       (__gmp_expr::eval(p)): Use __gmp_temp.
+       (__gmp_set_expr): Never pass prec to eval().
+
+2012-06-08 Marco Bodrato <bodrato@mail.dm.unipi.it>
+
+       * gmp-impl.h (__GMP_WITHIN_CONFIGURE): Use the same #if as in gmp-h.in.
+       (MPN_NORMALIZE_NOT_ZERO): Tighter ASSERT.
+       (MPZ_NEWALLOC): New macro.
+       * mpq: Use the new macro when possible.
+       * mpz/bin_uiui.c: Likewise.
+       * mpz/oddfac_1.c: Likewise.
+       * mpz/prodlimbs.c: Likewise.
+
+       * mini-gmp/mini-gmp.c (mpz_realloc): remove a branch.
+
+2012-06-04  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/powerpc64/aix.m4 (ASM_START): Claim machine type "any".
+
+2012-06-03  Niels Möller  <nisse@lysator.liu.se>
+
+       * mpn/generic/gcdext.c (mpn_gcdext): Deleted code for handling
+       impossible case u1 == 0.  Simplified test for unlikely case u0 == 0.
+
+2012-06-02  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/arm/lshiftc.asm: New file.
+
+2012-06-01  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/arm/aorslsh1_n.asm: Use cmp/cmn instead of subs/adds in more
+       places.
+
+       * mpz/get_str.c: Don't strip leading zeros since current mpn_get_str
+       won't generate any.  Misc streamlining.
+       * mpz/out_str.c: Analogous changes.
+
+       * tests/mpz/io.c: Use a wider range of bases.
+
+       * tests/mpz/t-cong.c (check_random): Rewrite random generation for
+       exponentially distributed operand sizes.
+
+2012-06-01 Marco Bodrato <bodrato@mail.dm.unipi.it>
+
+       * mpq: Use more macros and MPZ_REALLOC return value when possible.
+
+       * gmp-impl.h (LIMBS): Removed, was an alias for PTR.
+       * mpz/combit.c: Use PTR and CNST_LIMB.
+
+       * tests/mpn/t-bdiv.c: Test also mpn_bdiv_qr.
+       * mpn/generic/bdiv_qr.c: Add an ASSERT.
+
+       * mpn/generic/remove.c: Add a zero limb to use bdiv_qr...
+
+2012-05-31  Marc Glisse  <marc.glisse@inria.fr>
+
+       * gmpxx.h (mpq_class::mpq_class): Handle mpq_class(0,1).
+       * tests/cxx/t-constr.cc: Test it.
+
+2012-05-30  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/x86_64 (FUNC_ENTRY): New name for DOS64_ENTRY.
+       * mpn/x86_64 (FUNC_EXIT): New name for DOS64_EXIT.
+
+2012-05-29 Marco Bodrato <bodrato@mail.dm.unipi.it>
+
+       * mpz/remove.c: Optimise branches.
+
+       * mpn/generic/toom6h_mul.c: Fewer branches in the LIKELY balanced path.
+       * mpn/generic/toom8h_mul.c: Likewise.
+
+2012-05-29  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/arm/v5/mod_1_1.asm: New file.
+
+2012-05-28  Niels Möller  <nisse@lysator.liu.se>
+
+       * mpn/generic/gcdext.c (compute_v): Simplified carry handling a
+       bit, reduced stated scratch need from 2n+1 to 2n. Also comment and
+       ASSERT improvements.
+
+2012-05-27  Torbjorn Granlund  <tege@gmplib.org>
+
+       * config.guess: Add new x86 CPUs.
+       * mpn/x86/fat/fat.c: Likewise.
+       * mpn/x86_64/fat/fat.c: Likewise.
+
+2012-05-27 Marco Bodrato <bodrato@mail.dm.unipi.it>
+
+       * mpn/x86_64/fat/fat.c: abort iff longmode-capable-bit is turned off.
+
+       * mpn/generic/toom8h_mul.c: mark UNLIKELY branches.
+
+2012-05-26  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpz: Use MPZ_REALLOC return value when possible.
+
+2012-05-25 Marco Bodrato <bodrato@mail.dm.unipi.it>
+
+       * mini-gmp/tests/t-div.c: Test all _qr, _q, _r variants.
+       * mini-gmp/tests/t-lcm.c: Test the _ui variant.
+
+       * mini-gmp/mini-gmp.c (mpz_mod, mpz_mod_ui): New functions.
+       * mini-gmp/mini-gmp.h (mpz_mod, mpz_mod_ui): Prototypes.
+
+       * mpz/scan1.c: Simplify, and add a shortcut for scan1(z, 0).
+
+2012-05-24  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpz/n_pow_ui.c: Cast non-limb count_leading_zeros argument.
+
+2012-05-24 Marco Bodrato <bodrato@mail.dm.unipi.it>
+
+       * mpz/remove.c: Support negative divisor.
+       * tests/mpz/t-remove.c: Test negative divisor.
+
+2012-05-23  Torbjorn Granlund  <tege@gmplib.org>
+
+       * tests/mpz/reuse.c: Major rewrite.
+
+2012-05-23 Marco Bodrato <bodrato@mail.dm.unipi.it>
+
+       * mpz/sqrt.c: Further simplify.
+       * mpz/sqrtrem.c: Likewise.
+
+       * Mark failing branches with UNLIKELY. Many files affected.
+
+2012-05-22  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpz/sqrt.c: Allocate less for overlapping operands, simplify.
+       * mpz/sqrtrem.c: Likewise.
+
+2012-05-21 Marco Bodrato <bodrato@mail.dm.unipi.it>
+
+       * mpn/generic/toom8_sqr.c: Reduce branches for recursion.
+       * mpn/generic/toom8h_mul.c: Likewise.
+
+       * tests/mpn/t-toom8h.c: Don't use GMP_NUMB_BITS when not yet defined.
+
+2012-05-20  Torbjorn Granlund  <tege@gmplib.org>
+
+       * tests/mpz/t-gcd.c: Rewrite.
+
+2012-05-19  Torbjorn Granlund  <tege@gmplib.org>
+
+       * tests/mpz/t-gcd.c: Generate larger operands for better gcd code
+       coverage; distribute size exponentially.
+
+2012-05-17 Marco Bodrato <bodrato@mail.dm.unipi.it>
+
+       * mpf/pow_ui.c: Simplify.
+       * tests/mpf/reuse.c (dsi_func): Exercise pow_ui.
+
+       * tests/mpf/t-set_ui.c (check_data): LONG_HIGHBIT -> ULONG_HIGHBIT.
+       * tests/mpf/t-set.c (check_random): New check, both set and init_set.
+
+       * tests/cxx/t-ops.cc (check_mpq): Check squaring.
+       * tests/mpq/t-equal.c (check_various): Check different den-size.
+
+       * mpn/generic/mullo_n.c: Disable MAYBE_ if WANT_FAT_BINARY.
+       * mpz/cmpabs_d.c: Remove an unused branch.
+
+       * tests/mpz/t-get_d_2exp.c (check_zero): New check.
+       * tests/mpz/t-inp_str.c: A few more cases.
+       * tests/mpz/t-cmp_d.c: More bases and symbols, a few cases.
+
+       * mpz/rootrem.c: Correctly handle odd roots of negatives.
+       * tests/mpz/t-root.c: Test it.
+
+2012-05-16  Torbjorn Granlund  <tege@gmplib.org>
+
+       * tests/mpf/t-eq.c (check_random): New function, meat from old main().
+       (check_data): New function.
+
+2012-05-13  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/arm/rsh1aors_n.asm: New file.
+       * mpn/arm/v5/mod_1_2.asm: New file.
+
+2012-05-11  Marc Glisse  <marc.glisse@inria.fr>
+
+       * gmpxx.h (explicit operator bool): New functions.
+       * tests/cxx/t-cxx11.cc: Test the above.
+
+2012-05-10 Marco Bodrato <bodrato@mail.dm.unipi.it>
+
+       * gmp-impl.h (__gmpn_cpuvec_initialized): Was __gmpn_cpuvec.initialized
+       * mpn/x86/fat/fat.c: Use separated _initialized variable.
+       * mpn/x86_64/fat/fat.c: Likewise.
+       * tests/mpn/t-fat.c: Likewise.
+
+       * mpn/generic/toom2_sqr.c: Override global __gmpn_cpuvec_initialized.
+       * mpn/generic/toom22_mul.c: Likewise.
+       * mpn/generic/toom3_sqr.c: Likewise.
+       * mpn/generic/toom33_mul.c: Likewise.
+
+2012-05-09 Marco Bodrato <bodrato@mail.dm.unipi.it>
+
+       * mini-gmp/mini-gmp.c: merge mpz_rootrem and mpz_sqrtrem.
+
+       * mpn/generic/sqrtrem.c (invsqrttab): Reduce size removing common byte.
+
+       * mpz/bin_uiui.c (mul3, mul4, mul8): Remove unneeded shifts.
+       (MAXFACS): Redefine, using the shared (safer) log_n_max.
+
+2012-05-08  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/minithres/gmp-mparam.h (REDC_1_TO_REDC_N_THRESHOLD): Up to 9, for
+       coherency with ASSERT in mpn/generic/redc_n.c.
+
+2012-05-07 Marco Bodrato <bodrato@mail.dm.unipi.it>
+
+       * mpn/minithres/gmp-mparam.h: Updated TOOM6 and FAC_DSC.
+       * tests/mpn/toom-sqr-shared.h: Don't test if no range.
+
+       * mpz/oddfac_1.c: Add ASSERTs to warn about small threshold.
+       * tune/tuneup.c: Update minimal threshold for FAC_DSC.
+
+2012-05-06  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/arm/v6/sqr_basecase.asm: Simplify n=4 code.
+
+2012-05-05 Marco Bodrato <bodrato@mail.dm.unipi.it>
+
+       * mpn/generic/invert.c: Mark a branch UNLIKELY.
+       * tune/tuneup.c (tune_fac_u): Update DSC_THRESHOLD minimum.
+       * gmp-impl.h (FAC_???_THRESHOLD): Update default values.
+       (ABOVE_THRESHOLD): New definition with __builtin_constant_p.
+
+       * mpn/generic/toom22_mul.c: Disable MAYBE_ if WANT_FAT_BINARY.
+       * mpn/generic/toom33_mul.c: Likewise.
+       * mpn/generic/toom2_sqr.c: Likewise.
+       * mpn/generic/toom3_sqr.c: Likewise.
+
+2012-05-04  Torbjorn Granlund  <tege@gmplib.org>
+
+       * tune/tuneup.c: Measure POWM_SEC_TABLE after the REDC thresholds.
+
+2012-05-03  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/generic/powm_sec.c: Use redc_2.
+       (INNERLOOP): Use this mechanism, like plain powm.c.
+       (WANT_CACHE_SECURITY): Remove, feature now unconditional.
+
+2012-05-02  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpz/bin_uiui.c: Make use of CNST_LIMB.
+
+2012-05-02 Marco Bodrato <bodrato@mail.dm.unipi.it>
+
+       * mpz/mfac_uiui.c: Support limb != ui.
+
+2012-05-02  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/arm/logops_n.asm: Work around register clobbering issue.
+
+       * mpn/arm/aorscnd_n.asm: New file.
+
+2012-05-01  Torbjorn Granlund  <tege@gmplib.org>
+
+       * configure.in: Put arm dirs in path in proper prio order.
+
+       * mpn/arm/logops_n.asm: New file.
+
+       * mpz/2fac_ui.c: Fix assumed typo.
+
+       * mpn/arm/v6/gmp-mparam.h: New file.
+
+       * mpn/arm/v5/gcd_1.asm: Hack for undefined BMOD_1_TO_MOD_1_THRESHOLD.
+       * mpn/arm/v6t2/gcd_1.asm: Likewise.
+
+2012-04-30  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/arm/v6/sqr_basecase.asm: New file.
+
+2012-04-30 Marco Bodrato <bodrato@mail.dm.unipi.it>
+
+       * mpn/generic/comb_tables.c: New file.
+       * configure.in: Add it.
+       * gen-fac.c: Define table limits.
+       * gmp-impl.h: Declare tables.
+       (log_n_max): New static function.
+       * mpz/2fac_ui.c: Use shared tables.
+       * mpz/bin_uiui.c: Likewise.
+       * mpz/oddfac_1.c: Likewise.
+       * mpz/primorial_ui.c: Likewise.
+
+       * mpz/mfac_uiui.c: New file.
+       * Makefile.am: Compile it.
+       * mpz/Makefile.am (libmpz_la_SOURCES): Add mpz_mfac_uiui.c
+       * gmp-h.in (mpz_mfac_uiui): Declare.
+
+       * tests/mpz/t-mfac_uiui.c: New file.
+       * tests/mpz/Makefile.am: Run it.
+
+       * doc/gmp.texi: Document mpz_mfac_uiui, collapsing with other factorial functions.
+
+       * tests/mpz/t-lcm.c: Test zero too.
+
+       * mpz/prodlimbs.c: Simplify threshold (should be tuned, not guessed).
+
+2012-04-29  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/arm/aors_n.asm: Tune for more stable performance.
+
+       * mpn/arm/aorslsh1_n.asm: New file.
+
+       * mpn/arm/mod_34lsub1.asm: New file.
+
+       * mpn/arm/v6t2/divrem_1.asm: New file.
+
+2012-04-28  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/thumb/add_n.asm: New file.
+       * mpn/thumb/sub_n.asm: New file.
+       * mpn/thumb/add_n.s: Remove broken code.
+       * mpn/thumb/sub_n.s: Likewise.
+
+       * mpn/arm/v6/addmul_1.asm: Rewrite for stable speed, smaller size.
+       * mpn/arm/v6/mul_1.asm: Likewise.
+
+2012-04-27  Torbjorn Granlund  <tege@gmplib.org>
+
+       * configure.in: Search arm/v6t2 for arm7.
+
+       * mpn/arm/v5/gcd_1.asm: New file.
+       * mpn/arm/v6t2/gcd_1.asm: New file.
+
+       * mpn/arm/mode1o.asm: New file.
+       * mpn/arm/v6t2/mode1o.asm: New file.
+
+       * mpn/arm/arm-defs.m4 (LEA): New define.
+       * mpn/arm/invert_limb.asm: Use LEA.
+
+2012-04-26 Marco Bodrato <bodrato@mail.dm.unipi.it>
+
+       * mpz/bin_uiui.c (bc_bin_uiui): Nail support.
+       * tests/cxx/t-ops2.cc: Test 0/3.
+       * oddfac_1.c: assume n > 26.
+       * tests/mpz/t-jac.c (mpn_jacobi_n): Enlarge tested sizes.
+
+2012-04-24  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/arm/v6/addmul_2.asm: New file.
+       * mpn/arm/v6/mul_2.asm: New file.
+
+2012-04-23  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/arm/aorsmul_1.asm: Tweak loop control for a 6% speed increase.
+
+2012-04-22  Torbjorn Granlund  <tege@gmplib.org>
+
+       * configure.in: Recognise ARM sub-architectures.
+
+       * configfsf.guess: Update to current FSF version.
+       * configfsf.sub: Likewise.
+
+       * mpn/arm/bdiv_dbm1c.asm: New file.
+
+       * mpn/arm/v6/mul_1.asm: New file.
+       * mpn/arm/v6/addmul_1.asm: New file.
+
+2012-04-22 Marco Bodrato <bodrato@mail.dm.unipi.it>
+
+       * gen-fac.c: Renamed, was gen-fac_ui.c .
+       * Makefile.am: Renamed gen-fac.c and fac_table.h .
+       * gmp-impl.h: #include "fac_table.h".
+       * mpz/oddfac_1.c: Use generated constant.
+       * mpz/bin_ui.c: Small optimisations.
+
+       * tune/common.c (speed_mpz_bin_ui): New function.
+       * tune/speed.h: Declare it.
+       * tune/speed.c: Use it.
+
+2012-04-21  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/arm/mul_1.asm: Cleanup.
+       * mpn/arm/copyi.asm: Cleanup, assume allocate-on-write cache.
+       * mpn/arm/copyd.asm: Likewise.
+
+       * mpn/arm/add_n.asm: Delete.
+       * mpn/arm/sub_n.asm: Delete.
+       * mpn/arm/aors_n.asm: New file, made from old files.
+
+       * mpn/arm/addmul_1.asm: Delete.
+       * mpn/arm/submul_1.asm: Delete.
+       * mpn/arm/aorsmul_1.asm: New file, made from old files.
+
+       * mpn/arm/com.asm: New file.
+       * mpn/arm/lshift.asm: New file.
+       * mpn/arm/rshift.asm: New file.
+
+2012-04-20  Torbjorn Granlund  <tege@gmplib.org>
+
+       * tests/mpq/io.c: New file.
+       * tests/mpq/Makefile.am: Run it.
+
+       * mpz/clrbit.c: Simplify along the lines of setbit.c.
+
+2012-04-20 Marco Bodrato <bodrato@mail.dm.unipi.it>
+
+       * mpz/setbit.c: Simplify.
+
+       * gmp-impl.h (LOG2C): Define.
+       * mpz/fac_ui.c (LOG2C): Remove.
+       * mpz/2fac_ui.c (LOG2C): Remove.
+       * mpz/oddfac_1.c (LOG2C): Remove.
+       * mpn/generic/binvert.c (LOG2C): Remove.
+       * mpn/generic/invertappr.c (LOG2C): Remove.
+
+       * mpz/bin_uiui.c (mpz_goetgheluck_bin_uiui): Move declarations,
+       and assume that n and k are not small.
+
+2012-04-19  Torbjorn Granlund  <tege@gmplib.org>
+
+       * tests/mpz/Makefile.am (check_PROGRAMS): Add t-remove.
+
+       * tests/mpz/t-remove.c: Clear out mpz variables.
+
+       * tests/mpz/t-cong.c (check_random): Use much larger numbers.
+       (check_data): Check congruences mod 0.
+
+       * tests/mpz/t-divis.c: Test divisibility by zero.
+
+       * tests/mpz/reuse.c: Test mpz_mod.
+
+       * mpz/setbit.c: Remove dead code.  Use CNST_LIMB.
+       * mpz/clrbit.c: Use CNST_LIMB.
+
+2012-04-19 Marco Bodrato <bodrato@mail.dm.unipi.it>
+
+       * primesieve.c: New file, with functions from mpz/oddfac_1.c .
+       * mpz/oddfac_1.c (bitwise_primesieve): Re-moved.
+       * Makefile.am (libgmp_la_SOURCES): Add primesieve.c .
+       * gmp-impl.h (gmp_primesieve): Declare.
+
+       * mpz/bin_uiui.c (mpz_goetgheluck_bin_uiui): New, factor-based
+       implementation.
+       * tests/mpz/t-bin.c: Extend tests, to cover _goetgheluck.
+
+       * mpz/primorial_ui.c: New file.
+       * mpz/Makefile.am (libmpz_la_SOURCES): Add mpz/primorial_ui.c
+       * Makefile.am (MPZ_OBJECTS): Add mpz/primorial_ui$U.lo
+       * gmp-h.in (mpz_primorial_ui): Declare.
+       * tests/mpz/t-primorial_ui.c: New test for the new function.
+       * tests/mpz/Makefile.am (check_PROGRAMS): Add t-primorial_ui.
+       * doc/gmp.texi: Short documentation for the new function.
+
+2012-04-17  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/x86_64/coreisbr/aorsmul_1.asm: Fix some DOS64 issues.
+       * mpn/x86_64/coreisbr/mul_1.asm: Likewise.
+
+       * mpn/x86_64/fastsse/lshiftc-movdqu2.asm: Adhere to DOS64 register
+       partitioning rules.
+
+       * mpn/x86_64/fastsse/copyi-palignr.asm: Implement temporary workaround
+       to overlap issue.
+
+2012-04-17 Marco Bodrato <bodrato@mail.dm.unipi.it>
+
+       * mpz/bin_uiui.c: Support small limbs (fallback on bin_ui).
+
+       * tests/mpn/toom-sqr-shared.h: Use a restricted range.
+       * tests/mpn/t-toom2-sqr.c: Specify correct range.
+       * tests/mpn/t-toom3-sqr.c: Likewise.
+       * tests/mpn/t-toom4-sqr.c: Likewise.
+       * tests/mpn/t-toom6-sqr.c: Likewise.
+       * tests/mpn/t-toom8-sqr.c: Likewise, but extended.
+       * tests/mpn/Makefile.am (check_PROGRAMS): Add t-toom?-sqr tests.
+
+       * mpn/generic/sbpi1_bdiv_q.c: Move ASSERTs, to support qp = np.
+
+2012-04-17  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/x86_64/copyd.asm: Rewrite.
+       * mpn/x86_64/copyi.asm: Rewrite.
+
+2012-04-16  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/x86_64/fastsse/lshift-movdqu2.asm: Add DOS entry/exit sequences.
+       * mpn/x86_64/fastsse/rshift-movdqu2.asm: Likewise.
+       * mpn/x86_64/fastsse/lshiftc-movdqu2.asm: Likewise.
+
+       * mpn/x86_64/x86_64-defs.m4 (palignr): New macro.
+       (x86_opcode_regxmm, x86_opcode_regxmm_list): New, made from x86 mmx
+       counterparts.
+       (x86_lookup): Copy from x86/x86-defs.m4.
+       * mpn/x86_64/fastsse/copyd-palignr.asm: Use palignr macro.
+       * mpn/x86_64/fastsse/copyi-palignr.asm: Likewise.
+
+2012-04-15 Marco Bodrato <bodrato@mail.dm.unipi.it>
+
+       * tests/mpz/t-bin.c: Add more tests on small values.
+       * mpz/bin_uiui.c (mpz_bdiv_bin_uiui): Smaller temporary areas.
+
+2012-04-15  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/x86_64/fastsse/copyd-palignr.asm: New file.
+       * mpn/x86_64/fastsse/copyi-palignr.asm: New file.
+       * mpn/x86_64/core2/copyd.asm: New file.
+       * mpn/x86_64/core2/copyi.asm: New file.
+       * mpn/x86_64/nano/copyd.asm: New file.
+       * mpn/x86_64/nano/copyi.asm: New file.
+       * mpn/x86_64/atom/copyd.asm: New file.
+       * mpn/x86_64/atom/copyi.asm: New file.
+
+2012-04-13 Marco Bodrato <bodrato@mail.dm.unipi.it>
+
+       * mpz/bin_uiui.c: Rewrite (some parts are Torbjorn's).
+       * gen-fac_ui.c: Generate new constants for bin_uiui.
+
+       * mini-gmp/mini-gmp.h (mpz_fac_ui, mpz_bin_uiui): New definitions.
+       * mini-gmp/mini-gmp.c (mpz_fac_ui, mpz_bin_uiui): Trivial
+       implementation.
+
+       * tests/mpz/t-fac_ui.c: Check Wilson's theorem on a big value.
+
+       * mpn/generic/invert.c: Remove support for scratch == NULL.
+       * tune/speed.h (SPEED_ROUTINE_MPN_MUPI_DIV_QR): Allocate scratch
+       space for mpn_invert.
+
+       * mpz/mul_i.h: Small clean-up.
+
+       * tests/mpn/toom-sqr-shared.h: New file.
+       * tests/mpn/t-toom2-sqr.c: New file.
+       * tests/mpn/t-toom3-sqr.c: New file.
+       * tests/mpn/t-toom4-sqr.c: New file.
+       * tests/mpn/t-toom6-sqr.c: New file.
+       * tests/mpn/t-toom8-sqr.c: New file.
+       * tests/mpn/Makefile.am (EXTRA_DIST): Add toom-sqr-shared.h .
+
+       * mpn/generic/toom62_mul.c: Use add_n, sub_n, when possible.
+
+2012-04-12  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/x86_64/fastsse/lshift-movdqu2.asm: New file.
+       * mpn/x86_64/fastsse/rshift-movdqu2.asm: New file.
+       * mpn/x86_64/fastsse/lshiftc-movdqu2.asm: New file.
+       * mpn/x86_64/coreisbr/lshift.asm: New file.
+       * mpn/x86_64/coreisbr/rshift.asm: New file.
+       * mpn/x86_64/coreisbr/lshiftc.asm: New file.
+       * mpn/x86_64/k10/lshift.asm: New file.
+       * mpn/x86_64/k10/rshift.asm: New file.
+       * mpn/x86_64/k10/lshiftc.asm: New file.
+
+       * mpn/x86_64/fastsse/lshift.asm: Simplify to very basic form.
+
+2012-04-11  Niels Möller  <nisse@lysator.liu.se>
+
+       * Makefile.am (check-mini-gmp): Pass -I../.. in EXTRA_CFLAGS, to
+       locate gmp.h.
+
+2012-04-11 Marco Bodrato <bodrato@mail.dm.unipi.it>
+
+       * mini-gmp/mini-gmp.h (mpz_root, mpz_rootrem): define (correctly).
+       * mini-gmp/mini-gmp.c (mpz_rootrem): Extended code from _root.
+       (mpz_root): Use mpz_rootrem.
+       (mpz_mul_ui): Correctly handle negative operands.
+
+       * mini-gmp/tests/Makefile (CHECK_PROGRAMS): add t-root.
+       * mini-gmp/tests/t-root.c: New file.
+       * mini-gmp/tests/t-reuse.c: Enable root{,rem} tests.
+
+2012-04-10 Marco Bodrato <bodrato@mail.dm.unipi.it>
+
+       * gen-fac_ui.c (mpz_root): Remove.
+       * mini-gmp/mini-gmp.c (mpz_root): New, support negative operands.
+       * mini-gmp/mini-gmp.h (mpz_root): define.
+       (mpz_out_str): Test also __STDIO_LOADED (for VMS).
+       * mpz/2fac_ui.c: Cosmetic change.
+
+2012-04-07  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/ia64/gcd_1.asm: Rewrite inner loop to use ctz table.
+
+2012-04-05  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/powerpc64/p7/popcount.asm: Properly extend arg n for mode32.
+       * mpn/powerpc64/p7/hamdist.asm: Likewise.
+
+2012-04-04  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/powerpc64/p7/popcount.asm: New file.
+       * mpn/powerpc64/p7/hamdist.asm: New file.
+
+       * longlong.h (ARM count_leading_zeros): Enable for more arch versions.
+
+       * mpn/x86_64/gcd_1.asm: Make room for DOS64 regparm shadow area.
+       * mpn/x86_64/core2/gcd_1.asm: Likewise.
+
+2012-04-03  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/x86_64/coreisbr/aorrlsh_n.asm: Make it actually work for DOS64.
+
+2012-04-02 Marco Bodrato <bodrato@mail.dm.unipi.it>
+
+       * mpz/oddfac_1.c: Initialize size for ASSERT.
+
+2012-04-02  Torbjorn Granlund  <tege@gmplib.org>
+
+       * gmp-h.in (_GMP_H_HAVE_FILE): Test also __STDIO_LOADED (for VMS).
+
+       * gmp-impl.h (doprnt_format_t, etc): Remove bogus __GMP_DECLSPECs.
+
+2012-03-30 Marco Bodrato <bodrato@mail.dm.unipi.it>
+
+       * mpn/x86_64/sqr_basecase.asm: Speed-up for small cases.
+
+2012-03-29  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/sparc64/gcd_1.asm: New file.
+
+2012-03-27  Torbjorn Granlund  <tege@gmplib.org>
+
+       * config.guess: Fix typo in coreisbr recognition.
+
+2012-03-26 Marco Bodrato <bodrato@mail.dm.unipi.it>
+
+       * mpn/x86_64/gcd_1.asm: Reduce latency.
+       * mpn/x86_64/mul_basecase.asm: Save one jump.
+
+       * mpz/iset_ui.c: Don't realloc.
+
+2012-03-20 Marco Bodrato <bodrato@mail.dm.unipi.it>
+
+       * mp_clz_tab.c: Add __clz_tab[128].
+       * longlong.h (count_trailing_zeros): Use it in pure C variant.
+
+2012-03-20  Torbjorn Granlund  <tege@gmplib.org>
+
+       * configure.in (x86 fat_path): Add many missing directories.
+       * mpn/x86/fat/fat.c (__gmpn_cpuvec_init): Rewrite.
+       (fake_cpuid_table): Add many more CPUs.
+
+       * mpn/x86_64/fat/fat.c (__gmpn_cpuvec_init): Minor spacing cleanup.
+
+2012-03-19  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/x86/x86-defs.m4 (CALL, PIC_WITH_EBX): New macros.
+       * mpn/x86/darwin.m4: Likewise.
+       * mpn/x86/k7/gcd_1.asm: Use new macros to support PIC.
+       * mpn/x86/p6/gcd_1.asm: Likewise.
+
+2012-03-19 Marco Bodrato <bodrato@mail.dm.unipi.it>
+
+       * gen-fac_ui.c: Generate more constants (possible mini-mpz_root).
+       * mpz/oddfac_1.c: Improve ASSERTs.
+       (log_n_max): Use precomputed table.
+
+       * longlong.h (_PROTO): Remove.
+
+2012-03-18  Torbjorn Granlund  <tege@gmplib.org>
+
+       * longlong.h (count_trailing_zeros): Write better pure C default
+       variant.
+
+       * mpn/x86/p6/gcd_1.asm: Remove forgotten x86_64 reference.
+
+       * mpn/x86/p6/gmp-mparam.h: Update, to get BMOD_1_TO_MOD_1_THRESHOLD
+       defined for fat binaries.
+
+2012-03-17  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/x86/k7/gcd_1.asm: Rewrite.
+       * mpn/x86/p6/gcd_1.asm: New file.
+
+       * mpn/x86_64/core2/gcd_1.asm: Conditionally suppress reduction calls.
+       * mpn/x86_64/gcd_1.asm: Rewrite.
+
+2012-03-15  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/generic/gcd_1.c: Parameterise zerotab code.
+
+       * mpn/x86_64/nano/gcd_1.asm: New file, grabbing core2 asm file.
+
+       * mpn/x86_64/core2/gcd_1.asm: Speed up loop code, simplify non-loop
+       code.
+
+2012-03-13  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/x86_64/core2/gcd_1.asm: Add hack to support fat builds.
+
+       * mpn/x86_64/core2/gcd_1.asm: Shorten critical path.
+
+2012-03-12  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/x86_64/core2/gcd_1.asm: New file.
+       * mpn/x86_64/k10/gcd_1.asm: New file, grabbing core2 asm file.
+       * mpn/x86_64/bd1/gcd_1.asm: Likewise.
+
+       * mpn/x86_64/bobcat/sqr_basecase.asm: New file.
+       * mpn/x86_64/bobcat/mul_basecase.asm: Minor tuning.
+
+2012-03-10  Torbjorn Granlund  <tege@gmplib.org>
+
+       * configure.in (fat_functions): Add addlsh1_n, addlsh2_n, addmul_2,
+       mullo_basecase, redc_1, redc_2, sublsh1_n.
+
+       * gmp-impl.h (struct cpuvec_t): Add fields for new fat functions.
+       * gmp-impl.h: Adjust corresponding declarations.
+
+       * mpn/generic/redc_2.c (mpn_addmul_2): Make static.
+
+       * mpn/x86_64/fat/fat_entry.asm (FAT_INIT): Expand before fat_init to
+       reduce branch offsets.  Pass plain 0,1,3... in %al since we'd else run
+       out of 8-bit range.
+
+       * mpn/x86_64/fat/fat_entry.asm (fat_init): Scale passed index value.
+       * mpn/x86/fat/fat_entry.asm (fat_init): Use movzbl for expanding index
+       value.
+
+       * mpn/x86_64/x86_64-defs.m4 (CPUVEC_FUNCS_LIST): Add new fat functions.
+       * mpn/x86/x86-defs.m4 (CPUVEC_FUNCS_LIST): Likewise.
+       * mpn/x86_64/fat/fat.c (__gmpn_cpuvec): Likewise.
+       * mpn/x86/fat/fat.c (__gmpn_cpuvec): Likewise.
+
+       * mpn/x86_64/fat/redc_2.c: New file.
+       * mpn/x86/fat/mullo_basecase.c: New file.
+       * mpn/x86/fat/redc_1.c: New file.
+       * mpn/x86/fat/redc_2.c: New file.
+
+       * tests/mpn/t-fat.c: Test mullo_basecase.
+
+2012-03-08  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/x86_64/coreisbr/addmul_2.asm: Port to DOS64.
+
+2012-02-29  Marc Glisse  <marc.glisse@inria.fr>
+
+       * gmpxx.h: Ignore partial C++11 support in g++-4.6.
+       * tests/cxx/t-cxx11.cc: Likewise.
+
+       * gmpxx.h (operator""): New functions.
+       * tests/cxx/t-cxx11.cc: Test the above.
+       * doc/gmp.texi: Document the above.
+
+2012-03-08 Marco Bodrato <bodrato@mail.dm.unipi.it>
+
+       * acinclude.m4 (GMP_H_ANSI): Remove.
+       * configure.in: Don't use GMP_H_ANSI.
+       * gmp-h.in (__GMP_HAVE_PROTOTYPES): Remove.
+
+2012-03-08  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/x86_64/fat/fat.c (fake_cpuid_table): Recognise "bulldozer".
+       (__gmpn_cpuvec_init): Overhaul to match configure.in.
+
+       * configure.in: Adjust bulldozer path_64.
+
+2012-03-07  Torbjorn Granlund  <tege@gmplib.org>
+
+       * configure.in (x86_64 fat_path): List recently added AMD directories.
+
+       * mpn/x86_64/bobcat/copyi.asm: New file.
+       * mpn/x86_64/bobcat/copyd.asm: New file.
+
+       * config.guess: Handle AMD 11h correctly.
+
+       * tune/tuneup.c (tune_redc): Better handle situation where redc_2 is
+       never faster.
+
+2012-03-06  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/x86_64/bobcat/mul_basecase.asm: New file.
+
+2012-03-04  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/x86_64/bobcat/mul_1.asm: New file.
+       * mpn/x86_64/bobcat/aorsmul_1.asm: New file.
+
+2012-03-04 Marco Bodrato <bodrato@mail.dm.unipi.it>
+
+       * mpz/invert.c: Remove mod 0 branch.
+       * tests/mpz/t-invert.c: Avoid testing mod 0.
+       * doc/gmp.texi (mpz_invert): Specify mod 0 is not handled.
+
+       * gmp-h.in (__gmp_signed, __gmp_const): Remove.
+       (__GMP_HAVE_TOKEN_PASTE, __GMP_HAVE_CONST): Remove.
+       * gmp-impl.h: Strip __GMP_HAVE_TOKEN_PASTE and __GMP_HAVE_CONST.
+       * demos/expr/: Strip __gmp_const usage from all files.
+
+       * tests/mpz/t-powm.c (allsizes_seen): Require unsigned*.
+
+2012-03-03  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/x86_64/k8/gmp-mparam.h: New file.
+       * mpn/x86_64/k10/gmp-mparam.h: New file.
+
+       * mpn/generic/hgcd_step.c (mpn_hgcd_step): Remove unused variables.
+       * mpn/generic/hgcd_jacobi.c (hgcd_jacobi_step): Likewise.
+       * mpn/generic/hgcd_reduce.c (hgcd_matrix_apply): Likewise.
+       * mpn/generic/mu_bdiv_qr.c: Likewise.
+       * mpz/jacobi.c: Likewise.
+       * mpz/mod.c: Likewise.
+
+       * mpn/generic/toom42_mul.c: Remove unread variable.
+       * mpn/generic/set_str.c (mpn_set_str_compute_powtab): Likewise.
+       * mpn/generic/rootrem.c (mpn_rootrem_internal): Likewise.
+       * tests/refmpn.c (refmpn_mul): Likewise.
+       * mpn/generic/hgcd_appr.c (mpn_hgcd_appr): Propagate mask computation
+       into ASSERT, remove variable.
+
+       * gmp-h.in (__GMP_PROTO): Remove.
+       * Strip __GMP_PROTO usage from all files.
+       * Strip prototype parameter names from all files.
+
+2012-03-01 Marco Bodrato <bodrato@mail.dm.unipi.it>
+
+       * doc/gmp.texi (mpz_invert): Correctly document result range.
+       * tests/mpz/t-invert.c: Small range correction.
+
+2012-03-01  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/x86_64/mullo_basecase.asm: New file.
+
+2012-02-29  Marc Glisse  <marc.glisse@inria.fr>
+
+       * gmpxx.h (std::numeric_limits): New partial specialization.
+
+2012-02-29  Niels Möller  <nisse@lysator.liu.se>
+
+       * mini-gmp/tests/t-reuse.c: New test case, based on
+       tests/mpz/reuse.c.
+
+       * mini-gmp/mini-gmp.c (mpz_cdiv_r_ui): New function.
+       (mpz_fdiv_r_ui): New function.
+       (mpz_tdiv_r_ui): New function.
+       (mpz_powm_ui): New function.
+       (mpz_pow_ui): New function.
+       (mpz_ui_pow_ui): Use mpz_pow_ui.
+       (mpz_gcdext): Fixed input/output overlap, for the case of one
+       input being zero.
+       (mpz_sqrtrem): Fix for the case r NULL, U zero.
+
+       * Makefile.am (check-mini-gmp): Use $(MAKE).
+       (clean-mini-gmp): New target.
+       (clean-local, distclean-local): New automake targets. Depend on
+       clean-mini-gmp.
+
+2012-02-28  Niels Möller  <nisse@lysator.liu.se>
+
+       * Makefile.am (check-mini-gmp): New target, for running the
+       mini-gmp testsuite.
+
+       * mini-gmp/tests/Makefile (srcdir, MINI_GMP_DIR): New make
+       variables. These can be overridden when using a separate build
+       directory.
+       (EXTRA_CFLAGS): Renamed, was OPTFLAGS.
+
+       * mini-gmp/mini-gmp.c (mpz_abs_add): Don't cache limb pointers
+       over MPZ_REALLOC, since that breaks in-place operation. Bug
+       spotted by Torbjörn.
+       (mpz_and, mpz_ior, mpz_xor): Likewise.
+       (mpz_cmp): Fixed comparison of negative numbers.
+
+2012-02-27  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/x86_64/fastsse/lshiftc.asm: New file.
+       * mpn/x86_64/fastsse/com.asm: New file.
+
+       * mpn/x86_64/bd1/popcount.asm: New file.
+       * mpn/x86_64/bd1/hamdist.asm: New file.
+
+       * mpn/x86_64/fastsse/copyi.asm: New file.
+       * mpn/x86_64/fastsse/copyd.asm: New file.
+       * mpn/x86_64/fastsse/lshift.asm: New file.
+
+2012-02-26  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/x86_64/coreisbr/addmul_2.asm: New file.
+
+       * tests/devel/try.c (param_init): Don't require addmul_N to handle
+       overlap.
+
+       * mpn/x86_64/bd1/mul_1.asm: New file.
+       * mpn/x86_64/bd1/aorsmul_1.asm: New file.
+
+2012-02-26 Marco Bodrato <bodrato@mail.dm.unipi.it>
+
+       * mpz/2fac_ui.c: New file: implements n!!.
+       * Makefile.am (MPZ_OBJECTS): Add mpz/2fac_ui.
+       * gmp-h.in: Declare mpz_2fac_ui.
+       * tests/mpz/t-fac.c: Test mpz_2fac_ui.
+       * doc/gmp.texi: Document mpz_2fac_ui.
+       * mpz/Makefile.am (libmpz_la_SOURCES): Add 2fac_ui.c.
+
+       * mpz/oddfac_1.c (mpz_oddfac_1): Use umul_ppmm when size = 2.
+
+2012-02-26  Niels Möller  <nisse@lysator.liu.se>
+
+       * mini-gmp: New subdirectory. For use by GMP bootstrap, and as a
+       fallback for applications needing bignums but not high
+       performance.
+
+       * bootstrap.c: New file, replacing dumbmp.c. Uses mini-gmp for the
+       standard GMP functions, and then defines the few functions
+       particular for the bootstrap.
+       * dumbmp.c: Deleted file. A few functions moved to bootstrap.c.
+
+       * gen-bases.c: Include bootstrap.c, not dumbmp.c.
+       * gen-fac_ui.c: Likewise.
+       * gen-trialdivtab.c: Likewise.
+       * gen-fib.c: Include bootstrap.c, not dumbmp.c. Use assert rather
+       than ASSERT. Deleted casts of xmalloc return value.
+       * gen-psqr.c: Likewise.
+       (COLLAPSE_ELEMENT): Use memmove rather than mem_copyi.
+
+       * Makefile.am: Replaced all uses of dumbmp.c by bootstrap.c.
+       (EXTRA_DIST, dist-hook): Arrange for distribution of the mini-gmp
+       files.
+
+2012-02-24 Marco Bodrato <bodrato@mail.dm.unipi.it>
+
+       * mpz/invert.c: Use ABSIZ, MPZ_EQUAL_1_P.
+       * mpz/abs.c: Collapse MPZ_REALLOC(x,.) and PTR(x).
+       * mpz/aors_ui.h: Likewise.
+       * mpz/com.c: Likewise.
+       * mpz/neg.c: Likewise.
+
+       * mpz/invert.c: Reply "no-inverse" when modulus is zero.
+       * tests/mpz/t-invert.c: Add more checks.
+       * doc/gmp.texi (mpz_invert): Inverse can not be zero.
+
+2012-02-24  Torbjorn Granlund  <tege@gmplib.org>
+
+       * tests/mpn/logic.c: New file.
+       * tests/mpn/Makefile.am (check_PROGRAMS): Add logic.
+
+       * tests/mpz/t-invert.c: New file.
+       * tests/mpz/Makefile.am (check_PROGRAMS): Add t-invert.
+
+2012-02-24  Marc Glisse  <marc.glisse@inria.fr>
+
+       * tests/mpq/t-cmp.c: Move NUM and DEN macros...
+       * tests/mpq/t-cmp_ui.c: Likewise...
+       * gmp-impl.h: ... to here.
+
+       * mpq/abs.c: Use NUM, DEN, SIZ, ALLOC, PTR, MPZ_REALLOC.
+       * mpq/aors.c: Likewise.
+       * mpq/canonicalize.c: Likewise.
+       * mpq/clear.c: Likewise.
+       * mpq/cmp.c: Likewise.
+       * mpq/cmp_si.c: Likewise.
+       * mpq/cmp_ui.c: Likewise.
+       * mpq/div.c: Likewise.
+       * mpq/equal.c: Likewise.
+       * mpq/get_d.c: Likewise.
+       * mpq/get_den.c: Likewise.
+       * mpq/get_num.c: Likewise.
+       * mpq/get_str.c: Likewise.
+       * mpq/init.c: Likewise.
+       * mpq/inp_str.c: Likewise.
+       * mpq/inv.c: Likewise.
+       * mpq/md_2exp.c: Likewise.
+       * mpq/mul.c: Likewise.
+       * mpq/neg.c: Likewise.
+       * mpq/set.c: Likewise.
+       * mpq/set_d.c: Likewise.
+       * mpq/set_den.c: Likewise.
+       * mpq/set_f.c: Likewise.
+       * mpq/set_num.c: Likewise.
+       * mpq/set_si.c: Likewise.
+       * mpq/set_str.c: Likewise.
+       * mpq/set_ui.c: Likewise.
+       * mpq/set_z.c: Likewise.
+       * mpq/swap.c: Likewise.
+
+       * tests/mpq/t-inv.c: New test file.
+       * tests/mpq/Makefile.am: Add the above.
+
+       * gmpxx.h (__gmp_set_expr): Use mpq_set_z.
+
+       * mpq/md_2exp.c: Collapse MPZ_REALLOC(x,.) and PTR(x).
+       * mpq/set_d.c: Likewise.
+       * mpq/set_f.c: Likewise.
+
+2012-02-24  Niels Möller  <nisse@lysator.liu.se>
+
+       * mpn/x86_64/core2/aorsmul_1.asm: Added mpn_addmul_1c and
+       mpn_submul_1c entry points.
+
+2012-02-23  Marc Glisse  <marc.glisse@inria.fr>
+
+       * mpz/abs.c: Use ALLOC, SIZ, ABSIZ, PTR, MPZ_REALLOC.
+       * mpz/aors_ui.h: Likewise.
+       * mpz/array_init.c: Likewise.
+       * mpz/cdiv_q.c: Likewise.
+       * mpz/cdiv_qr.c: Likewise.
+       * mpz/cdiv_r.c: Likewise.
+       * mpz/clear.c: Likewise.
+       * mpz/clrbit.c: Likewise.
+       * mpz/cmp_si.c: Likewise.
+       * mpz/com.c: Likewise.
+       * mpz/fdiv_q.c: Likewise.
+       * mpz/fdiv_qr.c: Likewise.
+       * mpz/fdiv_r.c: Likewise.
+       * mpz/get_si.c: Likewise.
+       * mpz/get_str.c: Likewise.
+       * mpz/init.c: Likewise.
+       * mpz/inp_str.c: Likewise.
+       * mpz/iset.c: Likewise.
+       * mpz/iset_d.c: Likewise.
+       * mpz/iset_si.c: Likewise.
+       * mpz/iset_str.c: Likewise.
+       * mpz/iset_ui.c: Likewise.
+       * mpz/mod.c: Likewise.
+       * mpz/neg.c: Likewise.
+       * mpz/out_str.c: Likewise.
+       * mpz/random2.c: Likewise.
+       * mpz/set_si.c: Likewise.
+       * mpz/set_str.c: Likewise.
+       * mpz/set_ui.c: Likewise.
+       * mpz/setbit.c: Likewise.
+       * mpz/sqrt.c: Likewise.
+       * mpz/swap.c: Likewise.
+       * mpz/tdiv_r_2exp.c: Likewise.
+
+       * tests/cxx/t-ops.cc: Test mpz_abs reallocation.
+
+2012-02-23  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/x86_64/core2/rsh1aors_n.asm: Complete rewrite.
+       * mpn/x86_64/coreisbr/rsh1aors_n.asm: Move old core2 code here.
+
+       * mpn/x86_64/redc_1.asm: Make it work for DOS64 (broken in last edit).
+
+2012-02-20 Marco Bodrato <bodrato@mail.dm.unipi.it>
+
+       * mpn/generic/toom_interpolate_8pts.c: Compute carry iff non-trivial.
+
+       * mpz/gcdext.c: Adapt to relaxed mpn_gcdext's input requirements.
+
+       * mpz/and.c: Use mpn_ logic everywhere. Reduce branches.
+       * mpz/ior.c: Likewise.
+       * mpz/xor.c: Likewise.
+
+2012-02-20  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/x86_64/coreisbr/mul_1.asm: New file.
+
+       * mpn/x86_64/coreisbr/aorsmul_1.asm: New file.
+
+       * mpn/x86_64/mod_34lsub1.asm: Avoid ",pt" branch hint since many
+       assemblers don't support it.
+
+2012-02-19  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/generic/redc_1.c: Put back mpn_add_n call, return its carry.
+       Reintroduce previously removed RP argument.
+       * mpn/x86_64/redc_1.asm: Likewise.
+
+       * mpn/generic/redc_2.c:  Remove mpn_sub_n call, return carry from
+       mpn_add_n call.
+
+       * gmp-impl.h (mpn_redc_1, mpn_redc_2): Now return an mp_limb_t.
+
+       * tune/speed.h (SPEED_ROUTINE_REDC_1): Adapt to pass RP argument.
+
+       * tests/refmpn.c (refmpn_redc_1): Adapt to new redc_1 interface.
+
+       * mpn/generic/powm.c (MPN_REDC_1): Pass rp parameter to mpn_redc_1.
+       * mpn/generic/powm_sec.c (MPN_REDC_1_SEC): Likewise.
+       * mpn/generic/powm.c (MPN_REDC_2): New macro, use for mpn_redc_2.
+
+2012-02-18  Marc Glisse  <marc.glisse@inria.fr>
+
+       * gmpxx.h (std::common_type): New partial specialization in C++11.
+       * tests/cxx/t-cxx11.cc: Test it.
+
+       * gmpxx.h: Don't declare long double functions that are never defined.
+
+       * gmpxx.h (__gmp_binary_expr): Let things happen in place: q=q*q+z*z
+       becomes tmp=z*z, q=q*q, q+=tmp.
+       * tests/cxx/t-binary.cc: More variable reuse tests.
+
+2012-02-17  Marc Glisse  <marc.glisse@inria.fr>
+
+       * gmp-h.in (__GMP_WITHIN_GMP): Test with #ifdef instead of #if, for
+       the benefit of applications using gcc -Wundef.
+       (__GMP_WITHIN_GMPXX): Likewise.
+
+2012-02-16  Marc Glisse  <marc.glisse@inria.fr>
+
+       * gmpxx.h (__gmp_binary_expr): Let things happen in place: e=a*b-c*d
+       becomes tmp=c*d, e=a*b, e-=tmp.
+       * tests/cxx/t-binary.cc: More variable reuse tests.
+
+2012-02-15  Niels Möller  <nisse@lysator.liu.se>
+
+       * tune/tuneup.c (mul_toom43_to_toom54_threshold): New global.
+       (tune_mul): Added tuning of MUL_TOOM43_TO_TOOM54_THRESHOLD.
+       * tune/speed.h (SPEED_ROUTINE_MPN_TOOM43_FOR_TOOM54_MUL): New macro.
+       (SPEED_ROUTINE_MPN_TOOM54_FOR_TOOM43_MUL): New macro.
+       Prototypes for corresponding functions.
+       * tune/common.c (speed_mpn_toom43_for_toom54_mul): New function.
+       (speed_mpn_toom54_for_toom43_mul): New function.
+
+       * gmp-impl.h (MPN_TOOM43_MUL_MINSIZE): Corrected constant.
+       (MPN_TOOM53_MUL_MINSIZE): Likewise.
+       (MPN_TOOM54_MUL_MINSIZE): New constant.
+       (mpn_toom54_mul): Added prototype.
+       (MUL_TOOM43_TO_TOOM54_THRESHOLD): New threshold. Default value and
+       tuning setup.
+
+2012-02-14  Niels Möller  <nisse@lysator.liu.se>
+
+       * mpn/generic/toom54_mul.c: New file, originally contributed by
+       Marco.
+       * gmp-impl.h (mpn_toom54_mul_itch): New function.
+       * configure.in (gmp_mpn_functions): Added toom54_mul.
+       * tests/mpn/t-toom54.c: New file.
+       * tests/mpn/Makefile.am (check_PROGRAMS): Added t-toom54.
+
+2012-02-13  Niels Möller  <nisse@lysator.liu.se>
+
+       * configure.in: Display summary of options.
+
+2012-02-11  Torbjorn Granlund  <tege@gmplib.org>
+
+       * tests/tests.h (TESTS_REPS): Print any non-standard repetitions.
+
+2012-02-11 Marco Bodrato <bodrato@mail.dm.unipi.it>
+
+       * doc/gmp.texi (Factorial): Shortly describe current algorithm.
+       (Multiplication Algorithms): Add Toom[68]'n'half, (too) shortly.
+       * gmp-impl.h (ASSERT_ALWAYS): Consider failures UNLIKELY.
+
+2012-02-10  Niels Möller  <nisse@lysator.liu.se>
+
+       * tests/mpz/t-gcd.c (gcdext_valid_p): Enforce slightly stricter
+       bound for cofactors.
+
+       * mpn/generic/gcdext_lehmer.c (mpn_gcdext_hook): Corrected
+       handling of unlikely (maybe impossible?) case u1n < un. Related to
+       the 2012-02-05 bugfix of gcdext_subdiv_step.c in the gmp-5.0 repo.
+
+2012-02-09 Marco Bodrato <bodrato@mail.dm.unipi.it>
+
+       * gmp-impl.h (mpn_toom3*_itch): Support any recursion depth.
+       * tests/refmpn.c (refmpn_mul): Restore tight allocations.
+
+       * mpz/oddfac_1.c (mpz_oddfac_1): Get ready for n!!
+       * gmp-impl.h (mpz_oddfac_1): Update signature.
+       * mpz/fac_ui.c (mpz_fac_ui): Update call to mpz_oddfac_1.
+
+2012-02-09  Marc Glisse  <marc.glisse@inria.fr>
+
+       * gmp-impl.h (ABS_CAST): New macro.
+       * mpf/cmp_si.c: Use ABS_CAST.
+       * mpf/get_si.c: Use ABS_CAST.
+       * mpf/iset_si.c: Use ABS_CAST.
+       * mpf/set_si.c: Use ABS_CAST.
+       * mpq/set_si.c: Use ABS_CAST.
+       * mpz/cmp_si.c: Use ABS_CAST.
+       * mpz/get_si.c: Use ABS_CAST.
+       * mpz/iset_si.c: Use ABS_CAST.
+       * mpz/mul_i.h: Use ABS_CAST.
+       * mpz/set_si.c: Use ABS_CAST.
+
+2012-02-08  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/powerpc32/divrem_2.asm: Fix off-by-one condition in invert_limb
+       code.
+
+2012-02-08  Niels Möller  <nisse@lysator.liu.se>
+
+       * doc/gmp.texi (mpz_gcdext): Clarified corner cases in cofactor
+       canonicalization.
+
+2012-02-07  Niels Möller  <nisse@lysator.liu.se>
+
+       * mpn/generic/gcdext.c (mpn_gcdext): Fixed assert, related to the
+       special case A = (2k+1) G, B = 2 G. Fix copied from gmp-5.0 repo.
+
+2012-02-06  Niels Möller  <nisse@lysator.liu.se>
+
+       * mpn/generic/hgcd_matrix.c (hgcd_matrix_update_q): Fixed carry
+       handling bug. Fix copied from gmp-5.0 repo, where the function is
+       found in hgcd.c.
+
+       * tests/mpz/t-gcd.c (main): Use mpz_rrandomb for test operands,
+       not mpz_urandomb. Change copied from gmp-5.0 repo.
+       * tests/mpn/t-hgcd.c (main): Likewise.
+
+2012-02-04 Marco Bodrato <bodrato@mail.dm.unipi.it>
+
+       * tests/refmpn.c (refmpn_mul): More conservative allocations.
+
+2012-02-03  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/x86_64/bd1/gmp-mparam.h: New file.
+
+       * longlong.h (udiv_qrnnd from sdiv_qrnnd): Declare udiv_w_sdiv.
+
+       * mpn/generic/udiv_w_sdiv.c: Use c89 function header.
+
+2012-02-03 Marco Bodrato <bodrato@mail.dm.unipi.it>
+
+       * mpz/fac_ui.c: mpz_oddfac_1 removed, with many related functions.
+       * mpz/oddfac_1.c: New file, mpz_oddfac_1 implementation.
+       * gmp-impl.h: mpz_oddfac_1 declaration.
+       * Makefile.am (MPZ_OBJECTS): add mpz/oddfac_1$U.lo .
+       * mpz/Makefile.am (libmpz_la_SOURCES): add oddfac_1.c .
+       * tune/Makefile.am (fac_ui.c): include mpz/oddfac_1.c .
+
+2012-02-02 Marco Bodrato <bodrato@mail.dm.unipi.it>
+
+       * mpn/generic/toom_interpolate_16pts.c: Correct an unlikely 32-bit bug.
+
+2012-02-02  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/generic/toom63_mul.c: Allow s+t==n by adjusting an ASSERT.
+       * mpn/generic/toom_interpolate_8pts.c: Perform final incr iff s+t!=n.
+
+       * tests/mpn/t-toom6h.c (MIN_BN): Make more consistent with ASSERT in
+       tested function.
+
+2012-02-01  Torbjorn Granlund  <tege@gmplib.org>
+
+       * tests/mpn/t-mul.c: New file.
+       * tests/mpn/Makefile.am: Compile it.
+
+2012-02-01  Marc Glisse  <marc.glisse@inria.fr>
+
+       * gmpxx.h: Remove check for g++ older than 2.91.
+
+2012-02-01  Niels Möller  <nisse@lysator.liu.se>
+
+       * mpn/generic/mul.c: Added diagram on where toom functions can be
+       called.
+
+2012-02-01  Marc Glisse  <marc.glisse@inria.fr>
+
+       * gmpxx.h (__gmp_unary_expr): Make the constructor explicit.
+       (__gmp_expr(__gmp_expr&&)): New move constructors.
+       (__gmp_expr::operator=(__gmp_expr&&)): New move assignments.
+       (swap): Mark as noexcept.
+       (__GMPXX_USE_CXX11): New macro.
+       (__GMPXX_NOEXCEPT): New macro.
+       * tests/cxx/t-cxx11.cc: New file.
+       * tests/cxx/Makefile.am: Added t-cxx11.
+
+2012-01-31  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/generic/powm_sec.c (SQR_BASECASE_LIM): New name for
+       SQR_BASECASE_MAX.
+       (SQR_BASECASE_LIM, fat variant): Define to read __gmpn_cpuvec.
+       (SQR_BASECASE_LIM, native variant): Define to SQR_TOOM2_THRESHOLD
+       straight, without arithmetic.
+       (mpn_local_sqr): Use BELOW_THRESHOLD as per Marco's suggestion.
+
+2012-01-30  Torbjorn Granlund  <tege@gmplib.org>
+
+       * tests/mpz/t-powm.c: Ensure all sizes are seen.
+
+2012-01-30  Marc Glisse  <marc.glisse@inria.fr>
+
+       * gmpxx.h (__gmp_binary_expr): Let things happen in place: d=a+b+c
+       when d != c.
+       * tests/cxx/t-binary.cc: Test variable reuse: c=a+b+c.
+
+2012-01-28  Marc Glisse  <marc.glisse@inria.fr>
+
+       * gmpxx.h: Don't compute -LONG_MIN.
+
+       * doc/gmp.texi (gmp_randclass::get_z_bits): Use mp_bitcnt_t.
+       * gmpxx.h: Replace unsigned long with mp_bitcnt_t.
+
+2012-01-27  Torbjorn Granlund  <tege@gmplib.org>
+
+       * Upgrade to libtool 2.4.2.
+
+2012-01-26 Marco Bodrato <bodrato@mail.dm.unipi.it>
+
+       * tests/mpz/t-fac_ui.c: Increase default test cases.
+
+       * mpz/prodlimbs.c: New file, mpz_prodlimbs implementation.
+       * gmp-impl.h: mpz_prodlimbs declaration.
+       * Makefile.am (MPZ_OBJECTS): add mpz/prodlimbs$U.lo .
+       * mpz/Makefile.am (libmpz_la_SOURCES): add prodlimbs.c .
+       (fac_ui.h): remove target (moved up one directory).
+       * mpz/fac_ui.c: mpz_prodlimbs removed, micro-optimisations.
+
+2012-01-25  Torbjorn Granlund  <tege@gmplib.org>
+
+       * tune/tuneup.c: Remove unused tuneup variables.
+
+2012-01-20 Marco Bodrato <bodrato@mail.dm.unipi.it>
+
+       * mpz/fac_ui.c: Reduce branches in basecases.
+
+2012-01-18  Marc Glisse  <marc.glisse@inria.fr>
+
+       * doc/gmp.texi (mpf_class::mpf_class): Use mp_bitcnt_t.
+
+2012-01-17  Torbjorn Granlund  <tege@gmplib.org>
+
+       * configure.in: Add ultrasparc T4 support.
+
+       * demos/isprime.c (main): Run 25 millerrabin tests.
+
+2012-01-16 Marco Bodrato <bodrato@mail.dm.unipi.it>
+
+       * mpz/fac_ui.c (SIEVE_SEED): Define value for small limb size.
+       (mpz_oddswing_1): Reduce the number of divisions.
+       (mpz_oddfac_1): Reduce memory usage.
+       * mpn/minithres/gmp-mparam.h: Correct minimum for FAC_DSC_.
+       * tune/tuneup.c (tune_fac_ui): Likewise.
+
+2012-01-15  Niels Möller  <nisse@lysator.liu.se>
+
+       * mpz/scan0.c (mpz_scan0): Use ~(mp_bitcnt_t) 0, rather than
+       ULONG_MAX, when returning "infinity".
+       * mpz/scan1.c (mpz_scan1): Likewise.
+
+2012-01-12  Torbjorn Granlund  <tege@gmplib.org>
+
+       * tests/t-popc.c: Test longer bit strings.
+
+2012-01-12 Marco Bodrato <bodrato@mail.dm.unipi.it>
+
+       * mpz/divexact.c: Tight realloc, delayed if variables are reused.
+       * mpz/lcm.c: Smaller temp space, avoid goto.
+       * gmp-impl.h (popc_limb): avoid double & (for 8-bits limb).
+
+2012-01-10 Marco Bodrato <bodrato@mail.dm.unipi.it>
+
+       * mpn/minithres/gmp-mparam.h: New FAC_ODD_ and FAC_DSC_ thresholds.
+       * tune/tuneup.c (tune_fac_ui): Correct minimum for FAC_DSC_.
+
+2012-01-07  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpz/mul_2exp.c: Rewrite.
+       * mpz/tdiv_q_2exp.c: Rewrite.
+
+2012-01-05 Marco Bodrato <bodrato@mail.dm.unipi.it>
+
+       * gen-fac_ui.c: Remove currently unused constants; add new odd
+       double factorial table.
+       * mpz/fac_ui.c (RECURSIVE_PROD_THRESHOLD): Increase default.
+       (mpz_oddfac_1): New function: a merge of _bc_odd and _dsc_odd.
+       (mpz_prodlimbs): More in-place computations.
+
+       * tune/tuneup.c (tune_fac_ui): min_is_always for FAC_ODD_.
+
+2012-01-02 Marco Bodrato <bodrato@mail.dm.unipi.it>
+
+       * tune/tuneup.c (tune_fac_ui): Compute FAC_DSC before FAC_ODD.
+
+2011-12-31  Torbjorn Granlund  <tege@gmplib.org>
+
+       * Makefile.am (fac_ui.h): Put file in top-level dir, not in mpz.
+
+2011-12-31 Marco Bodrato <bodrato@mail.dm.unipi.it>
+
+       * tune/Makefile.am (fac_ui.c): New target.
+       (nodist_tuneup_SOURCES,CLEANFILES): Add fac_ui.c.
+       * tune/tuneup.c (mpz_fac_ui_tune): Declare prototype.
+       (fac_odd_threshold,fac_dsc_threshold): New global variables.
+       (speed_mpz_fac_ui_tune,tune_fac_ui): New functions.
+       (all): Call tune_fac_ui.
+       * gmp-impl.h (FAC_ODD_THRESHOLD,FAC_DSC_THRESHOLD):
+       New thresholds: default values, and setup for tuning.
+       (FAC_DSC_THRESHOLD_LIMIT): Define (when tuning).
+       * mpz/fac_ui.c (FAC_ODD_THRESHOLD,FAC_DSC_THRESHOLD):
+       Default values removed.
+
+2011-12-30  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpz/hamdist.c: Fix typo in a return statement.
+
+       * mpn/generic/powm_sec.c (SQR_BASECASE_MAX): Set safely from
+       SQR_TOOM2_THRESHOLD.
+
+2011-12-17  Torbjorn Granlund  <tege@gmplib.org>
+
+       * tests/mpz/t-perfpow.c: Decrease default # of tests.
+
+2011-12-16  Torbjorn Granlund  <tege@gmplib.org>
+
+       * tests/refmpn.c (AORS_1): Fix typo in variable type.
+
+2011-12-10  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/generic/sbpi1_bdiv_q.c: Delay quotient limb stores in order to
+       allow quotient and dividend to completely overlap.
+       * mpn/generic/sbpi1_bdiv_qr.c: Likewise.
+
+2011-12-10 Marco Bodrato <bodrato@mail.dm.unipi.it>
+
+       * mpz/fac_ui.c: fac_bc_ui inlined in fac_ui.
+
+2011-12-08  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/generic/powm_sec.c: Handle fat binaries better.
+
+       * mpz/fac_ui.c (mpz_bc_fac_1): Fix typo in allocation size.
+
+       * mpn/x86/fat/com.c: New file.
+
+       * mpn/x86_64/pentium4/aors_n.asm: Make it actually work for DOS64.
+       * mpn/x86_64/pentium4/rsh1aors_n.asm: Conditionalise jump on DOS64
+       to avoid overhead for standard ABIs.
+
+       * mpn/x86_64/gcd_1.asm: Support DOS64.
+
+2011-12-07  Torbjorn Granlund  <tege@gmplib.org>
+
+       * configure.in: Fix typo making HAVE_NATIVE_mpn_X fail for fat
+       functions.
+
+       * mpn/x86_64/fat/fat.c (__gmpn_cpuvec_init): Add a missing break.
+
+2011-12-07 Marco Bodrato <bodrato@mail.dm.unipi.it>
+
+       * gen-fac_ui.c: Generate two more tables: odd factorial, swing.
+
+       * mpz/fac_ui.c: Rewrite.
+
+2011-12-06  Niels Möller  <nisse@lysator.liu.se>
+
+       * mpn/generic/hgcd.c (mpn_hgcd): Use hgcd_reduce for first
+       recursive call.
+
+2011-12-06  Torbjorn Granlund  <tege@gmplib.org>
+
+       * tune/mod_1_1-1.c: Redefine the mpn_ functions, not __gmpn_ (for the
+       benefit of fat builds).
+       * tune/mod_1_1-2.c: Likewise.
+
+2011-12-05  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/x86/fat/lshiftc.c: New file.
+       * mpn/x86/fat/mod_1_1.c: New file.
+       * mpn/x86/fat/mod_1_2.c: New file.
+       * mpn/x86/fat/mod_1_4.c: New file.
+
+       * mpn/x86/fat/diveby3.c: Remove no longer fat function.
+       * mpn/x86_64/fat/diveby3.c: Likewise.
+
+       * mpn/x86_64/fat/gcd_1.c: Remove since always provided as asm.
+       * mpn/x86_64/fat/mode1o.c: Likewise.
+
+       * configure.in (fat_functions): Update to more relevant function set.
+       Add special handling for mod_1_N_cps functions.
+       * gmp-impl.h (struct cpuvec_t): Corresponding changes.  Also add
+       various declarations for new functions.
+       * mpn/x86/x86-defs.m4 (CPUVEC_FUNCS_LIST): Corresponding changes.
+       * mpn/x86_64/x86_64-defs.m4 (CPUVEC_FUNCS_LIST): Corresponding changes.
+       * mpn/x86/fat/fat.c (__gmpn_cpuvec): Corresponding changes.
+       * mpn/x86_64/fat/fat.c (__gmpn_cpuvec): Corresponding changes.
+
+       * mpn/x86_64: Port most remaining x86_64 files to DOS64.
+
+       * mpn/x86_64/coreisbr/aors_n.asm: Add forgotten DOS64_EXIT.
+
+       * mpn/x86_64/x86_64-defs.m4 (LEA): Handle non-PIC code.
+       * mpn/x86_64/darwin.m4 (LEA): Likewise.
+
+2011-12-04  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/x86_64/fat/fat.c (MAKE_FMS): Rewrite to handle modern CPUs.
+       * mpn/x86/fat/fat.c (MAKE_FMS): Likewise.
+
+       * mpn/x86_64/darwin.m4 (PROTECT): Define to potentially useful value.
+
+2011-12-02  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/x86_64/invert_limb_table.asm: Use PROTECT.
+       * mpn/x86_64/invert_limb.asm: Likewise.
+
+       * mpn/x86_64/darwin.m4 (PROTECT, IFELF): New defines.
+       * mpn/x86_64/dos64.m4 (PROTECT, IFELF): New defines.
+       * mpn/x86_64/x86_64-defs.m4 (PROTECT, IFELF): New defines.
+
+2011-12-01  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/x86_64/fat/fat.c: Copy fake cpuid code from x86/fat/fat.c.
+
+       * mpn/x86_64 (STD64, IFSTD): New names for ELF64, IFELF (since these
+       denote all standard calling conventions).
+
+       * mpn/x86_64: Add DOS64 ABI support to more files.
+
+       * mpn/x86_64/mod_1_1.asm: Finish DOS64 support.
+       * mpn/x86_64/mod_1_2.asm: Likewise.
+       * mpn/x86_64/mod_1_4.asm: Likewise.
+
+       * configure.in: Add GMP_NONSTD_ABI also for fat builds.
+
+       * mpn/x86_64/fat/fat_entry.asm: Rewrite to support DOS64.
+
+       * mpn/x86_64/dos64.m4 (IFDOS, IFSTD): New defines.
+       * mpn/x86_64/x86_64-defs (IFDOS, IFSTD): New defines.
+
+       * mpn/x86_64/dive_1.asm: Add DOS64 ABI support.
+       * mpn/x86_64/mode1o.asm: Likewise.
+
+       * mpn/x86_64/mod_34lsub1.asm: Enable for DOS64.
+
+       * mpn/x86_64/invert_limb.asm: Wrap .protected decl.
+
+       * gmp-impl.h (DECL_divexact_1): Fix typo in return type.
+
+       * mpn/x86_64/dos64.m4 (LEA): New define.
+       (PIC): Define.
+
+2011-11-29  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/x86_64: Add DOS64 ABI support to most files.
+
+2011-11-28  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/x86_64/mul_basecase.asm: Support ABI DOS64.
+       * mpn/x86_64/sqr_basecase.asm: Support ABI DOS64.
+       * mpn/x86_64/aorsmul_1.asm: Support ABI DOS64.
+       * mpn/x86_64/mul_1.asm: Support ABI DOS64.
+
+       * mpn/x86_64/x86_64-defs.m4 (DOS64_ENTRY, DOS64_EXIT): New, empty defs.
+
+       * mpn/x86_64/dos64.m4: New file.
+
+       * mpn/asm-defs.m4 (ABI_SUPPORT): New dummy macro.
+
+       * configure.in (64-bit mingw/cygwin): Define HOST_DOS64,GMP_NONSTD_ABI.
+       No longer clear out path_64.
+       (mpn code selection loop): Handle GMP_NONSTD_ABI.
+
+       * mpn/generic/udiv_w_sdiv.c: Use CNST_LIMB for some constants.
+
+2011-11-25  Torbjorn Granlund  <tege@gmplib.org>
+
+       * x86/*: Many new gmp-mparam.h files for 64-bit CPUs in 32-bit mode.
+
+       * configure.in: Overhaul x86/x86_64 support, merging three case
+       statements into one.
+
+2011-11-24  Torbjorn Granlund  <tege@gmplib.org>
+
+       * doc/gmp.texi (Formatted Output Strings): Clarify rules for mpf_t
+       precision.
+
+       * mpn/powerpc32/p7/gmp-mparam.h: New file.
+
+       * tune/tuneup.c (tune_mu_div, tune_mu_bdiv): Up min_size to karatsuba's
+       threshold.
+
+2011-11-22  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/powerpc64/mode64/p6/aorsmul_1.asm: New file.
+
+       * configure.in: Don't fail fat builds under 64-bit DOS.
+
+       * mpn/powerpc64/mode64/aors_n.asm: Align loop for slightly better
+       power5 performance.
+
+2011-11-21  Torbjorn Granlund  <tege@gmplib.org>
+
+       * gmp-h.in (__GNU_MP_RELEASE): Renamed from typo name.
+
+2011-11-20  Torbjorn Granlund  <tege@gmplib.org>
+
+       * configure.in: Split x86 CPUs into more subtypes for more accurate
+       passing of gcc flags.
+
+       * mpn/powerpc32/p3-p7/aors_n.asm: New file.
+
+       * configure.in: Pass -m32 for powerpc64 with abi=32, using the _maybe
+       mechanism.
+
+       * configure.in: Support powerpc32/p3-p7 directory for affected CPUs.
+
+2011-11-17  Torbjorn Granlund  <tege@gmplib.org>
+
+       * tune/speed.c (routine): Add mpn_tabselect.
+       * tune/common.c (speed_mpn_tabselect): New function.
+       * tune/speed.h (SPEED_ROUTINE_MPN_COPY_CALL): New macro, made from
+       old SPEED_ROUTINE_MPN_COPY.
+       (SPEED_ROUTINE_MPN_COPY): Just invoke SPEED_ROUTINE_MPN_COPY_CALL.
+       (SPEED_ROUTINE_MPN_TABSELECT): New macro.
+
+2011-11-17  Niels Möller  <nisse@lysator.liu.se>
+
+       * tune/tuneup.c (tune_hgcd_appr): Increase stop_since_change.
+
+2011-11-16  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/powerpc32/tabselect.asm: New file.
+
+       * mpn/powerpc64/mode64/aorscnd_n.asm: New file.
+
+2011-11-15  Niels Möller  <nisse@lysator.liu.se>
+
+       * tune/speed.h (speed_mpn_hgcd_appr_lehmer): New prototype.
+       (mpn_hgcd_lehmer_itch): Likewise.
+       (mpn_hgcd_appr_lehmer): Likewise.
+       (mpn_hgcd_appr_lehmer_itch): Likewise.
+       (MPN_HGCD_LEHMER_ITCH): Deleted macro.
+
+       * tune/speed.c (routine): Added mpn_hgcd_appr_lehmer.
+
+       * tune/common.c (speed_mpn_hgcd_lehmer): Use mpn_hgcd_lehmer_itch
+       rather than similarly named macro.
+       (speed_mpn_hgcd_appr_lehmer): New function.
+
+       * tune/Makefile.am (libspeed_la_SOURCES): Added
+       hgcd_appr_lehmer.c.
+
+       * tune/hgcd_appr_lehmer.c: New file.
+
+       * tune/tuneup.c (tune_hgcd_appr): Increased min_size to 50; some
+       machines got small thresholds which appear to be bogus.
+
+2011-11-15  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/generic/powm_sec.c (mpn_local_sqr): Remove forgotten TMP_* calls.
+       (redcify): Likewise.
+       (mpn_powm_sec): Likewise.
+
+       * mpn/generic/powm_sec.c (mpn_powm_sec): Rework scratch usage.
+       (mpn_powm_sec_itch): Rewrite.
+
+       * mpn/generic/powm_sec.c (mpn_powm_sec): Use mpn_tabselect also in
+       initialisation.
+
+       * configure.in: Amend 2011-11-03 gcc_cflags change.
+
+       * mpn/powerpc64/tabselect.asm: New file.
+       * mpn/x86_64/tabselect.asm: New file.
+       * mpn/x86/tabselect.asm: New file.
+       * mpn/ia64/tabselect.asm: New file.
+
+       * mpn/asm-defs.m4 (define_mpn): Add tabselect.
+
+       * configure.in (gmp_mpn_functions): Add tabselect.
+       (HAVE_NATIVE): Add entries for addcnd_n, subcnd_n, tabselect.
+
+       * mpn/generic/powm_sec.c: Remove mpn_tabselect implementation.
+       * mpn/generic/tabselect.c: New file with removed code.
+
+2011-11-13  Torbjorn Granlund  <tege@gmplib.org>
+
+       * tune/Makefile.am (TUNE_MPN_SRCS_BASIC): Add powm_sec.c.
+
+       * mpn/generic/powm_sec.c (win_size): Use POWM_SEC_TABLE.
+       (POWM_SEC_TABLE): Define default.
+
+       * tune/tuneup.c (tune_powm_sec): New function computing POWM_SEC_TABLE.
+       (all): Call new function.
+
+       * mpn/generic/powm_sec.c (win_size): Define only when
+       TUNE_PROGRAM_BUILD is not set.
+
+2011-11-13  Niels Möller  <nisse@lysator.liu.se>
+
+       * tune/tuneup.c (tune_hgcd_appr): Use default min_size.
+       (tune_hgcd_reduce): Increase max_size and step_factor, to 7000
+       and 0.04, respectively.
+
+2011-11-11  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/powerpc64/mode64/sqr_diag_addlsh1.asm: Remove.
+
+2011-11-11  Niels Möller  <nisse@lysator.liu.se>
+
+       * tune/hgcd_reduce_2.c: New file.
+       * tune/hgcd_reduce_1.c: New file.
+
+       * tune/tuneup.c (hgcd_appr_threshold): New threshold variable.
+       (hgcd_reduce_threshold): Likewise.
+       (tune_hgcd_appr): New function.
+       (tune_hgcd_reduce): New function.
+       (all): Call tune_hgcd_appr and tune_hgcd_reduce.
+
+       * tune/speed.h (speed_mpn_hgcd_reduce): Declaration.
+       (speed_mpn_hgcd_reduce_[12]): Likewise.
+       (mpn_hgcd_reduce_[12]): Likewise.
+       (SPEED_ROUTINE_MPN_HGCD_REDUCE_CALL): New macro.
+
+       * tune/speed.c (routine): Added mpn_hgcd_reduce,
+       mpn_hgcd_reduce_1, and mpn_hgcd_reduce_2.
+
+       * tune/common.c (speed_mpn_hgcd_reduce): New function.
+       (speed_mpn_hgcd_reduce_[12]): Likewise.
+
+       * tune/Makefile.am (libspeed_la_SOURCES): Added hgcd_reduce_1.c
+       hgcd_reduce_2.c.
+       (TUNE_MPN_SRCS_BASIC): Added hgcd_appr.c and hgcd_reduce.c.
+
+       * mpn/generic/hgcd_appr.c (submul, hgcd_matrix_apply): Deleted
+       functions, earlier copied to hgcd_reduce.c.
+       (mpn_hgcd_appr): Use hgcd_reduce.
+
+2011-11-09  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/powerpc64/mode64/sqr_basecase.asm: New file.
+
+       * mpn/x86_64/aorscnd_n.asm: New file.
+
+       * tune/speed.c (routine): Add measuring of mpn_addcnd_n, mpn_subcnd_n.
+       * tune/common.c (speed_mpn_addcnd_n,speed_mpn_subcnd_n): New functions.
+       * tune/speed.h: Declare them.
+
+       * tests/devel/try.c: Add tests for mpn_addcnd_n and mpn_subcnd_n.
+       * tests/refmpn.c (refmpn_addcnd_n, refmpn_subcnd_n): New functions.
+       * tests/tests.h: Declare them.
+
+       * configure.in (gmp_mpn_functions): Add addcnd_n and subcnd_n.
+
+2011-11-07  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/generic/redc_1.c: Just reduce U operand using Hensel norm, but
+       not fully canonically; leave add_n and conditional sub_n to caller.
+       Therefore omit R argument.
+
+       * mpn/generic/redc_1_sec.c: Remove.
+
+       * gmp-impl.h (mpn_redc_1): Update declaration.
+       (mpn_redc_1_sec): Remove declaration.
+
+       * configure.in (gmp_mpn_functions): Remove redc_1_sec.
+
+       * mpn/x86_64/redc_1.asm: Adapt to newly defined functionality/interface.
+       * tune/speed.h (SPEED_ROUTINE_REDC_1): Likewise.
+
+       * tests/refmpn.c (refmpn_redc_1): Likewise; also call refmpn_addmul_1
+       instead of mpn_addmul_1.
+
+       * mpn/generic/powm.c (MPN_REDC_1): New macro, use for mpn_redc_1.
+       * mpn/generic/powm_sec.c (MPN_REDC_1_SEC): New macro, use for
+       mpn_redc_1_sec.
+
+2011-11-03  Torbjorn Granlund  <tege@gmplib.org>
+
+       * dumbmp.c (mpz_sub): Abort for non-handled case.
+
+       * mpn/powerpc64/mode64/lshiftc.asm: Move file from here...
+       * mpn/powerpc64/lshiftc.asm: ...to here, with trivial modifications.
+
+       * configure.in: Pass -m32 in more cases, using _maybe mechanism.
+       Inherit default gcc_cflags in more places.
+
+       * mpn/powerpc64/mode64/p7/gmp-mparam.h: New file.
+
+2011-11-02  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/s390_64/invert_limb.asm: Slight optimisation.
+
+       * configure.in (s390): Set gcc_32_cflags_maybe.
+
+       * mpn/s390_32/gmp-mparam.h: Put in proper data.
+       * mpn/s390_32/esame/gmp-mparam.h: New file.
+
+       * mpn/x86_64/bobcat/gmp-mparam.h: New file.
+
+       * mpn/s390_32/lshift.asm: New file.
+       * mpn/s390_32/rshift.asm: New file.
+       * mpn/s390_32/lshiftc.asm: New file.
+
+2011-10-31  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/powerpc64/sqr_diagonal.asm: Move from here...
+       * mpn/powerpc64/mode32/sqr_diagonal.asm: ...to here.
+
+       * mpn/powerpc64/mode64/sqr_diag_addlsh1.asm: New file.
+
+       * mpn/s390_64/sqr_basecase.asm: Rewrite sqr_diag_addlsh1 code.
+       * mpn/s390_32/esame/sqr_basecase.asm: Likewise.
+
+2011-10-29  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/s390_64/lshift.asm: Complete rewrite.
+       * mpn/s390_64/rshift.asm: Likewise.
+
+       * mpn/s390_64/lshiftc.asm: New file.
+
+2011-10-28  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/s390_32/esame/aors_n.asm: New file, with rewritten add/sub code.
+
+2011-10-27  Torbjorn Granlund  <tege@gmplib.org>
+
+       From Per Olofsson:
+       * gmp-impl.h (BSWAP_LIMB): Rename variable to avoid BSWAP_LIMB_FETCH
+       clash.
+
+       * mpn/s390_32/esame/mul_basecase.asm: New file.
+
+       * mpn/s390_32/esame/sqr_basecase.asm: New file.
+
+       * mpn/s390_32/logops_n.asm: New file.
+
+       * mpn/s390_64/logops_n.asm: Fix rp=up code.  Remove a leftover insn.
+
+2011-10-26  Niels Möller  <nisse@lysator.liu.se>
+
+       * gmp-impl.h (mpn_hgcd_reduce, mpn_hgcd_reduce_itch): Added
+       prototypes.
+       (HGCD_APPR_THRESHOLD): Set up threshold for tuning.
+       (HGCD_REDUCE_THRESHOLD): Likewise.
+
+       * configure.in (gmp_mpn_functions): Added hgcd_reduce.
+
+       * mpn/generic/hgcd_reduce.c: New file.
+
+2011-10-24  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/x86_64/sqr_basecase.asm: Put intermediate result into R, don't
+       allocate any stack space.
+
+2011-10-23  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/s390_64/logops_n.asm: Use nc, oc, xc when possible.
+
+       * tune/common.c (speed_mpn_and_n, speed_mpn_andn_n, etc):
+       Pass correct input args.
+
+       * mpn/s390_64/mod_34lsub1.asm: Use llgfr for zero extensions.
+
+       * mpn/s390_64/mul_basecase.asm: New file.
+
+       * mpn/s390_64/sqr_basecase.asm: New file.
+       * mpn/s390_64/sqr_diag_addlsh1.asm: Removed, lives on in sqr_basecase.
+
+       * mpn/s390_64/bdiv_dbm1c.asm: Shave off 1 c/l.
+
+       * mpn/s390_64/aorrlsh1_n.asm: New file, developed from aorslsh1_n.asm.
+       * mpn/s390_64/sublsh1_n.asm: New file.
+       * mpn/s390_64/aorslsh1_n.asm: Remove file.
+
+2011-10-22  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/s390_64/logops_n.asm: New file.
+
+       * mpn/s390_64/aors_n.asm: New file, with rewritten add/sub code.
+
+2011-10-20  Torbjorn Granlund  <tege@gmplib.org>
+
+       * tune/speed.h (SPEED_ROUTINE_MPN_SQR_DIAG_ADDLSH1_CALL): New macro.
+       * tune/common.c (speed_mpn_sqr_diag_addlsh1): New function.
+       * tune/speed.c (routine): Measure mpn_sqr_diag_addlsh1.
+
+       * mpn/s390_64/sqr_diag_addlsh1.asm: Rewrite like s390_32/esame code.
+
+       * mpn/s390_32/esame/sqr_diag_addlsh1.asm: Save just needed registers.
+
+2011-10-19  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/s390_32/esame/add_n.asm: Rewrite, similar to s390_64 code.
+       * mpn/s390_32/esame/sub_n.asm: Likewise.
+
+2011-10-17  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/s390_32/esame/aorslsh1_n.asm: New file.
+
+2011-10-16  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/s390_32/esame/sqr_diag_addlsh1.asm: New file.
+
+       * mpn/s390_32/copyi.asm: New file.
+       * mpn/s390_32/copyd.asm: New file.
+
+       * mpn/s390_64/copyd.asm: Optimise.
+
+       * mpn/s390_64/copyi.asm: Rewrite along the lines of glibc memcpy.
+
+       * mpn/s390_64/aorslsh1_n.asm: New file.
+
+       * mpn/s390_64/mod_34lsub1.asm: New file.
+
+       * mpn/s390_64/sqr_diag_addlsh1.asm: New file.
+
+2011-10-15  Torbjorn Granlund  <tege@gmplib.org>
+
+       * configure.in (s390): Rewrite support to handle known CPUs.
+       * config.guess: Recognise s390 CPUs.
+       * config.sub: Match s390 CPUs.
+       * acinclude.m4 (S390_PATTERN, S390X_PATTERN): New defines.
+
+2011-10-14  Torbjorn Granlund  <tege@gmplib.org>
+
+       From Per Olofsson:
+       * mpn/generic/popham.c: Add __GMP_NOTHROW to make it match gmp.h.
+       * mpn/generic/gcd_1.c: Separate declarations and initialisers for the
+       benefit of C++.
+
+       * configure.in: AC_DEFINE HAVE_HOST_CPU_s390_zarch.
+       * longlong.h (s390): Use it.
+       (s390 umul_ppmm): Fix typo in pure C variant.
+
+2011-10-13  Torbjorn Granlund  <tege@gmplib.org>
+
+       * longlong.h (s390): Put back an accidentally deleted #else.
+
+       * configure.in (s390): Unset extra_functions for s390x.
+
+2011-10-12  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/s390_64/lshift.asm: Reduce register usage.
+       * mpn/s390_64/rshift.asm: Likewise.
+
+       * longlong.h (s390 umul_ppmm): With new-enough gcc, avoid asm.
+
+       From Andreas Krebbel:
+       * longlong.h (s390 umul_ppmm): Support 32-bit limbs with gcc using
+       64-bit registers.
+       (s390 udiv_qrnnd): Likewise.
+
+2011-10-11  Torbjorn Granlund  <tege@gmplib.org>
+
+       * configure.in (s390x): Pass -mzarch to gcc in 32-bit mode.
+
+       * longlong.h (s390x): Add __CLOBBER_CC for relevant asm patterns.
+       * mpn/generic/mod_1_1.c (s390x add_mssaaaa): Likewise.
+
+       * mpn/s390_64/copyd.asm: New file.
+
+2011-10-10  Niels Möller  <nisse@lysator.liu.se>
+
+       * mpn/generic/hgcd_appr.c: Deleted debugging code.
+
+       * tests/mpn/t-hgcd_appr.c (main): Added -v flag.
+       (hgcd_appr_valid_p): Increased margin of non-minimality for
+       divide-and-conquer algorithm. Display bit counts only if
+       -v is used.
+
+       * mpn/generic/hgcd_appr.c (submul): New (static) function.
+       (hgcd_matrix_apply): New function.
+       (mpn_hgcd_appr_itch): Account for divide-and-conquer algorithm.
+       (mpn_hgcd_appr): Implemented divide-and-conquer.
+
+2011-10-10  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/generic/mod_1_1.c (add_mssaaaa): Add s390x variant.  Put arm code
+       inside __GNUC__.
+
+       * tune/time.c (STCK): Use proper memory constraint.
+
+       From Marco Trudel:
+       * tests/mpz/t-scan.c (check_ref): Fix loop end bound.
+
+2011-10-10  Niels Möller  <nisse@lysator.liu.se>
+
+       * gmp-impl.h (HGCD_APPR_THRESHOLD): New threshold.
+
+       * mpn/generic/hgcd_appr.c (mpn_hgcd_appr): Interface change.
+       Destroy inputs, let caller make working copies if needed.
+       (mpn_hgcd_appr_itch): Reduced scratch need.
+       * gmp-impl.h: Updated mpn_hgcd_appr prototype.
+       * tests/mpn/t-hgcd_appr.c (one_test): Make working copies for
+       hgcd_appr.
+       * tune/common.c (speed_mpn_hgcd_appr): Use SPEED_ROUTINE_MPN_HGCD_CALL.
+       * tune/speed.h (SPEED_ROUTINE_MPN_HGCD_APPR_CALL): Deleted.
+
+2011-10-09  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/s390_64/copyi.asm: New file.
+       * mpn/s390_64/lshift.asm: New file.
+       * mpn/s390_64/rshift.asm: New file.
+
+       * mpn/s390_64/add_n.asm: Rewrite using lmg/stmg.
+       * mpn/s390_64/sub_n.asm: Likewise.
+
+       * mpn/s390_64/invert_limb.asm: Save a callee-saves register less.
+
+       * tune/time.c (getrusage_backwards_p): Properly cast printed values.
+
+       * longlong.h (s390x): Put back UDItype casts to make gcc reloading use
+       the right mode for constants.
+       (s390x count_leading_zeros): Disable until we support z10 specifically.
+       (s390x add_ssaaaa): Remove algsi/slgsi until we support z10.
+
+2011-10-09  Niels Möller  <nisse@lysator.liu.se>
+
+       * mpn/generic/hgcd_matrix.c (mpn_hgcd_matrix_adjust): Declare
+       matrix argument const.
+
+2011-10-08  Niels Möller  <nisse@lysator.liu.se>
+
+       * tests/mpn/t-hgcd_appr.c (hgcd_appr_valid_p): Adjusted the
+       allowed margin of non-minimality for hgcd_appr.
+
+       * mpn/generic/hgcd_appr.c (mpn_hgcd_appr): Fixed handling of
+       extra_bits, starting at zero, to ensure that we don't produce too
+       small remainders. Added a final reduction loop when we
+       otherwise terminate with extra_bits > 0, to make the returned
+       remainders closer to minimal.
+
+2011-10-07  Torbjorn Granlund  <tege@gmplib.org>
+
+       * longlong.h (s390): Add 32-bit zarch umul_ppmm and udiv_qrnnd.
+       (s390): Overhaul 32-bit and 64-bit code.
+
+2011-10-07  Niels Möller  <nisse@lysator.liu.se>
+
+       * tune/speed.h (speed_mpn_hgcd_appr): New prototype.
+       (SPEED_ROUTINE_MPN_HGCD_APPR_CALL): New macro.
+       * tune/common.c (speed_mpn_hgcd_appr): New function.
+       * tune/speed.c (routine): Added mpn_hgcd_appr.
+
+       * tests/mpn/t-hgcd_appr.c: New file.
+       * tests/mpn/Makefile.am (check_PROGRAMS): Added t-hgcd_appr.
+
+       * configure.in (gmp_mpn_functions): Added hgcd_step and hgcd_appr.
+
+       * gmp-impl.h: Added prototypes for mpn_hgcd_step,
+       mpn_hgcd_appr_itch and mpn_hgcd_appr.
+
+       * mpn/generic/hgcd_appr.c: New file.
+
+       * mpn/generic/hgcd_step.c: New file, extracted from hgcd.c.
+       (mpn_hgcd_step): Renamed, from...
+       * mpn/generic/hgcd.c (hgcd_step): ...old name. Renamed and moved
+       to hgcd_step.c.
+       (hgcd_hook): Also moved to hgcd_step.c.
+       (mpn_hgcd): Updated for hgcd_step renaming.
+
+2011-10-06  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/s390_64/invert_limb.asm: New file.
+
+2011-10-04  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/s390_64/submul_1.asm: New file.
+       * mpn/s390_32/esame/submul_1.asm: New file.
+
+       * mpn/generic/mulmid.c (mpn_mulmid): Move a TMP_DECL to block start.
+
+       * mpn/Makefile.am (TARG_DIST): Add s390_32 and s390_64, remove s390 and
+       z8000x.
+
+       * doc/gmp.texi (Custom Allocation): Rephrase a paragraph.
+
+       * demos/factorize.c: Run 25 Miller-Rabin tests.
+
+       * mpz/nextprime.c: Run 25 mpz_millerrabin tests (was 10).
+
+2011-10-03  Torbjorn Granlund  <tege@gmplib.org>
+
+       * configure.in: Support s390x.
+
+       * longlong.h: Add support for 64-bit s390x.
+
+       * mpn/s390_64: New directory.
+       * mpn/s390_64/add_n.asm: New file.
+       * mpn/s390_64/sub_n.asm: New file.
+       * mpn/s390_64/mul_1.asm: New file.
+       * mpn/s390_64/addmul_1.asm: New file.
+       * mpn/s390_64/bdiv_dbm1c.asm: New file.
+       * mpn/s390_64/gmp-mparam.h: New file, taken from x86_64.
+
+       * mpn/s390_32: Directory renamed from mpn/s390.
+       * mpn/s390_32/gmp-mparam.h: New file, taken from x86_64.
+       * mpn/s390_32/esame/add_n.asm: New file.
+       * mpn/s390_32/esame/sub_n.asm: New file.
+       * mpn/s390_32/esame/mul_1.asm: New file.
+       * mpn/s390_32/esame/addmul_1.asm: New file.
+       * mpn/s390_32/esame/bdiv_dbm1c.asm: New file.
+
+2011-10-03  Niels Möller  <nisse@lysator.liu.se>
+
+       * tests/mpn/Makefile.am (check_PROGRAMS): Added t-mulmid.
+       * tests/mpn/t-mulmid.c: New file.
+
+       mulmid-related assembly for x86_64, from David Harvey:
+       * mpn/asm-defs.m4 (define_mpn): Added [add,sub]_err[1,2,3]_n and
+       mulmid_basecase. Also use m4_not_for_expansion on the
+       corresponding OPERATION_* symbols.
+       * mpn/x86_64/aors_err1_n.asm: New file.
+       * mpn/x86_64/aors_err2_n.asm: Likewise.
+       * mpn/x86_64/aors_err3_n.asm: Likewise.
+       * mpn/x86_64/mulmid_basecase.asm: Likewise.
+       * mpn/x86_64/core2/aors_err1_n.asm: Likewise.
+       * mpn/x86_64/gmp-mparam.h (MULMID_TOOM42_THRESHOLD): New value.
+       * mpn/x86_64/core2/gmp-mparam.h (MULMID_TOOM42_THRESHOLD): Likewise.
+
+       Tuning of mulmid, from David Harvey:
+       * tune/Makefile.am (TUNE_MPN_SRCS_BASIC): Added mulmid.c
+       mulmid_n.c toom42_mulmid.c.
+       * tune/speed.h: Prototypes for mulmid-related functions.
+       (struct speed_params): Increased max number of sources to 5.
+       (SPEED_ROUTINE_MPN_BINARY_ERR_N_CALL): New macro.
+       (SPEED_ROUTINE_MPN_BINARY_ERR1_N): Likewise.
+       (SPEED_ROUTINE_MPN_BINARY_ERR2_N): Likewise.
+       (SPEED_ROUTINE_MPN_BINARY_ERR3_N): Likewise.
+       (SPEED_ROUTINE_MPN_MULMID): Likewise.
+       (SPEED_ROUTINE_MPN_MULMID_N): Likewise.
+       (SPEED_ROUTINE_MPN_TOOM42_MULMID): Likewise.
+       * tune/common.c (mpn_[add,sub]_err[1,2,3]_n): New functions.
+       (speed_mpn_mulmid_basecase): New function.
+       (speed_mpn_mulmid): New function.
+       (speed_mpn_mulmid_n): New function.
+       (speed_mpn_toom42_mulmid): New function.
+       * tune/speed.c (routine): Added mpn_[add,sub]_err[1,2,3]_n,
+       mpn_mulmid_basecase, mpn_toom42_mulmid, mpn_mulmid_n, and
+       mpn_mulmid.
+       * tune/tuneup.c (mulmid_toom42_threshold): New threshold variable.
+       (tune_mulmid): New function.
+       (all): Call tune_mulmid.
+
+       Testing of mulmid, from David Harvey:
+       * tests/refmpn.c (AORS_ERR1_N): New macro.
+       (refmpn_add_err1_n, refmpn_sub_err1_n): New functions.
+       (AORS_ERR2_N): New macro.
+       (refmpn_add_err2_n, refmpn_sub_err2_n): New functions.
+       (AORS_ERR3_N): New macro.
+       (refmpn_add_err3_n, refmpn_sub_err3_n): New functions.
+       (refmpn_mulmid_basecase): New function.
+       (refmpn_toom42_mulmid): New function, wrapper for
+       refmpn_mulmid_basecase.
+       (refmpn_mulmid_n): Likewise.
+       (refmpn_mulmid): Likewise.
+       * tests/tests.h: Prototypes for new functions.
+       * tests/devel/try.c (NUM_SOURCES): Increased to 5.
+       (struct try_t): Use NUM_SOURCES and NUM_DESTS constants.
+       (SIZE_4, SIZE_6, SIZE_DIFF_PLUS_3, SIZE_ODD): New constants.
+       (OVERLAP_NOT_DST2): New flag.
+       (param_init): New mulmid-related operation types.
+       (mpn_toom42_mulmid_fun): New function.
+       (choice_array): Added mulmid-related entries.
+       (overlap_array): Extended for larger NUM_SOURCES.
+       (OVERLAP_COUNT): Handle OVERLAP_NOT_DST2.
+       (call): Support mulmid-related functions.
+       (pointer_setup): Handle SIZE_4, SIZE_6, and SIZE_DIFF_PLUS_3.
+       (SIZE_ITERATION): Handle SIZE_ODD.
+       (SIZE2_FIRST): Handle SIZE_CEIL_HALF.
+       (SIZE2_LAST): Likewise.
+
+       Implementation of mulmid, from David Harvey:
+       * mpn/generic/add_err1_n.c (mpn_add_err1_n): New file and function.
+       * mpn/generic/add_err2_n.c (mpn_add_err2_n): Likewise.
+       * mpn/generic/add_err3_n.c (mpn_add_err3_n): Likewise.
+       * mpn/generic/sub_err1_n.c (mpn_sub_err1_n): Likewise.
+       * mpn/generic/sub_err2_n.c (mpn_sub_err2_n): Likewise.
+       * mpn/generic/sub_err3_n.c (mpn_sub_err3_n): Likewise.
+       * mpn/generic/mulmid_basecase.c (mpn_mulmid_basecase): Likewise.
+       * mpn/generic/mulmid_n.c (mpn_mulmid_n): Likewise.
+       * mpn/generic/toom42_mulmid.c (mpn_toom42_mulmid): Likewise.
+       * configure.in (gmp_mpn_functions): Added mulmid-related
+       functions.
+       (GMP_MULFUNC_CHOICES): Handle aors_err1_n, aors_err2_n, and
+       aors_err3_n.
+       * gmp-impl.h: Added prototypes for mulmid functions.
+       (MPN_TOOM42_MULMID_MINSIZE): New constant.
+       (MULMID_TOOM42_THRESHOLD): New threshold.
+       (mpn_toom42_mulmid_itch): New macro.
+
+2011-10-03  Niels Möller  <nisse@lysator.liu.se>
+
+       * tune/tune-gcd-p.c (main): Fixed broken loop conditions.
+
+2011-09-26  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/sh/sh2/submul_1.asm: Make this old submul_1 implementation
+       actually compute intended function.
+
+       * longlong.h (SH): Recognise predefs for all SH processors as defined
+       by current gcc versions.
+
+2011-09-25  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/sh: Migrate files to '.asm'.
+       * configure.in: Recognise sh3 and sh4.
+
+2011-09-21  Marc Glisse  <marc.glisse@inria.fr>
+
+       * gmpxx.h (mpz_class::swap): New function.
+       (mpq_class::swap): Likewise.
+       (mpf_class::swap): Likewise.
+       (swap): New function.
+       * tests/cxx/t-assign.cc: Test the above.
+       * doc/gmp.texi (swap): Document the above.
+
+2011-08-21  Marc Glisse  <marc.glisse@inria.fr>
+
+       * tests/cxx/t-ops2.cc: check mul-div by 2.
+
+       * gmpxx.h (__GMPXX_CONSTANT): New macro (__builtin_constant_p).
+       (__gmp_binary_lshift): Move before multiplication. Optimize x << 0.
+       (__gmp_binary_rshift): Move before division. Optimize x >> 0.
+       (__gmp_binary_plus): Optimize x + 0. Rewrite rational + integer.
+       (__gmp_binary_minus): Optimize x - 0 and 0 - x.
+       Rewrite rational - integer.
+       (__gmp_binary_multiplies): Optimize x * 2^n.
+       (__gmp_binary_divides): Optimize x / 2^n.
+       (__gmp_binary_*): Deduplicate code for symmetric operations.
+
+2011-08-18  Torbjorn Granlund  <tege@gmplib.org>
+
+       * printf/doprntf.c (__gmp_doprnt_mpf): For DOPRNT_CONV_FIXED, ask for
+       one more digit.
+
+2011-08-17  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpf/sub.c: Fix typo in copy condition.  Delay an allocation.
+
+2011-08-12  Torbjorn Granlund  <tege@gmplib.org>
+
+       * gmp-impl.h (LIMBS_PER_DIGIT_IN_BASE): Fix typo.
+
+2011-08-10  Torbjorn Granlund  <tege@gmplib.org>
+
+       * gmp-impl.h (DIGITS_IN_BASEGT2_FROM_BITS): New.
+       (DIGITS_IN_BASE_FROM_BITS): Compute more accurate result.
+       (MPN_SIZEINBASE): Use DIGITS_IN_BASEGT2_FROM_BITS.
+
+       * tests/rand/t-lc2exp.c (check_bigc): Call abort after reporting error.
+
+2011-08-09  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpz/out_str.c (mpz_out_str): Reinsert accidentally deleted str_size
+       adjustment.
+
+       * gmp-impl.h (DIGITS_IN_BASE_FROM_BITS): Simplify, also avoiding
+       overflow for base 2.
+
+2011-08-07  Torbjorn Granlund  <tege@gmplib.org>
+
+       * gmp-impl.h (struct bases): Add log2b and logb2 field, remove
+       chars_per_limb_exactly field.
+       (DIGITS_IN_BASE_FROM_BITS): New.
+       (DIGITS_IN_BASE_PER_LIMB): New.
+       (LIMBS_PER_DIGIT_IN_BASE): New.
+       * gen-bases.c: Generate log2b and logb2 fields; do not generate
+       chars_per_limb_exactly field.
+       * mpf/get_str.c mpf/out_str.c mpf/set_str.c mpn/generic/get_str.c
+         mpn/generic/sizeinbase.c mpq/get_str.c mpz/inp_str.c mpz/out_str.c
+         mpz/set_str.c printf/doprntf.c tune/speed.h tune/tuneup.c:
+       Use new macros.
+
+2011-08-04  Torbjorn Granlund  <tege@gmplib.org>
+
+       * dumbmp.c (mpz_root): Reinsert accidentally removed line.
+
+2011-08-03  Torbjorn Granlund  <tege@gmplib.org>
+
+       * dumbmp.c (mpz_tdiv_qr): Correctly handle dividend value being equal
+       to divisor value.
+       (mpz_root): Create reasonable starting approximation.
+       (mpz_sqrt): New function.
+       (mpz_mul_2exp): Add faster block shifting code, disabled for now.
+
+2011-07-15  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/arm/invert_limb.asm: Swap around some registers to silence 'as'
+       warnings.
+
+2011-07-14  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/generic/dcpi1_bdiv_q.c (mpn_dcpi1_bdiv_q): Get mpn_sub_1 size
+       argument right.
+
+2011-07-04  Torbjorn Granlund  <tege@gmplib.org>
+
+       * tests/misc/t-locale.c: Disable test for mingw.
+
+       * configure.in (x86_64 *-*-mingw*): Handle also cygwin here; clear out
+       extra_functions_64.
+
+2011-07-02  Torbjorn Granlund  <tege@gmplib.org>
+
+       * config.guess: Don't print newline in x86 cpuid function.
+       Rewrite x86-64 cpu recognition asm code to work under Windoze.
+
+2011-06-16  Torbjorn Granlund  <tege@gmplib.org>
+
+       * acinclude.m4 (GMP_ASM_RODATA): Fix typo in 2011-04-20 change.
+
+       * configure.in: Surround tr ranges with [] for portability.
+
+2011-05-25  Niels Möller  <nisse@lysator.liu.se>
+
+       * tune/tune-gcd-p.c (search): New function to search for minimum.
+       (main): Replaced slow linear search.
+
+2011-05-24  Niels Möller  <nisse@lysator.liu.se>
+
+       * tune/Makefile.am (EXTRA_PROGRAMS): Added tune-gcd-p. Also added
+       related automake variables.
+
+       * mpn/Makefile.am (tune-gcd-p): Deleted target.
+
+       * tune/tune-gcd-p.c: New file, extracted from mpn/generic/gcd.c
+       and updated.
+       * mpn/generic/gcd.c: Deleted the corresponding code, including
+       main function.
+
+2011-05-23  Niels Möller  <nisse@lysator.liu.se>
+
+       * mpz/jacobi.c (mpz_jacobi): Simplified by swapping operands when
+       needed, to get asize >= bsize. Use the reciprocity law generalized
+       to work when one operand is even.
+
+2011-05-22  Niels Möller  <nisse@lysator.liu.se>
+
+       * mpz/jacobi.c (mpz_jacobi): Another bugfix for the asize == 1
+       case. Sometimes, powers of two in b were taken into account twice.
+
+2011-05-21  Niels Möller  <nisse@lysator.liu.se>
+
+       * mpz/jacobi.c (mpz_jacobi): The handling of asize == 1 was
+       broken. Rewrote it.
+
+       * tests/mpz/t-jac.c (mpz_nextprime_step): Sanity check that prime
+       candidate and step have no common factor.
+       (check_data): Added some test cases related to the asize == 1 case
+       in mpz_jacobi.
+
+2011-05-20  Niels Möller  <nisse@lysator.liu.se>
+
+       * gmp-impl.h: Jacobi-related prototypes.
+
+       * configure.in (gmp_mpn_functions): Added jacobi_2, jacobi,
+       hgcd2_jacobi, hgcd_jacobi, and removed jacobi_lehmer.
+
+       * mpz/jacobi.c (STRIP_TWOS): Deleted macro.
+       (mpz_jacobi): Partially rewritten, to no longer make the A
+       operand odd. Use new mpn_jacobi_n.
+
+       * mpn/generic/jacobi_lehmer.c: Deleted file.
+
+       * mpn/generic/jacobi.c (mpn_jacobi_n): New subquadratic jacobi
+       implementation. Supersedes jacobi_lehmer.c.
+
+       * mpn/generic/hgcd_jacobi.c (mpn_hgcd_jacobi): New file and
+       function. A copy of mpn_hgcd, using mpn_hgcd2_jacobi, and with calls to
+       mpn_jacobi_update when appropriate.
+
+       * mpn/generic/jacobi_2.c (mpn_jacobi_2): New file. Extracted from
+       jacobi_lehmer.c.
+       * mpn/generic/hgcd2_jacobi.c (mpn_hgcd2_jacobi): Likewise.
+
+       * mpn/generic/hgcd.c (hgcd_hook): Avoid using NULL.
+
+2011-05-19  Niels Möller  <nisse@lysator.liu.se>
+
+       * tune/hgcd_lehmer.c (__gmpn_hgcd_itch): Don't rename symbols for
+       the functions moved to hgcd_matrix.c.
+
+       * configure.in (gmp_mpn_functions): Added hgcd_matrix.
+
+       * mpn/generic/hgcd.c (hgcd_matrix_update_1): Deleted. Several other
+       helper functions moved to hgcd_matrix.c, see below.
+       (hgcd_hook): New function.
+       (hgcd_step): Simplified, using mpn_gcd_subdiv_step and hgcd_hook.
+
+       * mpn/generic/hgcd_matrix.c: New file.
+       (mpn_hgcd_matrix_init): Moved here, from hgcd.c.
+       (mpn_hgcd_matrix_update_q): Likewise.
+       (mpn_hgcd_matrix_mul_1): Likewise.
+       (mpn_hgcd_matrix_mul): Likewise.
+       (mpn_hgcd_matrix_adjust): Likewise.
+
+       * mpn/generic/gcd_subdiv_step.c (mpn_gcd_subdiv_step): New
+       argument s, for use by hgcd.
+       * gmp-impl.h (mpn_gcd_subdiv_step): Update declaration.
+
+       * mpn/generic/gcd.c (mpn_gcd): Pass s = 0 to mpn_gcd_subdiv_step.
+       * mpn/generic/gcdext.c (mpn_gcdext): Likewise. Also added an ASSERT.
+       * mpn/generic/gcdext_lehmer.c (mpn_gcdext_lehmer_n): Likewise.
+       (mpn_gcdext_hook): Added some ASSERTs.
+       * mpn/generic/jacobi_lehmer.c (mpn_jacobi_lehmer): Likewise.
+
+2011-05-17  Niels Möller  <nisse@lysator.liu.se>
+
+       * doc/gmp.texi (mpn_gcd, mpn_gcdext): Document input requirements:
+       Must have un >= vn > 0, and V normalized.
+       * mpn/generic/gcdext.c (mpn_gcdext): Added ASSERT for input
+       normalization.
+       * mpn/generic/gcd.c (mpn_gcd): Added ASSERTs for input
+       requirements.
+
+2011-05-15  Marc Glisse  <marc.glisse@inria.fr>
+
+       * gmpxx.h (operator<<): Dedup.
+       * tests/cxx/t-iostream.cc: Test on compound types.
+
+       * gmpxx.h (__gmp_binary_expr): Let things happen in place: c=(a+b)/2.
+
+2011-05-10  Marc Glisse  <marc.glisse@inria.fr>
+
+       * gmpxx.h (__gmp_unary_expr): Let things happen in place: c=-(a+b).
+       (operator>>): Clean the commenting out.
+       * tests/cxx/t-iostream.cc: New file.
+       * tests/cxx/Makefile.am: Added t-iostream.
+
+2011-05-10  Niels Möller  <nisse@lysator.liu.se>
+
+       * doc/gmp.texi (mpz_gcd): Document that gcd(0,0) = 0.
+       (mpz_gcdext): Document range for cofactors.
+
+2011-05-09  Niels Möller  <nisse@lysator.liu.se>
+
+       * mpz/gcdext.c (mpz_gcdext): Increased sp allocation to bsize+1 limbs.
+       * doc/gmp.texi (mpn_gcdext): Fixed documentation of allocation
+       requirements; one extra limb is still needed for S.
+
+2011-05-09  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/x86/fat/gmp-mparam.h (BMOD_1_TO_MOD_1_THRESHOLD): Define.
+       * mpn/x86_64/fat/gmp-mparam.h (BMOD_1_TO_MOD_1_THRESHOLD): Define.
+
+2011-05-08  Marc Glisse  <marc.glisse@inria.fr>
+
+       * gmpxx.h: Replace unsigned long with mp_bitcnt_t in many places.
+       * doc/gmp.texi: Likewise.
+
+2011-05-06  Marc Glisse  <marc.glisse@inria.fr>
+
+       * gmpxx.h (mpz_class): Make constructor from mp[qf]_class explicit.
+       (mpq_class): Make constructor from mpf_class explicit.
+       * doc/gmp.texi: Document the above.
+       * NEWS: Likewise, and mention the EOF istream fix.
+       * tests/cxx/t-mix.cc: New file.
+       * tests/cxx/Makefile.am: Added t-mix.
+
+       * tests/cxx/t-assign.cc: Minor tweak.
+       * tests/cxx/t-misc.cc: Likewise.
+
+       * gmpxx.h (__gmp_resolve_temp): Remove.
+       (__gmp_set_expr): Remove some overloads.
+       (mpq_class): mpz_init_set the numerator and denominator instead of
+       mpq_init + mpq_set.
+       (mpz_class): Dedup the string constructors.
+       (mpq_class): Likewise.
+
+       * tests/cxx/t-ops3.cc: New file.
+       * tests/cxx/Makefile.am: Added t-ops3.
+
+2011-05-05  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpz/gcdext.c: Correct sgn computation.
+       Use MPZ_REALLOC.
+
+2011-05-05  Marc Glisse  <marc.glisse@inria.fr>
+
+       * mpn/x86_64/fat/fat.c: Update for Sandy Bridge.
+       * config.guess: warning to keep it in sync with fat.c.
+
+2011-05-05  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/x86_64/fat/fat_entry.asm (PIC_OR_DARWIN): New symbol.  Use it to
+       work around Darwin problems.
+
+2011-05-04  Niels Möller  <nisse@lysator.liu.se>
+
+       * mpz/gcdext.c (mpz_gcdext): Reduced temporary allocations. Use
+       mpz_divexact when computing the second cofactor.
+
+2011-05-03  David Harvey  <dmharvey@cims.nyu.edu>
+
+       * configure.in: make invert_limb_table work correctly with
+       --disable-assembly (from Niels Moller)
+
+2011-05-02  Marc Glisse  <marc.glisse@inria.fr>
+
+       * .bootstrap: libtoolize doesn't need -c.
+
+       * configfsf.guess: Update to version of 2011-02-02.
+       * configfsf.sub: Update to version of 2011-03-23.
+
+2011-05-02  Niels Möller  <nisse@lysator.liu.se>
+
+       * mpz/gcdext.c (mpz_gcdext): Don't allocate extra limbs at the end
+       of mpn_gcdext parameters.
+
+       * doc/gmp.texi (mpn_gcdext): Updated doc.
+
+2011-05-01  Niels Möller  <nisse@lysator.liu.se>
+
+       * mpn/generic/div_qr_2u_pi1.c (mpn_div_qr_2u_pi1): Fixed ASSERT.
+
+2011-04-30  Marc Glisse  <marc.glisse@inria.fr>
+
+       * gmp-h.in (mpz_cdiv_q_2exp): Use mp_bitcnt_t to match the definition
+       and the documentation.
+       (mpz_remove): Likewise.
+       (mpf_eq): Likewise.
+
+       * ltmain.sh: Remove.
+       * .bootstrap: Let libtoolize generate ltmain.sh.
+
+       * tests/cxx/t-ops2.cc: Add a couple tests.
+       * tests/cxx/t-rand.cc: Likewise.
+
+       * doc/gmp.texi (mpf_urandomb): Make explicit the fact that it does not
+       change the precision.
+
+       * gmp-h.in (__GMP_EXTERN_INLINE): Recent g++ uses gnu_inline.
+
+2011-04-28  Torbjorn Granlund  <tege@gmplib.org>
+
+       * configure.in (x86_64): Support bobcat specifically.
+       (x86): Match bobcat and bulldozer, handle like k10.
+
+2011-04-28  David Harvey  <dmharvey@cims.nyu.edu>
+
+       * README.HG: update autotools version numbers.
+
+2011-04-27  Torbjorn Granlund  <tege@gmplib.org>
+
+       * tune/speed.h (speed_cyclecounter): Always use PIC variant when
+       compiled with Apple's GCC.
+
+       * mpn/x86/darwin.m4 (LEA): Complete rewrite.
+       (m4append): New macro.
+
+2011-04-26  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/sparc32/sparc-defs.m4 (changecom): Don't redefine '!' as it
+       interferes with expressions.
+
+2011-04-20  Torbjorn Granlund  <tege@gmplib.org>
+
+       * acinclude.m4 (GMP_ASM_RODATA): Make 'foo' larger to avoid clang
+       problems.
+
+2011-04-12  Niels Möller  <nisse@lysator.liu.se>
+
+       * mpn/x86_64/invert_limb.asm [PIC]: Declare mpn_invert_limb_table
+       as .protected.
+
+2011-04-11  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/x86/k7/invert_limb.asm: Use deflit for Darwin bug workaround.
+       Undo 2011-03-28 change.
+
+       * mpn/asm-defs.m4 (define_mpn): Use deflit.
+
+2011-04-10  Niels Möller  <nisse@lysator.liu.se>
+
+       * mpn/asm-defs.m4 (define_mpn): Added invert_limb_table.
+
+       * configure.in: Add invert_limb_table to extra_functions_64 on
+       x86_64.
+
+       * mpn/x86_64/invert_limb.asm: Changed references from approx_tab
+       to mpn_invert_limb_table.
+
+       * mpn/x86_64/invert_limb_table.asm (mpn_invert_limb_table): New
+       file. Extracted approximation table from invert_limb.asm, renamed
+       and made global.
+
+2011-03-30  Niels Möller  <nisse@lysator.liu.se>
+
+       * mpn/x86_64/div_qr_2u_pi1.asm: New file.
+
+       * configure.in (gmp_mpn_functions): Add div_qr_2u_pi1.
+
+       * gmp-impl.h (mpn_div_qr_2u_pi1): Declare.
+
+       * mpn/generic/div_qr_2u_pi1.c (mpn_div_qr_2u_pi1): Moved to
+       separate file, from...
+       * mpn/generic/div_qr_2.c: ... old location.
+
+       * mpn/generic/div_qr_2n_pi1.c: Renamed file, from...
+       * mpn/generic/div_qr_2_pi1_norm.c: ...old name.
+       * mpn/x86_64/div_qr_2n_pi1.asm: Renamed file, from...
+       * mpn/x86_64/div_qr_2_pi1_norm.asm: ...old name.
+
+       * gmp-impl.h (mpn_div_qr_2n_pi1): Use new name in declaration.
+       * tune/speed.h (speed_mpn_div_qr_2n): Likewise.
+       (speed_mpn_div_qr_2u): Likewise.
+
+       * tune/tuneup.c (tune_div_qr_2): Use new name speed_mpn_div_qr_2n.
+
+       * tune/speed.c (routine): Use new names mpn_div_qr_2n and
+       mpn_div_qr_2u, also on the command line.
+
+       * tune/common.c (speed_mpn_div_qr_2n): Renamed, from...
+       (speed_mpn_div_qr_2_norm): ... old name.
+       (speed_mpn_div_qr_2u): Renamed, from...
+       (speed_mpn_div_qr_2_unnorm): ... old name.
+
+       * mpn/generic/div_qr_2_pi1_norm.c (mpn_div_qr_2n_pi1): Renamed,
+       from...
+       (mpn_div_qr_2_pi1_norm): ...old name.
+       * mpn/x86_64/div_qr_2_pi1_norm.asm: Likewise.
+
+       * mpn/generic/div_qr_2.c (mpn_div_qr_2n_pi2): Renamed, from...
+       (mpn_div_qr_2_pi2_norm): ... old name.
+       (mpn_div_qr_2u_pi1): Renamed, from...
+       (mpn_div_qr_2_pi1_unnorm): ... old name.
+       (mpn_div_qr_2): Call functions using new names.
+
+       * mpn/asm-defs.m4: Renamed div_qr_2-functions to new names.
+
+2011-03-29  Niels Möller  <nisse@lysator.liu.se>
+
+       * mpn/x86_64/div_qr_2_pi1_norm.asm: Updated to use a separate rp
+       argument.
+
+       * gmp-impl.h (mpn_div_qr_2_pi1_norm): Updated declaration.
+       * gmp-h.in (mpn_div_qr_2): Likewise.
+
+       * tests/mpn/t-div.c (main): Adapted to new mpn_div_qr_2 interface.
+       * tune/speed.h (SPEED_ROUTINE_MPN_DIV_QR_2): Likewise.
+
+       * mpn/generic/div_qr_2.c (mpn_div_qr_2_pi2_norm): Added rp
+       argument. Don't clobber the input dividend.
+       (mpn_div_qr_2_pi1_unnorm): Likewise.
+       (mpn_div_qr_2): Likewise.
+       * mpn/generic/div_qr_2_pi1_norm.c (mpn_div_qr_2_pi1_norm): Likewise.
+
+2011-03-29  Niels Möller  <nisse@lysator.liu.se>
+
+       * mpn/x86/k7/invert_limb.asm: Use mov rather than push and pop.
+       Earlier load of divisor from stack.
+
+2011-03-28  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/x86/k7/invert_limb.asm: Protect movzwl register parameters from
+       being interpreted as m4 macro parameters.
+
+2011-03-22  Niels Möller  <nisse@lysator.liu.se>
+
+       * mpn/x86_64/div_qr_2_pi1_norm.asm: Copied optimized inner loop
+       from divrem_2.asm.
+
+       * mpn/x86_64/div_qr_2_pi1_norm.asm: First working, but poorly
+       optimized, implementation.
+
+       * mpn/asm-defs.m4 (define_mpn): Added div_qr_2_pi[12]_*norm.
+
+       * mpn/generic/div_qr_2_pi1_norm.c (mpn_div_qr_2_pi1_norm): Moved
+       to separate file, from...
+       * mpn/generic/div_qr_2.c: ... old location.
+
+       * gmp-impl.h (mpn_div_qr_2_pi1_norm): Declare.
+
+       * configure.in (gmp_mpn_functions): Added div_qr_2_pi1_norm.
+
+2011-03-22  Torbjorn Granlund  <tege@gmplib.org>
+
+       * configure.in (powerpc): Reinsert lost AIX cpu_path 32-bit handling.
+       Reinsert lost linux/bsd cpu_path handling.
+
+       * mpn/generic/mod_1_1.c: Disable powerpc asm for _LONG_LONG_LIMB.
+       * mpn/generic/div_qr_2.c: Likewise.
+
+       * mpn/generic/div_qr_2.c: Use asm just for gcc.
+       Make powerpc add_sssaaaa work for 32-bit case, and use less strict
+       constraints.
+
+2011-03-21  Niels Möller  <nisse@lysator.liu.se>
+
+       * tune/tuneup.c (div_qr_2_pi2_threshold): New global variable.
+       (tune_div_qr_2): New function.
+       (all): Call tune_div_qr_2.
+
+       * tune/Makefile.am (TUNE_MPN_SRCS_BASIC): Added div_qr_2.c.
+
+       * gmp-impl.h (DIV_QR_2_PI2_THRESHOLD): Setup for tuning.
+
+       New 4/2 division loop, based on Torbjörn's work:
+       * mpn/generic/div_qr_2.c (add_sssaaaa, add_csaac): New macros.
+       (udiv_qr_4by2): New macro.
+       (invert_4by2): New function.
+       (mpn_div_qr_2_pi2_norm): New function.
+       (DIV_QR_2_PI2_THRESHOLD): New threshold.
+       (mpn_div_qr_2_pi1_norm): Renamed, from...
+       (mpn_div_qr_2_norm): ... old name.
+       (mpn_div_qr_2_pi1_unnorm): Renamed, from...
+       (mpn_div_qr_2_unnorm): ... old name.
+       (mpn_div_qr_2): Use mpn_div_qr_2_pi2_norm for large enough
+       normalized divisors.
+
+       * gmp-impl.h (udiv_qr_3by2): Avoid a copy.
+
+2011-03-21  Torbjorn Granlund  <tege@gmplib.org>
+
+       * configure.in (hppa): Under linux, treat 64-bit processors as if they
+       were 32-bit processors.
+
+       * mpn/generic/addcnd_n.c: New file.
+       * mpn/asm-defs.m4 (define_mpn): Add addcnd_n and subcnd_n.
+       * configure.in (gmp_mpn_functions): Add addcnd_n.
+       * gmp-impl.h (mpn_addcnd_n): Declare.
+
+       * mpn/generic/subcnd_n.c: Combine nails and non-nails functions.
+
+       * gmp-impl.h (invert_pi1): Prepend _ to local variables, protect
+       parameters within () where necessary.
+
+       * mpn/asm-defs.m4 (define_mpn): Add div_qr_2.
+       * configure.in (gmp_mpn_functions): Reinsert mercurial-bug-removed
+       line.
+
+2011-03-20  Torbjorn Granlund  <tege@gmplib.org>
+
+       * configure.in (powerpc): Add cpu_path for all three ABIs.
+       Rename "aix64" to "mode64" for consistency.
+
+2011-03-16  Marc Glisse  <marc.glisse@inria.fr>
+
+       * gmpxx.h (__gmp_binary_not_equal): Remove, use !__gmp_binary_equal.
+       (__gmp_binary_less_equal): Remove, use !__gmp_binary_greater.
+       (__gmp_binary_greater_equal): Remove, use !__gmp_binary_less.
+       * tests/cxx/t-ops2.cc: Typo.
+
+2011-03-20  Niels Möller  <nisse@lysator.liu.se>
+
+       * tune/common.c (speed_mpn_div_qr_2_norm): New function.
+       (speed_mpn_div_qr_2_unnorm): New function.
+       * tune/speed.c (routine): Recognize above functions.
+       * tune/speed.h: Declarations for above functions.
+       (SPEED_ROUTINE_MPN_DIV_QR_2): New macro.
+
+       * tests/mpn/t-div.c (main): Added tests for mpn_divrem_2 and
+       mpn_div_qr_2.
+
+       * mpn/generic/div_qr_2.c (mpn_div_qr_2): New file and function.
+       Intended to eventually replace divrem_2.
+       * configure.in (gmp_mpn_functions): Add div_qr_2.
+
+2011-03-16  Marc Glisse  <marc.glisse@inria.fr>
+
+       * gmpxx.h (__gmp_set_expr): Remove broken declarations.
+
+2011-03-19  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpz/fac_ui.c (mpz_fac_ui): Use MPZ_REALLOC for standard, conditional
+       reallocation.
+
+2011-03-19  Niels Möller  <nisse@lysator.liu.se>
+
+       * mpn/generic/divrem_2.c (mpn_divrem_2): Fixed comment and assert
+       regarding q and n overlap.
+
+2011-03-16  Marc Glisse  <marc.glisse@inria.fr>
+
+       * gmpxx.h (__mpz_set_ui_safe): New inline function.
+       (__mpz_set_si_safe): Likewise.
+       (__GMPXX_TMPZ_UI): Use the new function.
+       (__GMPXX_TMPZ_SI): Likewise.
+       (__GMPXX_TMPQ_UI): Likewise.
+       (__GMPXX_TMPQ_SI): Likewise.
+       * tests/cxx/t-ops2.cc: test converting 0 to stack mpq_t.
+
+2011-03-15  Marc Glisse  <marc.glisse@inria.fr>
+
+       * gmpxx.h (__GMPXX_TMPQ_UI): New macro.
+       (__GMPXX_TMPQ_SI): New macro.
+       (struct __gmp_binary_multiplies): Rewrite, using the new macros.
+       (struct __gmp_binary_divides): Likewise.
+
+       * gmpxx.h (__GMPZ_ULI_LIMBS): Rewrite.
+       * tests/cxx/t-ops2.cc: test converting ULONG_MIN to stack mpq_t.
+
+2011-03-15 Marco Bodrato <bodrato@mail.dm.unipi.it>
+
+       * mpn/generic/toom_interpolate_16pts.c: Remove ambiguity.
+
+2011-03-14  Torbjorn Granlund  <tege@gmplib.org>
+
+       * tune/tuneup.c (tune_mul): Set tuning min size considering print skew.
+
+       * doc/gmp.texi: Make reference to "Formatted I/O" chapters from type
+       specific I/O sections.
+
+       * mpn/alpha/add_n.asm: Add _nc entry point.
+       * mpn/alpha/sub_n.asm: Likewise.
+       * mpn/mips64/add_n.asm: Likewise.
+       * mpn/mips64/sub_n.asm: Likewise.
+       * mpn/sparc64/ultrasparc1234/add_n.asm: Likewise.
+       * mpn/sparc64/ultrasparc1234/sub_n.asm: Likewise.
+
+2011-03-13  Marc Glisse  <marc.glisse@inria.fr>
+
+       * tests/cxx/t-ops2.cc: New file.
+       * tests/cxx/Makefile.am: Added t-ops2.
+
+2011-03-13  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/generic/toom32_mul.c (mpn_toom32_mul): Make 'hi' be limb-sized
+       for better code.
+
+       * gmp-impl.h (MPN_IORD_U): Handle x86_64 as well as x86_32.  Generate
+       no code for incrementing by constant 0.
+
+2011-03-12  Marc Glisse  <marc.glisse@inria.fr>
+
+       * gmpxx.h: Rename __GMPXX_TMP_* to __GMPXX_TMPZ_*. Use in more places.
+
+2011-03-12  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/powerpc64/rshift.asm: Accept/return values correctly also for
+       32-bit ABI.
+       * mpn/powerpc64/lshift.asm: Likewise.
+
+       * tune/powerpc.asm: Use powerpc syntax, not power syntax.
+
+       * tune/common.c (speed_udiv_qrnnd_preinv1, etc): Remove.
+       * tune/speed.c (routine): Remove udiv_qrnnd_preinv1, etc.
+
+2011-03-12  Marc Glisse  <marc.glisse@inria.fr>
+
+       * tests/cxx/t-istream.cc: Restrict mpq test in t-istream -s.
+
+       * gmpxx.h: Remove leftover #undefs.
+
+2011-03-11  Torbjorn Granlund  <tege@gmplib.org>
+
+       * gmp-impl.h (udiv_qrnnd_preinv1, udiv_qrnnd_preinv2,
+       udiv_qrnnd_preinv2gen): Remove obsolete macros.
+       (udiv_qrnnd_preinv): New name for udiv_qrnnd_preinv3.
+
+2011-03-11 Marco Bodrato <bodrato@mail.dm.unipi.it>
+
+       * gmp-impl.h: Declare many mpn_{sub,add}lsh*_n_ip[12] functions/macros.
+       * mpn/generic/toom_interpolate_5pts.c: Use mpn_sublsh1_n_ip1.
+
+       * tests/devel/try.c: Tests for {add,sub}lsh*_n_ip[12].
+       * tests/refmpn.c: New reference for mpn_{add,sub}lsh*_n_ip[12].
+       * tests/tests.h: Declarations for reference functions above.
+
+       * tune/common.c: New speed_mpn_{add,sub}lsh*_n_ip[12] functions.
+       * tune/speed.h: Prototypes for functions above.
+       * tune/speed.c: Support for mpn_{add,sub}lsh*_n_ip[12].
+
+       * mpn/x86/k7/sublsh1_n.asm: Replaced generic sublsh1 code with faster _ip1.
+       * mpn/x86/atom/sublsh1_n.asm: Changed PROLOGUE accordingly.
+
+       * configure.in: Define HAVE_NATIVE_mpn_addlsh*_n*_ip[12].
+       * mpn/asm-defs.m4: Declare mpn_addlsh*_n*_ip[12].
+
+2011-03-10  Marc Glisse  <marc.glisse@inria.fr>
+
+       * tests/cxx/t-istream.cc: Explicit conversion to streampos.
+
+2011-03-10  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/x86/atom/sse2/mul_basecase.asm: Suppress wind-down rp updates.
+
+       * Move new aorrlsh_n.asm to new k8 dir.  Revert
+       mpn/x86_64/aorrlsh_n.asm.
+       * configure.in: Setup path for new k8 directory.
+
+2011-03-10 Marco Bodrato <bodrato@mail.dm.unipi.it>
+
+       * mpn/x86/pentium4/sse2/bdiv_dbm1c.asm: New file, was in atom.
+       * mpn/x86/atom/sse2/bdiv_dbm1c.asm: Grab file above.
+
+2011-03-09  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/x86_64/aorrlsh_n.asm: Complete rewrite.
+
+       * mpn/x86_64/core2/aorrlsh_n.asm: New file, grabbing another asm file.
+
+2011-03-09  Marc Glisse  <marc.glisse@inria.fr>
+
+       * tests/cxx/t-ostream.cc: Use bool instead of int.
+       * tests/cxx/t-istream.cc: Likewise.
+       * tests/cxx/t-misc.cc: Likewise.
+
+       * cxx/ismpznw.cc: Don't clear eofbit.
+       * cxx/ismpq.cc: Likewise.
+       * cxx/ismpf.cc: Likewise.
+       * tests/cxx/t-istream.cc: Test accordingly.
+
+2011-03-09 Marco Bodrato <bodrato@mail.dm.unipi.it>
+
+       * mpn/x86/atom/sse2/bdiv_dbm1c.asm: New file.
+
+2011-03-09  Marc Glisse  <marc.glisse@inria.fr>
+
+       * doc/gmp.texi: Remove void return type from constructors. Document
+       explicit constructors. Document mpf_class::mpf_class(mpf_t).
+
+2011-03-07 Marco Bodrato <bodrato@mail.dm.unipi.it>
+
+       * mpn/x86/atom/sse2/sqr_basecase.asm: Postponed pushes. Cleaned
+       outer loop exit.
+
+2011-03-07  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/x86_64/gcd_1.asm: Workaround Oracle assembler bug.
+
+       * mpn/x86/atom/sse2/mul_basecase.asm: Replace addmul_1 loops.
+       Tweak outer loop rp updates.
+
+2011-03-06  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/x86/atom/sse2/sqr_basecase.asm: New file.
+
+2011-03-05  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/x86_64/bdiv_dbm1c.asm: Write proper feed-in code.
+
+2011-03-04  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/x86_64/addmul_2.asm: Rewrite for linear performance.
+
+2011-03-03  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/generic/mod_1_1.c (add_mssaaaa): Canonicalise layout.  Add arm
+       variant.  Enable sparc64 code and powerpc code (the latter for 32-bit
+       and 64-bit).
+
+       * mpn/generic/sqrtrem.c (mpn_dc_sqrtrem): Use mpn_addlsh1_n.
+
+       * gmp-impl.h (mpn_addlsh_nc, mpn_rsblsh_nc): Declare.
+       * mpn/asm-defs.m4: Likewise.
+
+       * mpn/x86_64/coreisbr/aorrlsh_n.asm: Disable mpn_rsblsh_n due to
+       carry-in issues.
+       * mpn/x86_64/coreinhm/aorrlsh_n.asm: Likewise.
+       * mpn/x86_64/coreisbr/aorrlsh2_n.asm: Likewise.
+
+2011-03-03  Niels Möller  <nisse@lysator.liu.se>
+
+       * mpn/generic/mod_1_1.c (add_mssaaaa): For x86 and x86_64, treat m
+       as an output operand only. Added sparc32 implementation. Also
+       added #if:ed out attempts at sparc64 and powerpc64.
+
+       * tune/tuneup.c (tune_mod_1): Record result of MOD_1_1P_METHOD
+       measurement for use by mpn_mod_1_tune. And omit measurement if
+       mpn_mod_1_1p is native assembly code.
+
+       * mpn/generic/mod_1.c (mpn_mod_1_1p) [TUNE_PROGRAM_BUILD]: Macro
+       to check mod_1_1p_method and call the right function.
+       (mpn_mod_1_1p_cps) [TUNE_PROGRAM_BUILD]: Likewise.
+
+       * gmp-impl.h (MOD_1_1P_METHOD) [TUNE_PROGRAM_BUILD]: Define macro.
+       (mod_1_1p_method) [TUNE_PROGRAM_BUILD]: Declare variable.
+
+2011-03-02  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/x86_64/coreinhm/aorrlsh_n.asm: New file.
+       * mpn/x86_64/coreisbr/aorrlsh_n.asm: New file.
+
+2011-03-01  Niels Möller  <nisse@lysator.liu.se>
+
+       * mpn/x86_64/mod_1_1.asm (mpn_mod_1_1p_cps): Eliminated a neg and
+       two mov instructions.
+
+       * mpn/x86/k7/mod_1_1.asm (mpn_mod_1_1p_cps): Simplified
+       computation, analogous to recent x86_64/mod_1_1.asm changes.
+       (mpn_mod_1_1p): Corresponding changes. Don't shift b.
+
+       * mpn/sparc64/mod_1_4.c (mpn_mod_1s_4p_cps): Use udiv_rnnd_preinv
+       rather than udiv_rnd_preinv.
+       (mpn_mod_1s_4p): Likewise.
+
+2011-03-01  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/x86/pentium4/sse2/mul_1.asm: Swap entry insns to share more code
+       between entry points.
+       * mpn/x86/pentium4/sse2/addmul_1.asm: Likewise.
+
+       * mpz/divegcd.c: Rewrite, as per Marc Glisse's suggestion.  Also fix
+       problem with passing a longlong limb to a _ui function.
+
+       * gmp-impl.h (udiv_qrnnd_preinv3): Cast truth value to mask's type.
+       (udiv_rnnd_preinv): Likewise.
+       * mpn/generic/mod_1_1.c (mpn_mod_1_1p): Likewise.
+
+2011-02-28  Niels Möller  <nisse@lysator.liu.se>
+
+       * mpn/generic/mod_1_1.c (add_mssaaaa): Typo fix, define
+       add_mssaaaa, not add_sssaaaa.
+
+       * tune/tuneup.c (tune_mod_1): Measure mpn_mod_1_1_1 and
+       mpn_mod_1_1_2, to set MOD_1_1P_METHOD.
+
+       * tune/speed.c (routine): Added mpn_mod_1_1_1 and mpn_mod_1_1_2.
+
+       * tune/speed.h: Declare speed_mpn_mod_1_1_1, speed_mpn_mod_1_1_2,
+       mpn_mod_1_1p_1, mpn_mod_1_1p_2, mpn_mod_1_1p_cps_1, and
+       mpn_mod_1_1p_cps_2.
+
+       * tune/common.c (speed_mpn_mod_1_1_1): New function.
+       (speed_mpn_mod_1_1_2): New function.
+
+       * tune/Makefile.am (libspeed_la_SOURCES): Added mod_1_1-1.c
+       mod_1_1-2.c.
+
+       * tune/mod_1_1-1.c: New file.
+       * tune/mod_1_1-2.c: New file.
+
+       * mpn/generic/mod_1_1.c: Implemented an algorithm with fewer
+       multiplications, configured via MOD_1_1P_METHOD.
+
+       * mpn/x86_64/mod_1_1.asm (mpn_mod_1_1p_cps): Simplified
+       computation of B2modb, use B^2 mod (normalized b).
+       (mpn_mod_1_1p): Corresponding changes. Don't shift b.
+
+       * mpn/generic/mod_1_1.c (mpn_mod_1_1p_cps): Use udiv_rnnd_preinv rather
+       than udiv_rnd_preinv.
+       (mpn_mod_1_1p): Likewise.
+       * mpn/generic/mod_1_4.c: Analogous changes.
+       * mpn/generic/mod_1_3.c: Analogous changes.
+       * mpn/generic/mod_1_2.c: Analogous changes.
+       * mpn/generic/mod_1.c: Analogous changes.
+       * mpn/generic/pre_mod_1.c: Analogous changes.
+
+       * gmp-impl.h (udiv_qrnnd_preinv3): Eliminated unpredictable branch
+       using masking logic. Further optimization of the nl == constant 0
+       case, similar to udiv_rnd_preinv.
+       (udiv_rnnd_preinv): Likewise.
+       (udiv_rnd_preinv): Deleted, use udiv_rnnd_preinv with nl == 0
+       instead.
+
+       * tests/mpn/t-divrem_1.c (check_data): Added testcase to exercise
+       the nl == constant 0 special case in udiv_qrnnd_preinv3.
+
+2011-02-28  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/generic/rootrem.c (mpn_rootrem): Combine two similar scalar
+       divisions.  Misc minor cleanup.
+
+       * mpn/x86/atom/sse2/aorsmul_1.asm: Shorten software pipeline.
+
+       * mpn/x86/atom/mul_basecase.asm: Remove file no longer used.
+
+       * mpn/generic/rootrem.c (mpn_rootrem_internal): Delay O(log(U))
+       allocations until they are known to be needed.
+
+2011-02-27 Marco Bodrato <bodrato@mail.dm.unipi.it>
+
+       * mpn/x86/atom/sse2/mul_1.asm: New code.
+
+2011-02-27  Niels Möller  <nisse@lysator.liu.se>
+
+       * gmp-impl.h (udiv_rnnd_preinv): New macro.
+
+2011-02-27  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/x86/atom/sse2/mul_basecase.asm: New file.
+
+2011-02-26 Marco Bodrato <bodrato@mail.dm.unipi.it>
+
+       * mpn/x86/atom/sse2/aorsmul_1.asm: Optimise non-loop code.
+
+2011-02-26  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/powerpc64/mode64/aorsmul_1.asm: Add MULFUNC_PROLOGUE.
+       * mpn/m68k/mc68020/aorsmul_1.asm: Likewise.
+
+       * mpn/powerpc64/mode64/aorsmul_1.asm: Add missing MULFUNC_PROLOGUE.
+       * mpn/m68k/mc68020/aorsmul_1.asm: Likewise.
+
+2011-02-25  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/x86/atom/sse2/aorsmul_1.asm: New file.
+       * mpn/x86/atom/aorsmul_1.asm: File removed.
+
+2011-02-25 Marco Bodrato <bodrato@mail.dm.unipi.it>
+
+       * mpn/x86/atom/sse2/divrem_1.asm: New file (was in x86/atom).
+       * mpn/x86/atom/sse2/mul_1.asm: Likewise.
+       * mpn/x86/atom/sse2/popcount.asm: Likewise.
+       * mpn/x86/atom/divrem_1.asm: ReMoved (in sse2/ now).
+       * mpn/x86/atom/mul_1.asm: Likewise.
+       * mpn/x86/atom/popcount.asm: Likewise.
+
+       * configure.in: Set up mmx path for atom.
+       * mpn/x86/atom/mmx/copyd.asm: New file (was in x86/atom).
+       * mpn/x86/atom/mmx/copyi.asm: Likewise.
+       * mpn/x86/atom/mmx/hamdist.asm: Likewise.
+       * mpn/x86/atom/copyd.asm: ReMoved (in mmx/ now).
+       * mpn/x86/atom/copyi.asm: Likewise.
+       * mpn/x86/atom/hamdist.asm: Likewise.
+
+2011-02-24  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/x86/atom/sse2/mod_1_1.asm: New file.
+       * mpn/x86/atom/sse2/mod_1_4.asm: New file.
+       * configure.in: Set up sse2 path for atom.
+
+       * mpn/x86/p6/sse2/mod_1_1.asm: New file.
+       * mpn/x86/p6/sse2/mod_1_4.asm: Fix typo in MULFUNC_PROLOGUE.
+
+2011-02-24  Niels Möller  <nisse@lysator.liu.se>
+
+       * mpn/x86/k7/mod_1_1.asm (mpn_mod_1_1p): Rewrite using the same
+       algorithm as the x86_64 version.
+
+2011-02-23 Marco Bodrato <bodrato@mail.dm.unipi.it>
+
+       * mpn/x86/atom/logops_n.asm: New file (same loop as aors_n).
+
+2011-02-23  Niels Möller  <nisse@lysator.liu.se>
+
+       * mpn/x86_64/mod_1_1.asm (mpn_mod_1_1p): Shaved off one
+       instruction and one register in the inner loop. Rearranged
+       registers slightly, and no longer needs the callee-save register
+       %r12.
+
+2011-02-22  Torbjorn Granlund  <tege@gmplib.org>
+
+       * configure.in: Export SHLD_SLOW and SHRD_SLOW to config.m4, also
+       fixing typo in exporting code.
+
+       * mpn/x86_64/nano/gmp-mparam.h (SHLD_SLOW, SHRD_SLOW): Define.
+       * mpn/x86_64/atom/gmp-mparam.h (SHLD_SLOW, SHRD_SLOW): Define.
+
+2011-02-22  Niels Möller  <nisse@lysator.liu.se>
+
+       * mpn/x86_64/mod_1_1.asm (mpn_mod_1_1p): Rewrite.
+
+2011-02-22 Marco Bodrato <bodrato@mail.dm.unipi.it>
+
+       * mpn/x86/atom/lshiftc.asm: New file (a copy of lshift.asm with a handful of neg added).
+
+2011-02-21  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/x86/aors_n.asm: Move _nc entry to after main code.  Align loop
+       and _n entry for claimed performance.  Normalise mnemonic usage.
+
+       * mpn/x86/atom/aorrlsh1_n.asm: New file (code from rsblsh_1, slightly
+       slower for addlsh_1 for large operands, but much faster for small).
+       * mpn/x86/atom/addlsh1_n.asm: Remove.
+       * mpn/x86/atom/rsblsh1_n.asm: Remove.
+
+2011-02-20  Marc Glisse  <marc.glisse@inria.fr>
+
+       * mpq/aors.c: Rewrite to remove redundant division.
+
+2011-02-20  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/x86/atom/lshift.asm: New file.
+       * mpn/x86/atom/rshift.asm: Normalise mnemonic usage.
+
+       * gmp-impl.h (mpn_divexact_by7): Relax inclusion condition.
+
+       * mpz/divegcd.c (mpz_divexact_by5): New conditionally enabled function.
+       (mpz_divexact_by3): Wrap inside appropriate conditions.
+       (mpz_divexact_gcd): Rewrite.
+
+       * mpn/x86/bdiv_dbm1c.asm: Save a jump.
+
+2011-02-20 Marco Bodrato <bodrato@mail.dm.unipi.it>
+
+       * mpn/x86/atom/aorslshC_n.asm: New file.
+       * mpn/x86/atom/sublsh2_n.asm: New file.
+
+       * mpn/x86/atom/aors_n.asm: New code.
+       * mpn/x86/atom/rshift.asm: Atom64 code adapted to 32-bit.
+       * mpn/x86/atom/lshift.asm: Likewise.
+
+2011-02-19  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/x86_64/atom/rsh1aors_n.asm: New file.
+
+       * mpn/x86_64/atom/lshift.asm: New file.
+       * mpn/x86_64/atom/rshift.asm: New file.
+       * mpn/x86_64/atom/lshiftc.asm: New file.
+
+2011-02-17 Marco Bodrato <bodrato@mail.dm.unipi.it>
+
+       * mpn/x86/atom/aorsmul_1.asm: Small improvements for small sizes.
+       * mpn/x86/atom/aorrlshC_n.asm: Tiny size improvements.
+
+2011-02-16  Torbjorn Granlund  <tege@gmplib.org>
+
+       * configure.in: Fix k8/k10 32-bit path setup problem.
+
+2011-02-16 Marco Bodrato <bodrato@mail.dm.unipi.it>
+
+       * mpn/x86/atom/aorsmul_1.asm: Revive an old k7/aorsmul.
+
+2011-02-14 Marco Bodrato <bodrato@mail.dm.unipi.it>
+
+       * gmp-impl.h (mpn_sublsh_n): Declare.
+       * mpn/asm-defs.m4: Likewise.
+
+       * mpn/x86/atom/aorrlshC_n.asm: New file (was k7).
+       * mpn/x86/k7/aorrlshC_n.asm: ReMoved.
+       * mpn/x86/atom/aorrlsh2_n.asm: Grab atom/aorrlshC_n.asm.
+       * mpn/x86/atom/rsblsh1_n.asm: Grab atom/aorrlshC_n.asm.
+
+2011-02-13  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/x86_64/atom/aorrlsh2_n.asm: New file.
+
+2011-02-12  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/x86_64/aorrlsh_n.asm: Minor tweaks, update c/l numbers.
+
+       * mpn/x86_64/atom/sublsh1_n.asm: New file.
+
+       * mpn/x86_64/atom/aorrlsh1_n.asm: New file.
+
+2011-02-11  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/powerpc64/mode64/mod_1_1.asm: Fix Darwin syntax issues.
+
+2011-02-10  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/powerpc64/mode64/mod_1_4.asm: Tune away a cycle for 970.
+
+2011-02-11 Marco Bodrato <bodrato@mail.dm.unipi.it>
+
+       * mpn/x86/k7/addlsh1_n.asm: Faster core loop (Torbjorn's).
+
+       * configure.in: Add HAVE_NATIVE_{add,sub,rsb}lsh{,1,2}_nc.
+       * tests/tests.h: refmpn_{add,sub,rsb}lsh{,1,2}_nc prototypes.
+       * tests/refmpn.c: New refmpn_{add,sub,rsb}lsh{,1,2}_nc.
+       * tests/devel/try.c: Tests for mpn_{add,sub,rsb}lsh{,1,2}_nc.
+
+       * mpn/x86/k7/aorrlshC_n.asm: New file.
+       * mpn/x86/atom/aorrlsh2_n.asm: Grab k7/aorrlshC_n.asm.
+       * mpn/x86/atom/rsblsh1_n.asm: Grab k7/aorrlshC_n.asm.
+
+2011-02-06 Marco Bodrato <bodrato@mail.dm.unipi.it>
+
+       * mpn/x86/k7/addlsh1_n.asm: New file.
+       * mpn/x86/k7/sublsh1_n.asm: New file.
+       * mpn/x86/atom/addlsh1_n.asm: Grab k7/addlsh1_n.asm.
+       * mpn/x86/atom/sublsh1_n.asm: Grab k7/sublsh1_n.asm.
+
+2011-02-05  Torbjorn Granlund  <tege@gmplib.org>
+
+       * gmp-impl.h (mpn_addlsh1_nc, mpn_addlsh2_nc, mpn_sublsh1_nc,
+       mpn_sublsh2_nc, mpn_rsblsh1_nc, mpn_rsblsh2_nc): Declare.
+       * mpn/asm-defs.m4: Likewise.
+
+       * mpn/x86_64/coreisbr/aorrlshC_n.asm: New file.
+       * mpn/x86_64/coreisbr/aorrlsh1_n.asm: New file.
+       * mpn/x86_64/coreisbr/aorrlsh2_n.asm: New file.
+
+       * mpn/x86_64/coreisbr/aors_n.asm: New file, based on old
+       atom/aors_n.asm.
+       * mpn/x86_64/atom/aors_n.asm: Grab coreisbr/aors_n.asm.
+
+2011-02-05 Marco Bodrato <bodrato@mail.dm.unipi.it>
+
+       * gmp-impl.h (mpn_toom6_mul_n_itch): Handle threshold == zero.
+       (mpn_toom8_mul_n_itch): Likewise.
+       (MPN_TOOM6H_MIN, MPN_TOOM8H_MIN): Define.
+       * tests/mpn/t-toom6h.c: No tests below MPN_TOOM6H_MIN.
+       * tests/mpn/t-toom8h.c: No tests below MPN_TOOM8H_MIN.
+
+       * mpz/lucnum_ui.c: Use mpn_addlsh2_n.
+
+2011-02-04  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/x86_64/atom/rsh1aors_n.asm: Add a MULFUNC_PROLOGUE.
+       * mpn/x86_64/atom/dive_1.asm: Likewise.
+       * mpn/x86_64/atom/popcount.asm: Likewise.
+       * mpn/x86_64/core2/popcount.asm: Likewise.
+       * mpn/x86_64/coreinhm/hamdist.asm: Likewise.
+       * mpn/x86_64/coreinhm/popcount.asm: Likewise.
+       * mpn/x86_64/nano/popcount.asm: Likewise.
+       * mpn/x86_64/pentium4/popcount.asm: Likewise.
+
+2011-02-04 Marco Bodrato <bodrato@mail.dm.unipi.it>
+
+       * mpn/x86/atom/mode1o.asm: New file, grabbing another asm file.
+       * mpn/x86/atom/mul_1.asm: Claim mul_1c.
+
+2011-02-02  Niels Möller  <nisse@lysator.liu.se>
+
+       * tune/speed.h (SPEED_ROUTINE_MPN_HGCD_CALL): Fixed one
+       speed_operand_dst call.
+
+2011-02-01  Torbjorn Granlund  <tege@gmplib.org>
+
+       * tune/speed.h (struct speed_params): Allow for 4 dst operands.
+       * tune/common.c (TOLERANCE): Increase from 0.5% to 1%.
+
+       * tune/speed.h (SPEED_ROUTINE_MPN_HGCD_CALL): New macro, mainly based
+       on old speed_mpn_hgcd, but with speed_operand_src calls (as suggested
+       by Niels).
+       * tune/common.c (speed_mpn_hgcd): Invoke SPEED_ROUTINE_MPN_HGCD_CALL.
+       (speed_mpn_hgcd_lehmer): Likewise.
+
+       * configure.in: Set up 32-bit x86 paths for new corei* CPU strings.
+
+2011-01-31  Torbjorn Granlund  <tege@gmplib.org>
+
+       * config.guess: Recognise new Intel processors.
  
         * config.guess: Support 'coreinhm' and 'coreisbr'.
         * config.sub: Likewise.
         * configure.in: Likewise.
  
-2011-01-25 Marco Bodrato <bodrato@mail.dm.unipi.it>
+2011-01-30  Torbjorn Granlund  <tege@gmplib.org>
+
+       * configure.in: Support x86/geode.
+       * mpn/x86/geode/gmp-mparam.h: New file.
+
+2011-01-29 Marco Bodrato <bodrato@mail.dm.unipi.it>
+
+       * mpn/x86/atom/addlsh1_n.asm: Removed.
+       * mpn/x86/atom/rsh1add_n.asm: Likewise.
+
+2011-01-28  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/alpha/ev6/slot.pl: Add some missing insns.
+
+2011-01-28 Marco Bodrato <bodrato@mail.dm.unipi.it>
+
+       * mpn/x86/atom/copyd.asm: New file, grabbing another asm file.
+       * mpn/x86/atom/copyi.asm: Likewise.
+       * mpn/x86/atom/aors_n.asm: Likewise.
+       * mpn/x86/atom/addlsh1_n.asm: Likewise.
+       * mpn/x86/atom/aorsmul_1.asm: Likewise.
+       * mpn/x86/atom/bdiv_q_1.asm: Likewise.
+       * mpn/x86/atom/dive_1.asm: Likewise.
+       * mpn/x86/atom/divrem_1.asm: Likewise.
+       * mpn/x86/atom/hamdist.asm: Likewise.
+       * mpn/x86/atom/logops_n.asm: Likewise.
+       * mpn/x86/atom/lshift.asm: Likewise.
+       * mpn/x86/atom/mod_34lsub1.asm: Likewise.
+       * mpn/x86/atom/mul_1.asm: Likewise.
+       * mpn/x86/atom/mul_basecase.asm: Likewise.
+       * mpn/x86/atom/popcount.asm: Likewise.
+       * mpn/x86/atom/rsh1add_n.asm: Likewise.
+       * mpn/x86/atom/rshift.asm: Likewise.
+       * mpn/x86/atom/sqr_basecase.asm: Likewise.
+
+2011-01-27  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/x86_64/atom/rsh1aors_n.asm: New file, grabbing another asm file.
+       * mpn/x86_64/atom/popcount.asm: Likewise.
+       * mpn/x86_64/atom/dive_1.asm: Likewise.
+       * mpn/x86_64/nano/popcount.asm: Likewise.
+
+2011-01-26  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/alpha/invert_limb.asm: Complete rewrite.
+
+2011-01-25  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/powerpc32/invert_limb.asm: New file.
+
+2011-01-25 Marco Bodrato <bodrato@mail.dm.unipi.it>
+
+       * mpn/x86/pentium4/sse2/bdiv_q_1.asm: New file.
+       * mpn/x86/k7/bdiv_q_1.asm: New file.
+
+2011-01-24  Torbjorn Granlund  <tege@gmplib.org>
+
+       * tune/tuneup.c (tune_mul_n, tune_sqr): Loop, re-measuring thresholds
+       until no tiny ranges remain.
+
+2011-01-23  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/ia64/mul_2.asm: Tweak to 1.5 c/l, less overhead.
+
+       * mpn/ia64/addmul_2.asm: Rewrite, adding mpn_addmul_2s entry point.
+
+2011-01-22  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/ia64/aors_n.asm: Fix some incorrect bundle types.
+
+       * mpn/ia64/sqr_diagonal.asm: Remove.
+
+       * mpn/ia64/sqr_diag_addlsh1.asm: New file.
+
+       * mpn/ia64/ia64-defs.m4: Define some shorter convenience mnemonics.
+
+       * mpn/generic/sqr_basecase.c (MPN_SQR_DIAG_ADDLSH1): New macro, using
+       new function mpn_sqr_diag_addlsh1 or defining its equivalent.
+
+       * gmp-impl.h (mpn_addmul_2s): Declare.
+       (mpn_sqr_diag_addlsh1): Declare.
+       * mpn/asm-defs.m4 (define_mpn): Add addmul_2s and sqr_diag_addlsh1.
+
+       * configure.in: Add HAVE_NATIVEs for mpn_sqr_diag_addlsh1 and
+       mpn_addmul_2s.
+       (gmp_mpn_functions_optional): Add sqr_diag_addlsh1.
+
+2011-01-21 Marco Bodrato <bodrato@mail.dm.unipi.it>
+
+       * tests/devel/try.c: Initial support for mpn_bdiv_q_1.
+       * mpn/x86/pentium/bdiv_q_1.asm: New file.
+       * mpn/x86/p6/bdiv_q_1.asm: New file.
+
+2011-01-20  Torbjorn Granlund  <tege@gmplib.org>
+
+       * tune/speed.c (run_gnuplot): Update to current gnuplot syntax.
+
+       * mpn/powerpc64/mode64/aorsmul_1.asm: Trim away 0.5 c/l for submul_1
+       for POWER5.
+
+2011-01-19  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/x86_64/core2/rsh1aors_n.asm: New file.
+
+2011-01-18 Marco Bodrato <bodrato@mail.dm.unipi.it>
+
+       * mpn/x86/bdiv_q_1.asm: New file (same core alg. as dive_1).
+
+2011-01-15 Marco Bodrato <bodrato@mail.dm.unipi.it>
+
+       * mpn/generic/divexact.c: Avoid COPY if not needed.
+
+2011-01-14  Torbjorn Granlund  <tege@gmplib.org>
+
+       * gmp-impl.h (struct cpuvec_t): Add field bmod_1_to_mod_1_threshold.
+       * configure.in (fat_thresholds): Add BMOD_1_TO_MOD_1_THRESHOLD.
+
+2011-01-13 Marco Bodrato <bodrato@mail.dm.unipi.it>
+
+       * mpz/mul.c: Remove redundant size computation.
+
+2011-01-08  Torbjorn Granlund  <tege@gmplib.org>
+
+       * tests/devel/try.c (types enum): Add TYPE_MUL_5 and TYPE_MUL_6.
+       (param_init): Support new types.
+       (choice_array): Support testing of mpn_mul_5 and mpn_mul_6.
+       (call): Support new routines.
+
+       * tests/refmpn.c (refmpn_mul_5, refmpn_mul_6): New functions.
+       * tests/tests.h (refmpn_mul_5, refmpn_mul_6): Declare.
+       Remove parameter names from some other functions.
+
+       * gmp-impl.h (mpn_mul_5, mpn_mul_6): Declare.
+       * mpn/asm-defs.m4: Likewise, also declare mpn_addmul_5, mpn_addmul_6,
+       mpn_addmul_7, and mpn_addmul_8.
+
+       * configure.in (gmp_mpn_functions_optional): Add mul_5 and mul_6.
+
+       * tune/speed.c (routine): Add measuring of mpn_mul_5 and mpn_mul_6.
+       * tune/common.c (speed_mpn_mul_5, speed_mpn_mul_6): New functions.
+       * tune/speed.h: Declare new functions.
+
+2011-01-03 Marco Bodrato <bodrato@mail.dm.unipi.it>
+
+       * mpz/aors.h: Remove #ifdef BERKELEY_MP, and cleanup.
+       * mpz/cmp.c: Likewise.
+       * mpz/gcd.c: Likewise.
+       * mpz/mul.c: Likewise.
+       * mpz/powm.c: Likewise.
+       * mpz/set.c: Likewise.
+       * mpz/sqrtrem.c: Likewise.
+       * mpz/tdiv_qr.c: Likewise.
+
+2010-12-28  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/minithres/gmp-mparam.h: Update with several recent thresholds.
+
+2010-12-19  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/x86/k7/mod_1_1.asm: Canonicalise cmov forms.
+       * mpn/x86/k7/mod_1_4.asm: Likewise.
+       * mpn/x86/pentium4/sse2/mod_1_1.asm: Likewise.
+       * mpn/x86/pentium4/sse2/mod_1_4.asm: Likewise.
+       * mpn/x86_64/core2/divrem_1.asm: Likewise.
+       * mpn/x86_64/divrem_1.asm: Likewise.
+       * mpn/x86_64/mod_1_1.asm: Likewise.
+       * mpn/x86_64/mod_1_2.asm: Likewise.
+       * mpn/x86_64/mod_1_4.asm: Likewise.
+
+       * mpn/x86/k7/gcd_1.asm: Rewrite.  Remove slow 'div' loop.  Call
+       mpn_mod_1 for operands with more than BMOD_1_TO_MOD_1_THRESHOLD limbs.
+       Misc cleanups.
+
+2010-12-18  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/x86_64/gcd_1.asm: Call mpn_mod_1 for operands with more than
+       BMOD_1_TO_MOD_1_THRESHOLD limbs.
+
+       * configure.in: Generalise code for putting THRESHOLDs in config.m4.
+       Add BMOD_1_TO_MOD_1_THRESHOLD to list.
+
+       * mpn/x86_64/core2/divrem_1.asm: Tweak slightly, correct cycle counts.
+
+       * mpn/x86_64/addmul_2.asm: Remove constant index.
+       * mpn/x86_64/lshiftc.asm: Likewise.
+       * mpn/x86_64/pentium4/lshift.asm: Likewise.
+       * mpn/x86_64/pentium4/lshiftc.asm: Likewise.
+       * mpn/x86_64/pentium4/rshift.asm: Likewise.
+
+2010-12-16  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/x86_64/mod_34lsub1.asm: Complete rewrite.
+       * mpn/x86_64/pentium4/mod_34lsub1.asm: New file, old
+       mpn/x86_64/mod_34lsub1.asm.
+
+2010-12-15  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/powerpc64/vmx/popcount.asm: Rewrite to use vperm count table.
+
+2010-12-14  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mp-h.in: Remove.
+       * configure.in: Remove mp-h.in from AC_OUTPUT invocation.
+
+2010-12-13  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpz/mod.c: Rewrite.
+
+       * mpn/x86_64/corei/popcount.asm: New file.
+       * mpn/x86_64/corei/hamdist.asm: New file.
+
+       * mpn/x86_64/k10/hamdist.asm: New file.
+
+       * configure.in: Amend last change for lame /bin/sh.
+
+2010-12-12  Torbjorn Granlund  <tege@gmplib.org>
+
+       * configure.in: Comment out M4=m4-not-needed.
+
+       * mpn/x86_64/k10/popcount.asm: New file.
+       * configure.in: Setup special path for k10 and later AMD CPUs.
+       Remove special x86_64'k8' path, since directory is non-existent.
+
+2010-12-11  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/sparc32/ultrasparct1: New directory.
+       * mpn/sparc32/ultrasparct1/add_n.asm: New file.
+       * mpn/sparc32/ultrasparct1/sub_n.asm: New file.
+       * mpn/sparc32/ultrasparct1/mul_1.asm: New file.
+       * mpn/sparc32/ultrasparct1/addmul_1.asm: New file.
+       * mpn/sparc32/ultrasparct1/submul_1.asm: New file.
+       * mpn/sparc32/ultrasparct1/sqr_diagonal.asm: New file.
+
+       * config.guess: Support Ultrasparc T2 and T3.
+       * config.sub: Likewise.
+       * configure.in: Likewise.
+
+       * config.guess: Generalise BSD Sparc recognition by allowing any
+       caps (needed for OpenBSD which spells things innovatively).
+
+2010-12-01  Torbjorn Granlund  <tege@gmplib.org>
+
+       * config.guess: Match new AMD processors, allow finer distinctions
+       among old ones.
+       * acinclude.m4 (X86_64_PATTERN): Likewise.
+       * config.sub: Likewise.
+       * configure.in: Rudimentarily support new AMD processors.
+
+       * configure.in (--enable_assembly): New option.
+       (target none-*-*): Disable, give error.
+
+2010-11-29  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/x86/x86-defs.m4 (LEA): Support non-PIC code.
+       * mpn/x86/darwin.m4 (LEA): Likewise.
+
+       * tests/amd64call.asm: Rewrite for code size, and to match calls and
+       returns.
+
+       * tests/x86call.asm: Rewrite for code size, to support PIC, and to
+       match calls and returns.
+       * tests/x86check.c: Rewrite.
  
-       * mpz/mul.c: Remove redundant size computation.
+2010-11-22  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpz/get_str.c: Make all bases either work or return an error.
+       * mpz/out_str.c: Likewise.
+       * mpq/get_str.c: Likewise.
+       * mpf/get_str.c: Likewise.
+
+2010-11-14  Torbjorn Granlund  <tege@gmplib.org>
+
+       * tests/misc/t-printf.c: Add explicit casts for type conversions.
+       * mpn/generic/toom62_mul.c: Likewise.
+
+2010-11-13  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/generic/get_d.c: Misc cleanup.  Fail with a syntax error for
+       non-IEEE fp formats.
+
+       * tests/devel/try.c (malloc_region): Add explicit casts for type
+       conversions.
+
+       * acinclude.m4 (GMP_ASM_RODATA): Make test code snippet C++ compatible.
+       (GMP_C_DOUBLE_FORMAT): Likewise.
+       (GMP_FUNC_VSNPRINTF): Likewise.
+
+       * config.guess (x86): Make test C snippet C++ compatible.
+
+2010-11-12  Torbjorn Granlund  <tege@gmplib.org>
+
+       * Makefile.am: Remove mpbsd.
+       * configure.in: Remove mpbsd.
+       * doc/configuration: Remove mpbsd mentions.
+       * doc/gmp.texi: Remove mpbsd docs.
+       * tests/Makefile.am: Remove mpbsd.
+       * libmp.sym: Remove.
+       * mpbsd: Remove directory and files.
+       * tests/mpbsd: Remove directory and files.
  
  2010-11-11  Torbjorn Granlund  <tege@gmplib.org>
  
         * mpn/x86_64/atom/aors_n.asm: Don't rely on ZF after 'bt' insn.
         Use 64-bit 'test' to support operands of 2^32 limbs and more.
  
+       * rand: New directory, move rand*.c and randmt.h here.
+       * rand/Makefile.am: New file.
+       * Makefile.am (SUBDIRS): Add rand.
+       (RANDOM_OBJECTS): New variable.
+       (libgmp_la_SOURCES): Remove random objects.
+       (libgmp_la_DEPENDENCIES): Add RANDOM_OBJECTS.
+       * configure.in (AC_OUTPUT): Add rand/Makefile.
+
+       * ansi2knr.1: File removed.
+       * ansi2knr.c: File removed.
+
  2010-11-10  Torbjorn Granlund  <tege@gmplib.org>
  
-       [These changes were made after the 5.0.2 release, but inserted here to
-       match the change chronology of the main repository.]
+       Make it possible to compile GMP with g++:
+
+       * gmp-impl.h: Declare __gmp_digit_value_tab here.
+       * mpbsd/min.c: ...not here.
+       * mpbsd/xtom.c: ...nor here.
+       * mpf/set_str.c: ...nor here.
+       * mpz/inp_str.c: ...nor here.
+       * mpz/set_str.c: ...nor here.
+
+       * mpn/generic/toom43_mul.c: Add casts for logical operations on enums.
+       * mpn/generic/toom44_mul.c: Likewise.
+       * mpn/generic/toom4_sqr.c: Likewise.
+       * mpn/generic/toom52_mul.c: Likewise.
+       * mpn/generic/toom53_mul.c: Likewise.
+       * mpn/generic/toom62_mul.c: Likewise.
+
+       * mpz/clrbit.c: Clean up typing using MPZ_REALLOC.
+       * mpz/setbit.c: Likewise.
+
+       * mpz/powm.c: Avoid variable name 'new'.
+
+       * randlc2x.c: Add explicit casts for type conversions.
+       * tests/misc/t-printf.c: Likewise.
+       * tests/misc/t-scanf.c: Likewise.
+       * tests/misc.c: Likewise.
+       * tests/mpz/convert.c: Likewise.
+       * tests/refmpn.c: Likewise.
+
+       * tests/tests.h: Unconditionally use <sstream> for now.
+
+       * tests/memory.c: Include "tests.h".
  
         * mp_get_fns.c: Add a __GMP_NOTHROW for coherency with prototype.
         * mp_set_fns.c: Likewise.
@@ -530,22 +4511,35 @@
         * mpz/sizeinbase.c: Likewise.
         * mpz/swap.c: Likewise.
         * mpz/tstbit.c: Likewise.
+       * tal-reent.c: Likewise.
  
  2010-11-09  Torbjorn Granlund  <tege@gmplib.org>
  
-       [This change was made after the 5.0.2 release, but inserted here to
-       match the change chronology of the main repository.]
+       * configure.in: Get rid of K&R support.
+       * Makefile.am: Likewise.
+       * mpn/Makefile.am: Likewise.
+       * doc/configuration: Update docs wrt K&R support.
+       * doc/gmp.texi: Likewise.
  
         * configure.in (AC_INIT): Amend bug reporting address with manual
         reference.
  
  2010-11-06  Torbjorn Granlund  <tege@gmplib.org>
  
+       * config.guess: If cpuid says we have 32bit-only x86 but
+       configfsf.guess returns x86_64, return the latter.
+
         * mpn/x86_64/aors_n.asm: Rewrite not to rely on ZF after 'bt' insn.
  
+2010-10-09  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/generic/trialdiv.c: Update documentation.
+
  2010-10-04  Torbjorn Granlund  <tege@gmplib.org>
  
         * mpn/x86_64/gcd_1.asm: Use m4_lshift to avoid << operator.
+       * mpn/x86_64/aorrlshC_n.asm: Likewise.
+       * mpn/x86_64/pentium4/aorslshC_n.asm: Likewise.
         * mpn/x86/k7/gcd_1.asm: Likewise.
  
  2010-08-20  Niels Möller  <nisse@lysator.liu.se>
@@ -555,11 +4549,49 @@
         touch it. Fixed the case that no assembler files are used, and
         GMP_PROG_M4 is omitted.
  
+2010-08-08  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/x86_64/fat/fat.c: Recognise many more processors.
+
+2010-06-30  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/x86_64/divrem_2.asm: Tune.
+
+2010-06-19  Niels Möller  <nisse@lysator.liu.se>
+
+       * tune/speed.h (SPEED_ROUTINE_MPN_MOD_1_1): Pass normalized
+       divisor to the benchmarked function.
+
+2010-06-15  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/x86_64/mod_1_1.asm (mpn_mod_1_1p_cps): Rewrite.
+       * mpn/x86_64/mod_1_2.asm (mpn_mod_1s_2p_cps): Rewrite.
+       * mpn/x86_64/mod_1_4.asm (mpn_mod_1s_4p_cps): Rewrite.
+
+       * gmp-impl.h (udiv_rnd_preinv): Simplify.
+
+       * mpn/x86/k7/mod_1_1.asm: New file.
+       * mpn/x86/pentium4/sse2/mod_1_1.asm (mpn_mod_1_1p_cps): Rewrite.
+       * mpn/x86/k7/mod_1_4.asm (mpn_mod_1s_4p_cps): Rewrite.
+       * mpn/x86/pentium4/sse2/mod_1_4.asm (mpn_mod_1s_4p_cps): Rewrite.
+
+       * mpn/generic/mod_1_1.c (mpn_mod_1_1p_cps): Store results as they are
+       computed.
+       * mpn/generic/mod_1_2.c (mpn_mod_1s_2p_cps): Likewise.
+       * mpn/generic/mod_1_4.c (mpn_mod_1s_4p_cps): Likewise.
+
+       * mpn/x86/k7/invert_limb.asm: Moved from mpn/x86/invert_limb.asm.
+
  2010-06-15  Niels Möller  <nisse@lysator.liu.se>
  
         * tests/mpn/Makefile.am (check_PROGRAMS): Added t-mod_1.
         * tests/mpn/t-mod_1.c: New file.
  
+2010-05-25  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/generic/mu_div_qr.c (mpn_preinv_mu_div_qr_itch): Trim out space
+       for inverse, since that is passed in already.
+
  2010-05-24  Torbjorn Granlund  <tege@gmplib.org>
  
         * mpn/generic/mu_div_qr.c (mpn_preinv_mu_div_qr_itch): New function.
@@ -568,43 +4600,340 @@
         * tune/speed.h (SPEED_ROUTINE_MPN_MUPI_DIV_QR): Pass parameters right
         for new itch function.
  
+       * mpn/powerpc32/lshiftc.asm: New file.
+
+2010-05-22  Torbjorn Granlund  <tege@gmplib.org>
+
+       * tune/tuneup.c (tune_mod_1): Revert to version of 2010-05-06.
+
+2010-05-17  Torbjorn Granlund  <tege@gmplib.org>
+
+       * configure.in (ia64): Get 32-bit sizeof test right.
+
+       * tune/tuneup.c (tune_mod_1): Undo unintentional change to tuning of
+       PREINV_MOD_1_TO_MOD_1_THRESHOLD.
+
+2010-05-16  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/sparc64/mod_1.c: Rewrite.
+       * mpn/sparc64/sparc64.h (umul_ppmm_s): New macro.
+       * mpn/sparc64/mod_1_4.c: New file.
+
+       * mpn/generic/divrem_1.c: Minor cleanup.
+       * mpn/generic/mod_1.c: Likewise.
+       * mpn/generic/mod_1_1.c: Likewise.
+       * mpn/generic/mod_1_2.c: Likewise.
+       * mpn/generic/mod_1_3.c: Likewise.
+       * mpn/generic/mod_1_4.c: Likewise.
+
+       * configure.in (ia64-hpux): Do sizeof tests for 32-bit and 64-bit ABI.
+
+       * tune/tuneup.c (tune_mod_1): Completely finish MOD_1_N tuning before
+       tuning MOD_1U_TO_MOD_1_1_THRESHOLD.
+
  2010-05-14  Torbjorn Granlund  <tege@gmplib.org>
  
         * mpn/generic/redc_2.c: Use asm code just for GNU C.
  
+2010-05-13  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/sparc64/ultrasparc1234: New directory.  Move all code that uses
+       floating-point into this directory.
+       * configure.in: Point to ultrasparc1234 for appropriate CPUs.
+
+       * mpn/sparc64/ultrasparct1/add_n.asm: New file.
+       * mpn/sparc64/ultrasparct1/addlsh2_n.asm: New file.
+       * mpn/sparc64/ultrasparct1/addmul_1.asm: New file.
+       * mpn/sparc64/ultrasparct1/lshift.asm: New file.
+       * mpn/sparc64/ultrasparct1/mul_1.asm: New file.
+       * mpn/sparc64/ultrasparct1/rsblsh2_n.asm: New file.
+       * mpn/sparc64/ultrasparct1/rshift.asm: New file.
+       * mpn/sparc64/ultrasparct1/sublsh1_n.asm: New file.
+       * mpn/sparc64/ultrasparct1/sublshC_n.asm: New file.
+       * mpn/sparc64/ultrasparct1/addlsh1_n.asm: New file.
+       * mpn/sparc64/ultrasparct1/addlshC_n.asm: New file.
+       * mpn/sparc64/ultrasparct1/lshiftc.asm: New file.
+       * mpn/sparc64/ultrasparct1/rsblsh1_n.asm: New file.
+       * mpn/sparc64/ultrasparct1/rsblshC_n.asm: New file.
+       * mpn/sparc64/ultrasparct1/sub_n.asm: New file.
+       * mpn/sparc64/ultrasparct1/sublsh2_n.asm: New file.
+       * mpn/sparc64/ultrasparct1/submul_1.asm: New file.
+       * mpn/sparc64/ultrasparct1/gmp-mparam.h: New file.
+
+       * configure.in: Give ultrasparct1 and ultrasparct2 special code path.
+
+       * mpn/x86_64/pentium4/gmp-mparam.h: Disable mpn_addlsh_n, mpn_rsblsh_n.
+
+2010-05-12  Niels Möller  <nisse@lysator.liu.se>
+
+       * mpz/jacobi.c (mpz_jacobi): Fixed off-by-one error in use of
+       scratch space.
+
+       * tune/common.c (speed_mpz_powm_sec): New function.
+       * tune/speed.h: Declare speed_mpz_powm_sec.
+       * tune/speed.c (routine): Added speed_mpz_powm_sec.
+
+       * tune/common.c (speed_mpn_addlsh_n, speed_mpn_sublsh_n)
+       (speed_mpn_rsblsh_n): New functions.
+       * tune/speed.h: Declare new functions.
+       * tune/speed.c (routine): Add new functions.
+
+2010-05-12  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/x86_64/mod_1_4.asm: Tune for more processors.
+
+       * mpn/x86_64/pentium4/lshiftc.asm: New file.
+
+2010-05-11  Niels Möller  <nisse@lysator.liu.se>
+
+       * mpz/jacobi.c (mpz_jacobi): Deleted old implementation.
+       Reorganized new implementation, to handle small inputs efficiently.
+
+       * tests/mpz/t-jac.c (check_large_quotients): Reduced test sizes.
+       (check_data): One more input pair related to a fixed bug.
+       (main): Enable check_large_quotients.
+
+2010-05-10  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/x86_64/aorrlsh2_n.asm: Fix typo.
+
+2010-05-09  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/x86_64/aorrlshC_n.asm: New file based on aorrlsh2_n.asm.
+       * mpn/x86_64/aorrlsh2_n.asm: Now just include aorrlshC_n.asm.
+       * mpn/x86_64/core2/aorrlsh1_n.asm: New file, include ../aorrlshC_n.asm.
+       * mpn/x86_64/core2/aorrlsh2_n.asm: Likewise.
+
+       * mpn/x86_64/core2/sublshC_n.asm: New file based on aorslsh1_n.asm.
+       * mpn/x86_64/core2/aorslsh1_n.asm: Remove.
+       * mpn/x86_64/core2/sublsh1_n.asm: Just include sublshC_n.asm.
+       * mpn/x86_64/core2/sublsh2_n.asm: Likewise.
+
+2010-05-08  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/x86_64/atom/gmp-mparam.h: Disable mpn_rsh1add_n, mpn_rsh1sub_n.
+
+       * mpn/x86_64/pentium4/aorslshC_n.asm: New file based on aorslsh1_n.asm.
+       * mpn/x86_64/pentium4/aorslsh1_n.asm: Now just include aorslshC_n.asm.
+       * mpn/x86_64/pentium4/aorslsh2_n.asm: New file.
+
+2010-05-07  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/sparc64: Support operands of >= 2^32 limbs.
+
+       * mpn/sparc64/lshiftc.asm: New file.
+
+       * mpn/ia64/divrem_2.asm: Complete rewrite.
+
  2010-05-06  Torbjorn Granlund  <tege@gmplib.org>
  
-       [This change was made after the 5.0.2 release, but inserted here to
-       match the change chronology of the main repository.]
+       * tune/tuneup.c (all): Don't call tune_divrem_2.
+
+       * mpn/generic/divrem_2.c: Complete rewrite.
  
         * tune/tuneup.c (tune_mod_1): Fix typo.
  
  2010-05-05  Torbjorn Granlund  <tege@gmplib.org>
  
-       [These changes were made after the 5.0.2 release, but inserted here to
-       match the change chronology of the main repository.]
+       * mpn/x86_64/mod_1_1.asm (mpn_mod_1_1p): Use macro register names.
+       (mpn_mod_1_1p_cps): Rewrite.
+
+       * mpn/generic/mod_1_1.c (mpn_mod_1_1p_cps): Micro-optimise.
  
         * longlong.h: Undo 2009-03-01 change for powerpc64, it gives poor code.
  
+       * mpn/x86/pentium4/sse2/mod_1_1.asm: New file.
+
+       * mpn/powerpc64/mode64/mod_1_1.asm: New file.
+
         * tune/tuneup.c (tune_mod_1): Use more typical divisor, for the benefit
         of machines with early-out multipliers.
  
  2010-05-04  Torbjorn Granlund  <tege@gmplib.org>
  
-       [This change was made after the 5.0.2 release, but inserted here to
-       match the change chronology of the main repository.]
-
         * tune/tuneup.c (tune_mod_1): Fix typo.
  
+       * mpn/generic/mod_1_1.c: Undo last change.
+       * mpn/x86_64/mod_1_1.asm: Likewise.
+
+2010-05-03  Niels Möller  <nisse@lysator.liu.se>
+
+       * mpn/generic/jacobi_lehmer.c (jacobi_hook): New function.
+       (mpn_jacobi_subdiv_step): Deleted function.
+       (mpn_jacobi_lehmer): Use general mpn_gcd_subdiv_step.
+
+       * mpn/generic/gcd_subdiv_step.c (mpn_gcd_subdiv_step): Reorganized
+       to use a single hook function.
+       * mpn/generic/gcdext.c (mpn_gcdext): Adapted to new hook
+       interface.
+       * mpn/generic/gcdext_lehmer.c (mpn_gcdext_hook): New unified hook
+       function.
+       * mpn/generic/gcd.c (gcd_hook): Renamed from gcd_done, and adapted
+       to new hook interface.
+       * gmp-impl.h (gcd_subdiv_step_hook): New typedef, now a function
+       type, not a struct.
+       (mpn_gcdext_hook): Declare.
+
  2010-05-03  Torbjorn Granlund  <tege@gmplib.org>
  
-       [This change was made after the 5.0.2 release, but inserted here to
-       match the change chronology of the main repository.]
+       * mpn/generic/mod_1_1.c: Avoid multiply for 2 limb feed-in.
+       * mpn/generic/mod_1_2.c: Likewise.
+       * mpn/generic/mod_1_3.c: Likewise.
+       * mpn/generic/mod_1_4.c: Likewise.
+       * mpn/x86_64/mod_1_1.asm: Likewise.
+       * mpn/x86_64/mod_1_2.asm: Likewise.
+       * mpn/x86_64/mod_1_4.asm: Likewise.
+       * mpn/x86/k7/mod_1_4.asm: Likewise.
+       * mpn/x86/pentium4/sse2/mod_1_4.asm: Likewise.
+       * mpn/alpha/ev6/mod_1_4.asm: Likewise.
  
         * tune/tuneup.c (tune_mod_1): Measure MOD_1_1_TO_MOD_1_2_THRESHOLD and
         MOD_1_2_TO_MOD_1_4_THRESHOLD before MOD_1U_TO_MOD_1_1_THRESHOLD for
         correctness.
  
+       * mpn/powerpc64/sqr_diagonal.asm: Complete rewrite.
+
+       * mpn/powerpc64/mode64/mod_1_4.asm: New file.
+
+2010-05-02  Torbjorn Granlund  <tege@gmplib.org>
+
+       * config.guess: Recognise power7.
+
+       * configure.in: Major overhaul of powerpc support.
+
+       * mpn/powerpc64/p6/lshift.asm: New file.
+       * mpn/powerpc64/p6/lshiftc.asm: Likewise.
+       * mpn/powerpc64/p6/rshift.asm: Likewise.
+
+2010-04-30  Torbjorn Granlund  <tege@gmplib.org>
+
+       * configure.in (powerpc64): Support CPU specific mode-less subdirs.
+
+       * mpn/powerpc64/aix.m4 (PROLOGUE_cpu): Use "named csect" making
+       requested alignment actually honoured.
+
+2010-04-30  Niels Möller  <nisse@lysator.liu.se>
+
+       * mpn/generic/jacobi_lehmer.c (mpn_jacobi_2): Fixed handling of
+       the case bl == 1. Fixed missing application of reciprocity.
+
+2010-04-29  Niels Möller  <nisse@lysator.liu.se>
+
+       * configure.in (gmp_mpn_functions): Deleted gcdext_subdiv_step.
+
+       * mpn/generic/gcdext.c (mpn_gcdext): Use new generalized
+       mpn_gcd_subdiv_step.
+
+       * mpn/generic/gcdext_lehmer.c (gcdext_update): New function.
+       (gcdext_done): New function.
+       (gcdext_hook): New const hook struct.
+       (mpn_gcdext_lehmer_n): Use new generalized mpn_gcd_subdiv_step.
+
+       * mpn/generic/gcd.c (gcd_done): New function.
+       (gcd_hook): New const hook struct.
+       (mpn_gcd): Adapted to new mpn_gcd_subdiv_step interface.
+
+       * mpn/generic/gcd_subdiv_step.c (mpn_gcd_subdiv_step): Reorganized
+       function. Added hook function pointers to the argument list, so
+       the same function can be used for gcd, gcdext, and jacobi.
+
+       * gmp-impl.h (struct gcd_subdiv_step_hook): New struct.
+       (mpn_gcdext_subdiv_step): Deleted prototype.
+       (struct gcdext_ctx): New struct.
+       (gcdext_hook): Declare const struct.
+       (mpn_gcd_subdiv_step): Updated prototype.
+
+       * mpn/generic/gcdext_subdiv_step.c: Deleted file.
+
+2010-04-28  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/powerpc64/lshift.asm: Rewrite.
+       * mpn/powerpc64/rshift.asm: Likewise.
+       * mpn/powerpc64/mode64/lshiftc.asm: New file.
+
+       * mpn/powerpc64/aix.m4: Align functions to 32-byte boundary.
+       * mpn/powerpc64/darwin.m4: Likewise.
+       * mpn/powerpc64/elf.m4: Likewise.
+
+2010-04-28  Niels Möller  <nisse@lysator.liu.se>
+
+       * tests/mpz/t-jac.c (check_data): Added some more test cases.
+
+       * mpn/generic/jacobi_lehmer.c (mpn_jacobi_2): Bugfix, count
+       trailing zeros, not leading.
+
+2010-04-27  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/powerpc64/mode64/p6/mul_basecase.asm: New file.
+
+2010-04-23  Niels Möller  <nisse@lysator.liu.se>
+
+       * gmp-impl.h (MPN_GCD_LEHMER_N_ITCH): Deleted.
+       (mpn_gcd_lehmer_n): Deleted declaration.
+
+       * mpn/generic/gcd.c (gcd_2): Moved from gcd_lehmer.c.
+       (mpn_gcd): Inlined the code from mpn_gcd_lehmer_n. Also use
+       MPN_GCD_SUBDIV_STEP_ITCH rather than MPN_GCD_LEHMER_N_ITCH.
+
+2010-04-22  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/powerpc64/mode64/bdiv_dbm1c.asm: Swap multiply insns to make them
+       consecutive, for the benefit of POWER6.
+
+       * mpn/powerpc64/mode64/p6/gmp-mparam.h: New file.
+
+2010-04-21  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/generic/gcd_lehmer.c: Deleted file.
+
+       * mpn/powerpc64/mode64/divrem_1.asm: Swap multiply insns to make them
+       consecutive, for the benefit of POWER6.
+       * mpn/powerpc64/mode64/dive_1.asm: Likewise.
+       * mpn/powerpc64/mode64/divrem_2.asm: Likewise.
+       * mpn/powerpc64/mode64/mul_1.asm: Likewise.
+       * mpn/powerpc64/mode64/aorsmul_1.asm: Likewise.
+
+       * mpn/powerpc64/mode64/aorslshC_n.asm: Swap ldx operands as a temporary
+       workaround for POWER6 pipeline glitch.
+
+2010-04-19  Niels Möller  <nisse@lysator.liu.se>
+
+       * mpz/jacobi.c (mpz_jacobi): New implementation using
+       mpn_jacobi_lehmer. Currently #if:ed out.
+
+       * mpn/generic/jacbase.c (mpn_jacobi_base)
+       [JACOBI_BASE_METHOD < 4]: Support inputs with a >= b.
+
+       * gmp-impl.h (mpn_jacobi_lehmer): Added prototype.
+       (jacobi_table): Declare.
+       (mpn_jacobi_init): New inline function.
+       (mpn_jacobi_finish): Likewise.
+       (mpn_jacobi_update): Likewise.
+
+       * mpn/generic/jacobi_lehmer.c (mpn_jacobi_lehmer): New file, new
+       function.
+
+       * configure.in (gmp_mpn_functions): Added jacobi_lehmer.
+
+2010-04-14  Niels Möller  <nisse@lysator.liu.se>
+
+       * configure.in (gmp_mpn_functions): Added
+       matrix22_mul1_inverse_vector.
+       * mpn/Makefile.am (nodist_EXTRA_libmpn_la_SOURCES): Added
+       matrix22_mul1_inverse_vector.c.
+
+       * gmp-impl.h (mpn_matrix22_mul1_inverse_vector): Updated for
+       rename of mpn_matrix22_mul1_inverse_vector.
+       * mpn/generic/gcd_lehmer.c (mpn_gcd_lehmer_n): Likewise.
+       * mpn/generic/gcdext_lehmer.c (mpn_gcdext_lehmer_n): Likewise.
+       * mpn/generic/hgcd.c (hgcd_step): Likewise.
+
+       * mpn/generic/matrix22_mul1_inverse_vector.c
+       (mpn_matrix22_mul1_inverse_vector): New file, function moved and
+       renamed...
+       * mpn/generic/hgcd2.c (mpn_hgcd_mul_matrix1_inverse_vector):
+       ...from here.
+
  2010-04-12  Torbjorn Granlund  <tege@gmplib.org>
  
         * tests/mpn/t-toom6h.c (SIZE_LOG): Define.
@@ -612,45 +4941,310 @@
  
  2010-04-10  Torbjorn Granlund  <tege@gmplib.org>
  
+       * mpn/ia64/lorrshift.asm: Rewrite feed-in and wind-down code.
+
+       * mpn/ia64/aorslsh1_n.asm: Adapt to new aorslsh1_n.
+       * mpn/ia64/aorslsh1_n.asm: Likewise.
+
+       * mpn/ia64/aors_n.asm: Complete rewrite.
+       * mpn/ia64/aorslsh1_n.asm: Likewise.
+
+       * mpn/ia64/add_n_sub_n.asm: Misc cleanups.  Add slotting comments.
+
+       * mpn/ia64/lshiftc.asm: New file.
+
+       * mpn/x86_64/pentium4/gmp-mparam.h: No longer disable rsh1add_n and
+       rsh1sub_n; instead disable rsblsh1_n, addlsh2_n, rsblsh2_n.
+
         * mpn/x86/divrem_2.asm: Use "orb" instead of "or" to work around
         Solaris assembler bug.
         * mpn/x86_64/divrem_2.asm: Likewise.
  
+       * mpn/x86/aors_n.asm: Use operand-less shift-by-1 insn form.
+       * mpn/x86/pentium/aors_n.asm: Likewise.
+       * mpn/x86_64/invert_limb.asm: Likewise.
+
+       * mpn/x86_64/pentium4/aors_n.asm: Let non-nc code fall into nc code.
+
+       * mpn/x86_64/pentium4/rsh1aors_n.asm: New file.
+
  2010-03-25  Torbjorn Granlund  <tege@gmplib.org>
  
+       * mpn/ia64/add_n_sub_n.asm: New file.
+
         * mpn/generic/toom33_mul.c: Fix mpn_add_n_sub_n usage.
         * mpn/generic/toom3_sqr.c: Likewise.
         * mpn/generic/toom63_mul.c: Likewise.
  
+       * mpn/generic/add_n_sub_n.c: Renamed from addsub_n.c.
+
+2010-03-23  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/x86_64/divrem_2.asm: Use mpn_invert_limb instead of div insn.
+
+       * mpn/ia64/aorslshC_n.asm: New file, generalised from last iteration of
+       aorslsh1_n.asm.
+       * mpn/ia64/aorslsh1_n.asm: Use aorslshC_n.asm.
+       * mpn/ia64/aorslsh2_n.asm: New file, use aorslshC_n.asm.
+
+2010-03-20  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/powerpc64/mode64/invert_limb.asm: Rewrite to exploit cancellation
+       in the Newton iteration.
+
+2010-03-20 Marco Bodrato <bodrato@mail.dm.unipi.it>
+
+       * mpn/generic/toom_interpolate_8pts.c: Use mpn_sublsh2_n.
+
+2010-03-20  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/powerpc64/mode64/aorslshC_n.asm: New file, generalised from
+       last iteration of aorslsh1_n.asm.
+       * mpn/powerpc64/mode64/aorslsh1_n.asm: Use aorslshC_n.asm.
+       * mpn/powerpc64/mode64/aorslsh2_n.asm: New file, use aorslshC_n.asm.
+
  2010-03-19  Torbjorn Granlund  <tege@gmplib.org>
  
+       * mpn/x86_64/nano/dive_1.asm: New file.
+
+       * mpn/x86_64/divrem_1.asm: Avoid shld since it is slow on several CPU
+       types.  Unconditionally provide code for normalised and unnormalised
+       divisors.  Cleanup labels.
+
+       * mpn/x86_64/core2/divrem_1.asm: Remove special code for normalised
+       divisors.  Cleanup labels.
+
         * mpn/generic/toom_interpolate_6pts.c: Call mpn_sublsh2_n and
         mpn_sublsh_n with correct args.
  
+       * tests/devel/try.c: Use enum for TYPE_*.
+
+       * tests/devel/try.c: Test mpn_sublsh2_n.
+       * tests/refmpn.c (refmpn_sublsh2_n): New function.
+       * tests/tests.h (refmpn_sublsh2_n): Declare.
+
+       * mpn/powerpc64/mode64/aorslsh1_n.asm: New file, with faster
+       mpn_addlsh1_n and mpn_sublsh1_n.
+       * mpn/powerpc64/mode64/addlsh1_n.asm: Delete.
+       * mpn/powerpc64/mode64/sublsh1_n.asm: Delete.
+
+2010-03-18  Torbjorn Granlund  <tege@gmplib.org>
+
+       * configure.in (*-*-aix): Define gcc_32_cflags_maybe, ar_32_flags and
+       nm_32_flags.
+
+       * mpn/x86/pentium4/sse2/addlsh1_n.asm: Tune for slightly better speed.
+       Misc cleanups.  Add cycle table.
+
+       * mpn/x86_64/copyi.asm: Update cycle table.
+       * mpn/x86_64/copyd.asm: Likewise.
+       * mpn/x86_64/rsh1aors_n.asm: Likewise.
+       * mpn/x86_64/dive_1.asm: Likewise.
+
+       * mpn/x86/pentium4/sse2/add_n.asm: Misc cleanups.  Add cycle table.
+       * mpn/x86/pentium4/sse2/sub_n.asm: Likewise.
+
+2010-03-16  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/x86_64/divrem_1.asm: Use mpn_invert_limb instead of div insn.
+       * mpn/x86_64/core2/divrem_1.asm: Likewise.
+
+       * tune/speed.c (routine): Add FLAG_R_OPTIONAL for many binops.
+
+2010-03-15  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/alpha/ev6/mod_1_4.asm (mpn_mod_1s_4p_cps): Rewrite.
+
+       * mpn/ia64/aors_n.asm: Insert explicitly typed nops to trigger intended
+       bundling.
+       * mpn/ia64/aorslsh1_n.asm: Likewise.
+       * mpn/ia64/dive_1.asm: Likewise.
+
+2010-03-13  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/x86/pentium4/sse2/submul_1.asm: Rewrite.
+
+       * mpn/powerpc64/mode64/aorsmul_1.asm: New file, faster than old code
+       for both mpn_addmul_1 and mpn_submul_1.
+       * mpn/powerpc64/mode64/addmul_1.asm: Remove.
+       * mpn/powerpc64/mode64/submul_1.asm: Remove.
+
+2010-03-11  Niels Möller  <nisse@lysator.liu.se>
+
+       * mpn/generic/gcd_lehmer.c (gcd_2): Use sub_ddmmss.
+
+       * mpn/generic/jacbase.c (mpn_jacobi_base): Reorganized the
+       JACOBI_BASE_METHOD 4 slightly. Now requires that b > 1.
+
+2010-03-10  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/x86_64/divrem_1.asm: Make fraction code take documented # of
+       cycles.  Annotate code for more CPUs.  Misc cleanups.
+       * mpn/x86_64/core2/divrem_1.asm: Annotate code for more CPUs.
+
+       * mpn/alpha/ev6/mod_1_4.asm: New file.
+
+       * mpn/ia64/mod_34lsub1.asm: New file.
+
+       * doc/gmp.texi (Language Bindings): Update Python site, add Ruby.
+
+2010-03-10  Niels Möller  <nisse@lysator.liu.se>
+
+       * tune/tuneup.c (tune_jacobi_base): Consider mpn_jacobi_base_4.
+       * tune/speed.c (routine): Added mpn_jacobi_base_4.
+       * tune/common.c (speed_mpn_jacobi_base_4): New function.
+       * tune/speed.h (speed_mpn_jacobi_base_4): Declare it.
+       * tune/Makefile.am (libspeed_la_SOURCES): Added jacbase4.c.
+       * tune/jacbase4.c: New file.
+
+       * mpn/generic/jacbase.c (mpn_jacobi_base): New function, for
+       JACOBI_BASE_METHOD 4.
+
+2010-03-09  Niels Möller  <nisse@lysator.liu.se>
+
+       * tests/mpz/t-jac.c (check_large_quotients): Also generate inputs
+       with large quotients and a large gcd.
+
+2010-03-09 Marco Bodrato <bodrato@mail.dm.unipi.it>
+
+       * tests/mpz/t-bin.c (randomwalk): New test-generator function.
+
+2010-03-07  Torbjorn Granlund  <tege@gmplib.org>
+
+       * tune/speed.c (routine): Force r argument for several mod_1 calls.
+
  2010-03-06  Torbjorn Granlund  <tege@gmplib.org>
  
+       * mpn/x86_64/divrem_1.asm: Disable SPECIAL_CODE_FOR_NORMALIZED_DIVISOR.
+       Misc clean up.
+
+       * mpn/x86_64/mod_1_1.asm: New file.
+       * mpn/x86_64/mod_1_2.asm: New file.
+       * mpn/x86_64/mod_1_4.asm: Update cycle counts.
+
         * tests/tests.h (TESTS_REPS): Fix typo.
  
+2010-03-03  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/x86_64/core2/divrem_1.asm: New file.
+
+2010-02-26  Niels Möller  <nisse@lysator.liu.se>
+
+       * tune/speed.c (routine): Added udiv_qrnnd_preinv3.
+
+       * tune/common.c (speed_udiv_qrnnd_preinv3): New function.
+       * tune/speed.h: Added prototype for it.
+
+2010-02-26  Niels Möller  <nisse@lysator.liu.se>
+
+       * tests/mpz/t-jac.c (check_large_quotients): New test. Currently
+       disabled, since it's quite slow.
+       (mpz_nextprime_step): New function.
+
  2010-02-26  Torbjorn Granlund  <tege@gmplib.org>
  
         * mpn/pa64/aors_n.asm: Fix typo in last change.
  
+2010-02-25  Niels Möller  <nisse@lysator.liu.se>
+
+       * tests/mpz/t-jac.c (ref_jacobi): New reference implementation,
+       using factorization and legendre symbols computed by powm.
+
+       * tests/devel/try.c (param_init, call): Don't pass negative values
+       for the second argument to mpz_jacobi and refmpz_jacobi.
+
+       * tests/refmpz.c (refmpz_jacobi): Require that b is odd and positive.
+
+       * tests/devel/try.c (param_init): Support mpz_legendre.
+       (choice_array): Added mpz_kronecker (apparently forgotten) and
+       mpz_legendre.
+       (call): Added TYPE_MPZ_LEGENDRE.
+       (try_one): Added support for DATA_SRC1_ODD_PRIME.
+
+       * tests/refmpz.c (refmpz_legendre): Rewrote using powm.
+
  2010-02-25  Torbjorn Granlund  <tege@gmplib.org>
  
+       * config.guess: Make "corei" default for unrecognised Intel P6 CPUs.
+
         * tests/mpz/t-perfpow.c (check_random): Use mp_limb_t type for limb
         variables.
  
+       * tests/mpn/t-toom6h.c (COUNT): Define.
+       * tests/mpn/t-toom8h.c (COUNT): Define.
+
         * tests/mpn/t-div.c: Cast a switch index to placate HP's cc.
         * tests/mpn/t-bdiv.c: Likewise.
  
         * mpn/pa64/aors_n.asm: Fix support of the 2.0n ABI.
  
+2010-02-24 Marco Bodrato <bodrato@mail.dm.unipi.it>
+
+       * tests/mpz/t-bin.c (data): Replace (2k,k), tested by twos ().
+       * tests/mpf/t-inp_str.c (data): Test also "+" in the exponent.
+
  2010-02-23  Torbjorn Granlund  <tege@gmplib.org>
  
-       * mpn/generic/mod_1_3.c: Cast a switch index.
+       * mpn/generic/mod_1_3.c: Cast a switch index to placate HP's cc.
  
         * mpn/generic/sqrtrem.c: Use CNST_LIMB.
  
+2010-02-20  Niels Möller  <nisse@lysator.liu.se>
+
+       * tune/speed.h (mpn_gcd_accel): Deleted prototype.
+       (mpn_hgcd_lehmer): New prototype.
+       (MPN_HGCD_LEHMER_ITCH): New macro (previously in gmp-impl.h).
+
+       * tune/Makefile.am (libspeed_la_SOURCES): Added hgcd_lehmer.c.
+       * tune/hgcd_lehmer.c: New file.
+       * tune/gcd_accel.c: Deleted obsolete file.
+
+       * gmp-impl.h (MPN_HGCD_LEHMER_ITCH): Deleted macro.
+
+       * mpn/generic/hgcd.c (mpn_hgcd_lehmer): Deleted function.
+       (mpn_hgcd): Don't call mpn_hgcd_lehmer, instead use inlined loop
+       around hgcd_step.
+       (mpn_hgcd_itch): Substitute n for MPN_HGCD_LEHMER_ITCH (n).
+
+2010-02-19  Niels Möller  <nisse@lysator.liu.se>
+
+       * Makefile.am (mpn/jacobitab.h): Added the rules needed to
+       generate this file.
+
+       * gen-jacobitab.c: New file.
+
+2010-02-19  Torbjorn Granlund  <tege@gmplib.org>
+
+       * mpn/generic/powm.c: Honour SQR_BASECASE_THRESHOLD in innerloop
+       expansions.
+
+2010-02-16  Niels Möller  <nisse@lysator.liu.se>
+
+       * tune/time.c (cgt_works_p): Added rudimentary sanity check for
+       clock_gettime working.
+
+2010-02-15  Niels Möller  <nisse@lysator.liu.se>
+
+       * tune/time.c (speed_time_init): Make use of cycle counter
+       configurable, via the speed_option_cycles_broken flag.
+       * tune/common.c (speed_option_cycles_broken): New global variable.
+       (speed_option_set): Recognize option "cycles-broken".
+
+       * tune/time.c (cycles_works_p): Deleted hack to disable cycle
+       counter on linux. Needs to be replaced by something more
+       selective.
+
+2010-02-11  Niels Möller  <nisse@lysator.liu.se>
+
+       * tune/time.c (speed_time_init): Fix speed_time_string when using
+       clock_gettime.
+       (cycles_works_p): On linux, don't use the cycle counter.
+
+       * tune/Makefile.am: Add $(TUNE_LIBS) when linking programs.
+
+       * configure.in: Check if -lrt is needed for clock_gettime, and if
+       so, add that flag to TUNE_LIBS.
+
  2010-02-07  Torbjorn Granlund  <tege@gmplib.org>
  
         * tune/tuneup.c (tune_redc): Set min_size and min_is_always when
@@ -1294,7 +5888,7 @@
  
         * mpn/generic/mu_div_q.c (mpn_mu_div_q_itch): New function.
  
-2009-12-22  Niels Möller  <<nisse@lysator.liu.se>>
+2009-12-22  Niels Möller  <nisse@lysator.liu.se>
  
         * mpn/generic/sbpi1_div_q.c: Use udiv_qr_3by2.  Intended to change
         nothing after preprocessing.
@@ -2295,7 +6889,7 @@
         * tune/Makefile.am (libspeed_la_SOURCES): Remove sb_div.c and sb_inv.c.
         (TUNE_MPN_SRCS_BASIC): Remove sb_divrem_mn.c.
         * tune/common.c (speed_mpn_dcpi1_div_qr_n): New function.
-        Remove mpn_sb_divrem_mn related functions.
+       Remove mpn_sb_divrem_mn related functions.
         * tune/speed.c (routine): Remove entries related to mpn_dc_divrem and
         mpn_sb_divrem.
         (routine): New entry for mpn_dc_div_qr_n.
@@ -2769,7 +7363,7 @@
  
         * nextprime.c: New file.
         * gmp-impl.h (gmp_primesieve_t, gmp_init_primesieve, gmp_nextprime):
-       Declare
+       Declare.
         * Makefile.am (libgmp_la_SOURCES): Add nextprime.c.
  
  2009-06-11  Torbjorn Granlund  <tege@gmplib.org>
@@ -20227,8 +24821,8 @@
         * mpn/Makefile.am: Remove incorrect comment.
         * mpn/Makefile.in: Regenerate.
  
-       * gmp.h: Rename most of the random number functions, structs and
-         some of the struct members.
+       * gmp.h: Rename most of the random number functions, structs and some
+       of the struct members.
         * rand.c (gmp_randinit): Likewise.
         * randclr.c (gmp_randclear): Likewise.
         * randlc.c (gmp_randinit_lc): Likewise.
@@ -25576,7 +30170,7 @@ Fri Dec 13 23:10:02 1991  Torbjorn Granlund  (tege@zevs.sics.se)
  
         * mpz_do_sqrt: Simplify special case for U == 0.
         * m*sqrt*.c, mpz_perfsqr.c (mpz_perfect_square_p):
-         Rename _mpz_impl_sqrt to _mpz_do_sqrt.
+       Rename _mpz_impl_sqrt to _mpz_do_sqrt.
  
  Fri Dec 13 12:52:28 1991  Torbjorn Granlund  (tege@zevs.sics.se)
  
diff --git a/Makefile.am b/Makefile.am

index 37f4cc622a4bd43457fff593b0671d6b505f0342..ad5810a8e81f7c96d3144e306f7c51fd800d5755 100644 (file)
--- a/Makefile.am
+++ b/Makefile.am
@@ -2,7 +2,7 @@
  
  
  # Copyright 1991, 1993, 1994, 1996, 1997, 1999, 2000, 2001, 2002, 2003, 2004,
-# 2006, 2007, 2008, 2009 Free Software Foundation, Inc.
+# 2006, 2007, 2008, 2009, 2011, 2012, 2013 Free Software Foundation, Inc.
  #
  # This file is part of the GNU MP Library.
  #
@@ -25,7 +25,7 @@
  # Makefiles in subdirectories, but here we must omit it so automake gives
  # the actual ansi2knr build rule, not "cd $(top_builddir) && make ansi2knr".
  #
-AUTOMAKE_OPTIONS = 1.8 gnu no-dependencies ansi2knr
+# AUTOMAKE_OPTIONS = 1.8 gnu no-dependencies
  
  
  # Libtool -version-info for libgmp.la and libmp.la.  See "Versioning" in the
@@ -71,6 +71,10 @@ AUTOMAKE_OPTIONS = 1.8 gnu no-dependencies ansi2knr
  #        5.0.3   10:3:0    6:3:2   4:23:1
  #        5.0.4   10:4:0    6:4:2   4:24:1
  #        5.0.5   10:5:0    6:5:2   4:25:1
+#        5.1.0   11:0:1    7:0:3     -
+#        5.1.1   11:1:1    7:1:3     -
+#        5.1.2   11:2:1    7:2:3     -
+#        5.1.3   11:3:1    7:3:3     -
  #
  # Starting at 3:0:0 is a slight abuse of the versioning system, but it
  # ensures we're past soname libgmp.so.2, which was used on Debian GNU/Linux
@@ -83,20 +87,16 @@ AUTOMAKE_OPTIONS = 1.8 gnu no-dependencies ansi2knr
  # it's still good to get the shared library filename (like
  # libgmpxx.so.3.0.4) incrementing, to make it clear which GMP it's from.
  
-LIBGMP_LT_CURRENT =  10
-LIBGMP_LT_REVISION = 5
-LIBGMP_LT_AGE =      0
+LIBGMP_LT_CURRENT    = 11
+LIBGMP_LT_REVISION   = 3
+LIBGMP_LT_AGE        = 1
  
-LIBGMPXX_LT_CURRENT =  6
-LIBGMPXX_LT_REVISION = 5
-LIBGMPXX_LT_AGE =      2
+LIBGMPXX_LT_CURRENT  = 7
+LIBGMPXX_LT_REVISION = 3
+LIBGMPXX_LT_AGE      = 3
  
-LIBMP_LT_CURRENT =  4
-LIBMP_LT_REVISION = 25
-LIBMP_LT_AGE =      1
  
-
-SUBDIRS = tests mpn mpz mpq mpf printf scanf cxx mpbsd demos tune doc
+SUBDIRS = tests mpn mpz mpq mpf printf scanf rand cxx demos tune doc
  
  EXTRA_DIST = configfsf.guess configfsf.sub .gdbinit INSTALL.autoconf
  
@@ -116,8 +116,8 @@ EXTRA_DIST += gmpxx.h
  #
  includeexecdir = $(exec_prefix)/include
  include_HEADERS = $(GMPXX_HEADERS_OPTION)
-nodist_includeexec_HEADERS = gmp.h $(MPBSD_HEADERS_OPTION)
-lib_LTLIBRARIES = libgmp.la $(GMPXX_LTLIBRARIES_OPTION) $(MPBSD_LTLIBRARIES_OPTION)
+nodist_includeexec_HEADERS = gmp.h
+lib_LTLIBRARIES = libgmp.la $(GMPXX_LTLIBRARIES_OPTION)
  
  BUILT_SOURCES = gmp.h
  
@@ -161,16 +161,17 @@ MPZ_OBJECTS = mpz/abs$U.lo mpz/add$U.lo mpz/add_ui$U.lo                   \
    mpz/cong$U.lo mpz/cong_2exp$U.lo mpz/cong_ui$U.lo                    \
    mpz/divexact$U.lo mpz/divegcd$U.lo mpz/dive_ui$U.lo                  \
    mpz/divis$U.lo mpz/divis_ui$U.lo mpz/divis_2exp$U.lo mpz/dump$U.lo   \
-  mpz/export$U.lo mpz/fac_ui$U.lo mpz/fdiv_q$U.lo                      \
+  mpz/export$U.lo mpz/mfac_uiui$U.lo                                   \
+  mpz/2fac_ui$U.lo mpz/fac_ui$U.lo mpz/oddfac_1$U.lo mpz/prodlimbs$U.lo        \
    mpz/fdiv_q_ui$U.lo mpz/fdiv_qr$U.lo mpz/fdiv_qr_ui$U.lo              \
-  mpz/fdiv_r$U.lo mpz/fdiv_r_ui$U.lo                                   \
+  mpz/fdiv_r$U.lo mpz/fdiv_r_ui$U.lo mpz/fdiv_q$U.lo                   \
    mpz/fdiv_ui$U.lo mpz/fib_ui$U.lo mpz/fib2_ui$U.lo mpz/fits_sint$U.lo \
    mpz/fits_slong$U.lo mpz/fits_sshort$U.lo mpz/fits_uint$U.lo          \
    mpz/fits_ulong$U.lo mpz/fits_ushort$U.lo mpz/gcd$U.lo                        \
    mpz/gcd_ui$U.lo mpz/gcdext$U.lo mpz/get_d$U.lo mpz/get_d_2exp$U.lo   \
    mpz/get_si$U.lo mpz/get_str$U.lo mpz/get_ui$U.lo mpz/getlimbn$U.lo   \
    mpz/hamdist$U.lo                                                     \
-  mpz/import$U.lo mpz/init$U.lo mpz/init2$U.lo mpz/inits$U.lo          \
+  mpz/import$U.lo mpz/init$U.lo mpz/init2$U.lo mpz/inits$U.lo          \
    mpz/inp_raw$U.lo mpz/inp_str$U.lo mpz/invert$U.lo                    \
    mpz/ior$U.lo mpz/iset$U.lo mpz/iset_d$U.lo mpz/iset_si$U.lo          \
    mpz/iset_str$U.lo mpz/iset_ui$U.lo mpz/jacobi$U.lo mpz/kronsz$U.lo   \
@@ -181,7 +182,8 @@ MPZ_OBJECTS = mpz/abs$U.lo mpz/add$U.lo mpz/add_ui$U.lo                     \
    mpz/n_pow_ui$U.lo mpz/neg$U.lo mpz/nextprime$U.lo                    \
    mpz/out_raw$U.lo mpz/out_str$U.lo mpz/perfpow$U.lo mpz/perfsqr$U.lo  \
    mpz/popcount$U.lo mpz/pow_ui$U.lo mpz/powm$U.lo mpz/powm_sec$U.lo    \
-  mpz/powm_ui$U.lo mpz/pprime_p$U.lo mpz/random$U.lo mpz/random2$U.lo  \
+  mpz/powm_ui$U.lo mpz/primorial_ui$U.lo                               \
+  mpz/pprime_p$U.lo mpz/random$U.lo mpz/random2$U.lo                   \
    mpz/realloc$U.lo mpz/realloc2$U.lo mpz/remove$U.lo                   \
    mpz/root$U.lo mpz/rootrem$U.lo mpz/rrandomb$U.lo mpz/scan0$U.lo      \
    mpz/scan1$U.lo mpz/set$U.lo mpz/set_d$U.lo mpz/set_f$U.lo            \
@@ -225,19 +227,18 @@ SCANF_OBJECTS =                                                   \
    scanf/scanf$U.lo scanf/sscanf$U.lo scanf/sscanffuns$U.lo     \
    scanf/vfscanf$U.lo scanf/vscanf$U.lo scanf/vsscanf$U.lo
  
+RANDOM_OBJECTS =                                                       \
+  rand/rand$U.lo rand/randclr$U.lo rand/randdef$U.lo rand/randiset$U.lo        \
+  rand/randlc2s$U.lo rand/randlc2x$U.lo rand/randmt$U.lo               \
+  rand/randmts$U.lo rand/rands$U.lo rand/randsd$U.lo rand/randsdui$U.lo        \
+  rand/randbui$U.lo rand/randmui$U.lo
+
  # no $U for C++ files
  CXX_OBJECTS =                                                          \
    cxx/isfuns.lo cxx/ismpf.lo cxx/ismpq.lo cxx/ismpz.lo cxx/ismpznw.lo  \
-  cxx/osdoprnti.lo cxx/osfuns.lo                                       \
+  cxx/limits.lo cxx/osdoprnti.lo cxx/osfuns.lo                         \
    cxx/osmpf.lo cxx/osmpq.lo cxx/osmpz.lo
  
-MPBSD_OBJECTS = mpbsd/add$U.lo mpbsd/tdiv_qr$U.lo mpbsd/set$U.lo       \
-  mpbsd/powm$U.lo mpbsd/sub$U.lo mpbsd/cmp$U.lo mpbsd/mfree$U.lo       \
-  mpbsd/mtox$U.lo mpbsd/realloc$U.lo mpbsd/gcd$U.lo mpbsd/itom$U.lo    \
-  mpbsd/min$U.lo mpbsd/mul$U.lo mpbsd/mout$U.lo mpbsd/rpow$U.lo                \
-  mpbsd/sdiv$U.lo mpbsd/sqrtrem$U.lo mpbsd/xtom$U.lo
-
-
  # In libtool 1.5 it doesn't work to build libgmp.la from the convenience
  # libraries like mpz/libmpz.la.  Or rather it works, but it ends up putting
  # PIC objects into libgmp.a if shared and static are both built.  (The PIC
@@ -252,17 +253,15 @@ MPBSD_OBJECTS = mpbsd/add$U.lo mpbsd/tdiv_qr$U.lo mpbsd/set$U.lo  \
  # -export-symbols, since the tune and speed programs, and perhaps some of
  # the test programs, want to access undocumented symbols.
  
-libgmp_la_SOURCES = gmp-impl.h longlong.h randmt.h                     \
+libgmp_la_SOURCES = gmp-impl.h longlong.h                              \
    assert.c compat.c errno.c extract-dbl.c invalid.c memory.c           \
    mp_bpl.c mp_clz_tab.c mp_dv_tab.c mp_minv_tab.c mp_get_fns.c mp_set_fns.c \
-  rand.c randclr.c randdef.c randiset.c randlc2s.c randlc2x.c randmt.c \
-  randmts.c rands.c randsd.c randsdui.c randbui.c randmui.c version.c  \
-  nextprime.c
+  version.c nextprime.c primesieve.c
  EXTRA_libgmp_la_SOURCES = tal-debug.c tal-notreent.c tal-reent.c
  libgmp_la_DEPENDENCIES = @TAL_OBJECT@          \
    $(MPF_OBJECTS) $(MPZ_OBJECTS) $(MPQ_OBJECTS) \
    $(MPN_OBJECTS) @mpn_objs_in_libgmp@          \
-  $(PRINTF_OBJECTS)  $(SCANF_OBJECTS)
+  $(PRINTF_OBJECTS)  $(SCANF_OBJECTS) $(RANDOM_OBJECTS)
  libgmp_la_LIBADD = $(libgmp_la_DEPENDENCIES)
  libgmp_la_LDFLAGS = $(GMP_LDFLAGS) $(LIBGMP_LDFLAGS) \
    -version-info $(LIBGMP_LT_CURRENT):$(LIBGMP_LT_REVISION):$(LIBGMP_LT_AGE)
@@ -282,28 +281,6 @@ libgmpxx_la_LDFLAGS = $(GMP_LDFLAGS) $(LIBGMPXX_LDFLAGS) \
    -version-info $(LIBGMPXX_LT_CURRENT):$(LIBGMPXX_LT_REVISION):$(LIBGMPXX_LT_AGE)
  
  
-# The selected mpz objects here support mpz/powm.c (built as mpbsd/powm.lo)
-# and can probably be removed when that switches to an mpn implementation.
-# (Apart from mpz/n_pow_ui$U.lo, which supports mpbsd/rpow.c)
-
-if WANT_MPBSD
-MPBSD_HEADERS_OPTION = mp.h
-MPBSD_LTLIBRARIES_OPTION = libmp.la
-endif
-BUILT_SOURCES += mp.h
-libmp_la_SOURCES = assert.c errno.c memory.c mp_bpl.c mp_clz_tab.c     \
-  mp_dv_tab.c mp_minv_tab.c mp_get_fns.c mp_set_fns.c nextprime.c
-libmp_la_DEPENDENCIES = $(srcdir)/libmp.sym                            \
-  @TAL_OBJECT@ $(MPBSD_OBJECTS) $(MPN_OBJECTS) @mpn_objs_in_libmp@     \
-  mpz/add$U.lo mpz/gcdext$U.lo mpz/invert$U.lo mpz/mul$U.lo            \
-  mpz/n_pow_ui$U.lo mpz/realloc$U.lo mpz/set$U.lo mpz/sub$U.lo         \
-  mpz/tdiv_q$U.lo
-libmp_la_LIBADD = $(libmp_la_DEPENDENCIES)
-libmp_la_LDFLAGS = $(GMP_LDFLAGS) \
-  -version-info $(LIBMP_LT_CURRENT):$(LIBMP_LT_REVISION):$(LIBMP_LT_AGE) \
-  -export-symbols $(srcdir)/libmp.sym
-EXTRA_DIST += libmp.sym
-
  
  install-data-hook:
         @echo ''
@@ -322,9 +299,6 @@ install-data-hook:
         @echo ''
  
  
-# The ansi2knr setups for the build programs are the same as the normal
-# automake ansi2knr rules, but using $(CC_FOR_BUILD) instead of $(CC).
-#
  # The "test -f" support for srcdir!=builddir is similar to the automake .c.o
  # etc rules, but with each foo.c explicitly, since $< is not portable
  # outside an inference rule.
@@ -341,19 +315,16 @@ install-data-hook:
  # the .h files are not properly expressed for the various objects that use
  # them.
  
-EXTRA_DIST += dumbmp.c
-
-mpz/fac_ui.h: gen-fac_ui$(EXEEXT_FOR_BUILD)
-       ./gen-fac_ui $(GMP_LIMB_BITS) $(GMP_NAIL_BITS) >mpz/fac_ui.h || (rm -f mpz/fac_ui.h; exit 1)
-BUILT_SOURCES += mpz/fac_ui.h
+EXTRA_DIST += bootstrap.c
  
-gen-fac_ui$(EXEEXT_FOR_BUILD): gen-fac_ui$(U_FOR_BUILD).c dumbmp.c
-       $(CC_FOR_BUILD) `test -f 'gen-fac_ui$(U_FOR_BUILD).c' || echo '$(srcdir)/'`gen-fac_ui$(U_FOR_BUILD).c -o gen-fac_ui$(EXEEXT_FOR_BUILD)
-DISTCLEANFILES += gen-fac_ui$(EXEEXT_FOR_BUILD)
-EXTRA_DIST += gen-fac_ui.c
+fac_table.h: gen-fac$(EXEEXT_FOR_BUILD)
+       ./gen-fac $(GMP_LIMB_BITS) $(GMP_NAIL_BITS) >fac_table.h || (rm -f fac_table.h; exit 1)
+BUILT_SOURCES += fac_table.h
  
-gen-fac_ui_.c: gen-fac_ui.c $(ANSI2KNR)
-       $(CPP_FOR_BUILD) `if test -f $(srcdir)/gen-fac_ui.c; then echo $(srcdir)/gen-fac_ui.c; else echo gen-fac_ui.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > gen-fac_ui_.c || rm -f gen-fac_ui_.c
+gen-fac$(EXEEXT_FOR_BUILD): gen-fac$(U_FOR_BUILD).c bootstrap.c
+       $(CC_FOR_BUILD) `test -f 'gen-fac$(U_FOR_BUILD).c' || echo '$(srcdir)/'`gen-fac$(U_FOR_BUILD).c -o gen-fac$(EXEEXT_FOR_BUILD)
+DISTCLEANFILES += gen-fac$(EXEEXT_FOR_BUILD)
+EXTRA_DIST += gen-fac.c
  
  
  fib_table.h: gen-fib$(EXEEXT_FOR_BUILD)
@@ -364,14 +335,11 @@ mpn/fib_table.c: gen-fib$(EXEEXT_FOR_BUILD)
         ./gen-fib table $(GMP_LIMB_BITS) $(GMP_NAIL_BITS) >mpn/fib_table.c || (rm -f mpn/fib_table.c; exit 1)
  BUILT_SOURCES += mpn/fib_table.c
  
-gen-fib$(EXEEXT_FOR_BUILD): gen-fib$(U_FOR_BUILD).c dumbmp.c
+gen-fib$(EXEEXT_FOR_BUILD): gen-fib$(U_FOR_BUILD).c bootstrap.c
         $(CC_FOR_BUILD) `test -f 'gen-fib$(U_FOR_BUILD).c' || echo '$(srcdir)/'`gen-fib$(U_FOR_BUILD).c -o gen-fib$(EXEEXT_FOR_BUILD)
  DISTCLEANFILES += gen-fib$(EXEEXT_FOR_BUILD)
  EXTRA_DIST += gen-fib.c
  
-gen-fib_.c: gen-fib.c $(ANSI2KNR)
-       $(CPP_FOR_BUILD) `if test -f $(srcdir)/gen-fib.c; then echo $(srcdir)/gen-fib.c; else echo gen-fib.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > gen-fib_.c || rm -f gen-fib_.c
-
  
  mp_bases.h: gen-bases$(EXEEXT_FOR_BUILD)
         ./gen-bases header $(GMP_LIMB_BITS) $(GMP_NAIL_BITS) >mp_bases.h || (rm -f mp_bases.h; exit 1)
@@ -381,43 +349,44 @@ mpn/mp_bases.c: gen-bases$(EXEEXT_FOR_BUILD)
         ./gen-bases table $(GMP_LIMB_BITS) $(GMP_NAIL_BITS) >mpn/mp_bases.c || (rm -f mpn/mp_bases.c; exit 1)
  BUILT_SOURCES += mpn/mp_bases.c
  
-gen-bases$(EXEEXT_FOR_BUILD): gen-bases$(U_FOR_BUILD).c dumbmp.c
+gen-bases$(EXEEXT_FOR_BUILD): gen-bases$(U_FOR_BUILD).c bootstrap.c
         $(CC_FOR_BUILD) `test -f 'gen-bases$(U_FOR_BUILD).c' || echo '$(srcdir)/'`gen-bases$(U_FOR_BUILD).c -o gen-bases$(EXEEXT_FOR_BUILD) $(LIBM_FOR_BUILD)
  DISTCLEANFILES += gen-bases$(EXEEXT_FOR_BUILD)
  EXTRA_DIST += gen-bases.c
  
-gen-bases_.c: gen-bases.c $(ANSI2KNR)
-       $(CPP_FOR_BUILD) `if test -f $(srcdir)/gen-bases.c; then echo $(srcdir)/gen-bases.c; else echo gen-bases.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > gen-bases_.c || rm -f gen-bases_.c
-
-
  
  trialdivtab.h: gen-trialdivtab$(EXEEXT_FOR_BUILD)
         ./gen-trialdivtab $(GMP_LIMB_BITS) 8000 >trialdivtab.h || (rm -f trialdivtab.h; exit 1)
  BUILT_SOURCES += trialdivtab.h
  
-gen-trialdivtab$(EXEEXT_FOR_BUILD): gen-trialdivtab$(U_FOR_BUILD).c dumbmp.c
+gen-trialdivtab$(EXEEXT_FOR_BUILD): gen-trialdivtab$(U_FOR_BUILD).c bootstrap.c
         $(CC_FOR_BUILD) `test -f 'gen-trialdivtab$(U_FOR_BUILD).c' || echo '$(srcdir)/'`gen-trialdivtab$(U_FOR_BUILD).c -o gen-trialdivtab$(EXEEXT_FOR_BUILD) $(LIBM_FOR_BUILD)
  DISTCLEANFILES += gen-trialdivtab$(EXEEXT_FOR_BUILD)
  EXTRA_DIST += gen-trialdivtab.c
  
-gen-trialdivtab_.c: gen-trialdivtab.c $(ANSI2KNR)
-       $(CPP_FOR_BUILD) `if test -f $(srcdir)/gen-trialdivtab.c; then echo $(srcdir)/gen-trialdivtab.c; else echo gen-trialdivtab.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > gen-trialdivtab_.c || rm -f gen-trialdivtab_.c
  
+mpn/jacobitab.h: gen-jacobitab$(EXEEXT_FOR_BUILD)
+       ./gen-jacobitab >mpn/jacobitab.h || (rm -f mpn/jacobitab.h; exit 1)
+BUILT_SOURCES += mpn/jacobitab.h
  
+gen-jacobitab$(EXEEXT_FOR_BUILD): gen-jacobitab$(U_FOR_BUILD).c
+       $(CC_FOR_BUILD) `test -f 'gen-jacobitab$(U_FOR_BUILD).c' || echo '$(srcdir)/'`gen-jacobitab$(U_FOR_BUILD).c -o gen-jacobitab$(EXEEXT_FOR_BUILD)
+DISTCLEANFILES += gen-jacobitab$(EXEEXT_FOR_BUILD)
+EXTRA_DIST += gen-jacobitab.c
  
  
  mpn/perfsqr.h: gen-psqr$(EXEEXT_FOR_BUILD)
         ./gen-psqr $(GMP_LIMB_BITS) $(GMP_NAIL_BITS) >mpn/perfsqr.h || (rm -f mpn/perfsqr.h; exit 1)
  BUILT_SOURCES += mpn/perfsqr.h
  
-gen-psqr$(EXEEXT_FOR_BUILD): gen-psqr$(U_FOR_BUILD).c dumbmp.c
+gen-psqr$(EXEEXT_FOR_BUILD): gen-psqr$(U_FOR_BUILD).c bootstrap.c
         $(CC_FOR_BUILD) `test -f 'gen-psqr$(U_FOR_BUILD).c' || echo '$(srcdir)/'`gen-psqr$(U_FOR_BUILD).c -o gen-psqr$(EXEEXT_FOR_BUILD) $(LIBM_FOR_BUILD)
  DISTCLEANFILES += gen-psqr$(EXEEXT_FOR_BUILD)
  EXTRA_DIST += gen-psqr.c
  
-gen-psqr_.c: gen-psqr.c $(ANSI2KNR)
-       $(CPP_FOR_BUILD) `if test -f $(srcdir)/gen-psqr.c; then echo $(srcdir)/gen-psqr.c; else echo gen-psqr.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > gen-psqr_.c || rm -f gen-psqr_.c
-
+# Distribute mini-gmp. Test sources copied by dist-hook.
+EXTRA_DIST += mini-gmp/README mini-gmp/mini-gmp.c mini-gmp/mini-gmp.h \
+             mini-gmp/tests/Makefile mini-gmp/tests/run-tests
  
  # Avoid: CVS - cvs directories
  #        *~  - emacs backups
@@ -429,6 +398,33 @@ gen-psqr_.c: gen-psqr.c $(ANSI2KNR)
  dist-hook:
         -find $(distdir) \( -name CVS -type d \) -o -name "*~" -o -name ".#*" \
                 | xargs rm -rf
+       cp "$(srcdir)"/mini-gmp/tests/*.[ch] "$(distdir)/mini-gmp/tests"
  #      grep -F $(VERSION) $(srcdir)/Makefile.am \
-#              | grep -q "^# *$(VERSION) *$(LIBGMP_LT_CURRENT):$(LIBGMP_LT_REVISION):$(LIBGMP_LT_AGE) *$(LIBGMPXX_LT_CURRENT):$(LIBGMPXX_LT_REVISION):$(LIBGMPXX_LT_AGE) *$(LIBMP_LT_CURRENT):$(LIBMP_LT_REVISION):$(LIBMP_LT_AGE)"
+#              | grep -q "^# *$(VERSION) *$(LIBGMP_LT_CURRENT):$(LIBGMP_LT_REVISION):$(LIBGMP_LT_AGE) *$(LIBGMPXX_LT_CURRENT):$(LIBGMPXX_LT_REVISION):$(LIBGMPXX_LT_AGE)"
  #      test -z "`sed -n 's/^# *[0-9]*\.[0-9]*\.[0-9]* *\([0-9]*:[0-9]*:[0-9]*\) *\([0-9]*:[0-9]*:[0-9]*\) *\([0-9]*:[0-9]*:[0-9]*\).*/A\1\nB\2\nC\3/p' $(srcdir)/Makefile.am | grep -v 'A6:3:3\|B3:5:0\|C4:7:1' | sort | uniq -d`"
+
+.PHONY: check-mini-gmp clean-mini-gmp
+
+check-mini-gmp:
+       abs_srcdir="`cd $(srcdir) && pwd`" ; \
+       $(MKDIR_P) mini-gmp/tests \
+       && cd mini-gmp/tests \
+       && LD_LIBRARY_PATH="../../.libs:$$LD_LIBRARY_PATH" \
+          DYLD_LIBRARY_PATH="../../.libs:$$DYLD_LIBRARY_PATH" \
+          $(MAKE) -f "$$abs_srcdir/mini-gmp/tests/Makefile" \
+               VPATH="$$abs_srcdir/mini-gmp/tests" \
+               srcdir="$$abs_srcdir/mini-gmp/tests" \
+               MINI_GMP_DIR="$$abs_srcdir/mini-gmp" \
+               LDFLAGS="-L../../.libs" \
+               LIBS="-lgmp -lm" \
+               CC="$(CC_FOR_BUILD)" EXTRA_CFLAGS="-g -I../.." check
+
+clean-mini-gmp:
+       if [ -d mini-gmp/tests ] ; then \
+         abs_srcdir="`cd $(srcdir) && pwd`" ; \
+         cd mini-gmp/tests \
+         && $(MAKE) -f "$$abs_srcdir/mini-gmp/tests/Makefile" clean ; \
+       fi
+
+clean-local: clean-mini-gmp
+distclean-local: clean-mini-gmp
diff --git a/Makefile.in b/Makefile.in

index 7728bf1ea0aaab59f82ac587c1257a1a6a1736a9..c51a2600371c5c312168dd7724c871c4a484805d 100644 (file)
--- a/Makefile.in
+++ b/Makefile.in
@@ -1,9 +1,9 @@
-# Makefile.in generated by automake 1.11.1 from Makefile.am.
+# Makefile.in generated by automake 1.11.6 from Makefile.am.
  # @configure_input@
  
  # Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
-# 2003, 2004, 2005, 2006, 2007, 2008, 2009  Free Software Foundation,
-# Inc.
+# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software
+# Foundation, Inc.
  # This Makefile.in is free software; the Free Software Foundation
  # gives unlimited permission to copy and/or distribute it,
  # with or without modifications, as long as this notice is preserved.
@@ -16,7 +16,7 @@
  @SET_MAKE@
  
  # Copyright 1991, 1993, 1994, 1996, 1997, 1999, 2000, 2001, 2002, 2003, 2004,
-# 2006, 2007, 2008, 2009 Free Software Foundation, Inc.
+# 2006, 2007, 2008, 2009, 2011, 2012, 2013 Free Software Foundation, Inc.
  #
  # This file is part of the GNU MP Library.
  #
@@ -33,8 +33,91 @@
  # You should have received a copy of the GNU Lesser General Public License
  # along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
  
+# The following options are the same as AM_INIT_AUTOMAKE in configure.in,
+# except no $(top_builddir) on ansi2knr.  That directory is wanted for the
+# Makefiles in subdirectories, but here we must omit it so automake gives
+# the actual ansi2knr build rule, not "cd $(top_builddir) && make ansi2knr".
+#
+# AUTOMAKE_OPTIONS = 1.8 gnu no-dependencies
+
+# Libtool -version-info for libgmp.la and libmp.la.  See "Versioning" in the
+# libtool manual.
+#
+#      CURRENT:REVISION:AGE
+#
+# 1. No interfaces changed, only implementations (good): Increment REVISION.
+#
+# 2. Interfaces added, none removed (good): Increment CURRENT, increment
+#    AGE, set REVISION to 0.
+#
+# 3. Interfaces removed (BAD, breaks upward compatibility): Increment
+#    CURRENT, set AGE and REVISION to 0.
+#
+# Do this separately for libgmp, libgmpxx and libmp, and only for releases.
+#
+#        GMP      -version-info
+#       release   libgmp  libgmpxx libmp
+#        2.0.x      -        -       -
+#        3.0      3:0:0      -     3:0:0
+#        3.0.1    3:1:0      -     3:0:0
+#        3.1      4:0:1      -     4:0:1
+#        3.1.1    4:1:1      -     4:1:1
+#        4.0      5:0:2    3:0:0   4:2:1
+#        4.0.1    5:1:2    3:1:0   4:3:1
+#        4.1      6:0:3    3:2:0   4:4:1
+#        4.1.1    6:1:3    3:3:0   4:5:1
+#        4.1.2    6:2:3    3:4:0   4:6:1
+#        4.1.3    6:3:3    3:5:0   4:7:1
+#        4.1.4    6:3:3    3:5:0   4:7:1       WRONG, same as 4.1.3!
+#        4.2      6:0:3    3:2:0   4:4:1       REALLY WRONG, same as 4.1!
+#        4.2.1    7:1:4    4:1:1   4:10:1      WRONG for libgmpxx
+#        4.2.2    7:2:4    4:2:0   4:11:1
+#        4.2.3    7:3:4    4:3:0   4:12:1
+#        4.2.4    7:4:4    4:4:0   4:13:1
+#        4.3.0    8:0:5    5:0:1   4:14:1
+#        4.3.1    8:1:5    5:1:1   4:15:1      WRONG Really used same as 4.3.0
+#        4.3.2    8:2:5    5:2:1   4:16:1
+#        5.0.0    9:0:6    6:0:2   4:20:1      Should have been 10:0:0
+#        5.0.1   10:1:0    6:1:2   4:21:1
+#        5.0.2   10:2:0    6:2:2   4:22:1
+#        5.0.3   10:3:0    6:3:2   4:23:1
+#        5.0.4   10:4:0    6:4:2   4:24:1
+#        5.0.5   10:5:0    6:5:2   4:25:1
+#        5.1.0   11:0:1    7:0:3     -
+#        5.1.1   11:1:1    7:1:3     -
+#        5.1.2   11:2:1    7:2:3     -
+#        5.1.3   11:3:1    7:3:3     -
+#
+# Starting at 3:0:0 is a slight abuse of the versioning system, but it
+# ensures we're past soname libgmp.so.2, which was used on Debian GNU/Linux
+# packages of gmp 2.  Pretend gmp 2 was 2:0:0, so the interface changes for
+# gmp 3 mean 3:0:0 is right.
+#
+# We interpret "implementation changed" in item "1." above as meaning any
+# release, ie. the REVISION is incremented every time (if nothing else).
+# Even if we thought the code generated will be identical on all systems,
+# it's still good to get the shared library filename (like
+# libgmpxx.so.3.0.4) incrementing, to make it clear which GMP it's from.
+
  
  VPATH = @srcdir@
+am__make_dryrun = \
+  { \
+    am__dry=no; \
+    case $$MAKEFLAGS in \
+      *\\[\ \  ]*) \
+        echo 'am--echo: ; @echo "AM"  OK' | $(MAKE) -f - 2>/dev/null \
+          | grep '^AM OK$$' >/dev/null || am__dry=yes;; \
+      *) \
+        for am__flg in $$MAKEFLAGS; do \
+          case $$am__flg in \
+            *=*|--*) ;; \
+            *n*) am__dry=yes; break;; \
+          esac; \
+        done;; \
+    esac; \
+    test $$am__dry = yes; \
+  }
  pkgdatadir = $(datadir)/@PACKAGE@
  pkgincludedir = $(includedir)/@PACKAGE@
  pkglibdir = $(libdir)/@PACKAGE@
@@ -53,24 +136,22 @@ PRE_UNINSTALL = :
  POST_UNINSTALL = :
  build_triplet = @build@
  host_triplet = @host@
-ANSI2KNR = @ANSI2KNR@
  subdir = .
  DIST_COMMON = README $(am__configure_deps) $(am__include_HEADERS_DIST) \
         $(srcdir)/Makefile.am $(srcdir)/Makefile.in \
-       $(srcdir)/config.in $(srcdir)/gmp-h.in $(srcdir)/mp-h.in \
-       $(top_srcdir)/configure AUTHORS COPYING COPYING.LIB ChangeLog \
-       INSTALL NEWS ansi2knr.1 ansi2knr.c config.guess config.sub \
-       install-sh ltmain.sh missing ylwrap
+       $(srcdir)/config.in $(srcdir)/gmp-h.in $(top_srcdir)/configure \
+       AUTHORS COPYING COPYING.LIB ChangeLog INSTALL NEWS \
+       config.guess config.sub install-sh ltmain.sh missing ylwrap
  ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
  am__aclocal_m4_deps = $(top_srcdir)/acinclude.m4 \
-       $(top_srcdir)/configure.in
+       $(top_srcdir)/configure.ac
  am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
         $(ACLOCAL_M4)
  am__CONFIG_DISTCLEAN_FILES = config.status config.cache config.log \
   configure.lineno config.status.lineno
  mkinstalldirs = $(install_sh) -d
  CONFIG_HEADER = config.h
-CONFIG_CLEAN_FILES = gmp.h mp.h gmp-mparam.h
+CONFIG_CLEAN_FILES = gmp.h gmp-mparam.h
  CONFIG_CLEAN_VPATH_FILES =
  am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`;
  am__vpath_adj = case $$p in \
@@ -93,18 +174,22 @@ am__nobase_list = $(am__nobase_strip_setup); \
  am__base_list = \
    sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \
    sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g'
+am__uninstall_files_from_dir = { \
+  test -z "$$files" \
+    || { test ! -d "$$dir" && test ! -f "$$dir" && test ! -r "$$dir"; } \
+    || { echo " ( cd '$$dir' && rm -f" $$files ")"; \
+         $(am__cd) "$$dir" && rm -f $$files; }; \
+  }
  am__installdirs = "$(DESTDIR)$(libdir)" "$(DESTDIR)$(includedir)" \
         "$(DESTDIR)$(includeexecdir)"
  LTLIBRARIES = $(lib_LTLIBRARIES)
  am__DEPENDENCIES_1 = $(MPF_OBJECTS) $(MPZ_OBJECTS) $(MPQ_OBJECTS) \
-       $(MPN_OBJECTS) $(PRINTF_OBJECTS) $(SCANF_OBJECTS)
-am_libgmp_la_OBJECTS = assert$U.lo compat$U.lo errno$U.lo \
-       extract-dbl$U.lo invalid$U.lo memory$U.lo mp_bpl$U.lo \
-       mp_clz_tab$U.lo mp_dv_tab$U.lo mp_minv_tab$U.lo \
-       mp_get_fns$U.lo mp_set_fns$U.lo rand$U.lo randclr$U.lo \
-       randdef$U.lo randiset$U.lo randlc2s$U.lo randlc2x$U.lo \
-       randmt$U.lo randmts$U.lo rands$U.lo randsd$U.lo randsdui$U.lo \
-       randbui$U.lo randmui$U.lo version$U.lo nextprime$U.lo
+       $(MPN_OBJECTS) $(PRINTF_OBJECTS) $(SCANF_OBJECTS) \
+       $(RANDOM_OBJECTS)
+am_libgmp_la_OBJECTS = assert.lo compat.lo errno.lo extract-dbl.lo \
+       invalid.lo memory.lo mp_bpl.lo mp_clz_tab.lo mp_dv_tab.lo \
+       mp_minv_tab.lo mp_get_fns.lo mp_set_fns.lo version.lo \
+       nextprime.lo primesieve.lo
  libgmp_la_OBJECTS = $(am_libgmp_la_OBJECTS)
  libgmp_la_LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) \
         $(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \
@@ -115,18 +200,6 @@ libgmpxx_la_LINK = $(LIBTOOL) --tag=CXX $(AM_LIBTOOLFLAGS) \
         $(LIBTOOLFLAGS) --mode=link $(CXXLD) $(AM_CXXFLAGS) \
         $(CXXFLAGS) $(libgmpxx_la_LDFLAGS) $(LDFLAGS) -o $@
  @WANT_CXX_TRUE@am_libgmpxx_la_rpath = -rpath $(libdir)
-am__DEPENDENCIES_2 = $(srcdir)/libmp.sym $(MPBSD_OBJECTS) \
-       $(MPN_OBJECTS) mpz/add$U.lo mpz/gcdext$U.lo mpz/invert$U.lo \
-       mpz/mul$U.lo mpz/n_pow_ui$U.lo mpz/realloc$U.lo mpz/set$U.lo \
-       mpz/sub$U.lo mpz/tdiv_q$U.lo
-am_libmp_la_OBJECTS = assert$U.lo errno$U.lo memory$U.lo mp_bpl$U.lo \
-       mp_clz_tab$U.lo mp_dv_tab$U.lo mp_minv_tab$U.lo \
-       mp_get_fns$U.lo mp_set_fns$U.lo nextprime$U.lo
-libmp_la_OBJECTS = $(am_libmp_la_OBJECTS)
-libmp_la_LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \
-       --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) $(libmp_la_LDFLAGS) \
-       $(LDFLAGS) -o $@
-@WANT_MPBSD_TRUE@am_libmp_la_rpath = -rpath $(libdir)
  DEFAULT_INCLUDES = -I.@am__isrc@
  depcomp =
  am__depfiles_maybe =
@@ -149,9 +222,9 @@ CXXLINK = $(LIBTOOL) --tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \
         --mode=link $(CXXLD) $(AM_CXXFLAGS) $(CXXFLAGS) $(AM_LDFLAGS) \
         $(LDFLAGS) -o $@
  SOURCES = $(libgmp_la_SOURCES) $(EXTRA_libgmp_la_SOURCES) \
-       $(libgmpxx_la_SOURCES) $(libmp_la_SOURCES)
+       $(libgmpxx_la_SOURCES)
  DIST_SOURCES = $(libgmp_la_SOURCES) $(EXTRA_libgmp_la_SOURCES) \
-       $(libgmpxx_la_SOURCES) $(libmp_la_SOURCES)
+       $(libgmpxx_la_SOURCES)
  RECURSIVE_TARGETS = all-recursive check-recursive dvi-recursive \
         html-recursive info-recursive install-data-recursive \
         install-dvi-recursive install-exec-recursive \
@@ -159,6 +232,11 @@ RECURSIVE_TARGETS = all-recursive check-recursive dvi-recursive \
         install-pdf-recursive install-ps-recursive install-recursive \
         installcheck-recursive installdirs-recursive pdf-recursive \
         ps-recursive uninstall-recursive
+am__can_run_installinfo = \
+  case $$AM_UPDATE_INFO_DIR in \
+    n|no|NO) false;; \
+    *) (install-info --version) >/dev/null 2>&1;; \
+  esac
  am__include_HEADERS_DIST = gmpxx.h
  HEADERS = $(include_HEADERS) $(nodist_includeexec_HEADERS)
  RECURSIVE_CLEAN_TARGETS = mostlyclean-recursive clean-recursive        \
@@ -173,9 +251,11 @@ DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
  distdir = $(PACKAGE)-$(VERSION)
  top_distdir = $(distdir)
  am__remove_distdir = \
-  { test ! -d "$(distdir)" \
-    || { find "$(distdir)" -type d ! -perm -200 -exec chmod u+w {} ';' \
-         && rm -fr "$(distdir)"; }; }
+  if test -d "$(distdir)"; then \
+    find "$(distdir)" -type d ! -perm -200 -exec chmod u+w {} ';' \
+      && rm -rf "$(distdir)" \
+      || { sleep 5 && rm -rf "$(distdir)"; }; \
+  else :; fi
  am__relativize = \
    dir0=`pwd`; \
    sed_first='s,^\([^/]*\)/.*$$,\1,'; \
@@ -204,6 +284,8 @@ am__relativize = \
  DIST_ARCHIVES = $(distdir).tar.gz
  GZIP_ENV = --best
  distuninstallcheck_listfiles = find . -type f -print
+am__distuninstallcheck_listfiles = $(distuninstallcheck_listfiles) \
+  | sed 's|^\./|$(prefix)/|' | grep -v '$(infodir)/dir$$'
  distcleancheck_listfiles = find . -type f -print
  ABI = @ABI@
  ACLOCAL = @ACLOCAL@
@@ -303,8 +385,8 @@ SHELL = @SHELL@
  SPEED_CYCLECOUNTER_OBJ = @SPEED_CYCLECOUNTER_OBJ@
  STRIP = @STRIP@
  TAL_OBJECT = @TAL_OBJECT@
+TUNE_LIBS = @TUNE_LIBS@
  TUNE_SQR_OBJ = @TUNE_SQR_OBJ@
-U = @U@
  U_FOR_BUILD = @U_FOR_BUILD@
  VERSION = @VERSION@
  WITH_READLINE_01 = @WITH_READLINE_01@
@@ -351,7 +433,6 @@ mandir = @mandir@
  mkdir_p = @mkdir_p@
  mpn_objects = @mpn_objects@
  mpn_objs_in_libgmp = @mpn_objs_in_libgmp@
-mpn_objs_in_libmp = @mpn_objs_in_libmp@
  oldincludedir = @oldincludedir@
  pdfdir = @pdfdir@
  prefix = @prefix@
@@ -365,82 +446,14 @@ target_alias = @target_alias@
  top_build_prefix = @top_build_prefix@
  top_builddir = @top_builddir@
  top_srcdir = @top_srcdir@
+LIBGMP_LT_CURRENT = 11
+LIBGMP_LT_REVISION = 3
+LIBGMP_LT_AGE = 1
+LIBGMPXX_LT_CURRENT = 7
+LIBGMPXX_LT_REVISION = 3
+LIBGMPXX_LT_AGE = 3
+SUBDIRS = tests mpn mpz mpq mpf printf scanf rand cxx demos tune doc
  
-# The following options are the same as AM_INIT_AUTOMAKE in configure.in,
-# except no $(top_builddir) on ansi2knr.  That directory is wanted for the
-# Makefiles in subdirectories, but here we must omit it so automake gives
-# the actual ansi2knr build rule, not "cd $(top_builddir) && make ansi2knr".
-#
-AUTOMAKE_OPTIONS = 1.8 gnu no-dependencies ansi2knr
-
-# Libtool -version-info for libgmp.la and libmp.la.  See "Versioning" in the
-# libtool manual.
-#
-#      CURRENT:REVISION:AGE
-#
-# 1. No interfaces changed, only implementations (good): Increment REVISION.
-#
-# 2. Interfaces added, none removed (good): Increment CURRENT, increment
-#    AGE, set REVISION to 0.
-#
-# 3. Interfaces removed (BAD, breaks upward compatibility): Increment
-#    CURRENT, set AGE and REVISION to 0.
-#
-# Do this separately for libgmp, libgmpxx and libmp, and only for releases.
-#
-#        GMP      -version-info
-#       release   libgmp  libgmpxx libmp
-#        2.0.x      -        -       -
-#        3.0      3:0:0      -     3:0:0
-#        3.0.1    3:1:0      -     3:0:0
-#        3.1      4:0:1      -     4:0:1
-#        3.1.1    4:1:1      -     4:1:1
-#        4.0      5:0:2    3:0:0   4:2:1
-#        4.0.1    5:1:2    3:1:0   4:3:1
-#        4.1      6:0:3    3:2:0   4:4:1
-#        4.1.1    6:1:3    3:3:0   4:5:1
-#        4.1.2    6:2:3    3:4:0   4:6:1
-#        4.1.3    6:3:3    3:5:0   4:7:1
-#        4.1.4    6:3:3    3:5:0   4:7:1       WRONG, same as 4.1.3!
-#        4.2      6:0:3    3:2:0   4:4:1       REALLY WRONG, same as 4.1!
-#        4.2.1    7:1:4    4:1:1   4:10:1      WRONG for libgmpxx
-#        4.2.2    7:2:4    4:2:0   4:11:1
-#        4.2.3    7:3:4    4:3:0   4:12:1
-#        4.2.4    7:4:4    4:4:0   4:13:1
-#        4.3.0    8:0:5    5:0:1   4:14:1
-#        4.3.1    8:1:5    5:1:1   4:15:1      WRONG Really used same as 4.3.0
-#        4.3.2    8:2:5    5:2:1   4:16:1
-#        5.0.0    9:0:6    6:0:2   4:20:1      Should have been 10:0:0
-#        5.0.1   10:1:0    6:1:2   4:21:1
-#        5.0.2   10:2:0    6:2:2   4:22:1
-#        5.0.3   10:3:0    6:3:2   4:23:1
-#        5.0.4   10:4:0    6:4:2   4:24:1
-#        5.0.5   10:5:0    6:5:2   4:25:1
-#
-# Starting at 3:0:0 is a slight abuse of the versioning system, but it
-# ensures we're past soname libgmp.so.2, which was used on Debian GNU/Linux
-# packages of gmp 2.  Pretend gmp 2 was 2:0:0, so the interface changes for
-# gmp 3 mean 3:0:0 is right.
-#
-# We interpret "implementation changed" in item "1." above as meaning any
-# release, ie. the REVISION is incremented every time (if nothing else).
-# Even if we thought the code generated will be identical on all systems,
-# it's still good to get the shared library filename (like
-# libgmpxx.so.3.0.4) incrementing, to make it clear which GMP it's from.
-LIBGMP_LT_CURRENT = 10
-LIBGMP_LT_REVISION = 5
-LIBGMP_LT_AGE = 0
-LIBGMPXX_LT_CURRENT = 6
-LIBGMPXX_LT_REVISION = 5
-LIBGMPXX_LT_AGE = 2
-LIBMP_LT_CURRENT = 4
-LIBMP_LT_REVISION = 25
-LIBMP_LT_AGE = 1
-SUBDIRS = tests mpn mpz mpq mpf printf scanf cxx mpbsd demos tune doc
-
-# The ansi2knr setups for the build programs are the same as the normal
-# automake ansi2knr rules, but using $(CC_FOR_BUILD) instead of $(CC).
-#
  # The "test -f" support for srcdir!=builddir is similar to the automake .c.o
  # etc rules, but with each foo.c explicitly, since $< is not portable
  # outside an inference rule.
@@ -456,9 +469,13 @@ SUBDIRS = tests mpn mpz mpq mpf printf scanf cxx mpbsd demos tune doc
  # build-system stuff over and done with at the start.  Also, dependencies on
  # the .h files are not properly expressed for the various objects that use
  # them.
+
+# Distribute mini-gmp. Test sources copied by dist-hook.
  EXTRA_DIST = configfsf.guess configfsf.sub .gdbinit INSTALL.autoconf \
-       gmpxx.h libmp.sym dumbmp.c gen-fac_ui.c gen-fib.c gen-bases.c \
-       gen-trialdivtab.c gen-psqr.c
+       gmpxx.h bootstrap.c gen-fac.c gen-fib.c gen-bases.c \
+       gen-trialdivtab.c gen-jacobitab.c gen-psqr.c mini-gmp/README \
+       mini-gmp/mini-gmp.c mini-gmp/mini-gmp.h \
+       mini-gmp/tests/Makefile mini-gmp/tests/run-tests
  @WANT_CXX_TRUE@GMPXX_HEADERS_OPTION = gmpxx.h
  
  # gmp.h and mp.h are architecture dependent, mainly since they encode the
@@ -472,14 +489,16 @@ EXTRA_DIST = configfsf.guess configfsf.sub .gdbinit INSTALL.autoconf \
  #
  includeexecdir = $(exec_prefix)/include
  include_HEADERS = $(GMPXX_HEADERS_OPTION)
-nodist_includeexec_HEADERS = gmp.h $(MPBSD_HEADERS_OPTION)
-lib_LTLIBRARIES = libgmp.la $(GMPXX_LTLIBRARIES_OPTION) $(MPBSD_LTLIBRARIES_OPTION)
-BUILT_SOURCES = gmp.h mp.h mpz/fac_ui.h fib_table.h mpn/fib_table.c \
-       mp_bases.h mpn/mp_bases.c trialdivtab.h mpn/perfsqr.h
+nodist_includeexec_HEADERS = gmp.h
+lib_LTLIBRARIES = libgmp.la $(GMPXX_LTLIBRARIES_OPTION)
+BUILT_SOURCES = gmp.h fac_table.h fib_table.h mpn/fib_table.c \
+       mp_bases.h mpn/mp_bases.c trialdivtab.h mpn/jacobitab.h \
+       mpn/perfsqr.h
  DISTCLEANFILES = $(BUILT_SOURCES) config.m4 @gmp_srclinks@ \
-       gen-fac_ui$(EXEEXT_FOR_BUILD) gen-fib$(EXEEXT_FOR_BUILD) \
+       gen-fac$(EXEEXT_FOR_BUILD) gen-fib$(EXEEXT_FOR_BUILD) \
         gen-bases$(EXEEXT_FOR_BUILD) \
-       gen-trialdivtab$(EXEEXT_FOR_BUILD) gen-psqr$(EXEEXT_FOR_BUILD)
+       gen-trialdivtab$(EXEEXT_FOR_BUILD) \
+       gen-jacobitab$(EXEEXT_FOR_BUILD) gen-psqr$(EXEEXT_FOR_BUILD)
  
  # Tell gmp.h it's building gmp, not an application, used by windows DLL stuff.
  INCLUDES = -D__GMP_WITHIN_GMP
@@ -517,16 +536,17 @@ MPZ_OBJECTS = mpz/abs$U.lo mpz/add$U.lo mpz/add_ui$U.lo                   \
    mpz/cong$U.lo mpz/cong_2exp$U.lo mpz/cong_ui$U.lo                    \
    mpz/divexact$U.lo mpz/divegcd$U.lo mpz/dive_ui$U.lo                  \
    mpz/divis$U.lo mpz/divis_ui$U.lo mpz/divis_2exp$U.lo mpz/dump$U.lo   \
-  mpz/export$U.lo mpz/fac_ui$U.lo mpz/fdiv_q$U.lo                      \
+  mpz/export$U.lo mpz/mfac_uiui$U.lo                                   \
+  mpz/2fac_ui$U.lo mpz/fac_ui$U.lo mpz/oddfac_1$U.lo mpz/prodlimbs$U.lo        \
    mpz/fdiv_q_ui$U.lo mpz/fdiv_qr$U.lo mpz/fdiv_qr_ui$U.lo              \
-  mpz/fdiv_r$U.lo mpz/fdiv_r_ui$U.lo                                   \
+  mpz/fdiv_r$U.lo mpz/fdiv_r_ui$U.lo mpz/fdiv_q$U.lo                   \
    mpz/fdiv_ui$U.lo mpz/fib_ui$U.lo mpz/fib2_ui$U.lo mpz/fits_sint$U.lo \
    mpz/fits_slong$U.lo mpz/fits_sshort$U.lo mpz/fits_uint$U.lo          \
    mpz/fits_ulong$U.lo mpz/fits_ushort$U.lo mpz/gcd$U.lo                        \
    mpz/gcd_ui$U.lo mpz/gcdext$U.lo mpz/get_d$U.lo mpz/get_d_2exp$U.lo   \
    mpz/get_si$U.lo mpz/get_str$U.lo mpz/get_ui$U.lo mpz/getlimbn$U.lo   \
    mpz/hamdist$U.lo                                                     \
-  mpz/import$U.lo mpz/init$U.lo mpz/init2$U.lo mpz/inits$U.lo          \
+  mpz/import$U.lo mpz/init$U.lo mpz/init2$U.lo mpz/inits$U.lo          \
    mpz/inp_raw$U.lo mpz/inp_str$U.lo mpz/invert$U.lo                    \
    mpz/ior$U.lo mpz/iset$U.lo mpz/iset_d$U.lo mpz/iset_si$U.lo          \
    mpz/iset_str$U.lo mpz/iset_ui$U.lo mpz/jacobi$U.lo mpz/kronsz$U.lo   \
@@ -537,7 +557,8 @@ MPZ_OBJECTS = mpz/abs$U.lo mpz/add$U.lo mpz/add_ui$U.lo                     \
    mpz/n_pow_ui$U.lo mpz/neg$U.lo mpz/nextprime$U.lo                    \
    mpz/out_raw$U.lo mpz/out_str$U.lo mpz/perfpow$U.lo mpz/perfsqr$U.lo  \
    mpz/popcount$U.lo mpz/pow_ui$U.lo mpz/powm$U.lo mpz/powm_sec$U.lo    \
-  mpz/powm_ui$U.lo mpz/pprime_p$U.lo mpz/random$U.lo mpz/random2$U.lo  \
+  mpz/powm_ui$U.lo mpz/primorial_ui$U.lo                               \
+  mpz/pprime_p$U.lo mpz/random$U.lo mpz/random2$U.lo                   \
    mpz/realloc$U.lo mpz/realloc2$U.lo mpz/remove$U.lo                   \
    mpz/root$U.lo mpz/rootrem$U.lo mpz/rrandomb$U.lo mpz/scan0$U.lo      \
    mpz/scan1$U.lo mpz/set$U.lo mpz/set_d$U.lo mpz/set_f$U.lo            \
@@ -580,19 +601,19 @@ SCANF_OBJECTS = \
    scanf/scanf$U.lo scanf/sscanf$U.lo scanf/sscanffuns$U.lo     \
    scanf/vfscanf$U.lo scanf/vscanf$U.lo scanf/vsscanf$U.lo
  
+RANDOM_OBJECTS = \
+  rand/rand$U.lo rand/randclr$U.lo rand/randdef$U.lo rand/randiset$U.lo        \
+  rand/randlc2s$U.lo rand/randlc2x$U.lo rand/randmt$U.lo               \
+  rand/randmts$U.lo rand/rands$U.lo rand/randsd$U.lo rand/randsdui$U.lo        \
+  rand/randbui$U.lo rand/randmui$U.lo
+
  
  # no $U for C++ files
  CXX_OBJECTS = \
    cxx/isfuns.lo cxx/ismpf.lo cxx/ismpq.lo cxx/ismpz.lo cxx/ismpznw.lo  \
-  cxx/osdoprnti.lo cxx/osfuns.lo                                       \
+  cxx/limits.lo cxx/osdoprnti.lo cxx/osfuns.lo                         \
    cxx/osmpf.lo cxx/osmpq.lo cxx/osmpz.lo
  
-MPBSD_OBJECTS = mpbsd/add$U.lo mpbsd/tdiv_qr$U.lo mpbsd/set$U.lo       \
-  mpbsd/powm$U.lo mpbsd/sub$U.lo mpbsd/cmp$U.lo mpbsd/mfree$U.lo       \
-  mpbsd/mtox$U.lo mpbsd/realloc$U.lo mpbsd/gcd$U.lo mpbsd/itom$U.lo    \
-  mpbsd/min$U.lo mpbsd/mul$U.lo mpbsd/mout$U.lo mpbsd/rpow$U.lo                \
-  mpbsd/sdiv$U.lo mpbsd/sqrtrem$U.lo mpbsd/xtom$U.lo
-
  
  # In libtool 1.5 it doesn't work to build libgmp.la from the convenience
  # libraries like mpz/libmpz.la.  Or rather it works, but it ends up putting
@@ -607,18 +628,16 @@ MPBSD_OBJECTS = mpbsd/add$U.lo mpbsd/tdiv_qr$U.lo mpbsd/set$U.lo  \
  # Currently, for libgmp, unlike libmp below, we're not using
  # -export-symbols, since the tune and speed programs, and perhaps some of
  # the test programs, want to access undocumented symbols.
-libgmp_la_SOURCES = gmp-impl.h longlong.h randmt.h                     \
+libgmp_la_SOURCES = gmp-impl.h longlong.h                              \
    assert.c compat.c errno.c extract-dbl.c invalid.c memory.c           \
    mp_bpl.c mp_clz_tab.c mp_dv_tab.c mp_minv_tab.c mp_get_fns.c mp_set_fns.c \
-  rand.c randclr.c randdef.c randiset.c randlc2s.c randlc2x.c randmt.c \
-  randmts.c rands.c randsd.c randsdui.c randbui.c randmui.c version.c  \
-  nextprime.c
+  version.c nextprime.c primesieve.c
  
  EXTRA_libgmp_la_SOURCES = tal-debug.c tal-notreent.c tal-reent.c
  libgmp_la_DEPENDENCIES = @TAL_OBJECT@          \
    $(MPF_OBJECTS) $(MPZ_OBJECTS) $(MPQ_OBJECTS) \
    $(MPN_OBJECTS) @mpn_objs_in_libgmp@          \
-  $(PRINTF_OBJECTS)  $(SCANF_OBJECTS)
+  $(PRINTF_OBJECTS)  $(SCANF_OBJECTS) $(RANDOM_OBJECTS)
  
  libgmp_la_LIBADD = $(libgmp_la_DEPENDENCIES)
  libgmp_la_LDFLAGS = $(GMP_LDFLAGS) $(LIBGMP_LDFLAGS) \
@@ -635,32 +654,12 @@ libgmpxx_la_LIBADD = $(libgmpxx_la_DEPENDENCIES)
  libgmpxx_la_LDFLAGS = $(GMP_LDFLAGS) $(LIBGMPXX_LDFLAGS) \
    -version-info $(LIBGMPXX_LT_CURRENT):$(LIBGMPXX_LT_REVISION):$(LIBGMPXX_LT_AGE)
  
-
-# The selected mpz objects here support mpz/powm.c (built as mpbsd/powm.lo)
-# and can probably be removed when that switches to an mpn implementation.
-# (Apart from mpz/n_pow_ui$U.lo, which supports mpbsd/rpow.c)
-@WANT_MPBSD_TRUE@MPBSD_HEADERS_OPTION = mp.h
-@WANT_MPBSD_TRUE@MPBSD_LTLIBRARIES_OPTION = libmp.la
-libmp_la_SOURCES = assert.c errno.c memory.c mp_bpl.c mp_clz_tab.c     \
-  mp_dv_tab.c mp_minv_tab.c mp_get_fns.c mp_set_fns.c nextprime.c
-
-libmp_la_DEPENDENCIES = $(srcdir)/libmp.sym                            \
-  @TAL_OBJECT@ $(MPBSD_OBJECTS) $(MPN_OBJECTS) @mpn_objs_in_libmp@     \
-  mpz/add$U.lo mpz/gcdext$U.lo mpz/invert$U.lo mpz/mul$U.lo            \
-  mpz/n_pow_ui$U.lo mpz/realloc$U.lo mpz/set$U.lo mpz/sub$U.lo         \
-  mpz/tdiv_q$U.lo
-
-libmp_la_LIBADD = $(libmp_la_DEPENDENCIES)
-libmp_la_LDFLAGS = $(GMP_LDFLAGS) \
-  -version-info $(LIBMP_LT_CURRENT):$(LIBMP_LT_REVISION):$(LIBMP_LT_AGE) \
-  -export-symbols $(srcdir)/libmp.sym
-
  all: $(BUILT_SOURCES) config.h
         $(MAKE) $(AM_MAKEFLAGS) all-recursive
  
  .SUFFIXES:
  .SUFFIXES: .c .cc .lo .o .obj
-am--refresh:
+am--refresh: Makefile
         @:
  $(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am  $(am__configure_deps)
         @for dep in $?; do \
@@ -696,10 +695,8 @@ $(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps)
  $(am__aclocal_m4_deps):
  
  config.h: stamp-h1
-       @if test ! -f $@; then \
-         rm -f stamp-h1; \
-         $(MAKE) $(AM_MAKEFLAGS) stamp-h1; \
-       else :; fi
+       @if test ! -f $@; then rm -f stamp-h1; else :; fi
+       @if test ! -f $@; then $(MAKE) $(AM_MAKEFLAGS) stamp-h1; else :; fi
  
  stamp-h1: $(srcdir)/config.in $(top_builddir)/config.status
         @rm -f stamp-h1
@@ -713,11 +710,8 @@ distclean-hdr:
         -rm -f config.h stamp-h1
  gmp.h: $(top_builddir)/config.status $(srcdir)/gmp-h.in
         cd $(top_builddir) && $(SHELL) ./config.status $@
-mp.h: $(top_builddir)/config.status $(srcdir)/mp-h.in
-       cd $(top_builddir) && $(SHELL) ./config.status $@
  install-libLTLIBRARIES: $(lib_LTLIBRARIES)
         @$(NORMAL_INSTALL)
-       test -z "$(libdir)" || $(MKDIR_P) "$(DESTDIR)$(libdir)"
         @list='$(lib_LTLIBRARIES)'; test -n "$(libdir)" || list=; \
         list2=; for p in $$list; do \
           if test -f $$p; then \
@@ -725,6 +719,8 @@ install-libLTLIBRARIES: $(lib_LTLIBRARIES)
           else :; fi; \
         done; \
         test -z "$$list2" || { \
+         echo " $(MKDIR_P) '$(DESTDIR)$(libdir)'"; \
+         $(MKDIR_P) "$(DESTDIR)$(libdir)" || exit 1; \
           echo " $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL) $(INSTALL_STRIP_FLAG) $$list2 '$(DESTDIR)$(libdir)'"; \
           $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL) $(INSTALL_STRIP_FLAG) $$list2 "$(DESTDIR)$(libdir)"; \
         }
@@ -746,27 +742,16 @@ clean-libLTLIBRARIES:
           echo "rm -f \"$${dir}/so_locations\""; \
           rm -f "$${dir}/so_locations"; \
         done
-libgmp.la: $(libgmp_la_OBJECTS) $(libgmp_la_DEPENDENCIES) 
+libgmp.la: $(libgmp_la_OBJECTS) $(libgmp_la_DEPENDENCIES) $(EXTRA_libgmp_la_DEPENDENCIES) 
         $(libgmp_la_LINK) -rpath $(libdir) $(libgmp_la_OBJECTS) $(libgmp_la_LIBADD) $(LIBS)
-libgmpxx.la: $(libgmpxx_la_OBJECTS) $(libgmpxx_la_DEPENDENCIES) 
+libgmpxx.la: $(libgmpxx_la_OBJECTS) $(libgmpxx_la_DEPENDENCIES) $(EXTRA_libgmpxx_la_DEPENDENCIES) 
         $(libgmpxx_la_LINK) $(am_libgmpxx_la_rpath) $(libgmpxx_la_OBJECTS) $(libgmpxx_la_LIBADD) $(LIBS)
-libmp.la: $(libmp_la_OBJECTS) $(libmp_la_DEPENDENCIES) 
-       $(libmp_la_LINK) $(am_libmp_la_rpath) $(libmp_la_OBJECTS) $(libmp_la_LIBADD) $(LIBS)
  
  mostlyclean-compile:
         -rm -f *.$(OBJEXT)
  
  distclean-compile:
         -rm -f *.tab.c
-./ansi2knr: ansi2knr.$(OBJEXT)
-       $(LINK) ansi2knr.$(OBJEXT) $(LIBS)
-ansi2knr.$(OBJEXT): $(CONFIG_HEADER)
-
-clean-krextra:
-       -rm -f ansi2knr
-
-mostlyclean-kr:
-       -test "$U" = "" || rm -f *_.c
  
  .c.o:
         $(COMPILE) -c $<
@@ -776,82 +761,6 @@ mostlyclean-kr:
  
  .c.lo:
         $(LTCOMPILE) -c -o $@ $<
-assert_.c: assert.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/assert.c; then echo $(srcdir)/assert.c; else echo assert.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-compat_.c: compat.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/compat.c; then echo $(srcdir)/compat.c; else echo compat.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-errno_.c: errno.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/errno.c; then echo $(srcdir)/errno.c; else echo errno.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-extract-dbl_.c: extract-dbl.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/extract-dbl.c; then echo $(srcdir)/extract-dbl.c; else echo extract-dbl.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-invalid_.c: invalid.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/invalid.c; then echo $(srcdir)/invalid.c; else echo invalid.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-memory_.c: memory.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/memory.c; then echo $(srcdir)/memory.c; else echo memory.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-mp_bpl_.c: mp_bpl.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/mp_bpl.c; then echo $(srcdir)/mp_bpl.c; else echo mp_bpl.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-mp_clz_tab_.c: mp_clz_tab.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/mp_clz_tab.c; then echo $(srcdir)/mp_clz_tab.c; else echo mp_clz_tab.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-mp_dv_tab_.c: mp_dv_tab.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/mp_dv_tab.c; then echo $(srcdir)/mp_dv_tab.c; else echo mp_dv_tab.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-mp_get_fns_.c: mp_get_fns.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/mp_get_fns.c; then echo $(srcdir)/mp_get_fns.c; else echo mp_get_fns.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-mp_minv_tab_.c: mp_minv_tab.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/mp_minv_tab.c; then echo $(srcdir)/mp_minv_tab.c; else echo mp_minv_tab.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-mp_set_fns_.c: mp_set_fns.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/mp_set_fns.c; then echo $(srcdir)/mp_set_fns.c; else echo mp_set_fns.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-nextprime_.c: nextprime.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/nextprime.c; then echo $(srcdir)/nextprime.c; else echo nextprime.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-rand_.c: rand.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/rand.c; then echo $(srcdir)/rand.c; else echo rand.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-randbui_.c: randbui.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/randbui.c; then echo $(srcdir)/randbui.c; else echo randbui.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-randclr_.c: randclr.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/randclr.c; then echo $(srcdir)/randclr.c; else echo randclr.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-randdef_.c: randdef.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/randdef.c; then echo $(srcdir)/randdef.c; else echo randdef.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-randiset_.c: randiset.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/randiset.c; then echo $(srcdir)/randiset.c; else echo randiset.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-randlc2s_.c: randlc2s.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/randlc2s.c; then echo $(srcdir)/randlc2s.c; else echo randlc2s.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-randlc2x_.c: randlc2x.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/randlc2x.c; then echo $(srcdir)/randlc2x.c; else echo randlc2x.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-randmt_.c: randmt.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/randmt.c; then echo $(srcdir)/randmt.c; else echo randmt.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-randmts_.c: randmts.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/randmts.c; then echo $(srcdir)/randmts.c; else echo randmts.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-randmui_.c: randmui.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/randmui.c; then echo $(srcdir)/randmui.c; else echo randmui.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-rands_.c: rands.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/rands.c; then echo $(srcdir)/rands.c; else echo rands.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-randsd_.c: randsd.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/randsd.c; then echo $(srcdir)/randsd.c; else echo randsd.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-randsdui_.c: randsdui.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/randsdui.c; then echo $(srcdir)/randsdui.c; else echo randsdui.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-tal-debug_.c: tal-debug.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/tal-debug.c; then echo $(srcdir)/tal-debug.c; else echo tal-debug.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-tal-notreent_.c: tal-notreent.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/tal-notreent.c; then echo $(srcdir)/tal-notreent.c; else echo tal-notreent.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-tal-reent_.c: tal-reent.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/tal-reent.c; then echo $(srcdir)/tal-reent.c; else echo tal-reent.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-version_.c: version.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/version.c; then echo $(srcdir)/version.c; else echo version.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-assert_.$(OBJEXT) assert_.lo compat_.$(OBJEXT) compat_.lo \
-errno_.$(OBJEXT) errno_.lo extract-dbl_.$(OBJEXT) extract-dbl_.lo \
-invalid_.$(OBJEXT) invalid_.lo memory_.$(OBJEXT) memory_.lo \
-mp_bpl_.$(OBJEXT) mp_bpl_.lo mp_clz_tab_.$(OBJEXT) mp_clz_tab_.lo \
-mp_dv_tab_.$(OBJEXT) mp_dv_tab_.lo mp_get_fns_.$(OBJEXT) \
-mp_get_fns_.lo mp_minv_tab_.$(OBJEXT) mp_minv_tab_.lo \
-mp_set_fns_.$(OBJEXT) mp_set_fns_.lo nextprime_.$(OBJEXT) \
-nextprime_.lo rand_.$(OBJEXT) rand_.lo randbui_.$(OBJEXT) randbui_.lo \
-randclr_.$(OBJEXT) randclr_.lo randdef_.$(OBJEXT) randdef_.lo \
-randiset_.$(OBJEXT) randiset_.lo randlc2s_.$(OBJEXT) randlc2s_.lo \
-randlc2x_.$(OBJEXT) randlc2x_.lo randmt_.$(OBJEXT) randmt_.lo \
-randmts_.$(OBJEXT) randmts_.lo randmui_.$(OBJEXT) randmui_.lo \
-rands_.$(OBJEXT) rands_.lo randsd_.$(OBJEXT) randsd_.lo \
-randsdui_.$(OBJEXT) randsdui_.lo tal-debug_.$(OBJEXT) tal-debug_.lo \
-tal-notreent_.$(OBJEXT) tal-notreent_.lo tal-reent_.$(OBJEXT) \
-tal-reent_.lo version_.$(OBJEXT) version_.lo : $(ANSI2KNR)
  
  .cc.o:
         $(CXXCOMPILE) -c -o $@ $<
@@ -875,8 +784,11 @@ distclean-libtool:
         -rm -f libtool config.lt
  install-includeHEADERS: $(include_HEADERS)
         @$(NORMAL_INSTALL)
-       test -z "$(includedir)" || $(MKDIR_P) "$(DESTDIR)$(includedir)"
         @list='$(include_HEADERS)'; test -n "$(includedir)" || list=; \
+       if test -n "$$list"; then \
+         echo " $(MKDIR_P) '$(DESTDIR)$(includedir)'"; \
+         $(MKDIR_P) "$(DESTDIR)$(includedir)" || exit 1; \
+       fi; \
         for p in $$list; do \
           if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \
           echo "$$d$$p"; \
@@ -890,13 +802,14 @@ uninstall-includeHEADERS:
         @$(NORMAL_UNINSTALL)
         @list='$(include_HEADERS)'; test -n "$(includedir)" || list=; \
         files=`for p in $$list; do echo $$p; done | sed -e 's|^.*/||'`; \
-       test -n "$$files" || exit 0; \
-       echo " ( cd '$(DESTDIR)$(includedir)' && rm -f" $$files ")"; \
-       cd "$(DESTDIR)$(includedir)" && rm -f $$files
+       dir='$(DESTDIR)$(includedir)'; $(am__uninstall_files_from_dir)
  install-nodist_includeexecHEADERS: $(nodist_includeexec_HEADERS)
         @$(NORMAL_INSTALL)
-       test -z "$(includeexecdir)" || $(MKDIR_P) "$(DESTDIR)$(includeexecdir)"
         @list='$(nodist_includeexec_HEADERS)'; test -n "$(includeexecdir)" || list=; \
+       if test -n "$$list"; then \
+         echo " $(MKDIR_P) '$(DESTDIR)$(includeexecdir)'"; \
+         $(MKDIR_P) "$(DESTDIR)$(includeexecdir)" || exit 1; \
+       fi; \
         for p in $$list; do \
           if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \
           echo "$$d$$p"; \
@@ -910,9 +823,7 @@ uninstall-nodist_includeexecHEADERS:
         @$(NORMAL_UNINSTALL)
         @list='$(nodist_includeexec_HEADERS)'; test -n "$(includeexecdir)" || list=; \
         files=`for p in $$list; do echo $$p; done | sed -e 's|^.*/||'`; \
-       test -n "$$files" || exit 0; \
-       echo " ( cd '$(DESTDIR)$(includeexecdir)' && rm -f" $$files ")"; \
-       cd "$(DESTDIR)$(includeexecdir)" && rm -f $$files
+       dir='$(DESTDIR)$(includeexecdir)'; $(am__uninstall_files_from_dir)
  
  # This directory's subdirectories are mostly independent; you can cd
  # into them and run `make' without going through this Makefile.
@@ -1083,13 +994,10 @@ distdir: $(DISTFILES)
         done
         @list='$(DIST_SUBDIRS)'; for subdir in $$list; do \
           if test "$$subdir" = .; then :; else \
-           test -d "$(distdir)/$$subdir" \
-           || $(MKDIR_P) "$(distdir)/$$subdir" \
-           || exit 1; \
-         fi; \
-       done
-       @list='$(DIST_SUBDIRS)'; for subdir in $$list; do \
-         if test "$$subdir" = .; then :; else \
+           $(am__make_dryrun) \
+             || test -d "$(distdir)/$$subdir" \
+             || $(MKDIR_P) "$(distdir)/$$subdir" \
+             || exit 1; \
             dir1=$$subdir; dir2="$(distdir)/$$subdir"; \
             $(am__relativize); \
             new_distdir=$$reldir; \
@@ -1124,7 +1032,11 @@ dist-gzip: distdir
         $(am__remove_distdir)
  
  dist-bzip2: distdir
-       tardir=$(distdir) && $(am__tar) | bzip2 -9 -c >$(distdir).tar.bz2
+       tardir=$(distdir) && $(am__tar) | BZIP2=$${BZIP2--9} bzip2 -c >$(distdir).tar.bz2
+       $(am__remove_distdir)
+
+dist-lzip: distdir
+       tardir=$(distdir) && $(am__tar) | lzip -c $${LZIP_OPT--9} >$(distdir).tar.lz
         $(am__remove_distdir)
  
  dist-lzma: distdir
@@ -1132,7 +1044,7 @@ dist-lzma: distdir
         $(am__remove_distdir)
  
  dist-xz: distdir
-       tardir=$(distdir) && $(am__tar) | xz -c >$(distdir).tar.xz
+       tardir=$(distdir) && $(am__tar) | XZ_OPT=$${XZ_OPT--e} xz -c >$(distdir).tar.xz
         $(am__remove_distdir)
  
  dist-tarZ: distdir
@@ -1163,6 +1075,8 @@ distcheck: dist
           bzip2 -dc $(distdir).tar.bz2 | $(am__untar) ;;\
         *.tar.lzma*) \
           lzma -dc $(distdir).tar.lzma | $(am__untar) ;;\
+       *.tar.lz*) \
+         lzip -dc $(distdir).tar.lz | $(am__untar) ;;\
         *.tar.xz*) \
           xz -dc $(distdir).tar.xz | $(am__untar) ;;\
         *.tar.Z*) \
@@ -1172,7 +1086,7 @@ distcheck: dist
         *.zip*) \
           unzip $(distdir).zip ;;\
         esac
-       chmod -R a-w $(distdir); chmod a+w $(distdir)
+       chmod -R a-w $(distdir); chmod u+w $(distdir)
         mkdir $(distdir)/_build
         mkdir $(distdir)/_inst
         chmod a-w $(distdir)
@@ -1182,6 +1096,7 @@ distcheck: dist
           && am__cwd=`pwd` \
           && $(am__cd) $(distdir)/_build \
           && ../configure --srcdir=.. --prefix="$$dc_install_base" \
+           $(AM_DISTCHECK_CONFIGURE_FLAGS) \
             $(DISTCHECK_CONFIGURE_FLAGS) \
           && $(MAKE) $(AM_MAKEFLAGS) \
           && $(MAKE) $(AM_MAKEFLAGS) dvi \
@@ -1210,8 +1125,16 @@ distcheck: dist
           list='$(DIST_ARCHIVES)'; for i in $$list; do echo $$i; done) | \
           sed -e 1h -e 1s/./=/g -e 1p -e 1x -e '$$p' -e '$$x'
  distuninstallcheck:
-       @$(am__cd) '$(distuninstallcheck_dir)' \
-       && test `$(distuninstallcheck_listfiles) | wc -l` -le 1 \
+       @test -n '$(distuninstallcheck_dir)' || { \
+         echo 'ERROR: trying to run $@ with an empty' \
+              '$$(distuninstallcheck_dir)' >&2; \
+         exit 1; \
+       }; \
+       $(am__cd) '$(distuninstallcheck_dir)' || { \
+         echo 'ERROR: cannot chdir into $(distuninstallcheck_dir)' >&2; \
+         exit 1; \
+       }; \
+       test `$(am__distuninstallcheck_listfiles) | wc -l` -eq 0 \
            || { echo "ERROR: files left after uninstall:" ; \
                 if test -n "$(DESTDIR)"; then \
                   echo "  (check DESTDIR support)"; \
@@ -1230,7 +1153,7 @@ distcleancheck: distclean
  check-am: all-am
  check: $(BUILT_SOURCES)
         $(MAKE) $(AM_MAKEFLAGS) check-recursive
-all-am: Makefile $(ANSI2KNR) $(LTLIBRARIES) $(HEADERS) config.h
+all-am: Makefile $(LTLIBRARIES) $(HEADERS) config.h
  installdirs: installdirs-recursive
  installdirs-am:
         for dir in "$(DESTDIR)$(libdir)" "$(DESTDIR)$(includedir)" "$(DESTDIR)$(includeexecdir)"; do \
@@ -1247,10 +1170,15 @@ install-am: all-am
  
  installcheck: installcheck-recursive
  install-strip:
-       $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
-         install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
-         `test -z '$(STRIP)' || \
-           echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+       if test -z '$(STRIP)'; then \
+         $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+           install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+             install; \
+       else \
+         $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+           install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+           "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
+       fi
  mostlyclean-generic:
  
  clean-generic:
@@ -1266,14 +1194,14 @@ maintainer-clean-generic:
         -test -z "$(BUILT_SOURCES)" || rm -f $(BUILT_SOURCES)
  clean: clean-recursive
  
-clean-am: clean-generic clean-krextra clean-libLTLIBRARIES \
-       clean-libtool mostlyclean-am
+clean-am: clean-generic clean-libLTLIBRARIES clean-libtool clean-local \
+       mostlyclean-am
  
  distclean: distclean-recursive
         -rm -f $(am__CONFIG_DISTCLEAN_FILES)
         -rm -f Makefile
  distclean-am: clean-am distclean-compile distclean-generic \
-       distclean-hdr distclean-libtool distclean-tags
+       distclean-hdr distclean-libtool distclean-local distclean-tags
  
  dvi: dvi-recursive
  
@@ -1325,7 +1253,7 @@ maintainer-clean-am: distclean-am maintainer-clean-generic
  
  mostlyclean: mostlyclean-recursive
  
-mostlyclean-am: mostlyclean-compile mostlyclean-generic mostlyclean-kr \
+mostlyclean-am: mostlyclean-compile mostlyclean-generic \
         mostlyclean-libtool
  
  pdf: pdf-recursive
@@ -1345,23 +1273,23 @@ uninstall-am: uninstall-includeHEADERS uninstall-libLTLIBRARIES \
  
  .PHONY: $(RECURSIVE_CLEAN_TARGETS) $(RECURSIVE_TARGETS) CTAGS GTAGS \
         all all-am am--refresh check check-am clean clean-generic \
-       clean-krextra clean-libLTLIBRARIES clean-libtool ctags \
+       clean-libLTLIBRARIES clean-libtool clean-local ctags \
         ctags-recursive dist dist-all dist-bzip2 dist-gzip dist-hook \
-       dist-lzma dist-shar dist-tarZ dist-xz dist-zip distcheck \
-       distclean distclean-compile distclean-generic distclean-hdr \
-       distclean-libtool distclean-tags distcleancheck distdir \
-       distuninstallcheck dvi dvi-am html html-am info info-am \
-       install install-am install-data install-data-am \
-       install-data-hook install-dvi install-dvi-am install-exec \
-       install-exec-am install-html install-html-am \
+       dist-lzip dist-lzma dist-shar dist-tarZ dist-xz dist-zip \
+       distcheck distclean distclean-compile distclean-generic \
+       distclean-hdr distclean-libtool distclean-local distclean-tags \
+       distcleancheck distdir distuninstallcheck dvi dvi-am html \
+       html-am info info-am install install-am install-data \
+       install-data-am install-data-hook install-dvi install-dvi-am \
+       install-exec install-exec-am install-html install-html-am \
         install-includeHEADERS install-info install-info-am \
         install-libLTLIBRARIES install-man \
         install-nodist_includeexecHEADERS install-pdf install-pdf-am \
         install-ps install-ps-am install-strip installcheck \
         installcheck-am installdirs installdirs-am maintainer-clean \
         maintainer-clean-generic mostlyclean mostlyclean-compile \
-       mostlyclean-generic mostlyclean-kr mostlyclean-libtool pdf \
-       pdf-am ps ps-am tags tags-recursive uninstall uninstall-am \
+       mostlyclean-generic mostlyclean-libtool pdf pdf-am ps ps-am \
+       tags tags-recursive uninstall uninstall-am \
         uninstall-includeHEADERS uninstall-libLTLIBRARIES \
         uninstall-nodist_includeexecHEADERS
  
@@ -1382,14 +1310,11 @@ install-data-hook:
         @echo '+-------------------------------------------------------------+'
         @echo ''
  
-mpz/fac_ui.h: gen-fac_ui$(EXEEXT_FOR_BUILD)
-       ./gen-fac_ui $(GMP_LIMB_BITS) $(GMP_NAIL_BITS) >mpz/fac_ui.h || (rm -f mpz/fac_ui.h; exit 1)
-
-gen-fac_ui$(EXEEXT_FOR_BUILD): gen-fac_ui$(U_FOR_BUILD).c dumbmp.c
-       $(CC_FOR_BUILD) `test -f 'gen-fac_ui$(U_FOR_BUILD).c' || echo '$(srcdir)/'`gen-fac_ui$(U_FOR_BUILD).c -o gen-fac_ui$(EXEEXT_FOR_BUILD)
+fac_table.h: gen-fac$(EXEEXT_FOR_BUILD)
+       ./gen-fac $(GMP_LIMB_BITS) $(GMP_NAIL_BITS) >fac_table.h || (rm -f fac_table.h; exit 1)
  
-gen-fac_ui_.c: gen-fac_ui.c $(ANSI2KNR)
-       $(CPP_FOR_BUILD) `if test -f $(srcdir)/gen-fac_ui.c; then echo $(srcdir)/gen-fac_ui.c; else echo gen-fac_ui.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > gen-fac_ui_.c || rm -f gen-fac_ui_.c
+gen-fac$(EXEEXT_FOR_BUILD): gen-fac$(U_FOR_BUILD).c bootstrap.c
+       $(CC_FOR_BUILD) `test -f 'gen-fac$(U_FOR_BUILD).c' || echo '$(srcdir)/'`gen-fac$(U_FOR_BUILD).c -o gen-fac$(EXEEXT_FOR_BUILD)
  
  fib_table.h: gen-fib$(EXEEXT_FOR_BUILD)
         ./gen-fib header $(GMP_LIMB_BITS) $(GMP_NAIL_BITS) >fib_table.h || (rm -f fib_table.h; exit 1)
@@ -1397,42 +1322,36 @@ fib_table.h: gen-fib$(EXEEXT_FOR_BUILD)
  mpn/fib_table.c: gen-fib$(EXEEXT_FOR_BUILD)
         ./gen-fib table $(GMP_LIMB_BITS) $(GMP_NAIL_BITS) >mpn/fib_table.c || (rm -f mpn/fib_table.c; exit 1)
  
-gen-fib$(EXEEXT_FOR_BUILD): gen-fib$(U_FOR_BUILD).c dumbmp.c
+gen-fib$(EXEEXT_FOR_BUILD): gen-fib$(U_FOR_BUILD).c bootstrap.c
         $(CC_FOR_BUILD) `test -f 'gen-fib$(U_FOR_BUILD).c' || echo '$(srcdir)/'`gen-fib$(U_FOR_BUILD).c -o gen-fib$(EXEEXT_FOR_BUILD)
  
-gen-fib_.c: gen-fib.c $(ANSI2KNR)
-       $(CPP_FOR_BUILD) `if test -f $(srcdir)/gen-fib.c; then echo $(srcdir)/gen-fib.c; else echo gen-fib.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > gen-fib_.c || rm -f gen-fib_.c
-
  mp_bases.h: gen-bases$(EXEEXT_FOR_BUILD)
         ./gen-bases header $(GMP_LIMB_BITS) $(GMP_NAIL_BITS) >mp_bases.h || (rm -f mp_bases.h; exit 1)
  
  mpn/mp_bases.c: gen-bases$(EXEEXT_FOR_BUILD)
         ./gen-bases table $(GMP_LIMB_BITS) $(GMP_NAIL_BITS) >mpn/mp_bases.c || (rm -f mpn/mp_bases.c; exit 1)
  
-gen-bases$(EXEEXT_FOR_BUILD): gen-bases$(U_FOR_BUILD).c dumbmp.c
+gen-bases$(EXEEXT_FOR_BUILD): gen-bases$(U_FOR_BUILD).c bootstrap.c
         $(CC_FOR_BUILD) `test -f 'gen-bases$(U_FOR_BUILD).c' || echo '$(srcdir)/'`gen-bases$(U_FOR_BUILD).c -o gen-bases$(EXEEXT_FOR_BUILD) $(LIBM_FOR_BUILD)
  
-gen-bases_.c: gen-bases.c $(ANSI2KNR)
-       $(CPP_FOR_BUILD) `if test -f $(srcdir)/gen-bases.c; then echo $(srcdir)/gen-bases.c; else echo gen-bases.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > gen-bases_.c || rm -f gen-bases_.c
-
  trialdivtab.h: gen-trialdivtab$(EXEEXT_FOR_BUILD)
         ./gen-trialdivtab $(GMP_LIMB_BITS) 8000 >trialdivtab.h || (rm -f trialdivtab.h; exit 1)
  
-gen-trialdivtab$(EXEEXT_FOR_BUILD): gen-trialdivtab$(U_FOR_BUILD).c dumbmp.c
+gen-trialdivtab$(EXEEXT_FOR_BUILD): gen-trialdivtab$(U_FOR_BUILD).c bootstrap.c
         $(CC_FOR_BUILD) `test -f 'gen-trialdivtab$(U_FOR_BUILD).c' || echo '$(srcdir)/'`gen-trialdivtab$(U_FOR_BUILD).c -o gen-trialdivtab$(EXEEXT_FOR_BUILD) $(LIBM_FOR_BUILD)
  
-gen-trialdivtab_.c: gen-trialdivtab.c $(ANSI2KNR)
-       $(CPP_FOR_BUILD) `if test -f $(srcdir)/gen-trialdivtab.c; then echo $(srcdir)/gen-trialdivtab.c; else echo gen-trialdivtab.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > gen-trialdivtab_.c || rm -f gen-trialdivtab_.c
+mpn/jacobitab.h: gen-jacobitab$(EXEEXT_FOR_BUILD)
+       ./gen-jacobitab >mpn/jacobitab.h || (rm -f mpn/jacobitab.h; exit 1)
+
+gen-jacobitab$(EXEEXT_FOR_BUILD): gen-jacobitab$(U_FOR_BUILD).c
+       $(CC_FOR_BUILD) `test -f 'gen-jacobitab$(U_FOR_BUILD).c' || echo '$(srcdir)/'`gen-jacobitab$(U_FOR_BUILD).c -o gen-jacobitab$(EXEEXT_FOR_BUILD)
  
  mpn/perfsqr.h: gen-psqr$(EXEEXT_FOR_BUILD)
         ./gen-psqr $(GMP_LIMB_BITS) $(GMP_NAIL_BITS) >mpn/perfsqr.h || (rm -f mpn/perfsqr.h; exit 1)
  
-gen-psqr$(EXEEXT_FOR_BUILD): gen-psqr$(U_FOR_BUILD).c dumbmp.c
+gen-psqr$(EXEEXT_FOR_BUILD): gen-psqr$(U_FOR_BUILD).c bootstrap.c
         $(CC_FOR_BUILD) `test -f 'gen-psqr$(U_FOR_BUILD).c' || echo '$(srcdir)/'`gen-psqr$(U_FOR_BUILD).c -o gen-psqr$(EXEEXT_FOR_BUILD) $(LIBM_FOR_BUILD)
  
-gen-psqr_.c: gen-psqr.c $(ANSI2KNR)
-       $(CPP_FOR_BUILD) `if test -f $(srcdir)/gen-psqr.c; then echo $(srcdir)/gen-psqr.c; else echo gen-psqr.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > gen-psqr_.c || rm -f gen-psqr_.c
-
  # Avoid: CVS - cvs directories
  #        *~  - emacs backups
  #        .#* - cvs merge originals
@@ -1443,10 +1362,37 @@ gen-psqr_.c: gen-psqr.c $(ANSI2KNR)
  dist-hook:
         -find $(distdir) \( -name CVS -type d \) -o -name "*~" -o -name ".#*" \
                 | xargs rm -rf
+       cp "$(srcdir)"/mini-gmp/tests/*.[ch] "$(distdir)/mini-gmp/tests"
  #      grep -F $(VERSION) $(srcdir)/Makefile.am \
-#              | grep -q "^# *$(VERSION) *$(LIBGMP_LT_CURRENT):$(LIBGMP_LT_REVISION):$(LIBGMP_LT_AGE) *$(LIBGMPXX_LT_CURRENT):$(LIBGMPXX_LT_REVISION):$(LIBGMPXX_LT_AGE) *$(LIBMP_LT_CURRENT):$(LIBMP_LT_REVISION):$(LIBMP_LT_AGE)"
+#              | grep -q "^# *$(VERSION) *$(LIBGMP_LT_CURRENT):$(LIBGMP_LT_REVISION):$(LIBGMP_LT_AGE) *$(LIBGMPXX_LT_CURRENT):$(LIBGMPXX_LT_REVISION):$(LIBGMPXX_LT_AGE)"
  #      test -z "`sed -n 's/^# *[0-9]*\.[0-9]*\.[0-9]* *\([0-9]*:[0-9]*:[0-9]*\) *\([0-9]*:[0-9]*:[0-9]*\) *\([0-9]*:[0-9]*:[0-9]*\).*/A\1\nB\2\nC\3/p' $(srcdir)/Makefile.am | grep -v 'A6:3:3\|B3:5:0\|C4:7:1' | sort | uniq -d`"
  
+.PHONY: check-mini-gmp clean-mini-gmp
+
+check-mini-gmp:
+       abs_srcdir="`cd $(srcdir) && pwd`" ; \
+       $(MKDIR_P) mini-gmp/tests \
+       && cd mini-gmp/tests \
+       && LD_LIBRARY_PATH="../../.libs:$$LD_LIBRARY_PATH" \
+          DYLD_LIBRARY_PATH="../../.libs:$$DYLD_LIBRARY_PATH" \
+          $(MAKE) -f "$$abs_srcdir/mini-gmp/tests/Makefile" \
+               VPATH="$$abs_srcdir/mini-gmp/tests" \
+               srcdir="$$abs_srcdir/mini-gmp/tests" \
+               MINI_GMP_DIR="$$abs_srcdir/mini-gmp" \
+               LDFLAGS="-L../../.libs" \
+               LIBS="-lgmp -lm" \
+               CC="$(CC_FOR_BUILD)" EXTRA_CFLAGS="-g -I../.." check
+
+clean-mini-gmp:
+       if [ -d mini-gmp/tests ] ; then \
+         abs_srcdir="`cd $(srcdir) && pwd`" ; \
+         cd mini-gmp/tests \
+         && $(MAKE) -f "$$abs_srcdir/mini-gmp/tests/Makefile" clean ; \
+       fi
+
+clean-local: clean-mini-gmp
+distclean-local: clean-mini-gmp
+
  # Tell versions [3.59,3.63) of GNU make to not export all variables.
  # Otherwise a system limit (for SysV at least) may be exceeded.
  .NOEXPORT:
diff --git a/NEWS b/NEWS

index b1a695c8a41097a4d965ea28e808db5965e92cb9..e983884ea05dfaa42495cc1dd25e22461a3f1eca 100644 (file)
--- a/NEWS
+++ b/NEWS
@@ -1,10 +1,151 @@
  Copyright 1996, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008,
-2009, 2010, 2011, 2012 Free Software Foundation, Inc.
+2009, 2010, 2011, 2012, 2013 Free Software Foundation, Inc.
  
  Verbatim copying and distribution of this entire article is permitted in any
  medium, provided this notice is preserved.
  
  
+Changes between GMP version 5.1.2 and 5.1.3
+
+  BUGS FIXED
+  * The internal functions mpn_sbpi1_div_qr_sec and mpn_sbpi1_div_r_sec could
+    compute garbage with a low probability.  They are now rewritten, and the
+    test code has been improved.
+
+  * A bug in the ia64 implementation of mpn_divrem_2, clobbering some
+    callee-save registers, has been fixed. This is an internal
+    function, with the bug manifesting itself as miscomputation in,
+    e.g., mpn_sqrtrem.
+
+  * The documentation now correctly says 'const' for input arguments.
+
+  SPEEDUPS
+  * None.
+
+  FEATURES
+  * None.
+
+  MISC
+  * None.
+
+Changes between GMP version 5.1.1 and 5.1.2
+
+  BUGS FIXED
+  * A bug in mpz_powm_ui triggered by base arguments of at least 15000 decimal
+    digits or mod arguments of at least 7500 decimal digits has been fixed.
+
+  * An AMD Bulldozer specific bug affecting the 64-bit Windows ABI has been
+    fixed.  This bug was in a key function (mpn_mul_1) and made both Bulldozer
+    specific builds and fat builds run on Bulldozer completely non-functional.
+
+  SPEEDUPS
+  * None.
+
+  FEATURES
+  * None.
+
+  MISC
+  * Fixes and generalisations to the test suite.
+
+  * Minor portability enhancements.
+
+
+Changes between GMP version 5.1.0 and 5.1.1
+
+  BUGS FIXED
+  * On Windows 64-bit, an error causing link errors about
+    __gmp_binvert_limb_table has been fixed.
+
+  * Aarch64 alias ARM64 support now works.
+
+  * A possible buffer overrun in mpz_ior has been fixed.
+
+  * A rare sign flip in mpz_remove has been fixed.
+
+  * A bug causing problems with mpf numbers with absolute value >= 2^31 has
+    been fixed.
+
+  * Several bugs in mini-gmp have been fixed.
+
+  * A bug caused by automake, related to the 'distcheck' target, has been fixed
+    by upgrading the automake used for GMP release engineering.
+
+  SPEEDUPS
+  * None.
+
+  FEATURES
+  * Preliminary support for the x32 ABI under x86-64.
+
+  MISC
+  * The mini-gmp testsuite now tests the entire set of functions.
+
+  * Various improvements of the GMP testsuite.
+
+
+Changes between GMP version 5.0.* and 5.1.0
+
+  BUGS FIXED
+  * When reading a C++ number (like mpz_class) in an istream reaches the end
+    of the stream, the eofbit is now set.
+
+  * The result sign of mpz_rootrem's remainder is now always correct.
+
+  * The mpz_remove function now handles negative divisors.
+
+  * Contains all fixes from release 5.0.5.
+
+  SPEEDUPS
+  * The n-factorial and n-over-k functions have been reimplemented for great
+    speedups for small and large operands.
+
+  * New subquadratic algorithm for the Kronecker/Jacobi/Legendre symbol.
+
+  * Major speedup for ARM, in particular ARM Cortex-A9 and A15, thanks to broad
+    assembly support.
+
+  * Significant speedup for POWER6 and POWER7 thanks to improved assembly.
+
+  * The performance under M$ Windows' 64-bit ABI has been greatly improved
+    thanks to complete assembly support.
+
+  * Minor speed improvements of many functions and for many platforms.
+
+  FEATURES
+  * Many new CPUs recognised.
+
+  * New functions for multi-factorials, and primorial: mpz_2fac_ui,
+    mpz_mfac_uiui and mpz_primorial_ui.
+
+  * The mpz_powm_sec function now uses side-channel silent division for
+    converting into Montgomery residues.
+
+  * The fat binary mechanism is now more robust in its CPU recognition.
+
+  MISC
+  * Inclusion of assembly code is now controlled by the configure options
+    --enable-assembly and --disable-assembly.  The "none" CPU target is gone.
+
+  * In C++, the conversions mpq_class->mpz_class, mpf_class->mpz_class and
+    mpf_class->mpq_class are now explicit.
+
+  * Includes "mini-gmp", a small, portable, but less efficient, implementation
+    of a subset of GMP's mpn and mpz interfaces. Used in GMP bootstrap, but it
+    can also be bundled with applications as a fallback when the real GMP
+    library is unavailable.
+
+  * The ABIs under AIX are no longer called aix32 and aix64, but mode64 and 32.
+    This is more consistent with other powerpc systems.
+
+  * The coverage of the testsuite has been improved, using the lcov tool.  See
+    also http://gmplib.org/devel/lcov/.
+
+  * It is now possible to compile GMP using a C++ compiler.
+
+  * K&R C compilers are no longer supported.
+
+  * The BSD MP compatibility functions have been removed.
+
+
  Changes between GMP version 5.0.4 and 5.0.5
  
    BUGS FIXED
@@ -14,7 +155,7 @@ Changes between GMP version 5.0.4 and 5.0.5
      as one would have hoped CPU traps of some 'illegal instruction' sort).
  
    * A bug affecting recent Intel Sandy Bridge CPUs resulting in configuration
-    failures has been fixed,
+    failures has been fixed.
  
    SPEEDUPS
    * None.
diff --git a/acinclude.m4 b/acinclude.m4

index dfb270f009e17609e8ab7ddad011bfed74543fe1..f1f071f124ced339db7adbf679d103dac61ce14b 100644 (file)
--- a/acinclude.m4
+++ b/acinclude.m4
@@ -3599,28 +3599,6 @@ fi
  ])
  
  
-dnl  GMP_H_ANSI
-dnl  ----------
-dnl  Check whether gmp.h recognises the compiler as ANSI capable.
-
-AC_DEFUN([GMP_H_ANSI],
-[AC_REQUIRE([AC_PROG_CC_STDC])
-case $ac_cv_prog_cc_stdc in
-  no)
-    ;;
-  *)
-    AC_TRY_COMPILE(
-GMP_INCLUDE_GMP_H
-[#if ! __GMP_HAVE_PROTOTYPES
-die die die
-#endif
-],,,
-    [AC_MSG_WARN([gmp.h doesnt recognise compiler as ANSI, prototypes and "const" will be unavailable])])
-    ;;
-esac
-])
-
-
  dnl  GMP_H_EXTERN_INLINE
  dnl  -------------------
  dnl  If the compiler has an "inline" of some sort, check whether the
diff --git a/aclocal.m4 b/aclocal.m4

index aecf1dc0812501a905a7a10536740dd5b7e7f043..11222f5a2370a74413253fcde1ff083207eddf43 100644 (file)
--- a/aclocal.m4
+++ b/aclocal.m4
@@ -1,7 +1,8 @@
-# generated automatically by aclocal 1.11.1 -*- Autoconf -*-
+# generated automatically by aclocal 1.11.6 -*- Autoconf -*-
  
  # Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
-# 2005, 2006, 2007, 2008, 2009  Free Software Foundation, Inc.
+# 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software Foundation,
+# Inc.
  # This file is free software; the Free Software Foundation
  # gives unlimited permission to copy and/or distribute it,
  # with or without modifications, as long as this notice is preserved.
@@ -13,8 +14,8 @@
  
  m4_ifndef([AC_AUTOCONF_VERSION],
    [m4_copy([m4_PACKAGE_VERSION], [AC_AUTOCONF_VERSION])])dnl
-m4_if(m4_defn([AC_AUTOCONF_VERSION]), [2.65],,
-[m4_warning([this file was generated for autoconf 2.65.
+m4_if(m4_defn([AC_AUTOCONF_VERSION]), [2.69],,
+[m4_warning([this file was generated for autoconf 2.69.
  You have another version of autoconf.  It may work, but is not guaranteed to.
  If you have problems, you may need to regenerate the build system entirely.
  To do so, use the procedure documented by the package, typically `autoreconf'.])])
@@ -8606,12 +8607,15 @@ m4_ifndef([_LT_PROG_F77],               [AC_DEFUN([_LT_PROG_F77])])
  m4_ifndef([_LT_PROG_FC],               [AC_DEFUN([_LT_PROG_FC])])
  m4_ifndef([_LT_PROG_CXX],              [AC_DEFUN([_LT_PROG_CXX])])
  
-# Copyright (C) 2002, 2003, 2005, 2006, 2007, 2008  Free Software Foundation, Inc.
+# Copyright (C) 2002, 2003, 2005, 2006, 2007, 2008, 2011 Free Software
+# Foundation, Inc.
  #
  # This file is free software; the Free Software Foundation
  # gives unlimited permission to copy and/or distribute it,
  # with or without modifications, as long as this notice is preserved.
  
+# serial 1
+
  # AM_AUTOMAKE_VERSION(VERSION)
  # ----------------------------
  # Automake X.Y traces this macro to ensure aclocal.m4 has been
@@ -8621,7 +8625,7 @@ AC_DEFUN([AM_AUTOMAKE_VERSION],
  [am__api_version='1.11'
  dnl Some users find AM_AUTOMAKE_VERSION and mistake it for a way to
  dnl require some minimum version.  Point them to the right macro.
-m4_if([$1], [1.11.1], [],
+m4_if([$1], [1.11.6], [],
        [AC_FATAL([Do not call $0, use AM_INIT_AUTOMAKE([$1]).])])dnl
  ])
  
@@ -8637,19 +8641,21 @@ m4_define([_AM_AUTOCONF_VERSION], [])
  # Call AM_AUTOMAKE_VERSION and _AM_AUTOCONF_VERSION so they can be traced.
  # This function is AC_REQUIREd by AM_INIT_AUTOMAKE.
  AC_DEFUN([AM_SET_CURRENT_AUTOMAKE_VERSION],
-[AM_AUTOMAKE_VERSION([1.11.1])dnl
+[AM_AUTOMAKE_VERSION([1.11.6])dnl
  m4_ifndef([AC_AUTOCONF_VERSION],
    [m4_copy([m4_PACKAGE_VERSION], [AC_AUTOCONF_VERSION])])dnl
  _AM_AUTOCONF_VERSION(m4_defn([AC_AUTOCONF_VERSION]))])
  
  # AM_AUX_DIR_EXPAND                                         -*- Autoconf -*-
  
-# Copyright (C) 2001, 2003, 2005  Free Software Foundation, Inc.
+# Copyright (C) 2001, 2003, 2005, 2011 Free Software Foundation, Inc.
  #
  # This file is free software; the Free Software Foundation
  # gives unlimited permission to copy and/or distribute it,
  # with or without modifications, as long as this notice is preserved.
  
+# serial 1
+
  # For projects using AC_CONFIG_AUX_DIR([foo]), Autoconf sets
  # $ac_aux_dir to `$srcdir/foo'.  In other projects, it is set to
  # `$srcdir', `$srcdir/..', or `$srcdir/../..'.
@@ -8731,18 +8737,6 @@ AC_CONFIG_COMMANDS_PRE(
  Usually this means the macro was only invoked conditionally.]])
  fi])])
  
-# Copyright (C) 1996, 1997, 2000, 2001, 2003, 2005
-# Free Software Foundation, Inc.
-#
-# This file is free software; the Free Software Foundation
-# gives unlimited permission to copy and/or distribute it,
-# with or without modifications, as long as this notice is preserved.
-
-# serial 8
-
-# AM_CONFIG_HEADER is obsolete.  It has been replaced by AC_CONFIG_HEADERS.
-AU_DEFUN([AM_CONFIG_HEADER], [AC_CONFIG_HEADERS($@)])
-
  # Do all the work for Automake.                             -*- Autoconf -*-
  
  # Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
@@ -8882,12 +8876,15 @@ for _am_header in $config_headers :; do
  done
  echo "timestamp for $_am_arg" >`AS_DIRNAME(["$_am_arg"])`/stamp-h[]$_am_stamp_count])
  
-# Copyright (C) 2001, 2003, 2005, 2008  Free Software Foundation, Inc.
+# Copyright (C) 2001, 2003, 2005, 2008, 2011 Free Software Foundation,
+# Inc.
  #
  # This file is free software; the Free Software Foundation
  # gives unlimited permission to copy and/or distribute it,
  # with or without modifications, as long as this notice is preserved.
  
+# serial 1
+
  # AM_PROG_INSTALL_SH
  # ------------------
  # Define $install_sh.
@@ -8948,8 +8945,8 @@ fi])
  # Add --enable-maintainer-mode option to configure.         -*- Autoconf -*-
  # From Jim Meyering
  
-# Copyright (C) 1996, 1998, 2000, 2001, 2002, 2003, 2004, 2005, 2008
-# Free Software Foundation, Inc.
+# Copyright (C) 1996, 1998, 2000, 2001, 2002, 2003, 2004, 2005, 2008,
+# 2011 Free Software Foundation, Inc.
  #
  # This file is free software; the Free Software Foundation
  # gives unlimited permission to copy and/or distribute it,
@@ -8969,7 +8966,7 @@ AC_DEFUN([AM_MAINTAINER_MODE],
         [disable], [m4_define([am_maintainer_other], [enable])],
         [m4_define([am_maintainer_other], [enable])
          m4_warn([syntax], [unexpected argument to AM@&t@_MAINTAINER_MODE: $1])])
-AC_MSG_CHECKING([whether to am_maintainer_other maintainer-specific portions of Makefiles])
+AC_MSG_CHECKING([whether to enable maintainer-specific portions of Makefiles])
    dnl maintainer-mode's default is 'disable' unless 'enable' is passed
    AC_ARG_ENABLE([maintainer-mode],
  [  --][am_maintainer_other][-maintainer-mode  am_maintainer_other make rules and dependencies not useful
@@ -9028,12 +9025,15 @@ else
  fi
  ])
  
-# Copyright (C) 2003, 2004, 2005, 2006  Free Software Foundation, Inc.
+# Copyright (C) 2003, 2004, 2005, 2006, 2011 Free Software Foundation,
+# Inc.
  #
  # This file is free software; the Free Software Foundation
  # gives unlimited permission to copy and/or distribute it,
  # with or without modifications, as long as this notice is preserved.
  
+# serial 1
+
  # AM_PROG_MKDIR_P
  # ---------------
  # Check for `mkdir -p'.
@@ -9056,13 +9056,14 @@ esac
  
  # Helper functions for option handling.                     -*- Autoconf -*-
  
-# Copyright (C) 2001, 2002, 2003, 2005, 2008  Free Software Foundation, Inc.
+# Copyright (C) 2001, 2002, 2003, 2005, 2008, 2010 Free Software
+# Foundation, Inc.
  #
  # This file is free software; the Free Software Foundation
  # gives unlimited permission to copy and/or distribute it,
  # with or without modifications, as long as this notice is preserved.
  
-# serial 4
+# serial 5
  
  # _AM_MANGLE_OPTION(NAME)
  # -----------------------
@@ -9070,13 +9071,13 @@ AC_DEFUN([_AM_MANGLE_OPTION],
  [[_AM_OPTION_]m4_bpatsubst($1, [[^a-zA-Z0-9_]], [_])])
  
  # _AM_SET_OPTION(NAME)
-# ------------------------------
+# --------------------
  # Set option NAME.  Presently that only means defining a flag for this option.
  AC_DEFUN([_AM_SET_OPTION],
  [m4_define(_AM_MANGLE_OPTION([$1]), 1)])
  
  # _AM_SET_OPTIONS(OPTIONS)
-# ----------------------------------
+# ------------------------
  # OPTIONS is a space-separated list of Automake options.
  AC_DEFUN([_AM_SET_OPTIONS],
  [m4_foreach_w([_AM_Option], [$1], [_AM_SET_OPTION(_AM_Option)])])
@@ -9087,32 +9088,6 @@ AC_DEFUN([_AM_SET_OPTIONS],
  AC_DEFUN([_AM_IF_OPTION],
  [m4_ifset(_AM_MANGLE_OPTION([$1]), [$2], [$3])])
  
-# Copyright (C) 1996, 1997, 1998, 2000, 2001, 2002, 2003, 2005, 2006
-# Free Software Foundation, Inc.
-#
-# This file is free software; the Free Software Foundation
-# gives unlimited permission to copy and/or distribute it,
-# with or without modifications, as long as this notice is preserved.
-
-# serial 5
-
-AC_DEFUN([AM_C_PROTOTYPES],
-[AC_REQUIRE([AC_C_PROTOTYPES])
-if test "$ac_cv_prog_cc_stdc" != no; then
-  U= ANSI2KNR=
-else
-  U=_ ANSI2KNR=./ansi2knr
-fi
-# Ensure some checks needed by ansi2knr itself.
-AC_REQUIRE([AC_HEADER_STDC])
-AC_CHECK_HEADERS([string.h])
-AC_SUBST([U])dnl
-AC_SUBST([ANSI2KNR])dnl
-_AM_SUBST_NOTMAKE([ANSI2KNR])dnl
-])
-
-AU_DEFUN([fp_C_PROTOTYPES], [AM_C_PROTOTYPES])
-
  # Check to make sure that the build environment is sane.    -*- Autoconf -*-
  
  # Copyright (C) 1996, 1997, 2000, 2001, 2003, 2005, 2008
@@ -9178,12 +9153,14 @@ Check your system clock])
  fi
  AC_MSG_RESULT(yes)])
  
-# Copyright (C) 2001, 2003, 2005  Free Software Foundation, Inc.
+# Copyright (C) 2001, 2003, 2005, 2011 Free Software Foundation, Inc.
  #
  # This file is free software; the Free Software Foundation
  # gives unlimited permission to copy and/or distribute it,
  # with or without modifications, as long as this notice is preserved.
  
+# serial 1
+
  # AM_PROG_INSTALL_STRIP
  # ---------------------
  # One issue with vendor `install' (even GNU) is that you can't
@@ -9206,13 +9183,13 @@ fi
  INSTALL_STRIP_PROGRAM="\$(install_sh) -c -s"
  AC_SUBST([INSTALL_STRIP_PROGRAM])])
  
-# Copyright (C) 2006, 2008  Free Software Foundation, Inc.
+# Copyright (C) 2006, 2008, 2010 Free Software Foundation, Inc.
  #
  # This file is free software; the Free Software Foundation
  # gives unlimited permission to copy and/or distribute it,
  # with or without modifications, as long as this notice is preserved.
  
-# serial 2
+# serial 3
  
  # _AM_SUBST_NOTMAKE(VARIABLE)
  # ---------------------------
@@ -9221,13 +9198,13 @@ AC_SUBST([INSTALL_STRIP_PROGRAM])])
  AC_DEFUN([_AM_SUBST_NOTMAKE])
  
  # AM_SUBST_NOTMAKE(VARIABLE)
-# ---------------------------
+# --------------------------
  # Public sister of _AM_SUBST_NOTMAKE.
  AC_DEFUN([AM_SUBST_NOTMAKE], [_AM_SUBST_NOTMAKE($@)])
  
  # Check how to create a tarball.                            -*- Autoconf -*-
  
-# Copyright (C) 2004, 2005  Free Software Foundation, Inc.
+# Copyright (C) 2004, 2005, 2012 Free Software Foundation, Inc.
  #
  # This file is free software; the Free Software Foundation
  # gives unlimited permission to copy and/or distribute it,
@@ -9249,10 +9226,11 @@ AC_DEFUN([AM_SUBST_NOTMAKE], [_AM_SUBST_NOTMAKE($@)])
  # a tarball read from stdin.
  #     $(am__untar) < result.tar
  AC_DEFUN([_AM_PROG_TAR],
-[# Always define AMTAR for backward compatibility.
-AM_MISSING_PROG([AMTAR], [tar])
+[# Always define AMTAR for backward compatibility.  Yes, it's still used
+# in the wild :-(  We should find a proper way to deprecate it ...
+AC_SUBST([AMTAR], ['$${TAR-tar}'])
  m4_if([$1], [v7],
-     [am__tar='${AMTAR} chof - "$$tardir"'; am__untar='${AMTAR} xf -'],
+     [am__tar='$${TAR-tar} chof - "$$tardir"' am__untar='$${TAR-tar} xf -'],
       [m4_case([$1], [ustar],, [pax],,
                [m4_fatal([Unknown tar format])])
  AC_MSG_CHECKING([how to create a $1 tar archive])
diff --git a/ansi2knr.1 b/ansi2knr.1

deleted file mode 100644 (file)

index f9ee5a6..0000000
--- a/ansi2knr.1
+++ /dev/null
@@ -1,36 +0,0 @@
-.TH ANSI2KNR 1 "19 Jan 1996"
-.SH NAME
-ansi2knr \- convert ANSI C to Kernighan & Ritchie C
-.SH SYNOPSIS
-.I ansi2knr
-[--varargs] input_file [output_file]
-.SH DESCRIPTION
-If no output_file is supplied, output goes to stdout.
-.br
-There are no error messages.
-.sp
-.I ansi2knr
-recognizes function definitions by seeing a non-keyword identifier at the left
-margin, followed by a left parenthesis, with a right parenthesis as the last
-character on the line, and with a left brace as the first token on the
-following line (ignoring possible intervening comments).  It will recognize a
-multi-line header provided that no intervening line ends with a left or right
-brace or a semicolon.  These algorithms ignore whitespace and comments, except
-that the function name must be the first thing on the line.
-.sp
-The following constructs will confuse it:
-.br
-     - Any other construct that starts at the left margin and follows the
-above syntax (such as a macro or function call).
-.br
-     - Some macros that tinker with the syntax of the function header.
-.sp
-The --varargs switch is obsolete, and is recognized only for
-backwards compatibility.  The present version of
-.I ansi2knr
-will always attempt to convert a ... argument to va_alist and va_dcl.
-.SH AUTHOR
-L. Peter Deutsch <ghost@aladdin.com> wrote the original ansi2knr and
-continues to maintain the current version; most of the code in the current
-version is his work.  ansi2knr also includes contributions by Francois
-Pinard <pinard@iro.umontreal.ca> and Jim Avera <jima@netcom.com>.
diff --git a/ansi2knr.c b/ansi2knr.c

deleted file mode 100644 (file)

index b646b85..0000000
--- a/ansi2knr.c
+++ /dev/null
@@ -1,739 +0,0 @@
-/* Copyright (C) 1989, 2000 Aladdin Enterprises.  All rights reserved. */
-
-/*$Id$*/
-/* Convert ANSI C function definitions to K&R ("traditional C") syntax */
-
-/*
-ansi2knr is distributed in the hope that it will be useful, but WITHOUT ANY
-WARRANTY.  No author or distributor accepts responsibility to anyone for the
-consequences of using it or for whether it serves any particular purpose or
-works at all, unless he says so in writing.  Refer to the GNU General Public
-License (the "GPL") for full details.
-
-Everyone is granted permission to copy, modify and redistribute ansi2knr,
-but only under the conditions described in the GPL.  A copy of this license
-is supposed to have been given to you along with ansi2knr so you can know
-your rights and responsibilities.  It should be in a file named COPYLEFT,
-or, if there is no file named COPYLEFT, a file named COPYING.  Among other
-things, the copyright notice and this notice must be preserved on all
-copies.
-
-We explicitly state here what we believe is already implied by the GPL: if
-the ansi2knr program is distributed as a separate set of sources and a
-separate executable file which are aggregated on a storage medium together
-with another program, this in itself does not bring the other program under
-the GPL, nor does the mere fact that such a program or the procedures for
-constructing it invoke the ansi2knr executable bring any other part of the
-program under the GPL.
-*/
-
-/*
- * Usage:
-       ansi2knr [--filename FILENAME] [INPUT_FILE [OUTPUT_FILE]]
- * --filename provides the file name for the #line directive in the output,
- * overriding input_file (if present).
- * If no input_file is supplied, input is read from stdin.
- * If no output_file is supplied, output goes to stdout.
- * There are no error messages.
- *
- * ansi2knr recognizes function definitions by seeing a non-keyword
- * identifier at the left margin, followed by a left parenthesis, with a
- * right parenthesis as the last character on the line, and with a left
- * brace as the first token on the following line (ignoring possible
- * intervening comments and/or preprocessor directives), except that a line
- * consisting of only
- *     identifier1(identifier2)
- * will not be considered a function definition unless identifier2 is
- * the word "void", and a line consisting of
- *     identifier1(identifier2, <<arbitrary>>)
- * will not be considered a function definition.
- * ansi2knr will recognize a multi-line header provided that no intervening
- * line ends with a left or right brace or a semicolon.  These algorithms
- * ignore whitespace, comments, and preprocessor directives, except that
- * the function name must be the first thing on the line.  The following
- * constructs will confuse it:
- *     - Any other construct that starts at the left margin and
- *         follows the above syntax (such as a macro or function call).
- *     - Some macros that tinker with the syntax of function headers.
- */
-
-/*
- * The original and principal author of ansi2knr is L. Peter Deutsch
- * <ghost@aladdin.com>.  Other authors are noted in the change history
- * that follows (in reverse chronological order):
-
-       lpd 2000-04-12 backs out Eggert's changes because of bugs:
-       - concatlits didn't declare the type of its bufend argument;
-       - concatlits didn't recognize when it was inside a comment;
-       - scanstring could scan backward past the beginning of the string; when
-       - the check for \ + newline in scanstring was unnecessary.
-
-       2000-03-05  Paul Eggert  <eggert@twinsun.com>
-
-       Add support for concatenated string literals.
-       * ansi2knr.c (concatlits): New decl.
-       (main): Invoke concatlits to concatenate string literals.
-       (scanstring): Handle backslash-newline correctly.  Work with
-       character constants.  Fix bug when scanning backwards through
-       backslash-quote.  Check for unterminated strings.
-       (convert1): Parse character constants, too.
-       (appendline, concatlits): New functions.
-       * ansi2knr.1: Document this.
-
-       lpd 1999-08-17 added code to allow preprocessor directives
-               wherever comments are allowed
-       lpd 1999-04-12 added minor fixes from Pavel Roskin
-               <pavel_roskin@geocities.com> for clean compilation with
-               gcc -W -Wall
-       lpd 1999-03-22 added hack to recognize lines consisting of
-               identifier1(identifier2, xxx) as *not* being procedures
-       lpd 1999-02-03 made indentation of preprocessor commands consistent
-       lpd 1999-01-28 fixed two bugs: a '/' in an argument list caused an
-               endless loop; quoted strings within an argument list
-               confused the parser
-       lpd 1999-01-24 added a check for write errors on the output,
-               suggested by Jim Meyering <meyering@ascend.com>
-       lpd 1998-11-09 added further hack to recognize identifier(void)
-               as being a procedure
-       lpd 1998-10-23 added hack to recognize lines consisting of
-               identifier1(identifier2) as *not* being procedures
-       lpd 1997-12-08 made input_file optional; only closes input and/or
-               output file if not stdin or stdout respectively; prints
-               usage message on stderr rather than stdout; adds
-               --filename switch (changes suggested by
-               <ceder@lysator.liu.se>)
-       lpd 1996-01-21 added code to cope with not HAVE_CONFIG_H and with
-               compilers that don't understand void, as suggested by
-               Tom Lane
-       lpd 1996-01-15 changed to require that the first non-comment token
-               on the line following a function header be a left brace,
-               to reduce sensitivity to macros, as suggested by Tom Lane
-               <tgl@sss.pgh.pa.us>
-       lpd 1995-06-22 removed #ifndefs whose sole purpose was to define
-               undefined preprocessor symbols as 0; changed all #ifdefs
-               for configuration symbols to #ifs
-       lpd 1995-04-05 changed copyright notice to make it clear that
-               including ansi2knr in a program does not bring the entire
-               program under the GPL
-       lpd 1994-12-18 added conditionals for systems where ctype macros
-               don't handle 8-bit characters properly, suggested by
-               Francois Pinard <pinard@iro.umontreal.ca>;
-               removed --varargs switch (this is now the default)
-       lpd 1994-10-10 removed CONFIG_BROKETS conditional
-       lpd 1994-07-16 added some conditionals to help GNU `configure',
-               suggested by Francois Pinard <pinard@iro.umontreal.ca>;
-               properly erase prototype args in function parameters,
-               contributed by Jim Avera <jima@netcom.com>;
-               correct error in writeblanks (it shouldn't erase EOLs)
-       lpd 1989-xx-xx original version
- */
-
-/* Most of the conditionals here are to make ansi2knr work with */
-/* or without the GNU configure machinery. */
-
-#if HAVE_CONFIG_H
-# include <config.h>
-#endif
-
-#include <stdio.h>
-#include <ctype.h>
-
-#if HAVE_CONFIG_H
-
-/*
-   For properly autoconfiguring ansi2knr, use AC_CONFIG_HEADER(config.h).
-   This will define HAVE_CONFIG_H and so, activate the following lines.
- */
-
-# if STDC_HEADERS || HAVE_STRING_H
-#  include <string.h>
-# else
-#  include <strings.h>
-# endif
-
-#else /* not HAVE_CONFIG_H */
-
-/* Otherwise do it the hard way */
-
-# ifdef BSD
-#  include <strings.h>
-# else
-#  ifdef VMS
-    extern int strlen(), strncmp();
-#  else
-#   include <string.h>
-#  endif
-# endif
-
-#endif /* not HAVE_CONFIG_H */
-
-#if STDC_HEADERS
-# include <stdlib.h>
-#else
-/*
-   malloc and free should be declared in stdlib.h,
-   but if you've got a K&R compiler, they probably aren't.
- */
-# ifdef MSDOS
-#  include <malloc.h>
-# else
-#  ifdef VMS
-     extern char *malloc();
-     extern void free();
-#  else
-     extern char *malloc();
-     extern int free();
-#  endif
-# endif
-
-#endif
-
-/* Define NULL (for *very* old compilers). */
-#ifndef NULL
-# define NULL (0)
-#endif
-
-/*
- * The ctype macros don't always handle 8-bit characters correctly.
- * Compensate for this here.
- */
-#ifdef isascii
-# undef HAVE_ISASCII           /* just in case */
-# define HAVE_ISASCII 1
-#else
-#endif
-#if STDC_HEADERS || !HAVE_ISASCII
-# define is_ascii(c) 1
-#else
-# define is_ascii(c) isascii(c)
-#endif
-
-#define is_space(c) (is_ascii(c) && isspace(c))
-#define is_alpha(c) (is_ascii(c) && isalpha(c))
-#define is_alnum(c) (is_ascii(c) && isalnum(c))
-
-/* Scanning macros */
-#define isidchar(ch) (is_alnum(ch) || (ch) == '_')
-#define isidfirstchar(ch) (is_alpha(ch) || (ch) == '_')
-
-/* Forward references */
-char *ppdirforward();
-char *ppdirbackward();
-char *skipspace();
-char *scanstring();
-int writeblanks();
-int test1();
-int convert1();
-
-/* The main program */
-int
-main(argc, argv)
-    int argc;
-    char *argv[];
-{      FILE *in = stdin;
-       FILE *out = stdout;
-       char *filename = 0;
-       char *program_name = argv[0];
-       char *output_name = 0;
-#define bufsize 5000                   /* arbitrary size */
-       char *buf;
-       char *line;
-       char *more;
-       char *usage =
-         "Usage: ansi2knr [--filename FILENAME] [INPUT_FILE [OUTPUT_FILE]]\n";
-       /*
-        * In previous versions, ansi2knr recognized a --varargs switch.
-        * If this switch was supplied, ansi2knr would attempt to convert
-        * a ... argument to va_alist and va_dcl; if this switch was not
-        * supplied, ansi2knr would simply drop any such arguments.
-        * Now, ansi2knr always does this conversion, and we only
-        * check for this switch for backward compatibility.
-        */
-       int convert_varargs = 1;
-       int output_error;
-
-       while ( argc > 1 && argv[1][0] == '-' ) {
-         if ( !strcmp(argv[1], "--varargs") ) {
-           convert_varargs = 1;
-           argc--;
-           argv++;
-           continue;
-         }
-         if ( !strcmp(argv[1], "--filename") && argc > 2 ) {
-           filename = argv[2];
-           argc -= 2;
-           argv += 2;
-           continue;
-         }
-         fprintf(stderr, "%s: Unrecognized switch: %s\n", program_name,
-                 argv[1]);
-         fprintf(stderr, usage);
-         exit(1);
-       }
-       switch ( argc )
-          {
-       default:
-               fprintf(stderr, usage);
-               exit(0);
-       case 3:
-               output_name = argv[2];
-               out = fopen(output_name, "w");
-               if ( out == NULL ) {
-                 fprintf(stderr, "%s: Cannot open output file %s\n",
-                         program_name, output_name);
-                 exit(1);
-               }
-               /* falls through */
-       case 2:
-               in = fopen(argv[1], "r");
-               if ( in == NULL ) {
-                 fprintf(stderr, "%s: Cannot open input file %s\n",
-                         program_name, argv[1]);
-                 exit(1);
-               }
-               if ( filename == 0 )
-                 filename = argv[1];
-               /* falls through */
-       case 1:
-               break;
-          }
-       if ( filename )
-         fprintf(out, "#line 1 \"%s\"\n", filename);
-       buf = malloc(bufsize);
-       if ( buf == NULL )
-          {
-               fprintf(stderr, "Unable to allocate read buffer!\n");
-               exit(1);
-          }
-       line = buf;
-       while ( fgets(line, (unsigned)(buf + bufsize - line), in) != NULL )
-          {
-test:          line += strlen(line);
-               switch ( test1(buf) )
-                  {
-               case 2:                 /* a function header */
-                       convert1(buf, out, 1, convert_varargs);
-                       break;
-               case 1:                 /* a function */
-                       /* Check for a { at the start of the next line. */
-                       more = ++line;
-f:                     if ( line >= buf + (bufsize - 1) ) /* overflow check */
-                         goto wl;
-                       if ( fgets(line, (unsigned)(buf + bufsize - line), in) == NULL )
-                         goto wl;
-                       switch ( *skipspace(ppdirforward(more), 1) )
-                         {
-                         case '{':
-                           /* Definitely a function header. */
-                           convert1(buf, out, 0, convert_varargs);
-                           fputs(more, out);
-                           break;
-                         case 0:
-                           /* The next line was blank or a comment: */
-                           /* keep scanning for a non-comment. */
-                           line += strlen(line);
-                           goto f;
-                         default:
-                           /* buf isn't a function header, but */
-                           /* more might be. */
-                           fputs(buf, out);
-                           strcpy(buf, more);
-                           line = buf;
-                           goto test;
-                         }
-                       break;
-               case -1:                /* maybe the start of a function */
-                       if ( line != buf + (bufsize - 1) ) /* overflow check */
-                         continue;
-                       /* falls through */
-               default:                /* not a function */
-wl:                    fputs(buf, out);
-                       break;
-                  }
-               line = buf;
-          }
-       if ( line != buf )
-         fputs(buf, out);
-       free(buf);
-       if ( output_name ) {
-         output_error = ferror(out);
-         output_error |= fclose(out);
-       } else {                /* out == stdout */
-         fflush(out);
-         output_error = ferror(out);
-       }
-       if ( output_error ) {
-         fprintf(stderr, "%s: error writing to %s\n", program_name,
-                 (output_name ? output_name : "stdout"));
-         exit(1);
-       }
-       if ( in != stdin )
-         fclose(in);
-       return 0;
-}
-
-/*
- * Skip forward or backward over one or more preprocessor directives.
- */
-char *
-ppdirforward(p)
-    char *p;
-{
-    for (; *p == '#'; ++p) {
-       for (; *p != '\r' && *p != '\n'; ++p)
-           if (*p == 0)
-               return p;
-       if (*p == '\r' && p[1] == '\n')
-           ++p;
-    }
-    return p;
-}
-char *
-ppdirbackward(p, limit)
-    char *p;
-    char *limit;
-{
-    char *np = p;
-
-    for (;; p = --np) {
-       if (*np == '\n' && np[-1] == '\r')
-           --np;
-       for (; np > limit && np[-1] != '\r' && np[-1] != '\n'; --np)
-           if (np[-1] == 0)
-               return np;
-       if (*np != '#')
-           return p;
-    }
-}
-
-/*
- * Skip over whitespace, comments, and preprocessor directives,
- * in either direction.
- */
-char *
-skipspace(p, dir)
-    char *p;
-    int dir;                   /* 1 for forward, -1 for backward */
-{
-    for ( ; ; ) {
-       while ( is_space(*p) )
-           p += dir;
-       if ( !(*p == '/' && p[dir] == '*') )
-           break;
-       p += dir;  p += dir;
-       while ( !(*p == '*' && p[dir] == '/') ) {
-           if ( *p == 0 )
-               return p;       /* multi-line comment?? */
-           p += dir;
-       }
-       p += dir;  p += dir;
-    }
-    return p;
-}
-
-/* Scan over a quoted string, in either direction. */
-char *
-scanstring(p, dir)
-    char *p;
-    int dir;
-{
-    for (p += dir; ; p += dir)
-       if (*p == '"' && p[-dir] != '\\')
-           return p + dir;
-}
-
-/*
- * Write blanks over part of a string.
- * Don't overwrite end-of-line characters.
- */
-int
-writeblanks(start, end)
-    char *start;
-    char *end;
-{      char *p;
-       for ( p = start; p < end; p++ )
-         if ( *p != '\r' && *p != '\n' )
-           *p = ' ';
-       return 0;
-}
-
-/*
- * Test whether the string in buf is a function definition.
- * The string may contain and/or end with a newline.
- * Return as follows:
- *     0 - definitely not a function definition;
- *     1 - definitely a function definition;
- *     2 - definitely a function prototype (NOT USED);
- *     -1 - may be the beginning of a function definition,
- *             append another line and look again.
- * The reason we don't attempt to convert function prototypes is that
- * Ghostscript's declaration-generating macros look too much like
- * prototypes, and confuse the algorithms.
- */
-int
-test1(buf)
-    char *buf;
-{      char *p = buf;
-       char *bend;
-       char *endfn;
-       int contin;
-
-       if ( !isidfirstchar(*p) )
-         return 0;             /* no name at left margin */
-       bend = skipspace(ppdirbackward(buf + strlen(buf) - 1, buf), -1);
-       switch ( *bend )
-          {
-          case ';': contin = 0 /*2*/; break;
-          case ')': contin = 1; break;
-          case '{': return 0;          /* not a function */
-          case '}': return 0;          /* not a function */
-          default: contin = -1;
-          }
-       while ( isidchar(*p) )
-         p++;
-       endfn = p;
-       p = skipspace(p, 1);
-       if ( *p++ != '(' )
-         return 0;             /* not a function */
-       p = skipspace(p, 1);
-       if ( *p == ')' )
-         return 0;             /* no parameters */
-       /* Check that the apparent function name isn't a keyword. */
-       /* We only need to check for keywords that could be followed */
-       /* by a left parenthesis (which, unfortunately, is most of them). */
-          {    static char *words[] =
-                  {    "asm", "auto", "case", "char", "const", "double",
-                       "extern", "float", "for", "if", "int", "long",
-                       "register", "return", "short", "signed", "sizeof",
-                       "static", "switch", "typedef", "unsigned",
-                       "void", "volatile", "while", 0
-                  };
-               char **key = words;
-               char *kp;
-               unsigned len = endfn - buf;
-
-               while ( (kp = *key) != 0 )
-                  {    if ( strlen(kp) == len && !strncmp(kp, buf, len) )
-                         return 0;     /* name is a keyword */
-                       key++;
-                  }
-          }
-          {
-              char *id = p;
-              int len;
-              /*
-               * Check for identifier1(identifier2) and not
-               * identifier1(void), or identifier1(identifier2, xxxx).
-               */
-
-              while ( isidchar(*p) )
-                  p++;
-              len = p - id;
-              p = skipspace(p, 1);
-              if (*p == ',' ||
-                  (*p == ')' && (len != 4 || strncmp(id, "void", 4)))
-                  )
-                  return 0;    /* not a function */
-          }
-       /*
-        * If the last significant character was a ), we need to count
-        * parentheses, because it might be part of a formal parameter
-        * that is a procedure.
-        */
-       if (contin > 0) {
-           int level = 0;
-
-           for (p = skipspace(buf, 1); *p; p = skipspace(p + 1, 1))
-               level += (*p == '(' ? 1 : *p == ')' ? -1 : 0);
-           if (level > 0)
-               contin = -1;
-       }
-       return contin;
-}
-
-/* Convert a recognized function definition or header to K&R syntax. */
-int
-convert1(buf, out, header, convert_varargs)
-    char *buf;
-    FILE *out;
-    int header;                        /* Boolean */
-    int convert_varargs;       /* Boolean */
-{      char *endfn;
-       char *p;
-       /*
-        * The breaks table contains pointers to the beginning and end
-        * of each argument.
-        */
-       char **breaks;
-       unsigned num_breaks = 2;        /* for testing */
-       char **btop;
-       char **bp;
-       char **ap;
-       char *vararg = 0;
-
-       /* Pre-ANSI implementations don't agree on whether strchr */
-       /* is called strchr or index, so we open-code it here. */
-       for ( endfn = buf; *(endfn++) != '('; )
-         ;
-top:   p = endfn;
-       breaks = (char **)malloc(sizeof(char *) * num_breaks * 2);
-       if ( breaks == NULL )
-          {    /* Couldn't allocate break table, give up */
-               fprintf(stderr, "Unable to allocate break table!\n");
-               fputs(buf, out);
-               return -1;
-          }
-       btop = breaks + num_breaks * 2 - 2;
-       bp = breaks;
-       /* Parse the argument list */
-       do
-          {    int level = 0;
-               char *lp = NULL;
-               char *rp = NULL;
-               char *end = NULL;
-
-               if ( bp >= btop )
-                  {    /* Filled up break table. */
-                       /* Allocate a bigger one and start over. */
-                       free((char *)breaks);
-                       num_breaks <<= 1;
-                       goto top;
-                  }
-               *bp++ = p;
-               /* Find the end of the argument */
-               for ( ; end == NULL; p++ )
-                  {    switch(*p)
-                          {
-                          case ',':
-                               if ( !level ) end = p;
-                               break;
-                          case '(':
-                               if ( !level ) lp = p;
-                               level++;
-                               break;
-                          case ')':
-                               if ( --level < 0 ) end = p;
-                               else rp = p;
-                               break;
-                          case '/':
-                               if (p[1] == '*')
-                                   p = skipspace(p, 1) - 1;
-                               break;
-                          case '"':
-                              p = scanstring(p, 1) - 1;
-                              break;
-                          default:
-                               ;
-                          }
-                  }
-               /* Erase any embedded prototype parameters. */
-               if ( lp && rp )
-                 writeblanks(lp + 1, rp);
-               p--;                    /* back up over terminator */
-               /* Find the name being declared. */
-               /* This is complicated because of procedure and */
-               /* array modifiers. */
-               for ( ; ; )
-                  {    p = skipspace(p - 1, -1);
-                       switch ( *p )
-                          {
-                          case ']':    /* skip array dimension(s) */
-                          case ')':    /* skip procedure args OR name */
-                          {    int level = 1;
-                               while ( level )
-                                switch ( *--p )
-                                  {
-                                  case ']': case ')':
-                                      level++;
-                                      break;
-                                  case '[': case '(':
-                                      level--;
-                                      break;
-                                  case '/':
-                                      if (p > buf && p[-1] == '*')
-                                          p = skipspace(p, -1) + 1;
-                                      break;
-                                  case '"':
-                                      p = scanstring(p, -1) + 1;
-                                      break;
-                                  default: ;
-                                  }
-                          }
-                               if ( *p == '(' && *skipspace(p + 1, 1) == '*' )
-                                  {    /* We found the name being declared */
-                                       while ( !isidfirstchar(*p) )
-                                         p = skipspace(p, 1) + 1;
-                                       goto found;
-                                  }
-                               break;
-                          default:
-                               goto found;
-                          }
-                  }
-found:         if ( *p == '.' && p[-1] == '.' && p[-2] == '.' )
-                 {     if ( convert_varargs )
-                         {     *bp++ = "va_alist";
-                               vararg = p-2;
-                         }
-                       else
-                         {     p++;
-                               if ( bp == breaks + 1 ) /* sole argument */
-                                 writeblanks(breaks[0], p);
-                               else
-                                 writeblanks(bp[-1] - 1, p);
-                               bp--;
-                         }
-                  }
-               else
-                  {    while ( isidchar(*p) ) p--;
-                       *bp++ = p+1;
-                  }
-               p = end;
-          }
-       while ( *p++ == ',' );
-       *bp = p;
-       /* Make a special check for 'void' arglist */
-       if ( bp == breaks+2 )
-          {    p = skipspace(breaks[0], 1);
-               if ( !strncmp(p, "void", 4) )
-                  {    p = skipspace(p+4, 1);
-                       if ( p == breaks[2] - 1 )
-                          {    bp = breaks;    /* yup, pretend arglist is empty */
-                               writeblanks(breaks[0], p + 1);
-                          }
-                  }
-          }
-       /* Put out the function name and left parenthesis. */
-       p = buf;
-       while ( p != endfn ) putc(*p, out), p++;
-       /* Put out the declaration. */
-       if ( header )
-         {     fputs(");", out);
-               for ( p = breaks[0]; *p; p++ )
-                 if ( *p == '\r' || *p == '\n' )
-                   putc(*p, out);
-         }
-       else
-         {     for ( ap = breaks+1; ap < bp; ap += 2 )
-                 {     p = *ap;
-                       while ( isidchar(*p) )
-                         putc(*p, out), p++;
-                       if ( ap < bp - 1 )
-                         fputs(", ", out);
-                 }
-               fputs(")  ", out);
-               /* Put out the argument declarations */
-               for ( ap = breaks+2; ap <= bp; ap += 2 )
-                 (*ap)[-1] = ';';
-               if ( vararg != 0 )
-                 {     *vararg = 0;
-                       fputs(breaks[0], out);          /* any prior args */
-                       fputs("va_dcl", out);           /* the final arg */
-                       fputs(bp[0], out);
-                 }
-               else
-                 fputs(breaks[0], out);
-         }
-       free((char *)breaks);
-       return 0;
-}
diff --git a/bootstrap.c b/bootstrap.c

new file mode 100644 (file)

index 0000000..3478ce0
--- /dev/null
+++ b/bootstrap.c
@@ -0,0 +1,135 @@
+/* Functions needed for bootstrapping the gmp build, based on mini-gmp.
+
+Copyright 2001, 2002, 2004, 2011, 2012 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+
+#include "mini-gmp/mini-gmp.c"
+
+#define MIN(l,o) ((l) < (o) ? (l) : (o))
+#define PTR(x)   ((x)->_mp_d)
+#define SIZ(x)   ((x)->_mp_size)
+
+#define xmalloc gmp_default_alloc
+
+int
+isprime (unsigned long int t)  /* Return 1 if t is prime, 0 otherwise (trial division).  */
+{
+  unsigned long int q, r, d;
+
+  if (t < 32)
+    return (0xa08a28acUL >> t) & 1;  /* bit i of the mask is set iff i is prime */
+  if ((t & 1) == 0)
+    return 0;
+
+  if (t % 3 == 0)
+    return 0;
+  if (t % 5 == 0)
+    return 0;
+  if (t % 7 == 0)
+    return 0;
+
+  for (d = 11;;)  /* d runs over 11, 13, 17, 19, 23, ...: steps of 2 and 4 skip multiples of 2 and 3 */
+    {
+      q = t / d;
+      r = t - q * d;  /* r = t mod d */
+      if (q < d)
+       return 1;  /* d*d > t and no earlier candidate divided t */
+      if (r == 0)
+       break;  /* d divides t */
+      d += 2;
+      q = t / d;
+      r = t - q * d;  /* r = t mod d */
+      if (q < d)
+       return 1;  /* d*d > t and no earlier candidate divided t */
+      if (r == 0)
+       break;  /* d divides t */
+      d += 4;
+    }
+  return 0;  /* reached only through a break above: t is composite */
+}
+
+int
+log2_ceil (int n)  /* Return the smallest e >= 0 such that 2^e >= n; n must be >= 1.  */
+{
+  int  e;
+  assert (n >= 1);
+  for (e = 0; ; e++)
+    if ((1 << e) >= n)  /* NOTE(review): 1 << e overflows int once n > 2^30 (32-bit int) -- confirm callers pass small n */
+      break;
+  return e;
+}
+
+/* Set inv to the inverse of d, in the style of invert_limb, i.e. for
+   udiv_qrnnd_preinv: inv = floor (2^(2*numb_bits-norm) / d) - 2^numb_bits.  */
+void
+mpz_preinv_invert (mpz_t inv, mpz_t d, int numb_bits)
+{
+  mpz_t  t;
+  int    norm;
+  assert (SIZ(d) > 0);  /* d must be positive (size field > 0) */
+
+  norm = numb_bits - mpz_sizeinbase (d, 2);  /* unused high bits of d within numb_bits */
+  assert (norm >= 0);  /* d must fit in numb_bits bits */
+  mpz_init_set_ui (t, 1L);
+  mpz_mul_2exp (t, t, 2*numb_bits - norm);  /* t = 2^(2*numb_bits - norm) */
+  mpz_tdiv_q (inv, t, d);  /* inv = floor (t / d) */
+  mpz_set_ui (t, 1L);
+  mpz_mul_2exp (t, t, numb_bits);  /* t = 2^numb_bits */
+  mpz_sub (inv, inv, t);  /* subtract 2^numb_bits from the quotient */
+
+  mpz_clear (t);
+}
+
+/* Calculate r satisfying r*a == 1 mod 2^n; a must be odd and n >= 1. */
+void
+mpz_invert_2exp (mpz_t r, mpz_t a, unsigned long n)
+{
+  unsigned long  i;
+  mpz_t  inv, prod;
+
+  assert (mpz_odd_p (a));
+
+  mpz_init_set_ui (inv, 1L);  /* inv*a == 1 mod 2 already holds, since a is odd */
+  mpz_init (prod);
+
+  for (i = 1; i < n; i++)  /* invariant on entry: inv*a == 1 mod 2^i */
+    {
+      mpz_mul (prod, inv, a);
+      if (mpz_tstbit (prod, i) != 0)
+       mpz_setbit (inv, i);  /* adds a*2^i to the product, clearing its bit i because a is odd */
+    }
+
+  mpz_mul (prod, inv, a);
+  mpz_tdiv_r_2exp (prod, prod, n);  /* prod = inv*a mod 2^n */
+  assert (mpz_cmp_ui (prod, 1L) == 0);  /* self-check of the result */
+
+  mpz_set (r, inv);
+
+  mpz_clear (inv);
+  mpz_clear (prod);
+}
+
+/* Calculate r satisfying r*a == 1 mod 2^n, with a given as an unsigned long. */
+void
+mpz_invert_ui_2exp (mpz_t r, unsigned long a, unsigned long n)
+{
+  mpz_t  az;
+  mpz_init_set_ui (az, a);  /* wrap a in a temporary mpz_t */
+  mpz_invert_2exp (r, az, n);  /* the work (and the oddness check on a) happens here */
+  mpz_clear (az);
+}
diff --git a/config.guess b/config.guess

index d6e9acde39ef1556316b2792812b6849ff52672c..2832cdb5f26d3b77a9f9ee0cb8b82d41f9c60c8e 100755 (executable)
--- a/config.guess
+++ b/config.guess
@@ -3,8 +3,8 @@
  # GMP config.guess wrapper.
  
  
-# Copyright 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2008, 2011 Free Software
-# Foundation, Inc.
+# Copyright 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2008, 2011, 2012 Free
+# Software Foundation, Inc.
  #
  # This file is part of the GNU MP Library.
  #
@@ -153,6 +153,37 @@ EOF
    rm -f $dummy.s $dummy.o $dummy
    ;;
  
+arm*-*-*)
+  cpu_code=`sed -n 's/^CPU part.*\(0x.*\)$/\1/p' /proc/cpuinfo 2>/dev/null`
+  case "$cpu_code" in
+      0x210)   exact_cpu="armxscale";;         # v5 pxa250
+      0x290)   exact_cpu="armxscale";;         # v5 pxa260
+      0x2d0)   exact_cpu="armxscale";;         # v5 pxa255
+      0x2d0)   exact_cpu="armxscale";;         # v5 pxa261
+      0x2d0)   exact_cpu="armxscale";;         # v5 pxa262
+      0x411)   exact_cpu="armxscale";;         # v5 pxa270
+      0x915)   exact_cpu="armti915t";;         # v?
+      0x925)   exact_cpu="armti925t";;         # v?
+      0x926)   exact_cpu="arm926";;            # v5
+      0x946)   exact_cpu="arm946";;            # v5
+      0x966)   exact_cpu="arm966";;            # v5
+      0xa11)   exact_cpu="armsa1";;            # v4 strongarm/sa1100
+      0xa26)   exact_cpu="arm1026";;           # v5
+      0xb02)   exact_cpu="arm11mpcore";;       # v6
+      0xb11)   exact_cpu="armsa1";;            # v4 strongarm/sa1110
+      0xb36)   exact_cpu="arm1136";;           # v6
+      0xb56)   exact_cpu="arm1156";;           # v6t2
+      0xb76)   exact_cpu="arm1176";;           # v6
+      0xc05)   exact_cpu="armcortexa5";;       # v7a
+      0xc08)   exact_cpu="armcortexa8";;       # v7a
+      0xc09)   exact_cpu="armcortexa9";;       # v7a
+      0xc0f)   exact_cpu="armcortexa15";;      # v7a
+      0xc14)   exact_cpu="armcortexr4";;       # v7r
+      0xc15)   exact_cpu="armcortexr5";;       # v7r
+      0xc23)   exact_cpu="armcortexm3";;       # v7m
+  esac
+  ;;
+
  ia64*-*-*)
    # CPUID[3] bits 24 to 31 is the processor family.  itanium2 is documented
    # as 0x1f, plain itanium has been seen returning 0x07 on two systems, but
@@ -780,6 +811,9 @@ main ()
           else if (model == 0x2d)       cpu_64bit = 1, modelstr = "coreisbr"; /* SBC-EP */
           else if (model == 0x2e)       cpu_64bit = 1, modelstr = "coreinhm"; /* NHM Beckton */
           else if (model == 0x2f)       cpu_64bit = 1, modelstr = "coreiwsm"; /* WSM Eagleton */
+         else if (model == 0x3a)       cpu_64bit = 1, modelstr = "coreisbr"; /* IBR */
+         else if (model == 0x3c)       cpu_64bit = 1, modelstr = "coreisbr"; /* Haswell */
+         else if (model == 0x36)       cpu_64bit = 1, modelstr = "atom";  /* Cedarview/Saltwell */
           else cpu_64bit = 1, modelstr = "corei"; /* default */
           break;
         case 15:
@@ -823,8 +857,8 @@ main ()
         case 21:                /* Bulldozer */
           cpu_64bit = 1, modelstr = "bulldozer";
           break;
-       case 22:                /* AMD Internal, assume future bulldozer */
-         cpu_64bit = 1, modelstr = "bulldozer";
+       case 22:                /* jaguar, an improved bobcat */
+         cpu_64bit = 1, modelstr = "bobcat";
           break;
         }
      }
diff --git a/config.in b/config.in

index 8980159862b5126e3d676c727935642b6aafeeea..57b138a297086adc43e2d6c9cb41bccbf7b3d5e5 100644 (file)
--- a/config.in
+++ b/config.in
@@ -1,9 +1,9 @@
-/* config.in.  Generated from configure.in by autoheader.  */
+/* config.in.  Generated from configure.ac by autoheader.  */
  
  /*
  
  Copyright 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006,
-2007, 2008, 2009, 2010, 2011 Free Software Foundation, Inc.
+2007, 2008, 2009, 2010, 2011, 2012, 2013 Free Software Foundation, Inc.
  
  This file is part of the GNU MP Library.
  
@@ -61,7 +61,7 @@ along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
  /* Define to 1 if you have the `clock' function. */
  #undef HAVE_CLOCK
  
-/* Define to 1 if you have the `clock_gettime' function. */
+/* Define to 1 if you have the `clock_gettime' function */
  #undef HAVE_CLOCK_GETTIME
  
  /* Define to 1 if you have the `cputime' function. */
@@ -224,9 +224,25 @@ along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
  #undef HAVE_NATIVE_mpn_add_n_sub_n
  #undef HAVE_NATIVE_mpn_add_nc
  #undef HAVE_NATIVE_mpn_addaddmul_1msb0
+#undef HAVE_NATIVE_mpn_addcnd_n
  #undef HAVE_NATIVE_mpn_addlsh1_n
  #undef HAVE_NATIVE_mpn_addlsh2_n
  #undef HAVE_NATIVE_mpn_addlsh_n
+#undef HAVE_NATIVE_mpn_addlsh1_nc
+#undef HAVE_NATIVE_mpn_addlsh2_nc
+#undef HAVE_NATIVE_mpn_addlsh_nc
+#undef HAVE_NATIVE_mpn_addlsh1_n_ip1
+#undef HAVE_NATIVE_mpn_addlsh2_n_ip1
+#undef HAVE_NATIVE_mpn_addlsh_n_ip1
+#undef HAVE_NATIVE_mpn_addlsh1_nc_ip1
+#undef HAVE_NATIVE_mpn_addlsh2_nc_ip1
+#undef HAVE_NATIVE_mpn_addlsh_nc_ip1
+#undef HAVE_NATIVE_mpn_addlsh1_n_ip2
+#undef HAVE_NATIVE_mpn_addlsh2_n_ip2
+#undef HAVE_NATIVE_mpn_addlsh_n_ip2
+#undef HAVE_NATIVE_mpn_addlsh1_nc_ip2
+#undef HAVE_NATIVE_mpn_addlsh2_nc_ip2
+#undef HAVE_NATIVE_mpn_addlsh_nc_ip2
  #undef HAVE_NATIVE_mpn_addmul_1c
  #undef HAVE_NATIVE_mpn_addmul_2
  #undef HAVE_NATIVE_mpn_addmul_3
@@ -235,6 +251,7 @@ along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
  #undef HAVE_NATIVE_mpn_addmul_6
  #undef HAVE_NATIVE_mpn_addmul_7
  #undef HAVE_NATIVE_mpn_addmul_8
+#undef HAVE_NATIVE_mpn_addmul_2s
  #undef HAVE_NATIVE_mpn_and_n
  #undef HAVE_NATIVE_mpn_andn_n
  #undef HAVE_NATIVE_mpn_bdiv_dbm1c
@@ -243,6 +260,7 @@ along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
  #undef HAVE_NATIVE_mpn_com
  #undef HAVE_NATIVE_mpn_copyd
  #undef HAVE_NATIVE_mpn_copyi
+#undef HAVE_NATIVE_mpn_div_qr_2
  #undef HAVE_NATIVE_mpn_divexact_1
  #undef HAVE_NATIVE_mpn_divexact_by3c
  #undef HAVE_NATIVE_mpn_divrem_1
@@ -269,6 +287,8 @@ along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
  #undef HAVE_NATIVE_mpn_mul_2
  #undef HAVE_NATIVE_mpn_mul_3
  #undef HAVE_NATIVE_mpn_mul_4
+#undef HAVE_NATIVE_mpn_mul_5
+#undef HAVE_NATIVE_mpn_mul_6
  #undef HAVE_NATIVE_mpn_mul_basecase
  #undef HAVE_NATIVE_mpn_nand_n
  #undef HAVE_NATIVE_mpn_nior_n
@@ -280,6 +300,9 @@ along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
  #undef HAVE_NATIVE_mpn_rsblsh1_n
  #undef HAVE_NATIVE_mpn_rsblsh2_n
  #undef HAVE_NATIVE_mpn_rsblsh_n
+#undef HAVE_NATIVE_mpn_rsblsh1_nc
+#undef HAVE_NATIVE_mpn_rsblsh2_nc
+#undef HAVE_NATIVE_mpn_rsblsh_nc
  #undef HAVE_NATIVE_mpn_rsh1add_n
  #undef HAVE_NATIVE_mpn_rsh1add_nc
  #undef HAVE_NATIVE_mpn_rsh1sub_n
@@ -287,12 +310,24 @@ along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
  #undef HAVE_NATIVE_mpn_rshift
  #undef HAVE_NATIVE_mpn_sqr_basecase
  #undef HAVE_NATIVE_mpn_sqr_diagonal
+#undef HAVE_NATIVE_mpn_sqr_diag_addlsh1
  #undef HAVE_NATIVE_mpn_sub_n
  #undef HAVE_NATIVE_mpn_sub_nc
+#undef HAVE_NATIVE_mpn_subcnd_n
  #undef HAVE_NATIVE_mpn_sublsh1_n
  #undef HAVE_NATIVE_mpn_sublsh2_n
  #undef HAVE_NATIVE_mpn_sublsh_n
+#undef HAVE_NATIVE_mpn_sublsh1_nc
+#undef HAVE_NATIVE_mpn_sublsh2_nc
+#undef HAVE_NATIVE_mpn_sublsh_nc
+#undef HAVE_NATIVE_mpn_sublsh1_n_ip1
+#undef HAVE_NATIVE_mpn_sublsh2_n_ip1
+#undef HAVE_NATIVE_mpn_sublsh_n_ip1
+#undef HAVE_NATIVE_mpn_sublsh1_nc_ip1
+#undef HAVE_NATIVE_mpn_sublsh2_nc_ip1
+#undef HAVE_NATIVE_mpn_sublsh_nc_ip1
  #undef HAVE_NATIVE_mpn_submul_1c
+#undef HAVE_NATIVE_mpn_tabselect
  #undef HAVE_NATIVE_mpn_udiv_qrnnd
  #undef HAVE_NATIVE_mpn_udiv_qrnnd_r
  #undef HAVE_NATIVE_mpn_umul_ppmm
@@ -457,6 +492,9 @@ along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
  /* Define to 1 if you have the `vsnprintf' function and it works properly. */
  #undef HAVE_VSNPRINTF
  
+/* Define to 1 for Windows/64 */
+#undef HOST_DOS64
+
  /* Assembler local label prefix */
  #undef LSYM_PREFIX
  
@@ -485,9 +523,6 @@ along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
  /* Define to the version of this package. */
  #undef PACKAGE_VERSION
  
-/* Define to 1 if the C compiler supports function prototypes. */
-#undef PROTOTYPES
-
  /* Define as the return type of signal handlers (`int' or `void'). */
  #undef RETSIGTYPE
  
@@ -521,6 +556,9 @@ along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
  /* Version number of package */
  #undef VERSION
  
+/* Defined to 1 as per --enable-assembly */
+#undef WANT_ASSEMBLY
+
  /* Define to 1 to enable ASSERT checking, per --enable-assert */
  #undef WANT_ASSERT
  
@@ -566,9 +604,6 @@ along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
     `char[]'. */
  #undef YYTEXT_POINTER
  
-/* Define like PROTOTYPES; this can be used by system headers. */
-#undef __PROTOTYPES
-
  /* Define to `__inline__' or `__inline' if that's what the C compiler
     calls it, or to nothing if 'inline' is not supported under any name.  */
  #ifndef __cplusplus
diff --git a/config.sub b/config.sub

index 4f68184dad728cf25d870a4a1723e9c00c8e888e..0777c0df037069063eade1223f7833c4f4926612 100755 (executable)
--- a/config.sub
+++ b/config.sub
@@ -116,6 +116,13 @@ z900 | z990 | z9 | z10 | z196)
  z900esa | z990esa | z9esa | z10esa | z196esa)
    test_cpu=s390;;
  
+armxscale | armxscale | armxscale | armxscale | armxscale | armxscale |        \
+armti915t | armti925t | arm926 | arm946 | arm966 | armsa1 | arm1026 |  \
+arm11mpcore | armsa1 | arm1136 | arm1156 | arm1176 | armcortexa5 |     \
+armcortexa8 | armcortexa9 | armcortexa15 | armcortexr4 | armcortexr5 | \
+armcortexm3)
+  test_cpu="arm";;
+
  *)
    # Don't need or want to change the given name, just run configfsf.sub
    $SHELL $configfsf_sub "$given_full"
diff --git a/configfsf.guess b/configfsf.guess

old mode 100644 (file)

new mode 100755 (executable)

index 187cd54..872b96a
--- a/configfsf.guess
+++ b/configfsf.guess
@@ -2,9 +2,9 @@
  # Attempt to guess a canonical system name.
  #   Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
  #   2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010,
-#   2011 Free Software Foundation, Inc.
+#   2011, 2012 Free Software Foundation, Inc.
  
-timestamp='2011-02-02'
+timestamp='2012-09-25'
  
  # This file is free software; you can redistribute it and/or modify it
  # under the terms of the GNU General Public License as published by
@@ -17,9 +17,7 @@ timestamp='2011-02-02'
  # General Public License for more details.
  #
  # You should have received a copy of the GNU General Public License
-# along with this program; if not, write to the Free Software
-# Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA
-# 02110-1301, USA.
+# along with this program; if not, see <http://www.gnu.org/licenses/>.
  #
  # As a special exception to the GNU General Public License, if you
  # distribute this file as part of a program that contains a
@@ -57,8 +55,8 @@ GNU config.guess ($timestamp)
  
  Originally written by Per Bothner.
  Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
-2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free
-Software Foundation, Inc.
+2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012
+Free Software Foundation, Inc.
  
  This is free software; see the source for copying conditions.  There is NO
  warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE."
@@ -145,7 +143,7 @@ UNAME_VERSION=`(uname -v) 2>/dev/null` || UNAME_VERSION=unknown
  case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in
      *:NetBSD:*:*)
         # NetBSD (nbsd) targets should (where applicable) match one or
-       # more of the tupples: *-*-netbsdelf*, *-*-netbsdaout*,
+       # more of the tuples: *-*-netbsdelf*, *-*-netbsdaout*,
         # *-*-netbsdecoff* and *-*-netbsd*.  For targets that recently
         # switched to ELF, *-*-netbsd* would select the old
         # object file format.  This provides both forward
@@ -202,6 +200,10 @@ case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in
         # CPU_TYPE-MANUFACTURER-OPERATING_SYSTEM is used.
         echo "${machine}-${os}${release}"
         exit ;;
+    *:Bitrig:*:*)
+       UNAME_MACHINE_ARCH=`arch | sed 's/Bitrig.//'`
+       echo ${UNAME_MACHINE_ARCH}-unknown-bitrig${UNAME_RELEASE}
+       exit ;;
      *:OpenBSD:*:*)
         UNAME_MACHINE_ARCH=`arch | sed 's/OpenBSD.//'`
         echo ${UNAME_MACHINE_ARCH}-unknown-openbsd${UNAME_RELEASE}
@@ -304,7 +306,7 @@ case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in
      arm:RISC*:1.[012]*:*|arm:riscix:1.[012]*:*)
         echo arm-acorn-riscix${UNAME_RELEASE}
         exit ;;
-    arm:riscos:*:*|arm:RISCOS:*:*)
+    arm*:riscos:*:*|arm*:RISCOS:*:*)
         echo arm-unknown-riscos
         exit ;;
      SR2?01:HI-UX/MPP:*:* | SR8000:HI-UX/MPP:*:*)
@@ -792,21 +794,26 @@ EOF
         echo ${UNAME_MACHINE}-unknown-bsdi${UNAME_RELEASE}
         exit ;;
      *:FreeBSD:*:*)
-       case ${UNAME_MACHINE} in
-           pc98)
-               echo i386-unknown-freebsd`echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'` ;;
+       UNAME_PROCESSOR=`/usr/bin/uname -p`
+       case ${UNAME_PROCESSOR} in
             amd64)
                 echo x86_64-unknown-freebsd`echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'` ;;
             *)
-               echo ${UNAME_MACHINE}-unknown-freebsd`echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'` ;;
+               echo ${UNAME_PROCESSOR}-unknown-freebsd`echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'` ;;
         esac
         exit ;;
      i*:CYGWIN*:*)
         echo ${UNAME_MACHINE}-pc-cygwin
         exit ;;
+    *:MINGW64*:*)
+       echo ${UNAME_MACHINE}-pc-mingw64
+       exit ;;
      *:MINGW*:*)
         echo ${UNAME_MACHINE}-pc-mingw32
         exit ;;
+    i*:MSYS*:*)
+       echo ${UNAME_MACHINE}-pc-msys
+       exit ;;
      i*:windows32*:*)
         # uname -m includes "-pc" on this system.
         echo ${UNAME_MACHINE}-mingw32
@@ -861,6 +868,13 @@ EOF
      i*86:Minix:*:*)
         echo ${UNAME_MACHINE}-pc-minix
         exit ;;
+    aarch64:Linux:*:*)
+       echo ${UNAME_MACHINE}-unknown-linux-gnu
+       exit ;;
+    aarch64_be:Linux:*:*)
+       UNAME_MACHINE=aarch64_be
+       echo ${UNAME_MACHINE}-unknown-linux-gnu
+       exit ;;
      alpha:Linux:*:*)
         case `sed -n '/^cpu model/s/^.*: \(.*\)/\1/p' < /proc/cpuinfo` in
           EV5)   UNAME_MACHINE=alphaev5 ;;
@@ -882,20 +896,29 @@ EOF
         then
             echo ${UNAME_MACHINE}-unknown-linux-gnu
         else
-           echo ${UNAME_MACHINE}-unknown-linux-gnueabi
+           if echo __ARM_PCS_VFP | $CC_FOR_BUILD -E - 2>/dev/null \
+               | grep -q __ARM_PCS_VFP
+           then
+               echo ${UNAME_MACHINE}-unknown-linux-gnueabi
+           else
+               echo ${UNAME_MACHINE}-unknown-linux-gnueabihf
+           fi
         fi
         exit ;;
      avr32*:Linux:*:*)
         echo ${UNAME_MACHINE}-unknown-linux-gnu
         exit ;;
      cris:Linux:*:*)
-       echo cris-axis-linux-gnu
+       echo ${UNAME_MACHINE}-axis-linux-gnu
         exit ;;
      crisv32:Linux:*:*)
-       echo crisv32-axis-linux-gnu
+       echo ${UNAME_MACHINE}-axis-linux-gnu
         exit ;;
      frv:Linux:*:*)
-       echo frv-unknown-linux-gnu
+       echo ${UNAME_MACHINE}-unknown-linux-gnu
+       exit ;;
+    hexagon:Linux:*:*)
+       echo ${UNAME_MACHINE}-unknown-linux-gnu
         exit ;;
      i*86:Linux:*:*)
         LIBC=gnu
@@ -937,7 +960,7 @@ EOF
         test x"${CPU}" != x && { echo "${CPU}-unknown-linux-gnu"; exit; }
         ;;
      or32:Linux:*:*)
-       echo or32-unknown-linux-gnu
+       echo ${UNAME_MACHINE}-unknown-linux-gnu
         exit ;;
      padre:Linux:*:*)
         echo sparc-unknown-linux-gnu
@@ -972,13 +995,13 @@ EOF
         echo ${UNAME_MACHINE}-unknown-linux-gnu
         exit ;;
      tile*:Linux:*:*)
-       echo ${UNAME_MACHINE}-tilera-linux-gnu
+       echo ${UNAME_MACHINE}-unknown-linux-gnu
         exit ;;
      vax:Linux:*:*)
         echo ${UNAME_MACHINE}-dec-linux-gnu
         exit ;;
      x86_64:Linux:*:*)
-       echo x86_64-unknown-linux-gnu
+       echo ${UNAME_MACHINE}-unknown-linux-gnu
         exit ;;
      xtensa*:Linux:*:*)
         echo ${UNAME_MACHINE}-unknown-linux-gnu
@@ -1185,6 +1208,9 @@ EOF
      BePC:Haiku:*:*)    # Haiku running on Intel PC compatible.
         echo i586-pc-haiku
         exit ;;
+    x86_64:Haiku:*:*)
+       echo x86_64-unknown-haiku
+       exit ;;
      SX-4:SUPER-UX:*:*)
         echo sx4-nec-superux${UNAME_RELEASE}
         exit ;;
@@ -1240,7 +1266,7 @@ EOF
      NEO-?:NONSTOP_KERNEL:*:*)
         echo neo-tandem-nsk${UNAME_RELEASE}
         exit ;;
-    NSE-?:NONSTOP_KERNEL:*:*)
+    NSE-*:NONSTOP_KERNEL:*:*)
         echo nse-tandem-nsk${UNAME_RELEASE}
         exit ;;
      NSR-?:NONSTOP_KERNEL:*:*)
@@ -1309,11 +1335,11 @@ EOF
      i*86:AROS:*:*)
         echo ${UNAME_MACHINE}-pc-aros
         exit ;;
+    x86_64:VMkernel:*:*)
+       echo ${UNAME_MACHINE}-unknown-esx
+       exit ;;
  esac
  
-#echo '(No uname command or uname output not recognized.)' 1>&2
-#echo "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" 1>&2
-
  eval $set_cc_for_build
  cat >$dummy.c <<EOF
  #ifdef _SEQUENT_
diff --git a/configfsf.sub b/configfsf.sub

old mode 100644 (file)

new mode 100755 (executable)

index 30fdca8..8df5511
--- a/configfsf.sub
+++ b/configfsf.sub
@@ -2,9 +2,9 @@
  # Configuration validation subroutine script.
  #   Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
  #   2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010,
-#   2011 Free Software Foundation, Inc.
+#   2011, 2012 Free Software Foundation, Inc.
  
-timestamp='2011-03-23'
+timestamp='2012-12-06'
  
  # This file is (in principle) common to ALL GNU software.
  # The presence of a machine in this file suggests that SOME GNU software
@@ -21,9 +21,7 @@ timestamp='2011-03-23'
  # GNU General Public License for more details.
  #
  # You should have received a copy of the GNU General Public License
-# along with this program; if not, write to the Free Software
-# Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA
-# 02110-1301, USA.
+# along with this program; if not, see <http://www.gnu.org/licenses/>.
  #
  # As a special exception to the GNU General Public License, if you
  # distribute this file as part of a program that contains a
@@ -76,8 +74,8 @@ version="\
  GNU config.sub ($timestamp)
  
  Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
-2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free
-Software Foundation, Inc.
+2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012
+Free Software Foundation, Inc.
  
  This is free software; see the source for copying conditions.  There is NO
  warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE."
@@ -125,13 +123,17 @@ esac
  maybe_os=`echo $1 | sed 's/^\(.*\)-\([^-]*-[^-]*\)$/\2/'`
  case $maybe_os in
    nto-qnx* | linux-gnu* | linux-android* | linux-dietlibc | linux-newlib* | \
-  linux-uclibc* | uclinux-uclibc* | uclinux-gnu* | kfreebsd*-gnu* | \
+  linux-musl* | linux-uclibc* | uclinux-uclibc* | uclinux-gnu* | kfreebsd*-gnu* | \
    knetbsd*-gnu* | netbsd*-gnu* | \
    kopensolaris*-gnu* | \
    storm-chaos* | os2-emx* | rtmk-nova*)
      os=-$maybe_os
      basic_machine=`echo $1 | sed 's/^\(.*\)-\([^-]*-[^-]*\)$/\1/'`
      ;;
+  android-linux)
+    os=-linux-android
+    basic_machine=`echo $1 | sed 's/^\(.*\)-\([^-]*-[^-]*\)$/\1/'`-unknown
+    ;;
    *)
      basic_machine=`echo $1 | sed 's/-[^-]*$//'`
      if [ $basic_machine != $1 ]
@@ -154,7 +156,7 @@ case $os in
         -convergent* | -ncr* | -news | -32* | -3600* | -3100* | -hitachi* |\
         -c[123]* | -convex* | -sun | -crds | -omron* | -dg | -ultra | -tti* | \
         -harris | -dolphin | -highlevel | -gould | -cbm | -ns | -masscomp | \
-       -apple | -axis | -knuth | -cray | -microblaze)
+       -apple | -axis | -knuth | -cray | -microblaze*)
                 os=
                 basic_machine=$1
                 ;;
@@ -223,6 +225,12 @@ case $os in
         -isc*)
                 basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'`
                 ;;
+       -lynx*178)
+               os=-lynxos178
+               ;;
+       -lynx*5)
+               os=-lynxos5
+               ;;
         -lynx*)
                 os=-lynxos
                 ;;
@@ -247,20 +255,27 @@ case $basic_machine in
         # Some are omitted here because they have special meanings below.
         1750a | 580 \
         | a29k \
+       | aarch64 | aarch64_be \
         | alpha | alphaev[4-8] | alphaev56 | alphaev6[78] | alphapca5[67] \
         | alpha64 | alpha64ev[4-8] | alpha64ev56 | alpha64ev6[78] | alpha64pca5[67] \
         | am33_2.0 \
-       | arc | arm | arm[bl]e | arme[lb] | armv[2345] | armv[345][lb] | avr | avr32 \
+       | arc \
+       | arm | arm[bl]e | arme[lb] | armv[2-8] | armv[3-8][lb] | armv7[arm] \
+       | avr | avr32 \
+       | be32 | be64 \
         | bfin \
         | c4x | clipper \
         | d10v | d30v | dlx | dsp16xx \
+       | epiphany \
         | fido | fr30 | frv \
         | h8300 | h8500 | hppa | hppa1.[01] | hppa2.0 | hppa2.0[nw] | hppa64 \
+       | hexagon \
         | i370 | i860 | i960 | ia64 \
         | ip2k | iq2000 \
+       | le32 | le64 \
         | lm32 \
         | m32c | m32r | m32rle | m68000 | m68k | m88k \
-       | maxq | mb | microblaze | mcore | mep | metag \
+       | maxq | mb | microblaze | microblazeel | mcore | mep | metag \
         | mips | mipsbe | mipseb | mipsel | mipsle \
         | mips16 \
         | mips64 | mips64el \
@@ -291,7 +306,7 @@ case $basic_machine in
         | pdp10 | pdp11 | pj | pjl \
         | powerpc | powerpc64 | powerpc64le | powerpcle \
         | pyramid \
-       | rx \
+       | rl78 | rx \
         | score \
         | sh | sh[1234] | sh[24]a | sh[24]aeb | sh[23]e | sh[34]eb | sheb | shbe | shle | sh[1234]le | sh3ele \
         | sh64 | sh64le \
@@ -300,7 +315,7 @@ case $basic_machine in
         | spu \
         | tahoe | tic4x | tic54x | tic55x | tic6x | tic80 | tron \
         | ubicom32 \
-       | v850 | v850e \
+       | v850 | v850e | v850e1 | v850e2 | v850es | v850e2v3 \
         | we32k \
         | x86 | xc16x | xstormy16 | xtensa \
         | z8k | z80)
@@ -315,8 +330,7 @@ case $basic_machine in
         c6x)
                 basic_machine=tic6x-unknown
                 ;;
-       m6811 | m68hc11 | m6812 | m68hc12 | picochip)
-               # Motorola 68HC11/12.
+       m6811 | m68hc11 | m6812 | m68hc12 | m68hcs12x | picochip)
                 basic_machine=$basic_machine-unknown
                 os=-none
                 ;;
@@ -329,7 +343,10 @@ case $basic_machine in
         strongarm | thumb | xscale)
                 basic_machine=arm-unknown
                 ;;
-
+       xgate)
+               basic_machine=$basic_machine-unknown
+               os=-none
+               ;;
         xscaleeb)
                 basic_machine=armeb-unknown
                 ;;
@@ -352,11 +369,13 @@ case $basic_machine in
         # Recognize the basic CPU types with company name.
         580-* \
         | a29k-* \
+       | aarch64-* | aarch64_be-* \
         | alpha-* | alphaev[4-8]-* | alphaev56-* | alphaev6[78]-* \
         | alpha64-* | alpha64ev[4-8]-* | alpha64ev56-* | alpha64ev6[78]-* \
         | alphapca5[67]-* | alpha64pca5[67]-* | arc-* \
         | arm-*  | armbe-* | armle-* | armeb-* | armv*-* \
         | avr-* | avr32-* \
+       | be32-* | be64-* \
         | bfin-* | bs2000-* \
         | c[123]* | c30-* | [cjt]90-* | c4x-* \
         | clipper-* | craynv-* | cydra-* \
@@ -365,12 +384,15 @@ case $basic_machine in
         | f30[01]-* | f700-* | fido-* | fr30-* | frv-* | fx80-* \
         | h8300-* | h8500-* \
         | hppa-* | hppa1.[01]-* | hppa2.0-* | hppa2.0[nw]-* | hppa64-* \
+       | hexagon-* \
         | i*86-* | i860-* | i960-* | ia64-* \
         | ip2k-* | iq2000-* \
+       | le32-* | le64-* \
         | lm32-* \
         | m32c-* | m32r-* | m32rle-* \
         | m68000-* | m680[012346]0-* | m68360-* | m683?2-* | m68k-* \
-       | m88110-* | m88k-* | maxq-* | mcore-* | metag-* | microblaze-* \
+       | m88110-* | m88k-* | maxq-* | mcore-* | metag-* \
+       | microblaze-* | microblazeel-* \
         | mips-* | mipsbe-* | mipseb-* | mipsel-* | mipsle-* \
         | mips16-* \
         | mips64-* | mips64el-* \
@@ -400,7 +422,7 @@ case $basic_machine in
         | pdp10-* | pdp11-* | pj-* | pjl-* | pn-* | power-* \
         | powerpc-* | powerpc64-* | powerpc64le-* | powerpcle-* \
         | pyramid-* \
-       | romp-* | rs6000-* | rx-* \
+       | rl78-* | romp-* | rs6000-* | rx-* \
         | sh-* | sh[1234]-* | sh[24]a-* | sh[24]aeb-* | sh[23]e-* | sh[34]eb-* | sheb-* | shbe-* \
         | shle-* | sh[1234]le-* | sh3ele-* | sh64-* | sh64le-* \
         | sparc-* | sparc64-* | sparc64b-* | sparc64v-* | sparc86x-* | sparclet-* \
@@ -408,10 +430,11 @@ case $basic_machine in
         | sparcv8-* | sparcv9-* | sparcv9b-* | sparcv9v-* | sv1-* | sx?-* \
         | tahoe-* \
         | tic30-* | tic4x-* | tic54x-* | tic55x-* | tic6x-* | tic80-* \
-       | tile-* | tilegx-* \
+       | tile*-* \
         | tron-* \
         | ubicom32-* \
-       | v850-* | v850e-* | vax-* \
+       | v850-* | v850e-* | v850e1-* | v850es-* | v850e2-* | v850e2v3-* \
+       | vax-* \
         | we32k-* \
         | x86-* | x86_64-* | xc16x-* | xps100-* \
         | xstormy16-* | xtensa*-* \
@@ -711,7 +734,6 @@ case $basic_machine in
         i370-ibm* | ibm*)
                 basic_machine=i370-ibm
                 ;;
-# I'm not sure what "Sysv32" means.  Should this be sysv3.2?
         i*86v32)
                 basic_machine=`echo $1 | sed -e 's/86.*/86-pc/'`
                 os=-sysv32
@@ -769,9 +791,13 @@ case $basic_machine in
                 basic_machine=ns32k-utek
                 os=-sysv
                 ;;
-       microblaze)
+       microblaze*)
                 basic_machine=microblaze-xilinx
                 ;;
+       mingw64)
+               basic_machine=x86_64-pc
+               os=-mingw64
+               ;;
         mingw32)
                 basic_machine=i386-pc
                 os=-mingw32
@@ -808,10 +834,18 @@ case $basic_machine in
         ms1-*)
                 basic_machine=`echo $basic_machine | sed -e 's/ms1-/mt-/'`
                 ;;
+       msys)
+               basic_machine=i386-pc
+               os=-msys
+               ;;
         mvs)
                 basic_machine=i370-ibm
                 os=-mvs
                 ;;
+       nacl)
+               basic_machine=le32-unknown
+               os=-nacl
+               ;;
         ncr3000)
                 basic_machine=i486-ncr
                 os=-sysv4
@@ -992,7 +1026,11 @@ case $basic_machine in
                 basic_machine=i586-unknown
                 os=-pw32
                 ;;
-       rdos)
+       rdos | rdos64)
+               basic_machine=x86_64-pc
+               os=-rdos
+               ;;
+       rdos32)
                 basic_machine=i386-pc
                 os=-rdos
                 ;;
@@ -1120,13 +1158,8 @@ case $basic_machine in
                 basic_machine=t90-cray
                 os=-unicos
                 ;;
-       # This must be matched before tile*.
-       tilegx*)
-               basic_machine=tilegx-unknown
-               os=-linux-gnu
-               ;;
         tile*)
-               basic_machine=tile-unknown
+               basic_machine=$basic_machine-unknown
                 os=-linux-gnu
                 ;;
         tx39)
@@ -1330,15 +1363,15 @@ case $os in
               | -nindy* | -vxsim* | -vxworks* | -ebmon* | -hms* | -mvs* \
               | -clix* | -riscos* | -uniplus* | -iris* | -rtu* | -xenix* \
               | -hiux* | -386bsd* | -knetbsd* | -mirbsd* | -netbsd* \
-             | -openbsd* | -solidbsd* \
+             | -bitrig* | -openbsd* | -solidbsd* \
               | -ekkobsd* | -kfreebsd* | -freebsd* | -riscix* | -lynxos* \
               | -bosx* | -nextstep* | -cxux* | -aout* | -elf* | -oabi* \
               | -ptx* | -coff* | -ecoff* | -winnt* | -domain* | -vsta* \
               | -udi* | -eabi* | -lites* | -ieee* | -go32* | -aux* \
               | -chorusos* | -chorusrdb* | -cegcc* \
-             | -cygwin* | -pe* | -psos* | -moss* | -proelf* | -rtems* \
-             | -mingw32* | -linux-gnu* | -linux-android* \
-             | -linux-newlib* | -linux-uclibc* \
+             | -cygwin* | -msys* | -pe* | -psos* | -moss* | -proelf* | -rtems* \
+             | -mingw32* | -mingw64* | -linux-gnu* | -linux-android* \
+             | -linux-newlib* | -linux-musl* | -linux-uclibc* \
               | -uxpv* | -beos* | -mpeix* | -udk* \
               | -interix* | -uwin* | -mks* | -rhapsody* | -darwin* | -opened* \
               | -openstep* | -oskit* | -conix* | -pw32* | -nonstopux* \
@@ -1521,6 +1554,9 @@ case $basic_machine in
         c4x-* | tic4x-*)
                 os=-coff
                 ;;
+       hexagon-*)
+               os=-elf
+               ;;
         tic54x-*)
                 os=-coff
                 ;;
@@ -1548,9 +1584,6 @@ case $basic_machine in
                 ;;
         m68000-sun)
                 os=-sunos3
-               # This also exists in the configure program, but was not the
-               # default.
-               # os=-sunos4
                 ;;
         m68*-cisco)
                 os=-aout
diff --git a/configure b/configure

index e8378cca2007100776df0bdbc29499023265cd64..eaadd20b23387dedf8c8a71dfd6b9a1d4c7b98c0 100755 (executable)
--- a/configure
+++ b/configure
@@ -1,14 +1,14 @@
  #! /bin/sh
-# From configure.in Revision.
+# From configure.ac Revision.
  # Guess values for system-dependent variables and create Makefiles.
-# Generated by GNU Autoconf 2.65 for GNU MP 5.0.5.
+# Generated by GNU Autoconf 2.69 for GNU MP 5.1.3.
  #
  # Report bugs to <gmp-bugs@gmplib.org, see http://gmplib.org/manual/Reporting-Bugs.html>.
  #
  #
  #
  # Copyright 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006,
-# 2007, 2008, 2009, 2010, 2011 Free Software Foundation, Inc.
+# 2007, 2008, 2009, 2010, 2011, 2012, 2013 Free Software Foundation, Inc.
  #
  # This file is part of the GNU MP Library.
  #
@@ -27,9 +27,7 @@
  #
  #
  #
-# Copyright (C) 1992, 1993, 1994, 1995, 1996, 1998, 1999, 2000, 2001,
-# 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation,
-# Inc.
+# Copyright (C) 1992-1996, 1998-2012 Free Software Foundation, Inc.
  #
  #
  # This configure script is free software; the Free Software Foundation
@@ -113,6 +111,7 @@ fi
  IFS=" ""       $as_nl"
  
  # Find who we are.  Look in the path if we contain no directory separator.
+as_myself=
  case $0 in #((
    *[\\/]* ) as_myself=$0 ;;
    *) as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
@@ -157,6 +156,31 @@ export LANGUAGE
  # CDPATH.
  (unset CDPATH) >/dev/null 2>&1 && unset CDPATH
  
+# Use a proper internal environment variable to ensure we don't fall
+  # into an infinite loop, continuously re-executing ourselves.
+  if test x"${_as_can_reexec}" != xno && test "x$CONFIG_SHELL" != x; then
+    _as_can_reexec=no; export _as_can_reexec;
+    # We cannot yet assume a decent shell, so we have to provide a
+# neutralization value for shells without unset; and this also
+# works around shells that cannot unset nonexistent variables.
+# Preserve -v and -x to the replacement shell.
+BASH_ENV=/dev/null
+ENV=/dev/null
+(unset BASH_ENV) >/dev/null 2>&1 && unset BASH_ENV ENV
+case $- in # ((((
+  *v*x* | *x*v* ) as_opts=-vx ;;
+  *v* ) as_opts=-v ;;
+  *x* ) as_opts=-x ;;
+  * ) as_opts= ;;
+esac
+exec $CONFIG_SHELL $as_opts "$as_myself" ${1+"$@"}
+# Admittedly, this is quite paranoid, since all the known shells bail
+# out after a failed `exec'.
+$as_echo "$0: could not re-execute with $CONFIG_SHELL" >&2
+as_fn_exit 255
+  fi
+  # We don't want this to propagate to other subprocesses.
+          { _as_can_reexec=; unset _as_can_reexec;}
  if test "x$CONFIG_SHELL" = x; then
    as_bourne_compatible="if test -n \"\${ZSH_VERSION+set}\" && (emulate sh) >/dev/null 2>&1; then :
    emulate sh
@@ -190,7 +214,8 @@ if ( set x; as_fn_ret_success y && test x = \"\$1\" ); then :
  else
    exitcode=1; echo positional parameters were not saved.
  fi
-test x\$exitcode = x0 || exit 1"
+test x\$exitcode = x0 || exit 1
+test -x / || exit 1"
    as_suggested="  as_lineno_1=";as_suggested=$as_suggested$LINENO;as_suggested=$as_suggested" as_lineno_1a=\$LINENO
    as_lineno_2=";as_suggested=$as_suggested$LINENO;as_suggested=$as_suggested" as_lineno_2a=\$LINENO
    eval 'test \"x\$as_lineno_1'\$as_run'\" != \"x\$as_lineno_2'\$as_run'\" &&
@@ -243,14 +268,25 @@ IFS=$as_save_IFS
  
  
        if test "x$CONFIG_SHELL" != x; then :
-  # We cannot yet assume a decent shell, so we have to provide a
-       # neutralization value for shells without unset; and this also
-       # works around shells that cannot unset nonexistent variables.
-       BASH_ENV=/dev/null
-       ENV=/dev/null
-       (unset BASH_ENV) >/dev/null 2>&1 && unset BASH_ENV ENV
-       export CONFIG_SHELL
-       exec "$CONFIG_SHELL" "$as_myself" ${1+"$@"}
+  export CONFIG_SHELL
+             # We cannot yet assume a decent shell, so we have to provide a
+# neutralization value for shells without unset; and this also
+# works around shells that cannot unset nonexistent variables.
+# Preserve -v and -x to the replacement shell.
+BASH_ENV=/dev/null
+ENV=/dev/null
+(unset BASH_ENV) >/dev/null 2>&1 && unset BASH_ENV ENV
+case $- in # ((((
+  *v*x* | *x*v* ) as_opts=-vx ;;
+  *v* ) as_opts=-v ;;
+  *x* ) as_opts=-x ;;
+  * ) as_opts= ;;
+esac
+exec $CONFIG_SHELL $as_opts "$as_myself" ${1+"$@"}
+# Admittedly, this is quite paranoid, since all the known shells bail
+# out after a failed `exec'.
+$as_echo "$0: could not re-execute with $CONFIG_SHELL" >&2
+exit 255
  fi
  
      if test x$as_have_required = xno; then :
@@ -350,10 +386,18 @@ $as_echo X"$as_dir" |
        test -d "$as_dir" && break
      done
      test -z "$as_dirs" || eval "mkdir $as_dirs"
-  } || test -d "$as_dir" || as_fn_error "cannot create directory $as_dir"
+  } || test -d "$as_dir" || as_fn_error $? "cannot create directory $as_dir"
  
  
  } # as_fn_mkdir_p
+
+# as_fn_executable_p FILE
+# -----------------------
+# Test if FILE is an executable regular file.
+as_fn_executable_p ()
+{
+  test -f "$1" && test -x "$1"
+} # as_fn_executable_p
  # as_fn_append VAR VALUE
  # ----------------------
  # Append the text in VALUE to the end of the definition contained in VAR. Take
@@ -390,19 +434,19 @@ else
  fi # as_fn_arith
  
  
-# as_fn_error ERROR [LINENO LOG_FD]
-# ---------------------------------
+# as_fn_error STATUS ERROR [LINENO LOG_FD]
+# ----------------------------------------
  # Output "`basename $0`: error: ERROR" to stderr. If LINENO and LOG_FD are
  # provided, also output the error to LOG_FD, referencing LINENO. Then exit the
-# script with status $?, using 1 if that was 0.
+# script with STATUS, using 1 if that was 0.
  as_fn_error ()
  {
-  as_status=$?; test $as_status -eq 0 && as_status=1
-  if test "$3"; then
-    as_lineno=${as_lineno-"$2"} as_lineno_stack=as_lineno_stack=$as_lineno_stack
-    $as_echo "$as_me:${as_lineno-$LINENO}: error: $1" >&$3
+  as_status=$1; test $as_status -eq 0 && as_status=1
+  if test "$4"; then
+    as_lineno=${as_lineno-"$3"} as_lineno_stack=as_lineno_stack=$as_lineno_stack
+    $as_echo "$as_me:${as_lineno-$LINENO}: error: $2" >&$4
    fi
-  $as_echo "$as_me: error: $1" >&2
+  $as_echo "$as_me: error: $2" >&2
    as_fn_exit $as_status
  } # as_fn_error
  
@@ -475,6 +519,10 @@ as_cr_alnum=$as_cr_Letters$as_cr_digits
    chmod +x "$as_me.lineno" ||
      { $as_echo "$as_me: error: cannot create $as_me.lineno; rerun with a POSIX shell" >&2; as_fn_exit 1; }
  
+  # If we had to re-execute with $CONFIG_SHELL, we're ensured to have
+  # already done that, so ensure we don't try to do so again and fall
+  # in an infinite loop.  This has already happened in practice.
+  _as_can_reexec=no; export _as_can_reexec
    # Don't try to exec as it changes $[0], causing all sort of problems
    # (the dirname of $[0] is not the place where we might find the
    # original and so on.  Autoconf is especially sensitive to this).
@@ -509,16 +557,16 @@ if (echo >conf$$.file) 2>/dev/null; then
      # ... but there are two gotchas:
      # 1) On MSYS, both `ln -s file dir' and `ln file dir' fail.
      # 2) DJGPP < 2.04 has no symlinks; `ln -s' creates a wrapper executable.
-    # In both cases, we have to default to `cp -p'.
+    # In both cases, we have to default to `cp -pR'.
      ln -s conf$$.file conf$$.dir 2>/dev/null && test ! -f conf$$.exe ||
-      as_ln_s='cp -p'
+      as_ln_s='cp -pR'
    elif ln conf$$.file conf$$ 2>/dev/null; then
      as_ln_s=ln
    else
-    as_ln_s='cp -p'
+    as_ln_s='cp -pR'
    fi
  else
-  as_ln_s='cp -p'
+  as_ln_s='cp -pR'
  fi
  rm -f conf$$ conf$$.exe conf$$.dir/conf$$.file conf$$.file
  rmdir conf$$.dir 2>/dev/null
@@ -530,28 +578,8 @@ else
    as_mkdir_p=false
  fi
  
-if test -x / >/dev/null 2>&1; then
-  as_test_x='test -x'
-else
-  if ls -dL / >/dev/null 2>&1; then
-    as_ls_L_option=L
-  else
-    as_ls_L_option=
-  fi
-  as_test_x='
-    eval sh -c '\''
-      if test -d "$1"; then
-       test -d "$1/.";
-      else
-       case $1 in #(
-       -*)set "./$1";;
-       esac;
-       case `ls -ld'$as_ls_L_option' "$1" 2>/dev/null` in #((
-       ???[sx]*):;;*)false;;esac;fi
-    '\'' sh
-  '
-fi
-as_executable_p=$as_test_x
+as_test_x='test -x'
+as_executable_p=as_fn_executable_p
  
  # Sed expression to map a string onto a valid CPP name.
  as_tr_cpp="eval sed 'y%*$as_cr_letters%P$as_cr_LETTERS%;s%[^_$as_cr_alnum]%_%g'"
@@ -566,7 +594,7 @@ test -n "$DJDIR" || exec 7<&0 </dev/null
  exec 6>&1
  
  # Name of the host.
-# hostname on some systems (SVR3.2, Linux) returns a bogus exit status,
+# hostname on some systems (SVR3.2, old GNU/Linux) returns a bogus exit status,
  # so uname gets run too.
  ac_hostname=`(hostname || uname -n) 2>/dev/null | sed 1q`
  
@@ -585,8 +613,8 @@ MAKEFLAGS=
  # Identity of this package.
  PACKAGE_NAME='GNU MP'
  PACKAGE_TARNAME='gmp'
-PACKAGE_VERSION='5.0.5'
-PACKAGE_STRING='GNU MP 5.0.5'
+PACKAGE_VERSION='5.1.3'
+PACKAGE_STRING='GNU MP 5.1.3'
  PACKAGE_BUGREPORT='gmp-bugs@gmplib.org, see http://gmplib.org/manual/Reporting-Bugs.html'
  PACKAGE_URL='http://www.gnu.org/software/gmp/'
  
@@ -652,9 +680,9 @@ TUNE_SQR_OBJ
  gmp_srclinks
  mpn_objs_in_libgmp
  mpn_objects
-mpn_objs_in_libmp
  GMP_LIMB_BITS
  M4
+TUNE_LIBS
  TAL_OBJECT
  LIBM
  ENABLE_STATIC_FALSE
@@ -681,8 +709,6 @@ ac_ct_DUMPBIN
  DUMPBIN
  AR
  ASMFLAGS
-ANSI2KNR
-U
  EGREP
  GREP
  CXXCPP
@@ -715,8 +741,6 @@ HAVE_HOST_CPU_FAMILY_powerpc
  HAVE_HOST_CPU_FAMILY_power
  ABI
  GMP_NAIL_BITS
-WANT_MPBSD_FALSE
-WANT_MPBSD_TRUE
  MAINT
  MAINTAINER_MODE_FALSE
  MAINTAINER_MODE_TRUE
@@ -796,9 +820,9 @@ enable_maintainer_mode
  enable_assert
  enable_alloca
  enable_cxx
+enable_assembly
  enable_fft
  enable_old_fft_full
-enable_mpbsd
  enable_nails
  enable_profiling
  with_readline
@@ -893,8 +917,9 @@ do
    fi
  
    case $ac_option in
-  *=*) ac_optarg=`expr "X$ac_option" : '[^=]*=\(.*\)'` ;;
-  *)   ac_optarg=yes ;;
+  *=?*) ac_optarg=`expr "X$ac_option" : '[^=]*=\(.*\)'` ;;
+  *=)   ac_optarg= ;;
+  *)    ac_optarg=yes ;;
    esac
  
    # Accept the important Cygnus configure options, so we can diagnose typos.
@@ -939,7 +964,7 @@ do
      ac_useropt=`expr "x$ac_option" : 'x-*disable-\(.*\)'`
      # Reject names that are not valid shell variable names.
      expr "x$ac_useropt" : ".*[^-+._$as_cr_alnum]" >/dev/null &&
-      as_fn_error "invalid feature name: $ac_useropt"
+      as_fn_error $? "invalid feature name: $ac_useropt"
      ac_useropt_orig=$ac_useropt
      ac_useropt=`$as_echo "$ac_useropt" | sed 's/[-+.]/_/g'`
      case $ac_user_opts in
@@ -965,7 +990,7 @@ do
      ac_useropt=`expr "x$ac_option" : 'x-*enable-\([^=]*\)'`
      # Reject names that are not valid shell variable names.
      expr "x$ac_useropt" : ".*[^-+._$as_cr_alnum]" >/dev/null &&
-      as_fn_error "invalid feature name: $ac_useropt"
+      as_fn_error $? "invalid feature name: $ac_useropt"
      ac_useropt_orig=$ac_useropt
      ac_useropt=`$as_echo "$ac_useropt" | sed 's/[-+.]/_/g'`
      case $ac_user_opts in
@@ -1169,7 +1194,7 @@ do
      ac_useropt=`expr "x$ac_option" : 'x-*with-\([^=]*\)'`
      # Reject names that are not valid shell variable names.
      expr "x$ac_useropt" : ".*[^-+._$as_cr_alnum]" >/dev/null &&
-      as_fn_error "invalid package name: $ac_useropt"
+      as_fn_error $? "invalid package name: $ac_useropt"
      ac_useropt_orig=$ac_useropt
      ac_useropt=`$as_echo "$ac_useropt" | sed 's/[-+.]/_/g'`
      case $ac_user_opts in
@@ -1185,7 +1210,7 @@ do
      ac_useropt=`expr "x$ac_option" : 'x-*without-\(.*\)'`
      # Reject names that are not valid shell variable names.
      expr "x$ac_useropt" : ".*[^-+._$as_cr_alnum]" >/dev/null &&
-      as_fn_error "invalid package name: $ac_useropt"
+      as_fn_error $? "invalid package name: $ac_useropt"
      ac_useropt_orig=$ac_useropt
      ac_useropt=`$as_echo "$ac_useropt" | sed 's/[-+.]/_/g'`
      case $ac_user_opts in
@@ -1215,8 +1240,8 @@ do
    | --x-librar=* | --x-libra=* | --x-libr=* | --x-lib=* | --x-li=* | --x-l=*)
      x_libraries=$ac_optarg ;;
  
-  -*) as_fn_error "unrecognized option: \`$ac_option'
-Try \`$0 --help' for more information."
+  -*) as_fn_error $? "unrecognized option: \`$ac_option'
+Try \`$0 --help' for more information"
      ;;
  
    *=*)
@@ -1224,7 +1249,7 @@ Try \`$0 --help' for more information."
      # Reject names that are not valid shell variable names.
      case $ac_envvar in #(
        '' | [0-9]* | *[!_$as_cr_alnum]* )
-      as_fn_error "invalid variable name: \`$ac_envvar'" ;;
+      as_fn_error $? "invalid variable name: \`$ac_envvar'" ;;
      esac
      eval $ac_envvar=\$ac_optarg
      export $ac_envvar ;;
@@ -1234,7 +1259,7 @@ Try \`$0 --help' for more information."
      $as_echo "$as_me: WARNING: you should use --build, --host, --target" >&2
      expr "x$ac_option" : ".*[^-._$as_cr_alnum]" >/dev/null &&
        $as_echo "$as_me: WARNING: invalid host type: $ac_option" >&2
-    : ${build_alias=$ac_option} ${host_alias=$ac_option} ${target_alias=$ac_option}
+    : "${build_alias=$ac_option} ${host_alias=$ac_option} ${target_alias=$ac_option}"
      ;;
  
    esac
@@ -1242,13 +1267,13 @@ done
  
  if test -n "$ac_prev"; then
    ac_option=--`echo $ac_prev | sed 's/_/-/g'`
-  as_fn_error "missing argument to $ac_option"
+  as_fn_error $? "missing argument to $ac_option"
  fi
  
  if test -n "$ac_unrecognized_opts"; then
    case $enable_option_checking in
      no) ;;
-    fatal) as_fn_error "unrecognized options: $ac_unrecognized_opts" ;;
+    fatal) as_fn_error $? "unrecognized options: $ac_unrecognized_opts" ;;
      *)     $as_echo "$as_me: WARNING: unrecognized options: $ac_unrecognized_opts" >&2 ;;
    esac
  fi
@@ -1271,7 +1296,7 @@ do
      [\\/$]* | ?:[\\/]* )  continue;;
      NONE | '' ) case $ac_var in *prefix ) continue;; esac;;
    esac
-  as_fn_error "expected an absolute directory name for --$ac_var: $ac_val"
+  as_fn_error $? "expected an absolute directory name for --$ac_var: $ac_val"
  done
  
  # There might be people who depend on the old broken behavior: `$host'
@@ -1285,8 +1310,6 @@ target=$target_alias
  if test "x$host_alias" != x; then
    if test "x$build_alias" = x; then
      cross_compiling=maybe
-    $as_echo "$as_me: WARNING: If you wanted to set the --build type, don't use --host.
-    If a cross compiler is detected then cross compile mode will be used." >&2
    elif test "x$build_alias" != "x$host_alias"; then
      cross_compiling=yes
    fi
@@ -1301,9 +1324,9 @@ test "$silent" = yes && exec 6>/dev/null
  ac_pwd=`pwd` && test -n "$ac_pwd" &&
  ac_ls_di=`ls -di .` &&
  ac_pwd_ls_di=`cd "$ac_pwd" && ls -di .` ||
-  as_fn_error "working directory cannot be determined"
+  as_fn_error $? "working directory cannot be determined"
  test "X$ac_ls_di" = "X$ac_pwd_ls_di" ||
-  as_fn_error "pwd does not report name of working directory"
+  as_fn_error $? "pwd does not report name of working directory"
  
  
  # Find the source files, if location was not specified.
@@ -1342,11 +1365,11 @@ else
  fi
  if test ! -r "$srcdir/$ac_unique_file"; then
    test "$ac_srcdir_defaulted" = yes && srcdir="$ac_confdir or .."
-  as_fn_error "cannot find sources ($ac_unique_file) in $srcdir"
+  as_fn_error $? "cannot find sources ($ac_unique_file) in $srcdir"
  fi
  ac_msg="sources are in $srcdir, but \`cd $srcdir' does not work"
  ac_abs_confdir=`(
-       cd "$srcdir" && test -r "./$ac_unique_file" || as_fn_error "$ac_msg"
+       cd "$srcdir" && test -r "./$ac_unique_file" || as_fn_error $? "$ac_msg"
         pwd)`
  # When building in place, set srcdir=.
  if test "$ac_abs_confdir" = "$ac_pwd"; then
@@ -1372,7 +1395,7 @@ if test "$ac_init_help" = "long"; then
    # Omit some internal or obsolete options to make the list less imposing.
    # This message is too long to be a string in the A/UX 3.1 sh.
    cat <<_ACEOF
-\`configure' configures GNU MP 5.0.5 to adapt to many kinds of systems.
+\`configure' configures GNU MP 5.1.3 to adapt to many kinds of systems.
  
  Usage: $0 [OPTION]... [VAR=VALUE]...
  
@@ -1386,7 +1409,7 @@ Configuration:
        --help=short        display options specific to this package
        --help=recursive    display the short help of all the included packages
    -V, --version           display version information and exit
-  -q, --quiet, --silent   do not print \`checking...' messages
+  -q, --quiet, --silent   do not print \`checking ...' messages
        --cache-file=FILE   cache test results in FILE [disabled]
    -C, --config-cache      alias for \`--cache-file=config.cache'
    -n, --no-create         do not create output files
@@ -1442,7 +1465,7 @@ fi
  
  if test -n "$ac_init_help"; then
    case $ac_init_help in
-     short | recursive ) echo "Configuration of GNU MP 5.0.5:";;
+     short | recursive ) echo "Configuration of GNU MP 5.1.3:";;
     esac
    cat <<\_ACEOF
  
@@ -1455,11 +1478,10 @@ Optional Features:
    --enable-assert         enable ASSERT checking [[default=no]]
    --enable-alloca         how to get temp memory [[default=reentrant]]
    --enable-cxx            enable C++ support [[default=no]]
+  --enable-assembly       enable the use of assembly loops [[default=yes]]
    --enable-fft            enable FFTs for multiplication [[default=yes]]
    --enable-old-fft-full   enable old mpn_mul_fft_full for multiplication
                            [[default=no]]
-  --enable-mpbsd          build Berkeley MP compatibility library
-                          [[default=no]]
    --enable-nails          use nails on limbs [[default=no]]
    --enable-profiling      build with profiler support [[default=no]]
    --enable-fat            build a fat binary on systems that support it
@@ -1500,8 +1522,9 @@ Some influential environment variables:
    CXXFLAGS    C++ compiler flags
    CXXCPP      C++ preprocessor
    M4          m4 macro processor
-  YACC        The `Yet Another C Compiler' implementation to use. Defaults to
-              the first program found out of: `bison -y', `byacc', `yacc'.
+  YACC        The `Yet Another Compiler Compiler' implementation to use.
+              Defaults to the first program found out of: `bison -y', `byacc',
+              `yacc'.
    YFLAGS      The list of arguments that will be passed by default to $YACC.
                This script will default YFLAGS to the empty string to avoid a
                default value of `-d' given by some make applications.
@@ -1574,17 +1597,17 @@ fi
  test -n "$ac_init_help" && exit $ac_status
  if $ac_init_version; then
    cat <<\_ACEOF
-GNU MP configure 5.0.5
-generated by GNU Autoconf 2.65
+GNU MP configure 5.1.3
+generated by GNU Autoconf 2.69
  
-Copyright (C) 2009 Free Software Foundation, Inc.
+Copyright (C) 2012 Free Software Foundation, Inc.
  This configure script is free software; the Free Software Foundation
  gives unlimited permission to copy, distribute and modify it.
  
  
  
  Copyright 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006,
-2007, 2008, 2009, 2010, 2011 Free Software Foundation, Inc.
+2007, 2008, 2009, 2010, 2011, 2012, 2013 Free Software Foundation, Inc.
  
  This file is part of the GNU MP Library.
  
@@ -1642,7 +1665,7 @@ sed 's/^/| /' conftest.$ac_ext >&5
  
         ac_retval=1
  fi
-  eval $as_lineno_stack; test "x$as_lineno_stack" = x && { as_lineno=; unset as_lineno;}
+  eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno
    as_fn_set_status $ac_retval
  
  } # ac_fn_c_try_compile
@@ -1668,7 +1691,7 @@ $as_echo "$ac_try_echo"; } >&5
      mv -f conftest.er1 conftest.err
    fi
    $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
-  test $ac_status = 0; } >/dev/null && {
+  test $ac_status = 0; } > conftest.i && {
          test -z "$ac_c_preproc_warn_flag$ac_c_werror_flag" ||
          test ! -s conftest.err
         }; then :
@@ -1679,7 +1702,7 @@ sed 's/^/| /' conftest.$ac_ext >&5
  
      ac_retval=1
  fi
-  eval $as_lineno_stack; test "x$as_lineno_stack" = x && { as_lineno=; unset as_lineno;}
+  eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno
    as_fn_set_status $ac_retval
  
  } # ac_fn_c_try_cpp
@@ -1717,7 +1740,7 @@ sed 's/^/| /' conftest.$ac_ext >&5
  
         ac_retval=1
  fi
-  eval $as_lineno_stack; test "x$as_lineno_stack" = x && { as_lineno=; unset as_lineno;}
+  eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno
    as_fn_set_status $ac_retval
  
  } # ac_fn_cxx_try_compile
@@ -1743,7 +1766,7 @@ $as_echo "$ac_try_echo"; } >&5
      mv -f conftest.er1 conftest.err
    fi
    $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
-  test $ac_status = 0; } >/dev/null && {
+  test $ac_status = 0; } > conftest.i && {
          test -z "$ac_cxx_preproc_warn_flag$ac_cxx_werror_flag" ||
          test ! -s conftest.err
         }; then :
@@ -1754,18 +1777,18 @@ sed 's/^/| /' conftest.$ac_ext >&5
  
      ac_retval=1
  fi
-  eval $as_lineno_stack; test "x$as_lineno_stack" = x && { as_lineno=; unset as_lineno;}
+  eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno
    as_fn_set_status $ac_retval
  
  } # ac_fn_cxx_try_cpp
  
-# ac_fn_c_try_run LINENO
-# ----------------------
-# Try to link conftest.$ac_ext, and return whether this succeeded. Assumes
-# that executables *can* be run.
-ac_fn_c_try_run ()
+# ac_fn_c_try_link LINENO
+# -----------------------
+# Try to link conftest.$ac_ext, and return whether this succeeded.
+ac_fn_c_try_link ()
  {
    as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack
+  rm -f conftest.$ac_objext conftest$ac_exeext
    if { { ac_try="$ac_link"
  case "(($ac_try" in
    *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
@@ -1773,126 +1796,37 @@ case "(($ac_try" in
  esac
  eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\""
  $as_echo "$ac_try_echo"; } >&5
-  (eval "$ac_link") 2>&5
-  ac_status=$?
-  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
-  test $ac_status = 0; } && { ac_try='./conftest$ac_exeext'
-  { { case "(($ac_try" in
-  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
-  *) ac_try_echo=$ac_try;;
-esac
-eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\""
-$as_echo "$ac_try_echo"; } >&5
-  (eval "$ac_try") 2>&5
+  (eval "$ac_link") 2>conftest.err
    ac_status=$?
+  if test -s conftest.err; then
+    grep -v '^ *+' conftest.err >conftest.er1
+    cat conftest.er1 >&5
+    mv -f conftest.er1 conftest.err
+  fi
    $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
-  test $ac_status = 0; }; }; then :
+  test $ac_status = 0; } && {
+        test -z "$ac_c_werror_flag" ||
+        test ! -s conftest.err
+       } && test -s conftest$ac_exeext && {
+        test "$cross_compiling" = yes ||
+        test -x conftest$ac_exeext
+       }; then :
    ac_retval=0
  else
-  $as_echo "$as_me: program exited with status $ac_status" >&5
-       $as_echo "$as_me: failed program was:" >&5
+  $as_echo "$as_me: failed program was:" >&5
  sed 's/^/| /' conftest.$ac_ext >&5
  
-       ac_retval=$ac_status
+       ac_retval=1
  fi
+  # Delete the IPA/IPO (Inter Procedural Analysis/Optimization) information
+  # created by the PGI compiler (conftest_ipa8_conftest.oo), as it would
+  # interfere with the next link command; also delete a directory that is
+  # left behind by Apple's compiler.  We do this before executing the actions.
    rm -rf conftest.dSYM conftest_ipa8_conftest.oo
-  eval $as_lineno_stack; test "x$as_lineno_stack" = x && { as_lineno=; unset as_lineno;}
+  eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno
    as_fn_set_status $ac_retval
  
-} # ac_fn_c_try_run
-
-# ac_fn_c_check_header_mongrel LINENO HEADER VAR INCLUDES
-# -------------------------------------------------------
-# Tests whether HEADER exists, giving a warning if it cannot be compiled using
-# the include files in INCLUDES and setting the cache variable VAR
-# accordingly.
-ac_fn_c_check_header_mongrel ()
-{
-  as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack
-  if { as_var=$3; eval "test \"\${$as_var+set}\" = set"; }; then :
-  { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $2" >&5
-$as_echo_n "checking for $2... " >&6; }
-if { as_var=$3; eval "test \"\${$as_var+set}\" = set"; }; then :
-  $as_echo_n "(cached) " >&6
-fi
-eval ac_res=\$$3
-              { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5
-$as_echo "$ac_res" >&6; }
-else
-  # Is the header compilable?
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking $2 usability" >&5
-$as_echo_n "checking $2 usability... " >&6; }
-cat confdefs.h - <<_ACEOF >conftest.$ac_ext
-/* end confdefs.h.  */
-$4
-#include <$2>
-_ACEOF
-if ac_fn_c_try_compile "$LINENO"; then :
-  ac_header_compiler=yes
-else
-  ac_header_compiler=no
-fi
-rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
-{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_header_compiler" >&5
-$as_echo "$ac_header_compiler" >&6; }
-
-# Is the header present?
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking $2 presence" >&5
-$as_echo_n "checking $2 presence... " >&6; }
-cat confdefs.h - <<_ACEOF >conftest.$ac_ext
-/* end confdefs.h.  */
-#include <$2>
-_ACEOF
-if ac_fn_c_try_cpp "$LINENO"; then :
-  ac_header_preproc=yes
-else
-  ac_header_preproc=no
-fi
-rm -f conftest.err conftest.$ac_ext
-{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_header_preproc" >&5
-$as_echo "$ac_header_preproc" >&6; }
-
-# So?  What about this header?
-case $ac_header_compiler:$ac_header_preproc:$ac_c_preproc_warn_flag in #((
-  yes:no: )
-    { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: accepted by the compiler, rejected by the preprocessor!" >&5
-$as_echo "$as_me: WARNING: $2: accepted by the compiler, rejected by the preprocessor!" >&2;}
-    { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: proceeding with the compiler's result" >&5
-$as_echo "$as_me: WARNING: $2: proceeding with the compiler's result" >&2;}
-    ;;
-  no:yes:* )
-    { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: present but cannot be compiled" >&5
-$as_echo "$as_me: WARNING: $2: present but cannot be compiled" >&2;}
-    { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2:     check for missing prerequisite headers?" >&5
-$as_echo "$as_me: WARNING: $2:     check for missing prerequisite headers?" >&2;}
-    { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: see the Autoconf documentation" >&5
-$as_echo "$as_me: WARNING: $2: see the Autoconf documentation" >&2;}
-    { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2:     section \"Present But Cannot Be Compiled\"" >&5
-$as_echo "$as_me: WARNING: $2:     section \"Present But Cannot Be Compiled\"" >&2;}
-    { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: proceeding with the compiler's result" >&5
-$as_echo "$as_me: WARNING: $2: proceeding with the compiler's result" >&2;}
-( cat <<\_ASBOX
-## ssssssssssssssssssssssssssssssssss ##
-## Report this to gmp-bugs@gmplib.org ##
-## ssssssssssssssssssssssssssssssssss ##
-_ASBOX
-     ) | sed "s/^/$as_me: WARNING:     /" >&2
-    ;;
-esac
-  { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $2" >&5
-$as_echo_n "checking for $2... " >&6; }
-if { as_var=$3; eval "test \"\${$as_var+set}\" = set"; }; then :
-  $as_echo_n "(cached) " >&6
-else
-  eval "$3=\$ac_header_compiler"
-fi
-eval ac_res=\$$3
-              { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5
-$as_echo "$ac_res" >&6; }
-fi
-  eval $as_lineno_stack; test "x$as_lineno_stack" = x && { as_lineno=; unset as_lineno;}
-
-} # ac_fn_c_check_header_mongrel
+} # ac_fn_c_try_link
  
  # ac_fn_c_check_header_compile LINENO HEADER VAR INCLUDES
  # -------------------------------------------------------
@@ -1903,7 +1837,7 @@ ac_fn_c_check_header_compile ()
    as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack
    { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $2" >&5
  $as_echo_n "checking for $2... " >&6; }
-if { as_var=$3; eval "test \"\${$as_var+set}\" = set"; }; then :
+if eval \${$3+:} false; then :
    $as_echo_n "(cached) " >&6
  else
    cat confdefs.h - <<_ACEOF >conftest.$ac_ext
@@ -1921,17 +1855,17 @@ fi
  eval ac_res=\$$3
                { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5
  $as_echo "$ac_res" >&6; }
-  eval $as_lineno_stack; test "x$as_lineno_stack" = x && { as_lineno=; unset as_lineno;}
+  eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno
  
  } # ac_fn_c_check_header_compile
  
-# ac_fn_c_try_link LINENO
-# -----------------------
-# Try to link conftest.$ac_ext, and return whether this succeeded.
-ac_fn_c_try_link ()
+# ac_fn_c_try_run LINENO
+# ----------------------
+# Try to link conftest.$ac_ext, and return whether this succeeded. Assumes
+# that executables *can* be run.
+ac_fn_c_try_run ()
  {
    as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack
-  rm -f conftest.$ac_objext conftest$ac_exeext
    if { { ac_try="$ac_link"
  case "(($ac_try" in
    *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
@@ -1939,37 +1873,33 @@ case "(($ac_try" in
  esac
  eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\""
  $as_echo "$ac_try_echo"; } >&5
-  (eval "$ac_link") 2>conftest.err
+  (eval "$ac_link") 2>&5
    ac_status=$?
-  if test -s conftest.err; then
-    grep -v '^ *+' conftest.err >conftest.er1
-    cat conftest.er1 >&5
-    mv -f conftest.er1 conftest.err
-  fi
    $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
-  test $ac_status = 0; } && {
-        test -z "$ac_c_werror_flag" ||
-        test ! -s conftest.err
-       } && test -s conftest$ac_exeext && {
-        test "$cross_compiling" = yes ||
-        $as_test_x conftest$ac_exeext
-       }; then :
+  test $ac_status = 0; } && { ac_try='./conftest$ac_exeext'
+  { { case "(($ac_try" in
+  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+  *) ac_try_echo=$ac_try;;
+esac
+eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\""
+$as_echo "$ac_try_echo"; } >&5
+  (eval "$ac_try") 2>&5
+  ac_status=$?
+  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; }; }; then :
    ac_retval=0
  else
-  $as_echo "$as_me: failed program was:" >&5
+  $as_echo "$as_me: program exited with status $ac_status" >&5
+       $as_echo "$as_me: failed program was:" >&5
  sed 's/^/| /' conftest.$ac_ext >&5
  
-       ac_retval=1
+       ac_retval=$ac_status
  fi
-  # Delete the IPA/IPO (Inter Procedural Analysis/Optimization) information
-  # created by the PGI compiler (conftest_ipa8_conftest.oo), as it would
-  # interfere with the next link command; also delete a directory that is
-  # left behind by Apple's compiler.  We do this before executing the actions.
    rm -rf conftest.dSYM conftest_ipa8_conftest.oo
-  eval $as_lineno_stack; test "x$as_lineno_stack" = x && { as_lineno=; unset as_lineno;}
+  eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno
    as_fn_set_status $ac_retval
  
-} # ac_fn_c_try_link
+} # ac_fn_c_try_run
  
  # ac_fn_c_check_func LINENO FUNC VAR
  # ----------------------------------
@@ -1979,7 +1909,7 @@ ac_fn_c_check_func ()
    as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack
    { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $2" >&5
  $as_echo_n "checking for $2... " >&6; }
-if { as_var=$3; eval "test \"\${$as_var+set}\" = set"; }; then :
+if eval \${$3+:} false; then :
    $as_echo_n "(cached) " >&6
  else
    cat confdefs.h - <<_ACEOF >conftest.$ac_ext
@@ -2034,7 +1964,7 @@ fi
  eval ac_res=\$$3
                { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5
  $as_echo "$ac_res" >&6; }
-  eval $as_lineno_stack; test "x$as_lineno_stack" = x && { as_lineno=; unset as_lineno;}
+  eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno
  
  } # ac_fn_c_check_func
  
@@ -2065,7 +1995,7 @@ $as_echo "$ac_try_echo"; } >&5
          test ! -s conftest.err
         } && test -s conftest$ac_exeext && {
          test "$cross_compiling" = yes ||
-        $as_test_x conftest$ac_exeext
+        test -x conftest$ac_exeext
         }; then :
    ac_retval=0
  else
@@ -2079,7 +2009,7 @@ fi
    # interfere with the next link command; also delete a directory that is
    # left behind by Apple's compiler.  We do this before executing the actions.
    rm -rf conftest.dSYM conftest_ipa8_conftest.oo
-  eval $as_lineno_stack; test "x$as_lineno_stack" = x && { as_lineno=; unset as_lineno;}
+  eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno
    as_fn_set_status $ac_retval
  
  } # ac_fn_cxx_try_link
@@ -2111,7 +2041,7 @@ $as_echo "$ac_try_echo"; } >&5
          test ! -s conftest.err
         } && test -s conftest$ac_exeext && {
          test "$cross_compiling" = yes ||
-        $as_test_x conftest$ac_exeext
+        test -x conftest$ac_exeext
         }; then :
    ac_retval=0
  else
@@ -2125,20 +2055,114 @@ fi
    # interfere with the next link command; also delete a directory that is
    # left behind by Apple's compiler.  We do this before executing the actions.
    rm -rf conftest.dSYM conftest_ipa8_conftest.oo
-  eval $as_lineno_stack; test "x$as_lineno_stack" = x && { as_lineno=; unset as_lineno;}
+  eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno
    as_fn_set_status $ac_retval
  
  } # ac_fn_f77_try_link
  
-# ac_fn_c_check_decl LINENO SYMBOL VAR
-# ------------------------------------
-# Tests whether SYMBOL is declared, setting cache variable VAR accordingly.
+# ac_fn_c_check_header_mongrel LINENO HEADER VAR INCLUDES
+# -------------------------------------------------------
+# Tests whether HEADER exists, giving a warning if it cannot be compiled using
+# the include files in INCLUDES and setting the cache variable VAR
+# accordingly.
+ac_fn_c_check_header_mongrel ()
+{
+  as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack
+  if eval \${$3+:} false; then :
+  { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $2" >&5
+$as_echo_n "checking for $2... " >&6; }
+if eval \${$3+:} false; then :
+  $as_echo_n "(cached) " >&6
+fi
+eval ac_res=\$$3
+              { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5
+$as_echo "$ac_res" >&6; }
+else
+  # Is the header compilable?
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking $2 usability" >&5
+$as_echo_n "checking $2 usability... " >&6; }
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+$4
+#include <$2>
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"; then :
+  ac_header_compiler=yes
+else
+  ac_header_compiler=no
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_header_compiler" >&5
+$as_echo "$ac_header_compiler" >&6; }
+
+# Is the header present?
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking $2 presence" >&5
+$as_echo_n "checking $2 presence... " >&6; }
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+#include <$2>
+_ACEOF
+if ac_fn_c_try_cpp "$LINENO"; then :
+  ac_header_preproc=yes
+else
+  ac_header_preproc=no
+fi
+rm -f conftest.err conftest.i conftest.$ac_ext
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_header_preproc" >&5
+$as_echo "$ac_header_preproc" >&6; }
+
+# So?  What about this header?
+case $ac_header_compiler:$ac_header_preproc:$ac_c_preproc_warn_flag in #((
+  yes:no: )
+    { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: accepted by the compiler, rejected by the preprocessor!" >&5
+$as_echo "$as_me: WARNING: $2: accepted by the compiler, rejected by the preprocessor!" >&2;}
+    { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: proceeding with the compiler's result" >&5
+$as_echo "$as_me: WARNING: $2: proceeding with the compiler's result" >&2;}
+    ;;
+  no:yes:* )
+    { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: present but cannot be compiled" >&5
+$as_echo "$as_me: WARNING: $2: present but cannot be compiled" >&2;}
+    { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2:     check for missing prerequisite headers?" >&5
+$as_echo "$as_me: WARNING: $2:     check for missing prerequisite headers?" >&2;}
+    { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: see the Autoconf documentation" >&5
+$as_echo "$as_me: WARNING: $2: see the Autoconf documentation" >&2;}
+    { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2:     section \"Present But Cannot Be Compiled\"" >&5
+$as_echo "$as_me: WARNING: $2:     section \"Present But Cannot Be Compiled\"" >&2;}
+    { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: proceeding with the compiler's result" >&5
+$as_echo "$as_me: WARNING: $2: proceeding with the compiler's result" >&2;}
+( $as_echo "## ssssssssssssssssssssssssssssssssss ##
+## Report this to gmp-bugs@gmplib.org ##
+## ssssssssssssssssssssssssssssssssss ##"
+     ) | sed "s/^/$as_me: WARNING:     /" >&2
+    ;;
+esac
+  { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $2" >&5
+$as_echo_n "checking for $2... " >&6; }
+if eval \${$3+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  eval "$3=\$ac_header_compiler"
+fi
+eval ac_res=\$$3
+              { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5
+$as_echo "$ac_res" >&6; }
+fi
+  eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno
+
+} # ac_fn_c_check_header_mongrel
+
+# ac_fn_c_check_decl LINENO SYMBOL VAR INCLUDES
+# ---------------------------------------------
+# Tests whether SYMBOL is declared in INCLUDES, setting cache variable VAR
+# accordingly.
  ac_fn_c_check_decl ()
  {
    as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack
-  { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether $2 is declared" >&5
-$as_echo_n "checking whether $2 is declared... " >&6; }
-if { as_var=$3; eval "test \"\${$as_var+set}\" = set"; }; then :
+  as_decl_name=`echo $2|sed 's/ *(.*//'`
+  as_decl_use=`echo $2|sed -e 's/(/((/' -e 's/)/) 0&/' -e 's/,/) 0& (/g'`
+  { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether $as_decl_name is declared" >&5
+$as_echo_n "checking whether $as_decl_name is declared... " >&6; }
+if eval \${$3+:} false; then :
    $as_echo_n "(cached) " >&6
  else
    cat confdefs.h - <<_ACEOF >conftest.$ac_ext
@@ -2147,8 +2171,12 @@ $4
  int
  main ()
  {
-#ifndef $2
-  (void) $2;
+#ifndef $as_decl_name
+#ifdef __cplusplus
+  (void) $as_decl_use;
+#else
+  (void) $as_decl_name;
+#endif
  #endif
  
    ;
@@ -2165,7 +2193,7 @@ fi
  eval ac_res=\$$3
                { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5
  $as_echo "$ac_res" >&6; }
-  eval $as_lineno_stack; test "x$as_lineno_stack" = x && { as_lineno=; unset as_lineno;}
+  eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno
  
  } # ac_fn_c_check_decl
  
@@ -2178,7 +2206,7 @@ ac_fn_c_check_type ()
    as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack
    { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $2" >&5
  $as_echo_n "checking for $2... " >&6; }
-if { as_var=$3; eval "test \"\${$as_var+set}\" = set"; }; then :
+if eval \${$3+:} false; then :
    $as_echo_n "(cached) " >&6
  else
    eval "$3=no"
@@ -2219,7 +2247,7 @@ fi
  eval ac_res=\$$3
                { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5
  $as_echo "$ac_res" >&6; }
-  eval $as_lineno_stack; test "x$as_lineno_stack" = x && { as_lineno=; unset as_lineno;}
+  eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno
  
  } # ac_fn_c_check_type
  
@@ -2232,7 +2260,7 @@ ac_fn_c_check_member ()
    as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack
    { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $2.$3" >&5
  $as_echo_n "checking for $2.$3... " >&6; }
-if { as_var=$4; eval "test \"\${$as_var+set}\" = set"; }; then :
+if eval \${$4+:} false; then :
    $as_echo_n "(cached) " >&6
  else
    cat confdefs.h - <<_ACEOF >conftest.$ac_ext
@@ -2276,7 +2304,7 @@ fi
  eval ac_res=\$$4
                { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5
  $as_echo "$ac_res" >&6; }
-  eval $as_lineno_stack; test "x$as_lineno_stack" = x && { as_lineno=; unset as_lineno;}
+  eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno
  
  } # ac_fn_c_check_member
  
@@ -2288,10 +2316,10 @@ $as_echo "$ac_res" >&6; }
  ac_fn_cxx_check_header_mongrel ()
  {
    as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack
-  if { as_var=$3; eval "test \"\${$as_var+set}\" = set"; }; then :
+  if eval \${$3+:} false; then :
    { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $2" >&5
  $as_echo_n "checking for $2... " >&6; }
-if { as_var=$3; eval "test \"\${$as_var+set}\" = set"; }; then :
+if eval \${$3+:} false; then :
    $as_echo_n "(cached) " >&6
  fi
  eval ac_res=\$$3
@@ -2327,7 +2355,7 @@ if ac_fn_cxx_try_cpp "$LINENO"; then :
  else
    ac_header_preproc=no
  fi
-rm -f conftest.err conftest.$ac_ext
+rm -f conftest.err conftest.i conftest.$ac_ext
  { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_header_preproc" >&5
  $as_echo "$ac_header_preproc" >&6; }
  
@@ -2350,17 +2378,15 @@ $as_echo "$as_me: WARNING: $2: see the Autoconf documentation" >&2;}
  $as_echo "$as_me: WARNING: $2:     section \"Present But Cannot Be Compiled\"" >&2;}
      { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: proceeding with the compiler's result" >&5
  $as_echo "$as_me: WARNING: $2: proceeding with the compiler's result" >&2;}
-( cat <<\_ASBOX
-## ssssssssssssssssssssssssssssssssss ##
+( $as_echo "## ssssssssssssssssssssssssssssssssss ##
  ## Report this to gmp-bugs@gmplib.org ##
-## ssssssssssssssssssssssssssssssssss ##
-_ASBOX
+## ssssssssssssssssssssssssssssssssss ##"
       ) | sed "s/^/$as_me: WARNING:     /" >&2
      ;;
  esac
    { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $2" >&5
  $as_echo_n "checking for $2... " >&6; }
-if { as_var=$3; eval "test \"\${$as_var+set}\" = set"; }; then :
+if eval \${$3+:} false; then :
    $as_echo_n "(cached) " >&6
  else
    eval "$3=\$ac_header_compiler"
@@ -2369,7 +2395,7 @@ eval ac_res=\$$3
                { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5
  $as_echo "$ac_res" >&6; }
  fi
-  eval $as_lineno_stack; test "x$as_lineno_stack" = x && { as_lineno=; unset as_lineno;}
+  eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno
  
  } # ac_fn_cxx_check_header_mongrel
  
@@ -2382,7 +2408,7 @@ ac_fn_cxx_check_type ()
    as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack
    { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $2" >&5
  $as_echo_n "checking for $2... " >&6; }
-if { as_var=$3; eval "test \"\${$as_var+set}\" = set"; }; then :
+if eval \${$3+:} false; then :
    $as_echo_n "(cached) " >&6
  else
    eval "$3=no"
@@ -2423,7 +2449,7 @@ fi
  eval ac_res=\$$3
                { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5
  $as_echo "$ac_res" >&6; }
-  eval $as_lineno_stack; test "x$as_lineno_stack" = x && { as_lineno=; unset as_lineno;}
+  eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno
  
  } # ac_fn_cxx_check_type
  
@@ -2444,7 +2470,8 @@ int
  main ()
  {
  static int test_array [1 - 2 * !(($2) >= 0)];
-test_array [0] = 0
+test_array [0] = 0;
+return test_array [0];
  
    ;
    return 0;
@@ -2460,7 +2487,8 @@ int
  main ()
  {
  static int test_array [1 - 2 * !(($2) <= $ac_mid)];
-test_array [0] = 0
+test_array [0] = 0;
+return test_array [0];
  
    ;
    return 0;
@@ -2486,7 +2514,8 @@ int
  main ()
  {
  static int test_array [1 - 2 * !(($2) < 0)];
-test_array [0] = 0
+test_array [0] = 0;
+return test_array [0];
  
    ;
    return 0;
@@ -2502,7 +2531,8 @@ int
  main ()
  {
  static int test_array [1 - 2 * !(($2) >= $ac_mid)];
-test_array [0] = 0
+test_array [0] = 0;
+return test_array [0];
  
    ;
    return 0;
@@ -2536,7 +2566,8 @@ int
  main ()
  {
  static int test_array [1 - 2 * !(($2) <= $ac_mid)];
-test_array [0] = 0
+test_array [0] = 0;
+return test_array [0];
  
    ;
    return 0;
@@ -2600,7 +2631,7 @@ rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext \
  rm -f conftest.val
  
    fi
-  eval $as_lineno_stack; test "x$as_lineno_stack" = x && { as_lineno=; unset as_lineno;}
+  eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno
    as_fn_set_status $ac_retval
  
  } # ac_fn_c_compute_int
@@ -2608,8 +2639,8 @@ cat >config.log <<_ACEOF
  This file contains any messages produced by compilers while
  running configure, to aid debugging if configure makes a mistake.
  
-It was created by GNU MP $as_me 5.0.5, which was
-generated by GNU Autoconf 2.65.  Invocation command line was
+It was created by GNU MP $as_me 5.1.3, which was
+generated by GNU Autoconf 2.69.  Invocation command line was
  
    $ $0 $@
  
@@ -2719,11 +2750,9 @@ trap 'exit_status=$?
    {
      echo
  
-    cat <<\_ASBOX
-## ---------------- ##
+    $as_echo "## ---------------- ##
  ## Cache variables. ##
-## ---------------- ##
-_ASBOX
+## ---------------- ##"
      echo
      # The following way of writing the cache mishandles newlines in values,
  (
@@ -2757,11 +2786,9 @@ $as_echo "$as_me: WARNING: cache variable $ac_var contains a newline" >&2;} ;;
  )
      echo
  
-    cat <<\_ASBOX
-## ----------------- ##
+    $as_echo "## ----------------- ##
  ## Output variables. ##
-## ----------------- ##
-_ASBOX
+## ----------------- ##"
      echo
      for ac_var in $ac_subst_vars
      do
@@ -2774,11 +2801,9 @@ _ASBOX
      echo
  
      if test -n "$ac_subst_files"; then
-      cat <<\_ASBOX
-## ------------------- ##
+      $as_echo "## ------------------- ##
  ## File substitutions. ##
-## ------------------- ##
-_ASBOX
+## ------------------- ##"
        echo
        for ac_var in $ac_subst_files
        do
@@ -2792,11 +2817,9 @@ _ASBOX
      fi
  
      if test -s confdefs.h; then
-      cat <<\_ASBOX
-## ----------- ##
+      $as_echo "## ----------- ##
  ## confdefs.h. ##
-## ----------- ##
-_ASBOX
+## ----------- ##"
        echo
        cat confdefs.h
        echo
@@ -2851,7 +2874,12 @@ _ACEOF
  ac_site_file1=NONE
  ac_site_file2=NONE
  if test -n "$CONFIG_SITE"; then
-  ac_site_file1=$CONFIG_SITE
+  # We do not want a PATH search for config.site.
+  case $CONFIG_SITE in #((
+    -*)  ac_site_file1=./$CONFIG_SITE;;
+    */*) ac_site_file1=$CONFIG_SITE;;
+    *)   ac_site_file1=./$CONFIG_SITE;;
+  esac
  elif test "x$prefix" != xNONE; then
    ac_site_file1=$prefix/share/config.site
    ac_site_file2=$prefix/etc/config.site
@@ -2866,7 +2894,11 @@ do
      { $as_echo "$as_me:${as_lineno-$LINENO}: loading site script $ac_site_file" >&5
  $as_echo "$as_me: loading site script $ac_site_file" >&6;}
      sed 's/^/| /' "$ac_site_file" >&5
-    . "$ac_site_file"
+    . "$ac_site_file" \
+      || { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
+$as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
+as_fn_error $? "failed to load site script $ac_site_file
+See \`config.log' for more details" "$LINENO" 5; }
    fi
  done
  
@@ -2942,7 +2974,7 @@ if $ac_cache_corrupted; then
  $as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
    { $as_echo "$as_me:${as_lineno-$LINENO}: error: changes in the environment can compromise the build" >&5
  $as_echo "$as_me: error: changes in the environment can compromise the build" >&2;}
-  as_fn_error "run \`make distclean' and/or \`rm $cache_file' and start over" "$LINENO" 5
+  as_fn_error $? "run \`make distclean' and/or \`rm $cache_file' and start over" "$LINENO" 5
  fi
  ## -------------------- ##
  ## Main body of script. ##
@@ -2963,13 +2995,14 @@ ac_compiler_gnu=$ac_cv_c_compiler_gnu
  
  
  
+
  # If --target is not used then $target_alias is empty, but if say
  # "./configure athlon-pc-freebsd3.5" is used, then all three of
  # $build_alias, $host_alias and $target_alias are set to
  # "athlon-pc-freebsd3.5".
  #
  if test -n "$target_alias" && test "$target_alias" != "$host_alias"; then
-  as_fn_error "--target is not appropriate for GMP
+  as_fn_error $? "--target is not appropriate for GMP
  Use --build=CPU-VENDOR-OS if you need to specify your CPU and/or system
  explicitly.  Use --host if cross-compiling (see \"Installing GMP\" in the
  manual for more on this)." "$LINENO" 5
@@ -2996,16 +3029,22 @@ echo "include(CONFIG_TOP_SRCDIR\`/mpn/asm-defs.m4')" >>$gmp_tmpconfigm4i
  
  ac_aux_dir=
  for ac_dir in "$srcdir" "$srcdir/.." "$srcdir/../.."; do
-  for ac_t in install-sh install.sh shtool; do
-    if test -f "$ac_dir/$ac_t"; then
-      ac_aux_dir=$ac_dir
-      ac_install_sh="$ac_aux_dir/$ac_t -c"
-      break 2
-    fi
-  done
+  if test -f "$ac_dir/install-sh"; then
+    ac_aux_dir=$ac_dir
+    ac_install_sh="$ac_aux_dir/install-sh -c"
+    break
+  elif test -f "$ac_dir/install.sh"; then
+    ac_aux_dir=$ac_dir
+    ac_install_sh="$ac_aux_dir/install.sh -c"
+    break
+  elif test -f "$ac_dir/shtool"; then
+    ac_aux_dir=$ac_dir
+    ac_install_sh="$ac_aux_dir/shtool install -c"
+    break
+  fi
  done
  if test -z "$ac_aux_dir"; then
-  as_fn_error "cannot find install-sh, install.sh, or shtool in \"$srcdir\" \"$srcdir/..\" \"$srcdir/../..\"" "$LINENO" 5
+  as_fn_error $? "cannot find install-sh, install.sh, or shtool in \"$srcdir\" \"$srcdir/..\" \"$srcdir/../..\"" "$LINENO" 5
  fi
  
  # These three variables are undocumented and unsupported,
@@ -3019,27 +3058,27 @@ ac_configure="$SHELL $ac_aux_dir/configure"  # Please don't use this var.
  
  # Make sure we can run config.sub.
  $SHELL "$ac_aux_dir/config.sub" sun4 >/dev/null 2>&1 ||
-  as_fn_error "cannot run $SHELL $ac_aux_dir/config.sub" "$LINENO" 5
+  as_fn_error $? "cannot run $SHELL $ac_aux_dir/config.sub" "$LINENO" 5
  
  { $as_echo "$as_me:${as_lineno-$LINENO}: checking build system type" >&5
  $as_echo_n "checking build system type... " >&6; }
-if test "${ac_cv_build+set}" = set; then :
+if ${ac_cv_build+:} false; then :
    $as_echo_n "(cached) " >&6
  else
    ac_build_alias=$build_alias
  test "x$ac_build_alias" = x &&
    ac_build_alias=`$SHELL "$ac_aux_dir/config.guess"`
  test "x$ac_build_alias" = x &&
-  as_fn_error "cannot guess build type; you must specify one" "$LINENO" 5
+  as_fn_error $? "cannot guess build type; you must specify one" "$LINENO" 5
  ac_cv_build=`$SHELL "$ac_aux_dir/config.sub" $ac_build_alias` ||
-  as_fn_error "$SHELL $ac_aux_dir/config.sub $ac_build_alias failed" "$LINENO" 5
+  as_fn_error $? "$SHELL $ac_aux_dir/config.sub $ac_build_alias failed" "$LINENO" 5
  
  fi
  { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_build" >&5
  $as_echo "$ac_cv_build" >&6; }
  case $ac_cv_build in
  *-*-*) ;;
-*) as_fn_error "invalid value of canonical build" "$LINENO" 5;;
+*) as_fn_error $? "invalid value of canonical build" "$LINENO" 5;;
  esac
  build=$ac_cv_build
  ac_save_IFS=$IFS; IFS='-'
@@ -3057,14 +3096,14 @@ case $build_os in *\ *) build_os=`echo "$build_os" | sed 's/ /-/g'`;; esac
  
  { $as_echo "$as_me:${as_lineno-$LINENO}: checking host system type" >&5
  $as_echo_n "checking host system type... " >&6; }
-if test "${ac_cv_host+set}" = set; then :
+if ${ac_cv_host+:} false; then :
    $as_echo_n "(cached) " >&6
  else
    if test "x$host_alias" = x; then
    ac_cv_host=$ac_cv_build
  else
    ac_cv_host=`$SHELL "$ac_aux_dir/config.sub" $host_alias` ||
-    as_fn_error "$SHELL $ac_aux_dir/config.sub $host_alias failed" "$LINENO" 5
+    as_fn_error $? "$SHELL $ac_aux_dir/config.sub $host_alias failed" "$LINENO" 5
  fi
  
  fi
@@ -3072,7 +3111,7 @@ fi
  $as_echo "$ac_cv_host" >&6; }
  case $ac_cv_host in
  *-*-*) ;;
-*) as_fn_error "invalid value of canonical host" "$LINENO" 5;;
+*) as_fn_error $? "invalid value of canonical host" "$LINENO" 5;;
  esac
  host=$ac_cv_host
  ac_save_IFS=$IFS; IFS='-'
@@ -3108,7 +3147,7 @@ am__api_version='1.11'
  { $as_echo "$as_me:${as_lineno-$LINENO}: checking for a BSD-compatible install" >&5
  $as_echo_n "checking for a BSD-compatible install... " >&6; }
  if test -z "$INSTALL"; then
-if test "${ac_cv_path_install+set}" = set; then :
+if ${ac_cv_path_install+:} false; then :
    $as_echo_n "(cached) " >&6
  else
    as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
@@ -3128,7 +3167,7 @@ case $as_dir/ in #((
      # by default.
      for ac_prog in ginstall scoinst install; do
        for ac_exec_ext in '' $ac_executable_extensions; do
-       if { test -f "$as_dir/$ac_prog$ac_exec_ext" && $as_test_x "$as_dir/$ac_prog$ac_exec_ext"; }; then
+       if as_fn_executable_p "$as_dir/$ac_prog$ac_exec_ext"; then
           if test $ac_prog = install &&
             grep dspmsg "$as_dir/$ac_prog$ac_exec_ext" >/dev/null 2>&1; then
             # AIX install.  It has an incompatible calling convention.
@@ -3195,11 +3234,11 @@ am_lf='
  '
  case `pwd` in
    *[\\\"\#\$\&\'\`$am_lf]*)
-    as_fn_error "unsafe absolute working directory name" "$LINENO" 5;;
+    as_fn_error $? "unsafe absolute working directory name" "$LINENO" 5;;
  esac
  case $srcdir in
    *[\\\"\#\$\&\'\`$am_lf\ \    ]*)
-    as_fn_error "unsafe srcdir value: \`$srcdir'" "$LINENO" 5;;
+    as_fn_error $? "unsafe srcdir value: \`$srcdir'" "$LINENO" 5;;
  esac
  
  # Do `set' in a subshell so we don't clobber the current shell's
@@ -3221,7 +3260,7 @@ if (
        # if, for instance, CONFIG_SHELL is bash and it inherits a
        # broken ls alias from the environment.  This has actually
        # happened.  Such a system could not be considered "sane".
-      as_fn_error "ls -t appears to fail.  Make sure there is not a broken
+      as_fn_error $? "ls -t appears to fail.  Make sure there is not a broken
  alias in your environment" "$LINENO" 5
     fi
  
@@ -3231,7 +3270,7 @@ then
     # Ok.
     :
  else
-   as_fn_error "newly created file is older than distributed files!
+   as_fn_error $? "newly created file is older than distributed files!
  Check your system clock" "$LINENO" 5
  fi
  { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
@@ -3285,7 +3324,7 @@ if test "$cross_compiling" != no; then
  set dummy ${ac_tool_prefix}strip; ac_word=$2
  { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
  $as_echo_n "checking for $ac_word... " >&6; }
-if test "${ac_cv_prog_STRIP+set}" = set; then :
+if ${ac_cv_prog_STRIP+:} false; then :
    $as_echo_n "(cached) " >&6
  else
    if test -n "$STRIP"; then
@@ -3297,7 +3336,7 @@ do
    IFS=$as_save_IFS
    test -z "$as_dir" && as_dir=.
      for ac_exec_ext in '' $ac_executable_extensions; do
-  if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+  if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
      ac_cv_prog_STRIP="${ac_tool_prefix}strip"
      $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
      break 2
@@ -3325,7 +3364,7 @@ if test -z "$ac_cv_prog_STRIP"; then
  set dummy strip; ac_word=$2
  { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
  $as_echo_n "checking for $ac_word... " >&6; }
-if test "${ac_cv_prog_ac_ct_STRIP+set}" = set; then :
+if ${ac_cv_prog_ac_ct_STRIP+:} false; then :
    $as_echo_n "(cached) " >&6
  else
    if test -n "$ac_ct_STRIP"; then
@@ -3337,7 +3376,7 @@ do
    IFS=$as_save_IFS
    test -z "$as_dir" && as_dir=.
      for ac_exec_ext in '' $ac_executable_extensions; do
-  if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+  if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
      ac_cv_prog_ac_ct_STRIP="strip"
      $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
      break 2
@@ -3378,7 +3417,7 @@ INSTALL_STRIP_PROGRAM="\$(install_sh) -c -s"
  { $as_echo "$as_me:${as_lineno-$LINENO}: checking for a thread-safe mkdir -p" >&5
  $as_echo_n "checking for a thread-safe mkdir -p... " >&6; }
  if test -z "$MKDIR_P"; then
-  if test "${ac_cv_path_mkdir+set}" = set; then :
+  if ${ac_cv_path_mkdir+:} false; then :
    $as_echo_n "(cached) " >&6
  else
    as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
@@ -3388,7 +3427,7 @@ do
    test -z "$as_dir" && as_dir=.
      for ac_prog in mkdir gmkdir; do
          for ac_exec_ext in '' $ac_executable_extensions; do
-          { test -f "$as_dir/$ac_prog$ac_exec_ext" && $as_test_x "$as_dir/$ac_prog$ac_exec_ext"; } || continue
+          as_fn_executable_p "$as_dir/$ac_prog$ac_exec_ext" || continue
            case `"$as_dir/$ac_prog$ac_exec_ext" --version 2>&1` in #(
              'mkdir (GNU coreutils) '* | \
              'mkdir (coreutils) '* | \
@@ -3429,7 +3468,7 @@ do
  set dummy $ac_prog; ac_word=$2
  { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
  $as_echo_n "checking for $ac_word... " >&6; }
-if test "${ac_cv_prog_AWK+set}" = set; then :
+if ${ac_cv_prog_AWK+:} false; then :
    $as_echo_n "(cached) " >&6
  else
    if test -n "$AWK"; then
@@ -3441,7 +3480,7 @@ do
    IFS=$as_save_IFS
    test -z "$as_dir" && as_dir=.
      for ac_exec_ext in '' $ac_executable_extensions; do
-  if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+  if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
      ac_cv_prog_AWK="$ac_prog"
      $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
      break 2
@@ -3469,7 +3508,7 @@ done
  $as_echo_n "checking whether ${MAKE-make} sets \$(MAKE)... " >&6; }
  set x ${MAKE-make}
  ac_make=`$as_echo "$2" | sed 's/+/p/g; s/[^a-zA-Z0-9_]/_/g'`
-if { as_var=ac_cv_prog_make_${ac_make}_set; eval "test \"\${$as_var+set}\" = set"; }; then :
+if eval \${ac_cv_prog_make_${ac_make}_set+:} false; then :
    $as_echo_n "(cached) " >&6
  else
    cat >conftest.make <<\_ACEOF
@@ -3477,7 +3516,7 @@ SHELL = /bin/sh
  all:
         @echo '@@@%%%=$(MAKE)=@@@%%%'
  _ACEOF
-# GNU make sometimes prints "make[1]: Entering...", which would confuse us.
+# GNU make sometimes prints "make[1]: Entering ...", which would confuse us.
  case `${MAKE-make} -f conftest.make 2>/dev/null` in
    *@@@%%%=?*=@@@%%%*)
      eval ac_cv_prog_make_${ac_make}_set=yes;;
@@ -3511,7 +3550,7 @@ if test "`cd $srcdir && pwd`" != "`pwd`"; then
    am__isrc=' -I$(srcdir)'
    # test to see if srcdir already configured
    if test -f $srcdir/config.status; then
-    as_fn_error "source directory already configured; run \"make distclean\" there first" "$LINENO" 5
+    as_fn_error $? "source directory already configured; run \"make distclean\" there first" "$LINENO" 5
    fi
  fi
  
@@ -3527,7 +3566,7 @@ fi
  
  # Define the identity of the package.
   PACKAGE='gmp'
- VERSION='5.0.5'
+ VERSION='5.1.3'
  
  
  cat >>confdefs.h <<_ACEOF
@@ -3557,11 +3596,11 @@ MAKEINFO=${MAKEINFO-"${am_missing_run}makeinfo"}
  
  # We need awk for the "check" target.  The system "awk" is bad on
  # some platforms.
-# Always define AMTAR for backward compatibility.
-
-AMTAR=${AMTAR-"${am_missing_run}tar"}
+# Always define AMTAR for backward compatibility.  Yes, it's still used
+# in the wild :-(  We should find a proper way to deprecate it ...
+AMTAR='$${TAR-tar}'
  
-am__tar='${AMTAR} chof - "$$tardir"'; am__untar='${AMTAR} xf -'
+am__tar='$${TAR-tar} chof - "$$tardir"' am__untar='$${TAR-tar} xf -'
  
  
  
@@ -3598,7 +3637,7 @@ fi
  if test "${enable_assert+set}" = set; then :
    enableval=$enable_assert; case $enableval in
  yes|no) ;;
-*) as_fn_error "bad value $enableval for --enable-assert, need yes or no" "$LINENO" 5 ;;
+*) as_fn_error $? "bad value $enableval for --enable-assert, need yes or no" "$LINENO" 5 ;;
  esac
  else
    enable_assert=no
@@ -3625,7 +3664,7 @@ alloca|malloc-reentrant|malloc-notreentrant) ;;
  yes|no|reentrant|notreentrant) ;;
  debug) ;;
  *)
-  as_fn_error "bad value $enableval for --enable-alloca, need one of:
+  as_fn_error $? "bad value $enableval for --enable-alloca, need one of:
  yes no reentrant notreentrant alloca malloc-reentrant malloc-notreentrant debug" "$LINENO" 5 ;;
  esac
  else
@@ -3650,7 +3689,7 @@ fi
  if test "${enable_cxx+set}" = set; then :
    enableval=$enable_cxx; case $enableval in
  yes|no|detect) ;;
-*) as_fn_error "bad value $enableval for --enable-cxx, need yes/no/detect" "$LINENO" 5 ;;
+*) as_fn_error $? "bad value $enableval for --enable-cxx, need yes/no/detect" "$LINENO" 5 ;;
  esac
  else
    enable_cxx=no
@@ -3658,70 +3697,68 @@ fi
  
  
  
-# Check whether --enable-fft was given.
-if test "${enable_fft+set}" = set; then :
-  enableval=$enable_fft; case $enableval in
+# Check whether --enable-assembly was given.
+if test "${enable_assembly+set}" = set; then :
+  enableval=$enable_assembly; case $enableval in
  yes|no) ;;
-*) as_fn_error "bad value $enableval for --enable-fft, need yes or no" "$LINENO" 5 ;;
+*) as_fn_error $? "bad value $enableval for --enable-assembly, need yes or no" "$LINENO" 5 ;;
  esac
  else
-  enable_fft=yes
+  enable_assembly=yes
  fi
  
  
-if test "$enable_fft" = "yes"; then
+if test "$enable_assembly" = "yes"; then
  
-$as_echo "#define WANT_FFT 1" >>confdefs.h
+$as_echo "#define WANT_ASSEMBLY 1" >>confdefs.h
  
  fi
  
  
-# Check whether --enable-old-fft-full was given.
-if test "${enable_old_fft_full+set}" = set; then :
-  enableval=$enable_old_fft_full; case $enableval in
+# Check whether --enable-fft was given.
+if test "${enable_fft+set}" = set; then :
+  enableval=$enable_fft; case $enableval in
  yes|no) ;;
-*) as_fn_error "bad value $enableval for --enable-old-fft-full, need yes or no" "$LINENO" 5 ;;
+*) as_fn_error $? "bad value $enableval for --enable-fft, need yes or no" "$LINENO" 5 ;;
  esac
  else
-  enable_old_fft_full=no
+  enable_fft=yes
  fi
  
  
-if test "$enable_old_fft_full" = "yes"; then
+if test "$enable_fft" = "yes"; then
  
-$as_echo "#define WANT_OLD_FFT_FULL 1" >>confdefs.h
+$as_echo "#define WANT_FFT 1" >>confdefs.h
  
  fi
  
  
-# Check whether --enable-mpbsd was given.
-if test "${enable_mpbsd+set}" = set; then :
-  enableval=$enable_mpbsd; case $enableval in
+# Check whether --enable-old-fft-full was given.
+if test "${enable_old_fft_full+set}" = set; then :
+  enableval=$enable_old_fft_full; case $enableval in
  yes|no) ;;
-*) as_fn_error "bad value $enableval for --enable-mpbsd, need yes or no" "$LINENO" 5 ;;
+*) as_fn_error $? "bad value $enableval for --enable-old-fft-full, need yes or no" "$LINENO" 5 ;;
  esac
  else
-  enable_mpbsd=no
-fi
-
- if test "$enable_mpbsd" = "yes"; then
-  WANT_MPBSD_TRUE=
-  WANT_MPBSD_FALSE='#'
-else
-  WANT_MPBSD_TRUE='#'
-  WANT_MPBSD_FALSE=
+  enable_old_fft_full=no
  fi
  
  
+if test "$enable_old_fft_full" = "yes"; then
+
+$as_echo "#define WANT_OLD_FFT_FULL 1" >>confdefs.h
+
+fi
+
  
  # Check whether --enable-nails was given.
  if test "${enable_nails+set}" = set; then :
    enableval=$enable_nails; case $enableval in
  yes|no|[02468]|[0-9][02468]) ;;
  *[13579])
-  as_fn_error "bad value $enableval for --enable-nails, only even nail sizes supported" "$LINENO" 5 ;;
+  as_fn_error $? "bad value $enableval for --enable-nails, only even nail sizes supported" "$LINENO" 5 ;;
  *)
-  as_fn_error "bad value $enableval for --enable-nails, need yes/no/number" "$LINENO" 5 ;;
+  as_fn_error $? "bad value $enableval for --enable-nails, need yes/no/number" "$LINENO" 5 ;;
  esac
  else
    enable_nails=no
@@ -3740,7 +3777,7 @@ esac
  if test "${enable_profiling+set}" = set; then :
    enableval=$enable_profiling; case $enableval in
  no|prof|gprof|instrument) ;;
-*) as_fn_error "bad value $enableval for --enable-profiling, need no/prof/gprof/instrument" "$LINENO" 5 ;;
+*) as_fn_error $? "bad value $enableval for --enable-profiling, need no/prof/gprof/instrument" "$LINENO" 5 ;;
  esac
  else
    enable_profiling=no
@@ -3782,7 +3819,7 @@ fi
  if test "${with_readline+set}" = set; then :
    withval=$with_readline; case $withval in
  yes|no|detect) ;;
-*) as_fn_error "bad value $withval for --with-readline, need yes/no/detect" "$LINENO" 5 ;;
+*) as_fn_error $? "bad value $withval for --with-readline, need yes/no/detect" "$LINENO" 5 ;;
  esac
  else
    with_readline=detect
@@ -3794,7 +3831,7 @@ fi
  if test "${enable_fat+set}" = set; then :
    enableval=$enable_fat; case $enableval in
  yes|no) ;;
-*) as_fn_error "bad value $enableval for --enable-fat, need yes or no" "$LINENO" 5 ;;
+*) as_fn_error $? "bad value $enableval for --enable-fat, need yes or no" "$LINENO" 5 ;;
  esac
  else
    enable_fat=no
@@ -3806,7 +3843,7 @@ fi
  if test "${enable_minithres+set}" = set; then :
    enableval=$enable_minithres; case $enableval in
  yes|no) ;;
-*) as_fn_error "bad value $enableval for --enable-minithres, need yes or no" "$LINENO" 5 ;;
+*) as_fn_error $? "bad value $enableval for --enable-minithres, need yes or no" "$LINENO" 5 ;;
  esac
  else
    enable_minithres=no
@@ -3814,6 +3851,10 @@ fi
  
  
  
+if test $enable_fat = yes && test $enable_assembly = no ; then
+  as_fn_error $? "when doing a fat build, disabling assembly will not work" "$LINENO" 5
+fi
+
  
  tmp_host=`echo $host_cpu | sed 's/\./_/'`
  cat >>confdefs.h <<_ACEOF
@@ -4102,12 +4143,50 @@ echo "include_mpn(\`alpha/default.m4')" >> $gmp_tmpconfigm4i
  
  
    arm*-*-*)
-    path="arm"
      gcc_cflags="$gcc_cflags $fomit_frame_pointer"
+    gcc_cflags_optlist="arch tune"
+    gcc_cflags_maybe="-marm"
      gcc_testlist="gcc-arm-umodsi"
  
  echo "include_mpn(\`arm/arm-defs.m4')" >> $gmp_tmpconfigm4i
  
+    CALLING_CONVENTIONS_OBJS='arm32call.lo arm32check.lo'
+
+    case $host_cpu in
+      armsa1 | armv4*)
+       path="arm"
+       gcc_cflags_arch="-march=armv4"
+       ;;
+      armxscale | arm926 | arm946 | arm966 | arm1026 | armv5*)
+       path="arm/v5 arm"
+       gcc_cflags_arch="-march=armv5"
+       ;;
+      arm11mpcore | arm1136 | arm1176 | armv6*)
+       path="arm/v6 arm/v5 arm"
+       gcc_cflags_arch="-march=armv6"
+       ;;
+      arm1156)
+       path="arm/v6t2 arm/v6 arm/v5 arm"
+       gcc_cflags_arch="-march=armv6t2"
+       ;;
+      armcortexa9)
+       path="arm/v7a/cora9 arm/v6t2 arm/v6 arm/v5 arm"
+       gcc_cflags_arch="-march=armv7-a"
+       gcc_cflags_tune="-mtune=cortex-a9"
+       ;;
+      armcortexa15)
+       path="arm/v7a/cora15 arm/v6t2 arm/v6 arm/v5 arm"
+       gcc_cflags_arch="-march=armv7-a"
+       gcc_cflags_tune="-mtune=cortex-a15 -mtune=cortex-a9"
+       ;;
+      armcortexa5 | armcortexa8 | armv7a*)
+       path="arm/v6t2 arm/v6 arm/v5 arm"
+       gcc_cflags_arch="-march=armv7-a"
+       ;;
+      *)
+       path="arm"
+       ;;
+    esac
      ;;
  
  
@@ -4188,7 +4267,7 @@ echo "include_mpn(\`arm/arm-defs.m4')" >> $gmp_tmpconfigm4i
          # -mpa-risc-2-0 is only an optional flag, in case an old gcc is
          # used.  Assembler support for 2.0 is essential though, for our asm
          # files.
-       gcc_20n_cflags="-O2"
+       gcc_20n_cflags="$gcc_cflags"
         gcc_20n_cflags_optlist="arch"
          gcc_20n_cflags_arch="-mpa-risc-2-0 -mpa-risc-1-1"
          gcc_20n_testlist="sizeof-long-4 hppa-level-2.0"
@@ -4211,7 +4290,7 @@ echo "include_mpn(\`arm/arm-defs.m4')" >> $gmp_tmpconfigm4i
            esac
  
            cclist_20w="gcc cc"
-         gcc_20w_cflags="-O2 -mpa-risc-2-0"
+         gcc_20w_cflags="$gcc_cflags -mpa-risc-2-0"
            cc_20w_cflags="+DD64 +O2"
            cc_20w_testlist="hpc-hppa-2-0"
            path_20w="pa64"
@@ -4236,6 +4315,7 @@ echo "include_mpn(\`arm/arm-defs.m4')" >> $gmp_tmpconfigm4i
  echo "include_mpn(\`ia64/ia64-defs.m4')" >> $gmp_tmpconfigm4i
  
      SPEED_CYCLECOUNTER_OBJ=ia64.lo
+    any_32_testlist="sizeof-long-4"
  
      case $host_cpu in
        itanium)   path="ia64/itanium  ia64" ;;
@@ -4269,13 +4349,14 @@ echo "include_mpn(\`ia64/ia64-defs.m4')" >> $gmp_tmpconfigm4i
          # let us use whatever seems to work.
          #
          abilist="32 64"
+        any_64_testlist="sizeof-long-8"
  
          cclist_32="gcc cc"
          path_32="ia64"
          cc_32_cflags=""
          cc_32_cflags_optlist="opt"
          cc_32_cflags_opt="+O3 +O2 +O1"
-        gcc_32_cflags="-milp32 -O2"
+        gcc_32_cflags="$gcc_cflags -milp32"
          limb_32=longlong
          SPEED_CYCLECOUNTER_OBJ_32=ia64.lo
          cyclecounter_size_32=2
@@ -4290,7 +4371,7 @@ echo "include_mpn(\`ia64/ia64-defs.m4')" >> $gmp_tmpconfigm4i
          cc_64_cppflags="+DD64"
          cc_64_cflags_optlist="opt"
          cc_64_cflags_opt="+O3 +O2 +O1"
-        gcc_64_cflags="$gcc_64_cflags -mlp64"
+        gcc_64_cflags="$gcc_cflags -mlp64"
          ;;
      esac
      ;;
@@ -4376,13 +4457,13 @@ echo "include_mpn(\`mips32/mips-defs.m4')" >> $gmp_tmpconfigm4i
          abilist="n32 64 o32"
  
          cclist_n32="gcc cc"
-        gcc_n32_cflags="-O2 -mabi=n32"
+        gcc_n32_cflags="$gcc_cflags -mabi=n32"
          cc_n32_cflags="-O2 -n32"       # no -g, it disables all optimizations
          limb_n32=longlong
          path_n32="mips64"
  
          cclist_64="gcc cc"
-        gcc_64_cflags="$gcc_64_cflags -mabi=64"
+        gcc_64_cflags="$gcc_cflags -mabi=64"
          gcc_64_ldflags="-Wc,-mabi=64"
          cc_64_cflags="-O2 -64"         # no -g, it disables all optimizations
          cc_64_ldflags="-Wc,-64"
@@ -4513,23 +4594,29 @@ echo "include_mpn(\`mips32/mips-defs.m4')" >> $gmp_tmpconfigm4i
           *-*-aix*)
             # On AIX a true 64-bit ABI is available.
             # Need -Wc to pass object type flags through to the linker.
-           abilist="aix64 $abilist"
-           cclist_aix64="gcc xlc"
-           gcc_aix64_cflags="-O2 -maix64 -mpowerpc64"
-           gcc_aix64_cflags_optlist="cpu"
-           gcc_aix64_ldflags="-Wc,-maix64"
-           xlc_aix64_cflags="-O2 -q64 -qmaxmem=20000"
-           xlc_aix64_cflags_optlist="arch"
-           xlc_aix64_ldflags="-Wc,-q64"
+           abilist="mode64 $abilist"
+           cclist_mode64="gcc xlc"
+           gcc_mode64_cflags="$gcc_cflags -maix64 -mpowerpc64"
+           gcc_mode64_cflags_optlist="cpu"
+           gcc_mode64_ldflags="-Wc,-maix64"
+           xlc_mode64_cflags="-O2 -q64 -qmaxmem=20000"
+           xlc_mode64_cflags_optlist="arch"
+           xlc_mode64_ldflags="-Wc,-q64"
             # Must indicate object type to ar and nm
-           ar_aix64_flags="-X64"
-           nm_aix64_flags="-X64"
-           path_aix64=""
-           for i in $cpu_path; do path_aix64="${path_aix64}powerpc64/mode64/$i "; done
-           path_aix64="${path_aix64}powerpc64/mode64 $vmx_path powerpc64"
+           ar_mode64_flags="-X64"
+           nm_mode64_flags="-X64"
+           path_mode64=""
+           p=""
+           for i in $cpu_path
+             do path_mode64="${path_mode64}powerpc64/mode64/$i "
+                path_mode64="${path_mode64}powerpc64/$i "
+                p="${p} powerpc32/$i "
+             done
+           path_mode64="${path_mode64}powerpc64/mode64 $vmx_path powerpc64"
+           path="$p $path"
             # grab this object, though it's not a true cycle counter routine
-           SPEED_CYCLECOUNTER_OBJ_aix64=powerpc64.lo
-           cyclecounter_size_aix64=0
+           SPEED_CYCLECOUNTER_OBJ_mode64=powerpc64.lo
+           cyclecounter_size_mode64=0
             ;;
           *-*-darwin*)
             # On Darwin we can use 64-bit instructions with a longlong limb,
@@ -4560,15 +4647,24 @@ echo "include_mpn(\`mips32/mips-defs.m4')" >> $gmp_tmpconfigm4i
             gcc_mode32_cflags_optlist="subtype cpu opt"
             gcc_mode32_cflags_subtype="-force_cpusubtype_ALL"
             gcc_mode32_cflags_opt="-O3 -O2 -O1"
-           path_mode32="powerpc64/mode32 $vmx_path powerpc64"
             limb_mode32=longlong
             cclist_mode64="gcc"
             gcc_mode64_cflags="-m64"
             gcc_mode64_cflags_optlist="cpu opt"
             gcc_mode64_cflags_opt="-O3 -O2 -O1"
             path_mode64=""
-           for i in $cpu_path; do path_mode64="${path_mode64}powerpc64/mode64/$i "; done
+           path_mode32=""
+           p=""
+           for i in $cpu_path
+             do path_mode64="${path_mode64}powerpc64/mode64/$i "
+                path_mode64="${path_mode64}powerpc64/$i "
+                path_mode32="${path_mode32}powerpc64/mode32/$i "
+                path_mode32="${path_mode32}powerpc64/$i "
+                p="${p} powerpc32/$i "
+             done
             path_mode64="${path_mode64}powerpc64/mode64 $vmx_path powerpc64"
+           path_mode32="${path_mode32}powerpc64/mode32 $vmx_path powerpc64"
+           path="$p $path"
             SPEED_CYCLECOUNTER_OBJ_mode64=powerpc64.lo
             cyclecounter_size_mode64=0
             any_mode64_testlist="sizeof-long-8"
@@ -4595,15 +4691,24 @@ echo "include_mpn(\`mips32/mips-defs.m4')" >> $gmp_tmpconfigm4i
             gcc_mode32_cflags="-mpowerpc64"
             gcc_mode32_cflags_optlist="cpu opt"
             gcc_mode32_cflags_opt="-O3 -O2 -O1"
-           path_mode32="powerpc64/mode32 $vmx_path powerpc64"
             limb_mode32=longlong
             cclist_mode64="gcc gcc64"
             gcc_mode64_cflags_maybe="-m64"
             gcc_mode64_cflags_optlist="cpu opt"
             gcc_mode64_cflags_opt="-O3 -O2 -O1"
             path_mode64=""
-           for i in $cpu_path; do path_mode64="${path_mode64}powerpc64/mode64/$i "; done
+           path_mode32=""
+           p=""
+           for i in $cpu_path
+             do path_mode64="${path_mode64}powerpc64/mode64/$i "
+                path_mode64="${path_mode64}powerpc64/$i "
+                path_mode32="${path_mode32}powerpc64/mode32/$i "
+                path_mode32="${path_mode32}powerpc64/$i "
+                p="${p} powerpc32/$i "
+             done
             path_mode64="${path_mode64}powerpc64/mode64 $vmx_path powerpc64"
+           path_mode32="${path_mode32}powerpc64/mode32 $vmx_path powerpc64"
+           path="$p $path"
             SPEED_CYCLECOUNTER_OBJ_mode64=powerpc64.lo
             cyclecounter_size_mode64=0
             any_mode64_testlist="sizeof-long-8"
@@ -4766,8 +4871,10 @@ echo "include_mpn(\`sparc32/sparc-defs.m4')" >> $gmp_tmpconfigm4i
          path="sparc32/v8 sparc32" ;;
        supersparc)
          path="sparc32/v8/supersparc sparc32/v8 sparc32" ;;
-      sparc64 | sparcv9* | ultrasparc*)
+      sparc64 | sparcv9* | ultrasparc | ultrasparc[234]*)
          path="sparc32/v9 sparc32/v8 sparc32" ;;
+      ultrasparct[12345])
+        path="sparc32/ultrasparct1 sparc32/v8 sparc32" ;;
        *)
          path="sparc32" ;;
      esac
@@ -4787,10 +4894,10 @@ echo "include_mpn(\`sparc32/sparc-defs.m4')" >> $gmp_tmpconfigm4i
      # abilist="64" only.
      #
      case $host_cpu in
+      ultrasparct[345])
+        gcc_32_cflags="$gcc_cflags -Wa,-xarch=v8plusd" ;;
        sparc64 | sparcv9* | ultrasparc*)
-        gcc_cflags="$gcc_cflags -Wa,-xarch=v8plus" ;;
-      *)
-        gcc_cflags="$gcc_cflags" ;;
+        gcc_32_cflags="$gcc_cflags -Wa,-xarch=v8plus" ;;
      esac
      gcc_32_cflags_maybe="-m32"
      gcc_cflags_optlist="cpu"
@@ -4829,6 +4936,7 @@ echo "include_mpn(\`sparc32/sparc-defs.m4')" >> $gmp_tmpconfigm4i
         case $host_cpu in
           sparcv8 | microsparc | supersparc | turbosparc)
                                               cc_cflags_arch="-xarch=v8" ;;
+          ultrasparct[345])                 cc_cflags_arch="-xarch=v8plusd" ;;
           sparc64 | sparcv9* | ultrasparc*)   cc_cflags_arch="-xarch=v8plus" ;;
           *)                                  cc_cflags_arch="-xarch=v7" ;;
         esac
@@ -4882,11 +4990,11 @@ echo "include_mpn(\`sparc32/sparc-defs.m4')" >> $gmp_tmpconfigm4i
  
         case $host_cpu in
           ultrasparc | ultrasparc2 | ultrasparc2i)
-           path_64="sparc64/ultrasparc12 sparc64" ;;
+           path_64="sparc64/ultrasparc1234 sparc64" ;;
           ultrasparc[34])
             path_64="sparc64/ultrasparc34 sparc64/ultrasparc1234 sparc64" ;;
-         ultrasparct[1234])
-           path_64="sparc64" ;;
+         ultrasparct[12345])
+           path_64="sparc64/ultrasparct1 sparc64" ;;
           *)
             path_64="sparc64"
         esac
@@ -4905,7 +5013,7 @@ echo "include_mpn(\`sparc32/sparc-defs.m4')" >> $gmp_tmpconfigm4i
          # it until we're sure.  (Might want -xarch=v9a or -xarch=v9b for the
          # higher cpu types instead.)
          #
-        gcc_64_cflags="$gcc_64_cflags -m64 -mptr64"
+        gcc_64_cflags="$gcc_cflags -m64 -mptr64"
          gcc_64_ldflags="-Wc,-m64"
          gcc_64_cflags_optlist="cpu"
  
@@ -4934,10 +5042,18 @@ echo "include_mpn(\`sparc32/sparc-defs.m4')" >> $gmp_tmpconfigm4i
  
  
    # VAX
+  vax*-*-*elf*)
+    # Use elf conventions (i.e., '%' register prefix, no global prefix)
+    #
+
+echo "include_mpn(\`vax/elf.m4')" >> $gmp_tmpconfigm4i
+
+    gcc_cflags="$gcc_cflags $fomit_frame_pointer"
+    path="vax"
+    extra_functions="udiv_w_sdiv"
+    ;;
    vax*-*-*)
-    # Currently gcc (version 3.0) on vax always uses a frame pointer
-    # (config/vax/vax.h FRAME_POINTER_REQUIRED=1), so -fomit-frame-pointer
-    # will be ignored.
+    # Default to aout conventions (i.e., no register prefix, '_' global prefix)
      #
      gcc_cflags="$gcc_cflags $fomit_frame_pointer"
      path="vax"
@@ -5124,7 +5240,7 @@ echo "include_mpn(\`sparc32/sparc-defs.m4')" >> $gmp_tmpconfigm4i
         gcc_cflags_cpu="-mtune=bdver1 -mtune=amdfam10 -mtune=k8"
         gcc_cflags_arch="-march=bdver1 -march=amdfam10 -march=k8 -march=k8~-mno-sse2"
         path="x86/bd1 x86/k7/mmx x86/k7 x86"
-       path_64="x86_64/bd1 x86_64"
+       path_64="x86_64/bd1 x86_64/k10 x86_64/k8 x86_64"
         ;;
        core2)
         gcc_cflags_cpu="-mtune=core2 -mtune=k8"
@@ -5138,7 +5254,7 @@ echo "include_mpn(\`sparc32/sparc-defs.m4')" >> $gmp_tmpconfigm4i
         path="x86/coreinhm x86/p6/sse2 x86/p6/p3mmx x86/p6/mmx x86/p6 x86"
         path_64="x86_64/coreinhm x86_64/core2 x86_64"
         ;;
-      coreisbr)
+      coreisbr | coreihwl | coreibwl)
         gcc_cflags_cpu="-mtune=corei7 -mtune=core2 -mtune=k8"
         gcc_cflags_arch="-march=corei7 -march=core2 -march=core2~-mno-sse2 -march=k8 -march=k8~-mno-sse2"
         path="x86/coreisbr x86/p6/sse2 x86/p6/p3mmx x86/p6/mmx x86/p6 x86"
@@ -5167,12 +5283,27 @@ echo "include_mpn(\`sparc32/sparc-defs.m4')" >> $gmp_tmpconfigm4i
      case $host in
        athlon64-*-* | k8-*-* | k10-*-* | bobcat-*-* | bulldozer-*-* | pentium4-*-* | atom-*-* | core2-*-* | corei*-*-* | x86_64-*-* | nano-*-*)
         cclist_64="gcc"
-       gcc_64_cflags="$gcc_64_cflags -m64"
+       gcc_64_cflags="$gcc_cflags -m64"
         gcc_64_cflags_optlist="cpu arch"
         CALLING_CONVENTIONS_OBJS_64='amd64call.lo amd64check$U.lo'
         SPEED_CYCLECOUNTER_OBJ_64=x86_64.lo
         cyclecounter_size_64=2
-       abilist="64 32"
+
+       cclist_x32="gcc"
+       gcc_x32_cflags="$gcc_cflags -mx32"
+       gcc_x32_cflags_optlist="$gcc_64_cflags_optlist"
+       CALLING_CONVENTIONS_OBJS_x32="$CALLING_CONVENTIONS_OBJS_64"
+       SPEED_CYCLECOUNTER_OBJ_x32="$SPEED_CYCLECOUNTER_OBJ_64"
+       cyclecounter_size_x32="$cyclecounter_size_64"
+       path_x32="$path_64"
+       limb_x32=longlong
+       any_x32_testlist="sizeof-long-4"
+
+       abilist="64 x32 32"
+       if test "$enable_assembly" = "yes" ; then
+           extra_functions_64="invert_limb_table"
+           extra_functions_x32=$extra_functions_64
+       fi
  
         case $host in
           *-*-solaris*)
@@ -5182,9 +5313,11 @@ echo "include_mpn(\`sparc32/sparc-defs.m4')" >> $gmp_tmpconfigm4i
             ;;
           *-*-mingw* | *-*-cygwin)
             limb_64=longlong
-           path_64=""  # Windows amd64 calling conventions are *different*
-           # Silence many pedantic warnings for w64.  FIXME.
-           gcc_64_cflags="$gcc_64_cflags -std=gnu99"
+           CALLING_CONVENTIONS_OBJS_64=""
+
+$as_echo "#define HOST_DOS64 1" >>confdefs.h
+
+           GMP_NONSTD_ABI_64=DOS64
             ;;
         esac
         ;;
@@ -5204,21 +5337,11 @@ echo "include_mpn(\`sparc32/sparc-defs.m4')" >> $gmp_tmpconfigm4i
      ;;
  
  
-  # Special CPU "none" selects generic C.  -DNO_ASM is used to disable gcc
-  # asm blocks in longlong.h (since they're driven by cpp pre-defined
-  # symbols like __alpha rather than the configured $host_cpu).
-  #
+  # Special CPU "none" used to select generic C, now this is obsolete.
    none-*-*)
-    abilist="long longlong"
-    cclist_long=$cclist
-    gcc_long_cflags=$gcc_cflags
-    gcc_long_cppflags="-DNO_ASM"
-    cc_long_cflags=$cc_cflags
-    cclist_longlong=$cclist
-    gcc_longlong_cflags=$gcc_cflags
-    gcc_longlong_cppflags="-DNO_ASM"
-    cc_longlong_cflags=$cc_cflags
-    limb_longlong=longlong
+    enable_assembly=no
+    { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: the \"none\" host is obsolete, use --disable-assembly" >&5
+$as_echo "$as_me: WARNING: the \"none\" host is obsolete, use --disable-assembly" >&2;}
      ;;
  
  esac
@@ -5268,7 +5391,7 @@ if test -n "$ABI"; then
      if test $abi = "$ABI"; then found=yes; break; fi
    done
    if test $found = no; then
-    as_fn_error "ABI=$ABI is not among the following valid choices: $abilist" "$LINENO" 5
+    as_fn_error $? "ABI=$ABI is not among the following valid choices: $abilist" "$LINENO" 5
    fi
    abilist="$ABI"
  fi
@@ -6715,7 +6838,7 @@ done
  # C on MS-DOS systems).
  #
  if test $found_compiler = no && test -n "$path"; then
-  as_fn_error "could not find a working compiler, see config.log for details" "$LINENO" 5
+  as_fn_error $? "could not find a working compiler, see config.log for details" "$LINENO" 5
  fi
  
  case $host in
@@ -6725,31 +6848,40 @@ case $host in
        gcc_cflags_cpu=""
        gcc_cflags_arch=""
  
+      fat_functions="add_n addmul_1 bdiv_dbm1c com copyd copyi dive_1 divrem_1
+                    gcd_1 lshift lshiftc mod_1 mod_1_1 mod_1_1_cps mod_1_2
+                    mod_1_2_cps mod_1_4 mod_1_4_cps mod_34lsub1 mode1o mul_1
+                    mul_basecase mullo_basecase pre_divrem_1 pre_mod_1 redc_1
+                    redc_2 rshift sqr_basecase sub_n submul_1"
+
        if test "$abi" = 32; then
         extra_functions="$extra_functions fat fat_entry"
         path="x86/fat x86"
         fat_path="x86 x86/fat x86/i486
                   x86/k6 x86/k6/mmx x86/k6/k62mmx
                   x86/k7 x86/k7/mmx
+                 x86/k8 x86/k10 x86/bobcat
                   x86/pentium x86/pentium/mmx
                   x86/p6 x86/p6/mmx x86/p6/p3mmx x86/p6/sse2
-                 x86/pentium4 x86/pentium4/mmx x86/pentium4/sse2"
+                 x86/pentium4 x86/pentium4/mmx x86/pentium4/sse2
+                 x86/core2 x86/coreinhm x86/coreisbr
+                 x86/atom x86/atom/mmx x86/atom/sse2 x86/nano"
        fi
  
        if test "$abi" = 64; then
         gcc_64_cflags=""
         extra_functions_64="$extra_functions_64 fat fat_entry"
         path_64="x86_64/fat x86_64"
-       fat_path="x86_64 x86_64/fat x86_64/pentium4 x86_64/core2 x86_64/coreinhm x86_64/coreisbr x86_64/atom x86_64/nano"
+       fat_path="x86_64 x86_64/fat
+                 x86_64/k8 x86_64/k10 x86_64/bd1 x86_64/bobcat
+                 x86_64/pentium4 x86_64/core2 x86_64/coreinhm x86_64/coreisbr
+                 x86_64/atom x86_64/nano"
+       fat_functions="$fat_functions addmul_2 addlsh1_n addlsh2_n sublsh1_n"
        fi
  
-      fat_functions="add_n addmul_1 copyd copyi
-                    dive_1 diveby3 divrem_1 gcd_1 lshift
-                    mod_1 mod_34lsub1 mode1o mul_1 mul_basecase
-                    pre_divrem_1 pre_mod_1 rshift
-                    sqr_basecase sub_n submul_1"
        fat_thresholds="MUL_TOOM22_THRESHOLD MUL_TOOM33_THRESHOLD
-                     SQR_TOOM2_THRESHOLD SQR_TOOM3_THRESHOLD"
+                     SQR_TOOM2_THRESHOLD SQR_TOOM3_THRESHOLD
+                     BMOD_1_TO_MOD_1_THRESHOLD"
      fi
      ;;
  esac
@@ -6804,7 +6936,7 @@ fi
  
                { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether the operating system supports XMM registers" >&5
  $as_echo_n "checking whether the operating system supports XMM registers... " >&6; }
-if test "${gmp_cv_os_x86_xmm+set}" = set; then :
+if ${gmp_cv_os_x86_xmm+:} false; then :
    $as_echo_n "(cached) " >&6
  else
    if test "$build" = "$host"; then
@@ -8040,7 +8172,7 @@ esac
    CC="$cc"
    CFLAGS="$cflags"
    CPPFLAGS="$cppflags"
-
+  eval GMP_NONSTD_ABI=\"\$GMP_NONSTD_ABI_$ABI\"
  
    # Could easily have this in config.h too, if desired.
    ABI_nodots=`echo $ABI | sed 's/\./_/'`
@@ -8088,7 +8220,11 @@ _ACEOF
    #
                      eval tmp=\"\$CALLING_CONVENTIONS_OBJS$abi1\"
    test -n "$tmp" || eval tmp=\"\$CALLING_CONVENTIONS_OBJS$abi2\"
-  CALLING_CONVENTIONS_OBJS="$tmp"
+  if test "$enable_assembly" = "yes"; then
+     CALLING_CONVENTIONS_OBJS="$tmp"
+  else
+     CALLING_CONVENTIONS_OBJS=""
+  fi
  
    if test -n "$CALLING_CONVENTIONS_OBJS"; then
  
@@ -8131,7 +8267,7 @@ if test -n "$ac_tool_prefix"; then
  set dummy ${ac_tool_prefix}gcc; ac_word=$2
  { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
  $as_echo_n "checking for $ac_word... " >&6; }
-if test "${ac_cv_prog_CC+set}" = set; then :
+if ${ac_cv_prog_CC+:} false; then :
    $as_echo_n "(cached) " >&6
  else
    if test -n "$CC"; then
@@ -8143,7 +8279,7 @@ do
    IFS=$as_save_IFS
    test -z "$as_dir" && as_dir=.
      for ac_exec_ext in '' $ac_executable_extensions; do
-  if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+  if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
      ac_cv_prog_CC="${ac_tool_prefix}gcc"
      $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
      break 2
@@ -8171,7 +8307,7 @@ if test -z "$ac_cv_prog_CC"; then
  set dummy gcc; ac_word=$2
  { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
  $as_echo_n "checking for $ac_word... " >&6; }
-if test "${ac_cv_prog_ac_ct_CC+set}" = set; then :
+if ${ac_cv_prog_ac_ct_CC+:} false; then :
    $as_echo_n "(cached) " >&6
  else
    if test -n "$ac_ct_CC"; then
@@ -8183,7 +8319,7 @@ do
    IFS=$as_save_IFS
    test -z "$as_dir" && as_dir=.
      for ac_exec_ext in '' $ac_executable_extensions; do
-  if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+  if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
      ac_cv_prog_ac_ct_CC="gcc"
      $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
      break 2
@@ -8224,7 +8360,7 @@ if test -z "$CC"; then
  set dummy ${ac_tool_prefix}cc; ac_word=$2
  { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
  $as_echo_n "checking for $ac_word... " >&6; }
-if test "${ac_cv_prog_CC+set}" = set; then :
+if ${ac_cv_prog_CC+:} false; then :
    $as_echo_n "(cached) " >&6
  else
    if test -n "$CC"; then
@@ -8236,7 +8372,7 @@ do
    IFS=$as_save_IFS
    test -z "$as_dir" && as_dir=.
      for ac_exec_ext in '' $ac_executable_extensions; do
-  if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+  if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
      ac_cv_prog_CC="${ac_tool_prefix}cc"
      $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
      break 2
@@ -8264,7 +8400,7 @@ if test -z "$CC"; then
  set dummy cc; ac_word=$2
  { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
  $as_echo_n "checking for $ac_word... " >&6; }
-if test "${ac_cv_prog_CC+set}" = set; then :
+if ${ac_cv_prog_CC+:} false; then :
    $as_echo_n "(cached) " >&6
  else
    if test -n "$CC"; then
@@ -8277,7 +8413,7 @@ do
    IFS=$as_save_IFS
    test -z "$as_dir" && as_dir=.
      for ac_exec_ext in '' $ac_executable_extensions; do
-  if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+  if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
      if test "$as_dir/$ac_word$ac_exec_ext" = "/usr/ucb/cc"; then
         ac_prog_rejected=yes
         continue
@@ -8323,7 +8459,7 @@ if test -z "$CC"; then
  set dummy $ac_tool_prefix$ac_prog; ac_word=$2
  { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
  $as_echo_n "checking for $ac_word... " >&6; }
-if test "${ac_cv_prog_CC+set}" = set; then :
+if ${ac_cv_prog_CC+:} false; then :
    $as_echo_n "(cached) " >&6
  else
    if test -n "$CC"; then
@@ -8335,7 +8471,7 @@ do
    IFS=$as_save_IFS
    test -z "$as_dir" && as_dir=.
      for ac_exec_ext in '' $ac_executable_extensions; do
-  if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+  if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
      ac_cv_prog_CC="$ac_tool_prefix$ac_prog"
      $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
      break 2
@@ -8367,7 +8503,7 @@ do
  set dummy $ac_prog; ac_word=$2
  { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
  $as_echo_n "checking for $ac_word... " >&6; }
-if test "${ac_cv_prog_ac_ct_CC+set}" = set; then :
+if ${ac_cv_prog_ac_ct_CC+:} false; then :
    $as_echo_n "(cached) " >&6
  else
    if test -n "$ac_ct_CC"; then
@@ -8379,7 +8515,7 @@ do
    IFS=$as_save_IFS
    test -z "$as_dir" && as_dir=.
      for ac_exec_ext in '' $ac_executable_extensions; do
-  if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+  if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
      ac_cv_prog_ac_ct_CC="$ac_prog"
      $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
      break 2
@@ -8421,8 +8557,8 @@ fi
  
  test -z "$CC" && { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
  $as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
-as_fn_error "no acceptable C compiler found in \$PATH
-See \`config.log' for more details." "$LINENO" 5; }
+as_fn_error $? "no acceptable C compiler found in \$PATH
+See \`config.log' for more details" "$LINENO" 5; }
  
  # Provide some information about the compiler.
  $as_echo "$as_me:${as_lineno-$LINENO}: checking for C compiler version" >&5
@@ -8536,9 +8672,8 @@ sed 's/^/| /' conftest.$ac_ext >&5
  
  { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
  $as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
-{ as_fn_set_status 77
-as_fn_error "C compiler cannot create executables
-See \`config.log' for more details." "$LINENO" 5; }; }
+as_fn_error 77 "C compiler cannot create executables
+See \`config.log' for more details" "$LINENO" 5; }
  else
    { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
  $as_echo "yes" >&6; }
@@ -8580,8 +8715,8 @@ done
  else
    { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
  $as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
-as_fn_error "cannot compute suffix of executables: cannot compile and link
-See \`config.log' for more details." "$LINENO" 5; }
+as_fn_error $? "cannot compute suffix of executables: cannot compile and link
+See \`config.log' for more details" "$LINENO" 5; }
  fi
  rm -f conftest conftest$ac_cv_exeext
  { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_exeext" >&5
@@ -8638,9 +8773,9 @@ $as_echo "$ac_try_echo"; } >&5
      else
         { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
  $as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
-as_fn_error "cannot run C compiled programs.
+as_fn_error $? "cannot run C compiled programs.
  If you meant to cross compile, use \`--host'.
-See \`config.log' for more details." "$LINENO" 5; }
+See \`config.log' for more details" "$LINENO" 5; }
      fi
    fi
  fi
@@ -8651,7 +8786,7 @@ rm -f conftest.$ac_ext conftest$ac_cv_exeext conftest.out
  ac_clean_files=$ac_clean_files_save
  { $as_echo "$as_me:${as_lineno-$LINENO}: checking for suffix of object files" >&5
  $as_echo_n "checking for suffix of object files... " >&6; }
-if test "${ac_cv_objext+set}" = set; then :
+if ${ac_cv_objext+:} false; then :
    $as_echo_n "(cached) " >&6
  else
    cat confdefs.h - <<_ACEOF >conftest.$ac_ext
@@ -8691,8 +8826,8 @@ sed 's/^/| /' conftest.$ac_ext >&5
  
  { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
  $as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
-as_fn_error "cannot compute suffix of object files: cannot compile
-See \`config.log' for more details." "$LINENO" 5; }
+as_fn_error $? "cannot compute suffix of object files: cannot compile
+See \`config.log' for more details" "$LINENO" 5; }
  fi
  rm -f conftest.$ac_cv_objext conftest.$ac_ext
  fi
@@ -8702,7 +8837,7 @@ OBJEXT=$ac_cv_objext
  ac_objext=$OBJEXT
  { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether we are using the GNU C compiler" >&5
  $as_echo_n "checking whether we are using the GNU C compiler... " >&6; }
-if test "${ac_cv_c_compiler_gnu+set}" = set; then :
+if ${ac_cv_c_compiler_gnu+:} false; then :
    $as_echo_n "(cached) " >&6
  else
    cat confdefs.h - <<_ACEOF >conftest.$ac_ext
@@ -8739,7 +8874,7 @@ ac_test_CFLAGS=${CFLAGS+set}
  ac_save_CFLAGS=$CFLAGS
  { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether $CC accepts -g" >&5
  $as_echo_n "checking whether $CC accepts -g... " >&6; }
-if test "${ac_cv_prog_cc_g+set}" = set; then :
+if ${ac_cv_prog_cc_g+:} false; then :
    $as_echo_n "(cached) " >&6
  else
    ac_save_c_werror_flag=$ac_c_werror_flag
@@ -8817,7 +8952,7 @@ else
  fi
  { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $CC option to accept ISO C89" >&5
  $as_echo_n "checking for $CC option to accept ISO C89... " >&6; }
-if test "${ac_cv_prog_cc_c89+set}" = set; then :
+if ${ac_cv_prog_cc_c89+:} false; then :
    $as_echo_n "(cached) " >&6
  else
    ac_cv_prog_cc_c89=no
@@ -8826,8 +8961,7 @@ cat confdefs.h - <<_ACEOF >conftest.$ac_ext
  /* end confdefs.h.  */
  #include <stdarg.h>
  #include <stdio.h>
-#include <sys/types.h>
-#include <sys/stat.h>
+struct stat;
  /* Most of the following tests are stolen from RCS 5.7's src/conf.sh.  */
  struct buf { int x; };
  FILE * (*rcsopen) (struct buf *, struct stat *, int);
@@ -8918,7 +9052,7 @@ ac_compiler_gnu=$ac_cv_c_compiler_gnu
    *) :
      { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $CC option to accept ISO C99" >&5
  $as_echo_n "checking for $CC option to accept ISO C99... " >&6; }
-if test "${ac_cv_prog_cc_c99+set}" = set; then :
+if ${ac_cv_prog_cc_c99+:} false; then :
    $as_echo_n "(cached) " >&6
  else
    ac_cv_prog_cc_c99=no
@@ -9062,7 +9196,7 @@ main ()
    return 0;
  }
  _ACEOF
-for ac_arg in '' -std=gnu99 -std=c99 -c99 -AC99 -xc99=all -qlanglvl=extc99
+for ac_arg in '' -std=gnu99 -std=c99 -c99 -AC99 -D_STDC_C99= -qlanglvl=extc99
  do
    CC="$ac_save_CC $ac_arg"
    if ac_fn_c_try_compile "$LINENO"; then :
@@ -9093,7 +9227,7 @@ if test "x$ac_cv_prog_cc_c99" != xno; then :
  else
    { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $CC option to accept ISO C89" >&5
  $as_echo_n "checking for $CC option to accept ISO C89... " >&6; }
-if test "${ac_cv_prog_cc_c89+set}" = set; then :
+if ${ac_cv_prog_cc_c89+:} false; then :
    $as_echo_n "(cached) " >&6
  else
    ac_cv_prog_cc_c89=no
@@ -9102,8 +9236,7 @@ cat confdefs.h - <<_ACEOF >conftest.$ac_ext
  /* end confdefs.h.  */
  #include <stdarg.h>
  #include <stdio.h>
-#include <sys/types.h>
-#include <sys/stat.h>
+struct stat;
  /* Most of the following tests are stolen from RCS 5.7's src/conf.sh.  */
  struct buf { int x; };
  FILE * (*rcsopen) (struct buf *, struct stat *, int);
@@ -9189,7 +9322,7 @@ fi
  esac
    { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $CC option to accept ISO Standard C" >&5
  $as_echo_n "checking for $CC option to accept ISO Standard C... " >&6; }
-  if test "${ac_cv_prog_cc_stdc+set}" = set; then :
+  if ${ac_cv_prog_cc_stdc+:} false; then :
    $as_echo_n "(cached) " >&6
  fi
  
@@ -9217,7 +9350,7 @@ if test -n "$CPP" && test -d "$CPP"; then
    CPP=
  fi
  if test -z "$CPP"; then
-  if test "${ac_cv_prog_CPP+set}" = set; then :
+  if ${ac_cv_prog_CPP+:} false; then :
    $as_echo_n "(cached) " >&6
  else
        # Double quotes because CPP needs to be expanded
@@ -9247,7 +9380,7 @@ else
    # Broken: fails on valid input.
  continue
  fi
-rm -f conftest.err conftest.$ac_ext
+rm -f conftest.err conftest.i conftest.$ac_ext
  
    # OK, works on sane cases.  Now check whether nonexistent headers
    # can be detected and how.
@@ -9263,11 +9396,11 @@ else
  ac_preproc_ok=:
  break
  fi
-rm -f conftest.err conftest.$ac_ext
+rm -f conftest.err conftest.i conftest.$ac_ext
  
  done
  # Because of `break', _AC_PREPROC_IFELSE's cleaning code was skipped.
-rm -f conftest.err conftest.$ac_ext
+rm -f conftest.i conftest.err conftest.$ac_ext
  if $ac_preproc_ok; then :
    break
  fi
@@ -9306,7 +9439,7 @@ else
    # Broken: fails on valid input.
  continue
  fi
-rm -f conftest.err conftest.$ac_ext
+rm -f conftest.err conftest.i conftest.$ac_ext
  
    # OK, works on sane cases.  Now check whether nonexistent headers
    # can be detected and how.
@@ -9322,18 +9455,18 @@ else
  ac_preproc_ok=:
  break
  fi
-rm -f conftest.err conftest.$ac_ext
+rm -f conftest.err conftest.i conftest.$ac_ext
  
  done
  # Because of `break', _AC_PREPROC_IFELSE's cleaning code was skipped.
-rm -f conftest.err conftest.$ac_ext
+rm -f conftest.i conftest.err conftest.$ac_ext
  if $ac_preproc_ok; then :
  
  else
    { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
  $as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
-as_fn_error "C preprocessor \"$CPP\" fails sanity check
-See \`config.log' for more details." "$LINENO" 5; }
+as_fn_error $? "C preprocessor \"$CPP\" fails sanity check
+See \`config.log' for more details" "$LINENO" 5; }
  fi
  
  ac_ext=c
@@ -9344,42 +9477,6 @@ ac_compiler_gnu=$ac_cv_c_compiler_gnu
  
  
  
-case $ac_cv_prog_cc_stdc in
-  no)
-    ;;
-  *)
-    cat confdefs.h - <<_ACEOF >conftest.$ac_ext
-/* end confdefs.h.  */
-#define __GMP_WITHIN_CONFIGURE 1   /* ignore template stuff */
-#define GMP_NAIL_BITS $GMP_NAIL_BITS
-#define GMP_LIMB_BITS 123
-$DEFN_LONG_LONG_LIMB
-#include "$srcdir/gmp-h.in"
-
-#if ! __GMP_HAVE_PROTOTYPES
-die die die
-#endif
-
-int
-main ()
-{
-
-  ;
-  return 0;
-}
-_ACEOF
-if ac_fn_c_try_compile "$LINENO"; then :
-
-else
-  { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: gmp.h doesnt recognise compiler as ANSI, prototypes and \"const\" will be unavailable" >&5
-$as_echo "$as_me: WARNING: gmp.h doesnt recognise compiler as ANSI, prototypes and \"const\" will be unavailable" >&2;}
-fi
-rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
-    ;;
-esac
-
-
-
  # The C compiler on the build system, and associated tests.
  
  if test -n "$CC_FOR_BUILD"; then
@@ -9411,7 +9508,7 @@ $as_echo "$cc_for_build_works" >&6; }
  if test "$cc_for_build_works" = yes; then
    :
  else
-  as_fn_error "Specified CC_FOR_BUILD doesn't seem to work" "$LINENO" 5
+  as_fn_error $? "Specified CC_FOR_BUILD doesn't seem to work" "$LINENO" 5
  fi
  
  elif test -n "$HOST_CC"; then
@@ -9443,7 +9540,7 @@ $as_echo "$cc_for_build_works" >&6; }
  if test "$cc_for_build_works" = yes; then
    CC_FOR_BUILD=$HOST_CC
  else
-  as_fn_error "Specified HOST_CC doesn't seem to work" "$LINENO" 5
+  as_fn_error $? "Specified HOST_CC doesn't seem to work" "$LINENO" 5
  fi
  
  else
@@ -9482,7 +9579,7 @@ fi
  
    done
    if test -z "$CC_FOR_BUILD"; then
-    as_fn_error "Cannot find a build system compiler" "$LINENO" 5
+    as_fn_error $? "Cannot find a build system compiler" "$LINENO" 5
    fi
  fi
  
@@ -9493,7 +9590,7 @@ fi
  { $as_echo "$as_me:${as_lineno-$LINENO}: checking for build system preprocessor" >&5
  $as_echo_n "checking for build system preprocessor... " >&6; }
  if test -z "$CPP_FOR_BUILD"; then
-  if test "${gmp_cv_prog_cpp_for_build+set}" = set; then :
+  if ${gmp_cv_prog_cpp_for_build+:} false; then :
    $as_echo_n "(cached) " >&6
  else
    cat >conftest.c <<EOF
@@ -9512,7 +9609,7 @@ EOF
    done
    rm -f conftest* a.out b.out a.exe a_out.exe
    if test -z "$gmp_cv_prog_cpp_for_build"; then
-    as_fn_error "Cannot find build system C preprocessor." "$LINENO" 5
+    as_fn_error $? "Cannot find build system C preprocessor." "$LINENO" 5
    fi
  
  fi
@@ -9528,7 +9625,7 @@ $as_echo "$CPP_FOR_BUILD" >&6; }
  
  { $as_echo "$as_me:${as_lineno-$LINENO}: checking for build system executable suffix" >&5
  $as_echo_n "checking for build system executable suffix... " >&6; }
-if test "${gmp_cv_prog_exeext_for_build+set}" = set; then :
+if ${gmp_cv_prog_exeext_for_build+:} false; then :
    $as_echo_n "(cached) " >&6
  else
    cat >conftest.c <<EOF
@@ -9553,7 +9650,7 @@ for i in .exe ,ff8 ""; do
  done
  rm -f conftest*
  if test "${gmp_cv_prog_exeext_for_build+set}" != set; then
-  as_fn_error "Cannot determine executable suffix" "$LINENO" 5
+  as_fn_error $? "Cannot determine executable suffix" "$LINENO" 5
  fi
  
  fi
@@ -9565,7 +9662,7 @@ EXEEXT_FOR_BUILD=$gmp_cv_prog_exeext_for_build
  
  { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether build system compiler is ANSI" >&5
  $as_echo_n "checking whether build system compiler is ANSI... " >&6; }
-if test "${gmp_cv_c_for_build_ansi+set}" = set; then :
+if ${gmp_cv_c_for_build_ansi+:} false; then :
    $as_echo_n "(cached) " >&6
  else
    cat >conftest.c <<EOF
@@ -9600,7 +9697,7 @@ fi
  
  { $as_echo "$as_me:${as_lineno-$LINENO}: checking for build system compiler math library" >&5
  $as_echo_n "checking for build system compiler math library... " >&6; }
-if test "${gmp_cv_check_libm_for_build+set}" = set; then :
+if ${gmp_cv_check_libm_for_build+:} false; then :
    $as_echo_n "(cached) " >&6
  else
    cat >conftest.c <<EOF
@@ -9668,7 +9765,7 @@ if test -z "$CXX"; then
  set dummy $ac_tool_prefix$ac_prog; ac_word=$2
  { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
  $as_echo_n "checking for $ac_word... " >&6; }
-if test "${ac_cv_prog_CXX+set}" = set; then :
+if ${ac_cv_prog_CXX+:} false; then :
    $as_echo_n "(cached) " >&6
  else
    if test -n "$CXX"; then
@@ -9680,7 +9777,7 @@ do
    IFS=$as_save_IFS
    test -z "$as_dir" && as_dir=.
      for ac_exec_ext in '' $ac_executable_extensions; do
-  if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+  if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
      ac_cv_prog_CXX="$ac_tool_prefix$ac_prog"
      $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
      break 2
@@ -9712,7 +9809,7 @@ do
  set dummy $ac_prog; ac_word=$2
  { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
  $as_echo_n "checking for $ac_word... " >&6; }
-if test "${ac_cv_prog_ac_ct_CXX+set}" = set; then :
+if ${ac_cv_prog_ac_ct_CXX+:} false; then :
    $as_echo_n "(cached) " >&6
  else
    if test -n "$ac_ct_CXX"; then
@@ -9724,7 +9821,7 @@ do
    IFS=$as_save_IFS
    test -z "$as_dir" && as_dir=.
      for ac_exec_ext in '' $ac_executable_extensions; do
-  if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+  if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
      ac_cv_prog_ac_ct_CXX="$ac_prog"
      $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
      break 2
@@ -9790,7 +9887,7 @@ done
  
  { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether we are using the GNU C++ compiler" >&5
  $as_echo_n "checking whether we are using the GNU C++ compiler... " >&6; }
-if test "${ac_cv_cxx_compiler_gnu+set}" = set; then :
+if ${ac_cv_cxx_compiler_gnu+:} false; then :
    $as_echo_n "(cached) " >&6
  else
    cat confdefs.h - <<_ACEOF >conftest.$ac_ext
@@ -9827,7 +9924,7 @@ ac_test_CXXFLAGS=${CXXFLAGS+set}
  ac_save_CXXFLAGS=$CXXFLAGS
  { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether $CXX accepts -g" >&5
  $as_echo_n "checking whether $CXX accepts -g... " >&6; }
-if test "${ac_cv_prog_cxx_g+set}" = set; then :
+if ${ac_cv_prog_cxx_g+:} false; then :
    $as_echo_n "(cached) " >&6
  else
    ac_save_cxx_werror_flag=$ac_cxx_werror_flag
@@ -10081,7 +10178,7 @@ esac
  
    # If --enable-cxx=yes but a C++ compiler can't be found, then abort.
    if test $want_cxx = no && test $enable_cxx = yes; then
-    as_fn_error "C++ compiler not available, see config.log for details" "$LINENO" 5
+    as_fn_error $? "C++ compiler not available, see config.log for details" "$LINENO" 5
    fi
  fi
  
@@ -10108,7 +10205,7 @@ ac_compiler_gnu=$ac_cv_cxx_compiler_gnu
  { $as_echo "$as_me:${as_lineno-$LINENO}: checking how to run the C++ preprocessor" >&5
  $as_echo_n "checking how to run the C++ preprocessor... " >&6; }
  if test -z "$CXXCPP"; then
-  if test "${ac_cv_prog_CXXCPP+set}" = set; then :
+  if ${ac_cv_prog_CXXCPP+:} false; then :
    $as_echo_n "(cached) " >&6
  else
        # Double quotes because CXXCPP needs to be expanded
@@ -10138,7 +10235,7 @@ else
    # Broken: fails on valid input.
  continue
  fi
-rm -f conftest.err conftest.$ac_ext
+rm -f conftest.err conftest.i conftest.$ac_ext
  
    # OK, works on sane cases.  Now check whether nonexistent headers
    # can be detected and how.
@@ -10154,11 +10251,11 @@ else
  ac_preproc_ok=:
  break
  fi
-rm -f conftest.err conftest.$ac_ext
+rm -f conftest.err conftest.i conftest.$ac_ext
  
  done
  # Because of `break', _AC_PREPROC_IFELSE's cleaning code was skipped.
-rm -f conftest.err conftest.$ac_ext
+rm -f conftest.i conftest.err conftest.$ac_ext
  if $ac_preproc_ok; then :
    break
  fi
@@ -10197,7 +10294,7 @@ else
    # Broken: fails on valid input.
  continue
  fi
-rm -f conftest.err conftest.$ac_ext
+rm -f conftest.err conftest.i conftest.$ac_ext
  
    # OK, works on sane cases.  Now check whether nonexistent headers
    # can be detected and how.
@@ -10213,18 +10310,18 @@ else
  ac_preproc_ok=:
  break
  fi
-rm -f conftest.err conftest.$ac_ext
+rm -f conftest.err conftest.i conftest.$ac_ext
  
  done
  # Because of `break', _AC_PREPROC_IFELSE's cleaning code was skipped.
-rm -f conftest.err conftest.$ac_ext
+rm -f conftest.i conftest.err conftest.$ac_ext
  if $ac_preproc_ok; then :
  
  else
    { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
  $as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
-as_fn_error "C++ preprocessor \"$CXXCPP\" fails sanity check
-See \`config.log' for more details." "$LINENO" 5; }
+as_fn_error $? "C++ preprocessor \"$CXXCPP\" fails sanity check
+See \`config.log' for more details" "$LINENO" 5; }
  fi
  
  ac_ext=c
@@ -10240,9 +10337,10 @@ fi
  # deciding the compiler.
  #
  
+
  { $as_echo "$as_me:${as_lineno-$LINENO}: checking for grep that handles long lines and -e" >&5
  $as_echo_n "checking for grep that handles long lines and -e... " >&6; }
-if test "${ac_cv_path_GREP+set}" = set; then :
+if ${ac_cv_path_GREP+:} false; then :
    $as_echo_n "(cached) " >&6
  else
    if test -z "$GREP"; then
@@ -10256,7 +10354,7 @@ do
      for ac_prog in grep ggrep; do
      for ac_exec_ext in '' $ac_executable_extensions; do
        ac_path_GREP="$as_dir/$ac_prog$ac_exec_ext"
-      { test -f "$ac_path_GREP" && $as_test_x "$ac_path_GREP"; } || continue
+      as_fn_executable_p "$ac_path_GREP" || continue
  # Check for GNU ac_path_GREP and select it if it is found.
    # Check for GNU $ac_path_GREP
  case `"$ac_path_GREP" --version 2>&1` in
@@ -10291,7 +10389,7 @@ esac
    done
  IFS=$as_save_IFS
    if test -z "$ac_cv_path_GREP"; then
-    as_fn_error "no acceptable grep could be found in $PATH$PATH_SEPARATOR/usr/xpg4/bin" "$LINENO" 5
+    as_fn_error $? "no acceptable grep could be found in $PATH$PATH_SEPARATOR/usr/xpg4/bin" "$LINENO" 5
    fi
  else
    ac_cv_path_GREP=$GREP
@@ -10305,7 +10403,7 @@ $as_echo "$ac_cv_path_GREP" >&6; }
  
  { $as_echo "$as_me:${as_lineno-$LINENO}: checking for egrep" >&5
  $as_echo_n "checking for egrep... " >&6; }
-if test "${ac_cv_path_EGREP+set}" = set; then :
+if ${ac_cv_path_EGREP+:} false; then :
    $as_echo_n "(cached) " >&6
  else
    if echo a | $GREP -E '(a|b)' >/dev/null 2>&1
@@ -10322,7 +10420,7 @@ do
      for ac_prog in egrep; do
      for ac_exec_ext in '' $ac_executable_extensions; do
        ac_path_EGREP="$as_dir/$ac_prog$ac_exec_ext"
-      { test -f "$ac_path_EGREP" && $as_test_x "$ac_path_EGREP"; } || continue
+      as_fn_executable_p "$ac_path_EGREP" || continue
  # Check for GNU ac_path_EGREP and select it if it is found.
    # Check for GNU $ac_path_EGREP
  case `"$ac_path_EGREP" --version 2>&1` in
@@ -10357,7 +10455,7 @@ esac
    done
  IFS=$as_save_IFS
    if test -z "$ac_cv_path_EGREP"; then
-    as_fn_error "no acceptable egrep could be found in $PATH$PATH_SEPARATOR/usr/xpg4/bin" "$LINENO" 5
+    as_fn_error $? "no acceptable egrep could be found in $PATH$PATH_SEPARATOR/usr/xpg4/bin" "$LINENO" 5
    fi
  else
    ac_cv_path_EGREP=$EGREP
@@ -10434,7 +10532,7 @@ case $host in
        case "$path $fat_path" in
          *mmx*)   { $as_echo "$as_me:${as_lineno-$LINENO}: checking if the assembler knows about MMX instructions" >&5
  $as_echo_n "checking if the assembler knows about MMX instructions... " >&6; }
-if test "${gmp_cv_asm_x86_mmx+set}" = set; then :
+if ${gmp_cv_asm_x86_mmx+:} false; then :
    $as_echo_n "(cached) " >&6
  else
    cat >conftest.s <<EOF
@@ -10538,7 +10636,7 @@ fi
        case "$path $fat_path" in
          *sse2*)  { $as_echo "$as_me:${as_lineno-$LINENO}: checking if the assembler knows about SSE2 instructions" >&5
  $as_echo_n "checking if the assembler knows about SSE2 instructions... " >&6; }
-if test "${gmp_cv_asm_x86_sse2+set}" = set; then :
+if ${gmp_cv_asm_x86_sse2+:} false; then :
    $as_echo_n "(cached) " >&6
  else
    cat >conftest.s <<EOF
@@ -10595,218 +10693,59 @@ path="$tmp_path"
  
  tmp_path=
  for i in $fat_path; do
-  case $i in
-    */sse2) ;;
-    *) tmp_path="$tmp_path $i" ;;
-  esac
-done
-fat_path="$tmp_path"
-
-
-  ;;
-esac
- ;;
-      esac
-    fi
-    ;;
-esac
-
-
-cat >&5 <<EOF
-Decided:
-ABI=$ABI
-CC=$CC
-CFLAGS=$CFLAGS
-CPPFLAGS=$CPPFLAGS
-GMP_LDFLAGS=$GMP_LDFLAGS
-CXX=$CXX
-CXXFLAGS=$CXXFLAGS
-path=$path
-EOF
-echo "using ABI=\"$ABI\""
-echo "      CC=\"$CC\""
-echo "      CFLAGS=\"$CFLAGS\""
-echo "      CPPFLAGS=\"$CPPFLAGS\""
-if test $want_cxx = yes; then
-  echo "      CXX=\"$CXX\""
-  echo "      CXXFLAGS=\"$CXXFLAGS\""
-fi
-echo "      MPN_PATH=\"$path\""
-
-
-# Automake ansi2knr support.
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for function prototypes" >&5
-$as_echo_n "checking for function prototypes... " >&6; }
-if test "$ac_cv_prog_cc_c89" != no; then
-  { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
-$as_echo "yes" >&6; }
-
-$as_echo "#define PROTOTYPES 1" >>confdefs.h
-
-
-$as_echo "#define __PROTOTYPES 1" >>confdefs.h
-
-else
-  { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
-$as_echo "no" >&6; }
-fi
-
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for ANSI C header files" >&5
-$as_echo_n "checking for ANSI C header files... " >&6; }
-if test "${ac_cv_header_stdc+set}" = set; then :
-  $as_echo_n "(cached) " >&6
-else
-  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
-/* end confdefs.h.  */
-#include <stdlib.h>
-#include <stdarg.h>
-#include <string.h>
-#include <float.h>
-
-int
-main ()
-{
-
-  ;
-  return 0;
-}
-_ACEOF
-if ac_fn_c_try_compile "$LINENO"; then :
-  ac_cv_header_stdc=yes
-else
-  ac_cv_header_stdc=no
-fi
-rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
-
-if test $ac_cv_header_stdc = yes; then
-  # SunOS 4.x string.h does not declare mem*, contrary to ANSI.
-  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
-/* end confdefs.h.  */
-#include <string.h>
-
-_ACEOF
-if (eval "$ac_cpp conftest.$ac_ext") 2>&5 |
-  $EGREP "memchr" >/dev/null 2>&1; then :
-
-else
-  ac_cv_header_stdc=no
-fi
-rm -f conftest*
-
-fi
-
-if test $ac_cv_header_stdc = yes; then
-  # ISC 2.0.2 stdlib.h does not declare free, contrary to ANSI.
-  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
-/* end confdefs.h.  */
-#include <stdlib.h>
-
-_ACEOF
-if (eval "$ac_cpp conftest.$ac_ext") 2>&5 |
-  $EGREP "free" >/dev/null 2>&1; then :
-
-else
-  ac_cv_header_stdc=no
-fi
-rm -f conftest*
-
-fi
-
-if test $ac_cv_header_stdc = yes; then
-  # /bin/cc in Irix-4.0.5 gets non-ANSI ctype macros unless using -ansi.
-  if test "$cross_compiling" = yes; then :
-  :
-else
-  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
-/* end confdefs.h.  */
-#include <ctype.h>
-#include <stdlib.h>
-#if ((' ' & 0x0FF) == 0x020)
-# define ISLOWER(c) ('a' <= (c) && (c) <= 'z')
-# define TOUPPER(c) (ISLOWER(c) ? 'A' + ((c) - 'a') : (c))
-#else
-# define ISLOWER(c) \
-                  (('a' <= (c) && (c) <= 'i') \
-                    || ('j' <= (c) && (c) <= 'r') \
-                    || ('s' <= (c) && (c) <= 'z'))
-# define TOUPPER(c) (ISLOWER(c) ? ((c) | 0x40) : (c))
-#endif
-
-#define XOR(e, f) (((e) && !(f)) || (!(e) && (f)))
-int
-main ()
-{
-  int i;
-  for (i = 0; i < 256; i++)
-    if (XOR (islower (i), ISLOWER (i))
-       || toupper (i) != TOUPPER (i))
-      return 2;
-  return 0;
-}
-_ACEOF
-if ac_fn_c_try_run "$LINENO"; then :
-
-else
-  ac_cv_header_stdc=no
-fi
-rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext \
-  conftest.$ac_objext conftest.beam conftest.$ac_ext
-fi
-
-fi
-fi
-{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_header_stdc" >&5
-$as_echo "$ac_cv_header_stdc" >&6; }
-if test $ac_cv_header_stdc = yes; then
-
-$as_echo "#define STDC_HEADERS 1" >>confdefs.h
-
-fi
-
-# On IRIX 5.3, sys/types and inttypes.h are conflicting.
-for ac_header in sys/types.h sys/stat.h stdlib.h string.h memory.h strings.h \
-                 inttypes.h stdint.h unistd.h
-do :
-  as_ac_Header=`$as_echo "ac_cv_header_$ac_header" | $as_tr_sh`
-ac_fn_c_check_header_compile "$LINENO" "$ac_header" "$as_ac_Header" "$ac_includes_default
-"
-eval as_val=\$$as_ac_Header
-   if test "x$as_val" = x""yes; then :
-  cat >>confdefs.h <<_ACEOF
-#define `$as_echo "HAVE_$ac_header" | $as_tr_cpp` 1
-_ACEOF
-
-fi
-
+  case $i in
+    */sse2) ;;
+    *) tmp_path="$tmp_path $i" ;;
+  esac
  done
+fat_path="$tmp_path"
  
  
+  ;;
+esac
+ ;;
+      esac
+    fi
+    ;;
+esac
  
-if test "$ac_cv_prog_cc_stdc" != no; then
-  U= ANSI2KNR=
-else
-  U=_ ANSI2KNR=./ansi2knr
-fi
-# Ensure some checks needed by ansi2knr itself.
-
-for ac_header in string.h
-do :
-  ac_fn_c_check_header_mongrel "$LINENO" "string.h" "ac_cv_header_string_h" "$ac_includes_default"
-if test "x$ac_cv_header_string_h" = x""yes; then :
-  cat >>confdefs.h <<_ACEOF
-#define HAVE_STRING_H 1
-_ACEOF
  
+if test "$enable_assembly" = "no"; then
+  path="generic"
+  CFLAGS="$CFLAGS -DNO_ASM"
+#  for abi in $abilist; do
+#    eval unset "path_\$abi"
+#    eval gcc_${abi}_cflags=\"\$gcc_${abi}_cflags -DNO_ASM\"
+#  done
  fi
  
-done
  
+cat >&5 <<EOF
+Decided:
+ABI=$ABI
+CC=$CC
+CFLAGS=$CFLAGS
+CPPFLAGS=$CPPFLAGS
+GMP_LDFLAGS=$GMP_LDFLAGS
+CXX=$CXX
+CXXFLAGS=$CXXFLAGS
+path=$path
+EOF
+echo "using ABI=\"$ABI\""
+echo "      CC=\"$CC\""
+echo "      CFLAGS=\"$CFLAGS\""
+echo "      CPPFLAGS=\"$CPPFLAGS\""
+if test $want_cxx = yes; then
+  echo "      CXX=\"$CXX\""
+  echo "      CXXFLAGS=\"$CXXFLAGS\""
+fi
+echo "      MPN_PATH=\"$path\""
  
  
  
  { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether assembler supports --noexecstack option" >&5
  $as_echo_n "checking whether assembler supports --noexecstack option... " >&6; }
-if test "${cl_cv_as_noexecstack+set}" = set; then :
+if ${cl_cv_as_noexecstack+:} false; then :
    $as_echo_n "(cached) " >&6
  else
      cat > conftest.c <<EOF
@@ -10849,7 +10788,7 @@ if test -n "$ac_tool_prefix"; then
  set dummy ${ac_tool_prefix}ar; ac_word=$2
  { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
  $as_echo_n "checking for $ac_word... " >&6; }
-if test "${ac_cv_prog_AR+set}" = set; then :
+if ${ac_cv_prog_AR+:} false; then :
    $as_echo_n "(cached) " >&6
  else
    if test -n "$AR"; then
@@ -10861,7 +10800,7 @@ do
    IFS=$as_save_IFS
    test -z "$as_dir" && as_dir=.
      for ac_exec_ext in '' $ac_executable_extensions; do
-  if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+  if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
      ac_cv_prog_AR="${ac_tool_prefix}ar"
      $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
      break 2
@@ -10889,7 +10828,7 @@ if test -z "$ac_cv_prog_AR"; then
  set dummy ar; ac_word=$2
  { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
  $as_echo_n "checking for $ac_word... " >&6; }
-if test "${ac_cv_prog_ac_ct_AR+set}" = set; then :
+if ${ac_cv_prog_ac_ct_AR+:} false; then :
    $as_echo_n "(cached) " >&6
  else
    if test -n "$ac_ct_AR"; then
@@ -10901,7 +10840,7 @@ do
    IFS=$as_save_IFS
    test -z "$as_dir" && as_dir=.
      for ac_exec_ext in '' $ac_executable_extensions; do
-  if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+  if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
      ac_cv_prog_ac_ct_AR="ar"
      $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
      break 2
@@ -10957,7 +10896,7 @@ fi
  gmp_user_NM=$NM
  { $as_echo "$as_me:${as_lineno-$LINENO}: checking for BSD- or MS-compatible name lister (nm)" >&5
  $as_echo_n "checking for BSD- or MS-compatible name lister (nm)... " >&6; }
-if test "${lt_cv_path_NM+set}" = set; then :
+if ${lt_cv_path_NM+:} false; then :
    $as_echo_n "(cached) " >&6
  else
    if test -n "$NM"; then
@@ -11020,7 +10959,7 @@ else
  set dummy $ac_tool_prefix$ac_prog; ac_word=$2
  { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
  $as_echo_n "checking for $ac_word... " >&6; }
-if test "${ac_cv_prog_DUMPBIN+set}" = set; then :
+if ${ac_cv_prog_DUMPBIN+:} false; then :
    $as_echo_n "(cached) " >&6
  else
    if test -n "$DUMPBIN"; then
@@ -11032,7 +10971,7 @@ do
    IFS=$as_save_IFS
    test -z "$as_dir" && as_dir=.
      for ac_exec_ext in '' $ac_executable_extensions; do
-  if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+  if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
      ac_cv_prog_DUMPBIN="$ac_tool_prefix$ac_prog"
      $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
      break 2
@@ -11064,7 +11003,7 @@ do
  set dummy $ac_prog; ac_word=$2
  { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
  $as_echo_n "checking for $ac_word... " >&6; }
-if test "${ac_cv_prog_ac_ct_DUMPBIN+set}" = set; then :
+if ${ac_cv_prog_ac_ct_DUMPBIN+:} false; then :
    $as_echo_n "(cached) " >&6
  else
    if test -n "$ac_ct_DUMPBIN"; then
@@ -11076,7 +11015,7 @@ do
    IFS=$as_save_IFS
    test -z "$as_dir" && as_dir=.
      for ac_exec_ext in '' $ac_executable_extensions; do
-  if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+  if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
      ac_cv_prog_ac_ct_DUMPBIN="$ac_prog"
      $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
      break 2
@@ -11136,7 +11075,7 @@ test -z "$NM" && NM=nm
  
  { $as_echo "$as_me:${as_lineno-$LINENO}: checking the name lister ($NM) interface" >&5
  $as_echo_n "checking the name lister ($NM) interface... " >&6; }
-if test "${lt_cv_nm_interface+set}" = set; then :
+if ${lt_cv_nm_interface+:} false; then :
    $as_echo_n "(cached) " >&6
  else
    lt_cv_nm_interface="BSD nm"
@@ -11172,7 +11111,7 @@ if test -z "$gmp_user_NM" && test -n "$ac_tool_prefix" && test "$NM" = nm; then
    NM=
    { $as_echo "$as_me:${as_lineno-$LINENO}: checking for BSD- or MS-compatible name lister (nm)" >&5
  $as_echo_n "checking for BSD- or MS-compatible name lister (nm)... " >&6; }
-if test "${lt_cv_path_NM+set}" = set; then :
+if ${lt_cv_path_NM+:} false; then :
    $as_echo_n "(cached) " >&6
  else
    if test -n "$NM"; then
@@ -11235,7 +11174,7 @@ else
  set dummy $ac_tool_prefix$ac_prog; ac_word=$2
  { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
  $as_echo_n "checking for $ac_word... " >&6; }
-if test "${ac_cv_prog_DUMPBIN+set}" = set; then :
+if ${ac_cv_prog_DUMPBIN+:} false; then :
    $as_echo_n "(cached) " >&6
  else
    if test -n "$DUMPBIN"; then
@@ -11247,7 +11186,7 @@ do
    IFS=$as_save_IFS
    test -z "$as_dir" && as_dir=.
      for ac_exec_ext in '' $ac_executable_extensions; do
-  if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+  if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
      ac_cv_prog_DUMPBIN="$ac_tool_prefix$ac_prog"
      $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
      break 2
@@ -11279,7 +11218,7 @@ do
  set dummy $ac_prog; ac_word=$2
  { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
  $as_echo_n "checking for $ac_word... " >&6; }
-if test "${ac_cv_prog_ac_ct_DUMPBIN+set}" = set; then :
+if ${ac_cv_prog_ac_ct_DUMPBIN+:} false; then :
    $as_echo_n "(cached) " >&6
  else
    if test -n "$ac_ct_DUMPBIN"; then
@@ -11291,7 +11230,7 @@ do
    IFS=$as_save_IFS
    test -z "$as_dir" && as_dir=.
      for ac_exec_ext in '' $ac_executable_extensions; do
-  if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+  if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
      ac_cv_prog_ac_ct_DUMPBIN="$ac_prog"
      $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
      break 2
@@ -11348,7 +11287,7 @@ test -z "$NM" && NM=nm
  
  { $as_echo "$as_me:${as_lineno-$LINENO}: checking the name lister ($NM) interface" >&5
  $as_echo_n "checking the name lister ($NM) interface... " >&6; }
-if test "${lt_cv_nm_interface+set}" = set; then :
+if ${lt_cv_nm_interface+:} false; then :
    $as_echo_n "(cached) " >&6
  else
    lt_cv_nm_interface="BSD nm"
@@ -11413,7 +11352,7 @@ case $host in
  set dummy ${ac_tool_prefix}as; ac_word=$2
  { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
  $as_echo_n "checking for $ac_word... " >&6; }
-if test "${ac_cv_prog_AS+set}" = set; then :
+if ${ac_cv_prog_AS+:} false; then :
    $as_echo_n "(cached) " >&6
  else
    if test -n "$AS"; then
@@ -11425,7 +11364,7 @@ do
    IFS=$as_save_IFS
    test -z "$as_dir" && as_dir=.
      for ac_exec_ext in '' $ac_executable_extensions; do
-  if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+  if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
      ac_cv_prog_AS="${ac_tool_prefix}as"
      $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
      break 2
@@ -11453,7 +11392,7 @@ if test -z "$ac_cv_prog_AS"; then
  set dummy as; ac_word=$2
  { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
  $as_echo_n "checking for $ac_word... " >&6; }
-if test "${ac_cv_prog_ac_ct_AS+set}" = set; then :
+if ${ac_cv_prog_ac_ct_AS+:} false; then :
    $as_echo_n "(cached) " >&6
  else
    if test -n "$ac_ct_AS"; then
@@ -11465,7 +11404,7 @@ do
    IFS=$as_save_IFS
    test -z "$as_dir" && as_dir=.
      for ac_exec_ext in '' $ac_executable_extensions; do
-  if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+  if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
      ac_cv_prog_ac_ct_AS="as"
      $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
      break 2
@@ -11505,7 +11444,7 @@ fi
  set dummy ${ac_tool_prefix}dlltool; ac_word=$2
  { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
  $as_echo_n "checking for $ac_word... " >&6; }
-if test "${ac_cv_prog_DLLTOOL+set}" = set; then :
+if ${ac_cv_prog_DLLTOOL+:} false; then :
    $as_echo_n "(cached) " >&6
  else
    if test -n "$DLLTOOL"; then
@@ -11517,7 +11456,7 @@ do
    IFS=$as_save_IFS
    test -z "$as_dir" && as_dir=.
      for ac_exec_ext in '' $ac_executable_extensions; do
-  if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+  if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
      ac_cv_prog_DLLTOOL="${ac_tool_prefix}dlltool"
      $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
      break 2
@@ -11545,7 +11484,7 @@ if test -z "$ac_cv_prog_DLLTOOL"; then
  set dummy dlltool; ac_word=$2
  { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
  $as_echo_n "checking for $ac_word... " >&6; }
-if test "${ac_cv_prog_ac_ct_DLLTOOL+set}" = set; then :
+if ${ac_cv_prog_ac_ct_DLLTOOL+:} false; then :
    $as_echo_n "(cached) " >&6
  else
    if test -n "$ac_ct_DLLTOOL"; then
@@ -11557,7 +11496,7 @@ do
    IFS=$as_save_IFS
    test -z "$as_dir" && as_dir=.
      for ac_exec_ext in '' $ac_executable_extensions; do
-  if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+  if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
      ac_cv_prog_ac_ct_DLLTOOL="dlltool"
      $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
      break 2
@@ -11597,7 +11536,7 @@ fi
  set dummy ${ac_tool_prefix}objdump; ac_word=$2
  { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
  $as_echo_n "checking for $ac_word... " >&6; }
-if test "${ac_cv_prog_OBJDUMP+set}" = set; then :
+if ${ac_cv_prog_OBJDUMP+:} false; then :
    $as_echo_n "(cached) " >&6
  else
    if test -n "$OBJDUMP"; then
@@ -11609,7 +11548,7 @@ do
    IFS=$as_save_IFS
    test -z "$as_dir" && as_dir=.
      for ac_exec_ext in '' $ac_executable_extensions; do
-  if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+  if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
      ac_cv_prog_OBJDUMP="${ac_tool_prefix}objdump"
      $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
      break 2
@@ -11637,7 +11576,7 @@ if test -z "$ac_cv_prog_OBJDUMP"; then
  set dummy objdump; ac_word=$2
  { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
  $as_echo_n "checking for $ac_word... " >&6; }
-if test "${ac_cv_prog_ac_ct_OBJDUMP+set}" = set; then :
+if ${ac_cv_prog_ac_ct_OBJDUMP+:} false; then :
    $as_echo_n "(cached) " >&6
  else
    if test -n "$ac_ct_OBJDUMP"; then
@@ -11649,7 +11588,7 @@ do
    IFS=$as_save_IFS
    test -z "$as_dir" && as_dir=.
      for ac_exec_ext in '' $ac_executable_extensions; do
-  if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+  if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
      ac_cv_prog_ac_ct_OBJDUMP="objdump"
      $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
      break 2
@@ -11718,7 +11657,7 @@ case $host in
      fi
      # Don't allow both static and DLL.
      if test "$enable_shared" != no && test "$enable_static" != no; then
-      as_fn_error "cannot build both static and DLL, since gmp.h is different for each.
+      as_fn_error $? "cannot build both static and DLL, since gmp.h is different for each.
  Use \"--disable-static --enable-shared\" to build just a DLL." "$LINENO" 5
      fi
  
@@ -11893,7 +11832,7 @@ esac
  
  { $as_echo "$as_me:${as_lineno-$LINENO}: checking for a sed that does not truncate output" >&5
  $as_echo_n "checking for a sed that does not truncate output... " >&6; }
-if test "${ac_cv_path_SED+set}" = set; then :
+if ${ac_cv_path_SED+:} false; then :
    $as_echo_n "(cached) " >&6
  else
              ac_script=s/aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa/bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb/
@@ -11913,7 +11852,7 @@ do
      for ac_prog in sed gsed; do
      for ac_exec_ext in '' $ac_executable_extensions; do
        ac_path_SED="$as_dir/$ac_prog$ac_exec_ext"
-      { test -f "$ac_path_SED" && $as_test_x "$ac_path_SED"; } || continue
+      as_fn_executable_p "$ac_path_SED" || continue
  # Check for GNU ac_path_SED and select it if it is found.
    # Check for GNU $ac_path_SED
  case `"$ac_path_SED" --version 2>&1` in
@@ -11948,7 +11887,7 @@ esac
    done
  IFS=$as_save_IFS
    if test -z "$ac_cv_path_SED"; then
-    as_fn_error "no acceptable sed could be found in \$PATH" "$LINENO" 5
+    as_fn_error $? "no acceptable sed could be found in \$PATH" "$LINENO" 5
    fi
  else
    ac_cv_path_SED=$SED
@@ -11975,7 +11914,7 @@ Xsed="$SED -e 1s/^X//"
  
  { $as_echo "$as_me:${as_lineno-$LINENO}: checking for fgrep" >&5
  $as_echo_n "checking for fgrep... " >&6; }
-if test "${ac_cv_path_FGREP+set}" = set; then :
+if ${ac_cv_path_FGREP+:} false; then :
    $as_echo_n "(cached) " >&6
  else
    if echo 'ab*c' | $GREP -F 'ab*c' >/dev/null 2>&1
@@ -11992,7 +11931,7 @@ do
      for ac_prog in fgrep; do
      for ac_exec_ext in '' $ac_executable_extensions; do
        ac_path_FGREP="$as_dir/$ac_prog$ac_exec_ext"
-      { test -f "$ac_path_FGREP" && $as_test_x "$ac_path_FGREP"; } || continue
+      as_fn_executable_p "$ac_path_FGREP" || continue
  # Check for GNU ac_path_FGREP and select it if it is found.
    # Check for GNU $ac_path_FGREP
  case `"$ac_path_FGREP" --version 2>&1` in
@@ -12027,7 +11966,7 @@ esac
    done
  IFS=$as_save_IFS
    if test -z "$ac_cv_path_FGREP"; then
-    as_fn_error "no acceptable fgrep could be found in $PATH$PATH_SEPARATOR/usr/xpg4/bin" "$LINENO" 5
+    as_fn_error $? "no acceptable fgrep could be found in $PATH$PATH_SEPARATOR/usr/xpg4/bin" "$LINENO" 5
    fi
  else
    ac_cv_path_FGREP=$FGREP
@@ -12106,7 +12045,7 @@ else
    { $as_echo "$as_me:${as_lineno-$LINENO}: checking for non-GNU ld" >&5
  $as_echo_n "checking for non-GNU ld... " >&6; }
  fi
-if test "${lt_cv_path_LD+set}" = set; then :
+if ${lt_cv_path_LD+:} false; then :
    $as_echo_n "(cached) " >&6
  else
    if test -z "$LD"; then
@@ -12143,10 +12082,10 @@ else
    { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
  $as_echo "no" >&6; }
  fi
-test -z "$LD" && as_fn_error "no acceptable ld found in \$PATH" "$LINENO" 5
+test -z "$LD" && as_fn_error $? "no acceptable ld found in \$PATH" "$LINENO" 5
  { $as_echo "$as_me:${as_lineno-$LINENO}: checking if the linker ($LD) is GNU ld" >&5
  $as_echo_n "checking if the linker ($LD) is GNU ld... " >&6; }
-if test "${lt_cv_prog_gnu_ld+set}" = set; then :
+if ${lt_cv_prog_gnu_ld+:} false; then :
    $as_echo_n "(cached) " >&6
  else
    # I'd rather use --version here, but apparently some GNU lds only accept -v.
@@ -12185,7 +12124,7 @@ fi
  # find the maximum length of command line arguments
  { $as_echo "$as_me:${as_lineno-$LINENO}: checking the maximum length of command line arguments" >&5
  $as_echo_n "checking the maximum length of command line arguments... " >&6; }
-if test "${lt_cv_sys_max_cmd_len+set}" = set; then :
+if ${lt_cv_sys_max_cmd_len+:} false; then :
    $as_echo_n "(cached) " >&6
  else
      i=0
@@ -12387,7 +12326,7 @@ esac
  
  { $as_echo "$as_me:${as_lineno-$LINENO}: checking how to convert $build file names to $host format" >&5
  $as_echo_n "checking how to convert $build file names to $host format... " >&6; }
-if test "${lt_cv_to_host_file_cmd+set}" = set; then :
+if ${lt_cv_to_host_file_cmd+:} false; then :
    $as_echo_n "(cached) " >&6
  else
    case $host in
@@ -12434,7 +12373,7 @@ $as_echo "$lt_cv_to_host_file_cmd" >&6; }
  
  { $as_echo "$as_me:${as_lineno-$LINENO}: checking how to convert $build file names to toolchain format" >&5
  $as_echo_n "checking how to convert $build file names to toolchain format... " >&6; }
-if test "${lt_cv_to_tool_file_cmd+set}" = set; then :
+if ${lt_cv_to_tool_file_cmd+:} false; then :
    $as_echo_n "(cached) " >&6
  else
    #assume ordinary cross tools, or native build.
@@ -12461,7 +12400,7 @@ $as_echo "$lt_cv_to_tool_file_cmd" >&6; }
  
  { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $LD option to reload object files" >&5
  $as_echo_n "checking for $LD option to reload object files... " >&6; }
-if test "${lt_cv_ld_reload_flag+set}" = set; then :
+if ${lt_cv_ld_reload_flag+:} false; then :
    $as_echo_n "(cached) " >&6
  else
    lt_cv_ld_reload_flag='-r'
@@ -12502,7 +12441,7 @@ if test -n "$ac_tool_prefix"; then
  set dummy ${ac_tool_prefix}objdump; ac_word=$2
  { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
  $as_echo_n "checking for $ac_word... " >&6; }
-if test "${ac_cv_prog_OBJDUMP+set}" = set; then :
+if ${ac_cv_prog_OBJDUMP+:} false; then :
    $as_echo_n "(cached) " >&6
  else
    if test -n "$OBJDUMP"; then
@@ -12514,7 +12453,7 @@ do
    IFS=$as_save_IFS
    test -z "$as_dir" && as_dir=.
      for ac_exec_ext in '' $ac_executable_extensions; do
-  if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+  if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
      ac_cv_prog_OBJDUMP="${ac_tool_prefix}objdump"
      $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
      break 2
@@ -12542,7 +12481,7 @@ if test -z "$ac_cv_prog_OBJDUMP"; then
  set dummy objdump; ac_word=$2
  { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
  $as_echo_n "checking for $ac_word... " >&6; }
-if test "${ac_cv_prog_ac_ct_OBJDUMP+set}" = set; then :
+if ${ac_cv_prog_ac_ct_OBJDUMP+:} false; then :
    $as_echo_n "(cached) " >&6
  else
    if test -n "$ac_ct_OBJDUMP"; then
@@ -12554,7 +12493,7 @@ do
    IFS=$as_save_IFS
    test -z "$as_dir" && as_dir=.
      for ac_exec_ext in '' $ac_executable_extensions; do
-  if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+  if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
      ac_cv_prog_ac_ct_OBJDUMP="objdump"
      $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
      break 2
@@ -12598,7 +12537,7 @@ test -z "$OBJDUMP" && OBJDUMP=objdump
  
  { $as_echo "$as_me:${as_lineno-$LINENO}: checking how to recognize dependent libraries" >&5
  $as_echo_n "checking how to recognize dependent libraries... " >&6; }
-if test "${lt_cv_deplibs_check_method+set}" = set; then :
+if ${lt_cv_deplibs_check_method+:} false; then :
    $as_echo_n "(cached) " >&6
  else
    lt_cv_file_magic_cmd='$MAGIC_CMD'
@@ -12845,7 +12784,7 @@ if test -n "$ac_tool_prefix"; then
  set dummy ${ac_tool_prefix}dlltool; ac_word=$2
  { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
  $as_echo_n "checking for $ac_word... " >&6; }
-if test "${ac_cv_prog_DLLTOOL+set}" = set; then :
+if ${ac_cv_prog_DLLTOOL+:} false; then :
    $as_echo_n "(cached) " >&6
  else
    if test -n "$DLLTOOL"; then
@@ -12857,7 +12796,7 @@ do
    IFS=$as_save_IFS
    test -z "$as_dir" && as_dir=.
      for ac_exec_ext in '' $ac_executable_extensions; do
-  if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+  if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
      ac_cv_prog_DLLTOOL="${ac_tool_prefix}dlltool"
      $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
      break 2
@@ -12885,7 +12824,7 @@ if test -z "$ac_cv_prog_DLLTOOL"; then
  set dummy dlltool; ac_word=$2
  { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
  $as_echo_n "checking for $ac_word... " >&6; }
-if test "${ac_cv_prog_ac_ct_DLLTOOL+set}" = set; then :
+if ${ac_cv_prog_ac_ct_DLLTOOL+:} false; then :
    $as_echo_n "(cached) " >&6
  else
    if test -n "$ac_ct_DLLTOOL"; then
@@ -12897,7 +12836,7 @@ do
    IFS=$as_save_IFS
    test -z "$as_dir" && as_dir=.
      for ac_exec_ext in '' $ac_executable_extensions; do
-  if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+  if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
      ac_cv_prog_ac_ct_DLLTOOL="dlltool"
      $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
      break 2
@@ -12942,7 +12881,7 @@ test -z "$DLLTOOL" && DLLTOOL=dlltool
  
  { $as_echo "$as_me:${as_lineno-$LINENO}: checking how to associate runtime and link libraries" >&5
  $as_echo_n "checking how to associate runtime and link libraries... " >&6; }
-if test "${lt_cv_sharedlib_from_linklib_cmd+set}" = set; then :
+if ${lt_cv_sharedlib_from_linklib_cmd+:} false; then :
    $as_echo_n "(cached) " >&6
  else
    lt_cv_sharedlib_from_linklib_cmd='unknown'
@@ -12985,7 +12924,7 @@ if test -n "$ac_tool_prefix"; then
  set dummy $ac_tool_prefix$ac_prog; ac_word=$2
  { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
  $as_echo_n "checking for $ac_word... " >&6; }
-if test "${ac_cv_prog_AR+set}" = set; then :
+if ${ac_cv_prog_AR+:} false; then :
    $as_echo_n "(cached) " >&6
  else
    if test -n "$AR"; then
@@ -12997,7 +12936,7 @@ do
    IFS=$as_save_IFS
    test -z "$as_dir" && as_dir=.
      for ac_exec_ext in '' $ac_executable_extensions; do
-  if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+  if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
      ac_cv_prog_AR="$ac_tool_prefix$ac_prog"
      $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
      break 2
@@ -13029,7 +12968,7 @@ do
  set dummy $ac_prog; ac_word=$2
  { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
  $as_echo_n "checking for $ac_word... " >&6; }
-if test "${ac_cv_prog_ac_ct_AR+set}" = set; then :
+if ${ac_cv_prog_ac_ct_AR+:} false; then :
    $as_echo_n "(cached) " >&6
  else
    if test -n "$ac_ct_AR"; then
@@ -13041,7 +12980,7 @@ do
    IFS=$as_save_IFS
    test -z "$as_dir" && as_dir=.
      for ac_exec_ext in '' $ac_executable_extensions; do
-  if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+  if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
      ac_cv_prog_ac_ct_AR="$ac_prog"
      $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
      break 2
@@ -13093,7 +13032,7 @@ fi
  
  { $as_echo "$as_me:${as_lineno-$LINENO}: checking for archiver @FILE support" >&5
  $as_echo_n "checking for archiver @FILE support... " >&6; }
-if test "${lt_cv_ar_at_file+set}" = set; then :
+if ${lt_cv_ar_at_file+:} false; then :
    $as_echo_n "(cached) " >&6
  else
    lt_cv_ar_at_file=no
@@ -13154,7 +13093,7 @@ if test -n "$ac_tool_prefix"; then
  set dummy ${ac_tool_prefix}strip; ac_word=$2
  { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
  $as_echo_n "checking for $ac_word... " >&6; }
-if test "${ac_cv_prog_STRIP+set}" = set; then :
+if ${ac_cv_prog_STRIP+:} false; then :
    $as_echo_n "(cached) " >&6
  else
    if test -n "$STRIP"; then
@@ -13166,7 +13105,7 @@ do
    IFS=$as_save_IFS
    test -z "$as_dir" && as_dir=.
      for ac_exec_ext in '' $ac_executable_extensions; do
-  if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+  if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
      ac_cv_prog_STRIP="${ac_tool_prefix}strip"
      $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
      break 2
@@ -13194,7 +13133,7 @@ if test -z "$ac_cv_prog_STRIP"; then
  set dummy strip; ac_word=$2
  { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
  $as_echo_n "checking for $ac_word... " >&6; }
-if test "${ac_cv_prog_ac_ct_STRIP+set}" = set; then :
+if ${ac_cv_prog_ac_ct_STRIP+:} false; then :
    $as_echo_n "(cached) " >&6
  else
    if test -n "$ac_ct_STRIP"; then
@@ -13206,7 +13145,7 @@ do
    IFS=$as_save_IFS
    test -z "$as_dir" && as_dir=.
      for ac_exec_ext in '' $ac_executable_extensions; do
-  if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+  if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
      ac_cv_prog_ac_ct_STRIP="strip"
      $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
      break 2
@@ -13253,7 +13192,7 @@ if test -n "$ac_tool_prefix"; then
  set dummy ${ac_tool_prefix}ranlib; ac_word=$2
  { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
  $as_echo_n "checking for $ac_word... " >&6; }
-if test "${ac_cv_prog_RANLIB+set}" = set; then :
+if ${ac_cv_prog_RANLIB+:} false; then :
    $as_echo_n "(cached) " >&6
  else
    if test -n "$RANLIB"; then
@@ -13265,7 +13204,7 @@ do
    IFS=$as_save_IFS
    test -z "$as_dir" && as_dir=.
      for ac_exec_ext in '' $ac_executable_extensions; do
-  if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+  if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
      ac_cv_prog_RANLIB="${ac_tool_prefix}ranlib"
      $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
      break 2
@@ -13293,7 +13232,7 @@ if test -z "$ac_cv_prog_RANLIB"; then
  set dummy ranlib; ac_word=$2
  { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
  $as_echo_n "checking for $ac_word... " >&6; }
-if test "${ac_cv_prog_ac_ct_RANLIB+set}" = set; then :
+if ${ac_cv_prog_ac_ct_RANLIB+:} false; then :
    $as_echo_n "(cached) " >&6
  else
    if test -n "$ac_ct_RANLIB"; then
@@ -13305,7 +13244,7 @@ do
    IFS=$as_save_IFS
    test -z "$as_dir" && as_dir=.
      for ac_exec_ext in '' $ac_executable_extensions; do
-  if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+  if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
      ac_cv_prog_ac_ct_RANLIB="ranlib"
      $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
      break 2
@@ -13422,7 +13361,7 @@ compiler=$CC
  # Check for command to grab the raw symbol name followed by C symbol from nm.
  { $as_echo "$as_me:${as_lineno-$LINENO}: checking command to parse $NM output from $compiler object" >&5
  $as_echo_n "checking command to parse $NM output from $compiler object... " >&6; }
-if test "${lt_cv_sys_global_symbol_pipe+set}" = set; then :
+if ${lt_cv_sys_global_symbol_pipe+:} false; then :
    $as_echo_n "(cached) " >&6
  else
  
@@ -13720,7 +13659,7 @@ case ${with_sysroot} in #(
   *)
     { $as_echo "$as_me:${as_lineno-$LINENO}: result: ${with_sysroot}" >&5
  $as_echo "${with_sysroot}" >&6; }
-   as_fn_error "The sysroot must be an absolute path." "$LINENO" 5
+   as_fn_error $? "The sysroot must be an absolute path." "$LINENO" 5
     ;;
  esac
  
@@ -13856,7 +13795,7 @@ s390*-*linux*|s390*-*tpf*|sparc*-*linux*)
    CFLAGS="$CFLAGS -belf"
    { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether the C compiler needs -belf" >&5
  $as_echo_n "checking whether the C compiler needs -belf... " >&6; }
-if test "${lt_cv_cc_needs_belf+set}" = set; then :
+if ${lt_cv_cc_needs_belf+:} false; then :
    $as_echo_n "(cached) " >&6
  else
    ac_ext=c
@@ -13942,7 +13881,7 @@ if test -n "$ac_tool_prefix"; then
  set dummy ${ac_tool_prefix}mt; ac_word=$2
  { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
  $as_echo_n "checking for $ac_word... " >&6; }
-if test "${ac_cv_prog_MANIFEST_TOOL+set}" = set; then :
+if ${ac_cv_prog_MANIFEST_TOOL+:} false; then :
    $as_echo_n "(cached) " >&6
  else
    if test -n "$MANIFEST_TOOL"; then
@@ -13954,7 +13893,7 @@ do
    IFS=$as_save_IFS
    test -z "$as_dir" && as_dir=.
      for ac_exec_ext in '' $ac_executable_extensions; do
-  if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+  if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
      ac_cv_prog_MANIFEST_TOOL="${ac_tool_prefix}mt"
      $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
      break 2
@@ -13982,7 +13921,7 @@ if test -z "$ac_cv_prog_MANIFEST_TOOL"; then
  set dummy mt; ac_word=$2
  { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
  $as_echo_n "checking for $ac_word... " >&6; }
-if test "${ac_cv_prog_ac_ct_MANIFEST_TOOL+set}" = set; then :
+if ${ac_cv_prog_ac_ct_MANIFEST_TOOL+:} false; then :
    $as_echo_n "(cached) " >&6
  else
    if test -n "$ac_ct_MANIFEST_TOOL"; then
@@ -13994,7 +13933,7 @@ do
    IFS=$as_save_IFS
    test -z "$as_dir" && as_dir=.
      for ac_exec_ext in '' $ac_executable_extensions; do
-  if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+  if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
      ac_cv_prog_ac_ct_MANIFEST_TOOL="mt"
      $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
      break 2
@@ -14032,7 +13971,7 @@ fi
  test -z "$MANIFEST_TOOL" && MANIFEST_TOOL=mt
  { $as_echo "$as_me:${as_lineno-$LINENO}: checking if $MANIFEST_TOOL is a manifest tool" >&5
  $as_echo_n "checking if $MANIFEST_TOOL is a manifest tool... " >&6; }
-if test "${lt_cv_path_mainfest_tool+set}" = set; then :
+if ${lt_cv_path_mainfest_tool+:} false; then :
    $as_echo_n "(cached) " >&6
  else
    lt_cv_path_mainfest_tool=no
@@ -14062,7 +14001,7 @@ fi
  set dummy ${ac_tool_prefix}dsymutil; ac_word=$2
  { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
  $as_echo_n "checking for $ac_word... " >&6; }
-if test "${ac_cv_prog_DSYMUTIL+set}" = set; then :
+if ${ac_cv_prog_DSYMUTIL+:} false; then :
    $as_echo_n "(cached) " >&6
  else
    if test -n "$DSYMUTIL"; then
@@ -14074,7 +14013,7 @@ do
    IFS=$as_save_IFS
    test -z "$as_dir" && as_dir=.
      for ac_exec_ext in '' $ac_executable_extensions; do
-  if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+  if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
      ac_cv_prog_DSYMUTIL="${ac_tool_prefix}dsymutil"
      $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
      break 2
@@ -14102,7 +14041,7 @@ if test -z "$ac_cv_prog_DSYMUTIL"; then
  set dummy dsymutil; ac_word=$2
  { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
  $as_echo_n "checking for $ac_word... " >&6; }
-if test "${ac_cv_prog_ac_ct_DSYMUTIL+set}" = set; then :
+if ${ac_cv_prog_ac_ct_DSYMUTIL+:} false; then :
    $as_echo_n "(cached) " >&6
  else
    if test -n "$ac_ct_DSYMUTIL"; then
@@ -14114,7 +14053,7 @@ do
    IFS=$as_save_IFS
    test -z "$as_dir" && as_dir=.
      for ac_exec_ext in '' $ac_executable_extensions; do
-  if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+  if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
      ac_cv_prog_ac_ct_DSYMUTIL="dsymutil"
      $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
      break 2
@@ -14154,7 +14093,7 @@ fi
  set dummy ${ac_tool_prefix}nmedit; ac_word=$2
  { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
  $as_echo_n "checking for $ac_word... " >&6; }
-if test "${ac_cv_prog_NMEDIT+set}" = set; then :
+if ${ac_cv_prog_NMEDIT+:} false; then :
    $as_echo_n "(cached) " >&6
  else
    if test -n "$NMEDIT"; then
@@ -14166,7 +14105,7 @@ do
    IFS=$as_save_IFS
    test -z "$as_dir" && as_dir=.
      for ac_exec_ext in '' $ac_executable_extensions; do
-  if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+  if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
      ac_cv_prog_NMEDIT="${ac_tool_prefix}nmedit"
      $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
      break 2
@@ -14194,7 +14133,7 @@ if test -z "$ac_cv_prog_NMEDIT"; then
  set dummy nmedit; ac_word=$2
  { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
  $as_echo_n "checking for $ac_word... " >&6; }
-if test "${ac_cv_prog_ac_ct_NMEDIT+set}" = set; then :
+if ${ac_cv_prog_ac_ct_NMEDIT+:} false; then :
    $as_echo_n "(cached) " >&6
  else
    if test -n "$ac_ct_NMEDIT"; then
@@ -14206,7 +14145,7 @@ do
    IFS=$as_save_IFS
    test -z "$as_dir" && as_dir=.
      for ac_exec_ext in '' $ac_executable_extensions; do
-  if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+  if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
      ac_cv_prog_ac_ct_NMEDIT="nmedit"
      $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
      break 2
@@ -14246,7 +14185,7 @@ fi
  set dummy ${ac_tool_prefix}lipo; ac_word=$2
  { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
  $as_echo_n "checking for $ac_word... " >&6; }
-if test "${ac_cv_prog_LIPO+set}" = set; then :
+if ${ac_cv_prog_LIPO+:} false; then :
    $as_echo_n "(cached) " >&6
  else
    if test -n "$LIPO"; then
@@ -14258,7 +14197,7 @@ do
    IFS=$as_save_IFS
    test -z "$as_dir" && as_dir=.
      for ac_exec_ext in '' $ac_executable_extensions; do
-  if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+  if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
      ac_cv_prog_LIPO="${ac_tool_prefix}lipo"
      $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
      break 2
@@ -14286,7 +14225,7 @@ if test -z "$ac_cv_prog_LIPO"; then
  set dummy lipo; ac_word=$2
  { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
  $as_echo_n "checking for $ac_word... " >&6; }
-if test "${ac_cv_prog_ac_ct_LIPO+set}" = set; then :
+if ${ac_cv_prog_ac_ct_LIPO+:} false; then :
    $as_echo_n "(cached) " >&6
  else
    if test -n "$ac_ct_LIPO"; then
@@ -14298,7 +14237,7 @@ do
    IFS=$as_save_IFS
    test -z "$as_dir" && as_dir=.
      for ac_exec_ext in '' $ac_executable_extensions; do
-  if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+  if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
      ac_cv_prog_ac_ct_LIPO="lipo"
      $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
      break 2
@@ -14338,7 +14277,7 @@ fi
  set dummy ${ac_tool_prefix}otool; ac_word=$2
  { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
  $as_echo_n "checking for $ac_word... " >&6; }
-if test "${ac_cv_prog_OTOOL+set}" = set; then :
+if ${ac_cv_prog_OTOOL+:} false; then :
    $as_echo_n "(cached) " >&6
  else
    if test -n "$OTOOL"; then
@@ -14350,7 +14289,7 @@ do
    IFS=$as_save_IFS
    test -z "$as_dir" && as_dir=.
      for ac_exec_ext in '' $ac_executable_extensions; do
-  if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+  if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
      ac_cv_prog_OTOOL="${ac_tool_prefix}otool"
      $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
      break 2
@@ -14378,7 +14317,7 @@ if test -z "$ac_cv_prog_OTOOL"; then
  set dummy otool; ac_word=$2
  { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
  $as_echo_n "checking for $ac_word... " >&6; }
-if test "${ac_cv_prog_ac_ct_OTOOL+set}" = set; then :
+if ${ac_cv_prog_ac_ct_OTOOL+:} false; then :
    $as_echo_n "(cached) " >&6
  else
    if test -n "$ac_ct_OTOOL"; then
@@ -14390,7 +14329,7 @@ do
    IFS=$as_save_IFS
    test -z "$as_dir" && as_dir=.
      for ac_exec_ext in '' $ac_executable_extensions; do
-  if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+  if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
      ac_cv_prog_ac_ct_OTOOL="otool"
      $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
      break 2
@@ -14430,7 +14369,7 @@ fi
  set dummy ${ac_tool_prefix}otool64; ac_word=$2
  { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
  $as_echo_n "checking for $ac_word... " >&6; }
-if test "${ac_cv_prog_OTOOL64+set}" = set; then :
+if ${ac_cv_prog_OTOOL64+:} false; then :
    $as_echo_n "(cached) " >&6
  else
    if test -n "$OTOOL64"; then
@@ -14442,7 +14381,7 @@ do
    IFS=$as_save_IFS
    test -z "$as_dir" && as_dir=.
      for ac_exec_ext in '' $ac_executable_extensions; do
-  if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+  if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
      ac_cv_prog_OTOOL64="${ac_tool_prefix}otool64"
      $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
      break 2
@@ -14470,7 +14409,7 @@ if test -z "$ac_cv_prog_OTOOL64"; then
  set dummy otool64; ac_word=$2
  { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
  $as_echo_n "checking for $ac_word... " >&6; }
-if test "${ac_cv_prog_ac_ct_OTOOL64+set}" = set; then :
+if ${ac_cv_prog_ac_ct_OTOOL64+:} false; then :
    $as_echo_n "(cached) " >&6
  else
    if test -n "$ac_ct_OTOOL64"; then
@@ -14482,7 +14421,7 @@ do
    IFS=$as_save_IFS
    test -z "$as_dir" && as_dir=.
      for ac_exec_ext in '' $ac_executable_extensions; do
-  if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+  if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
      ac_cv_prog_ac_ct_OTOOL64="otool64"
      $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
      break 2
@@ -14545,7 +14484,7 @@ fi
  
      { $as_echo "$as_me:${as_lineno-$LINENO}: checking for -single_module linker flag" >&5
  $as_echo_n "checking for -single_module linker flag... " >&6; }
-if test "${lt_cv_apple_cc_single_mod+set}" = set; then :
+if ${lt_cv_apple_cc_single_mod+:} false; then :
    $as_echo_n "(cached) " >&6
  else
    lt_cv_apple_cc_single_mod=no
@@ -14581,7 +14520,7 @@ $as_echo "$lt_cv_apple_cc_single_mod" >&6; }
  
      { $as_echo "$as_me:${as_lineno-$LINENO}: checking for -exported_symbols_list linker flag" >&5
  $as_echo_n "checking for -exported_symbols_list linker flag... " >&6; }
-if test "${lt_cv_ld_exported_symbols_list+set}" = set; then :
+if ${lt_cv_ld_exported_symbols_list+:} false; then :
    $as_echo_n "(cached) " >&6
  else
    lt_cv_ld_exported_symbols_list=no
@@ -14614,7 +14553,7 @@ $as_echo "$lt_cv_ld_exported_symbols_list" >&6; }
  
      { $as_echo "$as_me:${as_lineno-$LINENO}: checking for -force_load linker flag" >&5
  $as_echo_n "checking for -force_load linker flag... " >&6; }
-if test "${lt_cv_ld_force_load+set}" = set; then :
+if ${lt_cv_ld_force_load+:} false; then :
    $as_echo_n "(cached) " >&6
  else
    lt_cv_ld_force_load=no
@@ -14681,11 +14620,140 @@ $as_echo "$lt_cv_ld_force_load" >&6; }
      ;;
    esac
  
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for ANSI C header files" >&5
+$as_echo_n "checking for ANSI C header files... " >&6; }
+if ${ac_cv_header_stdc+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+#include <stdlib.h>
+#include <stdarg.h>
+#include <string.h>
+#include <float.h>
+
+int
+main ()
+{
+
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"; then :
+  ac_cv_header_stdc=yes
+else
+  ac_cv_header_stdc=no
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+
+if test $ac_cv_header_stdc = yes; then
+  # SunOS 4.x string.h does not declare mem*, contrary to ANSI.
+  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+#include <string.h>
+
+_ACEOF
+if (eval "$ac_cpp conftest.$ac_ext") 2>&5 |
+  $EGREP "memchr" >/dev/null 2>&1; then :
+
+else
+  ac_cv_header_stdc=no
+fi
+rm -f conftest*
+
+fi
+
+if test $ac_cv_header_stdc = yes; then
+  # ISC 2.0.2 stdlib.h does not declare free, contrary to ANSI.
+  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+#include <stdlib.h>
+
+_ACEOF
+if (eval "$ac_cpp conftest.$ac_ext") 2>&5 |
+  $EGREP "free" >/dev/null 2>&1; then :
+
+else
+  ac_cv_header_stdc=no
+fi
+rm -f conftest*
+
+fi
+
+if test $ac_cv_header_stdc = yes; then
+  # /bin/cc in Irix-4.0.5 gets non-ANSI ctype macros unless using -ansi.
+  if test "$cross_compiling" = yes; then :
+  :
+else
+  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+#include <ctype.h>
+#include <stdlib.h>
+#if ((' ' & 0x0FF) == 0x020)
+# define ISLOWER(c) ('a' <= (c) && (c) <= 'z')
+# define TOUPPER(c) (ISLOWER(c) ? 'A' + ((c) - 'a') : (c))
+#else
+# define ISLOWER(c) \
+                  (('a' <= (c) && (c) <= 'i') \
+                    || ('j' <= (c) && (c) <= 'r') \
+                    || ('s' <= (c) && (c) <= 'z'))
+# define TOUPPER(c) (ISLOWER(c) ? ((c) | 0x40) : (c))
+#endif
+
+#define XOR(e, f) (((e) && !(f)) || (!(e) && (f)))
+int
+main ()
+{
+  int i;
+  for (i = 0; i < 256; i++)
+    if (XOR (islower (i), ISLOWER (i))
+       || toupper (i) != TOUPPER (i))
+      return 2;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_run "$LINENO"; then :
+
+else
+  ac_cv_header_stdc=no
+fi
+rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext \
+  conftest.$ac_objext conftest.beam conftest.$ac_ext
+fi
+
+fi
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_header_stdc" >&5
+$as_echo "$ac_cv_header_stdc" >&6; }
+if test $ac_cv_header_stdc = yes; then
+
+$as_echo "#define STDC_HEADERS 1" >>confdefs.h
+
+fi
+
+# On IRIX 5.3, sys/types and inttypes.h are conflicting.
+for ac_header in sys/types.h sys/stat.h stdlib.h string.h memory.h strings.h \
+                 inttypes.h stdint.h unistd.h
+do :
+  as_ac_Header=`$as_echo "ac_cv_header_$ac_header" | $as_tr_sh`
+ac_fn_c_check_header_compile "$LINENO" "$ac_header" "$as_ac_Header" "$ac_includes_default
+"
+if eval test \"x\$"$as_ac_Header"\" = x"yes"; then :
+  cat >>confdefs.h <<_ACEOF
+#define `$as_echo "HAVE_$ac_header" | $as_tr_cpp` 1
+_ACEOF
+
+fi
+
+done
+
+
  for ac_header in dlfcn.h
  do :
    ac_fn_c_check_header_compile "$LINENO" "dlfcn.h" "ac_cv_header_dlfcn_h" "$ac_includes_default
  "
-if test "x$ac_cv_header_dlfcn_h" = x""yes; then :
+if test "x$ac_cv_header_dlfcn_h" = xyes; then :
    cat >>confdefs.h <<_ACEOF
  #define HAVE_DLFCN_H 1
  _ACEOF
@@ -14901,7 +14969,7 @@ fi
  
  { $as_echo "$as_me:${as_lineno-$LINENO}: checking for objdir" >&5
  $as_echo_n "checking for objdir... " >&6; }
-if test "${lt_cv_objdir+set}" = set; then :
+if ${lt_cv_objdir+:} false; then :
    $as_echo_n "(cached) " >&6
  else
    rm -f .libs 2>/dev/null
@@ -14979,7 +15047,7 @@ file_magic*)
    if test "$file_magic_cmd" = '$MAGIC_CMD'; then
      { $as_echo "$as_me:${as_lineno-$LINENO}: checking for ${ac_tool_prefix}file" >&5
  $as_echo_n "checking for ${ac_tool_prefix}file... " >&6; }
-if test "${lt_cv_path_MAGIC_CMD+set}" = set; then :
+if ${lt_cv_path_MAGIC_CMD+:} false; then :
    $as_echo_n "(cached) " >&6
  else
    case $MAGIC_CMD in
@@ -15045,7 +15113,7 @@ if test -z "$lt_cv_path_MAGIC_CMD"; then
    if test -n "$ac_tool_prefix"; then
      { $as_echo "$as_me:${as_lineno-$LINENO}: checking for file" >&5
  $as_echo_n "checking for file... " >&6; }
-if test "${lt_cv_path_MAGIC_CMD+set}" = set; then :
+if ${lt_cv_path_MAGIC_CMD+:} false; then :
    $as_echo_n "(cached) " >&6
  else
    case $MAGIC_CMD in
@@ -15183,7 +15251,7 @@ if test "$GCC" = yes; then
  
    { $as_echo "$as_me:${as_lineno-$LINENO}: checking if $compiler supports -fno-rtti -fno-exceptions" >&5
  $as_echo_n "checking if $compiler supports -fno-rtti -fno-exceptions... " >&6; }
-if test "${lt_cv_prog_compiler_rtti_exceptions+set}" = set; then :
+if ${lt_cv_prog_compiler_rtti_exceptions+:} false; then :
    $as_echo_n "(cached) " >&6
  else
    lt_cv_prog_compiler_rtti_exceptions=no
@@ -15545,7 +15613,7 @@ esac
  
  { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $compiler option to produce PIC" >&5
  $as_echo_n "checking for $compiler option to produce PIC... " >&6; }
-if test "${lt_cv_prog_compiler_pic+set}" = set; then :
+if ${lt_cv_prog_compiler_pic+:} false; then :
    $as_echo_n "(cached) " >&6
  else
    lt_cv_prog_compiler_pic=$lt_prog_compiler_pic
@@ -15560,7 +15628,7 @@ lt_prog_compiler_pic=$lt_cv_prog_compiler_pic
  if test -n "$lt_prog_compiler_pic"; then
    { $as_echo "$as_me:${as_lineno-$LINENO}: checking if $compiler PIC flag $lt_prog_compiler_pic works" >&5
  $as_echo_n "checking if $compiler PIC flag $lt_prog_compiler_pic works... " >&6; }
-if test "${lt_cv_prog_compiler_pic_works+set}" = set; then :
+if ${lt_cv_prog_compiler_pic_works+:} false; then :
    $as_echo_n "(cached) " >&6
  else
    lt_cv_prog_compiler_pic_works=no
@@ -15624,7 +15692,7 @@ fi
  wl=$lt_prog_compiler_wl eval lt_tmp_static_flag=\"$lt_prog_compiler_static\"
  { $as_echo "$as_me:${as_lineno-$LINENO}: checking if $compiler static flag $lt_tmp_static_flag works" >&5
  $as_echo_n "checking if $compiler static flag $lt_tmp_static_flag works... " >&6; }
-if test "${lt_cv_prog_compiler_static_works+set}" = set; then :
+if ${lt_cv_prog_compiler_static_works+:} false; then :
    $as_echo_n "(cached) " >&6
  else
    lt_cv_prog_compiler_static_works=no
@@ -15667,7 +15735,7 @@ fi
  
    { $as_echo "$as_me:${as_lineno-$LINENO}: checking if $compiler supports -c -o file.$ac_objext" >&5
  $as_echo_n "checking if $compiler supports -c -o file.$ac_objext... " >&6; }
-if test "${lt_cv_prog_compiler_c_o+set}" = set; then :
+if ${lt_cv_prog_compiler_c_o+:} false; then :
    $as_echo_n "(cached) " >&6
  else
    lt_cv_prog_compiler_c_o=no
@@ -15722,7 +15790,7 @@ $as_echo "$lt_cv_prog_compiler_c_o" >&6; }
  
    { $as_echo "$as_me:${as_lineno-$LINENO}: checking if $compiler supports -c -o file.$ac_objext" >&5
  $as_echo_n "checking if $compiler supports -c -o file.$ac_objext... " >&6; }
-if test "${lt_cv_prog_compiler_c_o+set}" = set; then :
+if ${lt_cv_prog_compiler_c_o+:} false; then :
    $as_echo_n "(cached) " >&6
  else
    lt_cv_prog_compiler_c_o=no
@@ -16285,7 +16353,7 @@ _LT_EOF
          if test "${lt_cv_aix_libpath+set}" = set; then
    aix_libpath=$lt_cv_aix_libpath
  else
-  if test "${lt_cv_aix_libpath_+set}" = set; then :
+  if ${lt_cv_aix_libpath_+:} false; then :
    $as_echo_n "(cached) " >&6
  else
    cat confdefs.h - <<_ACEOF >conftest.$ac_ext
@@ -16338,7 +16406,7 @@ fi
          if test "${lt_cv_aix_libpath+set}" = set; then
    aix_libpath=$lt_cv_aix_libpath
  else
-  if test "${lt_cv_aix_libpath_+set}" = set; then :
+  if ${lt_cv_aix_libpath_+:} false; then :
    $as_echo_n "(cached) " >&6
  else
    cat confdefs.h - <<_ACEOF >conftest.$ac_ext
@@ -16607,7 +16675,7 @@ fi
           # (HP92453-01 A.11.01.20 doesn't, HP92453-01 B.11.X.35175-35176.GP does)
           { $as_echo "$as_me:${as_lineno-$LINENO}: checking if $CC understands -b" >&5
  $as_echo_n "checking if $CC understands -b... " >&6; }
-if test "${lt_cv_prog_compiler__b+set}" = set; then :
+if ${lt_cv_prog_compiler__b+:} false; then :
    $as_echo_n "(cached) " >&6
  else
    lt_cv_prog_compiler__b=no
@@ -16676,7 +16744,7 @@ fi
         # This should be the same for all languages, so no per-tag cache variable.
         { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether the $host_os linker accepts -exported_symbol" >&5
  $as_echo_n "checking whether the $host_os linker accepts -exported_symbol... " >&6; }
-if test "${lt_cv_irix_exported_symbol+set}" = set; then :
+if ${lt_cv_irix_exported_symbol+:} false; then :
    $as_echo_n "(cached) " >&6
  else
    save_LDFLAGS="$LDFLAGS"
@@ -16992,7 +17060,7 @@ x|xyes)
        # to ld, don't add -lc before -lgcc.
        { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether -lc should be explicitly linked in" >&5
  $as_echo_n "checking whether -lc should be explicitly linked in... " >&6; }
-if test "${lt_cv_archive_cmds_need_lc+set}" = set; then :
+if ${lt_cv_archive_cmds_need_lc+:} false; then :
    $as_echo_n "(cached) " >&6
  else
    $RM conftest*
@@ -17685,7 +17753,7 @@ linux* | k*bsd*-gnu | kopensolaris*-gnu)
    shlibpath_overrides_runpath=no
  
    # Some binutils ld are patched to set DT_RUNPATH
-  if test "${lt_cv_shlibpath_overrides_runpath+set}" = set; then :
+  if ${lt_cv_shlibpath_overrides_runpath+:} false; then :
    $as_echo_n "(cached) " >&6
  else
    lt_cv_shlibpath_overrides_runpath=no
@@ -18105,7 +18173,7 @@ else
    # if libdl is installed we need to link against it
      { $as_echo "$as_me:${as_lineno-$LINENO}: checking for dlopen in -ldl" >&5
  $as_echo_n "checking for dlopen in -ldl... " >&6; }
-if test "${ac_cv_lib_dl_dlopen+set}" = set; then :
+if ${ac_cv_lib_dl_dlopen+:} false; then :
    $as_echo_n "(cached) " >&6
  else
    ac_check_lib_save_LIBS=$LIBS
@@ -18139,7 +18207,7 @@ LIBS=$ac_check_lib_save_LIBS
  fi
  { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_dl_dlopen" >&5
  $as_echo "$ac_cv_lib_dl_dlopen" >&6; }
-if test "x$ac_cv_lib_dl_dlopen" = x""yes; then :
+if test "x$ac_cv_lib_dl_dlopen" = xyes; then :
    lt_cv_dlopen="dlopen" lt_cv_dlopen_libs="-ldl"
  else
  
@@ -18153,12 +18221,12 @@ fi
  
    *)
      ac_fn_c_check_func "$LINENO" "shl_load" "ac_cv_func_shl_load"
-if test "x$ac_cv_func_shl_load" = x""yes; then :
+if test "x$ac_cv_func_shl_load" = xyes; then :
    lt_cv_dlopen="shl_load"
  else
    { $as_echo "$as_me:${as_lineno-$LINENO}: checking for shl_load in -ldld" >&5
  $as_echo_n "checking for shl_load in -ldld... " >&6; }
-if test "${ac_cv_lib_dld_shl_load+set}" = set; then :
+if ${ac_cv_lib_dld_shl_load+:} false; then :
    $as_echo_n "(cached) " >&6
  else
    ac_check_lib_save_LIBS=$LIBS
@@ -18192,16 +18260,16 @@ LIBS=$ac_check_lib_save_LIBS
  fi
  { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_dld_shl_load" >&5
  $as_echo "$ac_cv_lib_dld_shl_load" >&6; }
-if test "x$ac_cv_lib_dld_shl_load" = x""yes; then :
+if test "x$ac_cv_lib_dld_shl_load" = xyes; then :
    lt_cv_dlopen="shl_load" lt_cv_dlopen_libs="-ldld"
  else
    ac_fn_c_check_func "$LINENO" "dlopen" "ac_cv_func_dlopen"
-if test "x$ac_cv_func_dlopen" = x""yes; then :
+if test "x$ac_cv_func_dlopen" = xyes; then :
    lt_cv_dlopen="dlopen"
  else
    { $as_echo "$as_me:${as_lineno-$LINENO}: checking for dlopen in -ldl" >&5
  $as_echo_n "checking for dlopen in -ldl... " >&6; }
-if test "${ac_cv_lib_dl_dlopen+set}" = set; then :
+if ${ac_cv_lib_dl_dlopen+:} false; then :
    $as_echo_n "(cached) " >&6
  else
    ac_check_lib_save_LIBS=$LIBS
@@ -18235,12 +18303,12 @@ LIBS=$ac_check_lib_save_LIBS
  fi
  { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_dl_dlopen" >&5
  $as_echo "$ac_cv_lib_dl_dlopen" >&6; }
-if test "x$ac_cv_lib_dl_dlopen" = x""yes; then :
+if test "x$ac_cv_lib_dl_dlopen" = xyes; then :
    lt_cv_dlopen="dlopen" lt_cv_dlopen_libs="-ldl"
  else
    { $as_echo "$as_me:${as_lineno-$LINENO}: checking for dlopen in -lsvld" >&5
  $as_echo_n "checking for dlopen in -lsvld... " >&6; }
-if test "${ac_cv_lib_svld_dlopen+set}" = set; then :
+if ${ac_cv_lib_svld_dlopen+:} false; then :
    $as_echo_n "(cached) " >&6
  else
    ac_check_lib_save_LIBS=$LIBS
@@ -18274,12 +18342,12 @@ LIBS=$ac_check_lib_save_LIBS
  fi
  { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_svld_dlopen" >&5
  $as_echo "$ac_cv_lib_svld_dlopen" >&6; }
-if test "x$ac_cv_lib_svld_dlopen" = x""yes; then :
+if test "x$ac_cv_lib_svld_dlopen" = xyes; then :
    lt_cv_dlopen="dlopen" lt_cv_dlopen_libs="-lsvld"
  else
    { $as_echo "$as_me:${as_lineno-$LINENO}: checking for dld_link in -ldld" >&5
  $as_echo_n "checking for dld_link in -ldld... " >&6; }
-if test "${ac_cv_lib_dld_dld_link+set}" = set; then :
+if ${ac_cv_lib_dld_dld_link+:} false; then :
    $as_echo_n "(cached) " >&6
  else
    ac_check_lib_save_LIBS=$LIBS
@@ -18313,7 +18381,7 @@ LIBS=$ac_check_lib_save_LIBS
  fi
  { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_dld_dld_link" >&5
  $as_echo "$ac_cv_lib_dld_dld_link" >&6; }
-if test "x$ac_cv_lib_dld_dld_link" = x""yes; then :
+if test "x$ac_cv_lib_dld_dld_link" = xyes; then :
    lt_cv_dlopen="dld_link" lt_cv_dlopen_libs="-ldld"
  fi
  
@@ -18354,7 +18422,7 @@ fi
  
      { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether a program can dlopen itself" >&5
  $as_echo_n "checking whether a program can dlopen itself... " >&6; }
-if test "${lt_cv_dlopen_self+set}" = set; then :
+if ${lt_cv_dlopen_self+:} false; then :
    $as_echo_n "(cached) " >&6
  else
           if test "$cross_compiling" = yes; then :
@@ -18460,7 +18528,7 @@ $as_echo "$lt_cv_dlopen_self" >&6; }
        wl=$lt_prog_compiler_wl eval LDFLAGS=\"\$LDFLAGS $lt_prog_compiler_static\"
        { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether a statically linked program can dlopen itself" >&5
  $as_echo_n "checking whether a statically linked program can dlopen itself... " >&6; }
-if test "${lt_cv_dlopen_self_static+set}" = set; then :
+if ${lt_cv_dlopen_self_static+:} false; then :
    $as_echo_n "(cached) " >&6
  else
           if test "$cross_compiling" = yes; then :
@@ -18697,7 +18765,7 @@ ac_compiler_gnu=$ac_cv_cxx_compiler_gnu
  { $as_echo "$as_me:${as_lineno-$LINENO}: checking how to run the C++ preprocessor" >&5
  $as_echo_n "checking how to run the C++ preprocessor... " >&6; }
  if test -z "$CXXCPP"; then
-  if test "${ac_cv_prog_CXXCPP+set}" = set; then :
+  if ${ac_cv_prog_CXXCPP+:} false; then :
    $as_echo_n "(cached) " >&6
  else
        # Double quotes because CXXCPP needs to be expanded
@@ -18727,7 +18795,7 @@ else
    # Broken: fails on valid input.
  continue
  fi
-rm -f conftest.err conftest.$ac_ext
+rm -f conftest.err conftest.i conftest.$ac_ext
  
    # OK, works on sane cases.  Now check whether nonexistent headers
    # can be detected and how.
@@ -18743,11 +18811,11 @@ else
  ac_preproc_ok=:
  break
  fi
-rm -f conftest.err conftest.$ac_ext
+rm -f conftest.err conftest.i conftest.$ac_ext
  
  done
  # Because of `break', _AC_PREPROC_IFELSE's cleaning code was skipped.
-rm -f conftest.err conftest.$ac_ext
+rm -f conftest.i conftest.err conftest.$ac_ext
  if $ac_preproc_ok; then :
    break
  fi
@@ -18786,7 +18854,7 @@ else
    # Broken: fails on valid input.
  continue
  fi
-rm -f conftest.err conftest.$ac_ext
+rm -f conftest.err conftest.i conftest.$ac_ext
  
    # OK, works on sane cases.  Now check whether nonexistent headers
    # can be detected and how.
@@ -18802,18 +18870,18 @@ else
  ac_preproc_ok=:
  break
  fi
-rm -f conftest.err conftest.$ac_ext
+rm -f conftest.err conftest.i conftest.$ac_ext
  
  done
  # Because of `break', _AC_PREPROC_IFELSE's cleaning code was skipped.
-rm -f conftest.err conftest.$ac_ext
+rm -f conftest.i conftest.err conftest.$ac_ext
  if $ac_preproc_ok; then :
  
  else
    { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
  $as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
-as_fn_error "C++ preprocessor \"$CXXCPP\" fails sanity check
-See \`config.log' for more details." "$LINENO" 5; }
+as_fn_error $? "C++ preprocessor \"$CXXCPP\" fails sanity check
+See \`config.log' for more details" "$LINENO" 5; }
  fi
  
  ac_ext=c
@@ -18999,7 +19067,7 @@ else
    { $as_echo "$as_me:${as_lineno-$LINENO}: checking for non-GNU ld" >&5
  $as_echo_n "checking for non-GNU ld... " >&6; }
  fi
-if test "${lt_cv_path_LD+set}" = set; then :
+if ${lt_cv_path_LD+:} false; then :
    $as_echo_n "(cached) " >&6
  else
    if test -z "$LD"; then
@@ -19036,10 +19104,10 @@ else
    { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
  $as_echo "no" >&6; }
  fi
-test -z "$LD" && as_fn_error "no acceptable ld found in \$PATH" "$LINENO" 5
+test -z "$LD" && as_fn_error $? "no acceptable ld found in \$PATH" "$LINENO" 5
  { $as_echo "$as_me:${as_lineno-$LINENO}: checking if the linker ($LD) is GNU ld" >&5
  $as_echo_n "checking if the linker ($LD) is GNU ld... " >&6; }
-if test "${lt_cv_prog_gnu_ld+set}" = set; then :
+if ${lt_cv_prog_gnu_ld+:} false; then :
    $as_echo_n "(cached) " >&6
  else
    # I'd rather use --version here, but apparently some GNU lds only accept -v.
@@ -19211,7 +19279,7 @@ $as_echo_n "checking whether the $compiler linker ($LD) supports shared librarie
            if test "${lt_cv_aix_libpath+set}" = set; then
    aix_libpath=$lt_cv_aix_libpath
  else
-  if test "${lt_cv_aix_libpath__CXX+set}" = set; then :
+  if ${lt_cv_aix_libpath__CXX+:} false; then :
    $as_echo_n "(cached) " >&6
  else
    cat confdefs.h - <<_ACEOF >conftest.$ac_ext
@@ -19265,7 +19333,7 @@ fi
             if test "${lt_cv_aix_libpath+set}" = set; then
    aix_libpath=$lt_cv_aix_libpath
  else
-  if test "${lt_cv_aix_libpath__CXX+set}" = set; then :
+  if ${lt_cv_aix_libpath__CXX+:} false; then :
    $as_echo_n "(cached) " >&6
  else
    cat confdefs.h - <<_ACEOF >conftest.$ac_ext
@@ -20673,7 +20741,7 @@ esac
  
  { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $compiler option to produce PIC" >&5
  $as_echo_n "checking for $compiler option to produce PIC... " >&6; }
-if test "${lt_cv_prog_compiler_pic_CXX+set}" = set; then :
+if ${lt_cv_prog_compiler_pic_CXX+:} false; then :
    $as_echo_n "(cached) " >&6
  else
    lt_cv_prog_compiler_pic_CXX=$lt_prog_compiler_pic_CXX
@@ -20688,7 +20756,7 @@ lt_prog_compiler_pic_CXX=$lt_cv_prog_compiler_pic_CXX
  if test -n "$lt_prog_compiler_pic_CXX"; then
    { $as_echo "$as_me:${as_lineno-$LINENO}: checking if $compiler PIC flag $lt_prog_compiler_pic_CXX works" >&5
  $as_echo_n "checking if $compiler PIC flag $lt_prog_compiler_pic_CXX works... " >&6; }
-if test "${lt_cv_prog_compiler_pic_works_CXX+set}" = set; then :
+if ${lt_cv_prog_compiler_pic_works_CXX+:} false; then :
    $as_echo_n "(cached) " >&6
  else
    lt_cv_prog_compiler_pic_works_CXX=no
@@ -20746,7 +20814,7 @@ fi
  wl=$lt_prog_compiler_wl_CXX eval lt_tmp_static_flag=\"$lt_prog_compiler_static_CXX\"
  { $as_echo "$as_me:${as_lineno-$LINENO}: checking if $compiler static flag $lt_tmp_static_flag works" >&5
  $as_echo_n "checking if $compiler static flag $lt_tmp_static_flag works... " >&6; }
-if test "${lt_cv_prog_compiler_static_works_CXX+set}" = set; then :
+if ${lt_cv_prog_compiler_static_works_CXX+:} false; then :
    $as_echo_n "(cached) " >&6
  else
    lt_cv_prog_compiler_static_works_CXX=no
@@ -20786,7 +20854,7 @@ fi
  
      { $as_echo "$as_me:${as_lineno-$LINENO}: checking if $compiler supports -c -o file.$ac_objext" >&5
  $as_echo_n "checking if $compiler supports -c -o file.$ac_objext... " >&6; }
-if test "${lt_cv_prog_compiler_c_o_CXX+set}" = set; then :
+if ${lt_cv_prog_compiler_c_o_CXX+:} false; then :
    $as_echo_n "(cached) " >&6
  else
    lt_cv_prog_compiler_c_o_CXX=no
@@ -20838,7 +20906,7 @@ $as_echo "$lt_cv_prog_compiler_c_o_CXX" >&6; }
  
      { $as_echo "$as_me:${as_lineno-$LINENO}: checking if $compiler supports -c -o file.$ac_objext" >&5
  $as_echo_n "checking if $compiler supports -c -o file.$ac_objext... " >&6; }
-if test "${lt_cv_prog_compiler_c_o_CXX+set}" = set; then :
+if ${lt_cv_prog_compiler_c_o_CXX+:} false; then :
    $as_echo_n "(cached) " >&6
  else
    lt_cv_prog_compiler_c_o_CXX=no
@@ -20979,7 +21047,7 @@ x|xyes)
        # to ld, don't add -lc before -lgcc.
        { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether -lc should be explicitly linked in" >&5
  $as_echo_n "checking whether -lc should be explicitly linked in... " >&6; }
-if test "${lt_cv_archive_cmds_need_lc_CXX+set}" = set; then :
+if ${lt_cv_archive_cmds_need_lc_CXX+:} false; then :
    $as_echo_n "(cached) " >&6
  else
    $RM conftest*
@@ -21516,7 +21584,7 @@ linux* | k*bsd*-gnu | kopensolaris*-gnu)
    shlibpath_overrides_runpath=no
  
    # Some binutils ld are patched to set DT_RUNPATH
-  if test "${lt_cv_shlibpath_overrides_runpath+set}" = set; then :
+  if ${lt_cv_shlibpath_overrides_runpath+:} false; then :
    $as_echo_n "(cached) " >&6
  else
    lt_cv_shlibpath_overrides_runpath=no
@@ -22338,7 +22406,7 @@ esac
  
  { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $compiler option to produce PIC" >&5
  $as_echo_n "checking for $compiler option to produce PIC... " >&6; }
-if test "${lt_cv_prog_compiler_pic_F77+set}" = set; then :
+if ${lt_cv_prog_compiler_pic_F77+:} false; then :
    $as_echo_n "(cached) " >&6
  else
    lt_cv_prog_compiler_pic_F77=$lt_prog_compiler_pic_F77
@@ -22353,7 +22421,7 @@ lt_prog_compiler_pic_F77=$lt_cv_prog_compiler_pic_F77
  if test -n "$lt_prog_compiler_pic_F77"; then
    { $as_echo "$as_me:${as_lineno-$LINENO}: checking if $compiler PIC flag $lt_prog_compiler_pic_F77 works" >&5
  $as_echo_n "checking if $compiler PIC flag $lt_prog_compiler_pic_F77 works... " >&6; }
-if test "${lt_cv_prog_compiler_pic_works_F77+set}" = set; then :
+if ${lt_cv_prog_compiler_pic_works_F77+:} false; then :
    $as_echo_n "(cached) " >&6
  else
    lt_cv_prog_compiler_pic_works_F77=no
@@ -22411,7 +22479,7 @@ fi
  wl=$lt_prog_compiler_wl_F77 eval lt_tmp_static_flag=\"$lt_prog_compiler_static_F77\"
  { $as_echo "$as_me:${as_lineno-$LINENO}: checking if $compiler static flag $lt_tmp_static_flag works" >&5
  $as_echo_n "checking if $compiler static flag $lt_tmp_static_flag works... " >&6; }
-if test "${lt_cv_prog_compiler_static_works_F77+set}" = set; then :
+if ${lt_cv_prog_compiler_static_works_F77+:} false; then :
    $as_echo_n "(cached) " >&6
  else
    lt_cv_prog_compiler_static_works_F77=no
@@ -22451,7 +22519,7 @@ fi
  
      { $as_echo "$as_me:${as_lineno-$LINENO}: checking if $compiler supports -c -o file.$ac_objext" >&5
  $as_echo_n "checking if $compiler supports -c -o file.$ac_objext... " >&6; }
-if test "${lt_cv_prog_compiler_c_o_F77+set}" = set; then :
+if ${lt_cv_prog_compiler_c_o_F77+:} false; then :
    $as_echo_n "(cached) " >&6
  else
    lt_cv_prog_compiler_c_o_F77=no
@@ -22503,7 +22571,7 @@ $as_echo "$lt_cv_prog_compiler_c_o_F77" >&6; }
  
      { $as_echo "$as_me:${as_lineno-$LINENO}: checking if $compiler supports -c -o file.$ac_objext" >&5
  $as_echo_n "checking if $compiler supports -c -o file.$ac_objext... " >&6; }
-if test "${lt_cv_prog_compiler_c_o_F77+set}" = set; then :
+if ${lt_cv_prog_compiler_c_o_F77+:} false; then :
    $as_echo_n "(cached) " >&6
  else
    lt_cv_prog_compiler_c_o_F77=no
@@ -23063,7 +23131,7 @@ _LT_EOF
          if test "${lt_cv_aix_libpath+set}" = set; then
    aix_libpath=$lt_cv_aix_libpath
  else
-  if test "${lt_cv_aix_libpath__F77+set}" = set; then :
+  if ${lt_cv_aix_libpath__F77+:} false; then :
    $as_echo_n "(cached) " >&6
  else
    cat > conftest.$ac_ext <<_ACEOF
@@ -23110,7 +23178,7 @@ fi
          if test "${lt_cv_aix_libpath+set}" = set; then
    aix_libpath=$lt_cv_aix_libpath
  else
-  if test "${lt_cv_aix_libpath__F77+set}" = set; then :
+  if ${lt_cv_aix_libpath__F77+:} false; then :
    $as_echo_n "(cached) " >&6
  else
    cat > conftest.$ac_ext <<_ACEOF
@@ -23403,7 +23471,7 @@ fi
         # This should be the same for all languages, so no per-tag cache variable.
         { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether the $host_os linker accepts -exported_symbol" >&5
  $as_echo_n "checking whether the $host_os linker accepts -exported_symbol... " >&6; }
-if test "${lt_cv_irix_exported_symbol+set}" = set; then :
+if ${lt_cv_irix_exported_symbol+:} false; then :
    $as_echo_n "(cached) " >&6
  else
    save_LDFLAGS="$LDFLAGS"
@@ -23711,7 +23779,7 @@ x|xyes)
        # to ld, don't add -lc before -lgcc.
        { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether -lc should be explicitly linked in" >&5
  $as_echo_n "checking whether -lc should be explicitly linked in... " >&6; }
-if test "${lt_cv_archive_cmds_need_lc_F77+set}" = set; then :
+if ${lt_cv_archive_cmds_need_lc_F77+:} false; then :
    $as_echo_n "(cached) " >&6
  else
    $RM conftest*
@@ -24248,7 +24316,7 @@ linux* | k*bsd*-gnu | kopensolaris*-gnu)
    shlibpath_overrides_runpath=no
  
    # Some binutils ld are patched to set DT_RUNPATH
-  if test "${lt_cv_shlibpath_overrides_runpath+set}" = set; then :
+  if ${lt_cv_shlibpath_overrides_runpath+:} false; then :
    $as_echo_n "(cached) " >&6
  else
    lt_cv_shlibpath_overrides_runpath=no
@@ -24621,7 +24689,7 @@ ac_compiler_gnu=$ac_cv_c_compiler_gnu
  if test "$enable_shared" = yes && test "$enable_static" = yes; then
    case $library_names_spec in
      *libname.a*)
-      as_fn_error "cannot create both shared and static libraries on this system, --disable one of the two" "$LINENO" 5
+      as_fn_error $? "cannot create both shared and static libraries on this system, --disable one of the two" "$LINENO" 5
        ;;
    esac
  fi
@@ -24641,7 +24709,7 @@ fi
  
  { $as_echo "$as_me:${as_lineno-$LINENO}: checking for ANSI C header files" >&5
  $as_echo_n "checking for ANSI C header files... " >&6; }
-if test "${ac_cv_header_stdc+set}" = set; then :
+if ${ac_cv_header_stdc+:} false; then :
    $as_echo_n "(cached) " >&6
  else
    cat confdefs.h - <<_ACEOF >conftest.$ac_ext
@@ -24753,7 +24821,7 @@ fi
  
  { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether time.h and sys/time.h may both be included" >&5
  $as_echo_n "checking whether time.h and sys/time.h may both be included... " >&6; }
-if test "${ac_cv_header_time+set}" = set; then :
+if ${ac_cv_header_time+:} false; then :
    $as_echo_n "(cached) " >&6
  else
    cat confdefs.h - <<_ACEOF >conftest.$ac_ext
@@ -24816,8 +24884,7 @@ for ac_header in fcntl.h float.h invent.h langinfo.h locale.h nl_types.h sys/att
  do :
    as_ac_Header=`$as_echo "ac_cv_header_$ac_header" | $as_tr_sh`
  ac_fn_c_check_header_mongrel "$LINENO" "$ac_header" "$as_ac_Header" "$ac_includes_default"
-eval as_val=\$$as_ac_Header
-   if test "x$as_val" = x""yes; then :
+if eval test \"x\$"$as_ac_Header"\" = x"yes"; then :
    cat >>confdefs.h <<_ACEOF
  #define `$as_echo "HAVE_$ac_header" | $as_tr_cpp` 1
  _ACEOF
@@ -24841,7 +24908,7 @@ do :
  # endif
  #endif
  "
-if test "x$ac_cv_header_sys_resource_h" = x""yes; then :
+if test "x$ac_cv_header_sys_resource_h" = xyes; then :
    cat >>confdefs.h <<_ACEOF
  #define HAVE_SYS_RESOURCE_H 1
  _ACEOF
@@ -24858,7 +24925,7 @@ do :
  # include <sys/param.h>
  #endif
  "
-if test "x$ac_cv_header_sys_sysctl_h" = x""yes; then :
+if test "x$ac_cv_header_sys_sysctl_h" = xyes; then :
    cat >>confdefs.h <<_ACEOF
  #define HAVE_SYS_SYSCTL_H 1
  _ACEOF
@@ -24875,7 +24942,7 @@ do :
  # include <sys/sysinfo.h>
  #endif
  "
-if test "x$ac_cv_header_machine_hal_sysinfo_h" = x""yes; then :
+if test "x$ac_cv_header_machine_hal_sysinfo_h" = xyes; then :
    cat >>confdefs.h <<_ACEOF
  #define HAVE_MACHINE_HAL_SYSINFO_H 1
  _ACEOF
@@ -24897,7 +24964,7 @@ done
  # to the man page (but aren't), in glibc they're in stdio.h.
  #
  ac_fn_c_check_decl "$LINENO" "fgetc" "ac_cv_have_decl_fgetc" "$ac_includes_default"
-if test "x$ac_cv_have_decl_fgetc" = x""yes; then :
+if test "x$ac_cv_have_decl_fgetc" = xyes; then :
    ac_have_decl=1
  else
    ac_have_decl=0
@@ -24907,7 +24974,7 @@ cat >>confdefs.h <<_ACEOF
  #define HAVE_DECL_FGETC $ac_have_decl
  _ACEOF
  ac_fn_c_check_decl "$LINENO" "fscanf" "ac_cv_have_decl_fscanf" "$ac_includes_default"
-if test "x$ac_cv_have_decl_fscanf" = x""yes; then :
+if test "x$ac_cv_have_decl_fscanf" = xyes; then :
    ac_have_decl=1
  else
    ac_have_decl=0
@@ -24917,7 +24984,7 @@ cat >>confdefs.h <<_ACEOF
  #define HAVE_DECL_FSCANF $ac_have_decl
  _ACEOF
  ac_fn_c_check_decl "$LINENO" "optarg" "ac_cv_have_decl_optarg" "$ac_includes_default"
-if test "x$ac_cv_have_decl_optarg" = x""yes; then :
+if test "x$ac_cv_have_decl_optarg" = xyes; then :
    ac_have_decl=1
  else
    ac_have_decl=0
@@ -24927,7 +24994,7 @@ cat >>confdefs.h <<_ACEOF
  #define HAVE_DECL_OPTARG $ac_have_decl
  _ACEOF
  ac_fn_c_check_decl "$LINENO" "ungetc" "ac_cv_have_decl_ungetc" "$ac_includes_default"
-if test "x$ac_cv_have_decl_ungetc" = x""yes; then :
+if test "x$ac_cv_have_decl_ungetc" = xyes; then :
    ac_have_decl=1
  else
    ac_have_decl=0
@@ -24937,7 +25004,7 @@ cat >>confdefs.h <<_ACEOF
  #define HAVE_DECL_UNGETC $ac_have_decl
  _ACEOF
  ac_fn_c_check_decl "$LINENO" "vfprintf" "ac_cv_have_decl_vfprintf" "$ac_includes_default"
-if test "x$ac_cv_have_decl_vfprintf" = x""yes; then :
+if test "x$ac_cv_have_decl_vfprintf" = xyes; then :
    ac_have_decl=1
  else
    ac_have_decl=0
@@ -24950,7 +25017,7 @@ _ACEOF
  ac_fn_c_check_decl "$LINENO" "sys_errlist" "ac_cv_have_decl_sys_errlist" "#include <stdio.h>
  #include <errno.h>
  "
-if test "x$ac_cv_have_decl_sys_errlist" = x""yes; then :
+if test "x$ac_cv_have_decl_sys_errlist" = xyes; then :
    ac_have_decl=1
  else
    ac_have_decl=0
@@ -24962,7 +25029,7 @@ _ACEOF
  ac_fn_c_check_decl "$LINENO" "sys_nerr" "ac_cv_have_decl_sys_nerr" "#include <stdio.h>
  #include <errno.h>
  "
-if test "x$ac_cv_have_decl_sys_nerr" = x""yes; then :
+if test "x$ac_cv_have_decl_sys_nerr" = xyes; then :
    ac_have_decl=1
  else
    ac_have_decl=0
@@ -24975,7 +25042,7 @@ _ACEOF
  
  { $as_echo "$as_me:${as_lineno-$LINENO}: checking return type of signal handlers" >&5
  $as_echo_n "checking return type of signal handlers... " >&6; }
-if test "${ac_cv_type_signal+set}" = set; then :
+if ${ac_cv_type_signal+:} false; then :
    $as_echo_n "(cached) " >&6
  else
    cat confdefs.h - <<_ACEOF >conftest.$ac_ext
@@ -25018,7 +25085,7 @@ _ACEOF
  # the default includes are sufficient for all these types
  #
  ac_fn_c_check_type "$LINENO" "intmax_t" "ac_cv_type_intmax_t" "$ac_includes_default"
-if test "x$ac_cv_type_intmax_t" = x""yes; then :
+if test "x$ac_cv_type_intmax_t" = xyes; then :
  
  cat >>confdefs.h <<_ACEOF
  #define HAVE_INTMAX_T 1
@@ -25027,7 +25094,7 @@ _ACEOF
  
  fi
  ac_fn_c_check_type "$LINENO" "long double" "ac_cv_type_long_double" "$ac_includes_default"
-if test "x$ac_cv_type_long_double" = x""yes; then :
+if test "x$ac_cv_type_long_double" = xyes; then :
  
  cat >>confdefs.h <<_ACEOF
  #define HAVE_LONG_DOUBLE 1
@@ -25036,7 +25103,7 @@ _ACEOF
  
  fi
  ac_fn_c_check_type "$LINENO" "long long" "ac_cv_type_long_long" "$ac_includes_default"
-if test "x$ac_cv_type_long_long" = x""yes; then :
+if test "x$ac_cv_type_long_long" = xyes; then :
  
  cat >>confdefs.h <<_ACEOF
  #define HAVE_LONG_LONG 1
@@ -25045,7 +25112,7 @@ _ACEOF
  
  fi
  ac_fn_c_check_type "$LINENO" "ptrdiff_t" "ac_cv_type_ptrdiff_t" "$ac_includes_default"
-if test "x$ac_cv_type_ptrdiff_t" = x""yes; then :
+if test "x$ac_cv_type_ptrdiff_t" = xyes; then :
  
  cat >>confdefs.h <<_ACEOF
  #define HAVE_PTRDIFF_T 1
@@ -25054,7 +25121,7 @@ _ACEOF
  
  fi
  ac_fn_c_check_type "$LINENO" "quad_t" "ac_cv_type_quad_t" "$ac_includes_default"
-if test "x$ac_cv_type_quad_t" = x""yes; then :
+if test "x$ac_cv_type_quad_t" = xyes; then :
  
  cat >>confdefs.h <<_ACEOF
  #define HAVE_QUAD_T 1
@@ -25063,7 +25130,7 @@ _ACEOF
  
  fi
  ac_fn_c_check_type "$LINENO" "uint_least32_t" "ac_cv_type_uint_least32_t" "$ac_includes_default"
-if test "x$ac_cv_type_uint_least32_t" = x""yes; then :
+if test "x$ac_cv_type_uint_least32_t" = xyes; then :
  
  cat >>confdefs.h <<_ACEOF
  #define HAVE_UINT_LEAST32_T 1
@@ -25072,7 +25139,7 @@ _ACEOF
  
  fi
  ac_fn_c_check_type "$LINENO" "intptr_t" "ac_cv_type_intptr_t" "$ac_includes_default"
-if test "x$ac_cv_type_intptr_t" = x""yes; then :
+if test "x$ac_cv_type_intptr_t" = xyes; then :
  
  cat >>confdefs.h <<_ACEOF
  #define HAVE_INTPTR_T 1
@@ -25084,7 +25151,7 @@ fi
  
  { $as_echo "$as_me:${as_lineno-$LINENO}: checking for preprocessor stringizing operator" >&5
  $as_echo_n "checking for preprocessor stringizing operator... " >&6; }
-if test "${ac_cv_c_stringize+set}" = set; then :
+if ${ac_cv_c_stringize+:} false; then :
    $as_echo_n "(cached) " >&6
  else
    cat confdefs.h - <<_ACEOF >conftest.$ac_ext
@@ -25116,7 +25183,7 @@ fi
  # But we don't use it in C++ currently.
  { $as_echo "$as_me:${as_lineno-$LINENO}: checking for working volatile" >&5
  $as_echo_n "checking for working volatile... " >&6; }
-if test "${ac_cv_c_volatile+set}" = set; then :
+if ${ac_cv_c_volatile+:} false; then :
    $as_echo_n "(cached) " >&6
  else
    cat confdefs.h - <<_ACEOF >conftest.$ac_ext
@@ -25151,7 +25218,7 @@ fi
  
  { $as_echo "$as_me:${as_lineno-$LINENO}: checking for C/C++ restrict keyword" >&5
  $as_echo_n "checking for C/C++ restrict keyword... " >&6; }
-if test "${ac_cv_c_restrict+set}" = set; then :
+if ${ac_cv_c_restrict+:} false; then :
    $as_echo_n "(cached) " >&6
  else
    ac_cv_c_restrict=no
@@ -25198,7 +25265,7 @@ _ACEOF
  
  { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether <stdarg.h> exists and works" >&5
  $as_echo_n "checking whether <stdarg.h> exists and works... " >&6; }
-if test "${gmp_cv_c_stdarg+set}" = set; then :
+if ${gmp_cv_c_stdarg+:} false; then :
    $as_echo_n "(cached) " >&6
  else
    cat confdefs.h - <<_ACEOF >conftest.$ac_ext
@@ -25239,7 +25306,7 @@ fi
  
  { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether gcc __attribute__ ((const)) works" >&5
  $as_echo_n "checking whether gcc __attribute__ ((const)) works... " >&6; }
-if test "${gmp_cv_c_attribute_const+set}" = set; then :
+if ${gmp_cv_c_attribute_const+:} false; then :
    $as_echo_n "(cached) " >&6
  else
    cat confdefs.h - <<_ACEOF >conftest.$ac_ext
@@ -25271,7 +25338,7 @@ fi
  
  { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether gcc __attribute__ ((malloc)) works" >&5
  $as_echo_n "checking whether gcc __attribute__ ((malloc)) works... " >&6; }
-if test "${gmp_cv_c_attribute_malloc+set}" = set; then :
+if ${gmp_cv_c_attribute_malloc+:} false; then :
    $as_echo_n "(cached) " >&6
  else
    cat >conftest.c <<EOF
@@ -25305,7 +25372,7 @@ fi
  
  { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether gcc __attribute__ ((mode (XX))) works" >&5
  $as_echo_n "checking whether gcc __attribute__ ((mode (XX))) works... " >&6; }
-if test "${gmp_cv_c_attribute_mode+set}" = set; then :
+if ${gmp_cv_c_attribute_mode+:} false; then :
    $as_echo_n "(cached) " >&6
  else
    cat confdefs.h - <<_ACEOF >conftest.$ac_ext
@@ -25337,7 +25404,7 @@ fi
  
  { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether gcc __attribute__ ((noreturn)) works" >&5
  $as_echo_n "checking whether gcc __attribute__ ((noreturn)) works... " >&6; }
-if test "${gmp_cv_c_attribute_noreturn+set}" = set; then :
+if ${gmp_cv_c_attribute_noreturn+:} false; then :
    $as_echo_n "(cached) " >&6
  else
    cat confdefs.h - <<_ACEOF >conftest.$ac_ext
@@ -25370,7 +25437,7 @@ fi
  
  { $as_echo "$as_me:${as_lineno-$LINENO}: checking for inline" >&5
  $as_echo_n "checking for inline... " >&6; }
-if test "${ac_cv_c_inline+set}" = set; then :
+if ${ac_cv_c_inline+:} false; then :
    $as_echo_n "(cached) " >&6
  else
    ac_cv_c_inline=no
@@ -25459,7 +25526,7 @@ case $host in
  *-ncr-sysv4.3*)
    { $as_echo "$as_me:${as_lineno-$LINENO}: checking for _mwvalidcheckl in -lmw" >&5
  $as_echo_n "checking for _mwvalidcheckl in -lmw... " >&6; }
-if test "${ac_cv_lib_mw__mwvalidcheckl+set}" = set; then :
+if ${ac_cv_lib_mw__mwvalidcheckl+:} false; then :
    $as_echo_n "(cached) " >&6
  else
    ac_check_lib_save_LIBS=$LIBS
@@ -25493,13 +25560,13 @@ LIBS=$ac_check_lib_save_LIBS
  fi
  { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_mw__mwvalidcheckl" >&5
  $as_echo "$ac_cv_lib_mw__mwvalidcheckl" >&6; }
-if test "x$ac_cv_lib_mw__mwvalidcheckl" = x""yes; then :
+if test "x$ac_cv_lib_mw__mwvalidcheckl" = xyes; then :
    LIBM="-lmw"
  fi
  
    { $as_echo "$as_me:${as_lineno-$LINENO}: checking for cos in -lm" >&5
  $as_echo_n "checking for cos in -lm... " >&6; }
-if test "${ac_cv_lib_m_cos+set}" = set; then :
+if ${ac_cv_lib_m_cos+:} false; then :
    $as_echo_n "(cached) " >&6
  else
    ac_check_lib_save_LIBS=$LIBS
@@ -25533,7 +25600,7 @@ LIBS=$ac_check_lib_save_LIBS
  fi
  { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_m_cos" >&5
  $as_echo "$ac_cv_lib_m_cos" >&6; }
-if test "x$ac_cv_lib_m_cos" = x""yes; then :
+if test "x$ac_cv_lib_m_cos" = xyes; then :
    LIBM="$LIBM -lm"
  fi
  
@@ -25541,7 +25608,7 @@ fi
  *)
    { $as_echo "$as_me:${as_lineno-$LINENO}: checking for cos in -lm" >&5
  $as_echo_n "checking for cos in -lm... " >&6; }
-if test "${ac_cv_lib_m_cos+set}" = set; then :
+if ${ac_cv_lib_m_cos+:} false; then :
    $as_echo_n "(cached) " >&6
  else
    ac_check_lib_save_LIBS=$LIBS
@@ -25575,7 +25642,7 @@ LIBS=$ac_check_lib_save_LIBS
  fi
  { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_m_cos" >&5
  $as_echo "$ac_cv_lib_m_cos" >&6; }
-if test "x$ac_cv_lib_m_cos" = x""yes; then :
+if test "x$ac_cv_lib_m_cos" = xyes; then :
    LIBM="-lm"
  fi
  
@@ -25589,7 +25656,7 @@ esac
  # for constant arguments.  Useless!
  { $as_echo "$as_me:${as_lineno-$LINENO}: checking for working alloca.h" >&5
  $as_echo_n "checking for working alloca.h... " >&6; }
-if test "${gmp_cv_header_alloca+set}" = set; then :
+if ${gmp_cv_header_alloca+:} false; then :
    $as_echo_n "(cached) " >&6
  else
    cat confdefs.h - <<_ACEOF >conftest.$ac_ext
@@ -25622,7 +25689,7 @@ fi
  
  { $as_echo "$as_me:${as_lineno-$LINENO}: checking for alloca (via gmp-impl.h)" >&5
  $as_echo_n "checking for alloca (via gmp-impl.h)... " >&6; }
-if test "${gmp_cv_func_alloca+set}" = set; then :
+if ${gmp_cv_func_alloca+:} false; then :
    $as_echo_n "(cached) " >&6
  else
    cat confdefs.h - <<_ACEOF >conftest.$ac_ext
@@ -25662,7 +25729,7 @@ fi
  
  { $as_echo "$as_me:${as_lineno-$LINENO}: checking how to allocate temporary memory" >&5
  $as_echo_n "checking how to allocate temporary memory... " >&6; }
-if test "${gmp_cv_option_alloca+set}" = set; then :
+if ${gmp_cv_option_alloca+:} false; then :
    $as_echo_n "(cached) " >&6
  else
    case $enable_alloca in
@@ -25692,7 +25759,7 @@ $as_echo "$gmp_cv_option_alloca" >&6; }
  case $gmp_cv_option_alloca in
    alloca)
      if test $gmp_cv_func_alloca = no; then
-      as_fn_error "--enable-alloca=alloca specified, but alloca not available" "$LINENO" 5
+      as_fn_error $? "--enable-alloca=alloca specified, but alloca not available" "$LINENO" 5
      fi
      $as_echo "#define WANT_TMP_ALLOCA 1" >>confdefs.h
  
@@ -25715,7 +25782,7 @@ case $gmp_cv_option_alloca in
      ;;
    *)
      # checks at the start of configure.in should protect us
-    as_fn_error "unrecognised --enable-alloca=$gmp_cv_option_alloca" "$LINENO" 5
+    as_fn_error $? "unrecognised --enable-alloca=$gmp_cv_option_alloca" "$LINENO" 5
      ;;
  esac
  
@@ -25753,7 +25820,7 @@ rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
  
   { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether byte ordering is bigendian" >&5
  $as_echo_n "checking whether byte ordering is bigendian... " >&6; }
-if test "${ac_cv_c_bigendian+set}" = set; then :
+if ${ac_cv_c_bigendian+:} false; then :
    $as_echo_n "(cached) " >&6
  else
    ac_cv_c_bigendian=unknown
@@ -25988,7 +26055,7 @@ $as_echo "#define AC_APPLE_UNIVERSAL_BUILD 1" >>confdefs.h
  
  { $as_echo "$as_me:${as_lineno-$LINENO}: checking format of \`double' floating point" >&5
  $as_echo_n "checking format of \`double' floating point... " >&6; }
-if test "${gmp_cv_c_double_format+set}" = set; then :
+if ${gmp_cv_c_double_format+:} false; then :
    $as_echo_n "(cached) " >&6
  else
    gmp_cv_c_double_format=unknown
@@ -26247,22 +26314,17 @@ esac
  #   syssgi - IRIX specific
  #   times - not in mingw
  #
-# clock_gettime is in librt on *-*-osf5.1.  We could look for it
-# there, but that's not worth bothering with unless it has a decent
-# resolution (in a quick test clock_getres said only 1 millisecond).
-#
  # AC_FUNC_STRNLEN is not used because we don't want the AC_LIBOBJ
  # replacement setups it gives.  It detects a faulty strnlen on AIX, but
  # missing out on that test is ok since our only use of strnlen is in
  # __gmp_replacement_vsnprintf which is not required on AIX since it has a
  # vsnprintf.
  #
-for ac_func in alarm attr_get clock clock_gettime cputime getpagesize getrusage gettimeofday getsysinfo localeconv memset mmap mprotect nl_langinfo obstack_vprintf popen processor_info pstat_getprocessor raise read_real_time sigaction sigaltstack sigstack syssgi strchr strerror strnlen strtol strtoul sysconf sysctl sysctlbyname times
+for ac_func in alarm attr_get clock cputime getpagesize getrusage gettimeofday getsysinfo localeconv memset mmap mprotect nl_langinfo obstack_vprintf popen processor_info pstat_getprocessor raise read_real_time sigaction sigaltstack sigstack syssgi strchr strerror strnlen strtol strtoul sysconf sysctl sysctlbyname times
  do :
    as_ac_var=`$as_echo "ac_cv_func_$ac_func" | $as_tr_sh`
  ac_fn_c_check_func "$LINENO" "$ac_func" "$as_ac_var"
-eval as_val=\$$as_ac_var
-   if test "x$as_val" = x""yes; then :
+if eval test \"x\$"$as_ac_var"\" = x"yes"; then :
    cat >>confdefs.h <<_ACEOF
  #define `$as_echo "HAVE_$ac_func" | $as_tr_cpp` 1
  _ACEOF
@@ -26271,9 +26333,79 @@ fi
  done
  
  
+# clock_gettime is in librt on *-*-osf5.1 and on glibc, so add -lrt to
+# TUNE_LIBS if needed. On linux (tested on x86_32, 2.6.26),
+# clock_getres reports ns accuracy, while in a quick test on osf
+# clock_getres said only 1 millisecond.
+
+old_LIBS="$LIBS"
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for library containing clock_gettime" >&5
+$as_echo_n "checking for library containing clock_gettime... " >&6; }
+if ${ac_cv_search_clock_gettime+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  ac_func_search_save_LIBS=$LIBS
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+/* Override any GCC internal prototype to avoid an error.
+   Use char because int might match the return type of a GCC
+   builtin and then its argument prototype would still apply.  */
+#ifdef __cplusplus
+extern "C"
+#endif
+char clock_gettime ();
+int
+main ()
+{
+return clock_gettime ();
+  ;
+  return 0;
+}
+_ACEOF
+for ac_lib in '' rt; do
+  if test -z "$ac_lib"; then
+    ac_res="none required"
+  else
+    ac_res=-l$ac_lib
+    LIBS="-l$ac_lib  $ac_func_search_save_LIBS"
+  fi
+  if ac_fn_c_try_link "$LINENO"; then :
+  ac_cv_search_clock_gettime=$ac_res
+fi
+rm -f core conftest.err conftest.$ac_objext \
+    conftest$ac_exeext
+  if ${ac_cv_search_clock_gettime+:} false; then :
+  break
+fi
+done
+if ${ac_cv_search_clock_gettime+:} false; then :
+
+else
+  ac_cv_search_clock_gettime=no
+fi
+rm conftest.$ac_ext
+LIBS=$ac_func_search_save_LIBS
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_search_clock_gettime" >&5
+$as_echo "$ac_cv_search_clock_gettime" >&6; }
+ac_res=$ac_cv_search_clock_gettime
+if test "$ac_res" != no; then :
+  test "$ac_res" = "none required" || LIBS="$ac_res $LIBS"
+
+
+$as_echo "#define HAVE_CLOCK_GETTIME 1" >>confdefs.h
+
+fi
+
+TUNE_LIBS="$LIBS"
+LIBS="$old_LIBS"
+
+
+
  
  ac_fn_c_check_func "$LINENO" "vsnprintf" "ac_cv_func_vsnprintf"
-if test "x$ac_cv_func_vsnprintf" = x""yes; then :
+if test "x$ac_cv_func_vsnprintf" = xyes; then :
    gmp_vsnprintf_exists=yes
  else
    gmp_vsnprintf_exists=no
@@ -26284,7 +26416,7 @@ if test "$gmp_vsnprintf_exists" = no; then
  else
    { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether vsnprintf works" >&5
  $as_echo_n "checking whether vsnprintf works... " >&6; }
-if test "${gmp_cv_func_vsnprintf+set}" = set; then :
+if ${gmp_cv_func_vsnprintf+:} false; then :
    $as_echo_n "(cached) " >&6
  else
    gmp_cv_func_vsnprintf=yes
@@ -26370,7 +26502,7 @@ fi
  
  { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether sscanf needs writable input" >&5
  $as_echo_n "checking whether sscanf needs writable input... " >&6; }
-if test "${gmp_cv_func_sscanf_writable_input+set}" = set; then :
+if ${gmp_cv_func_sscanf_writable_input+:} false; then :
    $as_echo_n "(cached) " >&6
  else
    case $host in
@@ -26387,7 +26519,7 @@ case $gmp_cv_func_sscanf_writable_input in
  $as_echo "#define SSCANF_WRITABLE_INPUT 1" >>confdefs.h
   ;;
    no)  ;;
-  *)   as_fn_error "unrecognised \$gmp_cv_func_sscanf_writable_input" "$LINENO" 5 ;;
+  *)   as_fn_error $? "unrecognised \$gmp_cv_func_sscanf_writable_input" "$LINENO" 5 ;;
  esac
  
  
@@ -26396,7 +26528,7 @@ esac
  #
  ac_fn_c_check_member "$LINENO" "struct pst_processor" "psp_iticksperclktick" "ac_cv_member_struct_pst_processor_psp_iticksperclktick" "#include <sys/pstat.h>
  "
-if test "x$ac_cv_member_struct_pst_processor_psp_iticksperclktick" = x""yes; then :
+if test "x$ac_cv_member_struct_pst_processor_psp_iticksperclktick" = xyes; then :
  
  $as_echo "#define HAVE_PSP_ITICKSPERCLKTICK 1" >>confdefs.h
  
@@ -26421,7 +26553,7 @@ ac_compiler_gnu=$ac_cv_cxx_compiler_gnu
  for ac_header in sstream
  do :
    ac_fn_cxx_check_header_mongrel "$LINENO" "sstream" "ac_cv_header_sstream" "$ac_includes_default"
-if test "x$ac_cv_header_sstream" = x""yes; then :
+if test "x$ac_cv_header_sstream" = xyes; then :
    cat >>confdefs.h <<_ACEOF
  #define HAVE_SSTREAM 1
  _ACEOF
@@ -26432,7 +26564,7 @@ done
  
    ac_fn_cxx_check_type "$LINENO" "std::locale" "ac_cv_type_std__locale" "#include <locale>
  "
-if test "x$ac_cv_type_std__locale" = x""yes; then :
+if test "x$ac_cv_type_std__locale" = xyes; then :
  
  cat >>confdefs.h <<_ACEOF
  #define HAVE_STD__LOCALE 1
@@ -26464,8 +26596,8 @@ fi
  #       divrem_1 and pre_divrem_1.
  
  gmp_mpn_functions_optional="umul udiv                                  \
-  invert_limb sqr_diagonal                                             \
-  mul_2 mul_3 mul_4                                                    \
+  invert_limb sqr_diagonal sqr_diag_addlsh1                            \
+  mul_2 mul_3 mul_4 mul_5 mul_6                                                \
    addmul_2 addmul_3 addmul_4 addmul_5 addmul_6 addmul_7 addmul_8       \
    addlsh1_n sublsh1_n rsblsh1_n rsh1add_n rsh1sub_n                    \
    addlsh2_n sublsh2_n rsblsh2_n                                                \
@@ -26473,29 +26605,36 @@ gmp_mpn_functions_optional="umul udiv                                 \
    add_n_sub_n addaddmul_1msb0"
  
  gmp_mpn_functions="$extra_functions                                       \
-  add add_1 add_n sub sub_1 sub_n neg com mul_1 addmul_1                  \
-  submul_1 lshift rshift dive_1 diveby3 divis divrem divrem_1 divrem_2     \
+  add add_1 add_n sub sub_1 sub_n addcnd_n subcnd_n neg com               \
+  mul_1 addmul_1 submul_1                                                 \
+  add_err1_n add_err2_n add_err3_n sub_err1_n sub_err2_n sub_err3_n       \
+  lshift rshift dive_1 diveby3 divis divrem divrem_1 divrem_2             \
    fib2_ui mod_1 mod_34lsub1 mode1o pre_divrem_1 pre_mod_1 dump            \
    mod_1_1 mod_1_2 mod_1_3 mod_1_4 lshiftc                                 \
    mul mul_fft mul_n sqr mul_basecase sqr_basecase nussbaumer_mul          \
+  mulmid_basecase toom42_mulmid mulmid_n mulmid                                   \
    random random2 pow_1                                                    \
    rootrem sqrtrem get_str set_str scan0 scan1 popcount hamdist cmp        \
    perfsqr perfpow                                                         \
-  gcd_1 gcd gcdext_1 gcdext gcd_lehmer gcd_subdiv_step                    \
-  gcdext_lehmer gcdext_subdiv_step                                        \
-  div_q tdiv_qr jacbase get_d                                             \
-  matrix22_mul hgcd2 hgcd mullo_n mullo_basecase                          \
+  gcd_1 gcd gcdext_1 gcdext gcd_subdiv_step                               \
+  gcdext_lehmer                                                                   \
+  div_q tdiv_qr jacbase jacobi_2 jacobi get_d                             \
+  matrix22_mul matrix22_mul1_inverse_vector                               \
+  hgcd_matrix hgcd2 hgcd_step hgcd_reduce hgcd hgcd_appr                  \
+  hgcd2_jacobi hgcd_jacobi                                                \
+  mullo_n mullo_basecase                                                  \
    toom22_mul toom32_mul toom42_mul toom52_mul toom62_mul                  \
-  toom33_mul toom43_mul toom53_mul toom63_mul                             \
+  toom33_mul toom43_mul toom53_mul toom54_mul toom63_mul                  \
    toom44_mul                                                              \
    toom6h_mul toom6_sqr toom8h_mul toom8_sqr                               \
    toom_couple_handling                                                    \
    toom2_sqr toom3_sqr toom4_sqr                                                   \
-  toom_eval_dgr3_pm1 toom_eval_dgr3_pm2                                   \
+  toom_eval_dgr3_pm1 toom_eval_dgr3_pm2                                           \
    toom_eval_pm1 toom_eval_pm2 toom_eval_pm2exp toom_eval_pm2rexp          \
    toom_interpolate_5pts toom_interpolate_6pts toom_interpolate_7pts       \
    toom_interpolate_8pts toom_interpolate_12pts toom_interpolate_16pts     \
    invertappr invert binvert mulmod_bnm1 sqrmod_bnm1                       \
+  div_qr_2 div_qr_2n_pi1 div_qr_2u_pi1                                    \
    sbpi1_div_q sbpi1_div_qr sbpi1_divappr_q                                \
    dcpi1_div_q dcpi1_div_qr dcpi1_divappr_q                                \
    mu_div_qr mu_divappr_q mu_div_q                                         \
@@ -26503,11 +26642,13 @@ gmp_mpn_functions="$extra_functions                                      \
    sbpi1_bdiv_q sbpi1_bdiv_qr                                              \
    dcpi1_bdiv_q dcpi1_bdiv_qr                                              \
    mu_bdiv_q mu_bdiv_qr                                                    \
-  bdiv_q bdiv_qr                                                          \
-  divexact bdiv_dbm1c redc_1 redc_2 redc_n powm powlo powm_sec subcnd_n           \
-  redc_1_sec trialdiv remove                                              \
+  bdiv_q bdiv_qr broot brootinv bsqrt bsqrtinv                            \
+  divexact bdiv_dbm1c redc_1 redc_2 redc_n powm powlo powm_sec            \
+  sb_div_qr_sec sb_div_r_sec sbpi1_div_qr_sec sbpi1_div_r_sec             \
+  trialdiv remove                                                         \
    and_n andn_n nand_n ior_n iorn_n nior_n xor_n xnor_n                    \
-  copyi copyd zero                                                        \
+  copyi copyd zero tabselect                                              \
+  comb_tables                                                             \
    $gmp_mpn_functions_optional"
  
  
@@ -26605,6 +26746,14 @@ gmp_mpn_functions_optional=$remove_from_list_tmp
    pre_divrem_1) tmp_fbase=preinv_divrem_1 ;;
    mode1o)      tmp_fbase=modexact_1c_odd ;;
    pre_mod_1)   tmp_fbase=preinv_mod_1 ;;
+  mod_1_1)     tmp_fbase=mod_1_1p ;;
+  mod_1_1_cps) tmp_fbase=mod_1_1p_cps ;;
+  mod_1_2)     tmp_fbase=mod_1s_2p ;;
+  mod_1_2_cps) tmp_fbase=mod_1s_2p_cps ;;
+  mod_1_3)     tmp_fbase=mod_1s_3p ;;
+  mod_1_3_cps) tmp_fbase=mod_1s_3p_cps ;;
+  mod_1_4)     tmp_fbase=mod_1s_4p ;;
+  mod_1_4_cps) tmp_fbase=mod_1s_4p_cps ;;
    *)           tmp_fbase=$tmp_fn ;;
  esac
  
@@ -26643,6 +26792,14 @@ _ACEOF
    pre_divrem_1) tmp_fbase=preinv_divrem_1 ;;
    mode1o)      tmp_fbase=modexact_1c_odd ;;
    pre_mod_1)   tmp_fbase=preinv_mod_1 ;;
+  mod_1_1)     tmp_fbase=mod_1_1p ;;
+  mod_1_1_cps) tmp_fbase=mod_1_1p_cps ;;
+  mod_1_2)     tmp_fbase=mod_1s_2p ;;
+  mod_1_2_cps) tmp_fbase=mod_1s_2p_cps ;;
+  mod_1_3)     tmp_fbase=mod_1s_3p ;;
+  mod_1_3_cps) tmp_fbase=mod_1s_3p_cps ;;
+  mod_1_4)     tmp_fbase=mod_1s_4p ;;
+  mod_1_4_cps) tmp_fbase=mod_1s_4p_cps ;;
    *)           tmp_fbase=$tmp_fn ;;
  esac
  
@@ -26665,6 +26822,14 @@ esac
    pre_divrem_1) tmp_fbase=preinv_divrem_1 ;;
    mode1o)      tmp_fbase=modexact_1c_odd ;;
    pre_mod_1)   tmp_fbase=preinv_mod_1 ;;
+  mod_1_1)     tmp_fbase=mod_1_1p ;;
+  mod_1_1_cps) tmp_fbase=mod_1_1p_cps ;;
+  mod_1_2)     tmp_fbase=mod_1s_2p ;;
+  mod_1_2_cps) tmp_fbase=mod_1s_2p_cps ;;
+  mod_1_3)     tmp_fbase=mod_1s_3p ;;
+  mod_1_3_cps) tmp_fbase=mod_1s_3p_cps ;;
+  mod_1_4)     tmp_fbase=mod_1s_4p ;;
+  mod_1_4_cps) tmp_fbase=mod_1s_4p_cps ;;
    *)           tmp_fbase=$tmp_fn ;;
  esac
  
@@ -26735,6 +26900,13 @@ esac
  tmp_mulfunc=
  case $tmp_fn in
    add_n|sub_n)       tmp_mulfunc="aors_n"    ;;
+  add_err1_n|sub_err1_n)
+                    tmp_mulfunc="aors_err1_n" ;;
+  add_err2_n|sub_err2_n)
+                    tmp_mulfunc="aors_err2_n" ;;
+  add_err3_n|sub_err3_n)
+                    tmp_mulfunc="aors_err3_n" ;;
+  addcnd_n|subcnd_n) tmp_mulfunc="aorscnd_n"   ;;
    addmul_1|submul_1) tmp_mulfunc="aorsmul_1" ;;
    popcount|hamdist)  tmp_mulfunc="popham"    ;;
    and_n|andn_n|nand_n | ior_n|iorn_n|nior_n | xor_n|xnor_n)
@@ -26760,6 +26932,10 @@ case $tmp_fn in
                      tmp_mulfunc="aorrlsh_n sorrlsh_n";;
    rsh1add_n|rsh1sub_n)
                      tmp_mulfunc="rsh1aors_n";;
+  sb_div_qr_sec|sb_div_r_sec)
+                    tmp_mulfunc="sb_div_sec";;
+  sbpi1_div_qr_sec|sbpi1_div_r_sec)
+                    tmp_mulfunc="sbpi1_div_sec";;
  esac
  
  
@@ -26768,6 +26944,17 @@ esac
            tmp_file=$srcdir/mpn/$tmp_dir/$tmp_base.$tmp_ext
            if test -f $tmp_file; then
  
+           # If the host uses a non-standard ABI, check if tmp_file supports it
+           #
+           if test -n "$GMP_NONSTD_ABI" && test $tmp_ext != "c"; then
+             abi=`sed -n 's/^[         ]*ABI_SUPPORT(\(.*\))/\1/p' $tmp_file `
+             if echo "$abi" | grep -q "\\b${GMP_NONSTD_ABI}\\b"; then
+               true
+             else
+               continue
+             fi
+           fi
+
              mpn_objects="$mpn_objects ${tmp_prefix}_$tmp_fn.lo"
              mpn_objs_in_libgmp="$mpn_objs_in_libgmp mpn/${tmp_prefix}_$tmp_fn.lo"
  
@@ -26777,6 +26964,14 @@ esac
    pre_divrem_1) tmp_fbase=preinv_divrem_1 ;;
    mode1o)      tmp_fbase=modexact_1c_odd ;;
    pre_mod_1)   tmp_fbase=preinv_mod_1 ;;
+  mod_1_1)     tmp_fbase=mod_1_1p ;;
+  mod_1_1_cps) tmp_fbase=mod_1_1p_cps ;;
+  mod_1_2)     tmp_fbase=mod_1s_2p ;;
+  mod_1_2_cps) tmp_fbase=mod_1s_2p_cps ;;
+  mod_1_3)     tmp_fbase=mod_1s_3p ;;
+  mod_1_3_cps) tmp_fbase=mod_1s_3p_cps ;;
+  mod_1_4)     tmp_fbase=mod_1s_4p ;;
+  mod_1_4_cps) tmp_fbase=mod_1s_4p_cps ;;
    *)           tmp_fbase=$tmp_fn ;;
  esac
  
@@ -26807,6 +27002,7 @@ define(OPERATION_$tmp_fn)
  define(__gmpn_$tmp_fbase, __gmpn_${tmp_fbase}_$tmp_suffix)
  define(__gmpn_$tmp_fbasec,__gmpn_${tmp_fbasec}_${tmp_suffix})
  define(__gmpn_preinv_${tmp_fbase},__gmpn_preinv_${tmp_fbase}_${tmp_suffix})
+define(__gmpn_${tmp_fbase}_cps,__gmpn_${tmp_fbase}_cps_${tmp_suffix})
  
  $tmp_d_n_l  For k6 and k7 gcd_1 calling their corresponding mpn_modexact_1_odd
  ifdef(\`__gmpn_modexact_1_odd',,
@@ -26824,6 +27020,7 @@ include($mpn_relative_top_srcdir/mpn/$tmp_dir/$tmp_base.asm)
  #define __gmpn_$tmp_fbase           __gmpn_${tmp_fbase}_$tmp_suffix
  #define __gmpn_$tmp_fbasec          __gmpn_${tmp_fbasec}_${tmp_suffix}
  #define __gmpn_preinv_${tmp_fbase}  __gmpn_preinv_${tmp_fbase}_${tmp_suffix}
+#define __gmpn_${tmp_fbase}_cps     __gmpn_${tmp_fbase}_cps_${tmp_suffix}
  
  #include \"$mpn_relative_top_srcdir/mpn/$tmp_dir/$tmp_base.c\"
  " >mpn/${tmp_prefix}_$tmp_fn.c
@@ -26838,6 +27035,13 @@ include($mpn_relative_top_srcdir/mpn/$tmp_dir/$tmp_base.asm)
              if grep "^PROLOGUE(mpn_preinv_$tmp_fn)" $tmp_file >/dev/null; then
                echo "DECL_preinv_$tmp_fbase (__gmpn_preinv_${tmp_fbase}_$tmp_suffix);" >>fat.h
                CPUVEC_SETUP="$CPUVEC_SETUP    decided_cpuvec.preinv_$tmp_fbase = __gmpn_preinv_${tmp_fbase}_${tmp_suffix}; \\
+"
+            fi
+
+            # Ditto for any mod_1...cps variant
+            if grep "^PROLOGUE(mpn_${tmp_fbase}_cps)" $tmp_file >/dev/null; then
+              echo "DECL_${tmp_fbase}_cps (__gmpn_${tmp_fbase}_cps_$tmp_suffix);" >>fat.h
+              CPUVEC_SETUP="$CPUVEC_SETUP    decided_cpuvec.${tmp_fbase}_cps = __gmpn_${tmp_fbase}_cps_${tmp_suffix}; \\
  "
              fi
            fi
@@ -26886,6 +27090,13 @@ for tmp_fn in $gmp_mpn_functions; do
  tmp_mulfunc=
  case $tmp_fn in
    add_n|sub_n)       tmp_mulfunc="aors_n"    ;;
+  add_err1_n|sub_err1_n)
+                    tmp_mulfunc="aors_err1_n" ;;
+  add_err2_n|sub_err2_n)
+                    tmp_mulfunc="aors_err2_n" ;;
+  add_err3_n|sub_err3_n)
+                    tmp_mulfunc="aors_err3_n" ;;
+  addcnd_n|subcnd_n) tmp_mulfunc="aorscnd_n"   ;;
    addmul_1|submul_1) tmp_mulfunc="aorsmul_1" ;;
    popcount|hamdist)  tmp_mulfunc="popham"    ;;
    and_n|andn_n|nand_n | ior_n|iorn_n|nior_n | xor_n|xnor_n)
@@ -26911,6 +27122,10 @@ case $tmp_fn in
                      tmp_mulfunc="aorrlsh_n sorrlsh_n";;
    rsh1add_n|rsh1sub_n)
                      tmp_mulfunc="rsh1aors_n";;
+  sb_div_qr_sec|sb_div_r_sec)
+                    tmp_mulfunc="sb_div_sec";;
+  sbpi1_div_qr_sec|sbpi1_div_r_sec)
+                    tmp_mulfunc="sbpi1_div_sec";;
  esac
  
  
@@ -26967,6 +27182,17 @@ esac
              esac
            fi
  
+         # If the host uses a non-standard ABI, check if tmp_file supports it
+         #
+         if test -n "$GMP_NONSTD_ABI" && test $tmp_ext != "c"; then
+           abi=`sed -n 's/^[   ]*ABI_SUPPORT(\(.*\))/\1/p' $tmp_file `
+           if echo "$abi" | grep -q "\\b${GMP_NONSTD_ABI}\\b"; then
+             true
+           else
+             continue
+           fi
+         fi
+
            found=yes
            eval found_$tmp_ext=yes
  
@@ -27016,7 +27242,7 @@ _ACEOF
        fi
      done
      if test $found = no; then
-      as_fn_error "no version of $tmp_fn found in path: $path" "$LINENO" 5
+      as_fn_error $? "no version of $tmp_fn found in path: $path" "$LINENO" 5
      fi
    fi
  done
@@ -27035,7 +27261,7 @@ if test $found_asm = yes; then
  
  { $as_echo "$as_me:${as_lineno-$LINENO}: checking for suitable m4" >&5
  $as_echo_n "checking for suitable m4... " >&6; }
-if test "${gmp_cv_prog_m4+set}" = set; then :
+if ${gmp_cv_prog_m4+:} false; then :
    $as_echo_n "(cached) " >&6
  else
    if test -n "$M4"; then
@@ -27069,7 +27295,7 @@ EOF
      done
      IFS="$ac_save_ifs"
      if test -z "$gmp_cv_prog_m4"; then
-      as_fn_error "No usable m4 in \$PATH or /usr/5bin (see config.log for reasons)." "$LINENO" 5
+      as_fn_error $? "No usable m4 in \$PATH or /usr/5bin (see config.log for reasons)." "$LINENO" 5
      fi
    fi
    rm -f conftest.m4
@@ -27083,7 +27309,7 @@ M4="$gmp_cv_prog_m4"
  
  { $as_echo "$as_me:${as_lineno-$LINENO}: checking if m4wrap produces spurious output" >&5
  $as_echo_n "checking if m4wrap produces spurious output... " >&6; }
-if test "${gmp_cv_m4_m4wrap_spurious+set}" = set; then :
+if ${gmp_cv_m4_m4wrap_spurious+:} false; then :
    $as_echo_n "(cached) " >&6
  else
    # hide the d-n-l from autoconf's error checking
@@ -27124,7 +27350,7 @@ fi
  if test "$gmp_asm_syntax_testing" != no; then
    { $as_echo "$as_me:${as_lineno-$LINENO}: checking how to switch to text section" >&5
  $as_echo_n "checking how to switch to text section... " >&6; }
-if test "${gmp_cv_asm_text+set}" = set; then :
+if ${gmp_cv_asm_text+:} false; then :
    $as_echo_n "(cached) " >&6
  else
    for i in ".text" ".code" ".csect .text[PR]"; do
@@ -27152,7 +27378,7 @@ rm -f conftest*
  
  done
  if test -z "$gmp_cv_asm_text"; then
-  as_fn_error "Cannot determine text section directive" "$LINENO" 5
+  as_fn_error $? "Cannot determine text section directive" "$LINENO" 5
  fi
  
  fi
@@ -27162,7 +27388,7 @@ echo "define(<TEXT>, <$gmp_cv_asm_text>)" >> $gmp_tmpconfigm4
  
    { $as_echo "$as_me:${as_lineno-$LINENO}: checking how to switch to data section" >&5
  $as_echo_n "checking how to switch to data section... " >&6; }
-if test "${gmp_cv_asm_data+set}" = set; then :
+if ${gmp_cv_asm_data+:} false; then :
    $as_echo_n "(cached) " >&6
  else
    case $host in
@@ -27178,7 +27404,7 @@ echo "define(<DATA>, <$gmp_cv_asm_data>)" >> $gmp_tmpconfigm4
  
  { $as_echo "$as_me:${as_lineno-$LINENO}: checking for assembler label suffix" >&5
  $as_echo_n "checking for assembler label suffix... " >&6; }
-if test "${gmp_cv_asm_label_suffix+set}" = set; then :
+if ${gmp_cv_asm_label_suffix+:} false; then :
    $as_echo_n "(cached) " >&6
  else
    gmp_cv_asm_label_suffix=unknown
@@ -27208,7 +27434,7 @@ rm -f conftest*
  
  done
  if test "$gmp_cv_asm_label_suffix" = "unknown"; then
-  as_fn_error "Cannot determine label suffix" "$LINENO" 5
+  as_fn_error $? "Cannot determine label suffix" "$LINENO" 5
  fi
  
  fi
@@ -27219,7 +27445,7 @@ echo "define(<LABEL_SUFFIX>, <$gmp_cv_asm_label_suffix>)" >> $gmp_tmpconfigm4
  
  { $as_echo "$as_me:${as_lineno-$LINENO}: checking for assembler global directive" >&5
  $as_echo_n "checking for assembler global directive... " >&6; }
-if test "${gmp_cv_asm_globl+set}" = set; then :
+if ${gmp_cv_asm_globl+:} false; then :
    $as_echo_n "(cached) " >&6
  else
    case $host in
@@ -27236,7 +27462,7 @@ echo "define(<GLOBL>, <$gmp_cv_asm_globl>)" >> $gmp_tmpconfigm4
  
  { $as_echo "$as_me:${as_lineno-$LINENO}: checking for assembler global directive attribute" >&5
  $as_echo_n "checking for assembler global directive attribute... " >&6; }
-if test "${gmp_cv_asm_globl_attr+set}" = set; then :
+if ${gmp_cv_asm_globl_attr+:} false; then :
    $as_echo_n "(cached) " >&6
  else
    case $gmp_cv_asm_globl in
@@ -27252,7 +27478,7 @@ echo "define(<GLOBL_ATTR>, <$gmp_cv_asm_globl_attr>)" >> $gmp_tmpconfigm4
  
  { $as_echo "$as_me:${as_lineno-$LINENO}: checking if globals are prefixed by underscore" >&5
  $as_echo_n "checking if globals are prefixed by underscore... " >&6; }
-if test "${gmp_cv_asm_underscore+set}" = set; then :
+if ${gmp_cv_asm_underscore+:} false; then :
    $as_echo_n "(cached) " >&6
  else
    gmp_cv_asm_underscore="unknown"
@@ -27323,7 +27549,7 @@ esac
  
  { $as_echo "$as_me:${as_lineno-$LINENO}: checking how to switch to read-only data section" >&5
  $as_echo_n "checking how to switch to read-only data section... " >&6; }
-if test "${gmp_cv_asm_rodata+set}" = set; then :
+if ${gmp_cv_asm_rodata+:} false; then :
    $as_echo_n "(cached) " >&6
  else
  
@@ -27381,7 +27607,7 @@ echo "define(<RODATA>, <$gmp_cv_asm_rodata>)" >> $gmp_tmpconfigm4
  
    { $as_echo "$as_me:${as_lineno-$LINENO}: checking for assembler .type directive" >&5
  $as_echo_n "checking for assembler .type directive... " >&6; }
-if test "${gmp_cv_asm_type+set}" = set; then :
+if ${gmp_cv_asm_type+:} false; then :
    $as_echo_n "(cached) " >&6
  else
    gmp_cv_asm_type=
@@ -27419,7 +27645,7 @@ echo "define(<TYPE>, <$gmp_cv_asm_type>)" >> $gmp_tmpconfigm4
  
    { $as_echo "$as_me:${as_lineno-$LINENO}: checking for assembler .size directive" >&5
  $as_echo_n "checking for assembler .size directive... " >&6; }
-if test "${gmp_cv_asm_size+set}" = set; then :
+if ${gmp_cv_asm_size+:} false; then :
    $as_echo_n "(cached) " >&6
  else
    gmp_cv_asm_size=
@@ -27456,7 +27682,7 @@ echo "define(<SIZE>, <$gmp_cv_asm_size>)" >> $gmp_tmpconfigm4
  
  { $as_echo "$as_me:${as_lineno-$LINENO}: checking for assembler local label prefix" >&5
  $as_echo_n "checking for assembler local label prefix... " >&6; }
-if test "${gmp_cv_asm_lsym_prefix+set}" = set; then :
+if ${gmp_cv_asm_lsym_prefix+:} false; then :
    $as_echo_n "(cached) " >&6
  else
    gmp_tmp_pre_appears=yes
@@ -27530,7 +27756,7 @@ _ACEOF
  
  { $as_echo "$as_me:${as_lineno-$LINENO}: checking for assembler byte directive" >&5
  $as_echo_n "checking for assembler byte directive... " >&6; }
-if test "${gmp_cv_asm_byte+set}" = set; then :
+if ${gmp_cv_asm_byte+:} false; then :
    $as_echo_n "(cached) " >&6
  else
    for i in .byte data1; do
@@ -27560,7 +27786,7 @@ rm -f conftest*
  
  done
  if test -z "$gmp_cv_asm_byte"; then
-  as_fn_error "Cannot determine how to emit a data byte" "$LINENO" 5
+  as_fn_error $? "Cannot determine how to emit a data byte" "$LINENO" 5
  fi
  
  fi
@@ -27574,7 +27800,7 @@ $as_echo "$gmp_cv_asm_byte" >&6; }
  
  { $as_echo "$as_me:${as_lineno-$LINENO}: checking how to define a 32-bit word" >&5
  $as_echo_n "checking how to define a 32-bit word... " >&6; }
-if test "${gmp_cv_asm_w32+set}" = set; then :
+if ${gmp_cv_asm_w32+:} false; then :
    $as_echo_n "(cached) " >&6
  else
    case $host in
@@ -27622,7 +27848,7 @@ rm -f conftest*
      ;;
  esac
  if test -z "$gmp_cv_asm_w32"; then
-  as_fn_error "cannot determine how to define a 32-bit word" "$LINENO" 5
+  as_fn_error $? "cannot determine how to define a 32-bit word" "$LINENO" 5
  fi
  
  fi
@@ -27637,7 +27863,7 @@ echo "define(<W32>, <$gmp_cv_asm_w32>)" >> $gmp_tmpconfigm4
  
  { $as_echo "$as_me:${as_lineno-$LINENO}: checking if .align assembly directive is logarithmic" >&5
  $as_echo_n "checking if .align assembly directive is logarithmic... " >&6; }
-if test "${gmp_cv_asm_align_log+set}" = set; then :
+if ${gmp_cv_asm_align_log+:} false; then :
    $as_echo_n "(cached) " >&6
  else
    cat >conftest.s <<EOF
@@ -27667,7 +27893,7 @@ else
    cat conftest.out >&5
    echo "configure: failed program was:" >&5
    cat conftest.s >&5
-  as_fn_error "cannot assemble alignment test" "$LINENO" 5
+  as_fn_error $? "cannot assemble alignment test" "$LINENO" 5
  fi
  rm -f conftest*
  
@@ -27690,7 +27916,7 @@ echo "include_mpn(\`pa32/pa-defs.m4')" >> $gmp_tmpconfigm4i
      ia64*-*-* | itanium-*-* | itanium2-*-*)
        { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether assembler .align padding is good" >&5
  $as_echo_n "checking whether assembler .align padding is good... " >&6; }
-if test "${gmp_cv_asm_ia64_align_ok+set}" = set; then :
+if ${gmp_cv_asm_ia64_align_ok+:} false; then :
    $as_echo_n "(cached) " >&6
  else
    cat >conftest.awk <<\EOF
@@ -27816,7 +28042,7 @@ echo "define(<IA64_ALIGN_OK>, <\`$gmp_cv_asm_ia64_align_ok'>)" >> $gmp_tmpconfig
  
  { $as_echo "$as_me:${as_lineno-$LINENO}: checking assembler instruction and register style" >&5
  $as_echo_n "checking assembler instruction and register style... " >&6; }
-if test "${gmp_cv_asm_m68k_instruction+set}" = set; then :
+if ${gmp_cv_asm_m68k_instruction+:} false; then :
    $as_echo_n "(cached) " >&6
  else
    for i in "addl %d0,%d1" "add.l %d0,%d1" "addl d0,d1" "add.l d0,d1"; do
@@ -27844,7 +28070,7 @@ rm -f conftest*
  
  done
  if test -z "$gmp_cv_asm_m68k_instruction"; then
-  as_fn_error "cannot determine assembler instruction and register style" "$LINENO" 5
+  as_fn_error $? "cannot determine assembler instruction and register style" "$LINENO" 5
  fi
  
  fi
@@ -27855,7 +28081,7 @@ case $gmp_cv_asm_m68k_instruction in
  "addl %d0,%d1")  want_dot_size=no;  want_register_percent=yes ;;
  "add.l d0,d1")   want_dot_size=yes; want_register_percent=no  ;;
  "add.l %d0,%d1") want_dot_size=yes; want_register_percent=yes ;;
-*) as_fn_error "oops, unrecognised instruction and register style" "$LINENO" 5 ;;
+*) as_fn_error $? "oops, unrecognised instruction and register style" "$LINENO" 5 ;;
  esac
  
  echo "define(<WANT_REGISTER_PERCENT>, <\`$want_register_percent'>)" >> $gmp_tmpconfigm4
@@ -27868,18 +28094,18 @@ echo "define(<WANT_DOT_SIZE>, <\`$want_dot_size'>)" >> $gmp_tmpconfigm4
  
  { $as_echo "$as_me:${as_lineno-$LINENO}: checking assembler addressing style" >&5
  $as_echo_n "checking assembler addressing style... " >&6; }
-if test "${gmp_cv_asm_m68k_addressing+set}" = set; then :
+if ${gmp_cv_asm_m68k_addressing+:} false; then :
    $as_echo_n "(cached) " >&6
  else
    case $gmp_cv_asm_m68k_instruction in
  addl*)  movel=movel ;;
  add.l*) movel=move.l ;;
-*) as_fn_error "oops, unrecognised gmp_cv_asm_m68k_instruction" "$LINENO" 5 ;;
+*) as_fn_error $? "oops, unrecognised gmp_cv_asm_m68k_instruction" "$LINENO" 5 ;;
  esac
  case $gmp_cv_asm_m68k_instruction in
  *"%d0,%d1") dreg=%d0; areg=%a0 ;;
  *"d0,d1")   dreg=d0;  areg=a0  ;;
-*) as_fn_error "oops, unrecognised gmp_cv_asm_m68k_instruction" "$LINENO" 5 ;;
+*) as_fn_error $? "oops, unrecognised gmp_cv_asm_m68k_instruction" "$LINENO" 5 ;;
  esac
  cat >conftest.s <<EOF
         $gmp_cv_asm_text
@@ -27913,7 +28139,7 @@ else
    cat conftest.out >&5
    echo "configure: failed program was:" >&5
    cat conftest.s >&5
-  as_fn_error "cannot determine assembler addressing style" "$LINENO" 5
+  as_fn_error $? "cannot determine assembler addressing style" "$LINENO" 5
  fi
  rm -f conftest*
  
@@ -27931,7 +28157,7 @@ echo "define(<WANT_ADDRESSING>, <\`$gmp_cv_asm_m68k_addressing'>)" >> $gmp_tmpco
  
  { $as_echo "$as_me:${as_lineno-$LINENO}: checking assembler shortest branches" >&5
  $as_echo_n "checking assembler shortest branches... " >&6; }
-if test "${gmp_cv_asm_m68k_branches+set}" = set; then :
+if ${gmp_cv_asm_m68k_branches+:} false; then :
    $as_echo_n "(cached) " >&6
  else
    for i in jra jbra bra; do
@@ -27960,7 +28186,7 @@ rm -f conftest*
  
  done
  if test -z "$gmp_cv_asm_m68k_branches"; then
-  as_fn_error "cannot determine assembler branching style" "$LINENO" 5
+  as_fn_error $? "cannot determine assembler branching style" "$LINENO" 5
  fi
  
  fi
@@ -27975,7 +28201,7 @@ echo "define(<WANT_BRANCHES>, <\`$gmp_cv_asm_m68k_branches'>)" >> $gmp_tmpconfig
  
  { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether compiler output is PIC by default" >&5
  $as_echo_n "checking whether compiler output is PIC by default... " >&6; }
-if test "${gmp_cv_asm_powerpc_pic+set}" = set; then :
+if ${gmp_cv_asm_powerpc_pic+:} false; then :
    $as_echo_n "(cached) " >&6
  else
    gmp_cv_asm_powerpc_pic=yes
@@ -28012,7 +28238,7 @@ echo "define(<PIC_ALWAYS>,<$gmp_cv_asm_powerpc_pic>)" >> $gmp_tmpconfigm4
  
  { $as_echo "$as_me:${as_lineno-$LINENO}: checking if the assembler needs r on registers" >&5
  $as_echo_n "checking if the assembler needs r on registers... " >&6; }
-if test "${gmp_cv_asm_powerpc_r_registers+set}" = set; then :
+if ${gmp_cv_asm_powerpc_r_registers+:} false; then :
    $as_echo_n "(cached) " >&6
  else
    cat >conftest.s <<EOF
@@ -28047,7 +28273,7 @@ else
    cat conftest.out >&5
    echo "configure: failed program was:" >&5
    cat conftest.s >&5
-  as_fn_error "neither \"mtctr 6\" nor \"mtctr r6\" works" "$LINENO" 5
+  as_fn_error $? "neither \"mtctr 6\" nor \"mtctr r6\" works" "$LINENO" 5
  fi
  rm -f conftest*
  
@@ -28068,7 +28294,7 @@ echo "include_mpn(\`powerpc32/powerpc-defs.m4')" >> $gmp_tmpconfigm4i
        case $host in
          *-*-aix*)
           case $ABI in
-           64 | aix64)
+           mode64)
  echo "include_mpn(\`powerpc64/aix.m4')" >> $gmp_tmpconfigm4i
   ;;
              *)
@@ -28109,13 +28335,13 @@ echo "include_mpn(\`powerpc32/eabi.m4')" >> $gmp_tmpconfigm4i
  echo "include_mpn(\`powerpc32/aix.m4')" >> $gmp_tmpconfigm4i
  
        ;;
-    sparcv9*-*-* | ultrasparc*-*-* | sparc64-*-*)
+    *sparc*-*-*)
        case $ABI in
          64)
  
  { $as_echo "$as_me:${as_lineno-$LINENO}: checking if the assembler accepts \".register\"" >&5
  $as_echo_n "checking if the assembler accepts \".register\"... " >&6; }
-if test "${gmp_cv_asm_sparc_register+set}" = set; then :
+if ${gmp_cv_asm_sparc_register+:} false; then :
    $as_echo_n "(cached) " >&6
  else
    cat >conftest.s <<EOF
@@ -28154,7 +28380,7 @@ echo "define(<HAVE_REGISTER>,<$gmp_cv_asm_sparc_register>)" >> $gmp_tmpconfigm4
  
  { $as_echo "$as_me:${as_lineno-$LINENO}: checking if the .align directive accepts an 0x90 fill in .text" >&5
  $as_echo_n "checking if the .align directive accepts an 0x90 fill in .text... " >&6; }
-if test "${gmp_cv_asm_align_fill_0x90+set}" = set; then :
+if ${gmp_cv_asm_align_fill_0x90+:} false; then :
    $as_echo_n "(cached) " >&6
  else
    cat >conftest.s <<EOF
@@ -28206,7 +28432,7 @@ echo "include_mpn(\`x86/x86-defs.m4')" >> $gmp_tmpconfigm4i
  
  { $as_echo "$as_me:${as_lineno-$LINENO}: checking for assembler COFF type directives" >&5
  $as_echo_n "checking for assembler COFF type directives... " >&6; }
-if test "${gmp_cv_asm_x86_coff_type+set}" = set; then :
+if ${gmp_cv_asm_x86_coff_type+:} false; then :
    $as_echo_n "(cached) " >&6
  else
    cat >conftest.s <<EOF
@@ -28250,7 +28476,7 @@ echo "define(<HAVE_COFF_TYPE>, <$gmp_cv_asm_x86_coff_type>)" >> $gmp_tmpconfigm4
  
  { $as_echo "$as_me:${as_lineno-$LINENO}: checking if _GLOBAL_OFFSET_TABLE_ is prefixed by underscore" >&5
  $as_echo_n "checking if _GLOBAL_OFFSET_TABLE_ is prefixed by underscore... " >&6; }
-if test "${gmp_cv_asm_x86_got_underscore+set}" = set; then :
+if ${gmp_cv_asm_x86_got_underscore+:} false; then :
    $as_echo_n "(cached) " >&6
  else
    gmp_cv_asm_x86_got_underscore="not applicable"
@@ -28298,7 +28524,7 @@ fi
  
  { $as_echo "$as_me:${as_lineno-$LINENO}: checking if the assembler takes cl with shldl" >&5
  $as_echo_n "checking if the assembler takes cl with shldl... " >&6; }
-if test "${gmp_cv_asm_x86_shldl_cl+set}" = set; then :
+if ${gmp_cv_asm_x86_shldl_cl+:} false; then :
    $as_echo_n "(cached) " >&6
  else
    cat >conftest.s <<EOF
@@ -28386,10 +28612,10 @@ if test "$enable_static" = yes; then
      fi
      mcount_nonpic_call=`grep 'call.*mcount' conftest.s`
      if test -z "$mcount_nonpic_call"; then
-      as_fn_error "Cannot find mcount call for non-PIC" "$LINENO" 5
+      as_fn_error $? "Cannot find mcount call for non-PIC" "$LINENO" 5
      fi
    else
-    as_fn_error "Cannot compile test program for non-PIC" "$LINENO" 5
+    as_fn_error $? "Cannot compile test program for non-PIC" "$LINENO" 5
    fi
  fi
  
@@ -28415,10 +28641,10 @@ if test "$enable_shared" = yes; then
      fi
      mcount_pic_call=`grep 'call.*mcount' conftest.s`
      if test -z "$mcount_pic_call"; then
-      as_fn_error "Cannot find mcount call for PIC" "$LINENO" 5
+      as_fn_error $? "Cannot find mcount call for PIC" "$LINENO" 5
      fi
    else
-    as_fn_error "Cannot compile test program for PIC" "$LINENO" 5
+    as_fn_error $? "Cannot compile test program for PIC" "$LINENO" 5
    fi
  fi
  
@@ -28447,7 +28673,7 @@ echo "include_mpn(\`x86/darwin.m4')" >> $gmp_tmpconfigm4i
   ;;
           esac
            ;;
-        64)
+        64|x32)
  
  echo "include_mpn(\`x86_64/x86_64-defs.m4')" >> $gmp_tmpconfigm4i
  
@@ -28457,6 +28683,10 @@ echo "include_mpn(\`x86_64/x86_64-defs.m4')" >> $gmp_tmpconfigm4i
             *-*-darwin*)
  
  echo "include_mpn(\`x86_64/darwin.m4')" >> $gmp_tmpconfigm4i
+ ;;
+           *-*-mingw* | *-*-cygwin)
+
+echo "include_mpn(\`x86_64/dos64.m4')" >> $gmp_tmpconfigm4i
   ;;
           esac
            ;;
@@ -28485,7 +28715,7 @@ for gmp_mparam_dir in $path; do
    fi
  done
  if test -z "$gmp_mparam_source"; then
-  as_fn_error "no version of gmp-mparam.h found in path: $path" "$LINENO" 5
+  as_fn_error $? "no version of gmp-mparam.h found in path: $path" "$LINENO" 5
  fi
  
  # For a helpful message from tune/tuneup.c
@@ -28503,17 +28733,19 @@ _ACEOF
  
  
  
-# Copy any SQR_TOOM2_THRESHOLD from gmp-mparam.h to config.m4.
-# Some versions of sqr_basecase.asm use this.
+# Copy relevant parameters from gmp-mparam.h to config.m4.
+# We only do this for parameters that are used by some assembly files.
  # Fat binaries do this on a per-file basis, so skip in that case.
  #
  if test -z "$fat_path"; then
-  tmp_gmp_karatsuba_sqr_threshold=`sed -n 's/^#define SQR_TOOM2_THRESHOLD[     ]*\([0-9][0-9]*\).*$/\1/p' $gmp_mparam_source`
-  if test -n "$tmp_gmp_karatsuba_sqr_threshold"; then
+  for i in SQR_TOOM2_THRESHOLD BMOD_1_TO_MOD_1_THRESHOLD SHLD_SLOW SHRD_SLOW; do
+    value=`sed -n 's/^#define '$i'[    ]*\([0-9][0-9]*\).*$/\1/p' $gmp_mparam_source`
+    if test -n "$value"; then
  
-echo "define(<SQR_TOOM2_THRESHOLD>,<$tmp_gmp_karatsuba_sqr_threshold>)" >> $gmp_tmpconfigm4
+echo "define(<$i>,<$value>)" >> $gmp_tmpconfigm4
  
-  fi
+    fi
+  done
  fi
  
  
@@ -28530,7 +28762,7 @@ fi
  # This bug is HP SR number 8606223364.
  { $as_echo "$as_me:${as_lineno-$LINENO}: checking size of void *" >&5
  $as_echo_n "checking size of void *... " >&6; }
-if test "${ac_cv_sizeof_void_p+set}" = set; then :
+if ${ac_cv_sizeof_void_p+:} false; then :
    $as_echo_n "(cached) " >&6
  else
    if ac_fn_c_compute_int "$LINENO" "(long int) (sizeof (void *))" "ac_cv_sizeof_void_p"        "$ac_includes_default"; then :
@@ -28539,9 +28771,8 @@ else
    if test "$ac_cv_type_void_p" = yes; then
       { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
  $as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
-{ as_fn_set_status 77
-as_fn_error "cannot compute sizeof (void *)
-See \`config.log' for more details." "$LINENO" 5; }; }
+as_fn_error 77 "cannot compute sizeof (void *)
+See \`config.log' for more details" "$LINENO" 5; }
     else
       ac_cv_sizeof_void_p=0
     fi
@@ -28564,7 +28795,7 @@ _ACEOF
  # This bug is HP SR number 8606223364.
  { $as_echo "$as_me:${as_lineno-$LINENO}: checking size of unsigned short" >&5
  $as_echo_n "checking size of unsigned short... " >&6; }
-if test "${ac_cv_sizeof_unsigned_short+set}" = set; then :
+if ${ac_cv_sizeof_unsigned_short+:} false; then :
    $as_echo_n "(cached) " >&6
  else
    if ac_fn_c_compute_int "$LINENO" "(long int) (sizeof (unsigned short))" "ac_cv_sizeof_unsigned_short"        "$ac_includes_default"; then :
@@ -28573,9 +28804,8 @@ else
    if test "$ac_cv_type_unsigned_short" = yes; then
       { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
  $as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
-{ as_fn_set_status 77
-as_fn_error "cannot compute sizeof (unsigned short)
-See \`config.log' for more details." "$LINENO" 5; }; }
+as_fn_error 77 "cannot compute sizeof (unsigned short)
+See \`config.log' for more details" "$LINENO" 5; }
     else
       ac_cv_sizeof_unsigned_short=0
     fi
@@ -28598,7 +28828,7 @@ _ACEOF
  # This bug is HP SR number 8606223364.
  { $as_echo "$as_me:${as_lineno-$LINENO}: checking size of unsigned" >&5
  $as_echo_n "checking size of unsigned... " >&6; }
-if test "${ac_cv_sizeof_unsigned+set}" = set; then :
+if ${ac_cv_sizeof_unsigned+:} false; then :
    $as_echo_n "(cached) " >&6
  else
    if ac_fn_c_compute_int "$LINENO" "(long int) (sizeof (unsigned))" "ac_cv_sizeof_unsigned"        "$ac_includes_default"; then :
@@ -28607,9 +28837,8 @@ else
    if test "$ac_cv_type_unsigned" = yes; then
       { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
  $as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
-{ as_fn_set_status 77
-as_fn_error "cannot compute sizeof (unsigned)
-See \`config.log' for more details." "$LINENO" 5; }; }
+as_fn_error 77 "cannot compute sizeof (unsigned)
+See \`config.log' for more details" "$LINENO" 5; }
     else
       ac_cv_sizeof_unsigned=0
     fi
@@ -28632,7 +28861,7 @@ _ACEOF
  # This bug is HP SR number 8606223364.
  { $as_echo "$as_me:${as_lineno-$LINENO}: checking size of unsigned long" >&5
  $as_echo_n "checking size of unsigned long... " >&6; }
-if test "${ac_cv_sizeof_unsigned_long+set}" = set; then :
+if ${ac_cv_sizeof_unsigned_long+:} false; then :
    $as_echo_n "(cached) " >&6
  else
    if ac_fn_c_compute_int "$LINENO" "(long int) (sizeof (unsigned long))" "ac_cv_sizeof_unsigned_long"        "$ac_includes_default"; then :
@@ -28641,9 +28870,8 @@ else
    if test "$ac_cv_type_unsigned_long" = yes; then
       { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
  $as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
-{ as_fn_set_status 77
-as_fn_error "cannot compute sizeof (unsigned long)
-See \`config.log' for more details." "$LINENO" 5; }; }
+as_fn_error 77 "cannot compute sizeof (unsigned long)
+See \`config.log' for more details" "$LINENO" 5; }
     else
       ac_cv_sizeof_unsigned_long=0
     fi
@@ -28666,7 +28894,7 @@ _ACEOF
  # This bug is HP SR number 8606223364.
  { $as_echo "$as_me:${as_lineno-$LINENO}: checking size of mp_limb_t" >&5
  $as_echo_n "checking size of mp_limb_t... " >&6; }
-if test "${ac_cv_sizeof_mp_limb_t+set}" = set; then :
+if ${ac_cv_sizeof_mp_limb_t+:} false; then :
    $as_echo_n "(cached) " >&6
  else
    if ac_fn_c_compute_int "$LINENO" "(long int) (sizeof (mp_limb_t))" "ac_cv_sizeof_mp_limb_t"        "#define __GMP_WITHIN_CONFIGURE 1   /* ignore template stuff */
@@ -28681,9 +28909,8 @@ else
    if test "$ac_cv_type_mp_limb_t" = yes; then
       { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
  $as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
-{ as_fn_set_status 77
-as_fn_error "cannot compute sizeof (mp_limb_t)
-See \`config.log' for more details." "$LINENO" 5; }; }
+as_fn_error 77 "cannot compute sizeof (mp_limb_t)
+See \`config.log' for more details" "$LINENO" 5; }
     else
       ac_cv_sizeof_mp_limb_t=0
     fi
@@ -28701,7 +28928,7 @@ _ACEOF
  
  
  if test "$ac_cv_sizeof_mp_limb_t" = 0; then
-  as_fn_error "Oops, mp_limb_t doesn't seem to work" "$LINENO" 5
+  as_fn_error $? "Oops, mp_limb_t doesn't seem to work" "$LINENO" 5
  fi
  GMP_LIMB_BITS=`expr 8 \* $ac_cv_sizeof_mp_limb_t`
  
@@ -28719,12 +28946,12 @@ echo "define(<SIZEOF_UNSIGNED>,<$ac_cv_sizeof_unsigned>)" >> $gmp_tmpconfigm4
  mparam_bits=`sed -n 's/^#define GMP_LIMB_BITS[         ][      ]*\([0-9]*\).*$/\1/p' $gmp_mparam_source`
  if test -n "$mparam_bits" && test "$mparam_bits" -ne $GMP_LIMB_BITS; then
    if test "$test_CFLAGS" = set; then
-    as_fn_error "Oops, mp_limb_t is $GMP_LIMB_BITS bits, but the assembler code
+    as_fn_error $? "Oops, mp_limb_t is $GMP_LIMB_BITS bits, but the assembler code
  in this configuration expects $mparam_bits bits.
  You appear to have set \$CFLAGS, perhaps you also need to tell GMP the
  intended ABI, see \"ABI and ISA\" in the manual." "$LINENO" 5
    else
-    as_fn_error "Oops, mp_limb_t is $GMP_LIMB_BITS bits, but the assembler code
+    as_fn_error $? "Oops, mp_limb_t is $GMP_LIMB_BITS bits, but the assembler code
  in this configuration expects $mparam_bits bits." "$LINENO" 5
    fi
  fi
@@ -28740,20 +28967,6 @@ echo "define(<GMP_NUMB_BITS>,eval(GMP_LIMB_BITS-GMP_NAIL_BITS))" >> $gmp_tmpconf
  
  
  
-# Exclude the mpn random functions from mpbsd since that would drag in the
-# top-level rand things, all of which are unnecessary for libmp.  There's
-# other unnecessary objects too actually, if we could be bothered figuring
-# out exactly which they are.
-#
-mpn_objs_in_libmp=
-for i in $mpn_objs_in_libgmp; do
-  case $i in
-  *random*) ;;
-  *) mpn_objs_in_libmp="$mpn_objs_in_libmp $i" ;;
-  esac
-done
-
-
  
  
  
@@ -28848,7 +29061,7 @@ esac
  
  ac_fn_c_check_type "$LINENO" "stack_t" "ac_cv_type_stack_t" "#include <signal.h>
  "
-if test "x$ac_cv_type_stack_t" = x""yes; then :
+if test "x$ac_cv_type_stack_t" = xyes; then :
  
  cat >>confdefs.h <<_ACEOF
  #define HAVE_STACK_T 1
@@ -28886,7 +29099,7 @@ LIBCURSES=
  if test $with_readline != no; then
    { $as_echo "$as_me:${as_lineno-$LINENO}: checking for tputs in -lncurses" >&5
  $as_echo_n "checking for tputs in -lncurses... " >&6; }
-if test "${ac_cv_lib_ncurses_tputs+set}" = set; then :
+if ${ac_cv_lib_ncurses_tputs+:} false; then :
    $as_echo_n "(cached) " >&6
  else
    ac_check_lib_save_LIBS=$LIBS
@@ -28920,12 +29133,12 @@ LIBS=$ac_check_lib_save_LIBS
  fi
  { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_ncurses_tputs" >&5
  $as_echo "$ac_cv_lib_ncurses_tputs" >&6; }
-if test "x$ac_cv_lib_ncurses_tputs" = x""yes; then :
+if test "x$ac_cv_lib_ncurses_tputs" = xyes; then :
    LIBCURSES=-lncurses
  else
    { $as_echo "$as_me:${as_lineno-$LINENO}: checking for tputs in -lcurses" >&5
  $as_echo_n "checking for tputs in -lcurses... " >&6; }
-if test "${ac_cv_lib_curses_tputs+set}" = set; then :
+if ${ac_cv_lib_curses_tputs+:} false; then :
    $as_echo_n "(cached) " >&6
  else
    ac_check_lib_save_LIBS=$LIBS
@@ -28959,7 +29172,7 @@ LIBS=$ac_check_lib_save_LIBS
  fi
  { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_curses_tputs" >&5
  $as_echo "$ac_cv_lib_curses_tputs" >&6; }
-if test "x$ac_cv_lib_curses_tputs" = x""yes; then :
+if test "x$ac_cv_lib_curses_tputs" = xyes; then :
    LIBCURSES=-lcurses
  fi
  
@@ -28972,7 +29185,7 @@ if test $with_readline = detect; then
    use_readline=no
    { $as_echo "$as_me:${as_lineno-$LINENO}: checking for readline in -lreadline" >&5
  $as_echo_n "checking for readline in -lreadline... " >&6; }
-if test "${ac_cv_lib_readline_readline+set}" = set; then :
+if ${ac_cv_lib_readline_readline+:} false; then :
    $as_echo_n "(cached) " >&6
  else
    ac_check_lib_save_LIBS=$LIBS
@@ -29006,11 +29219,11 @@ LIBS=$ac_check_lib_save_LIBS
  fi
  { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_readline_readline" >&5
  $as_echo "$ac_cv_lib_readline_readline" >&6; }
-if test "x$ac_cv_lib_readline_readline" = x""yes; then :
+if test "x$ac_cv_lib_readline_readline" = xyes; then :
    ac_fn_c_check_header_mongrel "$LINENO" "readline/readline.h" "ac_cv_header_readline_readline_h" "$ac_includes_default"
-if test "x$ac_cv_header_readline_readline_h" = x""yes; then :
+if test "x$ac_cv_header_readline_readline_h" = xyes; then :
    ac_fn_c_check_header_mongrel "$LINENO" "readline/history.h" "ac_cv_header_readline_history_h" "$ac_includes_default"
-if test "x$ac_cv_header_readline_history_h" = x""yes; then :
+if test "x$ac_cv_header_readline_history_h" = xyes; then :
    use_readline=yes
  fi
  
@@ -29039,7 +29252,7 @@ do
  set dummy $ac_prog; ac_word=$2
  { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
  $as_echo_n "checking for $ac_word... " >&6; }
-if test "${ac_cv_prog_YACC+set}" = set; then :
+if ${ac_cv_prog_YACC+:} false; then :
    $as_echo_n "(cached) " >&6
  else
    if test -n "$YACC"; then
@@ -29051,7 +29264,7 @@ do
    IFS=$as_save_IFS
    test -z "$as_dir" && as_dir=.
      for ac_exec_ext in '' $ac_executable_extensions; do
-  if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+  if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
      ac_cv_prog_YACC="$ac_prog"
      $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
      break 2
@@ -29082,7 +29295,7 @@ do
  set dummy $ac_prog; ac_word=$2
  { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
  $as_echo_n "checking for $ac_word... " >&6; }
-if test "${ac_cv_prog_LEX+set}" = set; then :
+if ${ac_cv_prog_LEX+:} false; then :
    $as_echo_n "(cached) " >&6
  else
    if test -n "$LEX"; then
@@ -29094,7 +29307,7 @@ do
    IFS=$as_save_IFS
    test -z "$as_dir" && as_dir=.
      for ac_exec_ext in '' $ac_executable_extensions; do
-  if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+  if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
      ac_cv_prog_LEX="$ac_prog"
      $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
      break 2
@@ -29126,7 +29339,8 @@ a { ECHO; }
  b { REJECT; }
  c { yymore (); }
  d { yyless (1); }
-e { yyless (input () != 0); }
+e { /* IRIX 6.5 flex 2.5.4 underquotes its yyless argument.  */
+    yyless ((input () != 0)); }
  f { unput (yytext[0]); }
  . { BEGIN INITIAL; }
  %%
@@ -29152,7 +29366,7 @@ $as_echo "$ac_try_echo"; } >&5
    test $ac_status = 0; }
  { $as_echo "$as_me:${as_lineno-$LINENO}: checking lex output file root" >&5
  $as_echo_n "checking lex output file root... " >&6; }
-if test "${ac_cv_prog_lex_root+set}" = set; then :
+if ${ac_cv_prog_lex_root+:} false; then :
    $as_echo_n "(cached) " >&6
  else
  
@@ -29161,7 +29375,7 @@ if test -f lex.yy.c; then
  elif test -f lexyy.c; then
    ac_cv_prog_lex_root=lexyy
  else
-  as_fn_error "cannot find output from $LEX; giving up" "$LINENO" 5
+  as_fn_error $? "cannot find output from $LEX; giving up" "$LINENO" 5
  fi
  fi
  { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_prog_lex_root" >&5
@@ -29171,7 +29385,7 @@ LEX_OUTPUT_ROOT=$ac_cv_prog_lex_root
  if test -z "${LEXLIB+set}"; then
    { $as_echo "$as_me:${as_lineno-$LINENO}: checking lex library" >&5
  $as_echo_n "checking lex library... " >&6; }
-if test "${ac_cv_lib_lex+set}" = set; then :
+if ${ac_cv_lib_lex+:} false; then :
    $as_echo_n "(cached) " >&6
  else
  
@@ -29201,7 +29415,7 @@ fi
  
  { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether yytext is a pointer" >&5
  $as_echo_n "checking whether yytext is a pointer... " >&6; }
-if test "${ac_cv_prog_lex_yytext_pointer+set}" = set; then :
+if ${ac_cv_prog_lex_yytext_pointer+:} false; then :
    $as_echo_n "(cached) " >&6
  else
    # POSIX says lex can declare yytext either as a pointer or an array; the
@@ -29212,7 +29426,8 @@ ac_save_LIBS=$LIBS
  LIBS="$LEXLIB $ac_save_LIBS"
  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
  /* end confdefs.h.  */
-#define YYTEXT_POINTER 1
+
+  #define YYTEXT_POINTER 1
  `cat $LEX_OUTPUT_ROOT.c`
  _ACEOF
  if ac_fn_c_try_link "$LINENO"; then :
@@ -29249,7 +29464,7 @@ if test -n "$ac_tool_prefix"; then
  set dummy ${ac_tool_prefix}ranlib; ac_word=$2
  { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
  $as_echo_n "checking for $ac_word... " >&6; }
-if test "${ac_cv_prog_RANLIB+set}" = set; then :
+if ${ac_cv_prog_RANLIB+:} false; then :
    $as_echo_n "(cached) " >&6
  else
    if test -n "$RANLIB"; then
@@ -29261,7 +29476,7 @@ do
    IFS=$as_save_IFS
    test -z "$as_dir" && as_dir=.
      for ac_exec_ext in '' $ac_executable_extensions; do
-  if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+  if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
      ac_cv_prog_RANLIB="${ac_tool_prefix}ranlib"
      $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
      break 2
@@ -29289,7 +29504,7 @@ if test -z "$ac_cv_prog_RANLIB"; then
  set dummy ranlib; ac_word=$2
  { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
  $as_echo_n "checking for $ac_word... " >&6; }
-if test "${ac_cv_prog_ac_ct_RANLIB+set}" = set; then :
+if ${ac_cv_prog_ac_ct_RANLIB+:} false; then :
    $as_echo_n "(cached) " >&6
  else
    if test -n "$ac_ct_RANLIB"; then
@@ -29301,7 +29516,7 @@ do
    IFS=$as_save_IFS
    test -z "$as_dir" && as_dir=.
      for ac_exec_ext in '' $ac_executable_extensions; do
-  if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+  if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
      ac_cv_prog_ac_ct_RANLIB="ranlib"
      $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
      break 2
@@ -29367,7 +29582,7 @@ echo "define(\`__CONFIG_M4_INCLUDED__')" >> $gmp_configm4
  # FIXME: Upcoming version of autoconf/automake may not like broken lines.
  #        Right now automake isn't accepting the new AC_CONFIG_FILES scheme.
  
-ac_config_files="$ac_config_files Makefile mpbsd/Makefile mpf/Makefile mpn/Makefile mpq/Makefile mpz/Makefile printf/Makefile scanf/Makefile cxx/Makefile tests/Makefile tests/devel/Makefile tests/mpbsd/Makefile tests/mpf/Makefile tests/mpn/Makefile tests/mpq/Makefile tests/mpz/Makefile tests/rand/Makefile tests/misc/Makefile tests/cxx/Makefile doc/Makefile tune/Makefile demos/Makefile demos/calc/Makefile demos/expr/Makefile gmp.h:gmp-h.in mp.h:mp-h.in"
+ac_config_files="$ac_config_files Makefile mpf/Makefile mpn/Makefile mpq/Makefile mpz/Makefile printf/Makefile scanf/Makefile rand/Makefile cxx/Makefile tests/Makefile tests/devel/Makefile tests/mpf/Makefile tests/mpn/Makefile tests/mpq/Makefile tests/mpz/Makefile tests/rand/Makefile tests/misc/Makefile tests/cxx/Makefile doc/Makefile tune/Makefile demos/Makefile demos/calc/Makefile demos/expr/Makefile gmp.h:gmp-h.in"
  
  cat >confcache <<\_ACEOF
  # This file is a shell script that caches the results of configure
@@ -29433,10 +29648,21 @@ $as_echo "$as_me: WARNING: cache variable $ac_var contains a newline" >&2;} ;;
       :end' >>confcache
  if diff "$cache_file" confcache >/dev/null 2>&1; then :; else
    if test -w "$cache_file"; then
-    test "x$cache_file" != "x/dev/null" &&
+    if test "x$cache_file" != "x/dev/null"; then
        { $as_echo "$as_me:${as_lineno-$LINENO}: updating cache $cache_file" >&5
  $as_echo "$as_me: updating cache $cache_file" >&6;}
-    cat confcache >$cache_file
+      if test ! -f "$cache_file" || test -h "$cache_file"; then
+       cat confcache >"$cache_file"
+      else
+        case $cache_file in #(
+        */* | ?:*)
+         mv -f confcache "$cache_file"$$ &&
+         mv -f "$cache_file"$$ "$cache_file" ;; #(
+        *)
+         mv -f confcache "$cache_file" ;;
+       esac
+      fi
+    fi
    else
      { $as_echo "$as_me:${as_lineno-$LINENO}: not updating unwritable cache $cache_file" >&5
  $as_echo "$as_me: not updating unwritable cache $cache_file" >&6;}
@@ -29452,6 +29678,7 @@ DEFS=-DHAVE_CONFIG_H
  
  ac_libobjs=
  ac_ltlibobjs=
+U=
  for ac_i in : $LIBOBJS; do test "x$ac_i" = x: && continue
    # 1. Remove the extension, and $U if already installed.
    ac_script='s/\$U\././;s/\.o$//;s/\.obj$//'
@@ -29475,24 +29702,20 @@ else
  fi
  
  if test -z "${MAINTAINER_MODE_TRUE}" && test -z "${MAINTAINER_MODE_FALSE}"; then
-  as_fn_error "conditional \"MAINTAINER_MODE\" was never defined.
-Usually this means the macro was only invoked conditionally." "$LINENO" 5
-fi
-if test -z "${WANT_MPBSD_TRUE}" && test -z "${WANT_MPBSD_FALSE}"; then
-  as_fn_error "conditional \"WANT_MPBSD\" was never defined.
+  as_fn_error $? "conditional \"MAINTAINER_MODE\" was never defined.
  Usually this means the macro was only invoked conditionally." "$LINENO" 5
  fi
  if test -z "${WANT_CXX_TRUE}" && test -z "${WANT_CXX_FALSE}"; then
-  as_fn_error "conditional \"WANT_CXX\" was never defined.
+  as_fn_error $? "conditional \"WANT_CXX\" was never defined.
  Usually this means the macro was only invoked conditionally." "$LINENO" 5
  fi
  if test -z "${ENABLE_STATIC_TRUE}" && test -z "${ENABLE_STATIC_FALSE}"; then
-  as_fn_error "conditional \"ENABLE_STATIC\" was never defined.
+  as_fn_error $? "conditional \"ENABLE_STATIC\" was never defined.
  Usually this means the macro was only invoked conditionally." "$LINENO" 5
  fi
  
  
-: ${CONFIG_STATUS=./config.status}
+: "${CONFIG_STATUS=./config.status}"
  ac_write_fail=0
  ac_clean_files_save=$ac_clean_files
  ac_clean_files="$ac_clean_files $CONFIG_STATUS"
@@ -29593,6 +29816,7 @@ fi
  IFS=" ""       $as_nl"
  
  # Find who we are.  Look in the path if we contain no directory separator.
+as_myself=
  case $0 in #((
    *[\\/]* ) as_myself=$0 ;;
    *) as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
@@ -29638,19 +29862,19 @@ export LANGUAGE
  (unset CDPATH) >/dev/null 2>&1 && unset CDPATH
  
  
-# as_fn_error ERROR [LINENO LOG_FD]
-# ---------------------------------
+# as_fn_error STATUS ERROR [LINENO LOG_FD]
+# ----------------------------------------
  # Output "`basename $0`: error: ERROR" to stderr. If LINENO and LOG_FD are
  # provided, also output the error to LOG_FD, referencing LINENO. Then exit the
-# script with status $?, using 1 if that was 0.
+# script with STATUS, using 1 if that was 0.
  as_fn_error ()
  {
-  as_status=$?; test $as_status -eq 0 && as_status=1
-  if test "$3"; then
-    as_lineno=${as_lineno-"$2"} as_lineno_stack=as_lineno_stack=$as_lineno_stack
-    $as_echo "$as_me:${as_lineno-$LINENO}: error: $1" >&$3
+  as_status=$1; test $as_status -eq 0 && as_status=1
+  if test "$4"; then
+    as_lineno=${as_lineno-"$3"} as_lineno_stack=as_lineno_stack=$as_lineno_stack
+    $as_echo "$as_me:${as_lineno-$LINENO}: error: $2" >&$4
    fi
-  $as_echo "$as_me: error: $1" >&2
+  $as_echo "$as_me: error: $2" >&2
    as_fn_exit $as_status
  } # as_fn_error
  
@@ -29788,16 +30012,16 @@ if (echo >conf$$.file) 2>/dev/null; then
      # ... but there are two gotchas:
      # 1) On MSYS, both `ln -s file dir' and `ln file dir' fail.
      # 2) DJGPP < 2.04 has no symlinks; `ln -s' creates a wrapper executable.
-    # In both cases, we have to default to `cp -p'.
+    # In both cases, we have to default to `cp -pR'.
      ln -s conf$$.file conf$$.dir 2>/dev/null && test ! -f conf$$.exe ||
-      as_ln_s='cp -p'
+      as_ln_s='cp -pR'
    elif ln conf$$.file conf$$ 2>/dev/null; then
      as_ln_s=ln
    else
-    as_ln_s='cp -p'
+    as_ln_s='cp -pR'
    fi
  else
-  as_ln_s='cp -p'
+  as_ln_s='cp -pR'
  fi
  rm -f conf$$ conf$$.exe conf$$.dir/conf$$.file conf$$.file
  rmdir conf$$.dir 2>/dev/null
@@ -29846,7 +30070,7 @@ $as_echo X"$as_dir" |
        test -d "$as_dir" && break
      done
      test -z "$as_dirs" || eval "mkdir $as_dirs"
-  } || test -d "$as_dir" || as_fn_error "cannot create directory $as_dir"
+  } || test -d "$as_dir" || as_fn_error $? "cannot create directory $as_dir"
  
  
  } # as_fn_mkdir_p
@@ -29857,28 +30081,16 @@ else
    as_mkdir_p=false
  fi
  
-if test -x / >/dev/null 2>&1; then
-  as_test_x='test -x'
-else
-  if ls -dL / >/dev/null 2>&1; then
-    as_ls_L_option=L
-  else
-    as_ls_L_option=
-  fi
-  as_test_x='
-    eval sh -c '\''
-      if test -d "$1"; then
-       test -d "$1/.";
-      else
-       case $1 in #(
-       -*)set "./$1";;
-       esac;
-       case `ls -ld'$as_ls_L_option' "$1" 2>/dev/null` in #((
-       ???[sx]*):;;*)false;;esac;fi
-    '\'' sh
-  '
-fi
-as_executable_p=$as_test_x
+
+# as_fn_executable_p FILE
+# -----------------------
+# Test if FILE is an executable regular file.
+as_fn_executable_p ()
+{
+  test -f "$1" && test -x "$1"
+} # as_fn_executable_p
+as_test_x='test -x'
+as_executable_p=as_fn_executable_p
  
  # Sed expression to map a string onto a valid CPP name.
  as_tr_cpp="eval sed 'y%*$as_cr_letters%P$as_cr_LETTERS%;s%[^_$as_cr_alnum]%_%g'"
@@ -29899,8 +30111,8 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
  # report actual input values of CONFIG_FILES etc. instead of their
  # values after options handling.
  ac_log="
-This file was extended by GNU MP $as_me 5.0.5, which was
-generated by GNU Autoconf 2.65.  Invocation command line was
+This file was extended by GNU MP $as_me 5.1.3, which was
+generated by GNU Autoconf 2.69.  Invocation command line was
  
    CONFIG_FILES    = $CONFIG_FILES
    CONFIG_HEADERS  = $CONFIG_HEADERS
@@ -29971,11 +30183,11 @@ _ACEOF
  cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
  ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`"
  ac_cs_version="\\
-GNU MP config.status 5.0.5
-configured by $0, generated by GNU Autoconf 2.65,
+GNU MP config.status 5.1.3
+configured by $0, generated by GNU Autoconf 2.69,
    with options \\"\$ac_cs_config\\"
  
-Copyright (C) 2009 Free Software Foundation, Inc.
+Copyright (C) 2012 Free Software Foundation, Inc.
  This config.status script is free software; the Free Software Foundation
  gives unlimited permission to copy, distribute and modify it."
  
@@ -29993,11 +30205,16 @@ ac_need_defaults=:
  while test $# != 0
  do
    case $1 in
-  --*=*)
+  --*=?*)
      ac_option=`expr "X$1" : 'X\([^=]*\)='`
      ac_optarg=`expr "X$1" : 'X[^=]*=\(.*\)'`
      ac_shift=:
      ;;
+  --*=)
+    ac_option=`expr "X$1" : 'X\([^=]*\)='`
+    ac_optarg=
+    ac_shift=:
+    ;;
    *)
      ac_option=$1
      ac_optarg=$2
@@ -30019,6 +30236,7 @@ do
      $ac_shift
      case $ac_optarg in
      *\'*) ac_optarg=`$as_echo "$ac_optarg" | sed "s/'/'\\\\\\\\''/g"` ;;
+    '') as_fn_error $? "missing file argument" ;;
      esac
      as_fn_append CONFIG_FILES " '$ac_optarg'"
      ac_need_defaults=false;;
@@ -30031,7 +30249,7 @@ do
      ac_need_defaults=false;;
    --he | --h)
      # Conflict between --help and --header
-    as_fn_error "ambiguous option: \`$1'
+    as_fn_error $? "ambiguous option: \`$1'
  Try \`$0 --help' for more information.";;
    --help | --hel | -h )
      $as_echo "$ac_cs_usage"; exit ;;
@@ -30040,7 +30258,7 @@ Try \`$0 --help' for more information.";;
      ac_cs_silent=: ;;
  
    # This is an error.
-  -*) as_fn_error "unrecognized option: \`$1'
+  -*) as_fn_error $? "unrecognized option: \`$1'
  Try \`$0 --help' for more information." ;;
  
    *) as_fn_append ac_config_targets " $1"
@@ -30060,7 +30278,7 @@ fi
  _ACEOF
  cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
  if \$ac_cs_recheck; then
-  set X '$SHELL' '$0' $ac_configure_args \$ac_configure_extra_args --no-create --no-recursion
+  set X $SHELL '$0' $ac_configure_args \$ac_configure_extra_args --no-create --no-recursion
    shift
    \$as_echo "running CONFIG_SHELL=$SHELL \$*" >&6
    CONFIG_SHELL='$SHELL'
@@ -30563,17 +30781,16 @@ do
      "demos/pexpr-config.h") CONFIG_FILES="$CONFIG_FILES demos/pexpr-config.h:demos/pexpr-config-h.in" ;;
      "demos/calc/calc-config.h") CONFIG_FILES="$CONFIG_FILES demos/calc/calc-config.h:demos/calc/calc-config-h.in" ;;
      "Makefile") CONFIG_FILES="$CONFIG_FILES Makefile" ;;
-    "mpbsd/Makefile") CONFIG_FILES="$CONFIG_FILES mpbsd/Makefile" ;;
      "mpf/Makefile") CONFIG_FILES="$CONFIG_FILES mpf/Makefile" ;;
      "mpn/Makefile") CONFIG_FILES="$CONFIG_FILES mpn/Makefile" ;;
      "mpq/Makefile") CONFIG_FILES="$CONFIG_FILES mpq/Makefile" ;;
      "mpz/Makefile") CONFIG_FILES="$CONFIG_FILES mpz/Makefile" ;;
      "printf/Makefile") CONFIG_FILES="$CONFIG_FILES printf/Makefile" ;;
      "scanf/Makefile") CONFIG_FILES="$CONFIG_FILES scanf/Makefile" ;;
+    "rand/Makefile") CONFIG_FILES="$CONFIG_FILES rand/Makefile" ;;
      "cxx/Makefile") CONFIG_FILES="$CONFIG_FILES cxx/Makefile" ;;
      "tests/Makefile") CONFIG_FILES="$CONFIG_FILES tests/Makefile" ;;
      "tests/devel/Makefile") CONFIG_FILES="$CONFIG_FILES tests/devel/Makefile" ;;
-    "tests/mpbsd/Makefile") CONFIG_FILES="$CONFIG_FILES tests/mpbsd/Makefile" ;;
      "tests/mpf/Makefile") CONFIG_FILES="$CONFIG_FILES tests/mpf/Makefile" ;;
      "tests/mpn/Makefile") CONFIG_FILES="$CONFIG_FILES tests/mpn/Makefile" ;;
      "tests/mpq/Makefile") CONFIG_FILES="$CONFIG_FILES tests/mpq/Makefile" ;;
@@ -30587,9 +30804,8 @@ do
      "demos/calc/Makefile") CONFIG_FILES="$CONFIG_FILES demos/calc/Makefile" ;;
      "demos/expr/Makefile") CONFIG_FILES="$CONFIG_FILES demos/expr/Makefile" ;;
      "gmp.h") CONFIG_FILES="$CONFIG_FILES gmp.h:gmp-h.in" ;;
-    "mp.h") CONFIG_FILES="$CONFIG_FILES mp.h:mp-h.in" ;;
  
-  *) as_fn_error "invalid argument: \`$ac_config_target'" "$LINENO" 5;;
+  *) as_fn_error $? "invalid argument: \`$ac_config_target'" "$LINENO" 5;;
    esac
  done
  
@@ -30613,9 +30829,10 @@ fi
  # after its creation but before its name has been assigned to `$tmp'.
  $debug ||
  {
-  tmp=
+  tmp= ac_tmp=
    trap 'exit_status=$?
-  { test -z "$tmp" || test ! -d "$tmp" || rm -fr "$tmp"; } && exit $exit_status
+  : "${ac_tmp:=$tmp}"
+  { test ! -d "$ac_tmp" || rm -fr "$ac_tmp"; } && exit $exit_status
  ' 0
    trap 'as_fn_exit 1' 1 2 13 15
  }
@@ -30623,12 +30840,13 @@ $debug ||
  
  {
    tmp=`(umask 077 && mktemp -d "./confXXXXXX") 2>/dev/null` &&
-  test -n "$tmp" && test -d "$tmp"
+  test -d "$tmp"
  }  ||
  {
    tmp=./conf$$-$RANDOM
    (umask 077 && mkdir "$tmp")
-} || as_fn_error "cannot create a temporary directory in ." "$LINENO" 5
+} || as_fn_error $? "cannot create a temporary directory in ." "$LINENO" 5
+ac_tmp=$tmp
  
  # Set up the scripts for CONFIG_FILES section.
  # No need to generate them if there are no CONFIG_FILES.
@@ -30645,12 +30863,12 @@ if test "x$ac_cr" = x; then
  fi
  ac_cs_awk_cr=`$AWK 'BEGIN { print "a\rb" }' </dev/null 2>/dev/null`
  if test "$ac_cs_awk_cr" = "a${ac_cr}b"; then
-  ac_cs_awk_cr='\r'
+  ac_cs_awk_cr='\\r'
  else
    ac_cs_awk_cr=$ac_cr
  fi
  
-echo 'BEGIN {' >"$tmp/subs1.awk" &&
+echo 'BEGIN {' >"$ac_tmp/subs1.awk" &&
  _ACEOF
  
  
@@ -30659,18 +30877,18 @@ _ACEOF
    echo "$ac_subst_vars" | sed 's/.*/&!$&$ac_delim/' &&
    echo "_ACEOF"
  } >conf$$subs.sh ||
-  as_fn_error "could not make $CONFIG_STATUS" "$LINENO" 5
-ac_delim_num=`echo "$ac_subst_vars" | grep -c '$'`
+  as_fn_error $? "could not make $CONFIG_STATUS" "$LINENO" 5
+ac_delim_num=`echo "$ac_subst_vars" | grep -c '^'`
  ac_delim='%!_!# '
  for ac_last_try in false false false false false :; do
    . ./conf$$subs.sh ||
-    as_fn_error "could not make $CONFIG_STATUS" "$LINENO" 5
+    as_fn_error $? "could not make $CONFIG_STATUS" "$LINENO" 5
  
    ac_delim_n=`sed -n "s/.*$ac_delim\$/X/p" conf$$subs.awk | grep -c X`
    if test $ac_delim_n = $ac_delim_num; then
      break
    elif $ac_last_try; then
-    as_fn_error "could not make $CONFIG_STATUS" "$LINENO" 5
+    as_fn_error $? "could not make $CONFIG_STATUS" "$LINENO" 5
    else
      ac_delim="$ac_delim!$ac_delim _$ac_delim!! "
    fi
@@ -30678,7 +30896,7 @@ done
  rm -f conf$$subs.sh
  
  cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
-cat >>"\$tmp/subs1.awk" <<\\_ACAWK &&
+cat >>"\$ac_tmp/subs1.awk" <<\\_ACAWK &&
  _ACEOF
  sed -n '
  h
@@ -30726,7 +30944,7 @@ t delim
  rm -f conf$$subs.awk
  cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
  _ACAWK
-cat >>"\$tmp/subs1.awk" <<_ACAWK &&
+cat >>"\$ac_tmp/subs1.awk" <<_ACAWK &&
    for (key in S) S_is_set[key] = 1
    FS = "\a"
  
@@ -30758,21 +30976,29 @@ if sed "s/$ac_cr//" < /dev/null > /dev/null 2>&1; then
    sed "s/$ac_cr\$//; s/$ac_cr/$ac_cs_awk_cr/g"
  else
    cat
-fi < "$tmp/subs1.awk" > "$tmp/subs.awk" \
-  || as_fn_error "could not setup config files machinery" "$LINENO" 5
+fi < "$ac_tmp/subs1.awk" > "$ac_tmp/subs.awk" \
+  || as_fn_error $? "could not setup config files machinery" "$LINENO" 5
  _ACEOF
  
-# VPATH may cause trouble with some makes, so we remove $(srcdir),
-# ${srcdir} and @srcdir@ from VPATH if srcdir is ".", strip leading and
+# VPATH may cause trouble with some makes, so we remove sole $(srcdir),
+# ${srcdir} and @srcdir@ entries from VPATH if srcdir is ".", strip leading and
  # trailing colons and then remove the whole line if VPATH becomes empty
  # (actually we leave an empty line to preserve line numbers).
  if test "x$srcdir" = x.; then
-  ac_vpsub='/^[         ]*VPATH[        ]*=/{
-s/:*\$(srcdir):*/:/
-s/:*\${srcdir}:*/:/
-s/:*@srcdir@:*/:/
-s/^\([^=]*=[    ]*\):*/\1/
+  ac_vpsub='/^[         ]*VPATH[        ]*=[    ]*/{
+h
+s///
+s/^/:/
+s/[     ]*$/:/
+s/:\$(srcdir):/:/g
+s/:\${srcdir}:/:/g
+s/:@srcdir@:/:/g
+s/^:*//
  s/:*$//
+x
+s/\(=[  ]*\).*/\1/
+G
+s/\n//
  s/^[^=]*=[      ]*$//
  }'
  fi
@@ -30784,7 +31010,7 @@ fi # test -n "$CONFIG_FILES"
  # No need to generate them if there are no CONFIG_HEADERS.
  # This happens for instance with `./config.status Makefile'.
  if test -n "$CONFIG_HEADERS"; then
-cat >"$tmp/defines.awk" <<\_ACAWK ||
+cat >"$ac_tmp/defines.awk" <<\_ACAWK ||
  BEGIN {
  _ACEOF
  
@@ -30796,11 +31022,11 @@ _ACEOF
  # handling of long lines.
  ac_delim='%!_!# '
  for ac_last_try in false false :; do
-  ac_t=`sed -n "/$ac_delim/p" confdefs.h`
-  if test -z "$ac_t"; then
+  ac_tt=`sed -n "/$ac_delim/p" confdefs.h`
+  if test -z "$ac_tt"; then
      break
    elif $ac_last_try; then
-    as_fn_error "could not make $CONFIG_HEADERS" "$LINENO" 5
+    as_fn_error $? "could not make $CONFIG_HEADERS" "$LINENO" 5
    else
      ac_delim="$ac_delim!$ac_delim _$ac_delim!! "
    fi
@@ -30885,7 +31111,7 @@ cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
  _ACAWK
  _ACEOF
  cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
-  as_fn_error "could not setup config headers machinery" "$LINENO" 5
+  as_fn_error $? "could not setup config headers machinery" "$LINENO" 5
  fi # test -n "$CONFIG_HEADERS"
  
  
@@ -30898,7 +31124,7 @@ do
    esac
    case $ac_mode$ac_tag in
    :[FHL]*:*);;
-  :L* | :C*:*) as_fn_error "invalid tag \`$ac_tag'" "$LINENO" 5;;
+  :L* | :C*:*) as_fn_error $? "invalid tag \`$ac_tag'" "$LINENO" 5;;
    :[FH]-) ac_tag=-:-;;
    :[FH]*) ac_tag=$ac_tag:$ac_tag.in;;
    esac
@@ -30917,7 +31143,7 @@ do
      for ac_f
      do
        case $ac_f in
-      -) ac_f="$tmp/stdin";;
+      -) ac_f="$ac_tmp/stdin";;
        *) # Look for the file first in the build tree, then in the source tree
          # (if the path is not absolute).  The absolute path cannot be DOS-style,
          # because $ac_f cannot contain `:'.
@@ -30926,7 +31152,7 @@ do
            [\\/$]*) false;;
            *) test -f "$srcdir/$ac_f" && ac_f="$srcdir/$ac_f";;
            esac ||
-          as_fn_error "cannot find input file: \`$ac_f'" "$LINENO" 5;;
+          as_fn_error 1 "cannot find input file: \`$ac_f'" "$LINENO" 5;;
        esac
        case $ac_f in *\'*) ac_f=`$as_echo "$ac_f" | sed "s/'/'\\\\\\\\''/g"`;; esac
        as_fn_append ac_file_inputs " '$ac_f'"
@@ -30952,8 +31178,8 @@ $as_echo "$as_me: creating $ac_file" >&6;}
      esac
  
      case $ac_tag in
-    *:-:* | *:-) cat >"$tmp/stdin" \
-      || as_fn_error "could not create $ac_file" "$LINENO" 5 ;;
+    *:-:* | *:-) cat >"$ac_tmp/stdin" \
+      || as_fn_error $? "could not create $ac_file" "$LINENO" 5 ;;
      esac
      ;;
    esac
@@ -31089,23 +31315,24 @@ s&@INSTALL@&$ac_INSTALL&;t t
  s&@MKDIR_P@&$ac_MKDIR_P&;t t
  $ac_datarootdir_hack
  "
-eval sed \"\$ac_sed_extra\" "$ac_file_inputs" | $AWK -f "$tmp/subs.awk" >$tmp/out \
-  || as_fn_error "could not create $ac_file" "$LINENO" 5
+eval sed \"\$ac_sed_extra\" "$ac_file_inputs" | $AWK -f "$ac_tmp/subs.awk" \
+  >$ac_tmp/out || as_fn_error $? "could not create $ac_file" "$LINENO" 5
  
  test -z "$ac_datarootdir_hack$ac_datarootdir_seen" &&
-  { ac_out=`sed -n '/\${datarootdir}/p' "$tmp/out"`; test -n "$ac_out"; } &&
-  { ac_out=`sed -n '/^[         ]*datarootdir[  ]*:*=/p' "$tmp/out"`; test -z "$ac_out"; } &&
+  { ac_out=`sed -n '/\${datarootdir}/p' "$ac_tmp/out"`; test -n "$ac_out"; } &&
+  { ac_out=`sed -n '/^[         ]*datarootdir[  ]*:*=/p' \
+      "$ac_tmp/out"`; test -z "$ac_out"; } &&
    { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $ac_file contains a reference to the variable \`datarootdir'
-which seems to be undefined.  Please make sure it is defined." >&5
+which seems to be undefined.  Please make sure it is defined" >&5
  $as_echo "$as_me: WARNING: $ac_file contains a reference to the variable \`datarootdir'
-which seems to be undefined.  Please make sure it is defined." >&2;}
+which seems to be undefined.  Please make sure it is defined" >&2;}
  
-  rm -f "$tmp/stdin"
+  rm -f "$ac_tmp/stdin"
    case $ac_file in
-  -) cat "$tmp/out" && rm -f "$tmp/out";;
-  *) rm -f "$ac_file" && mv "$tmp/out" "$ac_file";;
+  -) cat "$ac_tmp/out" && rm -f "$ac_tmp/out";;
+  *) rm -f "$ac_file" && mv "$ac_tmp/out" "$ac_file";;
    esac \
-  || as_fn_error "could not create $ac_file" "$LINENO" 5
+  || as_fn_error $? "could not create $ac_file" "$LINENO" 5
   ;;
    :H)
    #
@@ -31114,21 +31341,21 @@ which seems to be undefined.  Please make sure it is defined." >&2;}
    if test x"$ac_file" != x-; then
      {
        $as_echo "/* $configure_input  */" \
-      && eval '$AWK -f "$tmp/defines.awk"' "$ac_file_inputs"
-    } >"$tmp/config.h" \
-      || as_fn_error "could not create $ac_file" "$LINENO" 5
-    if diff "$ac_file" "$tmp/config.h" >/dev/null 2>&1; then
+      && eval '$AWK -f "$ac_tmp/defines.awk"' "$ac_file_inputs"
+    } >"$ac_tmp/config.h" \
+      || as_fn_error $? "could not create $ac_file" "$LINENO" 5
+    if diff "$ac_file" "$ac_tmp/config.h" >/dev/null 2>&1; then
        { $as_echo "$as_me:${as_lineno-$LINENO}: $ac_file is unchanged" >&5
  $as_echo "$as_me: $ac_file is unchanged" >&6;}
      else
        rm -f "$ac_file"
-      mv "$tmp/config.h" "$ac_file" \
-       || as_fn_error "could not create $ac_file" "$LINENO" 5
+      mv "$ac_tmp/config.h" "$ac_file" \
+       || as_fn_error $? "could not create $ac_file" "$LINENO" 5
      fi
    else
      $as_echo "/* $configure_input  */" \
-      && eval '$AWK -f "$tmp/defines.awk"' "$ac_file_inputs" \
-      || as_fn_error "could not create -" "$LINENO" 5
+      && eval '$AWK -f "$ac_tmp/defines.awk"' "$ac_file_inputs" \
+      || as_fn_error $? "could not create -" "$LINENO" 5
    fi
  # Compute "$ac_file"'s index in $config_headers.
  _am_arg="$ac_file"
@@ -31182,19 +31409,19 @@ $as_echo X"$_am_arg" |
  $as_echo "$as_me: linking $ac_source to $ac_file" >&6;}
  
      if test ! -r "$ac_source"; then
-      as_fn_error "$ac_source: file not found" "$LINENO" 5
+      as_fn_error $? "$ac_source: file not found" "$LINENO" 5
      fi
      rm -f "$ac_file"
  
      # Try a relative symlink, then a hard link, then a copy.
-    case $srcdir in
+    case $ac_source in
      [\\/$]* | ?:[\\/]* ) ac_rel_source=$ac_source ;;
         *) ac_rel_source=$ac_top_build_prefix$ac_source ;;
      esac
      ln -s "$ac_rel_source" "$ac_file" 2>/dev/null ||
        ln "$ac_source" "$ac_file" 2>/dev/null ||
        cp -p "$ac_source" "$ac_file" ||
-      as_fn_error "cannot link or copy $ac_source to $ac_file" "$LINENO" 5
+      as_fn_error $? "cannot link or copy $ac_source to $ac_file" "$LINENO" 5
    fi
   ;;
    :C)  { $as_echo "$as_me:${as_lineno-$LINENO}: executing $ac_file commands" >&5
@@ -32167,7 +32394,7 @@ _ACEOF
  ac_clean_files=$ac_clean_files_save
  
  test $ac_write_fail = 0 ||
-  as_fn_error "write failure creating $CONFIG_STATUS" "$LINENO" 5
+  as_fn_error $? "write failure creating $CONFIG_STATUS" "$LINENO" 5
  
  
  # configure is writing to config.log, and then calls config.status.
@@ -32188,10 +32415,45 @@ if test "$no_create" != yes; then
    exec 5>>config.log
    # Use ||, not &&, to avoid exiting from the if with $? = 1, which
    # would make configure fail if this is the last instruction.
-  $ac_cs_success || as_fn_exit $?
+  $ac_cs_success || as_fn_exit 1
  fi
  if test -n "$ac_unrecognized_opts" && test "$enable_option_checking" != no; then
    { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: unrecognized options: $ac_unrecognized_opts" >&5
  $as_echo "$as_me: WARNING: unrecognized options: $ac_unrecognized_opts" >&2;}
  fi
  
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: summary of build options:
+
+  Version:           ${PACKAGE_STRING}
+  Host type:         ${host}
+  ABI:               ${ABI}
+  Install prefix:    ${prefix}
+  Compiler:          ${CC}
+  Static libraries:  ${enable_static}
+  Shared libraries:  ${enable_shared}
+" >&5
+$as_echo "$as_me: summary of build options:
+
+  Version:           ${PACKAGE_STRING}
+  Host type:         ${host}
+  ABI:               ${ABI}
+  Install prefix:    ${prefix}
+  Compiler:          ${CC}
+  Static libraries:  ${enable_static}
+  Shared libraries:  ${enable_shared}
+" >&6;}
+
+if test x$cross_compiling = xyes ; then
+   case "$host" in
+     *-*-mingw* | *-*-cygwin)
+     if test x$ABI = x64 ; then
+       { $as_echo "$as_me:${as_lineno-$LINENO}: If wine64 is installed, use make check TESTS_ENVIRONMENT=wine64." >&5
+$as_echo "$as_me: If wine64 is installed, use make check TESTS_ENVIRONMENT=wine64." >&6;}
+     else
+       { $as_echo "$as_me:${as_lineno-$LINENO}: If wine is installed, use make check TESTS_ENVIRONMENT=wine." >&5
+$as_echo "$as_me: If wine is installed, use make check TESTS_ENVIRONMENT=wine." >&6;}
+     fi
+     ;;
+   esac
+fi
diff --git a/configure.ac b/configure.ac

new file mode 100644 (file)

index 0000000..2e559b1
--- /dev/null
+++ b/configure.ac
@@ -0,0 +1,3697 @@
+dnl  Process this file with autoconf to produce a configure script.
+
+
+define(GMP_COPYRIGHT,[[
+
+Copyright 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006,
+2007, 2008, 2009, 2010, 2011, 2012, 2013 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published
+by the Free Software Foundation; either version 3 of the License, or (at
+your option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+]])
+
+AC_COPYRIGHT(GMP_COPYRIGHT)
+AH_TOP(/*GMP_COPYRIGHT*/)
+
+AC_REVISION($Revision$)
+AC_PREREQ(2.59)
+AC_INIT(GNU MP, GMP_VERSION, [gmp-bugs@gmplib.org, see http://gmplib.org/manual/Reporting-Bugs.html], gmp)
+AC_CONFIG_SRCDIR(gmp-impl.h)
+m4_pattern_forbid([^[ \t]*GMP_])
+m4_pattern_allow(GMP_LDFLAGS)
+m4_pattern_allow(GMP_LIMB_BITS)
+m4_pattern_allow(GMP_MPARAM_H_SUGGEST)
+m4_pattern_allow(GMP_NAIL_BITS)
+m4_pattern_allow(GMP_NUMB_BITS)
+m4_pattern_allow(GMP_NONSTD_ABI)
+
+# If --target is not used then $target_alias is empty, but if say
+# "./configure athlon-pc-freebsd3.5" is used, then all three of
+# $build_alias, $host_alias and $target_alias are set to
+# "athlon-pc-freebsd3.5".
+#
+if test -n "$target_alias" && test "$target_alias" != "$host_alias"; then
+  AC_MSG_ERROR([--target is not appropriate for GMP
+Use --build=CPU-VENDOR-OS if you need to specify your CPU and/or system
+explicitly.  Use --host if cross-compiling (see "Installing GMP" in the
+manual for more on this).])
+fi
+
+GMP_INIT(config.m4)
+
+AC_CANONICAL_HOST
+
+dnl  Automake "no-dependencies" is used because include file dependencies
+dnl  are not useful to us.  Pretty much everything depends just on gmp.h,
+dnl  gmp-impl.h and longlong.h, and yet only rarely does everything need to
+dnl  be rebuilt for changes to those files.
+dnl
+dnl  "no-dependencies" also helps with the way we're set up to run
+dnl  AC_PROG_CXX only conditionally.  If dependencies are used then recent
+dnl  automake (eg 1.7.2) appends an AM_CONDITIONAL to AC_PROG_CXX, and then
+dnl  gets upset if it's not actually executed.
+dnl
+dnl  Note that there's a copy of these options in the top-level Makefile.am,
+dnl  so update there too if changing anything.
+dnl
+AM_INIT_AUTOMAKE([1.8 gnu no-dependencies])
+AC_CONFIG_HEADERS(config.h:config.in)
+AM_MAINTAINER_MODE
+
+
+AC_ARG_ENABLE(assert,
+AC_HELP_STRING([--enable-assert],[enable ASSERT checking [[default=no]]]),
+[case $enableval in
+yes|no) ;;
+*) AC_MSG_ERROR([bad value $enableval for --enable-assert, need yes or no]) ;;
+esac],
+[enable_assert=no])
+
+if test "$enable_assert" = "yes"; then
+  AC_DEFINE(WANT_ASSERT,1,
+  [Define to 1 to enable ASSERT checking, per --enable-assert])
+  want_assert_01=1
+else
+  want_assert_01=0
+fi
+GMP_DEFINE_RAW(["define(<WANT_ASSERT>,$want_assert_01)"])
+
+
+AC_ARG_ENABLE(alloca,
+AC_HELP_STRING([--enable-alloca],[how to get temp memory [[default=reentrant]]]),
+[case $enableval in
+alloca|malloc-reentrant|malloc-notreentrant) ;;
+yes|no|reentrant|notreentrant) ;;
+debug) ;;
+*)
+  AC_MSG_ERROR([bad value $enableval for --enable-alloca, need one of:
+yes no reentrant notreentrant alloca malloc-reentrant malloc-notreentrant debug]) ;;
+esac],
+[enable_alloca=reentrant])
+
+
+# IMPROVE ME: The default for C++ is disabled.  The tests currently
+# performed below for a working C++ compiler are not particularly strong,
+# and in general can't be expected to get the right setup on their own.  The
+# most significant problem is getting the ABI the same.  Defaulting CXXFLAGS
+# to CFLAGS takes only a small step towards this.  It's also probably worth
+# worrying whether the C and C++ runtimes from say gcc and a vendor C++ can
+# work together.  Some rather broken C++ installations were encountered
+# during testing, and though such things clearly aren't GMP's problem, if
+# --enable-cxx=detect were to be the default then some careful checks of
+# which, if any, C++ compiler on the system is up to scratch would be
+# wanted.
+#
+AC_ARG_ENABLE(cxx,
+AC_HELP_STRING([--enable-cxx],[enable C++ support [[default=no]]]),
+[case $enableval in
+yes|no|detect) ;;
+*) AC_MSG_ERROR([bad value $enableval for --enable-cxx, need yes/no/detect]) ;;
+esac],
+[enable_cxx=no])
+
+
+AC_ARG_ENABLE(assembly,
+AC_HELP_STRING([--enable-assembly],[enable the use of assembly loops [[default=yes]]]),
+[case $enableval in
+yes|no) ;;
+*) AC_MSG_ERROR([bad value $enableval for --enable-assembly, need yes or no]) ;;
+esac],
+[enable_assembly=yes])
+
+if test "$enable_assembly" = "yes"; then
+  AC_DEFINE(WANT_ASSEMBLY,1,
+  [Defined to 1 as per --enable-assembly])
+fi
+
+
+AC_ARG_ENABLE(fft,
+AC_HELP_STRING([--enable-fft],[enable FFTs for multiplication [[default=yes]]]),
+[case $enableval in
+yes|no) ;;
+*) AC_MSG_ERROR([bad value $enableval for --enable-fft, need yes or no]) ;;
+esac],
+[enable_fft=yes])
+
+if test "$enable_fft" = "yes"; then
+  AC_DEFINE(WANT_FFT,1,
+  [Define to 1 to enable FFTs for multiplication, per --enable-fft])
+fi
+
+
+AC_ARG_ENABLE(old-fft-full,
+AC_HELP_STRING([--enable-old-fft-full],[enable old mpn_mul_fft_full for multiplication [[default=no]]]),
+[case $enableval in
+yes|no) ;;
+*) AC_MSG_ERROR([bad value $enableval for --enable-old-fft-full, need yes or no]) ;;
+esac],
+[enable_old_fft_full=no])
+
+if test "$enable_old_fft_full" = "yes"; then
+  AC_DEFINE(WANT_OLD_FFT_FULL,1,
+  [Define to 1 to enable old mpn_mul_fft_full for multiplication, per --enable-old-fft-full])
+fi
+
+
+AC_ARG_ENABLE(nails,
+AC_HELP_STRING([--enable-nails],[use nails on limbs [[default=no]]]),
+[case $enableval in
+[yes|no|[02468]|[0-9][02468]]) ;;
+[*[13579]])
+  AC_MSG_ERROR([bad value $enableval for --enable-nails, only even nail sizes supported]) ;;
+*)
+  AC_MSG_ERROR([bad value $enableval for --enable-nails, need yes/no/number]) ;;
+esac],
+[enable_nails=no])
+
+case $enable_nails in
+yes) GMP_NAIL_BITS=2 ;;
+no)  GMP_NAIL_BITS=0 ;;
+*)   GMP_NAIL_BITS=$enable_nails ;;
+esac
+AC_SUBST(GMP_NAIL_BITS)
+
+
+AC_ARG_ENABLE(profiling,
+AC_HELP_STRING([--enable-profiling],
+               [build with profiler support [[default=no]]]),
+[case $enableval in
+no|prof|gprof|instrument) ;;
+*) AC_MSG_ERROR([bad value $enableval for --enable-profiling, need no/prof/gprof/instrument]) ;;
+esac],
+[enable_profiling=no])
+
+case $enable_profiling in
+  prof)
+    AC_DEFINE(WANT_PROFILING_PROF, 1,
+              [Define to 1 if --enable-profiling=prof])
+    ;;
+  gprof)
+    AC_DEFINE(WANT_PROFILING_GPROF, 1,
+              [Define to 1 if --enable-profiling=gprof])
+    ;;
+  instrument)
+    AC_DEFINE(WANT_PROFILING_INSTRUMENT, 1,
+              [Define to 1 if --enable-profiling=instrument])
+    ;;
+esac
+
+GMP_DEFINE_RAW(["define(<WANT_PROFILING>,<\`$enable_profiling'>)"])
+
+# -fomit-frame-pointer is incompatible with -pg on some chips
+if test "$enable_profiling" = gprof; then
+  fomit_frame_pointer=
+else
+  fomit_frame_pointer="-fomit-frame-pointer"
+fi
+
+
+AC_ARG_WITH(readline,
+AC_HELP_STRING([--with-readline],
+               [readline support in calc demo program [[default=detect]]]),
+[case $withval in
+yes|no|detect) ;;
+*) AC_MSG_ERROR([bad value $withval for --with-readline, need yes/no/detect]) ;;
+esac],
+[with_readline=detect])
+
+
+AC_ARG_ENABLE(fat,
+AC_HELP_STRING([--enable-fat],
+               [build a fat binary on systems that support it [[default=no]]]),
+[case $enableval in
+yes|no) ;;
+*) AC_MSG_ERROR([bad value $enableval for --enable-fat, need yes or no]) ;;
+esac],
+[enable_fat=no])
+
+
+AC_ARG_ENABLE(minithres,
+AC_HELP_STRING([--enable-minithres],
+               [choose minimal thresholds for testing [[default=no]]]),
+[case $enableval in
+yes|no) ;;
+*) AC_MSG_ERROR([bad value $enableval for --enable-minithres, need yes or no]) ;;
+esac],
+[enable_minithres=no])
+
+
+if test $enable_fat = yes && test $enable_assembly = no ; then
+  AC_MSG_ERROR([when doing a fat build, disabling assembly will not work])
+fi
+
+
+tmp_host=`echo $host_cpu | sed 's/\./_/'`
+AC_DEFINE_UNQUOTED(HAVE_HOST_CPU_$tmp_host)
+GMP_DEFINE_RAW("define_not_for_expansion(\`HAVE_HOST_CPU_$tmp_host')", POST)
+
+dnl  The HAVE_HOST_CPU_ list here only needs to have entries for those which
+dnl  are going to be tested, not everything that can possibly be selected.
+dnl
+dnl  The HAVE_HOST_CPU_FAMILY_ list similarly, and note that the AC_DEFINEs
+dnl  for these are under the cpu specific setups below.
+
+AH_VERBATIM([HAVE_HOST_CPU_1],
+[/* Define one of these to 1 for the host CPU family.
+   If your CPU is not in any of these families, leave all undefined.
+   For an AMD64 chip, define "x86" in ABI=32, but not in ABI=64. */
+#undef HAVE_HOST_CPU_FAMILY_alpha
+#undef HAVE_HOST_CPU_FAMILY_m68k
+#undef HAVE_HOST_CPU_FAMILY_power
+#undef HAVE_HOST_CPU_FAMILY_powerpc
+#undef HAVE_HOST_CPU_FAMILY_x86
+#undef HAVE_HOST_CPU_FAMILY_x86_64
+
+/* Define one of the following to 1 for the host CPU, as per the output of
+   ./config.guess.  If your CPU is not listed here, leave all undefined.  */
+#undef HAVE_HOST_CPU_alphaev67
+#undef HAVE_HOST_CPU_alphaev68
+#undef HAVE_HOST_CPU_alphaev7
+#undef HAVE_HOST_CPU_m68020
+#undef HAVE_HOST_CPU_m68030
+#undef HAVE_HOST_CPU_m68040
+#undef HAVE_HOST_CPU_m68060
+#undef HAVE_HOST_CPU_m68360
+#undef HAVE_HOST_CPU_powerpc604
+#undef HAVE_HOST_CPU_powerpc604e
+#undef HAVE_HOST_CPU_powerpc750
+#undef HAVE_HOST_CPU_powerpc7400
+#undef HAVE_HOST_CPU_supersparc
+#undef HAVE_HOST_CPU_i386
+#undef HAVE_HOST_CPU_i586
+#undef HAVE_HOST_CPU_i686
+#undef HAVE_HOST_CPU_pentium
+#undef HAVE_HOST_CPU_pentiummmx
+#undef HAVE_HOST_CPU_pentiumpro
+#undef HAVE_HOST_CPU_pentium2
+#undef HAVE_HOST_CPU_pentium3
+#undef HAVE_HOST_CPU_s390_z900
+#undef HAVE_HOST_CPU_s390_z990
+#undef HAVE_HOST_CPU_s390_z9
+#undef HAVE_HOST_CPU_s390_z10
+#undef HAVE_HOST_CPU_s390_z196
+
+/* Define to 1 iff we have a s390 with 64-bit registers.  */
+#undef HAVE_HOST_CPU_s390_zarch])
+
+
+# Table of compilers, options, and mpn paths.  This code has various related
+# purposes
+#
+#   - better default CC/CFLAGS selections than autoconf otherwise gives
+#   - default CC/CFLAGS selections for extra CPU types specific to GMP
+#   - a few tests for known bad compilers
+#   - choice of ABIs on suitable systems
+#   - selection of corresponding mpn search path
+#
+# After GMP specific searches and tests, the standard autoconf AC_PROG_CC is
+# called.  User selections of CC etc are respected.
+#
+# Care is taken not to use macros like AC_TRY_COMPILE during the GMP
+# pre-testing, since they of course depend on AC_PROG_CC, and also some of
+# them cache their results, which is not wanted.
+#
+# The ABI selection mechanism is unique to GMP.  All that reaches autoconf
+# is a different selection of CC/CFLAGS according to the best ABI the system
+# supports, and/or what the user selects.  Naturally the mpn assembler code
+# selected is very dependent on the ABI.
+#
+# The closest the standard tools come to a notion of ABI is something like
+# "sparc64" which encodes a CPU and an ABI together.  This doesn't seem to
+# scale well for GMP, where exact CPU types like "ultrasparc2" are wanted,
+# separate from the ABI used on them.
+#
+#
+# The variables set here are
+#
+#   cclist              the compiler choices
+#   xx_cflags           flags for compiler xx
+#   xx_cflags_maybe     flags for compiler xx, if they work
+#   xx_cppflags         cpp flags for compiler xx
+#   xx_cflags_optlist   list of sets of optional flags
+#   xx_cflags_yyy       set yyy of optional flags for compiler xx
+#   xx_ldflags          -Wc,-foo flags for libtool linking with compiler xx
+#   ar_flags            extra flags for $AR
+#   nm_flags            extra flags for $NM
+#   limb                limb size, can be "longlong"
+#   path                mpn search path
+#   extra_functions     extra mpn functions
+#   fat_path            fat binary mpn search path [if fat binary desired]
+#   fat_functions       fat functions
+#   fat_thresholds      fat thresholds
+#
+# Suppose xx_cflags_optlist="arch", then flags from $xx_cflags_arch are
+# tried, and the first flag that works will be used.  An optlist like "arch
+# cpu optimize" can be used to get multiple independent sets of flags tried.
+# The first that works from each will be used.  If no flag in a set works
+# then nothing from that set is added.
+#
+# For multiple ABIs, the scheme extends as follows.
+#
+#   abilist               set of ABI choices
+#   cclist_aa             compiler choices in ABI aa
+#   xx_aa_cflags          flags for xx in ABI aa
+#   xx_aa_cflags_maybe    flags for xx in ABI aa, if they work
+#   xx_aa_cppflags        cpp flags for xx in ABI aa
+#   xx_aa_cflags_optlist  list of sets of optional flags in ABI aa
+#   xx_aa_cflags_yyy      set yyy of optional flags for compiler xx in ABI aa
+#   xx_aa_ldflags         -Wc,-foo flags for libtool linking
+#   ar_aa_flags           extra flags for $AR in ABI aa
+#   nm_aa_flags           extra flags for $NM in ABI aa
+#   limb_aa               limb size in ABI aa, can be "longlong"
+#   path_aa               mpn search path in ABI aa
+#   extra_functions_aa    extra mpn functions in ABI aa
+#
+# As a convenience, the unadorned xx_cflags (etc) are used for the last ABI
+# in abilist, if an xx_aa_cflags for that ABI isn't given.  For example if
+# abilist="64 32" then $cc_64_cflags will be used for the 64-bit ABI, but
+# for the 32-bit either $cc_32_cflags or $cc_cflags is used, whichever is
+# defined.  This makes it easy to add some 64-bit compilers and flags to an
+# unadorned 32-bit set.
+#
+# limb=longlong (or limb_aa=longlong) applies to all compilers within that
+# ABI.  It won't work to have some needing long long and some not, since a
+# single instantiated gmp.h will be used by both.
+#
+# SPEED_CYCLECOUNTER, cyclecounter_size and CALLING_CONVENTIONS_OBJS are
+# also set here, with an ABI suffix.
+#
+#
+#
+# A table-driven approach like this to mapping cpu type to good compiler
+# options is a bit of a maintenance burden, but there's not much uniformity
+# between options specifications on different compilers.  Some sort of
+# separately updatable tool might be cute.
+#
+# The use of lots of variables like this, direct and indirect, tends to
+# obscure when and how various things are done, but unfortunately it's
+# pretty much the only way.  If shell subroutines were portable then actual
+# code like "if this .. do that" could be written, but attempting the same
+# with full copies of GMP_PROG_CC_WORKS etc expanded at every point would
+# hugely bloat the output.
+
+
+AC_ARG_VAR(ABI, [desired ABI (for processors supporting more than one ABI)])
+
+# abilist needs to be non-empty, "standard" is just a generic name here
+abilist="standard"
+
+# FIXME: We'd like to prefer an ANSI compiler, perhaps by preferring
+# c89 over cc here.  But note that on HP-UX c89 provides a castrated
+# environment, and would want to be excluded somehow.  Maybe
+# AC_PROG_CC_STDC already does enough to stick cc into ANSI mode and
+# we don't need to worry.
+#
+cclist="gcc cc"
+
+gcc_cflags="-O2 -pedantic"
+gcc_64_cflags="-O2 -pedantic"
+cc_cflags="-O"
+cc_64_cflags="-O"
+
+SPEED_CYCLECOUNTER_OBJ=
+cyclecounter_size=2
+
+AC_SUBST(HAVE_HOST_CPU_FAMILY_power,  0)
+AC_SUBST(HAVE_HOST_CPU_FAMILY_powerpc,0)
+
+case $host in
+
+  a29k*-*-*)
+    path="a29k"
+    ;;
+
+
+  alpha*-*-*)
+    AC_DEFINE(HAVE_HOST_CPU_FAMILY_alpha)
+    case $host_cpu in
+      alphaev5* | alphapca5*)
+       path="alpha/ev5 alpha" ;;
+      alphaev67 | alphaev68 | alphaev7*)
+        path="alpha/ev67 alpha/ev6 alpha" ;;
+      alphaev6)
+       path="alpha/ev6 alpha" ;;
+      *)
+        path="alpha" ;;
+    esac
+    extra_functions="cntlz"
+    gcc_cflags_optlist="asm cpu oldas" # need asm ahead of cpu, see below
+    gcc_cflags_oldas="-Wa,-oldas"     # see GMP_GCC_WA_OLDAS.
+
+    # gcc 2.7.2.3 doesn't know any -mcpu= for alpha, apparently.
+    # gcc 2.95 knows -mcpu= ev4, ev5, ev56, pca56, ev6.
+    # gcc 3.0 adds nothing.
+    # gcc 3.1 adds ev45, ev67 (but ev45 is the same as ev4).
+    # gcc 3.2 adds nothing.
+    #
+    # gcc version "2.9-gnupro-99r1" under "-O2 -mcpu=ev6" strikes internal
+    # compiler errors too easily and is rejected by GMP_PROG_CC_WORKS.  Each
+    # -mcpu=ev6 below has a fallback to -mcpu=ev56 for this reason.
+    #
+    case $host_cpu in
+      alpha)        gcc_cflags_cpu="-mcpu=ev4" ;;
+      alphaev5)     gcc_cflags_cpu="-mcpu=ev5" ;;
+      alphaev56)    gcc_cflags_cpu="-mcpu=ev56" ;;
+      alphapca56 | alphapca57)
+                    gcc_cflags_cpu="-mcpu=pca56" ;;
+      alphaev6)     gcc_cflags_cpu="-mcpu=ev6 -mcpu=ev56" ;;
+      alphaev67 | alphaev68 | alphaev7*)
+                    gcc_cflags_cpu="-mcpu=ev67 -mcpu=ev6 -mcpu=ev56" ;;
+    esac
+
+    # gcc version "2.9-gnupro-99r1" on alphaev68-dec-osf5.1 has been seen
+    # accepting -mcpu=ev6, but not putting the assembler in the right mode
+    # for what it produces.  We need to do this for it, and need to do it
+    # before testing the -mcpu options.
+    #
+    # On old versions of gcc, which don't know -mcpu=, we believe an
+    # explicit -Wa,-mev5 etc will be necessary to put the assembler in
+    # the right mode for our .asm files and longlong.h asm blocks.
+    #
+    # On newer versions of gcc, when -mcpu= is known, we must give a -Wa
+    # which is at least as high as the code gcc will generate.  gcc
+    # establishes what it needs with a ".arch" directive, our command line
+    # option seems to override that.
+    #
+    # gas prior to 2.14 doesn't accept -mev67, but -mev6 seems enough for
+    # ctlz and cttz (in 2.10.0 at least).
+    #
+    # OSF `as' accepts ev68 but stupidly treats it as ev4.  -arch only seems
+    # to affect insns like ldbu which are expanded as macros when necessary.
+    # Insns like ctlz which were never available as macros are always
+    # accepted and always generate their plain code.
+    #
+    case $host_cpu in
+      alpha)        gcc_cflags_asm="-Wa,-arch,ev4 -Wa,-mev4" ;;
+      alphaev5)     gcc_cflags_asm="-Wa,-arch,ev5 -Wa,-mev5" ;;
+      alphaev56)    gcc_cflags_asm="-Wa,-arch,ev56 -Wa,-mev56" ;;
+      alphapca56 | alphapca57)
+                    gcc_cflags_asm="-Wa,-arch,pca56 -Wa,-mpca56" ;;
+      alphaev6)     gcc_cflags_asm="-Wa,-arch,ev6 -Wa,-mev6" ;;
+      alphaev67 | alphaev68 | alphaev7*)
+                    gcc_cflags_asm="-Wa,-arch,ev67 -Wa,-mev67 -Wa,-arch,ev6 -Wa,-mev6" ;;
+    esac
+
+    # It might be better to ask "cc" whether it's Cray C or DEC C,
+    # instead of relying on the OS part of $host.  But it's hard to
+    # imagine either of those compilers anywhere except their native
+    # systems.
+    #
+    GMP_INCLUDE_MPN(alpha/alpha-defs.m4)
+    case $host in
+      *-cray-unicos*)
+        cc_cflags="-O"         # no -g, it silently disables all optimizations
+        GMP_INCLUDE_MPN(alpha/unicos.m4)
+        # Don't perform any assembly syntax tests on this beast.
+        gmp_asm_syntax_testing=no
+        ;;
+      *-*-osf*)
+        GMP_INCLUDE_MPN(alpha/default.m4)
+        cc_cflags=""
+        cc_cflags_optlist="opt cpu"
+
+        # not sure if -fast works on old versions, so make it optional
+       cc_cflags_opt="-fast -O2"
+
+       # DEC C V5.9-005 knows ev4, ev5, ev56, pca56, ev6.
+       # Compaq C V6.3-029 adds ev67.
+       #
+       case $host_cpu in
+         alpha)       cc_cflags_cpu="-arch~ev4~-tune~ev4" ;;
+         alphaev5)    cc_cflags_cpu="-arch~ev5~-tune~ev5" ;;
+         alphaev56)   cc_cflags_cpu="-arch~ev56~-tune~ev56" ;;
+         alphapca56 | alphapca57)
+            cc_cflags_cpu="-arch~pca56~-tune~pca56" ;;
+         alphaev6)    cc_cflags_cpu="-arch~ev6~-tune~ev6" ;;
+         alphaev67 | alphaev68 | alphaev7*)
+            cc_cflags_cpu="-arch~ev67~-tune~ev67 -arch~ev6~-tune~ev6" ;;
+       esac
+        ;;
+      *)
+        GMP_INCLUDE_MPN(alpha/default.m4)
+        ;;
+    esac
+
+    case $host in
+      *-*-unicos*)
+        # tune/alpha.asm assumes int==4bytes but unicos uses int==8bytes
+        ;;
+      *)
+        SPEED_CYCLECOUNTER_OBJ=alpha.lo
+        cyclecounter_size=1 ;;
+    esac
+    ;;
+
+
+  # Cray vector machines.
+  # This must come after alpha* so that we can recognize present and future
+  # vector processors with a wildcard.
+  *-cray-unicos*)
+    gmp_asm_syntax_testing=no
+    cclist="cc"
+    # We used to have -hscalar0 here as a workaround for miscompilation of
+    # mpz/import.c, but let's hope Cray fixes their bugs instead, since
+    # -hscalar0 causes disastrously poor code to be generated.
+    cc_cflags="-O3 -hnofastmd -htask0 -Wa,-B"
+    path="cray"
+    ;;
+
+
+  arm*-*-*)
+    gcc_cflags="$gcc_cflags $fomit_frame_pointer"
+    gcc_cflags_optlist="arch tune"
+    gcc_cflags_maybe="-marm"
+    gcc_testlist="gcc-arm-umodsi"
+    GMP_INCLUDE_MPN(arm/arm-defs.m4)
+    CALLING_CONVENTIONS_OBJS='arm32call.lo arm32check.lo'
+
+    case $host_cpu in
+      armsa1 | armv4*)
+       path="arm"
+       gcc_cflags_arch="-march=armv4"
+       ;;
+      armxscale | arm926 | arm946 | arm966 | arm1026 | armv5*)
+       path="arm/v5 arm"
+       gcc_cflags_arch="-march=armv5"
+       ;;
+      arm11mpcore | arm1136 | arm1176 | armv6*)
+       path="arm/v6 arm/v5 arm"
+       gcc_cflags_arch="-march=armv6"
+       ;;
+      arm1156)
+       path="arm/v6t2 arm/v6 arm/v5 arm"
+       gcc_cflags_arch="-march=armv6t2"
+       ;;
+      armcortexa9)
+       path="arm/v7a/cora9 arm/v6t2 arm/v6 arm/v5 arm"
+       gcc_cflags_arch="-march=armv7-a"
+       gcc_cflags_tune="-mtune=cortex-a9"
+       ;;
+      armcortexa15)
+       path="arm/v7a/cora15 arm/v6t2 arm/v6 arm/v5 arm"
+       gcc_cflags_arch="-march=armv7-a"
+       gcc_cflags_tune="-mtune=cortex-a15 -mtune=cortex-a9"
+       ;;
+      armcortexa5 | armcortexa8 | armv7a*)
+       path="arm/v6t2 arm/v6 arm/v5 arm"
+       gcc_cflags_arch="-march=armv7-a"
+       ;;
+      *)
+       path="arm"
+       ;;
+    esac
+    ;;
+
+
+  clipper*-*-*)
+    path="clipper"
+    ;;
+
+
+  # Fujitsu
+  [f30[01]-fujitsu-sysv*])
+    cclist="gcc vcc"
+    # FIXME: flags for vcc?
+    vcc_cflags="-g"
+    path="fujitsu"
+    ;;
+
+
+  hppa*-*-*)
+    # HP cc (the one sold separately) is K&R by default, but AM_C_PROTOTYPES
+    # will add "-Ae", or "-Aa -D_HPUX_SOURCE", to put it into ansi mode, if
+    # possible.
+    #
+    # gcc for hppa 2.0 can be built either for 2.0n (32-bit) or 2.0w
+    # (64-bit), but not both, so there's no option to choose the desired
+    # mode, we must instead detect which of the two it is.  This is done by
+    # checking sizeof(long), either 4 or 8 bytes respectively.  Do this in
+    # ABI=1.0 too, in case someone tries to build that with a 2.0w gcc.
+    #
+    gcc_cflags_optlist="arch"
+    gcc_testlist="sizeof-long-4"
+    SPEED_CYCLECOUNTER_OBJ=hppa.lo
+    cyclecounter_size=1
+
+    # FIXME: For hppa2.0*, path should be "pa32/hppa2_0 pa32/hppa1_1 pa32".
+    # (Can't remember why this isn't done already, have to check what .asm
+    # files are available in each and how they run on a typical 2.0 cpu.)
+    #
+    case $host_cpu in
+      hppa1.0*)    path="pa32" ;;
+      hppa7000*)   path="pa32/hppa1_1 pa32" ;;
+      hppa2.0* | hppa64)
+                   path="pa32/hppa2_0 pa32/hppa1_1/pa7100 pa32/hppa1_1 pa32" ;;
+      *)           # default to 7100
+                   path="pa32/hppa1_1/pa7100 pa32/hppa1_1 pa32" ;;
+    esac
+
+    # gcc 2.7.2.3 knows -mpa-risc-1-0 and -mpa-risc-1-1
+    # gcc 2.95 adds -mpa-risc-2-0, plus synonyms -march=1.0, 1.1 and 2.0
+    #
+    # We don't use -mpa-risc-2-0 in ABI=1.0 because 64-bit registers may not
+    # be saved by the kernel on an old system.  Actually gcc (as of 3.2)
+    # only adds a few float instructions with -mpa-risc-2-0, so it would
+    # probably be safe, but let's not take the chance.  In any case, a
+    # configuration like --host=hppa2.0 ABI=1.0 is far from optimal.
+    #
+    case $host_cpu in
+      hppa1.0*)           gcc_cflags_arch="-mpa-risc-1-0" ;;
+      *)                  # default to 7100
+                          gcc_cflags_arch="-mpa-risc-1-1" ;;
+    esac
+
+    case $host_cpu in
+      hppa1.0*)    cc_cflags="+O2" ;;
+      *)           # default to 7100
+                   cc_cflags="+DA1.1 +O2" ;;
+    esac
+
+    case $host in
+      hppa2.0*-*-* | hppa64-*-*)
+       cclist_20n="gcc cc"
+        abilist="2.0n 1.0"
+        path_20n="pa64"
+       limb_20n=longlong
+        any_20n_testlist="sizeof-long-4"
+        SPEED_CYCLECOUNTER_OBJ_20n=hppa2.lo
+        cyclecounter_size_20n=2
+
+        # -mpa-risc-2-0 is only an optional flag, in case an old gcc is
+        # used.  Assembler support for 2.0 is essential though, for our asm
+        # files.
+       gcc_20n_cflags="$gcc_cflags"
+       gcc_20n_cflags_optlist="arch"
+        gcc_20n_cflags_arch="-mpa-risc-2-0 -mpa-risc-1-1"
+        gcc_20n_testlist="sizeof-long-4 hppa-level-2.0"
+
+        cc_20n_cflags="+DA2.0 +e +O2 -Wl,+vnocompatwarnings"
+        cc_20n_testlist="hpc-hppa-2-0"
+
+       # ABI=2.0w is available for hppa2.0w and hppa2.0, but not for
+       # hppa2.0n, on the assumption that the latter indicates a
+       # desire for ABI=2.0n.
+       case $host in
+        hppa2.0n-*-*) ;;
+        *)
+          # HPUX 10 and earlier cannot run 2.0w.  Not sure about other
+          # systems (GNU/Linux for instance), but let's assume they're ok.
+          case $host in
+            [*-*-hpux[1-9] | *-*-hpux[1-9].* | *-*-hpux10 | *-*-hpux10.*]) ;;
+           [*-*-linux*])  abilist="1.0" ;; # due to linux permanent kernel bug
+            *)    abilist="2.0w $abilist" ;;
+          esac
+
+          cclist_20w="gcc cc"
+         gcc_20w_cflags="$gcc_cflags -mpa-risc-2-0"
+          cc_20w_cflags="+DD64 +O2"
+          cc_20w_testlist="hpc-hppa-2-0"
+          path_20w="pa64"
+         any_20w_testlist="sizeof-long-8"
+          SPEED_CYCLECOUNTER_OBJ_20w=hppa2w.lo
+          cyclecounter_size_20w=2
+         ;;
+        esac
+        ;;
+    esac
+    ;;
+
+
+  i960*-*-*)
+    path="i960"
+    ;;
+
+
+  IA64_PATTERN)
+    abilist="64"
+    GMP_INCLUDE_MPN(ia64/ia64-defs.m4)
+    SPEED_CYCLECOUNTER_OBJ=ia64.lo
+    any_32_testlist="sizeof-long-4"
+
+    case $host_cpu in
+      itanium)   path="ia64/itanium  ia64" ;;
+      itanium2)  path="ia64/itanium2 ia64" ;;
+      *)         path="ia64" ;;
+    esac
+
+    gcc_64_cflags_optlist="tune"
+    gcc_32_cflags_optlist=$gcc_64_cflags_optlist
+
+    # gcc pre-release 3.4 adds -mtune itanium and itanium2
+    case $host_cpu in
+      itanium)   gcc_cflags_tune="-mtune=itanium" ;;
+      itanium2)  gcc_cflags_tune="-mtune=itanium2" ;;
+    esac
+
+    case $host in
+      *-*-linux*)
+       cclist="gcc icc"
+       icc_cflags="-no-gcc"
+       icc_cflags_optlist="opt"
+       # Don't use -O3, it is for "large data sets" and also miscompiles GMP.
+       # But icc miscompiles GMP at any optimization level, at higher levels
+       # it miscompiles more files...
+       icc_cflags_opt="-O2 -O1"
+       ;;
+
+      *-*-hpux*)
+        # HP cc sometimes gets internal errors if the optimization level is
+        # too high.  GMP_PROG_CC_WORKS detects this, the "_opt" fallbacks
+        # let us use whatever seems to work.
+        #
+        abilist="32 64"
+        any_64_testlist="sizeof-long-8"
+
+        cclist_32="gcc cc"
+        path_32="ia64"
+        cc_32_cflags=""
+        cc_32_cflags_optlist="opt"
+        cc_32_cflags_opt="+O3 +O2 +O1"
+        gcc_32_cflags="$gcc_cflags -milp32"
+        limb_32=longlong
+        SPEED_CYCLECOUNTER_OBJ_32=ia64.lo
+        cyclecounter_size_32=2
+
+        # Must have +DD64 in CPPFLAGS to get the right __LP64__ for headers,
+        # but also need it in CFLAGS for linking programs, since automake
+        # only uses CFLAGS when linking, not CPPFLAGS.
+        # FIXME: Maybe should use cc_64_ldflags for this, but that would
+        # need GMP_LDFLAGS used consistently by all the programs.
+        #
+        cc_64_cflags="+DD64"
+        cc_64_cppflags="+DD64"
+        cc_64_cflags_optlist="opt"
+        cc_64_cflags_opt="+O3 +O2 +O1"
+        gcc_64_cflags="$gcc_cflags -mlp64"
+        ;;
+    esac
+    ;;
+
+
+  # Motorola 68k
+  #
+  M68K_PATTERN)
+    AC_DEFINE(HAVE_HOST_CPU_FAMILY_m68k)
+    GMP_INCLUDE_MPN(m68k/m68k-defs.m4)
+    gcc_cflags="$gcc_cflags $fomit_frame_pointer"
+    gcc_cflags_optlist="arch"
+
+    # gcc 2.7.2 knows -m68000, -m68020, -m68030, -m68040.
+    # gcc 2.95 adds -mcpu32, -m68060.
+    # FIXME: Maybe "-m68020 -mnobitfield" would suit cpu32 on 2.7.2.
+    #
+    case $host_cpu in
+    m68020)  gcc_cflags_arch="-m68020" ;;
+    m68030)  gcc_cflags_arch="-m68030" ;;
+    m68040)  gcc_cflags_arch="-m68040" ;;
+    m68060)  gcc_cflags_arch="-m68060 -m68000" ;;
+    m68360)  gcc_cflags_arch="-mcpu32 -m68000" ;;
+    *)       gcc_cflags_arch="-m68000" ;;
+    esac
+
+    # FIXME: m68k/mc68020 looks like it's ok for cpu32, but this wants to be
+    # tested.  Will need to introduce an m68k/cpu32 if m68k/mc68020 ever uses
+    # the bitfield instructions.
+    case $host_cpu in
+    [m680[234]0 | m68360])  path="m68k/mc68020 m68k" ;;
+    *)                      path="m68k" ;;
+    esac
+    ;;
+
+
+  # Motorola 88k
+  m88k*-*-*)
+    path="m88k"
+    ;;
+  m88110*-*-*)
+    gcc_cflags="$gcc_cflags -m88110"
+    path="m88k/mc88110 m88k"
+    ;;
+
+
+  # National Semiconductor 32k
+  ns32k*-*-*)
+    path="ns32k"
+    ;;
+
+
+  # IRIX 5 and earlier can only run 32-bit o32.
+  #
+  # IRIX 6 and up always has a 64-bit mips CPU and can run n32 or 64.  n32 is
+  # preferred over 64, but only because that's been the default in past
+  # versions of GMP.  The two are equally efficient.
+  #
+  # Linux kernel 2.2.13 arch/mips/kernel/irixelf.c has a comment about not
+  # supporting n32 or 64.
+  #
+  # For reference, libtool (eg. 1.5.6) recognises the n32 ABI and knows the
+  # right options to use when linking (both cc and gcc), so no need for
+  # anything special from us.
+  #
+  mips*-*-*)
+    abilist="o32"
+    gcc_cflags_optlist="abi"
+    gcc_cflags_abi="-mabi=32"
+    gcc_testlist="gcc-mips-o32"
+    path="mips32"
+    cc_cflags="-O2 -o32"   # no -g, it disables all optimizations
+    # this suits both mips32 and mips64
+    GMP_INCLUDE_MPN(mips32/mips-defs.m4)
+
+    case $host in
+      [mips64*-*-* | mips*-*-irix[6789]*])
+        abilist="n32 64 o32"
+
+        cclist_n32="gcc cc"
+        gcc_n32_cflags="$gcc_cflags -mabi=n32"
+        cc_n32_cflags="-O2 -n32"       # no -g, it disables all optimizations
+        limb_n32=longlong
+        path_n32="mips64"
+
+        cclist_64="gcc cc"
+        gcc_64_cflags="$gcc_cflags -mabi=64"
+        gcc_64_ldflags="-Wc,-mabi=64"
+        cc_64_cflags="-O2 -64"         # no -g, it disables all optimizations
+        cc_64_ldflags="-Wc,-64"
+        path_64="mips64"
+        ;;
+    esac
+    ;;
+
+
+  # Darwin (powerpc-apple-darwin1.3) has its hacked gcc installed as cc.
+  # Our usual "gcc in disguise" detection means gcc_cflags etc here gets
+  # used.
+  #
+  # The darwin pre-compiling preprocessor is disabled with -no-cpp-precomp
+  # since it doesn't like "__attribute__ ((mode (SI)))" etc in gmp-impl.h,
+  # and so always ends up running the plain preprocessor anyway.  This could
+  # be done in CPPFLAGS rather than CFLAGS, but there are not many places
+  # preprocessing is done separately, and this is only a speedup, the normal
+  # preprocessor gets run if there are any problems.
+  #
+  # We used to use -Wa,-mppc with gcc, but can't remember exactly why.
+  # Presumably it was for old versions of gcc where -mpowerpc doesn't put
+  # the assembler in the right mode.  In any case -Wa,-mppc is not good, for
+  # instance -mcpu=604 makes recent gcc use -m604 to get access to the
+  # "fsel" instruction, but a -Wa,-mppc overrides that, making code that
+  # comes out with fsel fail.
+  #
+  # (Note also that the darwin assembler doesn't accept "-mppc", so any
+  # -Wa,-mppc was used only if it worked.  The right flag on darwin would be
+  # "-arch ppc" or some such, but that's already the default.)
+  #
+  [powerpc*-*-* | power[3-9]-*-*])
+    # Baseline for every PowerPC host: a single ABI named "32".  Hosts
+    # matching POWERPC64_PATTERN get further ABIs prepended to abilist in
+    # the last case statement of this branch.
+    AC_DEFINE(HAVE_HOST_CPU_FAMILY_powerpc)
+    HAVE_HOST_CPU_FAMILY_powerpc=1
+    abilist="32"
+    cclist="gcc cc"
+    cc_cflags="-O2"
+    gcc_32_cflags="$gcc_cflags -mpowerpc"
+    gcc_cflags_optlist="precomp subtype asm cpu"
+    gcc_cflags_precomp="-no-cpp-precomp"
+    gcc_cflags_subtype="-force_cpusubtype_ALL" # for vmx on darwin
+    # The asm and cpu options and vmx_path start out empty; the per-CPU
+    # case below fills them in where applicable.
+    gcc_cflags_asm=""
+    gcc_cflags_cpu=""
+    vmx_path=""
+
+    # grab this object, though it's not a true cycle counter routine
+    SPEED_CYCLECOUNTER_OBJ=powerpc.lo
+    cyclecounter_size=0
+
+    # mpn directories for the 32-bit ABI, chosen by CPU model.
+    case $host_cpu in
+      powerpc740 | powerpc750)
+        path="powerpc32/750 powerpc32" ;;
+      powerpc7400 | powerpc7410)
+        path="powerpc32/vmx powerpc32/750 powerpc32" ;;
+      [powerpc74[45]?])
+        path="powerpc32/vmx powerpc32" ;;
+      *)
+        path="powerpc32" ;;
+    esac
+
+    # Per-CPU compiler options for gcc (gcc_cflags_cpu, gcc_cflags_asm) and
+    # xlc (xlc_cflags_arch).  Where a variable lists several options, the
+    # later ones are presumably fallbacks for compilers that reject the
+    # first -- TODO confirm against the optlist handling.
+    #
+    # cpu_path and vmx_path are consumed only by the 64-bit path
+    # construction further down.
+    case $host_cpu in
+      powerpc401)   gcc_cflags_cpu="-mcpu=401" ;;
+      powerpc403)   gcc_cflags_cpu="-mcpu=403"
+                   xlc_cflags_arch="-qarch=403 -qarch=ppc" ;;
+      powerpc405)   gcc_cflags_cpu="-mcpu=405" ;;
+      powerpc505)   gcc_cflags_cpu="-mcpu=505" ;;
+      powerpc601)   gcc_cflags_cpu="-mcpu=601"
+                   xlc_cflags_arch="-qarch=601 -qarch=ppc" ;;
+      powerpc602)   gcc_cflags_cpu="-mcpu=602"
+                   xlc_cflags_arch="-qarch=602 -qarch=ppc" ;;
+      powerpc603)   gcc_cflags_cpu="-mcpu=603"
+                   xlc_cflags_arch="-qarch=603 -qarch=ppc" ;;
+      powerpc603e)  gcc_cflags_cpu="-mcpu=603e -mcpu=603"
+                   xlc_cflags_arch="-qarch=603 -qarch=ppc" ;;
+      powerpc604)   gcc_cflags_cpu="-mcpu=604"
+                   xlc_cflags_arch="-qarch=604 -qarch=ppc" ;;
+      powerpc604e)  gcc_cflags_cpu="-mcpu=604e -mcpu=604"
+                   xlc_cflags_arch="-qarch=604 -qarch=ppc" ;;
+      powerpc620)   gcc_cflags_cpu="-mcpu=620" ;;
+      powerpc630)   gcc_cflags_cpu="-mcpu=630"
+                   xlc_cflags_arch="-qarch=pwr3"
+                   cpu_path="p3 p3-p7" ;;
+      powerpc740)   gcc_cflags_cpu="-mcpu=740" ;;
+      powerpc7400 | powerpc7410)
+                   gcc_cflags_asm="-Wa,-maltivec"
+                   gcc_cflags_cpu="-mcpu=7400 -mcpu=750" ;;
+      [powerpc74[45]?])
+                   gcc_cflags_asm="-Wa,-maltivec"
+                   gcc_cflags_cpu="-mcpu=7450" ;;
+      powerpc750)   gcc_cflags_cpu="-mcpu=750" ;;
+      powerpc801)   gcc_cflags_cpu="-mcpu=801" ;;
+      powerpc821)   gcc_cflags_cpu="-mcpu=821" ;;
+      powerpc823)   gcc_cflags_cpu="-mcpu=823" ;;
+      powerpc860)   gcc_cflags_cpu="-mcpu=860" ;;
+      powerpc970)   gcc_cflags_cpu="-mtune=970"
+                   xlc_cflags_arch="-qarch=970 -qarch=pwr3"
+                   vmx_path="powerpc64/vmx"
+                   cpu_path="p4 p3-p7" ;;
+      power4)      gcc_cflags_cpu="-mtune=power4"
+                   xlc_cflags_arch="-qarch=pwr4"
+                   cpu_path="p4 p3-p7" ;;
+      power5)      gcc_cflags_cpu="-mtune=power5 -mtune=power4"
+                   xlc_cflags_arch="-qarch=pwr5"
+                   cpu_path="p5 p4 p3-p7" ;;
+      power6)      gcc_cflags_cpu="-mtune=power6"
+                   xlc_cflags_arch="-qarch=pwr6"
+                   cpu_path="p6 p3-p7" ;;
+      power7)      gcc_cflags_cpu="-mtune=power7 -mtune=power5"
+                   xlc_cflags_arch="-qarch=pwr7 -qarch=pwr5"
+                   cpu_path="p7 p5 p4 p3-p7" ;;
+    esac
+
+    # AIX: also consider xlc, and give the compilers, ar and nm their
+    # 32-bit options for this ABI.  This is the only arm of the case, so
+    # it carries no terminating ";;".
+    case $host in
+      *-*-aix*)
+       cclist="gcc xlc cc"
+       gcc_32_cflags_maybe="-maix32"
+       xlc_cflags="-O2 -qmaxmem=20000"
+       xlc_cflags_optlist="arch"
+       xlc_32_cflags_maybe="-q32"
+       ar_32_flags="-X32"
+       nm_32_flags="-X32"
+    esac
+
+    # 64-bit capable CPUs: add the extra ABIs available on each OS.
+    case $host in
+      POWERPC64_PATTERN)
+       case $host in
+         *-*-aix*)
+           # On AIX a true 64-bit ABI is available.
+           # Need -Wc to pass object type flags through to the linker.
+           abilist="mode64 $abilist"
+           cclist_mode64="gcc xlc"
+           gcc_mode64_cflags="$gcc_cflags -maix64 -mpowerpc64"
+           gcc_mode64_cflags_optlist="cpu"
+           gcc_mode64_ldflags="-Wc,-maix64"
+           xlc_mode64_cflags="-O2 -q64 -qmaxmem=20000"
+           xlc_mode64_cflags_optlist="arch"
+           xlc_mode64_ldflags="-Wc,-q64"
+           # Must indicate object type to ar and nm
+           ar_mode64_flags="-X64"
+           nm_mode64_flags="-X64"
+           # Build the mode64 path from cpu_path: each entry contributes
+           # powerpc64/mode64/<entry> and powerpc64/<entry>.  Its
+           # powerpc32/<entry> counterpart is collected in p and put in
+           # front of the 32-bit path afterwards.
+           path_mode64=""
+           p=""
+           for i in $cpu_path
+             do path_mode64="${path_mode64}powerpc64/mode64/$i "
+                path_mode64="${path_mode64}powerpc64/$i "
+                p="${p} powerpc32/$i "
+             done
+           path_mode64="${path_mode64}powerpc64/mode64 $vmx_path powerpc64"
+           path="$p $path"
+           # grab this object, though it's not a true cycle counter routine
+           SPEED_CYCLECOUNTER_OBJ_mode64=powerpc64.lo
+           cyclecounter_size_mode64=0
+           ;;
+         *-*-darwin*)
+           # On Darwin we can use 64-bit instructions with a longlong limb,
+           # but the chip is still in 32-bit mode.
+           # In theory this can be used on any OS which knows how to save
+           # 64-bit registers in a context switch.
+           #
+           # Note that we must use -mpowerpc64 with gcc, since the
+           # longlong.h macros expect limb operands in a single 64-bit
+           # register, not two 32-bit registers as would be given for a
+           # long long without -mpowerpc64.  In theory we could detect and
+           # accommodate both styles, but the proper 64-bit registers will
+           # be fastest and are what we really want to use.
+           #
+           # One would think -mpowerpc64 would set the assembler in the right
+           # mode to handle 64-bit instructions.  But for that, also
+           # -force_cpusubtype_ALL is needed.
+           #
+           # Do not use -fast for Darwin, it actually adds options
+           # incompatible with a shared library.
+           #
+           abilist="mode64 mode32 $abilist"
+           gcc_32_cflags_maybe="-m32"
+           gcc_cflags_opt="-O3 -O2 -O1"        # will this become used?
+           cclist_mode32="gcc"
+           gcc_mode32_cflags_maybe="-m32"
+           gcc_mode32_cflags="-mpowerpc64"
+           gcc_mode32_cflags_optlist="subtype cpu opt"
+           gcc_mode32_cflags_subtype="-force_cpusubtype_ALL"
+           gcc_mode32_cflags_opt="-O3 -O2 -O1"
+           limb_mode32=longlong
+           cclist_mode64="gcc"
+           gcc_mode64_cflags="-m64"
+           gcc_mode64_cflags_optlist="cpu opt"
+           gcc_mode64_cflags_opt="-O3 -O2 -O1"
+           # Same path construction as for AIX, with a mode32 path built
+           # alongside the mode64 one.
+           path_mode64=""
+           path_mode32=""
+           p=""
+           for i in $cpu_path
+             do path_mode64="${path_mode64}powerpc64/mode64/$i "
+                path_mode64="${path_mode64}powerpc64/$i "
+                path_mode32="${path_mode32}powerpc64/mode32/$i "
+                path_mode32="${path_mode32}powerpc64/$i "
+                p="${p} powerpc32/$i "
+             done
+           path_mode64="${path_mode64}powerpc64/mode64 $vmx_path powerpc64"
+           path_mode32="${path_mode32}powerpc64/mode32 $vmx_path powerpc64"
+           path="$p $path"
+           SPEED_CYCLECOUNTER_OBJ_mode64=powerpc64.lo
+           cyclecounter_size_mode64=0
+           any_mode64_testlist="sizeof-long-8"
+           ;;
+         *-*-linux* | *-*-*bsd*)
+           # On GNU/Linux, assume the processor is in 64-bit mode.  Some
+           # environments have a gcc that is always in 64-bit mode, while
+           # others require -m64, hence the use of cflags_maybe.  The
+           # sizeof-long-8 test checks the mode is right (for the no option
+           # case).
+           #
+           # -mpowerpc64 is not used, since it should be the default in
+           # 64-bit mode.  (We need its effect for the various longlong.h
+           # asm macros to be right of course.)
+           #
+           # gcc64 was an early port of gcc to 64-bit mode, but should be
+           # obsolete before too long.  We prefer plain gcc when it knows
+           # 64-bits.
+           #
+           abilist="mode64 mode32 $abilist"
+           gcc_32_cflags_maybe="-m32"
+           cclist_mode32="gcc"
+           gcc_mode32_cflags_maybe="-m32"
+           gcc_mode32_cflags="-mpowerpc64"
+           gcc_mode32_cflags_optlist="cpu opt"
+           gcc_mode32_cflags_opt="-O3 -O2 -O1"
+           limb_mode32=longlong
+           cclist_mode64="gcc gcc64"
+           gcc_mode64_cflags_maybe="-m64"
+           gcc_mode64_cflags_optlist="cpu opt"
+           gcc_mode64_cflags_opt="-O3 -O2 -O1"
+           # Same mode64/mode32 path construction as for Darwin.
+           path_mode64=""
+           path_mode32=""
+           p=""
+           for i in $cpu_path
+             do path_mode64="${path_mode64}powerpc64/mode64/$i "
+                path_mode64="${path_mode64}powerpc64/$i "
+                path_mode32="${path_mode32}powerpc64/mode32/$i "
+                path_mode32="${path_mode32}powerpc64/$i "
+                p="${p} powerpc32/$i "
+             done
+           path_mode64="${path_mode64}powerpc64/mode64 $vmx_path powerpc64"
+           path_mode32="${path_mode32}powerpc64/mode32 $vmx_path powerpc64"
+           path="$p $path"
+           SPEED_CYCLECOUNTER_OBJ_mode64=powerpc64.lo
+           cyclecounter_size_mode64=0
+           any_mode64_testlist="sizeof-long-8"
+           ;;
+       esac
+       ;;
+    esac
+    ;;
+
+
+  # POWER 32-bit
+  [power-*-* | power[12]-*-* | power2sc-*-*])
+    HAVE_HOST_CPU_FAMILY_power=1
+    AC_DEFINE(HAVE_HOST_CPU_FAMILY_power)
+    path="power"
+    extra_functions="udiv_w_sdiv"
+
+    # gcc everywhere; on AIX xlc is tried as well.
+    case $host in
+      *-*-aix*)
+        cclist="gcc xlc"
+        xlc_cflags="-O2 -qarch=pwr -qmaxmem=20000"
+        ;;
+      *)
+        cclist="gcc"
+        ;;
+    esac
+
+    # gcc 2.7.2 knows rios1, rios2 and rsc.  Every list ends in plain
+    # -mpower as a fallback, needed because -mcpu=rios2 can tickle an AIX
+    # assembler bug (see GMP_PROG_CC_WORKS).
+    gcc_cflags_optlist="cpu"
+    case $host in
+      power-*-*)
+        gcc_cflags_cpu="-mcpu=power -mpower"
+        ;;
+      power1-*-*)
+        gcc_cflags_cpu="-mcpu=rios1 -mpower"
+        ;;
+      power2-*-*)
+        gcc_cflags_cpu="-mcpu=rios2 -mpower"
+        ;;
+      power2sc-*-*)
+        gcc_cflags_cpu="-mcpu=rsc   -mpower"
+        ;;
+    esac
+    ;;
+
+
+  pyramid-*-*)  path="pyr" ;;
+
+
+  # IBM System/390 and z/Architecture
+  S390_PATTERN | S390X_PATTERN)
+    # Defaults for the "32" ABI (gcc -m31 when accepted).  Plain s390
+    # keeps these as they are.
+    abilist="32"
+    path="s390_32"
+    extra_functions="udiv_w_sdiv"
+    gcc_cflags="$gcc_cflags $fomit_frame_pointer"
+    gcc_cflags_optlist="arch"
+    gcc_32_cflags_maybe="-m31"
+
+    # Map the configured CPU to the name used for the mpn directory and the
+    # HAVE_HOST_CPU_s390_* define (cpu) and to the gcc -march value
+    # (gccarch).  Only z9 has a -march name that differs from the cpu name.
+    case $host_cpu in
+      z900 | z900esa)  cpu="z900"; gccarch="$cpu"   ;;
+      z990 | z990esa)  cpu="z990"; gccarch="$cpu"   ;;
+      z9 | z9esa)      cpu="z9";   gccarch="z9-109" ;;
+      z10 | z10esa)    cpu="z10";  gccarch="z10"    ;;
+      z196 | z196esa)  cpu="z196"; gccarch="z196"   ;;
+    esac
+
+    # Settings common to all the CPUs recognised above: the esame
+    # directories go in front of the path, the -march option and CPU
+    # defines are set, and udiv_w_sdiv is dropped.
+    case $host_cpu in
+      z900 | z900esa | z990 | z990esa | z9 | z9esa | z10 | z10esa | z196 | z196esa)
+        path="s390_32/esame/$cpu s390_32/esame s390_32"
+        gcc_cflags_arch="-march=$gccarch"
+        AC_DEFINE_UNQUOTED(HAVE_HOST_CPU_s390_$cpu)
+        AC_DEFINE(HAVE_HOST_CPU_s390_zarch)
+        extra_functions=""
+        ;;
+    esac
+
+    # 64-bit hosts: ABI=64 is tried first, then the 32 ABI set up above.
+    case $host in
+      S390X_PATTERN)
+        abilist="64 32"
+        path_64="s390_64/$host_cpu s390_64"
+        cclist_64="gcc"
+        gcc_64_cflags="$gcc_cflags -m64"
+        gcc_64_cflags_optlist="arch"
+        extra_functions=""
+        ;;
+    esac
+    ;;
+
+
+  sh-*-*)
+    path="sh"
+    ;;
+  [sh[2-4]-*-*])
+    # sh2 through sh4 look in sh/sh2 first, then the generic sh code.
+    path="sh/sh2 sh"
+    ;;
+
+
+  *sparc*-*-*)
+    # sizeof(long)==4 or 8 is tested, to ensure we get the right ABI.  We've
+    # had various bug reports where users have set CFLAGS for their desired
+    # mode, but not set our ABI.  For some reason it's sparc where this
+    # keeps coming up, presumably users there are accustomed to driving the
+    # compiler mode that way.  The effect of our testlist setting is to
+    # reject ABI=64 in favour of ABI=32 if the user has forced the flags to
+    # 32-bit mode.
+    #
+    abilist="32"
+    cclist="gcc acc cc"
+    any_testlist="sizeof-long-4"
+    GMP_INCLUDE_MPN(sparc32/sparc-defs.m4)
+
+    # mpn directories for ABI=32, chosen by CPU.
+    case $host_cpu in
+      sparcv8 | microsparc | turbosparc)
+        path="sparc32/v8 sparc32" ;;
+      supersparc)
+        path="sparc32/v8/supersparc sparc32/v8 sparc32" ;;
+      [sparc64 | sparcv9* | ultrasparc | ultrasparc[234]*])
+        path="sparc32/v9 sparc32/v8 sparc32" ;;
+      [ultrasparct[12345]])
+        path="sparc32/ultrasparct1 sparc32/v8 sparc32" ;;
+      *)
+        path="sparc32" ;;
+    esac
+
+    # gcc 2.7.2 doesn't know about v9 and doesn't pass -xarch=v8plus to the
+    # assembler.  Add it explicitly since the solaris assembler won't accept
+    # our sparc32/v9 asm code without it.  gas accepts -xarch=v8plus too, so
+    # it can be in the cflags unconditionally (though gas doesn't need it).
+    #
+    # gcc -m32 is needed to force 32-bit mode on a dual-ABI system, but past
+    # gcc doesn't know that flag, hence cflags_maybe.  Note that -m32 cannot
+    # be done through the optlist since the plain cflags would be run first
+    # and we don't want to require the default mode (whatever it is) works.
+    #
+    # Note it's gcc_32_cflags_maybe and not gcc_cflags_maybe because the
+    # latter would be used in the 64-bit ABI on systems like "*bsd" where
+    # abilist="64" only.
+    #
+    # The ultrasparct[345] arm must come first, since those names also match
+    # the ultrasparc* pattern of the second arm.
+    case $host_cpu in
+      [ultrasparct[345]])
+        gcc_32_cflags="$gcc_cflags -Wa,-xarch=v8plusd" ;;
+      sparc64 | sparcv9* | ultrasparc*)
+        gcc_32_cflags="$gcc_cflags -Wa,-xarch=v8plus" ;;
+    esac
+    gcc_32_cflags_maybe="-m32"
+    gcc_cflags_optlist="cpu"
+
+    # gcc 2.7.2 knows -mcypress, -msupersparc, -mv8, -msparclite.
+    # gcc 2.95 knows -mcpu= v7, hypersparc, sparclite86x, f930, f934,
+    #   sparclet, tsc701, v9, ultrasparc.  A warning is given that the
+    #   plain -m forms will disappear.
+    # gcc 3.0 adds nothing.
+    # gcc 3.1 adds nothing.
+    # gcc 3.2 adds nothing.
+    # gcc 3.3 adds ultrasparc3.
+    #
+    case $host_cpu in
+      supersparc)           gcc_cflags_cpu="-mcpu=supersparc -msupersparc" ;;
+      sparcv8 | microsparc | turbosparc)
+                           gcc_cflags_cpu="-mcpu=v8 -mv8" ;;
+      sparc64 | sparcv9*)   gcc_cflags_cpu="-mcpu=v9 -mv8" ;;
+      ultrasparc3)          gcc_cflags_cpu="-mcpu=ultrasparc3 -mcpu=ultrasparc -mv8" ;;
+      ultrasparc*)          gcc_cflags_cpu="-mcpu=ultrasparc -mv8" ;;
+      *)                    gcc_cflags_cpu="-mcpu=v7 -mcypress" ;;
+    esac
+
+    # SunPRO cc and acc, and SunOS bundled cc
+    case $host in
+      *-*-solaris* | *-*-sunos*)
+       # Note no -g, it disables all optimizations.
+       cc_cflags=
+       cc_cflags_optlist="opt arch cpu"
+
+        # SunOS cc doesn't know -xO4, fallback to -O2.
+       cc_cflags_opt="-xO4 -O2"
+
+        # SunOS cc doesn't know -xarch, apparently always generating v7
+        # code, so make this optional
+       case $host_cpu in
+         sparcv8 | microsparc | supersparc | turbosparc)
+                                             cc_cflags_arch="-xarch=v8" ;;
+          [ultrasparct[345]])                 cc_cflags_arch="-xarch=v8plusd" ;;
+         sparc64 | sparcv9* | ultrasparc*)   cc_cflags_arch="-xarch=v8plus" ;;
+         *)                                  cc_cflags_arch="-xarch=v7" ;;
+       esac
+
+        # SunOS cc doesn't know -xchip and doesn't seem to have an equivalent.
+       # SunPRO cc 5 recognises -xchip=generic, old, super, super2, micro,
+       #   micro2, hyper, hyper2, powerup, ultra, ultra2, ultra2i.
+       # SunPRO cc 6 adds -xchip=ultra2e, ultra3cu.
+        #
+       # FIXME: Which of ultra, ultra2 or ultra2i is the best fallback for
+       # ultrasparc3?
+       #
+       case $host_cpu in
+         supersparc)   cc_cflags_cpu="-xchip=super" ;;
+         microsparc)   cc_cflags_cpu="-xchip=micro" ;;
+         turbosparc)   cc_cflags_cpu="-xchip=micro2" ;;
+         ultrasparc)   cc_cflags_cpu="-xchip=ultra" ;;
+         ultrasparc2)  cc_cflags_cpu="-xchip=ultra2" ;;
+         ultrasparc2i) cc_cflags_cpu="-xchip=ultra2i" ;;
+         ultrasparc3)  cc_cflags_cpu="-xchip=ultra3 -xchip=ultra" ;;
+         *)            cc_cflags_cpu="-xchip=generic" ;;
+       esac
+    esac
+
+    # v9 CPUs: decide which ABIs are offered on this OS, then set up ABI=64.
+    # The ABI=64 settings below are made even when abilist is left at "32"
+    # (old Solaris).
+    case $host_cpu in
+      sparc64 | sparcv9* | ultrasparc*)
+        case $host in
+          # Solaris 6 and earlier cannot run ABI=64 since it doesn't save
+          # registers properly, so ABI=32 is left as the only choice.
+          #
+          [*-*-solaris2.[0-6] | *-*-solaris2.[0-6].*]) ;;
+
+          # BSD sparc64 ports are 64-bit-only systems, so ABI=64 is the only
+          # choice.  In fact they need no special compiler flags, gcc -m64
+          # is the default, but it doesn't hurt to add it.  v9 CPUs always
+          # use the sparc64 port, since the plain 32-bit sparc ports don't
+          # run on a v9.
+          #
+          *-*-*bsd*) abilist="64" ;;
+
+          # For all other systems, we try both 64 and 32.
+          #
+          # GNU/Linux sparc64 has only recently gained a 64-bit user mode.
+          # In the past sparc64 meant a v9 cpu, but there were no 64-bit
+          # operations in user mode.  We assume that if "gcc -m64" works
+          # then the system is suitable.  Hopefully even if someone attempts
+          # to put a new gcc and/or glibc on an old system it won't run.
+          #
+          *) abilist="64 32" ;;
+        esac
+
+       # mpn directories for ABI=64, chosen by CPU.
+       case $host_cpu in
+         ultrasparc | ultrasparc2 | ultrasparc2i)
+           path_64="sparc64/ultrasparc1234 sparc64" ;;
+         [ultrasparc[34]])
+           path_64="sparc64/ultrasparc34 sparc64/ultrasparc1234 sparc64" ;;
+         [ultrasparct[12345]])
+           path_64="sparc64/ultrasparct1 sparc64" ;;
+         *)
+           path_64="sparc64"
+       esac
+
+        # ABI=64 is built with gcc and must pass the sizeof(long)==8 check.
+        cclist_64="gcc"
+        any_64_testlist="sizeof-long-8"
+
+        # gcc -mptr64 is probably implied by -m64, but we're not sure if
+        # this was always so.  On Solaris in the past we always used both
+        # "-m64 -mptr64".
+        #
+        # gcc -Wa,-xarch=v9 is thought to be necessary in some cases on
+        # solaris, but it would seem likely that if gcc is going to generate
+        # 64-bit code it will have to add that option itself where needed.
+        # An extra copy of this option should be harmless though, but leave
+        # it until we're sure.  (Might want -xarch=v9a or -xarch=v9b for the
+        # higher cpu types instead.)
+        #
+        gcc_64_cflags="$gcc_cflags -m64 -mptr64"
+        gcc_64_ldflags="-Wc,-m64"
+        gcc_64_cflags_optlist="cpu"
+
+        case $host in
+          *-*-solaris*)
+            # Sun cc.
+            #
+            # We used to have -fast and some fixup options here, but it
+            # recurrently caused problems with miscompilation.  Of course,
+            # -fast is documented as miscompiling things for the sake of speed.
+            #
+            cclist_64="$cclist_64 cc"
+            cc_64_cflags="-xO3 -xarch=v9"
+            cc_64_cflags_optlist="cpu"
+            ;;
+        esac
+
+        # using the v9 %tick register
+        SPEED_CYCLECOUNTER_OBJ_32=sparcv9.lo
+        SPEED_CYCLECOUNTER_OBJ_64=sparcv9.lo
+        cyclecounter_size_32=2
+        cyclecounter_size_64=2
+        ;;
+    esac
+    ;;
+
+
+  # VAX
+  vax*-*-*)
+    case $host in
+      vax*-*-*elf*)
+        # Use elf conventions (i.e., '%' register prefix, no global prefix)
+        GMP_INCLUDE_MPN(vax/elf.m4)
+        ;;
+      # Everything else defaults to aout conventions (i.e., no register
+      # prefix, '_' global prefix), which need no extra include.
+    esac
+
+    # Common to both conventions.
+    gcc_cflags="$gcc_cflags $fomit_frame_pointer"
+    path="vax"
+    extra_functions="udiv_w_sdiv"
+    ;;
+
+
+  # AMD and Intel x86 configurations, including AMD64
+  #
+  # Rumour has it gcc -O2 used to give worse register allocation than just
+  # -O, but let's assume that's no longer true.
+  #
+  # -m32 forces 32-bit mode on a bi-arch 32/64 amd64 build of gcc.  -m64 is
+  # the default in such a build (we think), so -m32 is essential for ABI=32.
+  # This is, of course, done for any $host_cpu, not just x86_64, so we can
+  # get such a gcc into the right mode to cross-compile to say i486-*-*.
+  #
+  # -m32 is not available in gcc 2.95 and earlier, hence cflags_maybe to use
+  # it when it works.  We check sizeof(long)==4 to ensure we get the right
+  # mode, in case -m32 has failed not because it's an old gcc, but because
+  # it's a dual 32/64-bit gcc without a 32-bit libc, or whatever.
+  #
+  X86_PATTERN | X86_64_PATTERN)
+    # Defaults shared by all x86 hosts: ABI=32 only.  Hosts matching
+    # X86_64_PATTERN have the 64 and x32 ABIs added in the last case
+    # statement of this branch.
+    abilist="32"
+    cclist="gcc icc cc"
+    gcc_cflags="$gcc_cflags $fomit_frame_pointer"
+    gcc_32_cflags_maybe="-m32"
+    icc_cflags="-no-gcc"
+    icc_cflags_optlist="opt"
+    icc_cflags_opt="-O3 -O2 -O1"
+    any_32_testlist="sizeof-long-4"
+    CALLING_CONVENTIONS_OBJS='x86call.lo x86check$U.lo'
+
+    # Availability of rdtsc is checked at run-time.
+    SPEED_CYCLECOUNTER_OBJ=pentium.lo
+
+    # gcc 2.7.2 only knows i386 and i486, using -m386 or -m486.  These
+    #     represent -mcpu= since -m486 doesn't generate 486 specific insns.
+    # gcc 2.95 adds k6, pentium and pentiumpro, and takes -march= and -mcpu=.
+    # gcc 3.0 adds athlon.
+    # gcc 3.1 adds k6-2, k6-3, pentium-mmx, pentium2, pentium3, pentium4,
+    #     athlon-tbird, athlon-4, athlon-xp, athlon-mp.
+    # gcc 3.2 adds winchip2.
+    # gcc 3.3 adds winchip-c6.
+    # gcc 3.3.1 from mandrake adds k8 and knows -mtune.
+    # gcc 3.4 adds c3, c3-2, k8, and deprecates -mcpu in favour of -mtune.
+    #
+    # In gcc 2.95.[0123], -march=pentiumpro provoked a stack slot bug in an
+    # old version of mpz/powm.c.  Seems to be fine with the current code, so
+    # no need for any restrictions on that option.
+    #
+    # -march=pentiumpro can fail if the assembler doesn't know "cmov"
+    # (eg. solaris 2.8 native "as"), so always have -march=pentium after
+    # that as a fallback.
+    #
+    # -march=pentium4 and -march=k8 enable SSE2 instructions, which may or
+    # may not be supported by the assembler and/or the OS, and is bad in gcc
+    # prior to 3.3.  The tests will reject these if no good, so fallbacks
+    # like "-march=pentium4 -mno-sse2" are given to try also without SSE2.
+    # Note the relevant -march types are listed in the optflags handling
+    # below, be sure to update there if adding new types emitting SSE2.
+    #
+    # -mtune is used at the start of each cpu option list to give something
+    # gcc 3.4 will use, thereby avoiding warnings from -mcpu.  -mcpu forms
+    # are retained for use by prior gcc.  For example pentium has
+    # "-mtune=pentium -mcpu=pentium ...", the -mtune is for 3.4 and the
+    # -mcpu for prior.  If there's a brand new choice in 3.4 for a chip,
+    # like k8 for x86_64, then it can be the -mtune at the start, no need to
+    # duplicate anything.
+    #
+    # Each arm below sets the gcc cpu and arch option lists and the 32-bit
+    # mpn path; arms for 64-bit capable CPUs also set path_64.
+    #
+    gcc_cflags_optlist="cpu arch"
+    case $host_cpu in
+      i386*)
+       gcc_cflags_cpu="-mtune=i386 -mcpu=i386 -m386"
+       gcc_cflags_arch="-march=i386"
+       path="x86"
+       ;;
+      i486*)
+       gcc_cflags_cpu="-mtune=i486 -mcpu=i486 -m486"
+       gcc_cflags_arch="-march=i486"
+       path="x86/i486 x86"
+       ;;
+      i586 | pentium)
+       gcc_cflags_cpu="-mtune=pentium -mcpu=pentium -m486"
+       gcc_cflags_arch="-march=pentium"
+       path="x86/pentium x86"
+       ;;
+      pentiummmx)
+       gcc_cflags_cpu="-mtune=pentium-mmx -mcpu=pentium-mmx -mcpu=pentium -m486"
+       gcc_cflags_arch="-march=pentium-mmx -march=pentium"
+       path="x86/pentium/mmx x86/pentium x86"
+       ;;
+      i686 | pentiumpro)
+       gcc_cflags_cpu="-mtune=pentiumpro -mcpu=pentiumpro -mcpu=i486 -m486"
+       gcc_cflags_arch="-march=pentiumpro -march=pentium"
+       path="x86/p6 x86"
+       ;;
+      pentium2)
+       gcc_cflags_cpu="-mtune=pentium2 -mcpu=pentium2 -mcpu=pentiumpro -mcpu=i486 -m486"
+       gcc_cflags_arch="-march=pentium2 -march=pentiumpro -march=pentium"
+       path="x86/p6/mmx x86/p6 x86"
+       ;;
+      pentium3)
+       gcc_cflags_cpu="-mtune=pentium3 -mcpu=pentium3 -mcpu=pentiumpro -mcpu=i486 -m486"
+       gcc_cflags_arch="-march=pentium3 -march=pentiumpro -march=pentium"
+       path="x86/p6/p3mmx x86/p6/mmx x86/p6 x86"
+       ;;
+      pentiumm)
+       # Same compiler options as pentium3, with x86/p6/sse2 added in front
+       # of the path.
+       gcc_cflags_cpu="-mtune=pentium3 -mcpu=pentium3 -mcpu=pentiumpro -mcpu=i486 -m486"
+       gcc_cflags_arch="-march=pentium3 -march=pentiumpro -march=pentium"
+       path="x86/p6/sse2 x86/p6/p3mmx x86/p6/mmx x86/p6 x86"
+       ;;
+      k6)
+       gcc_cflags_cpu="-mtune=k6 -mcpu=k6 -mcpu=i486 -m486"
+       gcc_cflags_arch="-march=k6"
+       path="x86/k6/mmx x86/k6 x86"
+       ;;
+      k62)
+       gcc_cflags_cpu="-mtune=k6-2 -mcpu=k6-2 -mcpu=k6 -mcpu=i486 -m486"
+       gcc_cflags_arch="-march=k6-2 -march=k6"
+       path="x86/k6/k62mmx x86/k6/mmx x86/k6 x86"
+       ;;
+      k63)
+       gcc_cflags_cpu="-mtune=k6-3 -mcpu=k6-3 -mcpu=k6 -mcpu=i486 -m486"
+       gcc_cflags_arch="-march=k6-3 -march=k6"
+       path="x86/k6/k62mmx x86/k6/mmx x86/k6 x86"
+       ;;
+      geode)
+       # Same compiler options as k63, with x86/geode added in front of the
+       # path.
+       gcc_cflags_cpu="-mtune=k6-3 -mcpu=k6-3 -mcpu=k6 -mcpu=i486 -m486"
+       gcc_cflags_arch="-march=k6-3 -march=k6"
+       path="x86/geode x86/k6/k62mmx x86/k6/mmx x86/k6 x86"
+       ;;
+      athlon)
+       # Athlon instruction costs are close to P6 (3 cycle load latency,
+       # 4-6 cycle mul, 40 cycle div, pairable adc, etc) so if gcc doesn't
+       # know athlon (eg. 2.95.2 doesn't) then fall back on pentiumpro.
+       gcc_cflags_cpu="-mtune=athlon -mcpu=athlon -mcpu=pentiumpro -mcpu=i486 -m486"
+       gcc_cflags_arch="-march=athlon -march=pentiumpro -march=pentium"
+       path="x86/k7/mmx x86/k7 x86"
+       ;;
+      i786 | pentium4)
+       # pentiumpro is the primary fallback when gcc doesn't know pentium4.
+       # This gets us cmov to eliminate branches.  Maybe "athlon" would be
+       # a possibility on gcc 3.0.
+       #
+       # NOTE(review): the "~" in entries such as -march=pentium4~-mno-sse2
+       # presumably joins two options into a single alternative, i.e. the
+       # "-march=pentium4 -mno-sse2" fallback described above -- confirm
+       # against the optlist handling.
+       #
+       gcc_cflags_cpu="-mtune=pentium4 -mcpu=pentium4 -mcpu=pentiumpro -mcpu=i486 -m486"
+       gcc_cflags_arch="-march=pentium4 -march=pentium4~-mno-sse2 -march=pentiumpro -march=pentium"
+       gcc_64_cflags_cpu="-mtune=nocona"
+       path="x86/pentium4/sse2 x86/pentium4/mmx x86/pentium4 x86"
+       path_64="x86_64/pentium4 x86_64"
+       ;;
+      viac32)
+       # Not sure of the best fallbacks here for -mcpu.
+       # c3-2 has sse and mmx, so pentium3 is good for -march.
+       gcc_cflags_cpu="-mtune=c3-2 -mcpu=c3-2 -mcpu=i486 -m486"
+       gcc_cflags_arch="-march=c3-2 -march=pentium3 -march=pentiumpro -march=pentium"
+       path="x86/p6/p3mmx x86/p6/mmx x86/p6 x86"
+       ;;
+      viac3*)
+       # Not sure of the best fallbacks here.
+       gcc_cflags_cpu="-mtune=c3 -mcpu=c3 -mcpu=i486 -m486"
+       gcc_cflags_arch="-march=c3 -march=pentium-mmx -march=pentium"
+       path="x86/pentium/mmx x86/pentium x86"
+       ;;
+      athlon64 | k8 | x86_64)
+       gcc_cflags_cpu="-mtune=k8 -mcpu=athlon -mcpu=pentiumpro -mcpu=i486 -m486"
+       gcc_cflags_arch="-march=k8 -march=k8~-mno-sse2 -march=athlon -march=pentiumpro -march=pentium"
+       path="x86/k8 x86/k7/mmx x86/k7 x86"
+       path_64="x86_64/k8 x86_64"
+       ;;
+      k10)
+       gcc_cflags_cpu="-mtune=amdfam10 -mtune=k8"
+       gcc_cflags_arch="-march=amdfam10 -march=k8 -march=k8~-mno-sse2"
+       path="x86/k10 x86/k8 x86/k7/mmx x86/k7 x86"
+       path_64="x86_64/k10 x86_64/k8 x86_64"
+       ;;
+      bobcat)
+       gcc_cflags_cpu="-mtune=btver1 -mtune=amdfam10 -mtune=k8"
+       gcc_cflags_arch="-march=btver1 -march=amdfam10 -march=k8 -march=k8~-mno-sse2"
+       path="x86/bobcat x86/k7/mmx x86/k7 x86"
+       path_64="x86_64/bobcat x86_64/k10 x86_64/k8 x86_64"
+       ;;
+      bulldozer | bd1)
+       gcc_cflags_cpu="-mtune=bdver1 -mtune=amdfam10 -mtune=k8"
+       gcc_cflags_arch="-march=bdver1 -march=amdfam10 -march=k8 -march=k8~-mno-sse2"
+       path="x86/bd1 x86/k7/mmx x86/k7 x86"
+       path_64="x86_64/bd1 x86_64/k10 x86_64/k8 x86_64"
+       ;;
+      core2)
+       gcc_cflags_cpu="-mtune=core2 -mtune=k8"
+       gcc_cflags_arch="-march=core2 -march=core2~-mno-sse2 -march=k8 -march=k8~-mno-sse2"
+       path="x86/core2 x86/p6/sse2 x86/p6/p3mmx x86/p6/mmx x86/p6 x86"
+       path_64="x86_64/core2 x86_64"
+       ;;
+      corei | coreinhm | coreiwsm)
+       gcc_cflags_cpu="-mtune=corei7 -mtune=core2 -mtune=k8"
+       gcc_cflags_arch="-march=corei7 -march=core2 -march=core2~-mno-sse2 -march=k8 -march=k8~-mno-sse2"
+       path="x86/coreinhm x86/p6/sse2 x86/p6/p3mmx x86/p6/mmx x86/p6 x86"
+       path_64="x86_64/coreinhm x86_64/core2 x86_64"
+       ;;
+      coreisbr | coreihwl | coreibwl)
+       # Same compiler options as the corei arm above; only the paths differ.
+       gcc_cflags_cpu="-mtune=corei7 -mtune=core2 -mtune=k8"
+       gcc_cflags_arch="-march=corei7 -march=core2 -march=core2~-mno-sse2 -march=k8 -march=k8~-mno-sse2"
+       path="x86/coreisbr x86/p6/sse2 x86/p6/p3mmx x86/p6/mmx x86/p6 x86"
+       path_64="x86_64/coreisbr x86_64/coreinhm x86_64/core2 x86_64"
+       ;;
+      atom)
+       gcc_cflags_cpu="-mtune=atom -mtune=pentium3"
+       gcc_cflags_arch="-march=atom -march=pentium3"
+       path="x86/atom/sse2 x86/atom/mmx x86/atom x86"
+       path_64="x86_64/atom x86_64"
+       ;;
+      nano)
+       gcc_cflags_cpu="-mtune=nano"
+       gcc_cflags_arch="-march=nano"
+       path="x86/nano x86"
+       path_64="x86_64/nano x86_64"
+       ;;
+      *)
+       # Unrecognised CPU: i486 compiler options and the generic x86 and
+       # x86_64 directories only.
+       gcc_cflags_cpu="-mtune=i486 -mcpu=i486 -m486"
+       gcc_cflags_arch="-march=i486"
+       path="x86"
+       path_64="x86_64"
+       ;;
+    esac
+
+    # 64-bit capable hosts: set up ABI=64, derive ABI=x32 from it, and put
+    # both ahead of ABI=32.
+    case $host in
+      X86_64_PATTERN)
+       cclist_64="gcc"
+       gcc_64_cflags="$gcc_cflags -m64"
+       gcc_64_cflags_optlist="cpu arch"
+       CALLING_CONVENTIONS_OBJS_64='amd64call.lo amd64check$U.lo'
+       SPEED_CYCLECOUNTER_OBJ_64=x86_64.lo
+       cyclecounter_size_64=2
+
+       # x32 copies the ABI=64 settings made so far, except that it uses
+       # -mx32, a long long limb and a sizeof(long)==4 check.
+       cclist_x32="gcc"
+       gcc_x32_cflags="$gcc_cflags -mx32"
+       gcc_x32_cflags_optlist="$gcc_64_cflags_optlist"
+       CALLING_CONVENTIONS_OBJS_x32="$CALLING_CONVENTIONS_OBJS_64"
+       SPEED_CYCLECOUNTER_OBJ_x32="$SPEED_CYCLECOUNTER_OBJ_64"
+       cyclecounter_size_x32="$cyclecounter_size_64"
+       path_x32="$path_64"
+       limb_x32=longlong
+       any_x32_testlist="sizeof-long-4"
+
+       abilist="64 x32 32"
+       # invert_limb_table is only added when assembly is enabled.
+       if test "$enable_assembly" = "yes" ; then
+           extra_functions_64="invert_limb_table"
+           extra_functions_x32=$extra_functions_64
+       fi
+
+       # OS-specific adjustments to ABI=64.  These come after the x32
+       # copies above, so they do not affect x32.
+       case $host in
+         *-*-solaris*)
+           # Sun cc.
+           cclist_64="$cclist_64 cc"
+           cc_64_cflags="-xO3 -m64"
+           ;;
+         *-*-mingw* | *-*-cygwin)
+           # long long limb, no calling-conventions check objects, and the
+           # DOS64 non-standard ABI marker.
+           limb_64=longlong
+           CALLING_CONVENTIONS_OBJS_64=""
+           AC_DEFINE(HOST_DOS64,1,[Define to 1 for Windos/64])
+           GMP_NONSTD_ABI_64=DOS64
+           ;;
+       esac
+       ;;
+    esac
+    ;;
+
+
+  # FIXME: z8kx won't get through config.sub.  Could make 16 versus 32 bit
+  # limb an ABI option perhaps.
+  z8kx*-*-*)
+    path="z8000x"
+    extra_functions="udiv_w_sdiv"
+    ;;
+  z8k*-*-*)
+    path="z8000"
+    extra_functions="udiv_w_sdiv"
+    ;;
+
+
+  # Special CPU "none" used to select generic C, now this is obsolete.
+  none-*-*)
+    enable_assembly=no
+    AC_MSG_WARN([the \"none\" host is obsolete, use --disable-assembly])
+    ;;
+
+esac
+
+# mingw can be built by the cygwin gcc if -mno-cygwin is added.  For
+# convenience add this automatically if it works.  Actual mingw gcc accepts
+# -mno-cygwin too, but of course is the default.  mingw only runs on the
+# x86s, but allow any CPU here so as to catch "none" too.
+#
+case $host in
+  *-*-mingw*)
+    gcc_cflags_optlist="$gcc_cflags_optlist nocygwin"
+    gcc_cflags_nocygwin="-mno-cygwin"
+    ;;
+esac
+
+
+CFLAGS_or_unset=${CFLAGS-'(unset)'}
+CPPFLAGS_or_unset=${CPPFLAGS-'(unset)'}
+
+cat >&AC_FD_CC <<EOF
+User:
+ABI=$ABI
+CC=$CC
+CFLAGS=$CFLAGS_or_unset
+CPPFLAGS=$CPPFLAGS_or_unset
+MPN_PATH=$MPN_PATH
+GMP:
+abilist=$abilist
+cclist=$cclist
+EOF
+
+
+# Note whether the user supplied CFLAGS / CPPFLAGS at all.  ${VAR+set} gives
+# "set" even when VAR is an empty string, so an explicitly empty value still
+# counts as a user choice below.
+test_CFLAGS=${CFLAGS+set}
+test_CPPFLAGS=${CPPFLAGS+set}
+
+# Remember the final entry of $abilist in $abi_last; the compiler search
+# below compares each $abi against it when setting up $abi2.
+for abi in $abilist; do
+  abi_last="$abi"
+done
+
+# If the user specifies an ABI then it must be in $abilist, after that
+# $abilist is restricted to just that choice.
+#
+if test -n "$ABI"; then
+  found=no
+  for abi in $abilist; do
+    if test $abi = "$ABI"; then found=yes; break; fi
+  done
+  if test $found = no; then
+    AC_MSG_ERROR([ABI=$ABI is not among the following valid choices: $abilist])
+  fi
+  abilist="$ABI"
+fi
+
+found_compiler=no
+
+for abi in $abilist; do
+
+  echo "checking ABI=$abi"
+
+  # Suppose abilist="64 32", then for abi=64, will have abi1="_64" and
+  # abi2="_64".  For abi=32, will have abi1="_32" and abi2="".  This is how
+  # $gcc_cflags becomes a fallback for $gcc_32_cflags (the last in the
+  # abilist), but there's no fallback for $gcc_64_cflags.
+  #
+  abi1=[`echo _$abi | sed 's/[.]//g'`]
+  if test $abi = $abi_last; then abi2=; else abi2="$abi1"; fi
+
+  # Compiler choices under this ABI
+                              eval cclist_chosen=\"\$cclist$abi1\"
+  test -n "$cclist_chosen" || eval cclist_chosen=\"\$cclist$abi2\"
+
+  # If there's a user specified $CC then don't use a list for
+  # $cclist_chosen, just a single value for $ccbase.
+  #
+  if test -n "$CC"; then
+
+    # The first word of $CC, stripped of any directory.  For instance
+    # CC="/usr/local/bin/gcc -pipe" will give "gcc".
+    #
+    for ccbase in $CC; do break; done
+    ccbase=`echo $ccbase | sed 's:.*/::'`
+
+    # If this $ccbase is in $cclist_chosen then it's a compiler we know and
+    # we can do flags defaulting with it.  If not, then $cclist_chosen is
+    # set to "unrecognised" so no default flags are used.
+    #
+    # "unrecognised" is used to avoid bad effects with eval if $ccbase has
+    # non-symbol characters.  For instance ccbase=my+cc would end up with
+    # something like cflags="$my+cc_cflags" which would give
+    # cflags="+cc_cflags" rather than the intended empty string for an
+    # unknown compiler.
+    #
+    found=unrecognised
+    for i in $cclist_chosen; do
+      if test "$ccbase" = $i; then
+        found=$ccbase
+        break
+      fi
+    done
+    cclist_chosen=$found
+  fi
+
+  for ccbase in $cclist_chosen; do
+
+    # When cross compiling, look for a compiler with the $host_alias as a
+    # prefix, the same way that AC_CHECK_TOOL does.  But don't do this to a
+    # user-selected $CC.
+    #
+    # $cross_compiling will be yes/no/maybe at this point.  Do the host
+    # prefixing for "maybe" as well as "yes".
+    #
+    if test "$cross_compiling" != no && test -z "$CC"; then
+      cross_compiling_prefix="${host_alias}-"
+    fi
+
+    for ccprefix in $cross_compiling_prefix ""; do
+
+      cc="$CC"
+      test -n "$cc" || cc="$ccprefix$ccbase"
+
+      # If the compiler is gcc but installed under another name, then change
+      # $ccbase so as to use the flags we know for gcc.  This helps for
+      # instance when specifying CC=gcc272 on Debian GNU/Linux, or the
+      # native cc which is really gcc on NeXT or MacOS-X.
+      #
+      # FIXME: There's a slight misfeature here.  If cc is actually gcc but
+      # gcc is not a known compiler under this $abi then we'll end up
+      # testing it with no flags and it'll work, but chances are it won't be
+      # in the right mode for the ABI we desire.  Let's quietly hope this
+      # doesn't happen.
+      #
+      if test $ccbase != gcc; then
+        GMP_PROG_CC_IS_GNU($cc,ccbase=gcc)
+      fi
+
+      # Similarly if the compiler is IBM xlc but invoked as cc or whatever
+      # then change $ccbase and make the default xlc flags available.
+      if test $ccbase != xlc; then
+        GMP_PROG_CC_IS_XLC($cc,ccbase=xlc)
+      fi
+
+      # acc was Sun's first unbundled compiler back in the SunOS days, or
+      # something like that, but today its man page says it's not meant to
+      # be used directly (instead via /usr/ucb/cc).  The options are pretty
+      # much the same as the main SunPRO cc, so share those configs.
+      #
+      case $host in
+        *sparc*-*-solaris* | *sparc*-*-sunos*)
+          if test "$ccbase" = acc; then ccbase=cc; fi ;;
+      esac
+
+      for tmp_cflags_maybe in yes no; do
+                             eval cflags=\"\$${ccbase}${abi1}_cflags\"
+        test -n "$cflags" || eval cflags=\"\$${ccbase}${abi2}_cflags\"
+
+       if test "$tmp_cflags_maybe" = yes; then
+          # don't try cflags_maybe when the user set CFLAGS
+          if test "$test_CFLAGS" = set; then continue; fi
+                                     eval cflags_maybe=\"\$${ccbase}${abi1}_cflags_maybe\"
+          test -n "$cflags_maybe" || eval cflags_maybe=\"\$${ccbase}${abi2}_cflags_maybe\"
+          # don't try cflags_maybe if there's nothing set
+          if test -z "$cflags_maybe"; then continue; fi
+          cflags="$cflags_maybe $cflags"
+        fi
+
+        # Any user CFLAGS, even an empty string, takes precedence
+        if test "$test_CFLAGS" = set; then cflags=$CFLAGS; fi
+
+        # Any user CPPFLAGS, even an empty string, takes precedence
+                               eval cppflags=\"\$${ccbase}${abi1}_cppflags\"
+        test -n "$cppflags" || eval cppflags=\"\$${ccbase}${abi2}_cppflags\"
+        if test "$test_CPPFLAGS" = set; then cppflags=$CPPFLAGS; fi
+
+        # --enable-profiling adds -p/-pg even to user-specified CFLAGS.
+        # This is convenient, but it's perhaps a bit naughty to modify user
+        # CFLAGS.
+        case "$enable_profiling" in
+          prof)       cflags="$cflags -p" ;;
+          gprof)      cflags="$cflags -pg" ;;
+          instrument) cflags="$cflags -finstrument-functions" ;;
+        esac
+
+        GMP_PROG_CC_WORKS($cc $cflags $cppflags,,continue)
+
+        # If we're supposed to be using a "long long" for a limb, check that
+        # it works.
+                                  eval limb_chosen=\"\$limb$abi1\"
+        test -n "$limb_chosen" || eval limb_chosen=\"\$limb$abi2\"
+        if test "$limb_chosen" = longlong; then
+          GMP_PROG_CC_WORKS_LONGLONG($cc $cflags $cppflags,,continue)
+        fi
+
+        # The tests to perform on this $cc, if any
+                               eval testlist=\"\$${ccbase}${abi1}_testlist\"
+        test -n "$testlist" || eval testlist=\"\$${ccbase}${abi2}_testlist\"
+        test -n "$testlist" || eval testlist=\"\$any${abi1}_testlist\"
+        test -n "$testlist" || eval testlist=\"\$any${abi2}_testlist\"
+
+        testlist_pass=yes
+        for tst in $testlist; do
+          case $tst in
+          hpc-hppa-2-0)   GMP_HPC_HPPA_2_0($cc,,testlist_pass=no) ;;
+          gcc-arm-umodsi) GMP_GCC_ARM_UMODSI($cc,,testlist_pass=no) ;;
+          gcc-mips-o32)   GMP_GCC_MIPS_O32($cc,,testlist_pass=no) ;;
+          hppa-level-2.0) GMP_HPPA_LEVEL_20($cc $cflags,,testlist_pass=no) ;;
+          sizeof*)       GMP_C_TEST_SIZEOF($cc $cflags,$tst,,testlist_pass=no) ;;
+          esac
+          if test $testlist_pass = no; then break; fi
+        done
+
+        if test $testlist_pass = yes; then
+          found_compiler=yes
+          break
+        fi
+      done
+
+      if test $found_compiler = yes; then break; fi
+    done
+
+    if test $found_compiler = yes; then break; fi
+  done
+
+  if test $found_compiler = yes; then break; fi
+done
+
+
+# If we recognised the CPU, as indicated by $path being set, then insist
+# that we have a working compiler, either from our $cclist choices or from
+# $CC.  We can't let AC_PROG_CC look around for a compiler because it might
+# find one that we've rejected (for not supporting the modes our asm code
+# demands, etc).
+#
+# If we didn't recognise the CPU (and this includes host_cpu=none), then
+# fall through and let AC_PROG_CC look around for a compiler too.  This is
+# mostly in the interests of following a standard autoconf setup, after all
+# we've already tested cc and gcc adequately (hopefully).  As of autoconf
+# 2.50 the only thing AC_PROG_CC really adds is a check for "cl" (Microsoft
+# C on MS-DOS systems).
+#
+if test $found_compiler = no && test -n "$path"; then
+  AC_MSG_ERROR([could not find a working compiler, see config.log for details])
+fi
+
+case $host in
+  X86_PATTERN | X86_64_PATTERN)
+    # If the user asked for a fat build, override the path and flags set above
+    if test $enable_fat = yes; then
+      gcc_cflags_cpu=""
+      gcc_cflags_arch=""
+
+      fat_functions="add_n addmul_1 bdiv_dbm1c com copyd copyi dive_1 divrem_1
+                    gcd_1 lshift lshiftc mod_1 mod_1_1 mod_1_1_cps mod_1_2
+                    mod_1_2_cps mod_1_4 mod_1_4_cps mod_34lsub1 mode1o mul_1
+                    mul_basecase mullo_basecase pre_divrem_1 pre_mod_1 redc_1
+                    redc_2 rshift sqr_basecase sub_n submul_1"
+
+      if test "$abi" = 32; then
+       extra_functions="$extra_functions fat fat_entry"
+       path="x86/fat x86"
+       fat_path="x86 x86/fat x86/i486
+                 x86/k6 x86/k6/mmx x86/k6/k62mmx
+                 x86/k7 x86/k7/mmx
+                 x86/k8 x86/k10 x86/bobcat
+                 x86/pentium x86/pentium/mmx
+                 x86/p6 x86/p6/mmx x86/p6/p3mmx x86/p6/sse2
+                 x86/pentium4 x86/pentium4/mmx x86/pentium4/sse2
+                 x86/core2 x86/coreinhm x86/coreisbr
+                 x86/atom x86/atom/mmx x86/atom/sse2 x86/nano"
+      fi
+
+      if test "$abi" = 64; then
+       gcc_64_cflags=""
+       extra_functions_64="$extra_functions_64 fat fat_entry"
+       path_64="x86_64/fat x86_64"
+       fat_path="x86_64 x86_64/fat
+                 x86_64/k8 x86_64/k10 x86_64/bd1 x86_64/bobcat
+                 x86_64/pentium4 x86_64/core2 x86_64/coreinhm x86_64/coreisbr
+                 x86_64/atom x86_64/nano"
+       fat_functions="$fat_functions addmul_2 addlsh1_n addlsh2_n sublsh1_n"
+      fi
+
+      fat_thresholds="MUL_TOOM22_THRESHOLD MUL_TOOM33_THRESHOLD
+                     SQR_TOOM2_THRESHOLD SQR_TOOM3_THRESHOLD
+                     BMOD_1_TO_MOD_1_THRESHOLD"
+    fi
+    ;;
+esac
+
+
+if test $found_compiler = yes; then
+
+  # If we're creating CFLAGS, then look for optional additions.  If the user
+  # set CFLAGS then leave it alone.
+  #
+  if test "$test_CFLAGS" != set; then
+                          eval optlist=\"\$${ccbase}${abi1}_cflags_optlist\"
+    test -n "$optlist" || eval optlist=\"\$${ccbase}${abi2}_cflags_optlist\"
+
+    for opt in $optlist; do
+                             eval optflags=\"\$${ccbase}${abi1}_cflags_${opt}\"
+      test -n "$optflags" || eval optflags=\"\$${ccbase}${abi2}_cflags_${opt}\"
+      test -n "$optflags" || eval optflags=\"\$${ccbase}_cflags_${opt}\"
+
+      for flag in $optflags; do
+
+       # ~ represents a space in an option spec
+        flag=`echo "$flag" | tr '~' ' '`
+
+        case $flag in
+          -march=pentium4 | -march=k8)
+            # For -march settings which enable SSE2 we exclude certain bad
+            # gcc versions and we need an OS knowing how to save xmm regs.
+            #
+            # This is only for ABI=32, any 64-bit gcc is good and any OS
+            # knowing x86_64 will know xmm.
+            #
+            # -march=k8 was only introduced in gcc 3.3, so we shouldn't need
+            # the GMP_GCC_PENTIUM4_SSE2 check (for gcc 3.2 and prior).  But
+            # it doesn't hurt to run it anyway, sharing code with the
+            # pentium4 case.
+            #
+            if test "$abi" = 32; then
+              GMP_GCC_PENTIUM4_SSE2($cc $cflags $cppflags,, continue)
+              GMP_OS_X86_XMM($cc $cflags $cppflags,, continue)
+            fi
+            ;;
+          -no-cpp-precomp)
+            # special check, avoiding a warning
+            GMP_GCC_NO_CPP_PRECOMP($ccbase,$cc,$cflags,
+                                   [cflags="$cflags $flag"
+                                   break],
+                                   [continue])
+            ;;
+          -Wa,-m*)
+            case $host in
+              alpha*-*-*)
+                GMP_GCC_WA_MCPU($cc $cflags, $flag, , [continue])
+              ;;
+            esac
+            ;;
+          -Wa,-oldas)
+            GMP_GCC_WA_OLDAS($cc $cflags $cppflags,
+                             [cflags="$cflags $flag"
+                             break],
+                             [continue])
+            ;;
+        esac
+
+        GMP_PROG_CC_WORKS($cc $cflags $cppflags $flag,
+          [cflags="$cflags $flag"
+          break])
+      done
+    done
+  fi
+
+  ABI="$abi"
+  CC="$cc"
+  CFLAGS="$cflags"
+  CPPFLAGS="$cppflags"
+  eval GMP_NONSTD_ABI=\"\$GMP_NONSTD_ABI_$ABI\"
+
+  # Could easily have this in config.h too, if desired.
+  ABI_nodots=`echo $ABI | sed 's/\./_/'`
+  GMP_DEFINE_RAW("define_not_for_expansion(\`HAVE_ABI_$ABI_nodots')", POST)
+
+
+  # GMP_LDFLAGS substitution, selected according to ABI.
+  # These are needed on libgmp.la and libmp.la, but currently not on
+  # convenience libraries like tune/libspeed.la or mpz/libmpz.la.
+  #
+  # As with cflags, cppflags etc above, try the ABI-specific variable first
+  # and then fall back on $abi2, which is empty for the last ABI in
+  # $abilist, so that plain ${ccbase}_ldflags serves as the default there.
+  #
+                            eval GMP_LDFLAGS=\"\$${ccbase}${abi1}_ldflags\"
+  test -n "$GMP_LDFLAGS" || eval GMP_LDFLAGS=\"\$${ccbase}${abi2}_ldflags\"
+  AC_SUBST(GMP_LDFLAGS)
+  AC_SUBST(LIBGMP_LDFLAGS)
+  AC_SUBST(LIBGMPXX_LDFLAGS)
+
+  # extra_functions, selected according to ABI
+                    eval tmp=\"\$extra_functions$abi1\"
+  test -n "$tmp" || eval tmp=\"\$extra_functions$abi2\"
+  extra_functions="$tmp"
+
+
+  # Cycle counter, selected according to ABI.
+  #
+                    eval tmp=\"\$SPEED_CYCLECOUNTER_OBJ$abi1\"
+  test -n "$tmp" || eval tmp=\"\$SPEED_CYCLECOUNTER_OBJ$abi2\"
+  SPEED_CYCLECOUNTER_OBJ="$tmp"
+                    eval tmp=\"\$cyclecounter_size$abi1\"
+  test -n "$tmp" || eval tmp=\"\$cyclecounter_size$abi2\"
+  cyclecounter_size="$tmp"
+
+  if test -n "$SPEED_CYCLECOUNTER_OBJ"; then
+    AC_DEFINE_UNQUOTED(HAVE_SPEED_CYCLECOUNTER, $cyclecounter_size,
+    [Tune directory speed_cyclecounter, undef=none, 1=32bits, 2=64bits)])
+  fi
+  AC_SUBST(SPEED_CYCLECOUNTER_OBJ)
+
+
+  # Calling conventions checking, selected according to ABI.
+  #
+                    eval tmp=\"\$CALLING_CONVENTIONS_OBJS$abi1\"
+  test -n "$tmp" || eval tmp=\"\$CALLING_CONVENTIONS_OBJS$abi2\"
+  if test "$enable_assembly" = "yes"; then
+     CALLING_CONVENTIONS_OBJS="$tmp"
+  else
+     CALLING_CONVENTIONS_OBJS=""
+  fi
+
+  if test -n "$CALLING_CONVENTIONS_OBJS"; then
+    AC_DEFINE(HAVE_CALLING_CONVENTIONS,1,
+    [Define to 1 if tests/libtests has calling conventions checking for the CPU])
+  fi
+  AC_SUBST(CALLING_CONVENTIONS_OBJS)
+
+fi
+
+
+# If the user gave an MPN_PATH, use that verbatim, otherwise choose
+# according to the ABI and add "generic".
+#
+if test -n "$MPN_PATH"; then
+  path="$MPN_PATH"
+else
+                    eval tmp=\"\$path$abi1\"
+  test -n "$tmp" || eval tmp=\"\$path$abi2\"
+  path="$tmp generic"
+fi
+
+
+# Long long limb setup for gmp.h.
+case $limb_chosen in
+longlong) DEFN_LONG_LONG_LIMB="#define _LONG_LONG_LIMB 1"    ;;
+*)        DEFN_LONG_LONG_LIMB="/* #undef _LONG_LONG_LIMB */" ;;
+esac
+AC_SUBST(DEFN_LONG_LONG_LIMB)
+
+
+# The C compiler and preprocessor, put into ANSI mode if possible.
+AC_PROG_CC
+AC_PROG_CC_STDC
+AC_PROG_CPP
+
+
+# The C compiler on the build system, and associated tests.
+GMP_PROG_CC_FOR_BUILD
+GMP_PROG_CPP_FOR_BUILD
+GMP_PROG_EXEEXT_FOR_BUILD
+GMP_C_FOR_BUILD_ANSI
+GMP_CHECK_LIBM_FOR_BUILD
+
+
+# How to assemble, used with CFLAGS etc, see mpn/Makeasm.am.
+# Using the compiler is a lot easier than figuring out how to invoke the
+# assembler directly.
+#
+test -n "$CCAS" || CCAS="$CC -c"
+AC_SUBST(CCAS)
+
+
+# The C++ compiler, if desired.
+want_cxx=no
+if test $enable_cxx != no; then
+  test_CXXFLAGS=${CXXFLAGS+set}
+  AC_PROG_CXX
+
+  echo "CXXFLAGS chosen by autoconf: $CXXFLAGS" >&AC_FD_CC
+  cxxflags_ac_prog_cxx=$CXXFLAGS
+  cxxflags_list=ac_prog_cxx
+
+  # If the user didn't specify $CXXFLAGS, then try $CFLAGS, with -g removed
+  # if AC_PROG_CXX thinks that doesn't work.  $CFLAGS stands a good chance
+  # of working, eg. on a GNU system where CC=gcc and CXX=g++.
+  #
+  if test "$test_CXXFLAGS" != set; then
+    cxxflags_cflags=$CFLAGS
+    cxxflags_list="cflags $cxxflags_list"
+    if test "$ac_prog_cxx_g" = no; then
+      # Drop a standalone -g.  A -g in the middle is replaced by a single
+      # space so the neighbouring options stay separate words, and a CFLAGS
+      # consisting of just "-g" is handled too.
+      cxxflags_cflags=`echo "$cxxflags_cflags" | sed -e 's/ -g / /' -e 's/^-g //' -e 's/ -g$//' -e 's/^-g$//'`
+    fi
+  fi
+
+  # See if the C++ compiler works.  If the user specified CXXFLAGS then all
+  # we're doing is checking whether AC_PROG_CXX succeeded, since it doesn't
+  # give a fatal error, just leaves CXX set to a default g++.  If on the
+  # other hand the user didn't specify CXXFLAGS then we get to try here our
+  # $cxxflags_list alternatives.
+  #
+  # Automake includes $CPPFLAGS in a C++ compile, so we do the same here.
+  #
+  for cxxflags_choice in $cxxflags_list; do
+    eval CXXFLAGS=\"\$cxxflags_$cxxflags_choice\"
+    GMP_PROG_CXX_WORKS($CXX $CPPFLAGS $CXXFLAGS,
+      [want_cxx=yes
+      break])
+  done
+
+  # If --enable-cxx=yes but a C++ compiler can't be found, then abort.
+  if test $want_cxx = no && test $enable_cxx = yes; then
+    AC_MSG_ERROR([C++ compiler not available, see config.log for details])
+  fi
+fi
+
+AM_CONDITIONAL(WANT_CXX, test $want_cxx = yes)
+
+# FIXME: We're not interested in CXXCPP for ourselves, but if we don't do it
+# here then AC_PROG_LIBTOOL will AC_REQUIRE it (via _LT_AC_TAGCONFIG) and
+# hence execute it unconditionally, and that will fail if there's no C++
+# compiler (and no generic /lib/cpp).
+#
+if test $want_cxx = yes; then
+  AC_PROG_CXXCPP
+fi
+
+
+# Path setups for Cray, according to IEEE or CFP.  These must come after
+# deciding the compiler.
+#
+GMP_CRAY_OPTIONS(
+  [add_path="cray/ieee"],
+  [add_path="cray/cfp"; extra_functions="mulwwc90"],
+  [add_path="cray/cfp"; extra_functions="mulwwj90"])
+
+
+if test -z "$MPN_PATH"; then
+  path="$add_path $path"
+fi
+
+# For a nail build, also look in "nails" subdirectories.
+#
+if test $GMP_NAIL_BITS != 0 && test -z "$MPN_PATH"; then
+  new_path=
+  for i in $path; do
+    case $i in
+    generic) new_path="$new_path $i" ;;
+    *)       new_path="$new_path $i/nails $i" ;;
+    esac
+  done
+  path=$new_path
+fi
+
+
+# Put all directories into CPUVEC_list so as to get a full set of
+# CPUVEC_SETUP_$tmp_suffix defines into config.h, even if some of them are
+# empty because mmx and/or sse2 had to be dropped.
+#
+for i in $fat_path; do
+  GMP_FAT_SUFFIX(tmp_suffix, $i)
+  CPUVEC_list="$CPUVEC_list CPUVEC_SETUP_$tmp_suffix"
+done
+
+
+# If there's any sse2 or mmx in the path, check whether the assembler
+# supports it, and remove if not.
+#
+# We only need this in ABI=32, for ABI=64 on x86_64 we can assume a new
+# enough assembler.
+#
+case $host in
+  X86_PATTERN | X86_64_PATTERN)
+    if test "$ABI" = 32; then
+      case "$path $fat_path" in
+        *mmx*)   GMP_ASM_X86_MMX( , [GMP_STRIP_PATH(*mmx*)]) ;;
+      esac
+      case "$path $fat_path" in
+        *sse2*)  GMP_ASM_X86_SSE2( , [GMP_STRIP_PATH(sse2)]) ;;
+      esac
+    fi
+    ;;
+esac
+
+
+if test "$enable_assembly" = "no"; then
+  path="generic"
+  CFLAGS="$CFLAGS -DNO_ASM"
+#  for abi in $abilist; do
+#    eval unset "path_\$abi"
+#    eval gcc_${abi}_cflags=\"\$gcc_${abi}_cflags -DNO_ASM\"
+#  done
+fi
+
+
+cat >&AC_FD_CC <<EOF
+Decided:
+ABI=$ABI
+CC=$CC
+CFLAGS=$CFLAGS
+CPPFLAGS=$CPPFLAGS
+GMP_LDFLAGS=$GMP_LDFLAGS
+CXX=$CXX
+CXXFLAGS=$CXXFLAGS
+path=$path
+EOF
+echo "using ABI=\"$ABI\""
+echo "      CC=\"$CC\""
+echo "      CFLAGS=\"$CFLAGS\""
+echo "      CPPFLAGS=\"$CPPFLAGS\""
+if test $want_cxx = yes; then
+  echo "      CXX=\"$CXX\""
+  echo "      CXXFLAGS=\"$CXXFLAGS\""
+fi
+echo "      MPN_PATH=\"$path\""
+
+
+CL_AS_NOEXECSTACK
+
+GMP_PROG_AR
+GMP_PROG_NM
+
+case $host in
+  # FIXME: On AIX 3 and 4, $libname.a is included in libtool
+  # $library_names_spec, so libgmp.a becomes a symlink to libgmp.so, making
+  # it impossible to build shared and static libraries simultaneously.
+  # Disable shared libraries by default, but let the user override with
+  # --enable-shared --disable-static.
+  #
+  # FIXME: This $libname.a problem looks like it might apply to *-*-amigaos*
+  # and *-*-os2* too, but wait for someone to test this before worrying
+  # about it.  If there is a problem then of course libtool is the right
+  # place to fix it.
+  #
+  [*-*-aix[34]*])
+    if test -z "$enable_shared"; then enable_shared=no; fi ;;
+esac
+
+
+# Configs for Windows DLLs.
+
+AC_LIBTOOL_WIN32_DLL
+
+AC_SUBST(LIBGMP_DLL,0)
+case $host in
+  *-*-cygwin* | *-*-mingw* | *-*-pw32* | *-*-os2*)
+    # By default, build only static.
+    if test -z "$enable_shared"; then
+      enable_shared=no
+    fi
+    # Don't allow both static and DLL.
+    if test "$enable_shared" != no && test "$enable_static" != no; then
+      AC_MSG_ERROR([cannot build both static and DLL, since gmp.h is different for each.
+Use "--disable-static --enable-shared" to build just a DLL.])
+    fi
+
+    # "-no-undefined" is required when building a DLL, see documentation on
+    # AC_LIBTOOL_WIN32_DLL.
+    #
+    # "-Wl,--export-all-symbols" is a bit of a hack, it gets all libgmp and
+    # libgmpxx functions and variables exported.  This is what libtool did
+    # in the past, and it's convenient for us in the test programs.
+    #
+    # Maybe it'd be prudent to check for --export-all-symbols before using
+    # it, but it seems to have been in ld since at least 2000, and there's
+    # not really any alternative we want to take up at the moment.
+    #
+    # "-Wl,output-def" is used to get a .def file for use by MS lib to make
+    # a .lib import library, described in the manual.  libgmp-3.dll.def
+    # corresponds to the libmp-3.dll.def generated by libtool (as a result
+    # of -export-symbols on that library).
+    #
+    # Incidentally, libtool does generate an import library libgmp.dll.a,
+    # but it's "ar" format and cannot be used by the MS linker.  There
+    # doesn't seem to be any GNU tool for generating or converting to .lib.
+    #
+    # FIXME: The .def files produced by -Wl,output-def include isascii,
+    # iscsym, iscsymf and toascii, apparently because mingw ctype.h doesn't
+    # inline isascii (used in gmp).  It gives an extern inline for
+    # __isascii, but for some reason not the plain isascii.
+    #
+    if test "$enable_shared" = yes; then
+      GMP_LDFLAGS="$GMP_LDFLAGS -no-undefined -Wl,--export-all-symbols"
+      # Each library extends its own flags variable, so that libgmpxx gets
+      # only its own --output-def and not the libgmp one as well.
+      LIBGMP_LDFLAGS="$LIBGMP_LDFLAGS -Wl,--output-def,.libs/libgmp-3.dll.def"
+      LIBGMPXX_LDFLAGS="$LIBGMPXX_LDFLAGS -Wl,--output-def,.libs/libgmpxx-3.dll.def"
+      LIBGMP_DLL=1
+    fi
+    ;;
+esac
+
+
+# Ensure that $CONFIG_SHELL is available for AC_LIBTOOL_SYS_MAX_CMD_LEN.
+# It's often set already by _LT_AC_PROG_ECHO_BACKSLASH or
+# _AS_LINENO_PREPARE, but not always.
+#
+# The symptom of CONFIG_SHELL unset is some "expr" errors during the test,
+# and an empty result.  This only happens when invoked as "sh configure",
+# ie. no path, and can be seen for instance on ia64-*-hpux*.
+#
+# FIXME: Newer libtool should have its own fix for this.
+#
+# Default CONFIG_SHELL to $SHELL when it is unset or empty.
+test -n "$CONFIG_SHELL" || CONFIG_SHELL=$SHELL
+
+# Enable CXX in libtool only if we want it, and never enable GCJ, nor RC on
+# mingw and cygwin.  Under --disable-cxx this avoids some error messages
+# from libtool arising from the fact we didn't actually run AC_PROG_CXX.
+# Notice that any user-supplied --with-tags setting takes precedence.
+#
+# FIXME: Is this the right way to get this effect?  Very possibly not, but
+# the current _LT_AC_TAGCONFIG doesn't really suggest an alternative.
+#
+if test "${with_tags+set}" != set; then
+  if test $want_cxx = yes; then
+    with_tags=CXX
+  else
+    with_tags=
+  fi
+fi
+
+# The dead hand of AC_REQUIRE makes AC_PROG_LIBTOOL expand and execute
+# AC_PROG_F77, even when F77 is not in the selected with_tags.  This is
+# probably harmless, but it's unsightly and bloats our configure, so pretend
+# AC_PROG_F77 has been expanded already.
+#
+# FIXME: Rumour has it libtool will one day provide a way for a configure.in
+# to say what it wants from among supported languages etc.
+#
+AC_PROVIDE([AC_PROG_F77])
+
+AC_PROG_LIBTOOL
+
+# Generate an error here if attempting to build both shared and static when
+# $libname.a is in $library_names_spec (as mentioned above), rather than
+# wait for ar or ld to fail.
+#
+if test "$enable_shared" = yes && test "$enable_static" = yes; then
+  case $library_names_spec in
+    *libname.a*)
+      AC_MSG_ERROR([cannot create both shared and static libraries on this system, --disable one of the two])
+      ;;
+  esac
+fi
+
+AM_CONDITIONAL(ENABLE_STATIC, test "$enable_static" = yes)
+
+
+# Many of these library and header checks are for the benefit of
+# supplementary programs.  libgmp doesn't use anything too weird.
+
+AC_HEADER_STDC
+AC_HEADER_TIME
+
+# Reasons for testing:
+#   float.h - not in SunOS bundled cc
+#   invent.h - IRIX specific
+#   langinfo.h - X/Open standard only, not in djgpp for instance
+#   locale.h - old systems won't have this
+#   nl_types.h - X/Open standard only, not in djgpp for instance
+#       (usually langinfo.h gives nl_item etc, but not on netbsd 1.4.1)
+#   sys/attributes.h - IRIX specific
+#   sys/iograph.h - IRIX specific
+#   sys/mman.h - not in Cray Unicos
+#   sys/param.h - not in mingw
+#   sys/processor.h - solaris specific, though also present in macos
+#   sys/pstat.h - HPUX specific
+#   sys/resource.h - not in mingw
+#   sys/sysctl.h - not in mingw
+#   sys/sysinfo.h - OSF specific
+#   sys/syssgi.h - IRIX specific
+#   sys/systemcfg.h - AIX specific
+#   sys/time.h - autoconf suggests testing, don't know anywhere without it
+#   sys/times.h - not in mingw
+#   machine/hal_sysinfo.h - OSF specific
+#
+# inttypes.h, stdint.h, unistd.h and sys/types.h are already in the autoconf
+# default tests
+#
+AC_CHECK_HEADERS(fcntl.h float.h invent.h langinfo.h locale.h nl_types.h sys/attributes.h sys/iograph.h sys/mman.h sys/param.h sys/processor.h sys/pstat.h sys/sysinfo.h sys/syssgi.h sys/systemcfg.h sys/time.h sys/times.h)
+
+# On SunOS, sys/resource.h needs sys/time.h (for struct timeval)
+AC_CHECK_HEADERS(sys/resource.h,,,
+[#if TIME_WITH_SYS_TIME
+# include <sys/time.h>
+# include <time.h>
+#else
+# if HAVE_SYS_TIME_H
+#  include <sys/time.h>
+# else
+#  include <time.h>
+# endif
+#endif])
+
+# On NetBSD and OpenBSD, sys/sysctl.h needs sys/param.h for various constants
+AC_CHECK_HEADERS(sys/sysctl.h,,,
+[#if HAVE_SYS_PARAM_H
+# include <sys/param.h>
+#endif])
+
+# On OSF 4.0, <machine/hal_sysinfo.h> must have <sys/sysinfo.h> for ulong_t
+AC_CHECK_HEADERS(machine/hal_sysinfo.h,,,
+[#if HAVE_SYS_SYSINFO_H
+# include <sys/sysinfo.h>
+#endif])
+
+# Reasons for testing:
+#   optarg - not declared in mingw
+#   fgetc, fscanf, ungetc, vfprintf - not declared in SunOS 4
+#   sys_errlist, sys_nerr - not declared in SunOS 4
+#
+# optarg should be in unistd.h and the rest in stdio.h, both of which are
+# in the autoconf default includes.
+#
+# sys_errlist and sys_nerr are supposed to be in <errno.h> on SunOS according
+# to the man page (but aren't), in glibc they're in stdio.h.
+#
+AC_CHECK_DECLS([fgetc, fscanf, optarg, ungetc, vfprintf])
+AC_CHECK_DECLS([sys_errlist, sys_nerr], , ,
+[#include <stdio.h>
+#include <errno.h>])
+
+AC_TYPE_SIGNAL
+
+# Reasons for testing:
+#   intmax_t       - C99
+#   long double    - not in the HP bundled K&R cc
+#   long long      - only in reasonably recent compilers
+#   ptrdiff_t      - seems to be everywhere, maybe don't need to check this
+#   quad_t         - BSD specific
+#   uint_least32_t - C99
+#
+# the default includes are sufficient for all these types
+#
+AC_CHECK_TYPES([intmax_t, long double, long long, ptrdiff_t, quad_t,
+               uint_least32_t, intptr_t])
+
+AC_C_STRINGIZE
+
+# FIXME: Really want #ifndef __cplusplus around the #define volatile
+# replacement autoconf gives, since volatile is always available in C++.
+# But we don't use it in C++ currently.
+AC_C_VOLATILE
+
+AC_C_RESTRICT
+
+GMP_C_STDARG
+GMP_C_ATTRIBUTE_CONST
+GMP_C_ATTRIBUTE_MALLOC
+GMP_C_ATTRIBUTE_MODE
+GMP_C_ATTRIBUTE_NORETURN
+
+GMP_H_EXTERN_INLINE
+
+# from libtool
+AC_CHECK_LIBM
+AC_SUBST(LIBM)
+
+GMP_FUNC_ALLOCA
+GMP_OPTION_ALLOCA
+
+GMP_H_HAVE_FILE
+
+AC_C_BIGENDIAN(
+  [AC_DEFINE(HAVE_LIMB_BIG_ENDIAN, 1)
+   GMP_DEFINE_RAW("define_not_for_expansion(\`HAVE_LIMB_BIG_ENDIAN')", POST)],
+  [AC_DEFINE(HAVE_LIMB_LITTLE_ENDIAN, 1)
+   GMP_DEFINE_RAW("define_not_for_expansion(\`HAVE_LIMB_LITTLE_ENDIAN')", POST)
+  ], [:])
+AH_VERBATIM([HAVE_LIMB],
+[/* Define one of these to 1 for the endianness of `mp_limb_t'.
+   If the endianness is not a simple big or little, or you don't know what
+   it is, then leave both undefined. */
+#undef HAVE_LIMB_BIG_ENDIAN
+#undef HAVE_LIMB_LITTLE_ENDIAN])
+
+GMP_C_DOUBLE_FORMAT
+
+
+# Reasons for testing:
+#   alarm - not in mingw
+#   attr_get - IRIX specific
+#   clock_gettime - not in glibc 2.2.4, only very recent systems
+#   cputime - not in glibc
+#   getsysinfo - OSF specific
+#   getrusage - not in mingw
+#   gettimeofday - not in mingw
+#   mmap - not in mingw, djgpp
+#   nl_langinfo - X/Open standard only, not in djgpp for instance
+#   obstack_vprintf - glibc specific
+#   processor_info - solaris specific
+#   pstat_getprocessor - HPUX specific (10.x and up)
+#   raise - an ANSI-ism, though probably almost universal by now
+#   read_real_time - AIX specific
+#   sigaction - not in mingw
+#   sigaltstack - not in mingw, or old AIX (reputedly)
+#   sigstack - not in mingw
+#   strerror - not in SunOS
+#   strnlen - glibc extension (some other systems too)
+#   syssgi - IRIX specific
+#   times - not in mingw
+#
+# AC_FUNC_STRNLEN is not used because we don't want the AC_LIBOBJ
+# replacement setups it gives.  It detects a faulty strnlen on AIX, but
+# missing out on that test is ok since our only use of strnlen is in
+# __gmp_replacement_vsnprintf which is not required on AIX since it has a
+# vsnprintf.
+#
+AC_CHECK_FUNCS(alarm attr_get clock cputime getpagesize getrusage gettimeofday getsysinfo localeconv memset mmap mprotect nl_langinfo obstack_vprintf popen processor_info pstat_getprocessor raise read_real_time sigaction sigaltstack sigstack syssgi strchr strerror strnlen strtol strtoul sysconf sysctl sysctlbyname times)
+
+# clock_gettime is in librt on *-*-osf5.1 and on glibc, so add -lrt to
+# TUNE_LIBS if needed. On linux (tested on x86_32, 2.6.26),
+# clock_getres reports ns accuracy, while in a quick test on osf
+# clock_getres said only 1 millisecond.
+
+old_LIBS="$LIBS"
+AC_SEARCH_LIBS(clock_gettime, rt, [
+  AC_DEFINE([HAVE_CLOCK_GETTIME],1,[Define to 1 if you have the `clock_gettime' function])])
+TUNE_LIBS="$LIBS"
+LIBS="$old_LIBS"
+
+AC_SUBST(TUNE_LIBS)
+
+GMP_FUNC_VSNPRINTF
+GMP_FUNC_SSCANF_WRITABLE_INPUT
+
+# Reasons for checking:
+#   pst_processor psp_iticksperclktick - not in hpux 9
+#
+AC_CHECK_MEMBER(struct pst_processor.psp_iticksperclktick,
+                [AC_DEFINE(HAVE_PSP_ITICKSPERCLKTICK, 1,
+[Define to 1 if <sys/pstat.h> `struct pst_processor' exists
+and contains `psp_iticksperclktick'.])],,
+                [#include <sys/pstat.h>])
+
+# C++ tests, when required
+#
+if test $enable_cxx = yes; then
+  AC_LANG_PUSH(C++)
+
+  # Reasons for testing:
+  #   <sstream> - not in g++ 2.95.2
+  #   std::locale - not in g++ 2.95.4
+  #
+  AC_CHECK_HEADERS([sstream])
+  AC_CHECK_TYPES([std::locale],,,[#include <locale>])
+
+  AC_LANG_POP(C++)
+fi
+
+
+# Pick the correct source files in $path and link them to mpn/.
+# $gmp_mpn_functions lists all functions we need.
+#
+# The rule is to find a file with the function name and a .asm, .S,
+# .s, or .c extension.  Certain multi-function files with special names
+# can provide some functions too.  (mpn/Makefile.am passes
+# -DOPERATION_<func> to get them to generate the right code.)
+
+# Note: $gmp_mpn_functions must have mod_1 before pre_mod_1 so the former
+#       can optionally provide the latter as an extra entrypoint.  Likewise
+#       divrem_1 and pre_divrem_1.
+
+gmp_mpn_functions_optional="umul udiv                                  \
+  invert_limb sqr_diagonal sqr_diag_addlsh1                            \
+  mul_2 mul_3 mul_4 mul_5 mul_6                                                \
+  addmul_2 addmul_3 addmul_4 addmul_5 addmul_6 addmul_7 addmul_8       \
+  addlsh1_n sublsh1_n rsblsh1_n rsh1add_n rsh1sub_n                    \
+  addlsh2_n sublsh2_n rsblsh2_n                                                \
+  addlsh_n sublsh_n rsblsh_n                                           \
+  add_n_sub_n addaddmul_1msb0"
+
+gmp_mpn_functions="$extra_functions                                       \
+  add add_1 add_n sub sub_1 sub_n addcnd_n subcnd_n neg com               \
+  mul_1 addmul_1 submul_1                                                 \
+  add_err1_n add_err2_n add_err3_n sub_err1_n sub_err2_n sub_err3_n       \
+  lshift rshift dive_1 diveby3 divis divrem divrem_1 divrem_2             \
+  fib2_ui mod_1 mod_34lsub1 mode1o pre_divrem_1 pre_mod_1 dump            \
+  mod_1_1 mod_1_2 mod_1_3 mod_1_4 lshiftc                                 \
+  mul mul_fft mul_n sqr mul_basecase sqr_basecase nussbaumer_mul          \
+  mulmid_basecase toom42_mulmid mulmid_n mulmid                                   \
+  random random2 pow_1                                                    \
+  rootrem sqrtrem get_str set_str scan0 scan1 popcount hamdist cmp        \
+  perfsqr perfpow                                                         \
+  gcd_1 gcd gcdext_1 gcdext gcd_subdiv_step                               \
+  gcdext_lehmer                                                                   \
+  div_q tdiv_qr jacbase jacobi_2 jacobi get_d                             \
+  matrix22_mul matrix22_mul1_inverse_vector                               \
+  hgcd_matrix hgcd2 hgcd_step hgcd_reduce hgcd hgcd_appr                  \
+  hgcd2_jacobi hgcd_jacobi                                                \
+  mullo_n mullo_basecase                                                  \
+  toom22_mul toom32_mul toom42_mul toom52_mul toom62_mul                  \
+  toom33_mul toom43_mul toom53_mul toom54_mul toom63_mul                  \
+  toom44_mul                                                              \
+  toom6h_mul toom6_sqr toom8h_mul toom8_sqr                               \
+  toom_couple_handling                                                    \
+  toom2_sqr toom3_sqr toom4_sqr                                                   \
+  toom_eval_dgr3_pm1 toom_eval_dgr3_pm2                                           \
+  toom_eval_pm1 toom_eval_pm2 toom_eval_pm2exp toom_eval_pm2rexp          \
+  toom_interpolate_5pts toom_interpolate_6pts toom_interpolate_7pts       \
+  toom_interpolate_8pts toom_interpolate_12pts toom_interpolate_16pts     \
+  invertappr invert binvert mulmod_bnm1 sqrmod_bnm1                       \
+  div_qr_2 div_qr_2n_pi1 div_qr_2u_pi1                                    \
+  sbpi1_div_q sbpi1_div_qr sbpi1_divappr_q                                \
+  dcpi1_div_q dcpi1_div_qr dcpi1_divappr_q                                \
+  mu_div_qr mu_divappr_q mu_div_q                                         \
+  bdiv_q_1                                                                \
+  sbpi1_bdiv_q sbpi1_bdiv_qr                                              \
+  dcpi1_bdiv_q dcpi1_bdiv_qr                                              \
+  mu_bdiv_q mu_bdiv_qr                                                    \
+  bdiv_q bdiv_qr broot brootinv bsqrt bsqrtinv                            \
+  divexact bdiv_dbm1c redc_1 redc_2 redc_n powm powlo powm_sec            \
+  sb_div_qr_sec sb_div_r_sec sbpi1_div_qr_sec sbpi1_div_r_sec             \
+  trialdiv remove                                                         \
+  and_n andn_n nand_n ior_n iorn_n nior_n xor_n xnor_n                    \
+  copyi copyd zero tabselect                                              \
+  comb_tables                                                             \
+  $gmp_mpn_functions_optional"
+
+define(GMP_MULFUNC_CHOICES,
+[# functions that can be provided by multi-function files
+tmp_mulfunc=
+case $tmp_fn in
+  add_n|sub_n)       tmp_mulfunc="aors_n"    ;;
+  add_err1_n|sub_err1_n)
+                    tmp_mulfunc="aors_err1_n" ;;
+  add_err2_n|sub_err2_n)
+                    tmp_mulfunc="aors_err2_n" ;;
+  add_err3_n|sub_err3_n)
+                    tmp_mulfunc="aors_err3_n" ;;
+  addcnd_n|subcnd_n) tmp_mulfunc="aorscnd_n"   ;;
+  addmul_1|submul_1) tmp_mulfunc="aorsmul_1" ;;
+  popcount|hamdist)  tmp_mulfunc="popham"    ;;
+  and_n|andn_n|nand_n | ior_n|iorn_n|nior_n | xor_n|xnor_n)
+                     tmp_mulfunc="logops_n"  ;;
+  lshift|rshift)     tmp_mulfunc="lorrshift";;
+  addlsh1_n)
+                    tmp_mulfunc="aorslsh1_n aorrlsh1_n";;
+  sublsh1_n)
+                    tmp_mulfunc="aorslsh1_n sorrlsh1_n";;
+  rsblsh1_n)
+                    tmp_mulfunc="aorrlsh1_n sorrlsh1_n";;
+  addlsh2_n)
+                    tmp_mulfunc="aorslsh2_n aorrlsh2_n";;
+  sublsh2_n)
+                    tmp_mulfunc="aorslsh2_n sorrlsh2_n";;
+  rsblsh2_n)
+                    tmp_mulfunc="aorrlsh2_n sorrlsh2_n";;
+  addlsh_n)
+                    tmp_mulfunc="aorslsh_n aorrlsh_n";;
+  sublsh_n)
+                    tmp_mulfunc="aorslsh_n sorrlsh_n";;
+  rsblsh_n)
+                    tmp_mulfunc="aorrlsh_n sorrlsh_n";;
+  rsh1add_n|rsh1sub_n)
+                    tmp_mulfunc="rsh1aors_n";;
+  sb_div_qr_sec|sb_div_r_sec)
+                    tmp_mulfunc="sb_div_sec";;
+  sbpi1_div_qr_sec|sbpi1_div_r_sec)
+                    tmp_mulfunc="sbpi1_div_sec";;
+esac
+])
+
+# the list of all object files used by mpn/Makefile.in and the
+# top-level Makefile.in, respectively
+mpn_objects=
+mpn_objs_in_libgmp=
+
+# links from the sources, to be removed by "make distclean"
+gmp_srclinks=
+
+
+# mpn_relative_top_srcdir is $top_srcdir, but for use from within the mpn
+# build directory.  If $srcdir is relative then we use a relative path too,
+# so the two trees can be moved together.
+case $srcdir in
+  [[\\/]* | ?:[\\/]*])  # absolute, as per autoconf
+    mpn_relative_top_srcdir=$srcdir ;;
+  *)                    # relative
+    mpn_relative_top_srcdir=../$srcdir ;;
+esac
+
+
+define(MPN_SUFFIXES,[asm S s c])
+
+dnl  Usage: GMP_FILE_TO_FUNCTION(func,file)
+dnl
+dnl  func and file are the names of shell variables (written without "$").
+dnl  Set $func to the function base name for $file, eg. dive_1 gives
+dnl  divexact_1.  A name with no special mapping is copied unchanged.
+dnl
+define(GMP_FILE_TO_FUNCTION,
+[case $$2 in
+  dive_1)      $1=divexact_1 ;;
+  diveby3)     $1=divexact_by3c ;;
+  pre_divrem_1) $1=preinv_divrem_1 ;;
+  mode1o)      $1=modexact_1c_odd ;;
+  pre_mod_1)   $1=preinv_mod_1 ;;
+  mod_1_1)     $1=mod_1_1p ;;
+  mod_1_1_cps) $1=mod_1_1p_cps ;;
+  mod_1_2)     $1=mod_1s_2p ;;
+  mod_1_2_cps) $1=mod_1s_2p_cps ;;
+  mod_1_3)     $1=mod_1s_3p ;;
+  mod_1_3_cps) $1=mod_1s_3p_cps ;;
+  mod_1_4)     $1=mod_1s_4p ;;
+  mod_1_4_cps) $1=mod_1s_4p_cps ;;
+  *)           $1=$$2 ;;
+esac
+])
+
+# Fat binary setups.
+#
+# We proceed through each $fat_path directory, and look for $fat_function
+# routines there.  Those found are incorporated in the build by generating a
+# little mpn/<foo>.asm or mpn/<foo>.c file in the build directory, with
+# suitable function renaming, and adding that to $mpn_objects (the same as a
+# normal mpn file).
+#
+# fat.h is generated with macros to let internal calls to each $fat_function
+# go directly through __gmpn_cpuvec, plus macros and declarations helping to
+# setup that structure, on a per-directory basis ready for
+# mpn/<cpu>/fat/fat.c.
+#
+# fat.h includes thresholds listed in $fat_thresholds, extracted from
+# gmp-mparam.h in each directory.  An overall maximum for each threshold is
+# established, for use in making fixed size arrays of temporary space.
+# (Eg. MUL_TOOM33_THRESHOLD_LIMIT used by mpn/generic/mul.c.)
+#
+# It'd be possible to do some of this manually, but when there's more than a
+# few functions and a few directories it becomes very tedious, and very
+# prone to having some routine accidentally omitted.  On that basis it seems
+# best to automate as much as possible, even if the code to do so is a bit
+# ugly.
+#
+
+if test -n "$fat_path"; then
+  # Usually the mpn build directory is created with mpn/Makefile
+  # instantiation, but we want to write to it sooner.
+  mkdir mpn 2>/dev/null
+
+  echo "/* fat.h - setups for fat binaries." >fat.h
+  echo "   Generated by configure - DO NOT EDIT.  */" >>fat.h
+
+  AC_DEFINE(WANT_FAT_BINARY, 1, [Define to 1 when building a fat binary.])
+  GMP_DEFINE(WANT_FAT_BINARY, yes)
+
+  # Don't want normal copies of fat functions
+  for tmp_fn in $fat_functions; do
+    GMP_REMOVE_FROM_LIST(gmp_mpn_functions, $tmp_fn)
+    GMP_REMOVE_FROM_LIST(gmp_mpn_functions_optional, $tmp_fn)
+  done
+
+  for tmp_fn in $fat_functions; do
+    GMP_FILE_TO_FUNCTION(tmp_fbase,tmp_fn)
+    echo "
+#ifndef OPERATION_$tmp_fn
+#undef  mpn_$tmp_fbase
+#define mpn_$tmp_fbase  (*__gmpn_cpuvec.$tmp_fbase)
+#endif
+DECL_$tmp_fbase (__MPN(${tmp_fbase}_init));" >>fat.h
+    # encourage various macros to use fat functions
+    AC_DEFINE_UNQUOTED(HAVE_NATIVE_mpn_$tmp_fbase)
+  done
+
+  echo "" >>fat.h
+  echo "/* variable thresholds */" >>fat.h
+  # Redefine each threshold in fat.h as CPUVEC_THRESHOLD (lower-cased name).
+  # NOTE(review): the tr brackets in this loop are single, unlike the
+  # doubled ones used by the other tr calls in this section.  Presumably
+  # that is required: the hash sign earlier on the same line starts an m4
+  # comment, so m4 copies the rest of the line verbatim and would not strip
+  # a second level.  Confirm against the generated configure before
+  # "fixing" the apparent inconsistency.
+  for tmp_tn in $fat_thresholds; do
+    echo "#undef  $tmp_tn" >>fat.h
+    echo "#define $tmp_tn  CPUVEC_THRESHOLD (`echo $tmp_tn | tr [A-Z] [a-z]`)" >>fat.h
+  done
+
+  # Emit CPUVEC_INSTALL(vec) into fat.h: one assignment per fat function
+  # pointer and per threshold field, copying vec into __gmpn_cpuvec.
+  # The quotes around volatile are backslash-escaped so that they reach
+  # fat.h instead of closing and reopening the shell string.
+  echo "
+/* Copy all fields into __gmpn_cpuvec.
+   memcpy is not used because it might operate byte-wise (depending on its
+   implementation), and we need the function pointer writes to be atomic.
+   \"volatile\" discourages the compiler from trying to optimize this.  */
+#define CPUVEC_INSTALL(vec) \\
+  do { \\
+    volatile struct cpuvec_t *p = &__gmpn_cpuvec; \\" >>fat.h
+  for tmp_fn in $fat_functions; do
+    GMP_FILE_TO_FUNCTION(tmp_fbase,tmp_fn)
+    echo "    p->$tmp_fbase = vec.$tmp_fbase; \\" >>fat.h
+  done
+  for tmp_tn in $fat_thresholds; do
+    tmp_field_name=`echo $tmp_tn | tr [[A-Z]] [[a-z]]`
+    echo "    p->$tmp_field_name = vec.$tmp_field_name; \\" >>fat.h
+  done
+  echo "  } while (0)" >>fat.h
+
+  echo "
+/* A helper to check all fields are filled. */
+#define ASSERT_CPUVEC(vec) \\
+  do { \\" >>fat.h
+  for tmp_fn in $fat_functions; do
+    GMP_FILE_TO_FUNCTION(tmp_fbase,tmp_fn)
+    echo "    ASSERT (vec.$tmp_fbase != NULL); \\" >>fat.h
+  done
+  for tmp_tn in $fat_thresholds; do
+    tmp_field_name=`echo $tmp_tn | tr [[A-Z]] [[a-z]]`
+    echo "    ASSERT (vec.$tmp_field_name != 0); \\" >>fat.h
+  done
+  echo "  } while (0)" >>fat.h
+
+  echo "
+/* Call ITERATE(field) for each fat threshold field. */
+#define ITERATE_FAT_THRESHOLDS() \\
+  do { \\" >>fat.h
+  for tmp_tn in $fat_thresholds; do
+    tmp_field_name=`echo $tmp_tn | tr [[A-Z]] [[a-z]]`
+    echo "    ITERATE ($tmp_tn, $tmp_field_name); \\" >>fat.h
+  done
+  echo "  } while (0)" >>fat.h
+
+  for tmp_dir in $fat_path; do
+    CPUVEC_SETUP=
+    THRESH_ASM_SETUP=
+    echo "" >>fat.h
+    GMP_FAT_SUFFIX(tmp_suffix, $tmp_dir)
+
+    # In order to keep names unique on a DOS 8.3 filesystem, use a prefix
+    # (rather than a suffix) for the generated file names, and abbreviate.
+    case $tmp_suffix in
+      pentium)       tmp_prefix=p   ;;
+      pentium_mmx)   tmp_prefix=pm  ;;
+      p6_mmx)        tmp_prefix=p2  ;;
+      p6_p3mmx)      tmp_prefix=p3  ;;
+      pentium4)      tmp_prefix=p4  ;;
+      pentium4_mmx)  tmp_prefix=p4m ;;
+      pentium4_sse2) tmp_prefix=p4s ;;
+      k6_mmx)        tmp_prefix=k6m ;;
+      k6_k62mmx)     tmp_prefix=k62 ;;
+      k7_mmx)        tmp_prefix=k7m ;;
+      *)             tmp_prefix=$tmp_suffix ;;
+    esac
+
+    # Extract desired thresholds from gmp-mparam.h file in this directory,
+    # if present.
+    tmp_mparam=$srcdir/mpn/$tmp_dir/gmp-mparam.h
+    if test -f $tmp_mparam; then
+      for tmp_tn in $fat_thresholds; do
+        tmp_thresh=`sed -n "s/^#define $tmp_tn[        ]*\\([0-9][0-9]*\\).*$/\\1/p" $tmp_mparam`
+        if test -n "$tmp_thresh"; then
+          THRESH_ASM_SETUP=["${THRESH_ASM_SETUP}define($tmp_tn,$tmp_thresh)
+"]
+          CPUVEC_SETUP="$CPUVEC_SETUP    decided_cpuvec.`echo $tmp_tn | tr [[A-Z]] [[a-z]]` = $tmp_thresh; \\
+"
+          eval tmp_limit=\$${tmp_tn}_LIMIT
+          if test -z "$tmp_limit"; then
+            tmp_limit=0
+          fi
+          if test $tmp_thresh -gt $tmp_limit; then
+            eval ${tmp_tn}_LIMIT=$tmp_thresh
+          fi
+        fi
+      done
+    fi
+
+    for tmp_fn in $fat_functions; do
+      GMP_MULFUNC_CHOICES
+
+      for tmp_base in $tmp_fn $tmp_mulfunc; do
+        for tmp_ext in MPN_SUFFIXES; do
+          tmp_file=$srcdir/mpn/$tmp_dir/$tmp_base.$tmp_ext
+          if test -f $tmp_file; then
+
+           # If the host uses a non-standard ABI, check if tmp_file supports it:
+           # the ABI_SUPPORT(...) lines of a non-C source are collected and the
+           # file is skipped unless they name $GMP_NONSTD_ABI.  grep's output
+           # is sent to /dev/null instead of using the non-portable -q
+           # option, matching the PROLOGUE greps further down.
+           #
+           if test -n "$GMP_NONSTD_ABI" && test $tmp_ext != "c"; then
+             abi=[`sed -n 's/^[        ]*ABI_SUPPORT(\(.*\))/\1/p' $tmp_file `]
+             if echo "$abi" | grep "\\b${GMP_NONSTD_ABI}\\b" >/dev/null; then
+               true
+             else
+               continue
+             fi
+           fi
+
+            mpn_objects="$mpn_objects ${tmp_prefix}_$tmp_fn.lo"
+            mpn_objs_in_libgmp="$mpn_objs_in_libgmp mpn/${tmp_prefix}_$tmp_fn.lo"
+
+            GMP_FILE_TO_FUNCTION(tmp_fbase,tmp_fn)
+
+            # carry-in variant, eg. divrem_1c or modexact_1c_odd
+            case $tmp_fbase in
+              *_1*) tmp_fbasec=`echo $tmp_fbase | sed 's/_1/_1c/'` ;;
+              *)    tmp_fbasec=${tmp_fbase}c ;;
+            esac
+
+            # Create a little file doing an include from srcdir.  The
+            # OPERATION and renamings aren't all needed all the time, but
+            # they don't hurt if unused.
+            #
+            # FIXME: Should generate these via config.status commands.
+            # Would need them all in one AC_CONFIG_COMMANDS though, since
+            # that macro doesn't accept a set of separate commands generated
+            # by shell code.
+            #
+            case $tmp_ext in
+              asm)
+                # hide the d-n-l from autoconf's error checking
+                tmp_d_n_l=d""nl
+                echo ["$tmp_d_n_l  mpn_$tmp_fbase - from $tmp_dir directory for fat binary.
+$tmp_d_n_l  Generated by configure - DO NOT EDIT.
+
+define(OPERATION_$tmp_fn)
+define(__gmpn_$tmp_fbase, __gmpn_${tmp_fbase}_$tmp_suffix)
+define(__gmpn_$tmp_fbasec,__gmpn_${tmp_fbasec}_${tmp_suffix})
+define(__gmpn_preinv_${tmp_fbase},__gmpn_preinv_${tmp_fbase}_${tmp_suffix})
+define(__gmpn_${tmp_fbase}_cps,__gmpn_${tmp_fbase}_cps_${tmp_suffix})
+
+$tmp_d_n_l  For k6 and k7 gcd_1 calling their corresponding mpn_modexact_1_odd
+ifdef(\`__gmpn_modexact_1_odd',,
+\`define(__gmpn_modexact_1_odd,__gmpn_modexact_1_odd_${tmp_suffix})')
+
+$THRESH_ASM_SETUP
+include][($mpn_relative_top_srcdir/mpn/$tmp_dir/$tmp_base.asm)
+"] >mpn/${tmp_prefix}_$tmp_fn.asm
+                ;;
+              c)
+                echo ["/* mpn_$tmp_fbase - from $tmp_dir directory for fat binary.
+   Generated by configure - DO NOT EDIT. */
+
+#define OPERATION_$tmp_fn 1
+#define __gmpn_$tmp_fbase           __gmpn_${tmp_fbase}_$tmp_suffix
+#define __gmpn_$tmp_fbasec          __gmpn_${tmp_fbasec}_${tmp_suffix}
+#define __gmpn_preinv_${tmp_fbase}  __gmpn_preinv_${tmp_fbase}_${tmp_suffix}
+#define __gmpn_${tmp_fbase}_cps     __gmpn_${tmp_fbase}_cps_${tmp_suffix}
+
+#include \"$mpn_relative_top_srcdir/mpn/$tmp_dir/$tmp_base.c\"
+"] >mpn/${tmp_prefix}_$tmp_fn.c
+                ;;
+            esac
+
+            # Prototype, and append to CPUVEC_SETUP for this directory.
+            echo "DECL_$tmp_fbase (__gmpn_${tmp_fbase}_$tmp_suffix);" >>fat.h
+            CPUVEC_SETUP="$CPUVEC_SETUP    decided_cpuvec.$tmp_fbase = __gmpn_${tmp_fbase}_${tmp_suffix}; \\
+"
+            # Ditto for any preinv variant (preinv_divrem_1, preinv_mod_1).
+            if grep "^PROLOGUE(mpn_preinv_$tmp_fn)" $tmp_file >/dev/null; then
+              echo "DECL_preinv_$tmp_fbase (__gmpn_preinv_${tmp_fbase}_$tmp_suffix);" >>fat.h
+              CPUVEC_SETUP="$CPUVEC_SETUP    decided_cpuvec.preinv_$tmp_fbase = __gmpn_preinv_${tmp_fbase}_${tmp_suffix}; \\
+"
+            fi
+
+            # Ditto for any mod_1...cps variant
+            if grep "^PROLOGUE(mpn_${tmp_fbase}_cps)" $tmp_file >/dev/null; then
+              echo "DECL_${tmp_fbase}_cps (__gmpn_${tmp_fbase}_cps_$tmp_suffix);" >>fat.h
+              CPUVEC_SETUP="$CPUVEC_SETUP    decided_cpuvec.${tmp_fbase}_cps = __gmpn_${tmp_fbase}_cps_${tmp_suffix}; \\
+"
+            fi
+          fi
+        done
+      done
+    done
+
+    # Emit CPUVEC_SETUP for this directory
+    echo "" >>fat.h
+    echo "#define CPUVEC_SETUP_$tmp_suffix \\" >>fat.h
+    echo "  do { \\" >>fat.h
+    echo "$CPUVEC_SETUP  } while (0)" >>fat.h
+  done
+
+  # Emit threshold limits
+  echo "" >>fat.h
+  for tmp_tn in $fat_thresholds; do
+    eval tmp_limit=\$${tmp_tn}_LIMIT
+    echo "#define ${tmp_tn}_LIMIT  $tmp_limit" >>fat.h
+  done
+fi
+
+
+# Normal binary setups.
+#
+
+for tmp_ext in MPN_SUFFIXES; do
+  eval found_$tmp_ext=no
+done
+
+for tmp_fn in $gmp_mpn_functions; do
+  for tmp_ext in MPN_SUFFIXES; do
+    test "$no_create" = yes || rm -f mpn/$tmp_fn.$tmp_ext
+  done
+
+  # mpn_preinv_divrem_1 might have been provided by divrem_1.asm, likewise
+  # mpn_preinv_mod_1 by mod_1.asm.
+  case $tmp_fn in
+  pre_divrem_1)
+    if test "$HAVE_NATIVE_mpn_preinv_divrem_1" = yes; then continue; fi ;;
+  pre_mod_1)
+    if test "$HAVE_NATIVE_mpn_preinv_mod_1" = yes; then continue; fi ;;
+  esac
+
+  GMP_MULFUNC_CHOICES
+
+  found=no
+  for tmp_dir in $path; do
+    for tmp_base in $tmp_fn $tmp_mulfunc; do
+      for tmp_ext in MPN_SUFFIXES; do
+        tmp_file=$srcdir/mpn/$tmp_dir/$tmp_base.$tmp_ext
+        if test -f $tmp_file; then
+
+          # For a nails build, check if the file supports our nail bits.
+          # Generic code always supports all nails.
+          #
+          # FIXME: When a multi-function file is selected to provide one of
+          # the nails-neutral routines, like logops_n for and_n, the
+          # PROLOGUE grepping will create HAVE_NATIVE_mpn_<foo> defines for
+          # all functions in that file, even if they haven't all been
+          # nailified.  Not sure what to do about this, it's only really a
+          # problem for logops_n, and it's not too terrible to insist those
+          # get nailified always.
+          #
+          if test $GMP_NAIL_BITS != 0 && test $tmp_dir != generic; then
+            case $tmp_fn in
+              and_n | ior_n | xor_n | andn_n | \
+              copyi | copyd | \
+              popcount | hamdist | \
+              udiv | udiv_w_sdiv | umul | \
+              cntlz | invert_limb)
+                # these operations are either unaffected by nails or defined
+                # to operate on full limbs
+                ;;
+              *)
+                nails=[`sed -n 's/^[   ]*NAILS_SUPPORT(\(.*\))/\1/p' $tmp_file `]
+                for n in $nails; do
+                  case $n in
+                  *-*)
+                    n_start=`echo "$n" | sed -n 's/\(.*\)-.*/\1/p'`
+                    n_end=`echo "$n" | sed -n 's/.*-\(.*\)/\1/p'`
+                    ;;
+                  *)
+                    n_start=$n
+                    n_end=$n
+                    ;;
+                  esac
+                  if test $GMP_NAIL_BITS -ge $n_start && test $GMP_NAIL_BITS -le $n_end; then
+                    found=yes
+                    break
+                  fi
+                done
+                if test $found != yes; then
+                  continue
+                fi
+                ;;
+            esac
+          fi
+
+         # If the host uses a non-standard ABI, check if tmp_file supports it:
+         # the ABI_SUPPORT(...) lines of a non-C source are collected and the
+         # file is skipped unless they name $GMP_NONSTD_ABI.  grep's output
+         # is sent to /dev/null instead of using the non-portable -q option,
+         # matching the PROLOGUE greps in the fat binary setup.
+         #
+         if test -n "$GMP_NONSTD_ABI" && test $tmp_ext != "c"; then
+           abi=[`sed -n 's/^[  ]*ABI_SUPPORT(\(.*\))/\1/p' $tmp_file `]
+           if echo "$abi" | grep "\\b${GMP_NONSTD_ABI}\\b" >/dev/null; then
+             true
+           else
+             continue
+           fi
+         fi
+
+          found=yes
+          eval found_$tmp_ext=yes
+
+          if test $tmp_ext = c; then
+            tmp_u='$U'
+          else
+            tmp_u=
+          fi
+
+          mpn_objects="$mpn_objects $tmp_fn$tmp_u.lo"
+          mpn_objs_in_libgmp="$mpn_objs_in_libgmp mpn/$tmp_fn$tmp_u.lo"
+          AC_CONFIG_LINKS(mpn/$tmp_fn.$tmp_ext:mpn/$tmp_dir/$tmp_base.$tmp_ext)
+          gmp_srclinks="$gmp_srclinks mpn/$tmp_fn.$tmp_ext"
+
+          # Duplicate AC_DEFINEs are harmless, so it doesn't matter
+          # that multi-function files get grepped here repeatedly.
+          # The PROLOGUE pattern excludes the optional second parameter.
+          gmp_ep=[`
+            sed -n 's/^[       ]*MULFUNC_PROLOGUE(\(.*\))/\1/p' $tmp_file ;
+            sed -n 's/^[       ]*PROLOGUE(\([^,]*\).*)/\1/p' $tmp_file
+          `]
+          for gmp_tmp in $gmp_ep; do
+            AC_DEFINE_UNQUOTED(HAVE_NATIVE_$gmp_tmp)
+            eval HAVE_NATIVE_$gmp_tmp=yes
+          done
+
+          case $tmp_fn in
+          sqr_basecase) sqr_basecase_source=$tmp_file ;;
+          esac
+
+          break
+        fi
+      done
+      if test $found = yes; then break ; fi
+    done
+    if test $found = yes; then break ; fi
+  done
+
+  if test $found = no; then
+    for tmp_optional in $gmp_mpn_functions_optional; do
+      if test $tmp_optional = $tmp_fn; then
+        found=yes
+      fi
+    done
+    if test $found = no; then
+      AC_MSG_ERROR([no version of $tmp_fn found in path: $path])
+    fi
+  fi
+done
+
+# All cycle counters are .asm files currently
+if test -n "$SPEED_CYCLECOUNTER_OBJ"; then
+  found_asm=yes
+fi
+
+dnl  The following list only needs to have templates for those defines which
+dnl  are going to be tested by the code, there's no need to have every
+dnl  possible mpn routine.
+
+AH_VERBATIM([HAVE_NATIVE],
+[/* Define to 1 each of the following for which a native (ie. CPU specific)
+    implementation of the corresponding routine exists.  */
+#undef HAVE_NATIVE_mpn_add_n
+#undef HAVE_NATIVE_mpn_add_n_sub_n
+#undef HAVE_NATIVE_mpn_add_nc
+#undef HAVE_NATIVE_mpn_addaddmul_1msb0
+#undef HAVE_NATIVE_mpn_addcnd_n
+#undef HAVE_NATIVE_mpn_addlsh1_n
+#undef HAVE_NATIVE_mpn_addlsh2_n
+#undef HAVE_NATIVE_mpn_addlsh_n
+#undef HAVE_NATIVE_mpn_addlsh1_nc
+#undef HAVE_NATIVE_mpn_addlsh2_nc
+#undef HAVE_NATIVE_mpn_addlsh_nc
+#undef HAVE_NATIVE_mpn_addlsh1_n_ip1
+#undef HAVE_NATIVE_mpn_addlsh2_n_ip1
+#undef HAVE_NATIVE_mpn_addlsh_n_ip1
+#undef HAVE_NATIVE_mpn_addlsh1_nc_ip1
+#undef HAVE_NATIVE_mpn_addlsh2_nc_ip1
+#undef HAVE_NATIVE_mpn_addlsh_nc_ip1
+#undef HAVE_NATIVE_mpn_addlsh1_n_ip2
+#undef HAVE_NATIVE_mpn_addlsh2_n_ip2
+#undef HAVE_NATIVE_mpn_addlsh_n_ip2
+#undef HAVE_NATIVE_mpn_addlsh1_nc_ip2
+#undef HAVE_NATIVE_mpn_addlsh2_nc_ip2
+#undef HAVE_NATIVE_mpn_addlsh_nc_ip2
+#undef HAVE_NATIVE_mpn_addmul_1c
+#undef HAVE_NATIVE_mpn_addmul_2
+#undef HAVE_NATIVE_mpn_addmul_3
+#undef HAVE_NATIVE_mpn_addmul_4
+#undef HAVE_NATIVE_mpn_addmul_5
+#undef HAVE_NATIVE_mpn_addmul_6
+#undef HAVE_NATIVE_mpn_addmul_7
+#undef HAVE_NATIVE_mpn_addmul_8
+#undef HAVE_NATIVE_mpn_addmul_2s
+#undef HAVE_NATIVE_mpn_and_n
+#undef HAVE_NATIVE_mpn_andn_n
+#undef HAVE_NATIVE_mpn_bdiv_dbm1c
+#undef HAVE_NATIVE_mpn_bdiv_q_1
+#undef HAVE_NATIVE_mpn_pi1_bdiv_q_1
+#undef HAVE_NATIVE_mpn_com
+#undef HAVE_NATIVE_mpn_copyd
+#undef HAVE_NATIVE_mpn_copyi
+#undef HAVE_NATIVE_mpn_div_qr_2
+#undef HAVE_NATIVE_mpn_divexact_1
+#undef HAVE_NATIVE_mpn_divexact_by3c
+#undef HAVE_NATIVE_mpn_divrem_1
+#undef HAVE_NATIVE_mpn_divrem_1c
+#undef HAVE_NATIVE_mpn_divrem_2
+#undef HAVE_NATIVE_mpn_gcd_1
+#undef HAVE_NATIVE_mpn_hamdist
+#undef HAVE_NATIVE_mpn_invert_limb
+#undef HAVE_NATIVE_mpn_ior_n
+#undef HAVE_NATIVE_mpn_iorn_n
+#undef HAVE_NATIVE_mpn_lshift
+#undef HAVE_NATIVE_mpn_lshiftc
+#undef HAVE_NATIVE_mpn_lshsub_n
+#undef HAVE_NATIVE_mpn_mod_1
+#undef HAVE_NATIVE_mpn_mod_1_1p
+#undef HAVE_NATIVE_mpn_mod_1c
+#undef HAVE_NATIVE_mpn_mod_1s_2p
+#undef HAVE_NATIVE_mpn_mod_1s_4p
+#undef HAVE_NATIVE_mpn_mod_34lsub1
+#undef HAVE_NATIVE_mpn_modexact_1_odd
+#undef HAVE_NATIVE_mpn_modexact_1c_odd
+#undef HAVE_NATIVE_mpn_mul_1
+#undef HAVE_NATIVE_mpn_mul_1c
+#undef HAVE_NATIVE_mpn_mul_2
+#undef HAVE_NATIVE_mpn_mul_3
+#undef HAVE_NATIVE_mpn_mul_4
+#undef HAVE_NATIVE_mpn_mul_5
+#undef HAVE_NATIVE_mpn_mul_6
+#undef HAVE_NATIVE_mpn_mul_basecase
+#undef HAVE_NATIVE_mpn_nand_n
+#undef HAVE_NATIVE_mpn_nior_n
+#undef HAVE_NATIVE_mpn_popcount
+#undef HAVE_NATIVE_mpn_preinv_divrem_1
+#undef HAVE_NATIVE_mpn_preinv_mod_1
+#undef HAVE_NATIVE_mpn_redc_1
+#undef HAVE_NATIVE_mpn_redc_2
+#undef HAVE_NATIVE_mpn_rsblsh1_n
+#undef HAVE_NATIVE_mpn_rsblsh2_n
+#undef HAVE_NATIVE_mpn_rsblsh_n
+#undef HAVE_NATIVE_mpn_rsblsh1_nc
+#undef HAVE_NATIVE_mpn_rsblsh2_nc
+#undef HAVE_NATIVE_mpn_rsblsh_nc
+#undef HAVE_NATIVE_mpn_rsh1add_n
+#undef HAVE_NATIVE_mpn_rsh1add_nc
+#undef HAVE_NATIVE_mpn_rsh1sub_n
+#undef HAVE_NATIVE_mpn_rsh1sub_nc
+#undef HAVE_NATIVE_mpn_rshift
+#undef HAVE_NATIVE_mpn_sqr_basecase
+#undef HAVE_NATIVE_mpn_sqr_diagonal
+#undef HAVE_NATIVE_mpn_sqr_diag_addlsh1
+#undef HAVE_NATIVE_mpn_sub_n
+#undef HAVE_NATIVE_mpn_sub_nc
+#undef HAVE_NATIVE_mpn_subcnd_n
+#undef HAVE_NATIVE_mpn_sublsh1_n
+#undef HAVE_NATIVE_mpn_sublsh2_n
+#undef HAVE_NATIVE_mpn_sublsh_n
+#undef HAVE_NATIVE_mpn_sublsh1_nc
+#undef HAVE_NATIVE_mpn_sublsh2_nc
+#undef HAVE_NATIVE_mpn_sublsh_nc
+#undef HAVE_NATIVE_mpn_sublsh1_n_ip1
+#undef HAVE_NATIVE_mpn_sublsh2_n_ip1
+#undef HAVE_NATIVE_mpn_sublsh_n_ip1
+#undef HAVE_NATIVE_mpn_sublsh1_nc_ip1
+#undef HAVE_NATIVE_mpn_sublsh2_nc_ip1
+#undef HAVE_NATIVE_mpn_sublsh_nc_ip1
+#undef HAVE_NATIVE_mpn_submul_1c
+#undef HAVE_NATIVE_mpn_tabselect
+#undef HAVE_NATIVE_mpn_udiv_qrnnd
+#undef HAVE_NATIVE_mpn_udiv_qrnnd_r
+#undef HAVE_NATIVE_mpn_umul_ppmm
+#undef HAVE_NATIVE_mpn_umul_ppmm_r
+#undef HAVE_NATIVE_mpn_xor_n
+#undef HAVE_NATIVE_mpn_xnor_n])
+
+
+# Don't demand an m4 unless it's actually needed.
+if test $found_asm = yes; then
+  GMP_PROG_M4
+  GMP_M4_M4WRAP_SPURIOUS
+# else
+# It's unclear why this m4-not-needed stuff was ever done.
+#  if test -z "$M4" ; then
+#    M4=m4-not-needed
+#  fi
+fi
+
+# Only do the GMP_ASM checks if there's a .S or .asm wanting them.
+if test $found_asm = no && test $found_S = no; then
+  gmp_asm_syntax_testing=no
+fi
+
+if test "$gmp_asm_syntax_testing" != no; then
+  GMP_ASM_TEXT
+  GMP_ASM_DATA
+  GMP_ASM_LABEL_SUFFIX
+  GMP_ASM_GLOBL
+  GMP_ASM_GLOBL_ATTR
+  GMP_ASM_UNDERSCORE
+  GMP_ASM_RODATA
+  GMP_ASM_TYPE
+  GMP_ASM_SIZE
+  GMP_ASM_LSYM_PREFIX
+  GMP_ASM_W32
+  GMP_ASM_ALIGN_LOG
+
+  case $host in
+    hppa*-*-*)
+      # for both pa32 and pa64
+      GMP_INCLUDE_MPN(pa32/pa-defs.m4)
+      ;;
+    IA64_PATTERN)
+      GMP_ASM_IA64_ALIGN_OK
+      ;;
+    M68K_PATTERN)
+      GMP_ASM_M68K_INSTRUCTION
+      GMP_ASM_M68K_ADDRESSING
+      GMP_ASM_M68K_BRANCHES
+      ;;
+    [powerpc*-*-* | power[3-9]-*-*])
+      GMP_ASM_POWERPC_PIC_ALWAYS
+      GMP_ASM_POWERPC_R_REGISTERS
+      GMP_INCLUDE_MPN(powerpc32/powerpc-defs.m4)
+      case $host in
+        *-*-aix*)
+         case $ABI in
+           mode64)      GMP_INCLUDE_MPN(powerpc64/aix.m4) ;;
+            *)           GMP_INCLUDE_MPN(powerpc32/aix.m4) ;;
+          esac
+          ;;
+        *-*-linux* | *-*-*bsd*)
+         case $ABI in
+           mode64)      GMP_INCLUDE_MPN(powerpc64/elf.m4) ;;
+           mode32 | 32) GMP_INCLUDE_MPN(powerpc32/elf.m4) ;;
+          esac
+          ;;
+        *-*-darwin*)
+         case $ABI in
+           mode64)      GMP_INCLUDE_MPN(powerpc64/darwin.m4) ;;
+           mode32 | 32) GMP_INCLUDE_MPN(powerpc32/darwin.m4) ;;
+          esac
+          ;;
+        *)
+         # Assume unrecognized operating system is the powerpc eABI
+          GMP_INCLUDE_MPN(powerpc32/eabi.m4)
+         ;;
+      esac
+      ;;
+    power*-*-aix*)
+      GMP_INCLUDE_MPN(powerpc32/aix.m4)
+      ;;
+    *sparc*-*-*)
+      case $ABI in
+        64)
+          GMP_ASM_SPARC_REGISTER
+          ;;
+      esac
+      ;;
+    X86_PATTERN | X86_64_PATTERN)
+      GMP_ASM_ALIGN_FILL_0x90
+      case $ABI in
+        32)
+          GMP_INCLUDE_MPN(x86/x86-defs.m4)
+          AC_DEFINE(HAVE_HOST_CPU_FAMILY_x86)
+          GMP_ASM_COFF_TYPE
+          GMP_ASM_X86_GOT_UNDERSCORE
+          GMP_ASM_X86_SHLDL_CL
+         case $enable_profiling in
+           prof | gprof)  GMP_ASM_X86_MCOUNT ;;
+         esac
+         case $host in
+           *-*-darwin*)
+             GMP_INCLUDE_MPN(x86/darwin.m4) ;;
+         esac
+          ;;
+        64|x32)
+          GMP_INCLUDE_MPN(x86_64/x86_64-defs.m4)
+          AC_DEFINE(HAVE_HOST_CPU_FAMILY_x86_64)
+         case $host in
+           *-*-darwin*)
+             GMP_INCLUDE_MPN(x86_64/darwin.m4) ;;
+           *-*-mingw* | *-*-cygwin)
+             GMP_INCLUDE_MPN(x86_64/dos64.m4) ;;
+         esac
+          ;;
+      esac
+      ;;
+  esac
+fi
+
+# For --enable-minithres, prepend "minithres" to path so that its special
+# gmp-mparam.h will be used.
+if test $enable_minithres = yes; then
+  path="minithres $path"
+fi
+
+# Create link for gmp-mparam.h.
+gmp_mparam_source=
+for gmp_mparam_dir in $path; do
+  test "$no_create" = yes || rm -f gmp-mparam.h
+  tmp_file=$srcdir/mpn/$gmp_mparam_dir/gmp-mparam.h
+  if test -f $tmp_file; then
+    AC_CONFIG_LINKS(gmp-mparam.h:mpn/$gmp_mparam_dir/gmp-mparam.h)
+    gmp_srclinks="$gmp_srclinks gmp-mparam.h"
+    gmp_mparam_source=$tmp_file
+    break
+  fi
+done
+if test -z "$gmp_mparam_source"; then
+  AC_MSG_ERROR([no version of gmp-mparam.h found in path: $path])
+fi
+
+# For a helpful message from tune/tuneup.c
+#
+# Normally the suggestion is the gmp-mparam.h that was just selected.  If
+# only the generic one was found while the first directory in $path is not
+# generic, suggest creating a new file in that first directory instead.
+# The define below must use $gmp_mparam_suggest (not $gmp_mparam_source),
+# otherwise the "new file" suggestion computed here is never seen.
+gmp_mparam_suggest=$gmp_mparam_source
+if test "$gmp_mparam_dir" = generic; then
+  # pick up the first element of $path in $i
+  for i in $path; do break; done
+  if test "$i" != generic; then
+    gmp_mparam_suggest="new file $srcdir/mpn/$i/gmp-mparam.h"
+  fi
+fi
+AC_DEFINE_UNQUOTED(GMP_MPARAM_H_SUGGEST, "$gmp_mparam_suggest",
+[The gmp-mparam.h file (a string) the tune program should suggest updating.])
+
+
+# Copy relevant parameters from gmp-mparam.h to config.m4.
+# We only do this for parameters that are used by some assembly files.
+# Fat binaries do this on a per-file basis, so skip in that case.
+#
+# The sed command is wrapped in [...] so that the m4 pass of autoconf leaves
+# the bracket expressions of the regexp alone.  Without that the brackets
+# are removed as m4 quotes, the pattern can no longer match a number, and no
+# parameter ever gets copied.
+#
+if test -z "$fat_path"; then
+  for i in SQR_TOOM2_THRESHOLD BMOD_1_TO_MOD_1_THRESHOLD SHLD_SLOW SHRD_SLOW; do
+    value=[`sed -n 's/^#define '$i'[    ]*\([0-9][0-9]*\).*$/\1/p' $gmp_mparam_source`]
+    # Parameters not present in this gmp-mparam.h are simply left undefined.
+    if test -n "$value"; then
+      GMP_DEFINE_RAW(["define(<$i>,<$value>)"])
+    fi
+  done
+fi
+
+
+# Sizes of some types, needed at preprocessing time.
+#
+# FIXME: The assumption that GMP_LIMB_BITS is 8*sizeof(mp_limb_t) might
+# be slightly rash, but it's true everywhere we know of and ought to be true
+# of any sensible system.  In a generic C build, grepping LONG_BIT out of
+# <limits.h> might be an alternative, for maximum portability.
+#
+AC_CHECK_SIZEOF(void *)
+AC_CHECK_SIZEOF(unsigned short)
+AC_CHECK_SIZEOF(unsigned)
+AC_CHECK_SIZEOF(unsigned long)
+AC_CHECK_SIZEOF(mp_limb_t, , GMP_INCLUDE_GMP_H)
+if test "$ac_cv_sizeof_mp_limb_t" = 0; then
+  AC_MSG_ERROR([Oops, mp_limb_t doesn't seem to work])
+fi
+AC_SUBST(GMP_LIMB_BITS, `expr 8 \* $ac_cv_sizeof_mp_limb_t`)
+GMP_DEFINE_RAW(["define(<SIZEOF_UNSIGNED>,<$ac_cv_sizeof_unsigned>)"])
+
+# Check compiler limb size matches gmp-mparam.h
+#
+# FIXME: Some of the cycle counter objects in the tune directory depend on
+# the size of ulong.  It'd be possible to check that here, though a mismatch
+# probably shouldn't be fatal, since none of the libgmp assembler code
+# depends on ulong.
+#
+mparam_bits=[`sed -n 's/^#define GMP_LIMB_BITS[        ][      ]*\([0-9]*\).*$/\1/p' $gmp_mparam_source`]
+if test -n "$mparam_bits" && test "$mparam_bits" -ne $GMP_LIMB_BITS; then
+  if test "$test_CFLAGS" = set; then
+    AC_MSG_ERROR([Oops, mp_limb_t is $GMP_LIMB_BITS bits, but the assembler code
+in this configuration expects $mparam_bits bits.
+You appear to have set \$CFLAGS, perhaps you also need to tell GMP the
+intended ABI, see "ABI and ISA" in the manual.])
+  else
+    AC_MSG_ERROR([Oops, mp_limb_t is $GMP_LIMB_BITS bits, but the assembler code
+in this configuration expects $mparam_bits bits.])
+  fi
+fi
+
+GMP_DEFINE_RAW(["define(<GMP_LIMB_BITS>,$GMP_LIMB_BITS)"])
+GMP_DEFINE_RAW(["define(<GMP_NAIL_BITS>,$GMP_NAIL_BITS)"])
+GMP_DEFINE_RAW(["define(<GMP_NUMB_BITS>,eval(GMP_LIMB_BITS-GMP_NAIL_BITS))"])
+
+
+AC_SUBST(mpn_objects)
+AC_SUBST(mpn_objs_in_libgmp)
+AC_SUBST(gmp_srclinks)
+
+
+# A recompiled sqr_basecase for use in the tune program, if necessary.
+#
+# What is written to tune/sqr_basecase.c depends on whether
+# $sqr_basecase_source (presumably the sqr_basecase source selected earlier
+# in configure) is an assembly file or a C file.  TUNE_SQR_OBJ is given to
+# AC_SUBST and stays empty unless the extra object sqr_asm.o is wanted.
+TUNE_SQR_OBJ=
+test -d tune || mkdir tune
+case $sqr_basecase_source in
+  *.asm)
+    # Read the SQR_TOOM2_THRESHOLD_MAX value out of the asm source, if it
+    # has one.  The [...] around the command is m4 quoting which keeps the
+    # bracket expression of the regexp intact.
+    sqr_max=[`sed -n 's/^def...(SQR_TOOM2_THRESHOLD_MAX, *\([0-9]*\))/\1/p' $sqr_basecase_source`]
+    if test -n "$sqr_max"; then
+      TUNE_SQR_OBJ=sqr_asm.o
+      AC_DEFINE_UNQUOTED(TUNE_SQR_TOOM2_MAX,$sqr_max,
+      [Maximum size the tune program can test for SQR_TOOM2_THRESHOLD])
+    fi
+    # In the asm case tune/sqr_basecase.c is only a placeholder.
+    cat >tune/sqr_basecase.c <<EOF
+/* not sure that an empty file can compile, so put in a dummy */
+int sqr_basecase_dummy;
+EOF
+    ;;
+  *.c)
+    # No extra object.  tune/sqr_basecase.c includes mpn/sqr_basecase.c with
+    # the TUNE_PROGRAM_BUILD macros defined.
+    TUNE_SQR_OBJ=
+    AC_DEFINE(TUNE_SQR_TOOM2_MAX,SQR_TOOM2_MAX_GENERIC)
+    cat >tune/sqr_basecase.c <<EOF
+#define TUNE_PROGRAM_BUILD 1
+#define TUNE_PROGRAM_BUILD_SQR 1
+#include "mpn/sqr_basecase.c"
+EOF
+    ;;
+esac
+AC_SUBST(TUNE_SQR_OBJ)
+
+
+# Configs for demos/pexpr.c.
+#
+AC_CONFIG_FILES(demos/pexpr-config.h:demos/pexpr-config-h.in)
+GMP_SUBST_CHECK_FUNCS(clock, cputime, getrusage, gettimeofday, sigaction, sigaltstack, sigstack)
+GMP_SUBST_CHECK_HEADERS(sys/resource.h)
+AC_CHECK_TYPES([stack_t], HAVE_STACK_T_01=1, HAVE_STACK_T_01=0,
+               [#include <signal.h>])
+AC_SUBST(HAVE_STACK_T_01)
+
+# Configs for demos/calc directory
+#
+# AC_SUBST+AC_CONFIG_FILES is used for calc-config.h, rather than AC_DEFINE+
+# AC_CONFIG_HEADERS, since with the latter automake (1.8) will then put the
+# directory (ie. demos/calc) into $(DEFAULT_INCLUDES) for every Makefile.in,
+# which would look very strange.
+#
+# -lcurses is required by libreadline.  On a typical SVR4 style system this
+# normally doesn't have to be given explicitly, since libreadline.so will
+# have a NEEDED record for it.  But if someone for some reason is using only
+# a static libreadline.a then we must give -lcurses.  Readline (as of
+# version 4.3) doesn't use libtool, so we can't rely on a .la to cover
+# necessary dependencies.
+#
+# On a couple of systems we've seen libreadline available, but the headers
+# not in the default include path, so check for readline/readline.h.  We've
+# also seen readline/history.h missing, not sure if that's just a broken
+# install or a very old version, but check that too.
+#
+AC_CONFIG_FILES(demos/calc/calc-config.h:demos/calc/calc-config-h.in)
+LIBCURSES=
+if test $with_readline != no; then
+  AC_CHECK_LIB(ncurses, tputs, [LIBCURSES=-lncurses],
+    [AC_CHECK_LIB(curses, tputs, [LIBCURSES=-lcurses])])
+fi
+AC_SUBST(LIBCURSES)
+use_readline=$with_readline
+if test $with_readline = detect; then
+  use_readline=no
+  AC_CHECK_LIB(readline, readline,
+    [AC_CHECK_HEADER(readline/readline.h,
+      [AC_CHECK_HEADER(readline/history.h, use_readline=yes)])],
+    , $LIBCURSES)
+  AC_MSG_CHECKING(readline detected)
+  AC_MSG_RESULT($use_readline)
+fi
+if test $use_readline = yes; then
+  AC_SUBST(WITH_READLINE_01, 1)
+  AC_SUBST(LIBREADLINE, -lreadline)
+else
+  WITH_READLINE_01=0
+fi
+AC_PROG_YACC
+AM_PROG_LEX
+
+# Configs for demos/expr directory
+#
+# Libtool already runs an AC_CHECK_TOOL for ranlib, but we give
+# AC_PROG_RANLIB anyway since automake is supposed to complain if it's not
+# called.  (Automake 1.8.4 doesn't, at least not when the only library is in
+# an EXTRA_LIBRARIES.)
+#
+AC_PROG_RANLIB
+
+
+# Create config.m4.
+GMP_FINISH
+
+# Create Makefiles
+# FIXME: Upcoming version of autoconf/automake may not like broken lines.
+#        Right now automake isn't accepting the new AC_CONFIG_FILES scheme.
+
+AC_OUTPUT(Makefile                                                     \
+  mpf/Makefile mpn/Makefile mpq/Makefile                               \
+  mpz/Makefile printf/Makefile scanf/Makefile rand/Makefile cxx/Makefile \
+  tests/Makefile tests/devel/Makefile                                  \
+  tests/mpf/Makefile tests/mpn/Makefile tests/mpq/Makefile             \
+  tests/mpz/Makefile tests/rand/Makefile tests/misc/Makefile           \
+  tests/cxx/Makefile                                                   \
+  doc/Makefile tune/Makefile                                           \
+  demos/Makefile demos/calc/Makefile demos/expr/Makefile               \
+  gmp.h:gmp-h.in)
+
+AC_MSG_NOTICE([summary of build options:
+
+  Version:           ${PACKAGE_STRING}
+  Host type:         ${host}
+  ABI:               ${ABI}
+  Install prefix:    ${prefix}
+  Compiler:          ${CC}
+  Static libraries:  ${enable_static}
+  Shared libraries:  ${enable_shared}
+])
+
+# When cross compiling for mingw or cygwin, mention that the test suite can
+# be run through wine (wine64 for ABI=64).
+case x$cross_compiling in
+  xyes)
+    case "$host" in
+      *-*-mingw* | *-*-cygwin)
+        case x$ABI in
+          x64)
+            AC_MSG_NOTICE([If wine64 is installed, use make check TESTS_ENVIRONMENT=wine64.])
+            ;;
+          *)
+            AC_MSG_NOTICE([If wine is installed, use make check TESTS_ENVIRONMENT=wine.])
+            ;;
+        esac
+        ;;
+    esac
+    ;;
+esac
diff --git a/configure.in b/configure.in

deleted file mode 100644 (file)

index 2396dac..0000000
--- a/configure.in
+++ /dev/null
@@ -1,3477 +0,0 @@
-dnl  Process this file with autoconf to produce a configure script.
-
-
-define(GMP_COPYRIGHT,[[
-
-Copyright 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006,
-2007, 2008, 2009, 2010, 2011 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published
-by the Free Software Foundation; either version 3 of the License, or (at
-your option) any later version.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-License for more details.
-
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
-]])
-
-AC_COPYRIGHT(GMP_COPYRIGHT)
-AH_TOP(/*GMP_COPYRIGHT*/)
-
-AC_REVISION($Revision$)
-AC_PREREQ(2.59)
-AC_INIT(GNU MP, GMP_VERSION, [gmp-bugs@gmplib.org, see http://gmplib.org/manual/Reporting-Bugs.html], gmp)
-AC_CONFIG_SRCDIR(gmp-impl.h)
-m4_pattern_forbid([^[ \t]*GMP_])
-m4_pattern_allow(GMP_LDFLAGS)
-m4_pattern_allow(GMP_LIMB_BITS)
-m4_pattern_allow(GMP_MPARAM_H_SUGGEST)
-m4_pattern_allow(GMP_NAIL_BITS)
-m4_pattern_allow(GMP_NUMB_BITS)
-
-# If --target is not used then $target_alias is empty, but if say
-# "./configure athlon-pc-freebsd3.5" is used, then all three of
-# $build_alias, $host_alias and $target_alias are set to
-# "athlon-pc-freebsd3.5".
-#
-if test -n "$target_alias" && test "$target_alias" != "$host_alias"; then
-  AC_MSG_ERROR([--target is not appropriate for GMP
-Use --build=CPU-VENDOR-OS if you need to specify your CPU and/or system
-explicitly.  Use --host if cross-compiling (see "Installing GMP" in the
-manual for more on this).])
-fi
-
-GMP_INIT(config.m4)
-
-AC_CANONICAL_HOST
-
-dnl  Automake "no-dependencies" is used because include file dependencies
-dnl  are not useful to us.  Pretty much everything depends just on gmp.h,
-dnl  gmp-impl.h and longlong.h, and yet only rarely does everything need to
-dnl  be rebuilt for changes to those files.
-dnl
-dnl  "no-dependencies" also helps with the way we're setup to run
-dnl  AC_PROG_CXX only conditionally.  If dependencies are used then recent
-dnl  automake (eg 1.7.2) appends an AM_CONDITIONAL to AC_PROG_CXX, and then
-dnl  gets upset if it's not actually executed.
-dnl
-dnl  Note that there's a copy of these options in the top-level Makefile.am,
-dnl  so update there too if changing anything.
-dnl
-AM_INIT_AUTOMAKE([1.8 gnu no-dependencies $(top_builddir)/ansi2knr])
-AM_CONFIG_HEADER(config.h:config.in)
-AM_MAINTAINER_MODE
-
-
-AC_ARG_ENABLE(assert,
-AC_HELP_STRING([--enable-assert],[enable ASSERT checking [[default=no]]]),
-[case $enableval in
-yes|no) ;;
-*) AC_MSG_ERROR([bad value $enableval for --enable-assert, need yes or no]) ;;
-esac],
-[enable_assert=no])
-
-if test "$enable_assert" = "yes"; then
-  AC_DEFINE(WANT_ASSERT,1,
-  [Define to 1 to enable ASSERT checking, per --enable-assert])
-  want_assert_01=1
-else
-  want_assert_01=0
-fi
-GMP_DEFINE_RAW(["define(<WANT_ASSERT>,$want_assert_01)"])
-
-
-AC_ARG_ENABLE(alloca,
-AC_HELP_STRING([--enable-alloca],[how to get temp memory [[default=reentrant]]]),
-[case $enableval in
-alloca|malloc-reentrant|malloc-notreentrant) ;;
-yes|no|reentrant|notreentrant) ;;
-debug) ;;
-*)
-  AC_MSG_ERROR([bad value $enableval for --enable-alloca, need one of:
-yes no reentrant notreentrant alloca malloc-reentrant malloc-notreentrant debug]) ;;
-esac],
-[enable_alloca=reentrant])
-
-
-# IMPROVE ME: The default for C++ is disabled.  The tests currently
-# performed below for a working C++ compiler are not particularly strong,
-# and in general can't be expected to get the right setup on their own.  The
-# most significant problem is getting the ABI the same.  Defaulting CXXFLAGS
-# to CFLAGS takes only a small step towards this.  It's also probably worth
-# worrying whether the C and C++ runtimes from say gcc and a vendor C++ can
-# work together.  Some rather broken C++ installations were encountered
-# during testing, and though such things clearly aren't GMP's problem, if
-# --enable-cxx=detect were to be the default then some careful checks of
-# which, if any, C++ compiler on the system is up to scratch would be
-# wanted.
-#
-AC_ARG_ENABLE(cxx,
-AC_HELP_STRING([--enable-cxx],[enable C++ support [[default=no]]]),
-[case $enableval in
-yes|no|detect) ;;
-*) AC_MSG_ERROR([bad value $enableval for --enable-cxx, need yes/no/detect]) ;;
-esac],
-[enable_cxx=no])
-
-
-AC_ARG_ENABLE(fft,
-AC_HELP_STRING([--enable-fft],[enable FFTs for multiplication [[default=yes]]]),
-[case $enableval in
-yes|no) ;;
-*) AC_MSG_ERROR([bad value $enableval for --enable-fft, need yes or no]) ;;
-esac],
-[enable_fft=yes])
-
-if test "$enable_fft" = "yes"; then
-  AC_DEFINE(WANT_FFT,1,
-  [Define to 1 to enable FFTs for multiplication, per --enable-fft])
-fi
-
-
-AC_ARG_ENABLE(old-fft-full,
-AC_HELP_STRING([--enable-old-fft-full],[enable old mpn_mul_fft_full for multiplication [[default=no]]]),
-[case $enableval in
-yes|no) ;;
-*) AC_MSG_ERROR([bad value $enableval for --enable-old-fft-full, need yes or no]) ;;
-esac],
-[enable_old_fft_full=no])
-
-if test "$enable_old_fft_full" = "yes"; then
-  AC_DEFINE(WANT_OLD_FFT_FULL,1,
-  [Define to 1 to enable old mpn_mul_fft_full for multiplication, per --enable-old-fft-full])
-fi
-
-
-AC_ARG_ENABLE(mpbsd,
-AC_HELP_STRING([--enable-mpbsd],
-               [build Berkeley MP compatibility library [[default=no]]]),
-[case $enableval in
-yes|no) ;;
-*) AC_MSG_ERROR([bad value $enableval for --enable-mpbsd, need yes or no]) ;;
-esac],
-[enable_mpbsd=no])
-AM_CONDITIONAL(WANT_MPBSD, test "$enable_mpbsd" = "yes")
-
-
-AC_ARG_ENABLE(nails,
-AC_HELP_STRING([--enable-nails],[use nails on limbs [[default=no]]]),
-[case $enableval in
-[yes|no|[02468]|[0-9][02468]]) ;;
-[*[13579]])
-  AC_MSG_ERROR([bad value $enableval for --enable-nails, only even nail sizes supported]) ;;
-*)
-  AC_MSG_ERROR([bad value $enableval for --enable-nails, need yes/no/number]) ;;
-esac],
-[enable_nails=no])
-
-case $enable_nails in
-yes) GMP_NAIL_BITS=2 ;;
-no)  GMP_NAIL_BITS=0 ;;
-*)   GMP_NAIL_BITS=$enable_nails ;;
-esac
-AC_SUBST(GMP_NAIL_BITS)
-
-
-AC_ARG_ENABLE(profiling,
-AC_HELP_STRING([--enable-profiling],
-               [build with profiler support [[default=no]]]),
-[case $enableval in
-no|prof|gprof|instrument) ;;
-*) AC_MSG_ERROR([bad value $enableval for --enable-profiling, need no/prof/gprof/instrument]) ;;
-esac],
-[enable_profiling=no])
-
-case $enable_profiling in
-  prof)
-    AC_DEFINE(WANT_PROFILING_PROF, 1,
-              [Define to 1 if --enable-profiling=prof])
-    ;;
-  gprof)
-    AC_DEFINE(WANT_PROFILING_GPROF, 1,
-              [Define to 1 if --enable-profiling=gprof])
-    ;;
-  instrument)
-    AC_DEFINE(WANT_PROFILING_INSTRUMENT, 1,
-              [Define to 1 if --enable-profiling=instrument])
-    ;;
-esac
-
-GMP_DEFINE_RAW(["define(<WANT_PROFILING>,<\`$enable_profiling'>)"])
-
-# -fomit-frame-pointer is incompatible with -pg on some chips
-if test "$enable_profiling" = gprof; then
-  fomit_frame_pointer=
-else
-  fomit_frame_pointer="-fomit-frame-pointer"
-fi
-
-
-AC_ARG_WITH(readline,
-AC_HELP_STRING([--with-readline],
-               [readline support in calc demo program [[default=detect]]]),
-[case $withval in
-yes|no|detect) ;;
-*) AC_MSG_ERROR([bad value $withval for --with-readline, need yes/no/detect]) ;;
-esac],
-[with_readline=detect])
-
-
-AC_ARG_ENABLE(fat,
-AC_HELP_STRING([--enable-fat],
-               [build a fat binary on systems that support it [[default=no]]]),
-[case $enableval in
-yes|no) ;;
-*) AC_MSG_ERROR([bad value $enableval for --enable-fat, need yes or no]) ;;
-esac],
-[enable_fat=no])
-
-
-AC_ARG_ENABLE(minithres,
-AC_HELP_STRING([--enable-minithres],
-               [choose minimal thresholds for testing [[default=no]]]),
-[case $enableval in
-yes|no) ;;
-*) AC_MSG_ERROR([bad value $enableval for --enable-minithres, need yes or no]) ;;
-esac],
-[enable_minithres=no])
-
-
-
-tmp_host=`echo $host_cpu | sed 's/\./_/'`
-AC_DEFINE_UNQUOTED(HAVE_HOST_CPU_$tmp_host)
-GMP_DEFINE_RAW("define_not_for_expansion(\`HAVE_HOST_CPU_$tmp_host')", POST)
-
-dnl  The HAVE_HOST_CPU_ list here only needs to have entries for those which
-dnl  are going to be tested, not everything that can possibly be selected.
-dnl
-dnl  The HAVE_HOST_CPU_FAMILY_ list similarly, and note that the AC_DEFINEs
-dnl  for these are under the cpu specific setups below.
-
-AH_VERBATIM([HAVE_HOST_CPU_1],
-[/* Define one of these to 1 for the host CPU family.
-   If your CPU is not in any of these families, leave all undefined.
-   For an AMD64 chip, define "x86" in ABI=32, but not in ABI=64. */
-#undef HAVE_HOST_CPU_FAMILY_alpha
-#undef HAVE_HOST_CPU_FAMILY_m68k
-#undef HAVE_HOST_CPU_FAMILY_power
-#undef HAVE_HOST_CPU_FAMILY_powerpc
-#undef HAVE_HOST_CPU_FAMILY_x86
-#undef HAVE_HOST_CPU_FAMILY_x86_64
-
-/* Define one of the following to 1 for the host CPU, as per the output of
-   ./config.guess.  If your CPU is not listed here, leave all undefined.  */
-#undef HAVE_HOST_CPU_alphaev67
-#undef HAVE_HOST_CPU_alphaev68
-#undef HAVE_HOST_CPU_alphaev7
-#undef HAVE_HOST_CPU_m68020
-#undef HAVE_HOST_CPU_m68030
-#undef HAVE_HOST_CPU_m68040
-#undef HAVE_HOST_CPU_m68060
-#undef HAVE_HOST_CPU_m68360
-#undef HAVE_HOST_CPU_powerpc604
-#undef HAVE_HOST_CPU_powerpc604e
-#undef HAVE_HOST_CPU_powerpc750
-#undef HAVE_HOST_CPU_powerpc7400
-#undef HAVE_HOST_CPU_supersparc
-#undef HAVE_HOST_CPU_i386
-#undef HAVE_HOST_CPU_i586
-#undef HAVE_HOST_CPU_i686
-#undef HAVE_HOST_CPU_pentium
-#undef HAVE_HOST_CPU_pentiummmx
-#undef HAVE_HOST_CPU_pentiumpro
-#undef HAVE_HOST_CPU_pentium2
-#undef HAVE_HOST_CPU_pentium3
-#undef HAVE_HOST_CPU_s390_z900
-#undef HAVE_HOST_CPU_s390_z990
-#undef HAVE_HOST_CPU_s390_z9
-#undef HAVE_HOST_CPU_s390_z10
-#undef HAVE_HOST_CPU_s390_z196
-
-/* Define to 1 iff we have a s390 with 64-bit registers.  */
-#undef HAVE_HOST_CPU_s390_zarch])
-
-
-# Table of compilers, options, and mpn paths.  This code has various related
-# purposes
-#
-#   - better default CC/CFLAGS selections than autoconf otherwise gives
-#   - default CC/CFLAGS selections for extra CPU types specific to GMP
-#   - a few tests for known bad compilers
-#   - choice of ABIs on suitable systems
-#   - selection of corresponding mpn search path
-#
-# After GMP specific searches and tests, the standard autoconf AC_PROG_CC is
-# called.  User selections of CC etc are respected.
-#
-# Care is taken not to use macros like AC_TRY_COMPILE during the GMP
-# pre-testing, since they of course depend on AC_PROG_CC, and also some of
-# them cache their results, which is not wanted.
-#
-# The ABI selection mechanism is unique to GMP.  All that reaches autoconf
-# is a different selection of CC/CFLAGS according to the best ABI the system
-# supports, and/or what the user selects.  Naturally the mpn assembler code
-# selected is very dependent on the ABI.
-#
-# The closest the standard tools come to a notion of ABI is something like
-# "sparc64" which encodes a CPU and an ABI together.  This doesn't seem to
-# scale well for GMP, where exact CPU types like "ultrasparc2" are wanted,
-# separate from the ABI used on them.
-#
-#
-# The variables set here are
-#
-#   cclist              the compiler choices
-#   xx_cflags           flags for compiler xx
-#   xx_cflags_maybe     flags for compiler xx, if they work
-#   xx_cppflags         cpp flags for compiler xx
-#   xx_cflags_optlist   list of sets of optional flags
-#   xx_cflags_yyy       set yyy of optional flags for compiler xx
-#   xx_ldflags          -Wc,-foo flags for libtool linking with compiler xx
-#   ar_flags            extra flags for $AR
-#   nm_flags            extra flags for $NM
-#   limb                limb size, can be "longlong"
-#   path                mpn search path
-#   extra_functions     extra mpn functions
-#   fat_path            fat binary mpn search path [if fat binary desired]
-#   fat_functions       fat functions
-#   fat_thresholds      fat thresholds
-#
-# Suppose xx_cflags_optlist="arch", then flags from $xx_cflags_arch are
-# tried, and the first flag that works will be used.  An optlist like "arch
-# cpu optimize" can be used to get multiple independent sets of flags tried.
-# The first that works from each will be used.  If no flag in a set works
-# then nothing from that set is added.
-#
-# For multiple ABIs, the scheme extends as follows.
-#
-#   abilist               set of ABI choices
-#   cclist_aa             compiler choices in ABI aa
-#   xx_aa_cflags          flags for xx in ABI aa
-#   xx_aa_cflags_maybe    flags for xx in ABI aa, if they work
-#   xx_aa_cppflags        cpp flags for xx in ABI aa
-#   xx_aa_cflags_optlist  list of sets of optional flags in ABI aa
-#   xx_aa_cflags_yyy      set yyy of optional flags for compiler xx in ABI aa
-#   xx_aa_ldflags         -Wc,-foo flags for libtool linking
-#   ar_aa_flags           extra flags for $AR in ABI aa
-#   nm_aa_flags           extra flags for $NM in ABI aa
-#   limb_aa               limb size in ABI aa, can be "longlong"
-#   path_aa               mpn search path in ABI aa
-#   extra_functions_aa    extra mpn functions in ABI aa
-#
-# As a convenience, the unadorned xx_cflags (etc) are used for the last ABI
-# in ablist, if an xx_aa_cflags for that ABI isn't given.  For example if
-# abilist="64 32" then $cc_64_cflags will be used for the 64-bit ABI, but
-# for the 32-bit either $cc_32_cflags or $cc_cflags is used, whichever is
-# defined.  This makes it easy to add some 64-bit compilers and flags to an
-# unadorned 32-bit set.
-#
-# limb=longlong (or limb_aa=longlong) applies to all compilers within that
-# ABI.  It won't work to have some needing long long and some not, since a
-# single instantiated gmp.h will be used by both.
-#
-# SPEED_CYCLECOUNTER, cyclecounter_size and CALLING_CONVENTIONS_OBJS are
-# also set here, with an ABI suffix.
-#
-#
-#
-# A table-driven approach like this to mapping cpu type to good compiler
-# options is a bit of a maintenance burden, but there's not much uniformity
-# between options specifications on different compilers.  Some sort of
-# separately updatable tool might be cute.
-#
-# The use of lots of variables like this, direct and indirect, tends to
-# obscure when and how various things are done, but unfortunately it's
-# pretty much the only way.  If shell subroutines were portable then actual
-# code like "if this .. do that" could be written, but attempting the same
-# with full copies of GMP_PROG_CC_WORKS etc expanded at every point would
-# hugely bloat the output.
-
-
-AC_ARG_VAR(ABI, [desired ABI (for processors supporting more than one ABI)])
-
-# abilist needs to be non-empty, "standard" is just a generic name here
-abilist="standard"
-
-# FIXME: We'd like to prefer an ANSI compiler, perhaps by preferring
-# c89 over cc here.  But note that on HP-UX c89 provides a castrated
-# environment, and would want to be excluded somehow.  Maybe
-# AC_PROG_CC_STDC already does enough to stick cc into ANSI mode and
-# we don't need to worry.
-#
-cclist="gcc cc"
-
-gcc_cflags="-O2 -pedantic"
-gcc_64_cflags="-O2 -pedantic"
-cc_cflags="-O"
-cc_64_cflags="-O"
-
-SPEED_CYCLECOUNTER_OBJ=
-cyclecounter_size=2
-
-AC_SUBST(HAVE_HOST_CPU_FAMILY_power,  0)
-AC_SUBST(HAVE_HOST_CPU_FAMILY_powerpc,0)
-
-case $host in
-
-  a29k*-*-*)
-    path="a29k"
-    ;;
-
-
-  alpha*-*-*)
-    AC_DEFINE(HAVE_HOST_CPU_FAMILY_alpha)
-    case $host_cpu in
-      alphaev5* | alphapca5*)
-       path="alpha/ev5 alpha" ;;
-      alphaev67 | alphaev68 | alphaev7*)
-        path="alpha/ev67 alpha/ev6 alpha" ;;
-      alphaev6)
-       path="alpha/ev6 alpha" ;;
-      *)
-        path="alpha" ;;
-    esac
-    extra_functions="cntlz"
-    gcc_cflags_optlist="asm cpu oldas" # need asm ahead of cpu, see below
-    gcc_cflags_oldas="-Wa,-oldas"     # see GMP_GCC_WA_OLDAS.
-
-    # gcc 2.7.2.3 doesn't know any -mcpu= for alpha, apparently.
-    # gcc 2.95 knows -mcpu= ev4, ev5, ev56, pca56, ev6.
-    # gcc 3.0 adds nothing.
-    # gcc 3.1 adds ev45, ev67 (but ev45 is the same as ev4).
-    # gcc 3.2 adds nothing.
-    #
-    # gcc version "2.9-gnupro-99r1" under "-O2 -mcpu=ev6" strikes internal
-    # compiler errors too easily and is rejected by GMP_PROG_CC_WORKS.  Each
-    # -mcpu=ev6 below has a fallback to -mcpu=ev56 for this reason.
-    #
-    case $host_cpu in
-      alpha)        gcc_cflags_cpu="-mcpu=ev4" ;;
-      alphaev5)     gcc_cflags_cpu="-mcpu=ev5" ;;
-      alphaev56)    gcc_cflags_cpu="-mcpu=ev56" ;;
-      alphapca56 | alphapca57)
-                    gcc_cflags_cpu="-mcpu=pca56" ;;
-      alphaev6)     gcc_cflags_cpu="-mcpu=ev6 -mcpu=ev56" ;;
-      alphaev67 | alphaev68 | alphaev7*)
-                    gcc_cflags_cpu="-mcpu=ev67 -mcpu=ev6 -mcpu=ev56" ;;
-    esac
-
-    # gcc version "2.9-gnupro-99r1" on alphaev68-dec-osf5.1 has been seen
-    # accepting -mcpu=ev6, but not putting the assembler in the right mode
-    # for what it produces.  We need to do this for it, and need to do it
-    # before testing the -mcpu options.
-    #
-    # On old versions of gcc, which don't know -mcpu=, we believe an
-    # explicit -Wa,-mev5 etc will be necessary to put the assembler in
-    # the right mode for our .asm files and longlong.h asm blocks.
-    #
-    # On newer versions of gcc, when -mcpu= is known, we must give a -Wa
-    # which is at least as high as the code gcc will generate.  gcc
-    # establishes what it needs with a ".arch" directive, our command line
-    # option seems to override that.
-    #
-    # gas prior to 2.14 doesn't accept -mev67, but -mev6 seems enough for
-    # ctlz and cttz (in 2.10.0 at least).
-    #
-    # OSF `as' accepts ev68 but stupidly treats it as ev4.  -arch only seems
-    # to affect insns like ldbu which are expanded as macros when necessary.
-    # Insns like ctlz which were never available as macros are always
-    # accepted and always generate their plain code.
-    #
-    case $host_cpu in
-      alpha)        gcc_cflags_asm="-Wa,-arch,ev4 -Wa,-mev4" ;;
-      alphaev5)     gcc_cflags_asm="-Wa,-arch,ev5 -Wa,-mev5" ;;
-      alphaev56)    gcc_cflags_asm="-Wa,-arch,ev56 -Wa,-mev56" ;;
-      alphapca56 | alphapca57)
-                    gcc_cflags_asm="-Wa,-arch,pca56 -Wa,-mpca56" ;;
-      alphaev6)     gcc_cflags_asm="-Wa,-arch,ev6 -Wa,-mev6" ;;
-      alphaev67 | alphaev68 | alphaev7*)
-                    gcc_cflags_asm="-Wa,-arch,ev67 -Wa,-mev67 -Wa,-arch,ev6 -Wa,-mev6" ;;
-    esac
-
-    # It might be better to ask "cc" whether it's Cray C or DEC C,
-    # instead of relying on the OS part of $host.  But it's hard to
-    # imagine either of those compilers anywhere except their native
-    # systems.
-    #
-    GMP_INCLUDE_MPN(alpha/alpha-defs.m4)
-    case $host in
-      *-cray-unicos*)
-        cc_cflags="-O"         # no -g, it silently disables all optimizations
-        GMP_INCLUDE_MPN(alpha/unicos.m4)
-        # Don't perform any assembly syntax tests on this beast.
-        gmp_asm_syntax_testing=no
-        ;;
-      *-*-osf*)
-        GMP_INCLUDE_MPN(alpha/default.m4)
-        cc_cflags=""
-        cc_cflags_optlist="opt cpu"
-
-        # not sure if -fast works on old versions, so make it optional
-       cc_cflags_opt="-fast -O2"
-
-       # DEC C V5.9-005 knows ev4, ev5, ev56, pca56, ev6.
-       # Compaq C V6.3-029 adds ev67.
-       #
-       case $host_cpu in
-         alpha)       cc_cflags_cpu="-arch~ev4~-tune~ev4" ;;
-         alphaev5)    cc_cflags_cpu="-arch~ev5~-tune~ev5" ;;
-         alphaev56)   cc_cflags_cpu="-arch~ev56~-tune~ev56" ;;
-         alphapca56 | alphapca57)
-            cc_cflags_cpu="-arch~pca56~-tune~pca56" ;;
-         alphaev6)    cc_cflags_cpu="-arch~ev6~-tune~ev6" ;;
-         alphaev67 | alphaev68 | alphaev7*)
-            cc_cflags_cpu="-arch~ev67~-tune~ev67 -arch~ev6~-tune~ev6" ;;
-       esac
-        ;;
-      *)
-        GMP_INCLUDE_MPN(alpha/default.m4)
-        ;;
-    esac
-
-    case $host in
-      *-*-unicos*)
-        # tune/alpha.asm assumes int==4bytes but unicos uses int==8bytes
-        ;;
-      *)
-        SPEED_CYCLECOUNTER_OBJ=alpha.lo
-        cyclecounter_size=1 ;;
-    esac
-    ;;
-
-
-  # Cray vector machines.
-  # This must come after alpha* so that we can recognize present and future
-  # vector processors with a wildcard.
-  *-cray-unicos*)
-    gmp_asm_syntax_testing=no
-    cclist="cc"
-    # We used to have -hscalar0 here as a workaround for miscompilation of
-    # mpz/import.c, but let's hope Cray fixes their bugs instead, since
-    # -hscalar0 causes disastrously poor code to be generated.
-    cc_cflags="-O3 -hnofastmd -htask0 -Wa,-B"
-    path="cray"
-    ;;
-
-
-  arm*-*-*)
-    path="arm"
-    gcc_cflags="$gcc_cflags $fomit_frame_pointer"
-    gcc_testlist="gcc-arm-umodsi"
-    GMP_INCLUDE_MPN(arm/arm-defs.m4)
-    ;;
-
-
-  clipper*-*-*)
-    path="clipper"
-    ;;
-
-
-  # Fujitsu
-  [f30[01]-fujitsu-sysv*])
-    cclist="gcc vcc"
-    # FIXME: flags for vcc?
-    vcc_cflags="-g"
-    path="fujitsu"
-    ;;
-
-
-  hppa*-*-*)
-    # HP cc (the one sold separately) is K&R by default, but AM_C_PROTOTYPES
-    # will add "-Ae", or "-Aa -D_HPUX_SOURCE", to put it into ansi mode, if
-    # possible.
-    #
-    # gcc for hppa 2.0 can be built either for 2.0n (32-bit) or 2.0w
-    # (64-bit), but not both, so there's no option to choose the desired
-    # mode, we must instead detect which of the two it is.  This is done by
-    # checking sizeof(long), either 4 or 8 bytes respectively.  Do this in
-    # ABI=1.0 too, in case someone tries to build that with a 2.0w gcc.
-    #
-    gcc_cflags_optlist="arch"
-    gcc_testlist="sizeof-long-4"
-    SPEED_CYCLECOUNTER_OBJ=hppa.lo
-    cyclecounter_size=1
-
-    # FIXME: For hppa2.0*, path should be "pa32/hppa2_0 pa32/hppa1_1 pa32".
-    # (Can't remember why this isn't done already, have to check what .asm
-    # files are available in each and how they run on a typical 2.0 cpu.)
-    #
-    case $host_cpu in
-      hppa1.0*)    path="pa32" ;;
-      hppa7000*)   path="pa32/hppa1_1 pa32" ;;
-      hppa2.0* | hppa64)
-                   path="pa32/hppa2_0 pa32/hppa1_1/pa7100 pa32/hppa1_1 pa32" ;;
-      *)           # default to 7100
-                   path="pa32/hppa1_1/pa7100 pa32/hppa1_1 pa32" ;;
-    esac
-
-    # gcc 2.7.2.3 knows -mpa-risc-1-0 and -mpa-risc-1-1
-    # gcc 2.95 adds -mpa-risc-2-0, plus synonyms -march=1.0, 1.1 and 2.0
-    #
-    # We don't use -mpa-risc-2-0 in ABI=1.0 because 64-bit registers may not
-    # be saved by the kernel on an old system.  Actually gcc (as of 3.2)
-    # only adds a few float instructions with -mpa-risc-2-0, so it would
-    # probably be safe, but let's not take the chance.  In any case, a
-    # configuration like --host=hppa2.0 ABI=1.0 is far from optimal.
-    #
-    case $host_cpu in
-      hppa1.0*)           gcc_cflags_arch="-mpa-risc-1-0" ;;
-      *)                  # default to 7100
-                          gcc_cflags_arch="-mpa-risc-1-1" ;;
-    esac
-
-    case $host_cpu in
-      hppa1.0*)    cc_cflags="+O2" ;;
-      *)           # default to 7100
-                   cc_cflags="+DA1.1 +O2" ;;
-    esac
-
-    case $host in
-      hppa2.0*-*-* | hppa64-*-*)
-       cclist_20n="gcc cc"
-        abilist="2.0n 1.0"
-        path_20n="pa64"
-       limb_20n=longlong
-        any_20n_testlist="sizeof-long-4"
-        SPEED_CYCLECOUNTER_OBJ_20n=hppa2.lo
-        cyclecounter_size_20n=2
-
-        # -mpa-risc-2-0 is only an optional flag, in case an old gcc is
-        # used.  Assembler support for 2.0 is essential though, for our asm
-        # files.
-       gcc_20n_cflags="-O2"
-       gcc_20n_cflags_optlist="arch"
-        gcc_20n_cflags_arch="-mpa-risc-2-0 -mpa-risc-1-1"
-        gcc_20n_testlist="sizeof-long-4 hppa-level-2.0"
-
-        cc_20n_cflags="+DA2.0 +e +O2 -Wl,+vnocompatwarnings"
-        cc_20n_testlist="hpc-hppa-2-0"
-
-       # ABI=2.0w is available for hppa2.0w and hppa2.0, but not for
-       # hppa2.0n, on the assumption that that the latter indicates a
-       # desire for ABI=2.0n.
-       case $host in
-        hppa2.0n-*-*) ;;
-        *)
-          # HPUX 10 and earlier cannot run 2.0w.  Not sure about other
-          # systems (GNU/Linux for instance), but lets assume they're ok.
-          case $host in
-            [*-*-hpux[1-9] | *-*-hpux[1-9].* | *-*-hpux10 | *-*-hpux10.*]) ;;
-           [*-*-linux*])  abilist="1.0" ;; # due to linux permanent kernel bug
-            *)    abilist="2.0w $abilist" ;;
-          esac
-
-          cclist_20w="gcc cc"
-         gcc_20w_cflags="-O2 -mpa-risc-2-0"
-          cc_20w_cflags="+DD64 +O2"
-          cc_20w_testlist="hpc-hppa-2-0"
-          path_20w="pa64"
-         any_20w_testlist="sizeof-long-8"
-          SPEED_CYCLECOUNTER_OBJ_20w=hppa2w.lo
-          cyclecounter_size_20w=2
-         ;;
-        esac
-        ;;
-    esac
-    ;;
-
-
-  i960*-*-*)
-    path="i960"
-    ;;
-
-
-  IA64_PATTERN)
-    abilist="64"
-    GMP_INCLUDE_MPN(ia64/ia64-defs.m4)
-    SPEED_CYCLECOUNTER_OBJ=ia64.lo
-
-    case $host_cpu in
-      itanium)   path="ia64/itanium  ia64" ;;
-      itanium2)  path="ia64/itanium2 ia64" ;;
-      *)         path="ia64" ;;
-    esac
-
-    gcc_64_cflags_optlist="tune"
-    gcc_32_cflags_optlist=$gcc_64_cflags_optlist
-
-    # gcc pre-release 3.4 adds -mtune itanium and itanium2
-    case $host_cpu in
-      itanium)   gcc_cflags_tune="-mtune=itanium" ;;
-      itanium2)  gcc_cflags_tune="-mtune=itanium2" ;;
-    esac
-
-    case $host in
-      *-*-linux*)
-       cclist="gcc icc"
-       icc_cflags="-no-gcc"
-       icc_cflags_optlist="opt"
-       # Don't use -O3, it is for "large data sets" and also miscompiles GMP.
-       # But icc miscompiles GMP at any optimization level, at higher levels
-       # it miscompiles more files...
-       icc_cflags_opt="-O2 -O1"
-       ;;
-
-      *-*-hpux*)
-        # HP cc sometimes gets internal errors if the optimization level is
-        # too high.  GMP_PROG_CC_WORKS detects this, the "_opt" fallbacks
-        # let us use whatever seems to work.
-        #
-        abilist="32 64"
-
-        cclist_32="gcc cc"
-        path_32="ia64"
-        cc_32_cflags=""
-        cc_32_cflags_optlist="opt"
-        cc_32_cflags_opt="+O3 +O2 +O1"
-        gcc_32_cflags="-milp32 -O2"
-        limb_32=longlong
-        SPEED_CYCLECOUNTER_OBJ_32=ia64.lo
-        cyclecounter_size_32=2
-
-        # Must have +DD64 in CPPFLAGS to get the right __LP64__ for headers,
-        # but also need it in CFLAGS for linking programs, since automake
-        # only uses CFLAGS when linking, not CPPFLAGS.
-        # FIXME: Maybe should use cc_64_ldflags for this, but that would
-        # need GMP_LDFLAGS used consistently by all the programs.
-        #
-        cc_64_cflags="+DD64"
-        cc_64_cppflags="+DD64"
-        cc_64_cflags_optlist="opt"
-        cc_64_cflags_opt="+O3 +O2 +O1"
-        gcc_64_cflags="$gcc_64_cflags -mlp64"
-        ;;
-    esac
-    ;;
-
-
-  # Motorola 68k
-  #
-  M68K_PATTERN)
-    AC_DEFINE(HAVE_HOST_CPU_FAMILY_m68k)
-    GMP_INCLUDE_MPN(m68k/m68k-defs.m4)
-    gcc_cflags="$gcc_cflags $fomit_frame_pointer"
-    gcc_cflags_optlist="arch"
-
-    # gcc 2.7.2 knows -m68000, -m68020, -m68030, -m68040.
-    # gcc 2.95 adds -mcpu32, -m68060.
-    # FIXME: Maybe "-m68020 -mnobitfield" would suit cpu32 on 2.7.2.
-    #
-    case $host_cpu in
-    m68020)  gcc_cflags_arch="-m68020" ;;
-    m68030)  gcc_cflags_arch="-m68030" ;;
-    m68040)  gcc_cflags_arch="-m68040" ;;
-    m68060)  gcc_cflags_arch="-m68060 -m68000" ;;
-    m68360)  gcc_cflags_arch="-mcpu32 -m68000" ;;
-    *)       gcc_cflags_arch="-m68000" ;;
-    esac
-
-    # FIXME: m68k/mc68020 looks like it's ok for cpu32, but this wants to be
-    # tested.  Will need to introduce an m68k/cpu32 if m68k/mc68020 ever uses
-    # the bitfield instructions.
-    case $host_cpu in
-    [m680[234]0 | m68360])  path="m68k/mc68020 m68k" ;;
-    *)                      path="m68k" ;;
-    esac
-    ;;
-
-
-  # Motorola 88k
-  m88k*-*-*)
-    path="m88k"
-    ;;
-  m88110*-*-*)
-    gcc_cflags="$gcc_cflags -m88110"
-    path="m88k/mc88110 m88k"
-    ;;
-
-
-  # National Semiconductor 32k
-  ns32k*-*-*)
-    path="ns32k"
-    ;;
-
-
-  # IRIX 5 and earlier can only run 32-bit o32.
-  #
-  # IRIX 6 and up always has a 64-bit mips CPU can run n32 or 64.  n32 is
-  # preferred over 64, but only because that's been the default in past
-  # versions of GMP.  The two are equally efficient.
-  #
-  # Linux kernel 2.2.13 arch/mips/kernel/irixelf.c has a comment about not
-  # supporting n32 or 64.
-  #
-  # For reference, libtool (eg. 1.5.6) recognises the n32 ABI and knows the
-  # right options to use when linking (both cc and gcc), so no need for
-  # anything special from us.
-  #
-  mips*-*-*)
-    abilist="o32"
-    gcc_cflags_optlist="abi"
-    gcc_cflags_abi="-mabi=32"
-    gcc_testlist="gcc-mips-o32"
-    path="mips32"
-    cc_cflags="-O2 -o32"   # no -g, it disables all optimizations
-    # this suits both mips32 and mips64
-    GMP_INCLUDE_MPN(mips32/mips-defs.m4)
-
-    case $host in
-      [mips64*-*-* | mips*-*-irix[6789]*])
-        abilist="n32 64 o32"
-
-        cclist_n32="gcc cc"
-        gcc_n32_cflags="-O2 -mabi=n32"
-        cc_n32_cflags="-O2 -n32"       # no -g, it disables all optimizations
-        limb_n32=longlong
-        path_n32="mips64"
-
-        cclist_64="gcc cc"
-        gcc_64_cflags="$gcc_64_cflags -mabi=64"
-        gcc_64_ldflags="-Wc,-mabi=64"
-        cc_64_cflags="-O2 -64"         # no -g, it disables all optimizations
-        cc_64_ldflags="-Wc,-64"
-        path_64="mips64"
-        ;;
-    esac
-    ;;
-
-
-  # Darwin (powerpc-apple-darwin1.3) has it's hacked gcc installed as cc.
-  # Our usual "gcc in disguise" detection means gcc_cflags etc here gets
-  # used.
-  #
-  # The darwin pre-compiling preprocessor is disabled with -no-cpp-precomp
-  # since it doesn't like "__attribute__ ((mode (SI)))" etc in gmp-impl.h,
-  # and so always ends up running the plain preprocessor anyway.  This could
-  # be done in CPPFLAGS rather than CFLAGS, but there's not many places
-  # preprocessing is done separately, and this is only a speedup, the normal
-  # preprocessor gets run if there's any problems.
-  #
-  # We used to use -Wa,-mppc with gcc, but can't remember exactly why.
-  # Presumably it was for old versions of gcc where -mpowerpc doesn't put
-  # the assembler in the right mode.  In any case -Wa,-mppc is not good, for
-  # instance -mcpu=604 makes recent gcc use -m604 to get access to the
-  # "fsel" instruction, but a -Wa,-mppc overrides that, making code that
-  # comes out with fsel fail.
-  #
-  # (Note also that the darwin assembler doesn't accept "-mppc", so any
-  # -Wa,-mppc was used only if it worked.  The right flag on darwin would be
-  # "-arch ppc" or some such, but that's already the default.)
-  #
-  [powerpc*-*-* | power[3-9]-*-*])
-    AC_DEFINE(HAVE_HOST_CPU_FAMILY_powerpc)
-    HAVE_HOST_CPU_FAMILY_powerpc=1
-    abilist="32"
-    cclist="gcc cc"
-    cc_cflags="-O2"
-    gcc_32_cflags="$gcc_cflags -mpowerpc"
-    gcc_cflags_optlist="precomp subtype asm cpu"
-    gcc_cflags_precomp="-no-cpp-precomp"
-    gcc_cflags_subtype="-force_cpusubtype_ALL" # for vmx on darwin
-    gcc_cflags_asm=""
-    gcc_cflags_cpu=""
-    vmx_path=""
-
-    # grab this object, though it's not a true cycle counter routine
-    SPEED_CYCLECOUNTER_OBJ=powerpc.lo
-    cyclecounter_size=0
-
-    case $host_cpu in
-      powerpc740 | powerpc750)
-        path="powerpc32/750 powerpc32" ;;
-      powerpc7400 | powerpc7410)
-        path="powerpc32/vmx powerpc32/750 powerpc32" ;;
-      [powerpc74[45]?])
-        path="powerpc32/vmx powerpc32" ;;
-      *)
-        path="powerpc32" ;;
-    esac
-
-    case $host_cpu in
-      powerpc401)   gcc_cflags_cpu="-mcpu=401" ;;
-      powerpc403)   gcc_cflags_cpu="-mcpu=403"
-                   xlc_cflags_arch="-qarch=403 -qarch=ppc" ;;
-      powerpc405)   gcc_cflags_cpu="-mcpu=405" ;;
-      powerpc505)   gcc_cflags_cpu="-mcpu=505" ;;
-      powerpc601)   gcc_cflags_cpu="-mcpu=601"
-                   xlc_cflags_arch="-qarch=601 -qarch=ppc" ;;
-      powerpc602)   gcc_cflags_cpu="-mcpu=602"
-                   xlc_cflags_arch="-qarch=602 -qarch=ppc" ;;
-      powerpc603)   gcc_cflags_cpu="-mcpu=603"
-                   xlc_cflags_arch="-qarch=603 -qarch=ppc" ;;
-      powerpc603e)  gcc_cflags_cpu="-mcpu=603e -mcpu=603"
-                   xlc_cflags_arch="-qarch=603 -qarch=ppc" ;;
-      powerpc604)   gcc_cflags_cpu="-mcpu=604"
-                   xlc_cflags_arch="-qarch=604 -qarch=ppc" ;;
-      powerpc604e)  gcc_cflags_cpu="-mcpu=604e -mcpu=604"
-                   xlc_cflags_arch="-qarch=604 -qarch=ppc" ;;
-      powerpc620)   gcc_cflags_cpu="-mcpu=620" ;;
-      powerpc630)   gcc_cflags_cpu="-mcpu=630"
-                   xlc_cflags_arch="-qarch=pwr3"
-                   cpu_path="p3 p3-p7" ;;
-      powerpc740)   gcc_cflags_cpu="-mcpu=740" ;;
-      powerpc7400 | powerpc7410)
-                   gcc_cflags_asm="-Wa,-maltivec"
-                   gcc_cflags_cpu="-mcpu=7400 -mcpu=750" ;;
-      [powerpc74[45]?])
-                   gcc_cflags_asm="-Wa,-maltivec"
-                   gcc_cflags_cpu="-mcpu=7450" ;;
-      powerpc750)   gcc_cflags_cpu="-mcpu=750" ;;
-      powerpc801)   gcc_cflags_cpu="-mcpu=801" ;;
-      powerpc821)   gcc_cflags_cpu="-mcpu=821" ;;
-      powerpc823)   gcc_cflags_cpu="-mcpu=823" ;;
-      powerpc860)   gcc_cflags_cpu="-mcpu=860" ;;
-      powerpc970)   gcc_cflags_cpu="-mtune=970"
-                   xlc_cflags_arch="-qarch=970 -qarch=pwr3"
-                   vmx_path="powerpc64/vmx"
-                   cpu_path="p4 p3-p7" ;;
-      power4)      gcc_cflags_cpu="-mtune=power4"
-                   xlc_cflags_arch="-qarch=pwr4"
-                   cpu_path="p4 p3-p7" ;;
-      power5)      gcc_cflags_cpu="-mtune=power5 -mtune=power4"
-                   xlc_cflags_arch="-qarch=pwr5"
-                   cpu_path="p5 p4 p3-p7" ;;
-      power6)      gcc_cflags_cpu="-mtune=power6"
-                   xlc_cflags_arch="-qarch=pwr6"
-                   cpu_path="p6 p3-p7" ;;
-      power7)      gcc_cflags_cpu="-mtune=power7 -mtune=power5"
-                   xlc_cflags_arch="-qarch=pwr7 -qarch=pwr5"
-                   cpu_path="p7 p5 p4 p3-p7" ;;
-    esac
-
-    case $host in
-      *-*-aix*)
-       cclist="gcc xlc cc"
-       gcc_32_cflags_maybe="-maix32"
-       xlc_cflags="-O2 -qmaxmem=20000"
-       xlc_cflags_optlist="arch"
-       xlc_32_cflags_maybe="-q32"
-       ar_32_flags="-X32"
-       nm_32_flags="-X32"
-    esac
-
-    case $host in
-      POWERPC64_PATTERN)
-       case $host in
-         *-*-aix*)
-           # On AIX a true 64-bit ABI is available.
-           # Need -Wc to pass object type flags through to the linker.
-           abilist="aix64 $abilist"
-           cclist_aix64="gcc xlc"
-           gcc_aix64_cflags="-O2 -maix64 -mpowerpc64"
-           gcc_aix64_cflags_optlist="cpu"
-           gcc_aix64_ldflags="-Wc,-maix64"
-           xlc_aix64_cflags="-O2 -q64 -qmaxmem=20000"
-           xlc_aix64_cflags_optlist="arch"
-           xlc_aix64_ldflags="-Wc,-q64"
-           # Must indicate object type to ar and nm
-           ar_aix64_flags="-X64"
-           nm_aix64_flags="-X64"
-           path_aix64=""
-           for i in $cpu_path; do path_aix64="${path_aix64}powerpc64/mode64/$i "; done
-           path_aix64="${path_aix64}powerpc64/mode64 $vmx_path powerpc64"
-           # grab this object, though it's not a true cycle counter routine
-           SPEED_CYCLECOUNTER_OBJ_aix64=powerpc64.lo
-           cyclecounter_size_aix64=0
-           ;;
-         *-*-darwin*)
-           # On Darwin we can use 64-bit instructions with a longlong limb,
-           # but the chip still in 32-bit mode.
-           # In theory this can be used on any OS which knows how to save
-           # 64-bit registers in a context switch.
-           #
-           # Note that we must use -mpowerpc64 with gcc, since the
-           # longlong.h macros expect limb operands in a single 64-bit
-           # register, not two 32-bit registers as would be given for a
-           # long long without -mpowerpc64.  In theory we could detect and
-           # accommodate both styles, but the proper 64-bit registers will
-           # be fastest and are what we really want to use.
-           #
-           # One would think -mpowerpc64 would set the assembler in the right
-           # mode to handle 64-bit instructions.  But for that, also
-           # -force_cpusubtype_ALL is needed.
-           #
-           # Do not use -fast for Darwin, it actually adds options
-           # incompatible with a shared library.
-           #
-           abilist="mode64 mode32 $abilist"
-           gcc_32_cflags_maybe="-m32"
-           gcc_cflags_opt="-O3 -O2 -O1"        # will this become used?
-           cclist_mode32="gcc"
-           gcc_mode32_cflags_maybe="-m32"
-           gcc_mode32_cflags="-mpowerpc64"
-           gcc_mode32_cflags_optlist="subtype cpu opt"
-           gcc_mode32_cflags_subtype="-force_cpusubtype_ALL"
-           gcc_mode32_cflags_opt="-O3 -O2 -O1"
-           path_mode32="powerpc64/mode32 $vmx_path powerpc64"
-           limb_mode32=longlong
-           cclist_mode64="gcc"
-           gcc_mode64_cflags="-m64"
-           gcc_mode64_cflags_optlist="cpu opt"
-           gcc_mode64_cflags_opt="-O3 -O2 -O1"
-           path_mode64=""
-           for i in $cpu_path; do path_mode64="${path_mode64}powerpc64/mode64/$i "; done
-           path_mode64="${path_mode64}powerpc64/mode64 $vmx_path powerpc64"
-           SPEED_CYCLECOUNTER_OBJ_mode64=powerpc64.lo
-           cyclecounter_size_mode64=0
-           any_mode64_testlist="sizeof-long-8"
-           ;;
-         *-*-linux* | *-*-*bsd*)
-           # On GNU/Linux, assume the processor is in 64-bit mode.  Some
-           # environments have a gcc that is always in 64-bit mode, while
-           # others require -m64, hence the use of cflags_maybe.  The
-           # sizeof-long-8 test checks the mode is right (for the no option
-           # case).
-           #
-           # -mpowerpc64 is not used, since it should be the default in
-           # 64-bit mode.  (We need its effect for the various longlong.h
-           # asm macros to be right of course.)
-           #
-           # gcc64 was an early port of gcc to 64-bit mode, but should be
-           # obsolete before too long.  We prefer plain gcc when it knows
-           # 64-bits.
-           #
-           abilist="mode64 mode32 $abilist"
-           gcc_32_cflags_maybe="-m32"
-           cclist_mode32="gcc"
-           gcc_mode32_cflags_maybe="-m32"
-           gcc_mode32_cflags="-mpowerpc64"
-           gcc_mode32_cflags_optlist="cpu opt"
-           gcc_mode32_cflags_opt="-O3 -O2 -O1"
-           path_mode32="powerpc64/mode32 $vmx_path powerpc64"
-           limb_mode32=longlong
-           cclist_mode64="gcc gcc64"
-           gcc_mode64_cflags_maybe="-m64"
-           gcc_mode64_cflags_optlist="cpu opt"
-           gcc_mode64_cflags_opt="-O3 -O2 -O1"
-           path_mode64=""
-           for i in $cpu_path; do path_mode64="${path_mode64}powerpc64/mode64/$i "; done
-           path_mode64="${path_mode64}powerpc64/mode64 $vmx_path powerpc64"
-           SPEED_CYCLECOUNTER_OBJ_mode64=powerpc64.lo
-           cyclecounter_size_mode64=0
-           any_mode64_testlist="sizeof-long-8"
-           ;;
-       esac
-       ;;
-    esac
-    ;;
-
-
-  # POWER 32-bit
-  [power-*-* | power[12]-*-* | power2sc-*-*])
-    AC_DEFINE(HAVE_HOST_CPU_FAMILY_power)
-    HAVE_HOST_CPU_FAMILY_power=1
-    cclist="gcc"
-    extra_functions="udiv_w_sdiv"
-    path="power"
-
-    # gcc 2.7.2 knows rios1, rios2, rsc
-    #
-    # -mcpu=rios2 can tickle an AIX assembler bug (see GMP_PROG_CC_WORKS) so
-    # there needs to be a fallback to just -mpower.
-    #
-    gcc_cflags_optlist="cpu"
-    case $host in
-      power-*-*)    gcc_cflags_cpu="-mcpu=power -mpower" ;;
-      power1-*-*)   gcc_cflags_cpu="-mcpu=rios1 -mpower" ;;
-      power2-*-*)   gcc_cflags_cpu="-mcpu=rios2 -mpower" ;;
-      power2sc-*-*) gcc_cflags_cpu="-mcpu=rsc   -mpower" ;;
-    esac
-    case $host in
-    *-*-aix*)
-      cclist="gcc xlc"
-      xlc_cflags="-O2 -qarch=pwr -qmaxmem=20000"
-      ;;
-    esac
-    ;;
-
-
-  pyramid-*-*)
-    path="pyr"
-    ;;
-
-
-  # IBM System/390 and z/Architecture
-  S390_PATTERN | S390X_PATTERN)
-    abilist="32"
-    gcc_cflags="$gcc_cflags $fomit_frame_pointer"
-    gcc_cflags_optlist="arch"
-    path="s390_32"
-    extra_functions="udiv_w_sdiv"
-    gcc_32_cflags_maybe="-m31"
-
-    case $host_cpu in
-      s390)
-       ;;
-      z900 | z900esa)
-        cpu="z900"
-        gccarch="$cpu"
-       path="s390_32/esame/$cpu s390_32/esame s390_32"
-       gcc_cflags_arch="-march=$gccarch"
-       AC_DEFINE_UNQUOTED(HAVE_HOST_CPU_s390_$cpu)
-       AC_DEFINE(HAVE_HOST_CPU_s390_zarch)
-       extra_functions=""
-        ;;
-      z990 | z990esa)
-        cpu="z990"
-        gccarch="$cpu"
-       path="s390_32/esame/$cpu s390_32/esame s390_32"
-       gcc_cflags_arch="-march=$gccarch"
-       AC_DEFINE_UNQUOTED(HAVE_HOST_CPU_s390_$cpu)
-       AC_DEFINE(HAVE_HOST_CPU_s390_zarch)
-       extra_functions=""
-        ;;
-      z9 | z9esa)
-        cpu="z9"
-       gccarch="z9-109"
-       path="s390_32/esame/$cpu s390_32/esame s390_32"
-       gcc_cflags_arch="-march=$gccarch"
-       AC_DEFINE_UNQUOTED(HAVE_HOST_CPU_s390_$cpu)
-       AC_DEFINE(HAVE_HOST_CPU_s390_zarch)
-       extra_functions=""
-        ;;
-      z10 | z10esa)
-        cpu="z10"
-       gccarch="z10"
-       path="s390_32/esame/$cpu s390_32/esame s390_32"
-       gcc_cflags_arch="-march=$gccarch"
-       AC_DEFINE_UNQUOTED(HAVE_HOST_CPU_s390_$cpu)
-       AC_DEFINE(HAVE_HOST_CPU_s390_zarch)
-       extra_functions=""
-        ;;
-      z196 | z196esa)
-        cpu="z196"
-       gccarch="z196"
-       path="s390_32/esame/$cpu s390_32/esame s390_32"
-       gcc_cflags_arch="-march=$gccarch"
-       AC_DEFINE_UNQUOTED(HAVE_HOST_CPU_s390_$cpu)
-       AC_DEFINE(HAVE_HOST_CPU_s390_zarch)
-       extra_functions=""
-        ;;
-      esac
-
-    case $host in
-      S390X_PATTERN)
-       abilist="64 32"
-       cclist_64="gcc"
-       gcc_64_cflags_optlist="arch"
-       gcc_64_cflags="$gcc_cflags -m64"
-       path_64="s390_64/$host_cpu s390_64"
-       extra_functions=""
-       ;;
-      esac
-    ;;
-
-
-  sh-*-*)   path="sh" ;;
-  [sh[2-4]-*-*])  path="sh/sh2 sh" ;;
-
-
-  *sparc*-*-*)
-    # sizeof(long)==4 or 8 is tested, to ensure we get the right ABI.  We've
-    # had various bug reports where users have set CFLAGS for their desired
-    # mode, but not set our ABI.  For some reason it's sparc where this
-    # keeps coming up, presumably users there are accustomed to driving the
-    # compiler mode that way.  The effect of our testlist setting is to
-    # reject ABI=64 in favour of ABI=32 if the user has forced the flags to
-    # 32-bit mode.
-    #
-    abilist="32"
-    cclist="gcc acc cc"
-    any_testlist="sizeof-long-4"
-    GMP_INCLUDE_MPN(sparc32/sparc-defs.m4)
-
-    case $host_cpu in
-      sparcv8 | microsparc | turbosparc)
-        path="sparc32/v8 sparc32" ;;
-      supersparc)
-        path="sparc32/v8/supersparc sparc32/v8 sparc32" ;;
-      sparc64 | sparcv9* | ultrasparc*)
-        path="sparc32/v9 sparc32/v8 sparc32" ;;
-      *)
-        path="sparc32" ;;
-    esac
-
-    # gcc 2.7.2 doesn't know about v9 and doesn't pass -xarch=v8plus to the
-    # assembler.  Add it explicitly since the solaris assembler won't accept
-    # our sparc32/v9 asm code without it.  gas accepts -xarch=v8plus too, so
-    # it can be in the cflags unconditionally (though gas doesn't need it).
-    #
-    # gcc -m32 is needed to force 32-bit mode on a dual-ABI system, but past
-    # gcc doesn't know that flag, hence cflags_maybe.  Note that -m32 cannot
-    # be done through the optlist since the plain cflags would be run first
-    # and we don't want to require the default mode (whatever it is) works.
-    #
-    # Note it's gcc_32_cflags_maybe and not gcc_cflags_maybe because the
-    # latter would be used in the 64-bit ABI on systems like "*bsd" where
-    # abilist="64" only.
-    #
-    case $host_cpu in
-      sparc64 | sparcv9* | ultrasparc*)
-        gcc_cflags="$gcc_cflags -Wa,-xarch=v8plus" ;;
-      *)
-        gcc_cflags="$gcc_cflags" ;;
-    esac
-    gcc_32_cflags_maybe="-m32"
-    gcc_cflags_optlist="cpu"
-
-    # gcc 2.7.2 knows -mcypress, -msupersparc, -mv8, -msparclite.
-    # gcc 2.95 knows -mcpu= v7, hypersparc, sparclite86x, f930, f934,
-    #   sparclet, tsc701, v9, ultrasparc.  A warning is given that the
-    #   plain -m forms will disappear.
-    # gcc 3.0 adds nothing.
-    # gcc 3.1 adds nothing.
-    # gcc 3.2 adds nothing.
-    # gcc 3.3 adds ultrasparc3.
-    #
-    case $host_cpu in
-      supersparc)           gcc_cflags_cpu="-mcpu=supersparc -msupersparc" ;;
-      sparcv8 | microsparc | turbosparc)
-                           gcc_cflags_cpu="-mcpu=v8 -mv8" ;;
-      sparc64 | sparcv9*)   gcc_cflags_cpu="-mcpu=v9 -mv8" ;;
-      ultrasparc3)          gcc_cflags_cpu="-mcpu=ultrasparc3 -mcpu=ultrasparc -mv8" ;;
-      ultrasparc*)          gcc_cflags_cpu="-mcpu=ultrasparc -mv8" ;;
-      *)                    gcc_cflags_cpu="-mcpu=v7 -mcypress" ;;
-    esac
-
-    # SunPRO cc and acc, and SunOS bundled cc
-    case $host in
-      *-*-solaris* | *-*-sunos*)
-       # Note no -g, it disables all optimizations.
-       cc_cflags=
-       cc_cflags_optlist="opt arch cpu"
-
-        # SunOS cc doesn't know -xO4, fallback to -O2.
-       cc_cflags_opt="-xO4 -O2"
-
-        # SunOS cc doesn't know -xarch, apparently always generating v7
-        # code, so make this optional
-       case $host_cpu in
-         sparcv8 | microsparc | supersparc | turbosparc)
-                                             cc_cflags_arch="-xarch=v8" ;;
-         sparc64 | sparcv9* | ultrasparc*)   cc_cflags_arch="-xarch=v8plus" ;;
-         *)                                  cc_cflags_arch="-xarch=v7" ;;
-       esac
-
-        # SunOS cc doesn't know -xchip and doesn't seem to have an equivalent.
-       # SunPRO cc 5 recognises -xchip=generic, old, super, super2, micro,
-       #   micro2, hyper, hyper2, powerup, ultra, ultra2, ultra2i.
-       # SunPRO cc 6 adds -xchip=ultra2e, ultra3cu.
-        #
-       # FIXME: Which of ultra, ultra2 or ultra2i is the best fallback for
-       # ultrasparc3?
-       #
-       case $host_cpu in
-         supersparc)   cc_cflags_cpu="-xchip=super" ;;
-         microsparc)   cc_cflags_cpu="-xchip=micro" ;;
-         turbosparc)   cc_cflags_cpu="-xchip=micro2" ;;
-         ultrasparc)   cc_cflags_cpu="-xchip=ultra" ;;
-         ultrasparc2)  cc_cflags_cpu="-xchip=ultra2" ;;
-         ultrasparc2i) cc_cflags_cpu="-xchip=ultra2i" ;;
-         ultrasparc3)  cc_cflags_cpu="-xchip=ultra3 -xchip=ultra" ;;
-         *)            cc_cflags_cpu="-xchip=generic" ;;
-       esac
-    esac
-
-    case $host_cpu in
-      sparc64 | sparcv9* | ultrasparc*)
-        case $host in
-          # Solaris 6 and earlier cannot run ABI=64 since it doesn't save
-          # registers properly, so ABI=32 is left as the only choice.
-          #
-          [*-*-solaris2.[0-6] | *-*-solaris2.[0-6].*]) ;;
-
-          # BSD sparc64 ports are 64-bit-only systems, so ABI=64 is the only
-          # choice.  In fact they need no special compiler flags, gcc -m64
-          # is the default, but it doesn't hurt to add it.  v9 CPUs always
-          # use the sparc64 port, since the plain 32-bit sparc ports don't
-          # run on a v9.
-          #
-          *-*-*bsd*) abilist="64" ;;
-
-          # For all other systems, we try both 64 and 32.
-          #
-          # GNU/Linux sparc64 has only recently gained a 64-bit user mode.
-          # In the past sparc64 meant a v9 cpu, but there were no 64-bit
-          # operations in user mode.  We assume that if "gcc -m64" works
-          # then the system is suitable.  Hopefully even if someone attempts
-          # to put a new gcc and/or glibc on an old system it won't run.
-          #
-          *) abilist="64 32" ;;
-        esac
-
-       case $host_cpu in
-         ultrasparc | ultrasparc2 | ultrasparc2i)
-           path_64="sparc64/ultrasparc12 sparc64" ;;
-         [ultrasparc[34]])
-           path_64="sparc64/ultrasparc34 sparc64/ultrasparc1234 sparc64" ;;
-         [ultrasparct[1234]])
-           path_64="sparc64" ;;
-         *)
-           path_64="sparc64"
-       esac
-
-        cclist_64="gcc"
-        any_64_testlist="sizeof-long-8"
-
-        # gcc -mptr64 is probably implied by -m64, but we're not sure if
-        # this was always so.  On Solaris in the past we always used both
-        # "-m64 -mptr64".
-        #
-        # gcc -Wa,-xarch=v9 is thought to be necessary in some cases on
-        # solaris, but it would seem likely that if gcc is going to generate
-        # 64-bit code it will have to add that option itself where needed.
-        # An extra copy of this option should be harmless though, but leave
-        # it until we're sure.  (Might want -xarch=v9a or -xarch=v9b for the
-        # higher cpu types instead.)
-        #
-        gcc_64_cflags="$gcc_64_cflags -m64 -mptr64"
-        gcc_64_ldflags="-Wc,-m64"
-        gcc_64_cflags_optlist="cpu"
-
-        case $host in
-          *-*-solaris*)
-            # Sun cc.
-            #
-            # We used to have -fast and some fixup options here, but it
-            # recurrently caused problems with miscompilation.  Of course,
-            # -fast is documented as miscompiling things for the sake of speed.
-            #
-            cclist_64="$cclist_64 cc"
-            cc_64_cflags="-xO3 -xarch=v9"
-            cc_64_cflags_optlist="cpu"
-            ;;
-        esac
-
-        # using the v9 %tick register
-        SPEED_CYCLECOUNTER_OBJ_32=sparcv9.lo
-        SPEED_CYCLECOUNTER_OBJ_64=sparcv9.lo
-        cyclecounter_size_32=2
-        cyclecounter_size_64=2
-        ;;
-    esac
-    ;;
-
-
-  # VAX
-  vax*-*-*)
-    # Currently gcc (version 3.0) on vax always uses a frame pointer
-    # (config/vax/vax.h FRAME_POINTER_REQUIRED=1), so -fomit-frame-pointer
-    # will be ignored.
-    #
-    gcc_cflags="$gcc_cflags $fomit_frame_pointer"
-    path="vax"
-    extra_functions="udiv_w_sdiv"
-    ;;
-
-
-  # AMD and Intel x86 configurations, including AMD64
-  #
-  # Rumour has it gcc -O2 used to give worse register allocation than just
-  # -O, but lets assume that's no longer true.
-  #
-  # -m32 forces 32-bit mode on a bi-arch 32/64 amd64 build of gcc.  -m64 is
-  # the default in such a build (we think), so -m32 is essential for ABI=32.
-  # This is, of course, done for any $host_cpu, not just x86_64, so we can
-  # get such a gcc into the right mode to cross-compile to say i486-*-*.
-  #
-  # -m32 is not available in gcc 2.95 and earlier, hence cflags_maybe to use
-  # it when it works.  We check sizeof(long)==4 to ensure we get the right
-  # mode, in case -m32 has failed not because it's an old gcc, but because
-  # it's a dual 32/64-bit gcc without a 32-bit libc, or whatever.
-  #
-  X86_PATTERN | X86_64_PATTERN)
-    abilist="32"
-    cclist="gcc icc cc"
-    gcc_cflags="$gcc_cflags $fomit_frame_pointer"
-    gcc_32_cflags_maybe="-m32"
-    icc_cflags="-no-gcc"
-    icc_cflags_optlist="opt"
-    icc_cflags_opt="-O3 -O2 -O1"
-    any_32_testlist="sizeof-long-4"
-    CALLING_CONVENTIONS_OBJS='x86call.lo x86check$U.lo'
-
-    # Availability of rdtsc is checked at run-time.
-    SPEED_CYCLECOUNTER_OBJ=pentium.lo
-
-    # gcc 2.7.2 only knows i386 and i486, using -m386 or -m486.  These
-    #     represent -mcpu= since -m486 doesn't generate 486 specific insns.
-    # gcc 2.95 adds k6, pentium and pentiumpro, and takes -march= and -mcpu=.
-    # gcc 3.0 adds athlon.
-    # gcc 3.1 adds k6-2, k6-3, pentium-mmx, pentium2, pentium3, pentium4,
-    #     athlon-tbird, athlon-4, athlon-xp, athlon-mp.
-    # gcc 3.2 adds winchip2.
-    # gcc 3.3 adds winchip-c6.
-    # gcc 3.3.1 from mandrake adds k8 and knows -mtune.
-    # gcc 3.4 adds c3, c3-2, k8, and deprecates -mcpu in favour of -mtune.
-    #
-    # In gcc 2.95.[0123], -march=pentiumpro provoked a stack slot bug in an
-    # old version of mpz/powm.c.  Seems to be fine with the current code, so
-    # no need for any restrictions on that option.
-    #
-    # -march=pentiumpro can fail if the assembler doesn't know "cmov"
-    # (eg. solaris 2.8 native "as"), so always have -march=pentium after
-    # that as a fallback.
-    #
-    # -march=pentium4 and -march=k8 enable SSE2 instructions, which may or
-    # may not be supported by the assembler and/or the OS, and is bad in gcc
-    # prior to 3.3.  The tests will reject these if no good, so fallbacks
-    # like "-march=pentium4 -mno-sse2" are given to try also without SSE2.
-    # Note the relevant -march types are listed in the optflags handling
-    # below, be sure to update there if adding new types emitting SSE2.
-    #
-    # -mtune is used at the start of each cpu option list to give something
-    # gcc 3.4 will use, thereby avoiding warnings from -mcpu.  -mcpu forms
-    # are retained for use by prior gcc.  For example pentium has
-    # "-mtune=pentium -mcpu=pentium ...", the -mtune is for 3.4 and the
-    # -mcpu for prior.  If there's a brand new choice in 3.4 for a chip,
-    # like k8 for x86_64, then it can be the -mtune at the start, no need to
-    # duplicate anything.
-    #
-    gcc_cflags_optlist="cpu arch"
-    case $host_cpu in
-      i386*)
-       gcc_cflags_cpu="-mtune=i386 -mcpu=i386 -m386"
-       gcc_cflags_arch="-march=i386"
-       path="x86"
-       ;;
-      i486*)
-       gcc_cflags_cpu="-mtune=i486 -mcpu=i486 -m486"
-       gcc_cflags_arch="-march=i486"
-       path="x86/i486 x86"
-       ;;
-      i586 | pentium)
-       gcc_cflags_cpu="-mtune=pentium -mcpu=pentium -m486"
-       gcc_cflags_arch="-march=pentium"
-       path="x86/pentium x86"
-       ;;
-      pentiummmx)
-       gcc_cflags_cpu="-mtune=pentium-mmx -mcpu=pentium-mmx -mcpu=pentium -m486"
-       gcc_cflags_arch="-march=pentium-mmx -march=pentium"
-       path="x86/pentium/mmx x86/pentium x86"
-       ;;
-      i686 | pentiumpro)
-       gcc_cflags_cpu="-mtune=pentiumpro -mcpu=pentiumpro -mcpu=i486 -m486"
-       gcc_cflags_arch="-march=pentiumpro -march=pentium"
-       path="x86/p6 x86"
-       ;;
-      pentium2)
-       gcc_cflags_cpu="-mtune=pentium2 -mcpu=pentium2 -mcpu=pentiumpro -mcpu=i486 -m486"
-       gcc_cflags_arch="-march=pentium2 -march=pentiumpro -march=pentium"
-       path="x86/p6/mmx x86/p6 x86"
-       ;;
-      pentium3)
-       gcc_cflags_cpu="-mtune=pentium3 -mcpu=pentium3 -mcpu=pentiumpro -mcpu=i486 -m486"
-       gcc_cflags_arch="-march=pentium3 -march=pentiumpro -march=pentium"
-       path="x86/p6/p3mmx x86/p6/mmx x86/p6 x86"
-       ;;
-      pentiumm)
-       gcc_cflags_cpu="-mtune=pentium3 -mcpu=pentium3 -mcpu=pentiumpro -mcpu=i486 -m486"
-       gcc_cflags_arch="-march=pentium3 -march=pentiumpro -march=pentium"
-       path="x86/p6/sse2 x86/p6/p3mmx x86/p6/mmx x86/p6 x86"
-       ;;
-      k6)
-       gcc_cflags_cpu="-mtune=k6 -mcpu=k6 -mcpu=i486 -m486"
-       gcc_cflags_arch="-march=k6"
-       path="x86/k6/mmx x86/k6 x86"
-       ;;
-      k62)
-       gcc_cflags_cpu="-mtune=k6-2 -mcpu=k6-2 -mcpu=k6 -mcpu=i486 -m486"
-       gcc_cflags_arch="-march=k6-2 -march=k6"
-       path="x86/k6/k62mmx x86/k6/mmx x86/k6 x86"
-       ;;
-      k63)
-       gcc_cflags_cpu="-mtune=k6-3 -mcpu=k6-3 -mcpu=k6 -mcpu=i486 -m486"
-       gcc_cflags_arch="-march=k6-3 -march=k6"
-       path="x86/k6/k62mmx x86/k6/mmx x86/k6 x86"
-       ;;
-      geode)
-       gcc_cflags_cpu="-mtune=k6-3 -mcpu=k6-3 -mcpu=k6 -mcpu=i486 -m486"
-       gcc_cflags_arch="-march=k6-3 -march=k6"
-       path="x86/geode x86/k6/k62mmx x86/k6/mmx x86/k6 x86"
-       ;;
-      athlon)
-       # Athlon instruction costs are close to P6 (3 cycle load latency,
-       # 4-6 cycle mul, 40 cycle div, pairable adc, etc) so if gcc doesn't
-       # know athlon (eg. 2.95.2 doesn't) then fall back on pentiumpro.
-       gcc_cflags_cpu="-mtune=athlon -mcpu=athlon -mcpu=pentiumpro -mcpu=i486 -m486"
-       gcc_cflags_arch="-march=athlon -march=pentiumpro -march=pentium"
-       path="x86/k7/mmx x86/k7 x86"
-       ;;
-      i786 | pentium4)
-       # pentiumpro is the primary fallback when gcc doesn't know pentium4.
-       # This gets us cmov to eliminate branches.  Maybe "athlon" would be
-       # a possibility on gcc 3.0.
-       #
-       gcc_cflags_cpu="-mtune=pentium4 -mcpu=pentium4 -mcpu=pentiumpro -mcpu=i486 -m486"
-       gcc_cflags_arch="-march=pentium4 -march=pentium4~-mno-sse2 -march=pentiumpro -march=pentium"
-       gcc_64_cflags_cpu="-mtune=nocona"
-       path="x86/pentium4/sse2 x86/pentium4/mmx x86/pentium4 x86"
-       path_64="x86_64/pentium4 x86_64"
-       ;;
-      viac32)
-       # Not sure of the best fallbacks here for -mcpu.
-       # c3-2 has sse and mmx, so pentium3 is good for -march.
-       gcc_cflags_cpu="-mtune=c3-2 -mcpu=c3-2 -mcpu=i486 -m486"
-       gcc_cflags_arch="-march=c3-2 -march=pentium3 -march=pentiumpro -march=pentium"
-       path="x86/p6/p3mmx x86/p6/mmx x86/p6 x86"
-       ;;
-      viac3*)
-       # Not sure of the best fallbacks here.
-       gcc_cflags_cpu="-mtune=c3 -mcpu=c3 -mcpu=i486 -m486"
-       gcc_cflags_arch="-march=c3 -march=pentium-mmx -march=pentium"
-       path="x86/pentium/mmx x86/pentium x86"
-       ;;
-      athlon64 | k8 | x86_64)
-       gcc_cflags_cpu="-mtune=k8 -mcpu=athlon -mcpu=pentiumpro -mcpu=i486 -m486"
-       gcc_cflags_arch="-march=k8 -march=k8~-mno-sse2 -march=athlon -march=pentiumpro -march=pentium"
-       path="x86/k8 x86/k7/mmx x86/k7 x86"
-       path_64="x86_64/k8 x86_64"
-       ;;
-      k10)
-       gcc_cflags_cpu="-mtune=amdfam10 -mtune=k8"
-       gcc_cflags_arch="-march=amdfam10 -march=k8 -march=k8~-mno-sse2"
-       path="x86/k10 x86/k8 x86/k7/mmx x86/k7 x86"
-       path_64="x86_64/k10 x86_64/k8 x86_64"
-       ;;
-      bobcat)
-       gcc_cflags_cpu="-mtune=btver1 -mtune=amdfam10 -mtune=k8"
-       gcc_cflags_arch="-march=btver1 -march=amdfam10 -march=k8 -march=k8~-mno-sse2"
-       path="x86/bobcat x86/k7/mmx x86/k7 x86"
-       path_64="x86_64/bobcat x86_64/k10 x86_64/k8 x86_64"
-       ;;
-      bulldozer | bd1)
-       gcc_cflags_cpu="-mtune=bdver1 -mtune=amdfam10 -mtune=k8"
-       gcc_cflags_arch="-march=bdver1 -march=amdfam10 -march=k8 -march=k8~-mno-sse2"
-       path="x86/bd1 x86/k7/mmx x86/k7 x86"
-       path_64="x86_64/bd1 x86_64"
-       ;;
-      core2)
-       gcc_cflags_cpu="-mtune=core2 -mtune=k8"
-       gcc_cflags_arch="-march=core2 -march=core2~-mno-sse2 -march=k8 -march=k8~-mno-sse2"
-       path="x86/core2 x86/p6/sse2 x86/p6/p3mmx x86/p6/mmx x86/p6 x86"
-       path_64="x86_64/core2 x86_64"
-       ;;
-      corei | coreinhm | coreiwsm)
-       gcc_cflags_cpu="-mtune=corei7 -mtune=core2 -mtune=k8"
-       gcc_cflags_arch="-march=corei7 -march=core2 -march=core2~-mno-sse2 -march=k8 -march=k8~-mno-sse2"
-       path="x86/coreinhm x86/p6/sse2 x86/p6/p3mmx x86/p6/mmx x86/p6 x86"
-       path_64="x86_64/coreinhm x86_64/core2 x86_64"
-       ;;
-      coreisbr)
-       gcc_cflags_cpu="-mtune=corei7 -mtune=core2 -mtune=k8"
-       gcc_cflags_arch="-march=corei7 -march=core2 -march=core2~-mno-sse2 -march=k8 -march=k8~-mno-sse2"
-       path="x86/coreisbr x86/p6/sse2 x86/p6/p3mmx x86/p6/mmx x86/p6 x86"
-       path_64="x86_64/coreisbr x86_64/coreinhm x86_64/core2 x86_64"
-       ;;
-      atom)
-       gcc_cflags_cpu="-mtune=atom -mtune=pentium3"
-       gcc_cflags_arch="-march=atom -march=pentium3"
-       path="x86/atom/sse2 x86/atom/mmx x86/atom x86"
-       path_64="x86_64/atom x86_64"
-       ;;
-      nano)
-       gcc_cflags_cpu="-mtune=nano"
-       gcc_cflags_arch="-march=nano"
-       path="x86/nano x86"
-       path_64="x86_64/nano x86_64"
-       ;;
-      *)
-       gcc_cflags_cpu="-mtune=i486 -mcpu=i486 -m486"
-       gcc_cflags_arch="-march=i486"
-       path="x86"
-       path_64="x86_64"
-       ;;
-    esac
-
-    case $host in
-      X86_64_PATTERN)
-       cclist_64="gcc"
-       gcc_64_cflags="$gcc_64_cflags -m64"
-       gcc_64_cflags_optlist="cpu arch"
-       CALLING_CONVENTIONS_OBJS_64='amd64call.lo amd64check$U.lo'
-       SPEED_CYCLECOUNTER_OBJ_64=x86_64.lo
-       cyclecounter_size_64=2
-       abilist="64 32"
-
-       case $host in
-         *-*-solaris*)
-           # Sun cc.
-           cclist_64="$cclist_64 cc"
-           cc_64_cflags="-xO3 -m64"
-           ;;
-         *-*-mingw* | *-*-cygwin)
-           limb_64=longlong
-           path_64=""  # Windows amd64 calling conventions are *different*
-           # Silence many pedantic warnings for w64.  FIXME.
-           gcc_64_cflags="$gcc_64_cflags -std=gnu99"
-           ;;
-       esac
-       ;;
-    esac
-    ;;
-
-
-  # FIXME: z8kx won't get through config.sub.  Could make 16 versus 32 bit
-  # limb an ABI option perhaps.
-  z8kx*-*-*)
-    path="z8000x"
-    extra_functions="udiv_w_sdiv"
-    ;;
-  z8k*-*-*)
-    path="z8000"
-    extra_functions="udiv_w_sdiv"
-    ;;
-
-
-  # Special CPU "none" selects generic C.  -DNO_ASM is used to disable gcc
-  # asm blocks in longlong.h (since they're driven by cpp pre-defined
-  # symbols like __alpha rather than the configured $host_cpu).
-  #
-  none-*-*)
-    abilist="long longlong"
-    cclist_long=$cclist
-    gcc_long_cflags=$gcc_cflags
-    gcc_long_cppflags="-DNO_ASM"
-    cc_long_cflags=$cc_cflags
-    cclist_longlong=$cclist
-    gcc_longlong_cflags=$gcc_cflags
-    gcc_longlong_cppflags="-DNO_ASM"
-    cc_longlong_cflags=$cc_cflags
-    limb_longlong=longlong
-    ;;
-
-esac
-
-# mingw can be built by the cygwin gcc if -mno-cygwin is added.  For
-# convenience add this automatically if it works.  Actual mingw gcc accepts
-# -mno-cygwin too, but of course is the default.  mingw only runs on the
-# x86s, but allow any CPU here so as to catch "none" too.
-#
-case $host in
-  *-*-mingw*)
-    gcc_cflags_optlist="$gcc_cflags_optlist nocygwin"
-    gcc_cflags_nocygwin="-mno-cygwin"
-    ;;
-esac
-
-
-CFLAGS_or_unset=${CFLAGS-'(unset)'}
-CPPFLAGS_or_unset=${CPPFLAGS-'(unset)'}
-
-cat >&AC_FD_CC <<EOF
-User:
-ABI=$ABI
-CC=$CC
-CFLAGS=$CFLAGS_or_unset
-CPPFLAGS=$CPPFLAGS_or_unset
-MPN_PATH=$MPN_PATH
-GMP:
-abilist=$abilist
-cclist=$cclist
-EOF
-
-
-test_CFLAGS=${CFLAGS+set}
-test_CPPFLAGS=${CPPFLAGS+set}
-
-for abi in $abilist; do
-  abi_last="$abi"
-done
-
-# If the user specifies an ABI then it must be in $abilist, after that
-# $abilist is restricted to just that choice.
-#
-if test -n "$ABI"; then
-  found=no
-  for abi in $abilist; do
-    if test $abi = "$ABI"; then found=yes; break; fi
-  done
-  if test $found = no; then
-    AC_MSG_ERROR([ABI=$ABI is not among the following valid choices: $abilist])
-  fi
-  abilist="$ABI"
-fi
-
-found_compiler=no
-
-for abi in $abilist; do
-
-  echo "checking ABI=$abi"
-
-  # Suppose abilist="64 32", then for abi=64, will have abi1="_64" and
-  # abi2="_64".  For abi=32, will have abi1="_32" and abi2="".  This is how
-  # $gcc_cflags becomes a fallback for $gcc_32_cflags (the last in the
-  # abilist), but there's no fallback for $gcc_64_cflags.
-  #
-  abi1=[`echo _$abi | sed 's/[.]//g'`]
-  if test $abi = $abi_last; then abi2=; else abi2="$abi1"; fi
-
-  # Compiler choices under this ABI
-                              eval cclist_chosen=\"\$cclist$abi1\"
-  test -n "$cclist_chosen" || eval cclist_chosen=\"\$cclist$abi2\"
-
-  # If there's a user specified $CC then don't use a list for
-  # $cclist_chosen, just a single value for $ccbase.
-  #
-  if test -n "$CC"; then
-
-    # The first word of $CC, stripped of any directory.  For instance
-    # CC="/usr/local/bin/gcc -pipe" will give "gcc".
-    #
-    for ccbase in $CC; do break; done
-    ccbase=`echo $ccbase | sed 's:.*/::'`
-
-    # If this $ccbase is in $cclist_chosen then it's a compiler we know and
-    # we can do flags defaulting with it.  If not, then $cclist_chosen is
-    # set to "unrecognised" so no default flags are used.
-    #
-    # "unrecognised" is used to avoid bad effects with eval if $ccbase has
-    # non-symbol characters.  For instance ccbase=my+cc would end up with
-    # something like cflags="$my+cc_cflags" which would give
-    # cflags="+cc_cflags" rather than the intended empty string for an
-    # unknown compiler.
-    #
-    found=unrecognised
-    for i in $cclist_chosen; do
-      if test "$ccbase" = $i; then
-        found=$ccbase
-        break
-      fi
-    done
-    cclist_chosen=$found
-  fi
-
-  for ccbase in $cclist_chosen; do
-
-    # When cross compiling, look for a compiler with the $host_alias as a
-    # prefix, the same way that AC_CHECK_TOOL does.  But don't do this to a
-    # user-selected $CC.
-    #
-    # $cross_compiling will be yes/no/maybe at this point.  Do the host
-    # prefixing for "maybe" as well as "yes".
-    #
-    if test "$cross_compiling" != no && test -z "$CC"; then
-      cross_compiling_prefix="${host_alias}-"
-    fi
-
-    for ccprefix in $cross_compiling_prefix ""; do
-
-      cc="$CC"
-      test -n "$cc" || cc="$ccprefix$ccbase"
-
-      # If the compiler is gcc but installed under another name, then change
-      # $ccbase so as to use the flags we know for gcc.  This helps for
-      # instance when specifying CC=gcc272 on Debian GNU/Linux, or the
-      # native cc which is really gcc on NeXT or MacOS-X.
-      #
-      # FIXME: There's a slight misfeature here.  If cc is actually gcc but
-      # gcc is not a known compiler under this $abi then we'll end up
-      # testing it with no flags and it'll work, but chances are it won't be
-      # in the right mode for the ABI we desire.  Let's quietly hope this
-      # doesn't happen.
-      #
-      if test $ccbase != gcc; then
-        GMP_PROG_CC_IS_GNU($cc,ccbase=gcc)
-      fi
-
-      # Similarly if the compiler is IBM xlc but invoked as cc or whatever
-      # then change $ccbase and make the default xlc flags available.
-      if test $ccbase != xlc; then
-        GMP_PROG_CC_IS_XLC($cc,ccbase=xlc)
-      fi
-
-      # acc was Sun's first unbundled compiler back in the SunOS days, or
-      # something like that, but today its man page says it's not meant to
-      # be used directly (instead via /usr/ucb/cc).  The options are pretty
-      # much the same as the main SunPRO cc, so share those configs.
-      #
-      case $host in
-        *sparc*-*-solaris* | *sparc*-*-sunos*)
-          if test "$ccbase" = acc; then ccbase=cc; fi ;;
-      esac
-
-      for tmp_cflags_maybe in yes no; do
-                             eval cflags=\"\$${ccbase}${abi1}_cflags\"
-        test -n "$cflags" || eval cflags=\"\$${ccbase}${abi2}_cflags\"
-
-       if test "$tmp_cflags_maybe" = yes; then
-          # don't try cflags_maybe when the user set CFLAGS
-          if test "$test_CFLAGS" = set; then continue; fi
-                                     eval cflags_maybe=\"\$${ccbase}${abi1}_cflags_maybe\"
-          test -n "$cflags_maybe" || eval cflags_maybe=\"\$${ccbase}${abi2}_cflags_maybe\"
-          # don't try cflags_maybe if there's nothing set
-          if test -z "$cflags_maybe"; then continue; fi
-          cflags="$cflags_maybe $cflags"
-        fi
-
-        # Any user CFLAGS, even an empty string, takes precedence
-        if test "$test_CFLAGS" = set; then cflags=$CFLAGS; fi
-
-        # Any user CPPFLAGS, even an empty string, takes precedence
-                               eval cppflags=\"\$${ccbase}${abi1}_cppflags\"
-        test -n "$cppflags" || eval cppflags=\"\$${ccbase}${abi2}_cppflags\"
-        if test "$test_CPPFLAGS" = set; then cppflags=$CPPFLAGS; fi
-
-        # --enable-profiling adds -p/-pg even to user-specified CFLAGS.
-        # This is convenient, but it's perhaps a bit naughty to modify user
-        # CFLAGS.
-        case "$enable_profiling" in
-          prof)       cflags="$cflags -p" ;;
-          gprof)      cflags="$cflags -pg" ;;
-          instrument) cflags="$cflags -finstrument-functions" ;;
-        esac
-
-        GMP_PROG_CC_WORKS($cc $cflags $cppflags,,continue)
-
-        # If we're supposed to be using a "long long" for a limb, check that
-        # it works.
-                                  eval limb_chosen=\"\$limb$abi1\"
-        test -n "$limb_chosen" || eval limb_chosen=\"\$limb$abi2\"
-        if test "$limb_chosen" = longlong; then
-          GMP_PROG_CC_WORKS_LONGLONG($cc $cflags $cppflags,,continue)
-        fi
-
-        # The tests to perform on this $cc, if any
-                               eval testlist=\"\$${ccbase}${abi1}_testlist\"
-        test -n "$testlist" || eval testlist=\"\$${ccbase}${abi2}_testlist\"
-        test -n "$testlist" || eval testlist=\"\$any${abi1}_testlist\"
-        test -n "$testlist" || eval testlist=\"\$any${abi2}_testlist\"
-
-        testlist_pass=yes
-        for tst in $testlist; do
-          case $tst in
-          hpc-hppa-2-0)   GMP_HPC_HPPA_2_0($cc,,testlist_pass=no) ;;
-          gcc-arm-umodsi) GMP_GCC_ARM_UMODSI($cc,,testlist_pass=no) ;;
-          gcc-mips-o32)   GMP_GCC_MIPS_O32($cc,,testlist_pass=no) ;;
-          hppa-level-2.0) GMP_HPPA_LEVEL_20($cc $cflags,,testlist_pass=no) ;;
-          sizeof*)       GMP_C_TEST_SIZEOF($cc $cflags,$tst,,testlist_pass=no) ;;
-          esac
-          if test $testlist_pass = no; then break; fi
-        done
-
-        if test $testlist_pass = yes; then
-          found_compiler=yes
-          break
-        fi
-      done
-
-      if test $found_compiler = yes; then break; fi
-    done
-
-    if test $found_compiler = yes; then break; fi
-  done
-
-  if test $found_compiler = yes; then break; fi
-done
-
-
-# If we recognised the CPU, as indicated by $path being set, then insist
-# that we have a working compiler, either from our $cclist choices or from
-# $CC.  We can't let AC_PROG_CC look around for a compiler because it might
-# find one that we've rejected (for not supporting the modes our asm code
-# demands, etc).
-#
-# If we didn't recognise the CPU (and this includes host_cpu=none), then
-# fall through and let AC_PROG_CC look around for a compiler too.  This is
-# mostly in the interests of following a standard autoconf setup, after all
-# we've already tested cc and gcc adequately (hopefully).  As of autoconf
-# 2.50 the only thing AC_PROG_CC really adds is a check for "cl" (Microsoft
-# C on MS-DOS systems).
-#
-if test $found_compiler = no && test -n "$path"; then
-  AC_MSG_ERROR([could not find a working compiler, see config.log for details])
-fi
-
-case $host in
-  X86_PATTERN | X86_64_PATTERN)
-    # If the user asked for a fat build, override the path and flags set above
-    if test $enable_fat = yes; then
-      gcc_cflags_cpu=""
-      gcc_cflags_arch=""
-
-      if test "$abi" = 32; then
-       extra_functions="$extra_functions fat fat_entry"
-       path="x86/fat x86"
-       fat_path="x86 x86/fat x86/i486
-                 x86/k6 x86/k6/mmx x86/k6/k62mmx
-                 x86/k7 x86/k7/mmx
-                 x86/pentium x86/pentium/mmx
-                 x86/p6 x86/p6/mmx x86/p6/p3mmx x86/p6/sse2
-                 x86/pentium4 x86/pentium4/mmx x86/pentium4/sse2"
-      fi
-
-      if test "$abi" = 64; then
-       gcc_64_cflags=""
-       extra_functions_64="$extra_functions_64 fat fat_entry"
-       path_64="x86_64/fat x86_64"
-       fat_path="x86_64 x86_64/fat x86_64/pentium4 x86_64/core2 x86_64/coreinhm x86_64/coreisbr x86_64/atom x86_64/nano"
-      fi
-
-      fat_functions="add_n addmul_1 copyd copyi
-                    dive_1 diveby3 divrem_1 gcd_1 lshift
-                    mod_1 mod_34lsub1 mode1o mul_1 mul_basecase
-                    pre_divrem_1 pre_mod_1 rshift
-                    sqr_basecase sub_n submul_1"
-      fat_thresholds="MUL_TOOM22_THRESHOLD MUL_TOOM33_THRESHOLD
-                     SQR_TOOM2_THRESHOLD SQR_TOOM3_THRESHOLD"
-    fi
-    ;;
-esac
-
-
-if test $found_compiler = yes; then
-
-  # If we're creating CFLAGS, then look for optional additions.  If the user
-  # set CFLAGS then leave it alone.
-  #
-  if test "$test_CFLAGS" != set; then
-                          eval optlist=\"\$${ccbase}${abi1}_cflags_optlist\"
-    test -n "$optlist" || eval optlist=\"\$${ccbase}${abi2}_cflags_optlist\"
-
-    for opt in $optlist; do
-                             eval optflags=\"\$${ccbase}${abi1}_cflags_${opt}\"
-      test -n "$optflags" || eval optflags=\"\$${ccbase}${abi2}_cflags_${opt}\"
-      test -n "$optflags" || eval optflags=\"\$${ccbase}_cflags_${opt}\"
-
-      for flag in $optflags; do
-
-       # ~ represents a space in an option spec
-        flag=`echo "$flag" | tr '~' ' '`
-
-        case $flag in
-          -march=pentium4 | -march=k8)
-            # For -march settings which enable SSE2 we exclude certain bad
-            # gcc versions and we need an OS knowing how to save xmm regs.
-            #
-            # This is only for ABI=32, any 64-bit gcc is good and any OS
-            # knowing x86_64 will know xmm.
-            #
-            # -march=k8 was only introduced in gcc 3.3, so we shouldn't need
-            # the GMP_GCC_PENTIUM4_SSE2 check (for gcc 3.2 and prior).  But
-            # it doesn't hurt to run it anyway, sharing code with the
-            # pentium4 case.
-            #
-            if test "$abi" = 32; then
-              GMP_GCC_PENTIUM4_SSE2($cc $cflags $cppflags,, continue)
-              GMP_OS_X86_XMM($cc $cflags $cppflags,, continue)
-            fi
-            ;;
-          -no-cpp-precomp)
-            # special check, avoiding a warning
-            GMP_GCC_NO_CPP_PRECOMP($ccbase,$cc,$cflags,
-                                   [cflags="$cflags $flag"
-                                   break],
-                                   [continue])
-            ;;
-          -Wa,-m*)
-            case $host in
-              alpha*-*-*)
-                GMP_GCC_WA_MCPU($cc $cflags, $flag, , [continue])
-              ;;
-            esac
-            ;;
-          -Wa,-oldas)
-            GMP_GCC_WA_OLDAS($cc $cflags $cppflags,
-                             [cflags="$cflags $flag"
-                             break],
-                             [continue])
-            ;;
-        esac
-
-        GMP_PROG_CC_WORKS($cc $cflags $cppflags $flag,
-          [cflags="$cflags $flag"
-          break])
-      done
-    done
-  fi
-
-  ABI="$abi"
-  CC="$cc"
-  CFLAGS="$cflags"
-  CPPFLAGS="$cppflags"
-
-
-  # Could easily have this in config.h too, if desired.
-  ABI_nodots=`echo $ABI | sed 's/\./_/'`
-  GMP_DEFINE_RAW("define_not_for_expansion(\`HAVE_ABI_$ABI_nodots')", POST)
-
-
-  # GMP_LDFLAGS substitution, selected according to ABI.
-  # These are needed on libgmp.la and libmp.la, but currently not on
-  # convenience libraries like tune/libspeed.la or mpz/libmpz.la.
-  #
-                            eval GMP_LDFLAGS=\"\$${ccbase}${abi1}_ldflags\"
-  test -n "$GMP_LDFLAGS" || eval GMP_LDFLAGS=\"\$${ccbase}${abi1}_ldflags\"
-  AC_SUBST(GMP_LDFLAGS)
-  AC_SUBST(LIBGMP_LDFLAGS)
-  AC_SUBST(LIBGMPXX_LDFLAGS)
-
-  # extra_functions, selected according to ABI
-                    eval tmp=\"\$extra_functions$abi1\"
-  test -n "$tmp" || eval tmp=\"\$extra_functions$abi2\"
-  extra_functions="$tmp"
-
-
-  # Cycle counter, selected according to ABI.
-  #
-                    eval tmp=\"\$SPEED_CYCLECOUNTER_OBJ$abi1\"
-  test -n "$tmp" || eval tmp=\"\$SPEED_CYCLECOUNTER_OBJ$abi2\"
-  SPEED_CYCLECOUNTER_OBJ="$tmp"
-                    eval tmp=\"\$cyclecounter_size$abi1\"
-  test -n "$tmp" || eval tmp=\"\$cyclecounter_size$abi2\"
-  cyclecounter_size="$tmp"
-
-  if test -n "$SPEED_CYCLECOUNTER_OBJ"; then
-    AC_DEFINE_UNQUOTED(HAVE_SPEED_CYCLECOUNTER, $cyclecounter_size,
-    [Tune directory speed_cyclecounter, undef=none, 1=32bits, 2=64bits)])
-  fi
-  AC_SUBST(SPEED_CYCLECOUNTER_OBJ)
-
-
-  # Calling conventions checking, selected according to ABI.
-  #
-                    eval tmp=\"\$CALLING_CONVENTIONS_OBJS$abi1\"
-  test -n "$tmp" || eval tmp=\"\$CALLING_CONVENTIONS_OBJS$abi2\"
-  CALLING_CONVENTIONS_OBJS="$tmp"
-
-  if test -n "$CALLING_CONVENTIONS_OBJS"; then
-    AC_DEFINE(HAVE_CALLING_CONVENTIONS,1,
-    [Define to 1 if tests/libtests has calling conventions checking for the CPU])
-  fi
-  AC_SUBST(CALLING_CONVENTIONS_OBJS)
-
-fi
-
-
-# If the user gave an MPN_PATH, use that verbatim, otherwise choose
-# according to the ABI and add "generic".
-#
-if test -n "$MPN_PATH"; then
-  path="$MPN_PATH"
-else
-                    eval tmp=\"\$path$abi1\"
-  test -n "$tmp" || eval tmp=\"\$path$abi2\"
-  path="$tmp generic"
-fi
-
-
-# Long long limb setup for gmp.h.
-case $limb_chosen in
-longlong) DEFN_LONG_LONG_LIMB="#define _LONG_LONG_LIMB 1"    ;;
-*)        DEFN_LONG_LONG_LIMB="/* #undef _LONG_LONG_LIMB */" ;;
-esac
-AC_SUBST(DEFN_LONG_LONG_LIMB)
-
-
-# The C compiler and preprocessor, put into ANSI mode if possible.
-AC_PROG_CC
-AC_PROG_CC_STDC
-AC_PROG_CPP
-GMP_H_ANSI
-
-
-# The C compiler on the build system, and associated tests.
-GMP_PROG_CC_FOR_BUILD
-GMP_PROG_CPP_FOR_BUILD
-GMP_PROG_EXEEXT_FOR_BUILD
-GMP_C_FOR_BUILD_ANSI
-GMP_CHECK_LIBM_FOR_BUILD
-
-
-# How to assemble, used with CFLAGS etc, see mpn/Makeasm.am.
-# Using the compiler is a lot easier than figuring out how to invoke the
-# assembler directly.
-#
-test -n "$CCAS" || CCAS="$CC -c"
-AC_SUBST(CCAS)
-
-
-# The C++ compiler, if desired.
-want_cxx=no
-if test $enable_cxx != no; then
-  test_CXXFLAGS=${CXXFLAGS+set}
-  AC_PROG_CXX
-
-  echo "CXXFLAGS chosen by autoconf: $CXXFLAGS" >&AC_FD_CC
-  cxxflags_ac_prog_cxx=$CXXFLAGS
-  cxxflags_list=ac_prog_cxx
-
-  # If the user didn't specify $CXXFLAGS, then try $CFLAGS, with -g removed
-  # if AC_PROG_CXX thinks that doesn't work.  $CFLAGS stands a good chance
-  # of working, eg. on a GNU system where CC=gcc and CXX=g++.
-  #
-  if test "$test_CXXFLAGS" != set; then
-    cxxflags_cflags=$CFLAGS
-    cxxflags_list="cflags $cxxflags_list"
-    if test "$ac_prog_cxx_g" = no; then
-      cxxflags_cflags=`echo "$cxxflags_cflags" | sed -e 's/ -g //' -e 's/^-g //' -e 's/ -g$//'`
-    fi
-  fi
-
-  # See if the C++ compiler works.  If the user specified CXXFLAGS then all
-  # we're doing is checking whether AC_PROG_CXX succeeded, since it doesn't
-  # give a fatal error, just leaves CXX set to a default g++.  If on the
-  # other hand the user didn't specify CXXFLAGS then we get to try here our
-  # $cxxflags_list alternatives.
-  #
-  # Automake includes $CPPFLAGS in a C++ compile, so we do the same here.
-  #
-  for cxxflags_choice in $cxxflags_list; do
-    eval CXXFLAGS=\"\$cxxflags_$cxxflags_choice\"
-    GMP_PROG_CXX_WORKS($CXX $CPPFLAGS $CXXFLAGS,
-      [want_cxx=yes
-      break])
-  done
-
-  # If --enable-cxx=yes but a C++ compiler can't be found, then abort.
-  if test $want_cxx = no && test $enable_cxx = yes; then
-    AC_MSG_ERROR([C++ compiler not available, see config.log for details])
-  fi
-fi
-
-AM_CONDITIONAL(WANT_CXX, test $want_cxx = yes)
-
-# FIXME: We're not interested in CXXCPP for ourselves, but if we don't do it
-# here then AC_PROG_LIBTOOL will AC_REQUIRE it (via _LT_AC_TAGCONFIG) and
-# hence execute it unconditionally, and that will fail if there's no C++
-# compiler (and no generic /lib/cpp).
-#
-if test $want_cxx = yes; then
-  AC_PROG_CXXCPP
-fi
-
-
-# Path setups for Cray, according to IEEE or CFP.  These must come after
-# deciding the compiler.
-#
-GMP_CRAY_OPTIONS(
-  [add_path="cray/ieee"],
-  [add_path="cray/cfp"; extra_functions="mulwwc90"],
-  [add_path="cray/cfp"; extra_functions="mulwwj90"])
-
-
-if test -z "$MPN_PATH"; then
-  path="$add_path $path"
-fi
-
-# For a nail build, also look in "nails" subdirectories.
-#
-if test $GMP_NAIL_BITS != 0 && test -z "$MPN_PATH"; then
-  new_path=
-  for i in $path; do
-    case $i in
-    generic) new_path="$new_path $i" ;;
-    *)       new_path="$new_path $i/nails $i" ;;
-    esac
-  done
-  path=$new_path
-fi
-
-
-# Put all directories into CPUVEC_list so as to get a full set of
-# CPUVEC_SETUP_$tmp_suffix defines into config.h, even if some of them are
-# empty because mmx and/or sse2 had to be dropped.
-#
-for i in $fat_path; do
-  GMP_FAT_SUFFIX(tmp_suffix, $i)
-  CPUVEC_list="$CPUVEC_list CPUVEC_SETUP_$tmp_suffix"
-done
-
-
-# If there's any sse2 or mmx in the path, check whether the assembler
-# supports it, and remove if not.
-#
-# We only need this in ABI=32, for ABI=64 on x86_64 we can assume a new
-# enough assembler.
-#
-case $host in
-  X86_PATTERN | X86_64_PATTERN)
-    if test "$ABI" = 32; then
-      case "$path $fat_path" in
-        *mmx*)   GMP_ASM_X86_MMX( , [GMP_STRIP_PATH(*mmx*)]) ;;
-      esac
-      case "$path $fat_path" in
-        *sse2*)  GMP_ASM_X86_SSE2( , [GMP_STRIP_PATH(sse2)]) ;;
-      esac
-    fi
-    ;;
-esac
-
-
-cat >&AC_FD_CC <<EOF
-Decided:
-ABI=$ABI
-CC=$CC
-CFLAGS=$CFLAGS
-CPPFLAGS=$CPPFLAGS
-GMP_LDFLAGS=$GMP_LDFLAGS
-CXX=$CXX
-CXXFLAGS=$CXXFLAGS
-path=$path
-EOF
-echo "using ABI=\"$ABI\""
-echo "      CC=\"$CC\""
-echo "      CFLAGS=\"$CFLAGS\""
-echo "      CPPFLAGS=\"$CPPFLAGS\""
-if test $want_cxx = yes; then
-  echo "      CXX=\"$CXX\""
-  echo "      CXXFLAGS=\"$CXXFLAGS\""
-fi
-echo "      MPN_PATH=\"$path\""
-
-
-# Automake ansi2knr support.
-AM_C_PROTOTYPES
-
-CL_AS_NOEXECSTACK
-
-GMP_PROG_AR
-GMP_PROG_NM
-
-case $host in
-  # FIXME: On AIX 3 and 4, $libname.a is included in libtool
-  # $library_names_spec, so libgmp.a becomes a symlink to libgmp.so, making
-  # it impossible to build shared and static libraries simultaneously.
-  # Disable shared libraries by default, but let the user override with
-  # --enable-shared --disable-static.
-  #
-  # FIXME: This $libname.a problem looks like it might apply to *-*-amigaos*
-  # and *-*-os2* too, but wait for someone to test this before worrying
-  # about it.  If there is a problem then of course libtool is the right
-  # place to fix it.
-  #
-  [*-*-aix[34]*])
-    if test -z "$enable_shared"; then enable_shared=no; fi ;;
-esac
-
-
-# Configs for Windows DLLs.
-
-AC_LIBTOOL_WIN32_DLL
-
-AC_SUBST(LIBGMP_DLL,0)
-case $host in
-  *-*-cygwin* | *-*-mingw* | *-*-pw32* | *-*-os2*)
-    # By default, build only static.
-    if test -z "$enable_shared"; then
-      enable_shared=no
-    fi
-    # Don't allow both static and DLL.
-    if test "$enable_shared" != no && test "$enable_static" != no; then
-      AC_MSG_ERROR([cannot build both static and DLL, since gmp.h is different for each.
-Use "--disable-static --enable-shared" to build just a DLL.])
-    fi
-
-    # "-no-undefined" is required when building a DLL, see documentation on
-    # AC_LIBTOOL_WIN32_DLL.
-    #
-    # "-Wl,--export-all-symbols" is a bit of a hack, it gets all libgmp and
-    # libgmpxx functions and variables exported.  This is what libtool did
-    # in the past, and it's convenient for us in the test programs.
-    #
-    # Maybe it'd be prudent to check for --export-all-symbols before using
-    # it, but it seems to have been in ld since at least 2000, and there's
-    # not really any alternative we want to take up at the moment.
-    #
-    # "-Wl,--output-def" is used to get a .def file for use by MS lib to make
-    # a .lib import library, described in the manual.  libgmp-3.dll.def
-    # corresponds to the libmp-3.dll.def generated by libtool (as a result
-    # of -export-symbols on that library).
-    #
-    # Incidentally, libtool does generate an import library libgmp.dll.a,
-    # but it's "ar" format and cannot be used by the MS linker.  There
-    # doesn't seem to be any GNU tool for generating or converting to .lib.
-    #
-    # FIXME: The .def files produced by -Wl,--output-def include isascii,
-    # iscsym, iscsymf and toascii, apparently because mingw ctype.h doesn't
-    # inline isascii (used in gmp).  It gives an extern inline for
-    # __isascii, but for some reason not the plain isascii.
-    #
-    if test "$enable_shared" = yes; then
-      GMP_LDFLAGS="$GMP_LDFLAGS -no-undefined -Wl,--export-all-symbols"
-      LIBGMP_LDFLAGS="$LIBGMP_LDFLAGS -Wl,--output-def,.libs/libgmp-3.dll.def"
-      LIBGMPXX_LDFLAGS="$LIBGMP_LDFLAGS -Wl,--output-def,.libs/libgmpxx-3.dll.def"
-      LIBGMP_DLL=1
-    fi
-    ;;
-esac
-
-
-# Ensure that $CONFIG_SHELL is available for AC_LIBTOOL_SYS_MAX_CMD_LEN.
-# It's often set already by _LT_AC_PROG_ECHO_BACKSLASH or
-# _AS_LINENO_PREPARE, but not always.
-#
-# The symptom of CONFIG_SHELL unset is some "expr" errors during the test,
-# and an empty result.  This only happens when invoked as "sh configure",
-# ie. no path, and can be seen for instance on ia64-*-hpux*.
-#
-# FIXME: Newer libtool should have its own fix for this.
-#
-if test -z "$CONFIG_SHELL"; then
-  CONFIG_SHELL=$SHELL
-fi
-
-# Enable CXX in libtool only if we want it, and never enable GCJ, nor RC on
-# mingw and cygwin.  Under --disable-cxx this avoids some error messages
-# from libtool arising from the fact we didn't actually run AC_PROG_CXX.
-# Notice that any user-supplied --with-tags setting takes precedence.
-#
-# FIXME: Is this the right way to get this effect?  Very possibly not, but
-# the current _LT_AC_TAGCONFIG doesn't really suggest an alternative.
-#
-if test "${with_tags+set}" != set; then
-  if test $want_cxx = yes; then
-    with_tags=CXX
-  else
-    with_tags=
-  fi
-fi
-
-# The dead hand of AC_REQUIRE makes AC_PROG_LIBTOOL expand and execute
-# AC_PROG_F77, even when F77 is not in the selected with_tags.  This is
-# probably harmless, but it's unsightly and bloats our configure, so pretend
-# AC_PROG_F77 has been expanded already.
-#
-# FIXME: Rumour has it libtool will one day provide a way for a configure.in
-# to say what it wants from among supported languages etc.
-#
-AC_PROVIDE([AC_PROG_F77])
-
-AC_PROG_LIBTOOL
-
-# Generate an error here if attempting to build both shared and static when
-# $libname.a is in $library_names_spec (as mentioned above), rather than
-# wait for ar or ld to fail.
-#
-if test "$enable_shared" = yes && test "$enable_static" = yes; then
-  case $library_names_spec in
-    *libname.a*)
-      AC_MSG_ERROR([cannot create both shared and static libraries on this system, --disable one of the two])
-      ;;
-  esac
-fi
-
-AM_CONDITIONAL(ENABLE_STATIC, test "$enable_static" = yes)
-
-
-# Many of these library and header checks are for the benefit of
-# supplementary programs.  libgmp doesn't use anything too weird.
-
-AC_HEADER_STDC
-AC_HEADER_TIME
-
-# Reasons for testing:
-#   float.h - not in SunOS bundled cc
-#   invent.h - IRIX specific
-#   langinfo.h - X/Open standard only, not in djgpp for instance
-#   locale.h - old systems won't have this
-#   nl_types.h - X/Open standard only, not in djgpp for instance
-#       (usually langinfo.h gives nl_item etc, but not on netbsd 1.4.1)
-#   sys/attributes.h - IRIX specific
-#   sys/iograph.h - IRIX specific
-#   sys/mman.h - not in Cray Unicos
-#   sys/param.h - not in mingw
-#   sys/processor.h - solaris specific, though also present in macos
-#   sys/pstat.h - HPUX specific
-#   sys/resource.h - not in mingw
-#   sys/sysctl.h - not in mingw
-#   sys/sysinfo.h - OSF specific
-#   sys/syssgi.h - IRIX specific
-#   sys/systemcfg.h - AIX specific
-#   sys/time.h - autoconf suggests testing, don't know anywhere without it
-#   sys/times.h - not in mingw
-#   machine/hal_sysinfo.h - OSF specific
-#
-# inttypes.h, stdint.h, unistd.h and sys/types.h are already in the autoconf
-# default tests
-#
-AC_CHECK_HEADERS(fcntl.h float.h invent.h langinfo.h locale.h nl_types.h sys/attributes.h sys/iograph.h sys/mman.h sys/param.h sys/processor.h sys/pstat.h sys/sysinfo.h sys/syssgi.h sys/systemcfg.h sys/time.h sys/times.h)
-
-# On SunOS, sys/resource.h needs sys/time.h (for struct timeval)
-AC_CHECK_HEADERS(sys/resource.h,,,
-[#if TIME_WITH_SYS_TIME
-# include <sys/time.h>
-# include <time.h>
-#else
-# if HAVE_SYS_TIME_H
-#  include <sys/time.h>
-# else
-#  include <time.h>
-# endif
-#endif])
-
-# On NetBSD and OpenBSD, sys/sysctl.h needs sys/param.h for various constants
-AC_CHECK_HEADERS(sys/sysctl.h,,,
-[#if HAVE_SYS_PARAM_H
-# include <sys/param.h>
-#endif])
-
-# On OSF 4.0, <machine/hal_sysinfo.h> must have <sys/sysinfo.h> for ulong_t
-AC_CHECK_HEADERS(machine/hal_sysinfo.h,,,
-[#if HAVE_SYS_SYSINFO_H
-# include <sys/sysinfo.h>
-#endif])
-
-# Reasons for testing:
-#   optarg - not declared in mingw
-#   fgetc, fscanf, ungetc, vfprintf - not declared in SunOS 4
-#   sys_errlist, sys_nerr - not declared in SunOS 4
-#
-# optarg should be in unistd.h and the rest in stdio.h, both of which are
-# in the autoconf default includes.
-#
-# sys_errlist and sys_nerr are supposed to be in <errno.h> on SunOS according
-# to the man page (but aren't), in glibc they're in stdio.h.
-#
-AC_CHECK_DECLS([fgetc, fscanf, optarg, ungetc, vfprintf])
-AC_CHECK_DECLS([sys_errlist, sys_nerr], , ,
-[#include <stdio.h>
-#include <errno.h>])
-
-AC_TYPE_SIGNAL
-
-# Reasons for testing:
-#   intmax_t       - C99
-#   long double    - not in the HP bundled K&R cc
-#   long long      - only in reasonably recent compilers
-#   ptrdiff_t      - seems to be everywhere, maybe don't need to check this
-#   quad_t         - BSD specific
-#   uint_least32_t - C99
-#
-# the default includes are sufficient for all these types
-#
-AC_CHECK_TYPES([intmax_t, long double, long long, ptrdiff_t, quad_t,
-               uint_least32_t, intptr_t])
-
-AC_C_STRINGIZE
-
-# FIXME: Really want #ifndef __cplusplus around the #define volatile
-# replacement autoconf gives, since volatile is always available in C++.
-# But we don't use it in C++ currently.
-AC_C_VOLATILE
-
-AC_C_RESTRICT
-
-GMP_C_STDARG
-GMP_C_ATTRIBUTE_CONST
-GMP_C_ATTRIBUTE_MALLOC
-GMP_C_ATTRIBUTE_MODE
-GMP_C_ATTRIBUTE_NORETURN
-
-GMP_H_EXTERN_INLINE
-
-# from libtool
-AC_CHECK_LIBM
-AC_SUBST(LIBM)
-
-GMP_FUNC_ALLOCA
-GMP_OPTION_ALLOCA
-
-GMP_H_HAVE_FILE
-
-AC_C_BIGENDIAN(
-  [AC_DEFINE(HAVE_LIMB_BIG_ENDIAN, 1)
-   GMP_DEFINE_RAW("define_not_for_expansion(\`HAVE_LIMB_BIG_ENDIAN')", POST)],
-  [AC_DEFINE(HAVE_LIMB_LITTLE_ENDIAN, 1)
-   GMP_DEFINE_RAW("define_not_for_expansion(\`HAVE_LIMB_LITTLE_ENDIAN')", POST)
-  ], [:])
-AH_VERBATIM([HAVE_LIMB],
-[/* Define one of these to 1 for the endianness of `mp_limb_t'.
-   If the endianness is not a simple big or little, or you don't know what
-   it is, then leave both undefined. */
-#undef HAVE_LIMB_BIG_ENDIAN
-#undef HAVE_LIMB_LITTLE_ENDIAN])
-
-GMP_C_DOUBLE_FORMAT
-
-
-# Reasons for testing:
-#   alarm - not in mingw
-#   attr_get - IRIX specific
-#   clock_gettime - not in glibc 2.2.4, only very recent systems
-#   cputime - not in glibc
-#   getsysinfo - OSF specific
-#   getrusage - not in mingw
-#   gettimeofday - not in mingw
-#   mmap - not in mingw, djgpp
-#   nl_langinfo - X/Open standard only, not in djgpp for instance
-#   obstack_vprintf - glibc specific
-#   processor_info - solaris specific
-#   pstat_getprocessor - HPUX specific (10.x and up)
-#   raise - an ANSI-ism, though probably almost universal by now
-#   read_real_time - AIX specific
-#   sigaction - not in mingw
-#   sigaltstack - not in mingw, or old AIX (reputedly)
-#   sigstack - not in mingw
-#   strerror - not in SunOS
-#   strnlen - glibc extension (some other systems too)
-#   syssgi - IRIX specific
-#   times - not in mingw
-#
-# clock_gettime is in librt on *-*-osf5.1.  We could look for it
-# there, but that's not worth bothering with unless it has a decent
-# resolution (in a quick test clock_getres said only 1 millisecond).
-#
-# AC_FUNC_STRNLEN is not used because we don't want the AC_LIBOBJ
-# replacement setups it gives.  It detects a faulty strnlen on AIX, but
-# missing out on that test is ok since our only use of strnlen is in
-# __gmp_replacement_vsnprintf which is not required on AIX since it has a
-# vsnprintf.
-#
-AC_CHECK_FUNCS(alarm attr_get clock clock_gettime cputime getpagesize getrusage gettimeofday getsysinfo localeconv memset mmap mprotect nl_langinfo obstack_vprintf popen processor_info pstat_getprocessor raise read_real_time sigaction sigaltstack sigstack syssgi strchr strerror strnlen strtol strtoul sysconf sysctl sysctlbyname times)
-
-GMP_FUNC_VSNPRINTF
-GMP_FUNC_SSCANF_WRITABLE_INPUT
-
-# Reasons for checking:
-#   pst_processor psp_iticksperclktick - not in hpux 9
-#
-AC_CHECK_MEMBER(struct pst_processor.psp_iticksperclktick,
-                [AC_DEFINE(HAVE_PSP_ITICKSPERCLKTICK, 1,
-[Define to 1 if <sys/pstat.h> `struct pst_processor' exists
-and contains `psp_iticksperclktick'.])],,
-                [#include <sys/pstat.h>])
-
-# C++ tests, when required
-#
-if test $enable_cxx = yes; then
-  AC_LANG_PUSH(C++)
-
-  # Reasons for testing:
-  #   <sstream> - not in g++ 2.95.2
-  #   std::locale - not in g++ 2.95.4
-  #
-  AC_CHECK_HEADERS([sstream])
-  AC_CHECK_TYPES([std::locale],,,[#include <locale>])
-
-  AC_LANG_POP(C++)
-fi
-
-
-# Pick the correct source files in $path and link them to mpn/.
-# $gmp_mpn_functions lists all functions we need.
-#
-# The rule is to find a file with the function name and a .asm, .S,
-# .s, or .c extension.  Certain multi-function files with special names
-# can provide some functions too.  (mpn/Makefile.am passes
-# -DOPERATION_<func> to get them to generate the right code.)
-
-# Note: $gmp_mpn_functions must have mod_1 before pre_mod_1 so the former
-#       can optionally provide the latter as an extra entrypoint.  Likewise
-#       divrem_1 and pre_divrem_1.
-
-gmp_mpn_functions_optional="umul udiv                                  \
-  invert_limb sqr_diagonal                                             \
-  mul_2 mul_3 mul_4                                                    \
-  addmul_2 addmul_3 addmul_4 addmul_5 addmul_6 addmul_7 addmul_8       \
-  addlsh1_n sublsh1_n rsblsh1_n rsh1add_n rsh1sub_n                    \
-  addlsh2_n sublsh2_n rsblsh2_n                                                \
-  addlsh_n sublsh_n rsblsh_n                                           \
-  add_n_sub_n addaddmul_1msb0"
-
-gmp_mpn_functions="$extra_functions                                       \
-  add add_1 add_n sub sub_1 sub_n neg com mul_1 addmul_1                  \
-  submul_1 lshift rshift dive_1 diveby3 divis divrem divrem_1 divrem_2     \
-  fib2_ui mod_1 mod_34lsub1 mode1o pre_divrem_1 pre_mod_1 dump            \
-  mod_1_1 mod_1_2 mod_1_3 mod_1_4 lshiftc                                 \
-  mul mul_fft mul_n sqr mul_basecase sqr_basecase nussbaumer_mul          \
-  random random2 pow_1                                                    \
-  rootrem sqrtrem get_str set_str scan0 scan1 popcount hamdist cmp        \
-  perfsqr perfpow                                                         \
-  gcd_1 gcd gcdext_1 gcdext gcd_lehmer gcd_subdiv_step                    \
-  gcdext_lehmer gcdext_subdiv_step                                        \
-  div_q tdiv_qr jacbase get_d                                             \
-  matrix22_mul hgcd2 hgcd mullo_n mullo_basecase                          \
-  toom22_mul toom32_mul toom42_mul toom52_mul toom62_mul                  \
-  toom33_mul toom43_mul toom53_mul toom63_mul                             \
-  toom44_mul                                                              \
-  toom6h_mul toom6_sqr toom8h_mul toom8_sqr                               \
-  toom_couple_handling                                                    \
-  toom2_sqr toom3_sqr toom4_sqr                                                   \
-  toom_eval_dgr3_pm1 toom_eval_dgr3_pm2                                   \
-  toom_eval_pm1 toom_eval_pm2 toom_eval_pm2exp toom_eval_pm2rexp          \
-  toom_interpolate_5pts toom_interpolate_6pts toom_interpolate_7pts       \
-  toom_interpolate_8pts toom_interpolate_12pts toom_interpolate_16pts     \
-  invertappr invert binvert mulmod_bnm1 sqrmod_bnm1                       \
-  sbpi1_div_q sbpi1_div_qr sbpi1_divappr_q                                \
-  dcpi1_div_q dcpi1_div_qr dcpi1_divappr_q                                \
-  mu_div_qr mu_divappr_q mu_div_q                                         \
-  bdiv_q_1                                                                \
-  sbpi1_bdiv_q sbpi1_bdiv_qr                                              \
-  dcpi1_bdiv_q dcpi1_bdiv_qr                                              \
-  mu_bdiv_q mu_bdiv_qr                                                    \
-  bdiv_q bdiv_qr                                                          \
-  divexact bdiv_dbm1c redc_1 redc_2 redc_n powm powlo powm_sec subcnd_n           \
-  redc_1_sec trialdiv remove                                              \
-  and_n andn_n nand_n ior_n iorn_n nior_n xor_n xnor_n                    \
-  copyi copyd zero                                                        \
-  $gmp_mpn_functions_optional"
-
-define(GMP_MULFUNC_CHOICES,
-[# functions that can be provided by multi-function files
-tmp_mulfunc=
-case $tmp_fn in
-  add_n|sub_n)       tmp_mulfunc="aors_n"    ;;
-  addmul_1|submul_1) tmp_mulfunc="aorsmul_1" ;;
-  popcount|hamdist)  tmp_mulfunc="popham"    ;;
-  and_n|andn_n|nand_n | ior_n|iorn_n|nior_n | xor_n|xnor_n)
-                     tmp_mulfunc="logops_n"  ;;
-  lshift|rshift)     tmp_mulfunc="lorrshift";;
-  addlsh1_n)
-                    tmp_mulfunc="aorslsh1_n aorrlsh1_n";;
-  sublsh1_n)
-                    tmp_mulfunc="aorslsh1_n sorrlsh1_n";;
-  rsblsh1_n)
-                    tmp_mulfunc="aorrlsh1_n sorrlsh1_n";;
-  addlsh2_n)
-                    tmp_mulfunc="aorslsh2_n aorrlsh2_n";;
-  sublsh2_n)
-                    tmp_mulfunc="aorslsh2_n sorrlsh2_n";;
-  rsblsh2_n)
-                    tmp_mulfunc="aorrlsh2_n sorrlsh2_n";;
-  addlsh_n)
-                    tmp_mulfunc="aorslsh_n aorrlsh_n";;
-  sublsh_n)
-                    tmp_mulfunc="aorslsh_n sorrlsh_n";;
-  rsblsh_n)
-                    tmp_mulfunc="aorrlsh_n sorrlsh_n";;
-  rsh1add_n|rsh1sub_n)
-                    tmp_mulfunc="rsh1aors_n";;
-esac
-])
-
-# the list of all object files used by mpn/Makefile.in and the
-# top-level Makefile.in, respectively
-mpn_objects=
-mpn_objs_in_libgmp=
-
-# links from the sources, to be removed by "make distclean"
-gmp_srclinks=
-
-
-# mpn_relative_top_srcdir is $top_srcdir, but for use from within the mpn
-# build directory.  If $srcdir is relative then we use a relative path too,
-# so the two trees can be moved together.
-case $srcdir in
-  [[\\/]* | ?:[\\/]*])  # absolute, as per autoconf
-    mpn_relative_top_srcdir=$srcdir ;;
-  *)                    # relative
-    mpn_relative_top_srcdir=../$srcdir ;;
-esac
-
-
-define(MPN_SUFFIXES,[asm S s c])
-
-dnl  Usage: GMP_FILE_TO_FUNCTION_BASE(func,file)
-dnl
-dnl  Set $func to the function base name for $file, eg. dive_1 gives
-dnl  divexact_1.
-dnl
-define(GMP_FILE_TO_FUNCTION,
-[case $$2 in
-  dive_1)      $1=divexact_1 ;;
-  diveby3)     $1=divexact_by3c ;;
-  pre_divrem_1) $1=preinv_divrem_1 ;;
-  mode1o)      $1=modexact_1c_odd ;;
-  pre_mod_1)   $1=preinv_mod_1 ;;
-  *)           $1=$$2 ;;
-esac
-])
-
-# Fat binary setups.
-#
-# We proceed through each $fat_path directory, and look for $fat_function
-# routines there.  Those found are incorporated in the build by generating a
-# little mpn/<foo>.asm or mpn/<foo>.c file in the build directory, with
-# suitable function renaming, and adding that to $mpn_objects (the same as a
-# normal mpn file).
-#
-# fat.h is generated with macros to let internal calls to each $fat_function
-# go directly through __gmpn_cpuvec, plus macros and declarations helping to
-# setup that structure, on a per-directory basis ready for
-# mpn/<cpu>/fat/fat.c.
-#
-# fat.h includes thresholds listed in $fat_thresholds, extracted from
-# gmp-mparam.h in each directory.  An overall maximum for each threshold is
-# established, for use in making fixed size arrays of temporary space.
-# (Eg. MUL_TOOM33_THRESHOLD_LIMIT used by mpn/generic/mul.c.)
-#
-# It'd be possible to do some of this manually, but when there's more than a
-# few functions and a few directories it becomes very tedious, and very
-# prone to having some routine accidentally omitted.  On that basis it seems
-# best to automate as much as possible, even if the code to do so is a bit
-# ugly.
-#
-
-if test -n "$fat_path"; then
-  # Usually the mpn build directory is created with mpn/Makefile
-  # instantiation, but we want to write to it sooner.
-  mkdir mpn 2>/dev/null
-
-  echo "/* fat.h - setups for fat binaries." >fat.h
-  echo "   Generated by configure - DO NOT EDIT.  */" >>fat.h
-
-  AC_DEFINE(WANT_FAT_BINARY, 1, [Define to 1 when building a fat binary.])
-  GMP_DEFINE(WANT_FAT_BINARY, yes)
-
-  # Don't want normal copies of fat functions
-  for tmp_fn in $fat_functions; do
-    GMP_REMOVE_FROM_LIST(gmp_mpn_functions, $tmp_fn)
-    GMP_REMOVE_FROM_LIST(gmp_mpn_functions_optional, $tmp_fn)
-  done
-
-  for tmp_fn in $fat_functions; do
-    GMP_FILE_TO_FUNCTION(tmp_fbase,tmp_fn)
-    echo "
-#ifndef OPERATION_$tmp_fn
-#undef  mpn_$tmp_fbase
-#define mpn_$tmp_fbase  (*__gmpn_cpuvec.$tmp_fbase)
-#endif
-DECL_$tmp_fbase (__MPN(${tmp_fbase}_init));" >>fat.h
-    # encourage various macros to use fat functions
-    AC_DEFINE_UNQUOTED(HAVE_NATIVE_mpn_$tmp_fbase)
-  done
-
-  echo "" >>fat.h
-  echo "/* variable thresholds */" >>fat.h
-  for tmp_tn in $fat_thresholds; do
-    echo "#undef  $tmp_tn" >>fat.h
-    echo "#define $tmp_tn  CPUVEC_THRESHOLD (`echo $tmp_tn | tr [A-Z] [a-z]`)" >>fat.h
-  done
-
-  echo "
-/* Copy all fields into __gmpn_cpuvec.
-   memcpy is not used because it might operate byte-wise (depending on its
-   implementation), and we need the function pointer writes to be atomic.
-   "volatile" discourages the compiler from trying to optimize this.  */
-#define CPUVEC_INSTALL(vec) \\
-  do { \\
-    volatile struct cpuvec_t *p = &__gmpn_cpuvec; \\" >>fat.h
-  for tmp_fn in $fat_functions; do
-    GMP_FILE_TO_FUNCTION(tmp_fbase,tmp_fn)
-    echo "    p->$tmp_fbase = vec.$tmp_fbase; \\" >>fat.h
-  done
-  for tmp_tn in $fat_thresholds; do
-    tmp_field_name=`echo $tmp_tn | tr [[A-Z]] [[a-z]]`
-    echo "    p->$tmp_field_name = vec.$tmp_field_name; \\" >>fat.h
-  done
-  echo "  } while (0)" >>fat.h
-
-  echo "
-/* A helper to check all fields are filled. */
-#define ASSERT_CPUVEC(vec) \\
-  do { \\" >>fat.h
-  for tmp_fn in $fat_functions; do
-    GMP_FILE_TO_FUNCTION(tmp_fbase,tmp_fn)
-    echo "    ASSERT (vec.$tmp_fbase != NULL); \\" >>fat.h
-  done
-  for tmp_tn in $fat_thresholds; do
-    tmp_field_name=`echo $tmp_tn | tr [[A-Z]] [[a-z]]`
-    echo "    ASSERT (vec.$tmp_field_name != 0); \\" >>fat.h
-  done
-  echo "  } while (0)" >>fat.h
-
-  echo "
-/* Call ITERATE(field) for each fat threshold field. */
-#define ITERATE_FAT_THRESHOLDS() \\
-  do { \\" >>fat.h
-  for tmp_tn in $fat_thresholds; do
-    tmp_field_name=`echo $tmp_tn | tr [[A-Z]] [[a-z]]`
-    echo "    ITERATE ($tmp_tn, $tmp_field_name); \\" >>fat.h
-  done
-  echo "  } while (0)" >>fat.h
-
-  for tmp_dir in $fat_path; do
-    CPUVEC_SETUP=
-    THRESH_ASM_SETUP=
-    echo "" >>fat.h
-    GMP_FAT_SUFFIX(tmp_suffix, $tmp_dir)
-
-    # In order to keep names unique on a DOS 8.3 filesystem, use a prefix
-    # (rather than a suffix) for the generated file names, and abbreviate.
-    case $tmp_suffix in
-      pentium)       tmp_prefix=p   ;;
-      pentium_mmx)   tmp_prefix=pm  ;;
-      p6_mmx)        tmp_prefix=p2  ;;
-      p6_p3mmx)      tmp_prefix=p3  ;;
-      pentium4)      tmp_prefix=p4  ;;
-      pentium4_mmx)  tmp_prefix=p4m ;;
-      pentium4_sse2) tmp_prefix=p4s ;;
-      k6_mmx)        tmp_prefix=k6m ;;
-      k6_k62mmx)     tmp_prefix=k62 ;;
-      k7_mmx)        tmp_prefix=k7m ;;
-      *)             tmp_prefix=$tmp_suffix ;;
-    esac
-
-    # Extract desired thresholds from gmp-mparam.h file in this directory,
-    # if present.
-    tmp_mparam=$srcdir/mpn/$tmp_dir/gmp-mparam.h
-    if test -f $tmp_mparam; then
-      for tmp_tn in $fat_thresholds; do
-        tmp_thresh=`sed -n "s/^#define $tmp_tn[        ]*\\([0-9][0-9]*\\).*$/\\1/p" $tmp_mparam`
-        if test -n "$tmp_thresh"; then
-          THRESH_ASM_SETUP=["${THRESH_ASM_SETUP}define($tmp_tn,$tmp_thresh)
-"]
-          CPUVEC_SETUP="$CPUVEC_SETUP    decided_cpuvec.`echo $tmp_tn | tr [[A-Z]] [[a-z]]` = $tmp_thresh; \\
-"
-          eval tmp_limit=\$${tmp_tn}_LIMIT
-          if test -z "$tmp_limit"; then
-            tmp_limit=0
-          fi
-          if test $tmp_thresh -gt $tmp_limit; then
-            eval ${tmp_tn}_LIMIT=$tmp_thresh
-          fi
-        fi
-      done
-    fi
-
-    for tmp_fn in $fat_functions; do
-      GMP_MULFUNC_CHOICES
-
-      for tmp_base in $tmp_fn $tmp_mulfunc; do
-        for tmp_ext in MPN_SUFFIXES; do
-          tmp_file=$srcdir/mpn/$tmp_dir/$tmp_base.$tmp_ext
-          if test -f $tmp_file; then
-
-            mpn_objects="$mpn_objects ${tmp_prefix}_$tmp_fn.lo"
-            mpn_objs_in_libgmp="$mpn_objs_in_libgmp mpn/${tmp_prefix}_$tmp_fn.lo"
-
-            GMP_FILE_TO_FUNCTION(tmp_fbase,tmp_fn)
-
-            # carry-in variant, eg. divrem_1c or modexact_1c_odd
-            case $tmp_fbase in
-              *_1*) tmp_fbasec=`echo $tmp_fbase | sed 's/_1/_1c/'` ;;
-              *)    tmp_fbasec=${tmp_fbase}c ;;
-            esac
-
-            # Create a little file doing an include from srcdir.  The
-            # OPERATION and renamings aren't all needed all the time, but
-            # they don't hurt if unused.
-            #
-            # FIXME: Should generate these via config.status commands.
-            # Would need them all in one AC_CONFIG_COMMANDS though, since
-            # that macro doesn't accept a set of separate commands generated
-            # by shell code.
-            #
-            case $tmp_ext in
-              asm)
-                # hide the d-n-l from autoconf's error checking
-                tmp_d_n_l=d""nl
-                echo ["$tmp_d_n_l  mpn_$tmp_fbase - from $tmp_dir directory for fat binary.
-$tmp_d_n_l  Generated by configure - DO NOT EDIT.
-
-define(OPERATION_$tmp_fn)
-define(__gmpn_$tmp_fbase, __gmpn_${tmp_fbase}_$tmp_suffix)
-define(__gmpn_$tmp_fbasec,__gmpn_${tmp_fbasec}_${tmp_suffix})
-define(__gmpn_preinv_${tmp_fbase},__gmpn_preinv_${tmp_fbase}_${tmp_suffix})
-
-$tmp_d_n_l  For k6 and k7 gcd_1 calling their corresponding mpn_modexact_1_odd
-ifdef(\`__gmpn_modexact_1_odd',,
-\`define(__gmpn_modexact_1_odd,__gmpn_modexact_1_odd_${tmp_suffix})')
-
-$THRESH_ASM_SETUP
-include][($mpn_relative_top_srcdir/mpn/$tmp_dir/$tmp_base.asm)
-"] >mpn/${tmp_prefix}_$tmp_fn.asm
-                ;;
-              c)
-                echo ["/* mpn_$tmp_fbase - from $tmp_dir directory for fat binary.
-   Generated by configure - DO NOT EDIT. */
-
-#define OPERATION_$tmp_fn 1
-#define __gmpn_$tmp_fbase           __gmpn_${tmp_fbase}_$tmp_suffix
-#define __gmpn_$tmp_fbasec          __gmpn_${tmp_fbasec}_${tmp_suffix}
-#define __gmpn_preinv_${tmp_fbase}  __gmpn_preinv_${tmp_fbase}_${tmp_suffix}
-
-#include \"$mpn_relative_top_srcdir/mpn/$tmp_dir/$tmp_base.c\"
-"] >mpn/${tmp_prefix}_$tmp_fn.c
-                ;;
-            esac
-
-            # Prototype, and append to CPUVEC_SETUP for this directory.
-            echo "DECL_$tmp_fbase (__gmpn_${tmp_fbase}_$tmp_suffix);" >>fat.h
-            CPUVEC_SETUP="$CPUVEC_SETUP    decided_cpuvec.$tmp_fbase = __gmpn_${tmp_fbase}_${tmp_suffix}; \\
-"
-            # Ditto for any preinv variant (preinv_divrem_1, preinv_mod_1).
-            if grep "^PROLOGUE(mpn_preinv_$tmp_fn)" $tmp_file >/dev/null; then
-              echo "DECL_preinv_$tmp_fbase (__gmpn_preinv_${tmp_fbase}_$tmp_suffix);" >>fat.h
-              CPUVEC_SETUP="$CPUVEC_SETUP    decided_cpuvec.preinv_$tmp_fbase = __gmpn_preinv_${tmp_fbase}_${tmp_suffix}; \\
-"
-            fi
-          fi
-        done
-      done
-    done
-
-    # Emit CPUVEC_SETUP for this directory
-    echo "" >>fat.h
-    echo "#define CPUVEC_SETUP_$tmp_suffix \\" >>fat.h
-    echo "  do { \\" >>fat.h
-    echo "$CPUVEC_SETUP  } while (0)" >>fat.h
-  done
-
-  # Emit threshold limits
-  echo "" >>fat.h
-  for tmp_tn in $fat_thresholds; do
-    eval tmp_limit=\$${tmp_tn}_LIMIT
-    echo "#define ${tmp_tn}_LIMIT  $tmp_limit" >>fat.h
-  done
-fi
-
-
-# Normal binary setups.
-#
-
-for tmp_ext in MPN_SUFFIXES; do
-  eval found_$tmp_ext=no
-done
-
-for tmp_fn in $gmp_mpn_functions; do
-  for tmp_ext in MPN_SUFFIXES; do
-    test "$no_create" = yes || rm -f mpn/$tmp_fn.$tmp_ext
-  done
-
-  # mpn_preinv_divrem_1 might have been provided by divrem_1.asm, likewise
-  # mpn_preinv_mod_1 by mod_1.asm.
-  case $tmp_fn in
-  pre_divrem_1)
-    if test "$HAVE_NATIVE_mpn_preinv_divrem_1" = yes; then continue; fi ;;
-  pre_mod_1)
-    if test "$HAVE_NATIVE_mpn_preinv_mod_1" = yes; then continue; fi ;;
-  esac
-
-  GMP_MULFUNC_CHOICES
-
-  found=no
-  for tmp_dir in $path; do
-    for tmp_base in $tmp_fn $tmp_mulfunc; do
-      for tmp_ext in MPN_SUFFIXES; do
-        tmp_file=$srcdir/mpn/$tmp_dir/$tmp_base.$tmp_ext
-        if test -f $tmp_file; then
-
-          # For a nails build, check if the file supports our nail bits.
-          # Generic code always supports all nails.
-          #
-          # FIXME: When a multi-function file is selected to provide one of
-          # the nails-neutral routines, like logops_n for and_n, the
-          # PROLOGUE grepping will create HAVE_NATIVE_mpn_<foo> defines for
-          # all functions in that file, even if they haven't all been
-          # nailified.  Not sure what to do about this, it's only really a
-          # problem for logops_n, and it's not too terrible to insist those
-          # get nailified always.
-          #
-          if test $GMP_NAIL_BITS != 0 && test $tmp_dir != generic; then
-            case $tmp_fn in
-              and_n | ior_n | xor_n | andn_n | \
-              copyi | copyd | \
-              popcount | hamdist | \
-              udiv | udiv_w_sdiv | umul | \
-              cntlz | invert_limb)
-                # these operations are either unaffected by nails or defined
-                # to operate on full limbs
-                ;;
-              *)
-                nails=[`sed -n 's/^[   ]*NAILS_SUPPORT(\(.*\))/\1/p' $tmp_file `]
-                for n in $nails; do
-                  case $n in
-                  *-*)
-                    n_start=`echo "$n" | sed -n 's/\(.*\)-.*/\1/p'`
-                    n_end=`echo "$n" | sed -n 's/.*-\(.*\)/\1/p'`
-                    ;;
-                  *)
-                    n_start=$n
-                    n_end=$n
-                    ;;
-                  esac
-                  if test $GMP_NAIL_BITS -ge $n_start && test $GMP_NAIL_BITS -le $n_end; then
-                    found=yes
-                    break
-                  fi
-                done
-                if test $found != yes; then
-                  continue
-                fi
-                ;;
-            esac
-          fi
-
-          found=yes
-          eval found_$tmp_ext=yes
-
-          if test $tmp_ext = c; then
-            tmp_u='$U'
-          else
-            tmp_u=
-          fi
-
-          mpn_objects="$mpn_objects $tmp_fn$tmp_u.lo"
-          mpn_objs_in_libgmp="$mpn_objs_in_libgmp mpn/$tmp_fn$tmp_u.lo"
-          AC_CONFIG_LINKS(mpn/$tmp_fn.$tmp_ext:mpn/$tmp_dir/$tmp_base.$tmp_ext)
-          gmp_srclinks="$gmp_srclinks mpn/$tmp_fn.$tmp_ext"
-
-          # Duplicate AC_DEFINEs are harmless, so it doesn't matter
-          # that multi-function files get grepped here repeatedly.
-          # The PROLOGUE pattern excludes the optional second parameter.
-          gmp_ep=[`
-            sed -n 's/^[       ]*MULFUNC_PROLOGUE(\(.*\))/\1/p' $tmp_file ;
-            sed -n 's/^[       ]*PROLOGUE(\([^,]*\).*)/\1/p' $tmp_file
-          `]
-          for gmp_tmp in $gmp_ep; do
-            AC_DEFINE_UNQUOTED(HAVE_NATIVE_$gmp_tmp)
-            eval HAVE_NATIVE_$gmp_tmp=yes
-          done
-
-          case $tmp_fn in
-          sqr_basecase) sqr_basecase_source=$tmp_file ;;
-          esac
-
-          break
-        fi
-      done
-      if test $found = yes; then break ; fi
-    done
-    if test $found = yes; then break ; fi
-  done
-
-  if test $found = no; then
-    for tmp_optional in $gmp_mpn_functions_optional; do
-      if test $tmp_optional = $tmp_fn; then
-        found=yes
-      fi
-    done
-    if test $found = no; then
-      AC_MSG_ERROR([no version of $tmp_fn found in path: $path])
-    fi
-  fi
-done
-
-# All cycle counters are .asm files currently
-if test -n "$SPEED_CYCLECOUNTER_OBJ"; then
-  found_asm=yes
-fi
-
-dnl  The following list only needs to have templates for those defines which
-dnl  are going to be tested by the code, there's no need to have every
-dnl  possible mpn routine.
-
-AH_VERBATIM([HAVE_NATIVE],
-[/* Define to 1 each of the following for which a native (ie. CPU specific)
-    implementation of the corresponding routine exists.  */
-#undef HAVE_NATIVE_mpn_add_n
-#undef HAVE_NATIVE_mpn_add_n_sub_n
-#undef HAVE_NATIVE_mpn_add_nc
-#undef HAVE_NATIVE_mpn_addaddmul_1msb0
-#undef HAVE_NATIVE_mpn_addlsh1_n
-#undef HAVE_NATIVE_mpn_addlsh2_n
-#undef HAVE_NATIVE_mpn_addlsh_n
-#undef HAVE_NATIVE_mpn_addmul_1c
-#undef HAVE_NATIVE_mpn_addmul_2
-#undef HAVE_NATIVE_mpn_addmul_3
-#undef HAVE_NATIVE_mpn_addmul_4
-#undef HAVE_NATIVE_mpn_addmul_5
-#undef HAVE_NATIVE_mpn_addmul_6
-#undef HAVE_NATIVE_mpn_addmul_7
-#undef HAVE_NATIVE_mpn_addmul_8
-#undef HAVE_NATIVE_mpn_and_n
-#undef HAVE_NATIVE_mpn_andn_n
-#undef HAVE_NATIVE_mpn_bdiv_dbm1c
-#undef HAVE_NATIVE_mpn_bdiv_q_1
-#undef HAVE_NATIVE_mpn_pi1_bdiv_q_1
-#undef HAVE_NATIVE_mpn_com
-#undef HAVE_NATIVE_mpn_copyd
-#undef HAVE_NATIVE_mpn_copyi
-#undef HAVE_NATIVE_mpn_divexact_1
-#undef HAVE_NATIVE_mpn_divexact_by3c
-#undef HAVE_NATIVE_mpn_divrem_1
-#undef HAVE_NATIVE_mpn_divrem_1c
-#undef HAVE_NATIVE_mpn_divrem_2
-#undef HAVE_NATIVE_mpn_gcd_1
-#undef HAVE_NATIVE_mpn_hamdist
-#undef HAVE_NATIVE_mpn_invert_limb
-#undef HAVE_NATIVE_mpn_ior_n
-#undef HAVE_NATIVE_mpn_iorn_n
-#undef HAVE_NATIVE_mpn_lshift
-#undef HAVE_NATIVE_mpn_lshiftc
-#undef HAVE_NATIVE_mpn_lshsub_n
-#undef HAVE_NATIVE_mpn_mod_1
-#undef HAVE_NATIVE_mpn_mod_1_1p
-#undef HAVE_NATIVE_mpn_mod_1c
-#undef HAVE_NATIVE_mpn_mod_1s_2p
-#undef HAVE_NATIVE_mpn_mod_1s_4p
-#undef HAVE_NATIVE_mpn_mod_34lsub1
-#undef HAVE_NATIVE_mpn_modexact_1_odd
-#undef HAVE_NATIVE_mpn_modexact_1c_odd
-#undef HAVE_NATIVE_mpn_mul_1
-#undef HAVE_NATIVE_mpn_mul_1c
-#undef HAVE_NATIVE_mpn_mul_2
-#undef HAVE_NATIVE_mpn_mul_3
-#undef HAVE_NATIVE_mpn_mul_4
-#undef HAVE_NATIVE_mpn_mul_basecase
-#undef HAVE_NATIVE_mpn_nand_n
-#undef HAVE_NATIVE_mpn_nior_n
-#undef HAVE_NATIVE_mpn_popcount
-#undef HAVE_NATIVE_mpn_preinv_divrem_1
-#undef HAVE_NATIVE_mpn_preinv_mod_1
-#undef HAVE_NATIVE_mpn_redc_1
-#undef HAVE_NATIVE_mpn_redc_2
-#undef HAVE_NATIVE_mpn_rsblsh1_n
-#undef HAVE_NATIVE_mpn_rsblsh2_n
-#undef HAVE_NATIVE_mpn_rsblsh_n
-#undef HAVE_NATIVE_mpn_rsh1add_n
-#undef HAVE_NATIVE_mpn_rsh1add_nc
-#undef HAVE_NATIVE_mpn_rsh1sub_n
-#undef HAVE_NATIVE_mpn_rsh1sub_nc
-#undef HAVE_NATIVE_mpn_rshift
-#undef HAVE_NATIVE_mpn_sqr_basecase
-#undef HAVE_NATIVE_mpn_sqr_diagonal
-#undef HAVE_NATIVE_mpn_sub_n
-#undef HAVE_NATIVE_mpn_sub_nc
-#undef HAVE_NATIVE_mpn_sublsh1_n
-#undef HAVE_NATIVE_mpn_sublsh2_n
-#undef HAVE_NATIVE_mpn_sublsh_n
-#undef HAVE_NATIVE_mpn_submul_1c
-#undef HAVE_NATIVE_mpn_udiv_qrnnd
-#undef HAVE_NATIVE_mpn_udiv_qrnnd_r
-#undef HAVE_NATIVE_mpn_umul_ppmm
-#undef HAVE_NATIVE_mpn_umul_ppmm_r
-#undef HAVE_NATIVE_mpn_xor_n
-#undef HAVE_NATIVE_mpn_xnor_n])
-
-
-# Don't demand an m4 unless it's actually needed.
-if test $found_asm = yes; then
-  GMP_PROG_M4
-  GMP_M4_M4WRAP_SPURIOUS
-# else
-# It's unclear why this m4-not-needed stuff was ever done.
-#  if test -z "$M4" ; then
-#    M4=m4-not-needed
-#  fi
-fi
-
-# Only do the GMP_ASM checks if there's a .S or .asm wanting them.
-if test $found_asm = no && test $found_S = no; then
-  gmp_asm_syntax_testing=no
-fi
-
-if test "$gmp_asm_syntax_testing" != no; then
-  GMP_ASM_TEXT
-  GMP_ASM_DATA
-  GMP_ASM_LABEL_SUFFIX
-  GMP_ASM_GLOBL
-  GMP_ASM_GLOBL_ATTR
-  GMP_ASM_UNDERSCORE
-  GMP_ASM_RODATA
-  GMP_ASM_TYPE
-  GMP_ASM_SIZE
-  GMP_ASM_LSYM_PREFIX
-  GMP_ASM_W32
-  GMP_ASM_ALIGN_LOG
-
-  case $host in
-    hppa*-*-*)
-      # for both pa32 and pa64
-      GMP_INCLUDE_MPN(pa32/pa-defs.m4)
-      ;;
-    IA64_PATTERN)
-      GMP_ASM_IA64_ALIGN_OK
-      ;;
-    M68K_PATTERN)
-      GMP_ASM_M68K_INSTRUCTION
-      GMP_ASM_M68K_ADDRESSING
-      GMP_ASM_M68K_BRANCHES
-      ;;
-    [powerpc*-*-* | power[3-9]-*-*])
-      GMP_ASM_POWERPC_PIC_ALWAYS
-      GMP_ASM_POWERPC_R_REGISTERS
-      GMP_INCLUDE_MPN(powerpc32/powerpc-defs.m4)
-      case $host in
-        *-*-aix*)
-         case $ABI in
-           64 | aix64)  GMP_INCLUDE_MPN(powerpc64/aix.m4) ;;
-            *)           GMP_INCLUDE_MPN(powerpc32/aix.m4) ;;
-          esac
-          ;;
-        *-*-linux* | *-*-*bsd*)
-         case $ABI in
-           mode64)      GMP_INCLUDE_MPN(powerpc64/elf.m4) ;;
-           mode32 | 32) GMP_INCLUDE_MPN(powerpc32/elf.m4) ;;
-          esac
-          ;;
-        *-*-darwin*)
-         case $ABI in
-           mode64)      GMP_INCLUDE_MPN(powerpc64/darwin.m4) ;;
-           mode32 | 32) GMP_INCLUDE_MPN(powerpc32/darwin.m4) ;;
-          esac
-          ;;
-        *)
-         # Assume unrecognized operating system is the powerpc eABI
-          GMP_INCLUDE_MPN(powerpc32/eabi.m4)
-         ;;
-      esac
-      ;;
-    power*-*-aix*)
-      GMP_INCLUDE_MPN(powerpc32/aix.m4)
-      ;;
-    sparcv9*-*-* | ultrasparc*-*-* | sparc64-*-*)
-      case $ABI in
-        64)
-          GMP_ASM_SPARC_REGISTER
-          ;;
-      esac
-      ;;
-    X86_PATTERN | X86_64_PATTERN)
-      GMP_ASM_ALIGN_FILL_0x90
-      case $ABI in
-        32)
-          GMP_INCLUDE_MPN(x86/x86-defs.m4)
-          AC_DEFINE(HAVE_HOST_CPU_FAMILY_x86)
-          GMP_ASM_COFF_TYPE
-          GMP_ASM_X86_GOT_UNDERSCORE
-          GMP_ASM_X86_SHLDL_CL
-         case $enable_profiling in
-           prof | gprof)  GMP_ASM_X86_MCOUNT ;;
-         esac
-         case $host in
-           *-*-darwin*)
-             GMP_INCLUDE_MPN(x86/darwin.m4) ;;
-         esac
-          ;;
-        64)
-          GMP_INCLUDE_MPN(x86_64/x86_64-defs.m4)
-          AC_DEFINE(HAVE_HOST_CPU_FAMILY_x86_64)
-         case $host in
-           *-*-darwin*)
-             GMP_INCLUDE_MPN(x86_64/darwin.m4) ;;
-         esac
-          ;;
-      esac
-      ;;
-  esac
-fi
-
-# For --enable-minithres, prepend "minithres" to path so that its special
-# gmp-mparam.h will be used.
-if test $enable_minithres = yes; then
-  path="minithres $path"
-fi
-
-# Create link for gmp-mparam.h.
-gmp_mparam_source=
-for gmp_mparam_dir in $path; do
-  test "$no_create" = yes || rm -f gmp-mparam.h
-  tmp_file=$srcdir/mpn/$gmp_mparam_dir/gmp-mparam.h
-  if test -f $tmp_file; then
-    AC_CONFIG_LINKS(gmp-mparam.h:mpn/$gmp_mparam_dir/gmp-mparam.h)
-    gmp_srclinks="$gmp_srclinks gmp-mparam.h"
-    gmp_mparam_source=$tmp_file
-    break
-  fi
-done
-if test -z "$gmp_mparam_source"; then
-  AC_MSG_ERROR([no version of gmp-mparam.h found in path: $path])
-fi
-
-# For a helpful message from tune/tuneup.c
-gmp_mparam_suggest=$gmp_mparam_source
-if test "$gmp_mparam_dir" = generic; then
-  for i in $path; do break; done
-  if test "$i" != generic; then
-    gmp_mparam_suggest="new file $srcdir/mpn/$i/gmp-mparam.h"
-  fi
-fi
-AC_DEFINE_UNQUOTED(GMP_MPARAM_H_SUGGEST, "$gmp_mparam_source",
-[The gmp-mparam.h file (a string) the tune program should suggest updating.])
-
-
-# Copy any SQR_TOOM2_THRESHOLD from gmp-mparam.h to config.m4.
-# Some versions of sqr_basecase.asm use this.
-# Fat binaries do this on a per-file basis, so skip in that case.
-#
-if test -z "$fat_path"; then
-  tmp_gmp_karatsuba_sqr_threshold=`sed -n 's/^#define SQR_TOOM2_THRESHOLD[     ]*\([0-9][0-9]*\).*$/\1/p' $gmp_mparam_source`
-  if test -n "$tmp_gmp_karatsuba_sqr_threshold"; then
-    GMP_DEFINE_RAW(["define(<SQR_TOOM2_THRESHOLD>,<$tmp_gmp_karatsuba_sqr_threshold>)"])
-  fi
-fi
-
-
-# Sizes of some types, needed at preprocessing time.
-#
-# FIXME: The assumption that GMP_LIMB_BITS is 8*sizeof(mp_limb_t) might
-# be slightly rash, but it's true everywhere we know of and ought to be true
-# of any sensible system.  In a generic C build, grepping LONG_BIT out of
-# <limits.h> might be an alternative, for maximum portability.
-#
-AC_CHECK_SIZEOF(void *)
-AC_CHECK_SIZEOF(unsigned short)
-AC_CHECK_SIZEOF(unsigned)
-AC_CHECK_SIZEOF(unsigned long)
-AC_CHECK_SIZEOF(mp_limb_t, , GMP_INCLUDE_GMP_H)
-if test "$ac_cv_sizeof_mp_limb_t" = 0; then
-  AC_MSG_ERROR([Oops, mp_limb_t doesn't seem to work])
-fi
-AC_SUBST(GMP_LIMB_BITS, `expr 8 \* $ac_cv_sizeof_mp_limb_t`)
-GMP_DEFINE_RAW(["define(<SIZEOF_UNSIGNED>,<$ac_cv_sizeof_unsigned>)"])
-
-# Check compiler limb size matches gmp-mparam.h
-#
-# FIXME: Some of the cycle counter objects in the tune directory depend on
-# the size of ulong, it'd be possible to check that here, though a mismatch
-# probably wouldn't want to be fatal, none of the libgmp assembler code
-# depends on ulong.
-#
-mparam_bits=[`sed -n 's/^#define GMP_LIMB_BITS[        ][      ]*\([0-9]*\).*$/\1/p' $gmp_mparam_source`]
-if test -n "$mparam_bits" && test "$mparam_bits" -ne $GMP_LIMB_BITS; then
-  if test "$test_CFLAGS" = set; then
-    AC_MSG_ERROR([Oops, mp_limb_t is $GMP_LIMB_BITS bits, but the assembler code
-in this configuration expects $mparam_bits bits.
-You appear to have set \$CFLAGS, perhaps you also need to tell GMP the
-intended ABI, see "ABI and ISA" in the manual.])
-  else
-    AC_MSG_ERROR([Oops, mp_limb_t is $GMP_LIMB_BITS bits, but the assembler code
-in this configuration expects $mparam_bits bits.])
-  fi
-fi
-
-GMP_DEFINE_RAW(["define(<GMP_LIMB_BITS>,$GMP_LIMB_BITS)"])
-GMP_DEFINE_RAW(["define(<GMP_NAIL_BITS>,$GMP_NAIL_BITS)"])
-GMP_DEFINE_RAW(["define(<GMP_NUMB_BITS>,eval(GMP_LIMB_BITS-GMP_NAIL_BITS))"])
-
-
-# Exclude the mpn random functions from mpbsd since that would drag in the
-# top-level rand things, all of which are unnecessary for libmp.  There's
-# other unnecessary objects too actually, if we could be bothered figuring
-# out exactly which they are.
-#
-mpn_objs_in_libmp=
-for i in $mpn_objs_in_libgmp; do
-  case $i in
-  *random*) ;;
-  *) mpn_objs_in_libmp="$mpn_objs_in_libmp $i" ;;
-  esac
-done
-AC_SUBST(mpn_objs_in_libmp)
-
-AC_SUBST(mpn_objects)
-AC_SUBST(mpn_objs_in_libgmp)
-AC_SUBST(gmp_srclinks)
-
-
-# A recompiled sqr_basecase for use in the tune program, if necessary.
-TUNE_SQR_OBJ=
-test -d tune || mkdir tune
-case $sqr_basecase_source in
-  *.asm)
-    sqr_max=[`sed -n 's/^def...(SQR_TOOM2_THRESHOLD_MAX, *\([0-9]*\))/\1/p' $sqr_basecase_source`]
-    if test -n "$sqr_max"; then
-      TUNE_SQR_OBJ=sqr_asm.o
-      AC_DEFINE_UNQUOTED(TUNE_SQR_TOOM2_MAX,$sqr_max,
-      [Maximum size the tune program can test for SQR_TOOM2_THRESHOLD])
-    fi
-    cat >tune/sqr_basecase.c <<EOF
-/* not sure that an empty file can compile, so put in a dummy */
-int sqr_basecase_dummy;
-EOF
-    ;;
-  *.c)
-    TUNE_SQR_OBJ=
-    AC_DEFINE(TUNE_SQR_TOOM2_MAX,SQR_TOOM2_MAX_GENERIC)
-    cat >tune/sqr_basecase.c <<EOF
-#define TUNE_PROGRAM_BUILD 1
-#define TUNE_PROGRAM_BUILD_SQR 1
-#include "mpn/sqr_basecase.c"
-EOF
-    ;;
-esac
-AC_SUBST(TUNE_SQR_OBJ)
-
-
-# Configs for demos/pexpr.c.
-#
-AC_CONFIG_FILES(demos/pexpr-config.h:demos/pexpr-config-h.in)
-GMP_SUBST_CHECK_FUNCS(clock, cputime, getrusage, gettimeofday, sigaction, sigaltstack, sigstack)
-GMP_SUBST_CHECK_HEADERS(sys/resource.h)
-AC_CHECK_TYPES([stack_t], HAVE_STACK_T_01=1, HAVE_STACK_T_01=0,
-               [#include <signal.h>])
-AC_SUBST(HAVE_STACK_T_01)
-
-# Configs for demos/calc directory
-#
-# AC_SUBST+AC_CONFIG_FILES is used for calc-config.h, rather than AC_DEFINE+
-# AC_CONFIG_HEADERS, since with the latter automake (1.8) will then put the
-# directory (ie. demos/calc) into $(DEFAULT_INCLUDES) for every Makefile.in,
-# which would look very strange.
-#
-# -lcurses is required by libreadline.  On a typical SVR4 style system this
-# normally doesn't have to be given explicitly, since libreadline.so will
-# have a NEEDED record for it.  But if someone for some reason is using only
-# a static libreadline.a then we must give -lcurses.  Readline (as of
-# version 4.3) doesn't use libtool, so we can't rely on a .la to cover
-# necessary dependencies.
-#
-# On a couple of systems we've seen libreadline available, but the headers
-# not in the default include path, so check for readline/readline.h.  We've
-# also seen readline/history.h missing, not sure if that's just a broken
-# install or a very old version, but check that too.
-#
-AC_CONFIG_FILES(demos/calc/calc-config.h:demos/calc/calc-config-h.in)
-LIBCURSES=
-if test $with_readline != no; then
-  AC_CHECK_LIB(ncurses, tputs, [LIBCURSES=-lncurses],
-    [AC_CHECK_LIB(curses, tputs, [LIBCURSES=-lcurses])])
-fi
-AC_SUBST(LIBCURSES)
-use_readline=$with_readline
-if test $with_readline = detect; then
-  use_readline=no
-  AC_CHECK_LIB(readline, readline,
-    [AC_CHECK_HEADER(readline/readline.h,
-      [AC_CHECK_HEADER(readline/history.h, use_readline=yes)])],
-    , $LIBCURSES)
-  AC_MSG_CHECKING(readline detected)
-  AC_MSG_RESULT($use_readline)
-fi
-if test $use_readline = yes; then
-  AC_SUBST(WITH_READLINE_01, 1)
-  AC_SUBST(LIBREADLINE, -lreadline)
-else
-  WITH_READLINE_01=0
-fi
-AC_PROG_YACC
-AM_PROG_LEX
-
-# Configs for demos/expr directory
-#
-# Libtool already runs an AC_CHECK_TOOL for ranlib, but we give
-# AC_PROG_RANLIB anyway since automake is supposed to complain if it's not
-# called.  (Automake 1.8.4 doesn't, at least not when the only library is in
-# an EXTRA_LIBRARIES.)
-#
-AC_PROG_RANLIB
-
-
-# Create config.m4.
-GMP_FINISH
-
-# Create Makefiles
-# FIXME: Upcoming version of autoconf/automake may not like broken lines.
-#        Right now automake isn't accepting the new AC_CONFIG_FILES scheme.
-
-AC_OUTPUT(Makefile                                                     \
-  mpbsd/Makefile mpf/Makefile mpn/Makefile mpq/Makefile                        \
-  mpz/Makefile printf/Makefile scanf/Makefile cxx/Makefile             \
-  tests/Makefile tests/devel/Makefile tests/mpbsd/Makefile             \
-  tests/mpf/Makefile tests/mpn/Makefile tests/mpq/Makefile             \
-  tests/mpz/Makefile tests/rand/Makefile tests/misc/Makefile           \
-  tests/cxx/Makefile                                                   \
-  doc/Makefile tune/Makefile                                           \
-  demos/Makefile demos/calc/Makefile demos/expr/Makefile               \
-  gmp.h:gmp-h.in mp.h:mp-h.in)
diff --git a/cxx/Makefile.am b/cxx/Makefile.am

index c4a42dce4e276a00db9eea59fd9198f525281aea..a5d51905d660839fbd2d8c15bf7017802c573646 100644 (file)
--- a/cxx/Makefile.am
+++ b/cxx/Makefile.am
@@ -1,6 +1,6 @@
  ## Process this file with automake to generate Makefile.in
  
-# Copyright 2001, 2002, 2003 Free Software Foundation, Inc.
+# Copyright 2001, 2002, 2003, 2012 Free Software Foundation, Inc.
  #
  # This file is part of the GNU MP Library.
  #
@@ -25,5 +25,5 @@ noinst_LTLIBRARIES = libcxx.la
  endif
  
  libcxx_la_SOURCES = \
-  isfuns.cc ismpf.cc ismpq.cc ismpz.cc ismpznw.cc \
+  isfuns.cc ismpf.cc ismpq.cc ismpz.cc ismpznw.cc limits.cc \
    osdoprnti.cc osfuns.cc osmpf.cc osmpq.cc osmpz.cc
diff --git a/cxx/Makefile.in b/cxx/Makefile.in

index a6e278f20d9360888d476a0035faedca976fee4d..0e9441772d82eec182051ba2382b3395003bc0e6 100644 (file)
--- a/cxx/Makefile.in
+++ b/cxx/Makefile.in
@@ -1,9 +1,9 @@
-# Makefile.in generated by automake 1.11.1 from Makefile.am.
+# Makefile.in generated by automake 1.11.6 from Makefile.am.
  # @configure_input@
  
  # Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
-# 2003, 2004, 2005, 2006, 2007, 2008, 2009  Free Software Foundation,
-# Inc.
+# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software
+# Foundation, Inc.
  # This Makefile.in is free software; the Free Software Foundation
  # gives unlimited permission to copy and/or distribute it,
  # with or without modifications, as long as this notice is preserved.
@@ -15,7 +15,7 @@
  
  @SET_MAKE@
  
-# Copyright 2001, 2002, 2003 Free Software Foundation, Inc.
+# Copyright 2001, 2002, 2003, 2012 Free Software Foundation, Inc.
  #
  # This file is part of the GNU MP Library.
  #
@@ -33,6 +33,23 @@
  # along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
  
  VPATH = @srcdir@
+am__make_dryrun = \
+  { \
+    am__dry=no; \
+    case $$MAKEFLAGS in \
+      *\\[\ \  ]*) \
+        echo 'am--echo: ; @echo "AM"  OK' | $(MAKE) -f - 2>/dev/null \
+          | grep '^AM OK$$' >/dev/null || am__dry=yes;; \
+      *) \
+        for am__flg in $$MAKEFLAGS; do \
+          case $$am__flg in \
+            *=*|--*) ;; \
+            *n*) am__dry=yes; break;; \
+          esac; \
+        done;; \
+    esac; \
+    test $$am__dry = yes; \
+  }
  pkgdatadir = $(datadir)/@PACKAGE@
  pkgincludedir = $(includedir)/@PACKAGE@
  pkglibdir = $(libdir)/@PACKAGE@
@@ -51,12 +68,11 @@ PRE_UNINSTALL = :
  POST_UNINSTALL = :
  build_triplet = @build@
  host_triplet = @host@
-ANSI2KNR = $(top_builddir)/ansi2knr
  subdir = cxx
  DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in
  ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
  am__aclocal_m4_deps = $(top_srcdir)/acinclude.m4 \
-       $(top_srcdir)/configure.in
+       $(top_srcdir)/configure.ac
  am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
         $(ACLOCAL_M4)
  mkinstalldirs = $(install_sh) -d
@@ -66,7 +82,7 @@ CONFIG_CLEAN_VPATH_FILES =
  LTLIBRARIES = $(noinst_LTLIBRARIES)
  libcxx_la_LIBADD =
  am_libcxx_la_OBJECTS = isfuns.lo ismpf.lo ismpq.lo ismpz.lo ismpznw.lo \
-       osdoprnti.lo osfuns.lo osmpf.lo osmpq.lo osmpz.lo
+       limits.lo osdoprnti.lo osfuns.lo osmpf.lo osmpq.lo osmpz.lo
  libcxx_la_OBJECTS = $(am_libcxx_la_OBJECTS)
  @WANT_CXX_TRUE@am_libcxx_la_rpath =
  DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir)
@@ -83,6 +99,11 @@ CXXLINK = $(LIBTOOL) --tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \
         $(LDFLAGS) -o $@
  SOURCES = $(libcxx_la_SOURCES)
  DIST_SOURCES = $(libcxx_la_SOURCES)
+am__can_run_installinfo = \
+  case $$AM_UPDATE_INFO_DIR in \
+    n|no|NO) false;; \
+    *) (install-info --version) >/dev/null 2>&1;; \
+  esac
  ETAGS = etags
  CTAGS = ctags
  DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
@@ -184,8 +205,8 @@ SHELL = @SHELL@
  SPEED_CYCLECOUNTER_OBJ = @SPEED_CYCLECOUNTER_OBJ@
  STRIP = @STRIP@
  TAL_OBJECT = @TAL_OBJECT@
+TUNE_LIBS = @TUNE_LIBS@
  TUNE_SQR_OBJ = @TUNE_SQR_OBJ@
-U = @U@
  U_FOR_BUILD = @U_FOR_BUILD@
  VERSION = @VERSION@
  WITH_READLINE_01 = @WITH_READLINE_01@
@@ -232,7 +253,6 @@ mandir = @mandir@
  mkdir_p = @mkdir_p@
  mpn_objects = @mpn_objects@
  mpn_objs_in_libgmp = @mpn_objs_in_libgmp@
-mpn_objs_in_libmp = @mpn_objs_in_libmp@
  oldincludedir = @oldincludedir@
  pdfdir = @pdfdir@
  prefix = @prefix@
@@ -249,7 +269,7 @@ top_srcdir = @top_srcdir@
  INCLUDES = -D__GMP_WITHIN_GMPXX -I$(top_srcdir)
  @WANT_CXX_TRUE@noinst_LTLIBRARIES = libcxx.la
  libcxx_la_SOURCES = \
-  isfuns.cc ismpf.cc ismpq.cc ismpz.cc ismpznw.cc \
+  isfuns.cc ismpf.cc ismpq.cc ismpz.cc ismpznw.cc limits.cc \
    osdoprnti.cc osfuns.cc osmpf.cc osmpq.cc osmpz.cc
  
  all: all-am
@@ -295,7 +315,7 @@ clean-noinstLTLIBRARIES:
           echo "rm -f \"$${dir}/so_locations\""; \
           rm -f "$${dir}/so_locations"; \
         done
-libcxx.la: $(libcxx_la_OBJECTS) $(libcxx_la_DEPENDENCIES) 
+libcxx.la: $(libcxx_la_OBJECTS) $(libcxx_la_DEPENDENCIES) $(EXTRA_libcxx_la_DEPENDENCIES) 
         $(CXXLINK) $(am_libcxx_la_rpath) $(libcxx_la_OBJECTS) $(libcxx_la_LIBADD) $(LIBS)
  
  mostlyclean-compile:
@@ -303,11 +323,6 @@ mostlyclean-compile:
  
  distclean-compile:
         -rm -f *.tab.c
-$(top_builddir)/ansi2knr:
-       $(am__cd) $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) ./ansi2knr
-
-mostlyclean-kr:
-       -test "$U" = "" || rm -f *_.c
  
  .cc.o:
         $(CXXCOMPILE) -c -o $@ $<
@@ -420,10 +435,15 @@ install-am: all-am
  
  installcheck: installcheck-am
  install-strip:
-       $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
-         install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
-         `test -z '$(STRIP)' || \
-           echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+       if test -z '$(STRIP)'; then \
+         $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+           install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+             install; \
+       else \
+         $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+           install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+           "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
+       fi
  mostlyclean-generic:
  
  clean-generic:
@@ -491,7 +511,7 @@ maintainer-clean-am: distclean-am maintainer-clean-generic
  
  mostlyclean: mostlyclean-am
  
-mostlyclean-am: mostlyclean-compile mostlyclean-generic mostlyclean-kr \
+mostlyclean-am: mostlyclean-compile mostlyclean-generic \
         mostlyclean-libtool
  
  pdf: pdf-am
@@ -504,7 +524,7 @@ ps-am:
  
  uninstall-am:
  
-.MAKE: $(top_builddir)/ansi2knr install-am install-strip
+.MAKE: install-am install-strip
  
  .PHONY: CTAGS GTAGS all all-am check check-am clean clean-generic \
         clean-libtool clean-noinstLTLIBRARIES ctags distclean \
@@ -516,9 +536,8 @@ uninstall-am:
         install-pdf install-pdf-am install-ps install-ps-am \
         install-strip installcheck installcheck-am installdirs \
         maintainer-clean maintainer-clean-generic mostlyclean \
-       mostlyclean-compile mostlyclean-generic mostlyclean-kr \
-       mostlyclean-libtool pdf pdf-am ps ps-am tags uninstall \
-       uninstall-am
+       mostlyclean-compile mostlyclean-generic mostlyclean-libtool \
+       pdf pdf-am ps ps-am tags uninstall uninstall-am
  
  
  # Tell versions [3.59,3.63) of GNU make to not export all variables.
diff --git a/cxx/ismpf.cc b/cxx/ismpf.cc

index bfe4dc8b919d917f76bbc8dd3d99c40056457790..520d4c3e211d7275129aa6062c04dfcfec160aa3 100644 (file)
--- a/cxx/ismpf.cc
+++ b/cxx/ismpf.cc
@@ -120,7 +120,7 @@ operator>> (istream &i, mpf_ptr f)
    if (i.good()) // last character read was non-numeric
      i.putback(c);
    else if (i.eof() && ok) // stopped just before eof
-    i.clear();
+    i.clear(ios::eofbit);
  
    if (ok)
      ASSERT_NOCARRY (mpf_set_str(f, s.c_str(), base)); // extract the number
diff --git a/cxx/ismpq.cc b/cxx/ismpq.cc

index 23eec7657ddd1378f831ef318ab27677d7c5afb7..5cf8c4f94975a4eaaf681a31534cc37f8301802f 100644 (file)
--- a/cxx/ismpq.cc
+++ b/cxx/ismpq.cc
@@ -49,7 +49,7 @@ operator>> (istream &i, mpq_ptr q)
        if (i.good())
          i.putback(c);
        else if (i.eof())
-        i.clear();
+        i.clear(ios::eofbit);
      }
  
    return i;
diff --git a/cxx/ismpznw.cc b/cxx/ismpznw.cc

index 387d092acf71cbc2fe2a0ebeab36a10edc2d86ae..4111575d098713623f8001df643cabc94d6b77ae 100644 (file)
--- a/cxx/ismpznw.cc
+++ b/cxx/ismpznw.cc
@@ -49,7 +49,7 @@ __gmpz_operator_in_nowhite (istream &i, mpz_ptr z, char c)
    if (i.good()) // last character read was non-numeric
      i.putback(c);
    else if (i.eof() && (ok || zero)) // stopped just before eof
-    i.clear();
+    i.clear(ios::eofbit);
  
    if (ok)
      ASSERT_NOCARRY (mpz_set_str (z, s.c_str(), base)); // extract the number
diff --git a/cxx/limits.cc b/cxx/limits.cc

new file mode 100644 (file)

index 0000000..9b32df7
--- /dev/null
+++ b/cxx/limits.cc
@@ -0,0 +1,51 @@
+/* instantiation of numeric_limits specializations.
+
+Copyright 2012 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmpxx.h"
+
+namespace std {
+#define GMPXX_INSTANTIATE_LIMITS(T) \
+  const bool numeric_limits<T>::is_specialized; \
+  const int  numeric_limits<T>::digits; \
+  const int  numeric_limits<T>::digits10; \
+  const int  numeric_limits<T>::max_digits10; \
+  const bool numeric_limits<T>::is_signed; \
+  const bool numeric_limits<T>::is_integer; \
+  const bool numeric_limits<T>::is_exact; \
+  const int  numeric_limits<T>::radix; \
+  const int  numeric_limits<T>::min_exponent; \
+  const int  numeric_limits<T>::min_exponent10; \
+  const int  numeric_limits<T>::max_exponent; \
+  const int  numeric_limits<T>::max_exponent10; \
+  const bool numeric_limits<T>::has_infinity; \
+  const bool numeric_limits<T>::has_quiet_NaN; \
+  const bool numeric_limits<T>::has_signaling_NaN; \
+  const float_denorm_style numeric_limits<T>::has_denorm; \
+  const bool numeric_limits<T>::has_denorm_loss; \
+  const bool numeric_limits<T>::is_iec559; \
+  const bool numeric_limits<T>::is_bounded; \
+  const bool numeric_limits<T>::is_modulo; \
+  const bool numeric_limits<T>::traps; \
+  const bool numeric_limits<T>::tinyness_before; \
+  const float_round_style numeric_limits<T>::round_style
+
+  GMPXX_INSTANTIATE_LIMITS(mpz_class);
+  GMPXX_INSTANTIATE_LIMITS(mpq_class);
+  GMPXX_INSTANTIATE_LIMITS(mpf_class);
+}
diff --git a/cxx/osdoprnti.cc b/cxx/osdoprnti.cc

index e5dea4e36107057d7bb13be0781e4d1e859a9835..7034bfbc65e9762eb9b5ed75f979ecc11c337831 100644 (file)
--- a/cxx/osdoprnti.cc
+++ b/cxx/osdoprnti.cc
@@ -22,8 +22,8 @@ You should have received a copy of the GNU Lesser General Public License
  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
  
  #include <iostream>
-#include <cstdarg>    /* for va_list and hence doprnt_funs_t */
-#include <cstring>    /* for strlen */
+#include <stdarg.h>   /* for va_list and hence doprnt_funs_t */
+#include <string.h>   /* for strlen */
  
  #include "gmp.h"
  #include "gmp-impl.h"
diff --git a/demos/Makefile.am b/demos/Makefile.am

index 97cb3ef4656ca1b265bb1ec2fa47729003bb57d5..d82e947c8058b3fcc4fe97db53689a5e402a25dc 100644 (file)
--- a/demos/Makefile.am
+++ b/demos/Makefile.am
@@ -1,6 +1,6 @@
  ## Process this file with automake to generate Makefile.in
  
-# Copyright 2000, 2001, 2002 Free Software Foundation, Inc.
+# Copyright 2000, 2001, 2002, 2012 Free Software Foundation, Inc.
  #
  # This file is part of the GNU MP Library.
  #
@@ -19,7 +19,7 @@
  
  
  SUBDIRS = calc expr
-EXTRA_DIST = perl
+EXTRA_DIST = perl primes.h
  
  INCLUDES = -I$(top_srcdir)
  LDADD = $(top_builddir)/libgmp.la
diff --git a/demos/Makefile.in b/demos/Makefile.in

index 9e7a690761635b8436f5a99f38ac8f7374eec1e0..a9101aef10aeb3c91cb09f9505f736a793a37ac3 100644 (file)
--- a/demos/Makefile.in
+++ b/demos/Makefile.in
@@ -1,9 +1,9 @@
-# Makefile.in generated by automake 1.11.1 from Makefile.am.
+# Makefile.in generated by automake 1.11.6 from Makefile.am.
  # @configure_input@
  
  # Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
-# 2003, 2004, 2005, 2006, 2007, 2008, 2009  Free Software Foundation,
-# Inc.
+# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software
+# Foundation, Inc.
  # This Makefile.in is free software; the Free Software Foundation
  # gives unlimited permission to copy and/or distribute it,
  # with or without modifications, as long as this notice is preserved.
@@ -15,7 +15,7 @@
  
  @SET_MAKE@
  
-# Copyright 2000, 2001, 2002 Free Software Foundation, Inc.
+# Copyright 2000, 2001, 2002, 2012 Free Software Foundation, Inc.
  #
  # This file is part of the GNU MP Library.
  #
@@ -32,6 +32,23 @@
  # You should have received a copy of the GNU Lesser General Public License
  # along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
  VPATH = @srcdir@
+am__make_dryrun = \
+  { \
+    am__dry=no; \
+    case $$MAKEFLAGS in \
+      *\\[\ \  ]*) \
+        echo 'am--echo: ; @echo "AM"  OK' | $(MAKE) -f - 2>/dev/null \
+          | grep '^AM OK$$' >/dev/null || am__dry=yes;; \
+      *) \
+        for am__flg in $$MAKEFLAGS; do \
+          case $$am__flg in \
+            *=*|--*) ;; \
+            *n*) am__dry=yes; break;; \
+          esac; \
+        done;; \
+    esac; \
+    test $$am__dry = yes; \
+  }
  pkgdatadir = $(datadir)/@PACKAGE@
  pkgincludedir = $(includedir)/@PACKAGE@
  pkglibdir = $(libdir)/@PACKAGE@
@@ -50,7 +67,6 @@ PRE_UNINSTALL = :
  POST_UNINSTALL = :
  build_triplet = @build@
  host_triplet = @host@
-ANSI2KNR = $(top_builddir)/ansi2knr
  EXTRA_PROGRAMS = factorize$(EXEEXT) isprime$(EXEEXT) pexpr$(EXEEXT) \
         primes$(EXEEXT) qcn$(EXEEXT)
  subdir = demos
@@ -58,7 +74,7 @@ DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in \
         $(srcdir)/pexpr-config-h.in
  ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
  am__aclocal_m4_deps = $(top_srcdir)/acinclude.m4 \
-       $(top_srcdir)/configure.in
+       $(top_srcdir)/configure.ac
  am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
         $(ACLOCAL_M4)
  mkinstalldirs = $(install_sh) -d
@@ -66,23 +82,23 @@ CONFIG_HEADER = $(top_builddir)/config.h
  CONFIG_CLEAN_FILES = pexpr-config.h
  CONFIG_CLEAN_VPATH_FILES =
  factorize_SOURCES = factorize.c
-factorize_OBJECTS = factorize$U.$(OBJEXT)
+factorize_OBJECTS = factorize.$(OBJEXT)
  factorize_LDADD = $(LDADD)
  factorize_DEPENDENCIES = $(top_builddir)/libgmp.la
  isprime_SOURCES = isprime.c
-isprime_OBJECTS = isprime$U.$(OBJEXT)
+isprime_OBJECTS = isprime.$(OBJEXT)
  isprime_LDADD = $(LDADD)
  isprime_DEPENDENCIES = $(top_builddir)/libgmp.la
  pexpr_SOURCES = pexpr.c
-pexpr_OBJECTS = pexpr$U.$(OBJEXT)
+pexpr_OBJECTS = pexpr.$(OBJEXT)
  pexpr_LDADD = $(LDADD)
  pexpr_DEPENDENCIES = $(top_builddir)/libgmp.la
  primes_SOURCES = primes.c
-primes_OBJECTS = primes$U.$(OBJEXT)
+primes_OBJECTS = primes.$(OBJEXT)
  am__DEPENDENCIES_1 =
  primes_DEPENDENCIES = $(LDADD) $(am__DEPENDENCIES_1)
  qcn_SOURCES = qcn.c
-qcn_OBJECTS = qcn$U.$(OBJEXT)
+qcn_OBJECTS = qcn.$(OBJEXT)
  qcn_DEPENDENCIES = $(LDADD) $(am__DEPENDENCIES_1)
  DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir)
  depcomp =
@@ -105,6 +121,11 @@ RECURSIVE_TARGETS = all-recursive check-recursive dvi-recursive \
         install-pdf-recursive install-ps-recursive install-recursive \
         installcheck-recursive installdirs-recursive pdf-recursive \
         ps-recursive uninstall-recursive
+am__can_run_installinfo = \
+  case $$AM_UPDATE_INFO_DIR in \
+    n|no|NO) false;; \
+    *) (install-info --version) >/dev/null 2>&1;; \
+  esac
  RECURSIVE_CLEAN_TARGETS = mostlyclean-recursive clean-recursive        \
    distclean-recursive maintainer-clean-recursive
  AM_RECURSIVE_TARGETS = $(RECURSIVE_TARGETS:-recursive=) \
@@ -237,8 +258,8 @@ SHELL = @SHELL@
  SPEED_CYCLECOUNTER_OBJ = @SPEED_CYCLECOUNTER_OBJ@
  STRIP = @STRIP@
  TAL_OBJECT = @TAL_OBJECT@
+TUNE_LIBS = @TUNE_LIBS@
  TUNE_SQR_OBJ = @TUNE_SQR_OBJ@
-U = @U@
  U_FOR_BUILD = @U_FOR_BUILD@
  VERSION = @VERSION@
  WITH_READLINE_01 = @WITH_READLINE_01@
@@ -285,7 +306,6 @@ mandir = @mandir@
  mkdir_p = @mkdir_p@
  mpn_objects = @mpn_objects@
  mpn_objs_in_libgmp = @mpn_objs_in_libgmp@
-mpn_objs_in_libmp = @mpn_objs_in_libmp@
  oldincludedir = @oldincludedir@
  pdfdir = @pdfdir@
  prefix = @prefix@
@@ -300,7 +320,7 @@ top_build_prefix = @top_build_prefix@
  top_builddir = @top_builddir@
  top_srcdir = @top_srcdir@
  SUBDIRS = calc expr
-EXTRA_DIST = perl
+EXTRA_DIST = perl primes.h
  INCLUDES = -I$(top_srcdir)
  LDADD = $(top_builddir)/libgmp.la
  qcn_LDADD = $(LDADD) $(LIBM)
@@ -342,19 +362,19 @@ $(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps)
  $(am__aclocal_m4_deps):
  pexpr-config.h: $(top_builddir)/config.status $(srcdir)/pexpr-config-h.in
         cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@
-factorize$(EXEEXT): $(factorize_OBJECTS) $(factorize_DEPENDENCIES) 
+factorize$(EXEEXT): $(factorize_OBJECTS) $(factorize_DEPENDENCIES) $(EXTRA_factorize_DEPENDENCIES) 
         @rm -f factorize$(EXEEXT)
         $(LINK) $(factorize_OBJECTS) $(factorize_LDADD) $(LIBS)
-isprime$(EXEEXT): $(isprime_OBJECTS) $(isprime_DEPENDENCIES) 
+isprime$(EXEEXT): $(isprime_OBJECTS) $(isprime_DEPENDENCIES) $(EXTRA_isprime_DEPENDENCIES) 
         @rm -f isprime$(EXEEXT)
         $(LINK) $(isprime_OBJECTS) $(isprime_LDADD) $(LIBS)
-pexpr$(EXEEXT): $(pexpr_OBJECTS) $(pexpr_DEPENDENCIES) 
+pexpr$(EXEEXT): $(pexpr_OBJECTS) $(pexpr_DEPENDENCIES) $(EXTRA_pexpr_DEPENDENCIES) 
         @rm -f pexpr$(EXEEXT)
         $(LINK) $(pexpr_OBJECTS) $(pexpr_LDADD) $(LIBS)
-primes$(EXEEXT): $(primes_OBJECTS) $(primes_DEPENDENCIES) 
+primes$(EXEEXT): $(primes_OBJECTS) $(primes_DEPENDENCIES) $(EXTRA_primes_DEPENDENCIES) 
         @rm -f primes$(EXEEXT)
         $(LINK) $(primes_OBJECTS) $(primes_LDADD) $(LIBS)
-qcn$(EXEEXT): $(qcn_OBJECTS) $(qcn_DEPENDENCIES) 
+qcn$(EXEEXT): $(qcn_OBJECTS) $(qcn_DEPENDENCIES) $(EXTRA_qcn_DEPENDENCIES) 
         @rm -f qcn$(EXEEXT)
         $(LINK) $(qcn_OBJECTS) $(qcn_LDADD) $(LIBS)
  
@@ -363,11 +383,6 @@ mostlyclean-compile:
  
  distclean-compile:
         -rm -f *.tab.c
-$(top_builddir)/ansi2knr:
-       $(am__cd) $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) ./ansi2knr
-
-mostlyclean-kr:
-       -test "$U" = "" || rm -f *_.c
  
  .c.o:
         $(COMPILE) -c $<
@@ -377,19 +392,6 @@ mostlyclean-kr:
  
  .c.lo:
         $(LTCOMPILE) -c -o $@ $<
-factorize_.c: factorize.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/factorize.c; then echo $(srcdir)/factorize.c; else echo factorize.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-isprime_.c: isprime.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/isprime.c; then echo $(srcdir)/isprime.c; else echo isprime.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-pexpr_.c: pexpr.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/pexpr.c; then echo $(srcdir)/pexpr.c; else echo pexpr.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-primes_.c: primes.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/primes.c; then echo $(srcdir)/primes.c; else echo primes.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-qcn_.c: qcn.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/qcn.c; then echo $(srcdir)/qcn.c; else echo qcn.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-factorize_.$(OBJEXT) factorize_.lo isprime_.$(OBJEXT) isprime_.lo \
-pexpr_.$(OBJEXT) pexpr_.lo primes_.$(OBJEXT) primes_.lo qcn_.$(OBJEXT) \
-qcn_.lo : $(ANSI2KNR)
  
  mostlyclean-libtool:
         -rm -f *.lo
@@ -564,13 +566,10 @@ distdir: $(DISTFILES)
         done
         @list='$(DIST_SUBDIRS)'; for subdir in $$list; do \
           if test "$$subdir" = .; then :; else \
-           test -d "$(distdir)/$$subdir" \
-           || $(MKDIR_P) "$(distdir)/$$subdir" \
-           || exit 1; \
-         fi; \
-       done
-       @list='$(DIST_SUBDIRS)'; for subdir in $$list; do \
-         if test "$$subdir" = .; then :; else \
+           $(am__make_dryrun) \
+             || test -d "$(distdir)/$$subdir" \
+             || $(MKDIR_P) "$(distdir)/$$subdir" \
+             || exit 1; \
             dir1=$$subdir; dir2="$(distdir)/$$subdir"; \
             $(am__relativize); \
             new_distdir=$$reldir; \
@@ -605,10 +604,15 @@ install-am: all-am
  
  installcheck: installcheck-recursive
  install-strip:
-       $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
-         install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
-         `test -z '$(STRIP)' || \
-           echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+       if test -z '$(STRIP)'; then \
+         $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+           install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+             install; \
+       else \
+         $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+           install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+           "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
+       fi
  mostlyclean-generic:
  
  clean-generic:
@@ -676,7 +680,7 @@ maintainer-clean-am: distclean-am maintainer-clean-generic
  
  mostlyclean: mostlyclean-recursive
  
-mostlyclean-am: mostlyclean-compile mostlyclean-generic mostlyclean-kr \
+mostlyclean-am: mostlyclean-compile mostlyclean-generic \
         mostlyclean-libtool
  
  pdf: pdf-recursive
@@ -689,9 +693,8 @@ ps-am:
  
  uninstall-am:
  
-.MAKE: $(RECURSIVE_CLEAN_TARGETS) $(RECURSIVE_TARGETS) \
-       $(top_builddir)/ansi2knr ctags-recursive install-am \
-       install-strip tags-recursive
+.MAKE: $(RECURSIVE_CLEAN_TARGETS) $(RECURSIVE_TARGETS) ctags-recursive \
+       install-am install-strip tags-recursive
  
  .PHONY: $(RECURSIVE_CLEAN_TARGETS) $(RECURSIVE_TARGETS) CTAGS GTAGS \
         all all-am check check-am clean clean-generic clean-libtool \
@@ -704,9 +707,8 @@ uninstall-am:
         install-pdf-am install-ps install-ps-am install-strip \
         installcheck installcheck-am installdirs installdirs-am \
         maintainer-clean maintainer-clean-generic mostlyclean \
-       mostlyclean-compile mostlyclean-generic mostlyclean-kr \
-       mostlyclean-libtool pdf pdf-am ps ps-am tags tags-recursive \
-       uninstall uninstall-am
+       mostlyclean-compile mostlyclean-generic mostlyclean-libtool \
+       pdf pdf-am ps ps-am tags tags-recursive uninstall uninstall-am
  
  
  allprogs: $(EXTRA_PROGRAMS)
diff --git a/demos/calc/Makefile.in b/demos/calc/Makefile.in

index 9053e0f0ff44bfa82f8784868fde55ebd3a466f8..d84f2a27b4a54235b5b1f2b1b8d0dc5abd289c99 100644 (file)
--- a/demos/calc/Makefile.in
+++ b/demos/calc/Makefile.in
@@ -1,9 +1,9 @@
-# Makefile.in generated by automake 1.11.1 from Makefile.am.
+# Makefile.in generated by automake 1.11.6 from Makefile.am.
  # @configure_input@
  
  # Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
-# 2003, 2004, 2005, 2006, 2007, 2008, 2009  Free Software Foundation,
-# Inc.
+# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software
+# Foundation, Inc.
  # This Makefile.in is free software; the Free Software Foundation
  # gives unlimited permission to copy and/or distribute it,
  # with or without modifications, as long as this notice is preserved.
@@ -32,6 +32,23 @@
  # You should have received a copy of the GNU Lesser General Public License
  # along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
  VPATH = @srcdir@
+am__make_dryrun = \
+  { \
+    am__dry=no; \
+    case $$MAKEFLAGS in \
+      *\\[\ \  ]*) \
+        echo 'am--echo: ; @echo "AM"  OK' | $(MAKE) -f - 2>/dev/null \
+          | grep '^AM OK$$' >/dev/null || am__dry=yes;; \
+      *) \
+        for am__flg in $$MAKEFLAGS; do \
+          case $$am__flg in \
+            *=*|--*) ;; \
+            *n*) am__dry=yes; break;; \
+          esac; \
+        done;; \
+    esac; \
+    test $$am__dry = yes; \
+  }
  pkgdatadir = $(datadir)/@PACKAGE@
  pkgincludedir = $(includedir)/@PACKAGE@
  pkglibdir = $(libdir)/@PACKAGE@
@@ -50,22 +67,20 @@ PRE_UNINSTALL = :
  POST_UNINSTALL = :
  build_triplet = @build@
  host_triplet = @host@
-ANSI2KNR = $(top_builddir)/ansi2knr
  EXTRA_PROGRAMS = calc$(EXEEXT)
  subdir = demos/calc
  DIST_COMMON = README $(srcdir)/Makefile.am $(srcdir)/Makefile.in \
         $(srcdir)/calc-config-h.in calc.c calc.h calclex.c
  ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
  am__aclocal_m4_deps = $(top_srcdir)/acinclude.m4 \
-       $(top_srcdir)/configure.in
+       $(top_srcdir)/configure.ac
  am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
         $(ACLOCAL_M4)
  mkinstalldirs = $(install_sh) -d
  CONFIG_HEADER = $(top_builddir)/config.h
  CONFIG_CLEAN_FILES = calc-config.h
  CONFIG_CLEAN_VPATH_FILES =
-am_calc_OBJECTS = calc$U.$(OBJEXT) calclex$U.$(OBJEXT) \
-       calcread$U.$(OBJEXT)
+am_calc_OBJECTS = calc.$(OBJEXT) calclex.$(OBJEXT) calcread.$(OBJEXT)
  calc_OBJECTS = $(am_calc_OBJECTS)
  calc_LDADD = $(LDADD)
  am__DEPENDENCIES_1 =
@@ -84,16 +99,21 @@ LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \
         --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) $(AM_LDFLAGS) \
         $(LDFLAGS) -o $@
  @MAINTAINER_MODE_FALSE@am__skiplex = test -f $@ ||
-LEXCOMPILE = $(LEX) $(LFLAGS) $(AM_LFLAGS)
+LEXCOMPILE = $(LEX) $(AM_LFLAGS) $(LFLAGS)
  LTLEXCOMPILE = $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \
-       --mode=compile $(LEX) $(LFLAGS) $(AM_LFLAGS)
+       --mode=compile $(LEX) $(AM_LFLAGS) $(LFLAGS)
  YLWRAP = $(top_srcdir)/ylwrap
  @MAINTAINER_MODE_FALSE@am__skipyacc = test -f $@ ||
-YACCCOMPILE = $(YACC) $(YFLAGS) $(AM_YFLAGS)
+YACCCOMPILE = $(YACC) $(AM_YFLAGS) $(YFLAGS)
  LTYACCCOMPILE = $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \
-       --mode=compile $(YACC) $(YFLAGS) $(AM_YFLAGS)
+       --mode=compile $(YACC) $(AM_YFLAGS) $(YFLAGS)
  SOURCES = $(calc_SOURCES)
  DIST_SOURCES = $(calc_SOURCES)
+am__can_run_installinfo = \
+  case $$AM_UPDATE_INFO_DIR in \
+    n|no|NO) false;; \
+    *) (install-info --version) >/dev/null 2>&1;; \
+  esac
  ETAGS = etags
  CTAGS = ctags
  DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
@@ -195,8 +215,8 @@ SHELL = @SHELL@
  SPEED_CYCLECOUNTER_OBJ = @SPEED_CYCLECOUNTER_OBJ@
  STRIP = @STRIP@
  TAL_OBJECT = @TAL_OBJECT@
+TUNE_LIBS = @TUNE_LIBS@
  TUNE_SQR_OBJ = @TUNE_SQR_OBJ@
-U = @U@
  U_FOR_BUILD = @U_FOR_BUILD@
  VERSION = @VERSION@
  WITH_READLINE_01 = @WITH_READLINE_01@
@@ -243,7 +263,6 @@ mandir = @mandir@
  mkdir_p = @mkdir_p@
  mpn_objects = @mpn_objects@
  mpn_objs_in_libgmp = @mpn_objs_in_libgmp@
-mpn_objs_in_libmp = @mpn_objs_in_libmp@
  oldincludedir = @oldincludedir@
  pdfdir = @pdfdir@
  prefix = @prefix@
@@ -306,11 +325,9 @@ $(am__aclocal_m4_deps):
  calc-config.h: $(top_builddir)/config.status $(srcdir)/calc-config-h.in
         cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@
  calc.h: calc.c
-       @if test ! -f $@; then \
-         rm -f calc.c; \
-         $(MAKE) $(AM_MAKEFLAGS) calc.c; \
-       else :; fi
-calc$(EXEEXT): $(calc_OBJECTS) $(calc_DEPENDENCIES) 
+       @if test ! -f $@; then rm -f calc.c; else :; fi
+       @if test ! -f $@; then $(MAKE) $(AM_MAKEFLAGS) calc.c; else :; fi
+calc$(EXEEXT): $(calc_OBJECTS) $(calc_DEPENDENCIES) $(EXTRA_calc_DEPENDENCIES) 
         @rm -f calc$(EXEEXT)
         $(LINK) $(calc_OBJECTS) $(calc_LDADD) $(LIBS)
  
@@ -319,11 +336,6 @@ mostlyclean-compile:
  
  distclean-compile:
         -rm -f *.tab.c
-$(top_builddir)/ansi2knr:
-       $(am__cd) $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) ./ansi2knr
-
-mostlyclean-kr:
-       -test "$U" = "" || rm -f *_.c
  
  .c.o:
         $(COMPILE) -c $<
@@ -333,14 +345,6 @@ mostlyclean-kr:
  
  .c.lo:
         $(LTCOMPILE) -c -o $@ $<
-calc_.c: calc.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/calc.c; then echo $(srcdir)/calc.c; else echo calc.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-calclex_.c: calclex.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/calclex.c; then echo $(srcdir)/calclex.c; else echo calclex.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-calcread_.c: calcread.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/calcread.c; then echo $(srcdir)/calcread.c; else echo calcread.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-calc_.$(OBJEXT) calc_.lo calclex_.$(OBJEXT) calclex_.lo \
-calcread_.$(OBJEXT) calcread_.lo : $(ANSI2KNR)
  
  .l.c:
         $(am__skiplex) $(SHELL) $(YLWRAP) $< $(LEX_OUTPUT_ROOT).c $@ -- $(LEXCOMPILE)
@@ -452,10 +456,15 @@ install-am: all-am
  
  installcheck: installcheck-am
  install-strip:
-       $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
-         install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
-         `test -z '$(STRIP)' || \
-           echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+       if test -z '$(STRIP)'; then \
+         $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+           install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+             install; \
+       else \
+         $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+           install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+           "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
+       fi
  mostlyclean-generic:
  
  clean-generic:
@@ -527,7 +536,7 @@ maintainer-clean-am: distclean-am maintainer-clean-generic
  
  mostlyclean: mostlyclean-am
  
-mostlyclean-am: mostlyclean-compile mostlyclean-generic mostlyclean-kr \
+mostlyclean-am: mostlyclean-compile mostlyclean-generic \
         mostlyclean-libtool
  
  pdf: pdf-am
@@ -540,8 +549,7 @@ ps-am:
  
  uninstall-am:
  
-.MAKE: $(top_builddir)/ansi2knr all check install install-am \
-       install-strip
+.MAKE: all check install install-am install-strip
  
  .PHONY: CTAGS GTAGS all all-am check check-am clean clean-generic \
         clean-libtool ctags distclean distclean-compile \
@@ -553,8 +561,8 @@ uninstall-am:
         install-pdf-am install-ps install-ps-am install-strip \
         installcheck installcheck-am installdirs maintainer-clean \
         maintainer-clean-generic mostlyclean mostlyclean-compile \
-       mostlyclean-generic mostlyclean-kr mostlyclean-libtool pdf \
-       pdf-am ps ps-am tags uninstall uninstall-am
+       mostlyclean-generic mostlyclean-libtool pdf pdf-am ps ps-am \
+       tags uninstall uninstall-am
  
  
  allprogs: $(EXTRA_PROGRAMS)
diff --git a/demos/calc/calc.c b/demos/calc/calc.c

index 40ed02ea2011e6138885d1133ff9878b91324e55..1b7777d634a9f944631db35e068ccff17f5e91bb 100644 (file)
--- a/demos/calc/calc.c
+++ b/demos/calc/calc.c
@@ -1,8 +1,8 @@
-/* A Bison parser, made by GNU Bison 2.5.  */
+/* A Bison parser, made by GNU Bison 2.7.12-4996.  */
  
  /* Bison implementation for Yacc-like parsers in C
     
-      Copyright (C) 1984, 1989-1990, 2000-2011 Free Software Foundation, Inc.
+      Copyright (C) 1984, 1989-1990, 2000-2013 Free Software Foundation, Inc.
     
     This program is free software: you can redistribute it and/or modify
     it under the terms of the GNU General Public License as published by
@@ -44,7 +44,7 @@
  #define YYBISON 1
  
  /* Bison version.  */
-#define YYBISON_VERSION "2.5"
+#define YYBISON_VERSION "2.7.12-4996"
  
  /* Skeleton name.  */
  #define YYSKELETON_NAME "yacc.c"
@@ -58,14 +58,11 @@
  /* Pull parsers.  */
  #define YYPULL 1
  
-/* Using locations.  */
-#define YYLSP_NEEDED 0
  
  
  
  /* Copy the first part of user declarations.  */
-
-/* Line 268 of yacc.c  */
+/* Line 371 of yacc.c  */
  #line 1 "calc.y"
  
  /* A simple integer desk calculator using yacc and gmp.
@@ -207,14 +204,16 @@ mpz_t  variable[26];
      }
  
  
+/* Line 371 of yacc.c  */
+#line 209 "calc.c"
  
-/* Line 268 of yacc.c  */
-#line 213 "calc.c"
-
-/* Enabling traces.  */
-#ifndef YYDEBUG
-# define YYDEBUG 0
-#endif
+# ifndef YY_NULL
+#  if defined __cplusplus && 201103L <= __cplusplus
+#   define YY_NULL nullptr
+#  else
+#   define YY_NULL 0
+#  endif
+# endif
  
  /* Enabling verbose error messages.  */
  #ifdef YYERROR_VERBOSE
@@ -224,11 +223,17 @@ mpz_t  variable[26];
  # define YYERROR_VERBOSE 0
  #endif
  
-/* Enabling the token table.  */
-#ifndef YYTOKEN_TABLE
-# define YYTOKEN_TABLE 0
+/* In a future release of Bison, this section will be replaced
+   by #include "y.tab.h".  */
+#ifndef YY_YY_Y_TAB_H_INCLUDED
+# define YY_YY_Y_TAB_H_INCLUDED
+/* Enabling traces.  */
+#ifndef YYDEBUG
+# define YYDEBUG 0
+#endif
+#if YYDEBUG
+extern int yydebug;
  #endif
-
  
  /* Tokens.  */
  #ifndef YYTOKENTYPE
@@ -298,33 +303,46 @@ mpz_t  variable[26];
  
  
  
-
  #if ! defined YYSTYPE && ! defined YYSTYPE_IS_DECLARED
  typedef union YYSTYPE
  {
-
-/* Line 293 of yacc.c  */
+/* Line 387 of yacc.c  */
  #line 142 "calc.y"
  
    char  *str;
    int   var;
  
  
-
-/* Line 293 of yacc.c  */
-#line 316 "calc.c"
+/* Line 387 of yacc.c  */
+#line 318 "calc.c"
  } YYSTYPE;
  # define YYSTYPE_IS_TRIVIAL 1
  # define yystype YYSTYPE /* obsolescent; will be withdrawn */
  # define YYSTYPE_IS_DECLARED 1
  #endif
  
+extern YYSTYPE yylval;
  
-/* Copy the second part of user declarations.  */
+#ifdef YYPARSE_PARAM
+#if defined __STDC__ || defined __cplusplus
+int yyparse (void *YYPARSE_PARAM);
+#else
+int yyparse ();
+#endif
+#else /* ! YYPARSE_PARAM */
+#if defined __STDC__ || defined __cplusplus
+int yyparse (void);
+#else
+int yyparse ();
+#endif
+#endif /* ! YYPARSE_PARAM */
+
+#endif /* !YY_YY_Y_TAB_H_INCLUDED  */
  
+/* Copy the second part of user declarations.  */
  
-/* Line 343 of yacc.c  */
-#line 328 "calc.c"
+/* Line 390 of yacc.c  */
+#line 346 "calc.c"
  
  #ifdef short
  # undef short
@@ -377,24 +395,33 @@ typedef short int yytype_int16;
  # if defined YYENABLE_NLS && YYENABLE_NLS
  #  if ENABLE_NLS
  #   include <libintl.h> /* INFRINGES ON USER NAME SPACE */
-#   define YY_(msgid) dgettext ("bison-runtime", msgid)
+#   define YY_(Msgid) dgettext ("bison-runtime", Msgid)
  #  endif
  # endif
  # ifndef YY_
-#  define YY_(msgid) msgid
+#  define YY_(Msgid) Msgid
+# endif
+#endif
+
+#ifndef __attribute__
+/* This feature is available in gcc versions 2.5 and later.  */
+# if (! defined __GNUC__ || __GNUC__ < 2 \
+      || (__GNUC__ == 2 && __GNUC_MINOR__ < 5))
+#  define __attribute__(Spec) /* empty */
  # endif
  #endif
  
  /* Suppress unused-variable warnings by "using" E.  */
  #if ! defined lint || defined __GNUC__
-# define YYUSE(e) ((void) (e))
+# define YYUSE(E) ((void) (E))
  #else
-# define YYUSE(e) /* empty */
+# define YYUSE(E) /* empty */
  #endif
  
+
  /* Identity function, used to suppress warnings about constant conditions.  */
  #ifndef lint
-# define YYID(n) (n)
+# define YYID(N) (N)
  #else
  #if (defined __STDC__ || defined __C99__FUNC__ \
       || defined __cplusplus || defined _MSC_VER)
@@ -430,6 +457,7 @@ YYID (yyi)
  #    if ! defined _ALLOCA_H && ! defined EXIT_SUCCESS && (defined __STDC__ || defined __C99__FUNC__ \
       || defined __cplusplus || defined _MSC_VER)
  #     include <stdlib.h> /* INFRINGES ON USER NAME SPACE */
+      /* Use EXIT_SUCCESS as a witness for stdlib.h.  */
  #     ifndef EXIT_SUCCESS
  #      define EXIT_SUCCESS 0
  #     endif
@@ -521,20 +549,20 @@ union yyalloc
  #endif
  
  #if defined YYCOPY_NEEDED && YYCOPY_NEEDED
-/* Copy COUNT objects from FROM to TO.  The source and destination do
+/* Copy COUNT objects from SRC to DST.  The source and destination do
     not overlap.  */
  # ifndef YYCOPY
  #  if defined __GNUC__ && 1 < __GNUC__
-#   define YYCOPY(To, From, Count) \
-      __builtin_memcpy (To, From, (Count) * sizeof (*(From)))
+#   define YYCOPY(Dst, Src, Count) \
+      __builtin_memcpy (Dst, Src, (Count) * sizeof (*(Src)))
  #  else
-#   define YYCOPY(To, From, Count)             \
-      do                                       \
-       {                                       \
-         YYSIZE_T yyi;                         \
-         for (yyi = 0; yyi < (Count); yyi++)   \
-           (To)[yyi] = (From)[yyi];            \
-       }                                       \
+#   define YYCOPY(Dst, Src, Count)              \
+      do                                        \
+        {                                       \
+          YYSIZE_T yyi;                         \
+          for (yyi = 0; yyi < (Count); yyi++)   \
+            (Dst)[yyi] = (Src)[yyi];            \
+        }                                       \
        while (YYID (0))
  #  endif
  # endif
@@ -642,7 +670,7 @@ static const yytype_uint16 yyrline[] =
  };
  #endif
  
-#if YYDEBUG || YYERROR_VERBOSE || YYTOKEN_TABLE
+#if YYDEBUG || YYERROR_VERBOSE || 0
  /* YYTNAME[SYMBOL-NUM] -- String name of the symbol SYMBOL-NUM.
     First, the terminals, then, starting at YYNTOKENS, nonterminals.  */
  static const char *const yytname[] =
@@ -652,7 +680,7 @@ static const char *const yytname[] =
    "POWM", "ROOT", "SQRT", "NUMBER", "VARIABLE", "LOR", "LAND", "'<'",
    "'>'", "GE", "LE", "NE", "EQ", "RSHIFT", "LSHIFT", "'+'", "'-'", "'*'",
    "'/'", "'%'", "UMINUS", "'^'", "'!'", "'='", "'('", "')'", "','",
-  "$accept", "top", "statements", "statement", "e", "gcdlist", "lcmlist", 0
+  "$accept", "top", "statements", "statement", "e", "gcdlist", "lcmlist", YY_NULL
  };
  #endif
  
@@ -803,11 +831,11 @@ static const yytype_int8 yytable[] =
         0,    59,    60
  };
  
-#define yypact_value_is_default(yystate) \
-  ((yystate) == (-39))
+#define yypact_value_is_default(Yystate) \
+  (!!((Yystate) == (-39)))
  
-#define yytable_value_is_error(yytable_value) \
-  ((yytable_value) == (-8))
+#define yytable_value_is_error(Yytable_value) \
+  (!!((Yytable_value) == (-8)))
  
  static const yytype_int8 yycheck[] =
  {
@@ -914,62 +942,35 @@ static const yytype_uint8 yystos[] =
  
  #define YYRECOVERING()  (!!yyerrstatus)
  
-#define YYBACKUP(Token, Value)                                 \
-do                                                             \
-  if (yychar == YYEMPTY && yylen == 1)                         \
-    {                                                          \
-      yychar = (Token);                                                \
-      yylval = (Value);                                                \
-      YYPOPSTACK (1);                                          \
-      goto yybackup;                                           \
-    }                                                          \
-  else                                                         \
-    {                                                          \
+#define YYBACKUP(Token, Value)                                  \
+do                                                              \
+  if (yychar == YYEMPTY)                                        \
+    {                                                           \
+      yychar = (Token);                                         \
+      yylval = (Value);                                         \
+      YYPOPSTACK (yylen);                                       \
+      yystate = *yyssp;                                         \
+      goto yybackup;                                            \
+    }                                                           \
+  else                                                          \
+    {                                                           \
        yyerror (YY_("syntax error: cannot back up")); \
        YYERROR;                                                 \
      }                                                          \
  while (YYID (0))
  
-
+/* Error token number */
  #define YYTERROR       1
  #define YYERRCODE      256
  
  
-/* YYLLOC_DEFAULT -- Set CURRENT to span from RHS[1] to RHS[N].
-   If N is 0, then set CURRENT to the empty location which ends
-   the previous symbol: RHS[0] (always defined).  */
-
-#define YYRHSLOC(Rhs, K) ((Rhs)[K])
-#ifndef YYLLOC_DEFAULT
-# define YYLLOC_DEFAULT(Current, Rhs, N)                               \
-    do                                                                 \
-      if (YYID (N))                                                    \
-       {                                                               \
-         (Current).first_line   = YYRHSLOC (Rhs, 1).first_line;        \
-         (Current).first_column = YYRHSLOC (Rhs, 1).first_column;      \
-         (Current).last_line    = YYRHSLOC (Rhs, N).last_line;         \
-         (Current).last_column  = YYRHSLOC (Rhs, N).last_column;       \
-       }                                                               \
-      else                                                             \
-       {                                                               \
-         (Current).first_line   = (Current).last_line   =              \
-           YYRHSLOC (Rhs, 0).last_line;                                \
-         (Current).first_column = (Current).last_column =              \
-           YYRHSLOC (Rhs, 0).last_column;                              \
-       }                                                               \
-    while (YYID (0))
-#endif
-
-
  /* This macro is provided for backward compatibility. */
-
  #ifndef YY_LOCATION_PRINT
  # define YY_LOCATION_PRINT(File, Loc) ((void) 0)
  #endif
  
  
  /* YYLEX -- calling `yylex' with the right arguments.  */
-
  #ifdef YYLEX_PARAM
  # define YYLEX yylex (YYLEX_PARAM)
  #else
@@ -1019,6 +1020,8 @@ yy_symbol_value_print (yyoutput, yytype, yyvaluep)
      YYSTYPE const * const yyvaluep;
  #endif
  {
+  FILE *yyo = yyoutput;
+  YYUSE (yyo);
    if (!yyvaluep)
      return;
  # ifdef YYPRINT
@@ -1027,11 +1030,7 @@ yy_symbol_value_print (yyoutput, yytype, yyvaluep)
  # else
    YYUSE (yyoutput);
  # endif
-  switch (yytype)
-    {
-      default:
-       break;
-    }
+  YYUSE (yytype);
  }
  
  
@@ -1270,12 +1269,11 @@ static int
  yysyntax_error (YYSIZE_T *yymsg_alloc, char **yymsg,
                  yytype_int16 *yyssp, int yytoken)
  {
-  YYSIZE_T yysize0 = yytnamerr (0, yytname[yytoken]);
+  YYSIZE_T yysize0 = yytnamerr (YY_NULL, yytname[yytoken]);
    YYSIZE_T yysize = yysize0;
-  YYSIZE_T yysize1;
    enum { YYERROR_VERBOSE_ARGS_MAXIMUM = 5 };
    /* Internationalized format string. */
-  const char *yyformat = 0;
+  const char *yyformat = YY_NULL;
    /* Arguments of yyformat. */
    char const *yyarg[YYERROR_VERBOSE_ARGS_MAXIMUM];
    /* Number of reported tokens (one for the "unexpected", one per
@@ -1335,11 +1333,13 @@ yysyntax_error (YYSIZE_T *yymsg_alloc, char **yymsg,
                      break;
                    }
                  yyarg[yycount++] = yytname[yyx];
-                yysize1 = yysize + yytnamerr (0, yytname[yyx]);
-                if (! (yysize <= yysize1
-                       && yysize1 <= YYSTACK_ALLOC_MAXIMUM))
-                  return 2;
-                yysize = yysize1;
+                {
+                  YYSIZE_T yysize1 = yysize + yytnamerr (YY_NULL, yytname[yyx]);
+                  if (! (yysize <= yysize1
+                         && yysize1 <= YYSTACK_ALLOC_MAXIMUM))
+                    return 2;
+                  yysize = yysize1;
+                }
                }
          }
      }
@@ -1359,10 +1359,12 @@ yysyntax_error (YYSIZE_T *yymsg_alloc, char **yymsg,
  # undef YYCASE_
      }
  
-  yysize1 = yysize + yystrlen (yyformat);
-  if (! (yysize <= yysize1 && yysize1 <= YYSTACK_ALLOC_MAXIMUM))
-    return 2;
-  yysize = yysize1;
+  {
+    YYSIZE_T yysize1 = yysize + yystrlen (yyformat);
+    if (! (yysize <= yysize1 && yysize1 <= YYSTACK_ALLOC_MAXIMUM))
+      return 2;
+    yysize = yysize1;
+  }
  
    if (*yymsg_alloc < yysize)
      {
@@ -1418,36 +1420,26 @@ yydestruct (yymsg, yytype, yyvaluep)
      yymsg = "Deleting";
    YY_SYMBOL_PRINT (yymsg, yytype, yyvaluep, yylocationp);
  
-  switch (yytype)
-    {
-
-      default:
-       break;
-    }
+  YYUSE (yytype);
  }
  
  
-/* Prevent warnings from -Wmissing-prototypes.  */
-#ifdef YYPARSE_PARAM
-#if defined __STDC__ || defined __cplusplus
-int yyparse (void *YYPARSE_PARAM);
-#else
-int yyparse ();
-#endif
-#else /* ! YYPARSE_PARAM */
-#if defined __STDC__ || defined __cplusplus
-int yyparse (void);
-#else
-int yyparse ();
-#endif
-#endif /* ! YYPARSE_PARAM */
  
  
  /* The lookahead symbol.  */
  int yychar;
  
+
+#ifndef YY_IGNORE_MAYBE_UNINITIALIZED_BEGIN
+# define YY_IGNORE_MAYBE_UNINITIALIZED_BEGIN
+# define YY_IGNORE_MAYBE_UNINITIALIZED_END
+#endif
+#ifndef YY_INITIAL_VALUE
+# define YY_INITIAL_VALUE(Value) /* Nothing. */
+#endif
+
  /* The semantic value of the lookahead symbol.  */
-YYSTYPE yylval;
+YYSTYPE yylval YY_INITIAL_VALUE(yyval_default);
  
  /* Number of syntax errors so far.  */
  int yynerrs;
@@ -1487,7 +1479,7 @@ yyparse ()
         `yyss': related to states.
         `yyvs': related to semantic values.
  
-       Refer to the stacks thru separate pointers, to allow yyoverflow
+       Refer to the stacks through separate pointers, to allow yyoverflow
         to reallocate them elsewhere.  */
  
      /* The state stack.  */
@@ -1505,7 +1497,7 @@ yyparse ()
    int yyn;
    int yyresult;
    /* Lookahead token as an internal (translated) token number.  */
-  int yytoken;
+  int yytoken = 0;
    /* The variables used to return semantic value and location from the
       action routines.  */
    YYSTYPE yyval;
@@ -1523,9 +1515,8 @@ yyparse ()
       Keep to zero when no symbol should be popped.  */
    int yylen = 0;
  
-  yytoken = 0;
-  yyss = yyssa;
-  yyvs = yyvsa;
+  yyssp = yyss = yyssa;
+  yyvsp = yyvs = yyvsa;
    yystacksize = YYINITDEPTH;
  
    YYDPRINTF ((stderr, "Starting parse\n"));
@@ -1534,14 +1525,6 @@ yyparse ()
    yyerrstatus = 0;
    yynerrs = 0;
    yychar = YYEMPTY; /* Cause a token to be read.  */
-
-  /* Initialize stack pointers.
-     Waste one element of value and location stack
-     so that they stay on the same level as the state stack.
-     The wasted elements are never initialized.  */
-  yyssp = yyss;
-  yyvsp = yyvs;
-
    goto yysetstate;
  
  /*------------------------------------------------------------.
@@ -1682,7 +1665,9 @@ yybackup:
    yychar = YYEMPTY;
  
    yystate = yyn;
+  YY_IGNORE_MAYBE_UNINITIALIZED_BEGIN
    *++yyvsp = yylval;
+  YY_IGNORE_MAYBE_UNINITIALIZED_END
  
    goto yynewstate;
  
@@ -1719,15 +1704,13 @@ yyreduce:
    switch (yyn)
      {
          case 6:
-
-/* Line 1806 of yacc.c  */
+/* Line 1787 of yacc.c  */
  #line 173 "calc.y"
      { sp = stack[0]; yyerrok; }
      break;
  
    case 8:
-
-/* Line 1806 of yacc.c  */
+/* Line 1787 of yacc.c  */
  #line 177 "calc.y"
      {
        mpz_out_str (stdout, obase, sp); putchar ('\n');
@@ -1737,8 +1720,7 @@ yyreduce:
      break;
  
    case 9:
-
-/* Line 1806 of yacc.c  */
+/* Line 1787 of yacc.c  */
  #line 182 "calc.y"
      {
        CHECK_VARIABLE ((yyvsp[(1) - (3)].var));
@@ -1749,234 +1731,202 @@ yyreduce:
      break;
  
    case 10:
-
-/* Line 1806 of yacc.c  */
+/* Line 1787 of yacc.c  */
  #line 188 "calc.y"
      { calc_help (); }
      break;
  
    case 11:
-
-/* Line 1806 of yacc.c  */
+/* Line 1787 of yacc.c  */
  #line 189 "calc.y"
      { ibase = 16; obase = -16; }
      break;
  
    case 12:
-
-/* Line 1806 of yacc.c  */
+/* Line 1787 of yacc.c  */
  #line 190 "calc.y"
      { ibase = 0;  obase = 10; }
      break;
  
    case 13:
-
-/* Line 1806 of yacc.c  */
+/* Line 1787 of yacc.c  */
  #line 191 "calc.y"
      { exit (0); }
      break;
  
    case 15:
-
-/* Line 1806 of yacc.c  */
+/* Line 1787 of yacc.c  */
  #line 198 "calc.y"
      { sp--; mpz_add    (sp, sp, sp+1); }
      break;
  
    case 16:
-
-/* Line 1806 of yacc.c  */
+/* Line 1787 of yacc.c  */
  #line 199 "calc.y"
      { sp--; mpz_sub    (sp, sp, sp+1); }
      break;
  
    case 17:
-
-/* Line 1806 of yacc.c  */
+/* Line 1787 of yacc.c  */
  #line 200 "calc.y"
      { sp--; mpz_mul    (sp, sp, sp+1); }
      break;
  
    case 18:
-
-/* Line 1806 of yacc.c  */
+/* Line 1787 of yacc.c  */
  #line 201 "calc.y"
      { sp--; mpz_fdiv_q (sp, sp, sp+1); }
      break;
  
    case 19:
-
-/* Line 1806 of yacc.c  */
+/* Line 1787 of yacc.c  */
  #line 202 "calc.y"
      { sp--; mpz_fdiv_r (sp, sp, sp+1); }
      break;
  
    case 20:
-
-/* Line 1806 of yacc.c  */
+/* Line 1787 of yacc.c  */
  #line 203 "calc.y"
      { CHECK_UI ("Exponent", sp);
                      sp--; mpz_pow_ui (sp, sp, mpz_get_ui (sp+1)); }
      break;
  
    case 21:
-
-/* Line 1806 of yacc.c  */
+/* Line 1787 of yacc.c  */
  #line 205 "calc.y"
      { CHECK_UI ("Shift count", sp);
                      sp--; mpz_mul_2exp (sp, sp, mpz_get_ui (sp+1)); }
      break;
  
    case 22:
-
-/* Line 1806 of yacc.c  */
+/* Line 1787 of yacc.c  */
  #line 207 "calc.y"
      { CHECK_UI ("Shift count", sp);
                      sp--; mpz_fdiv_q_2exp (sp, sp, mpz_get_ui (sp+1)); }
      break;
  
    case 23:
-
-/* Line 1806 of yacc.c  */
+/* Line 1787 of yacc.c  */
  #line 209 "calc.y"
      { CHECK_UI ("Factorial", sp);
                      mpz_fac_ui (sp, mpz_get_ui (sp)); }
      break;
  
    case 24:
-
-/* Line 1806 of yacc.c  */
+/* Line 1787 of yacc.c  */
  #line 211 "calc.y"
      { mpz_neg (sp, sp); }
      break;
  
    case 25:
-
-/* Line 1806 of yacc.c  */
+/* Line 1787 of yacc.c  */
  #line 213 "calc.y"
      { sp--; mpz_set_ui (sp, mpz_cmp (sp, sp+1) <  0); }
      break;
  
    case 26:
-
-/* Line 1806 of yacc.c  */
+/* Line 1787 of yacc.c  */
  #line 214 "calc.y"
      { sp--; mpz_set_ui (sp, mpz_cmp (sp, sp+1) <= 0); }
      break;
  
    case 27:
-
-/* Line 1806 of yacc.c  */
+/* Line 1787 of yacc.c  */
  #line 215 "calc.y"
      { sp--; mpz_set_ui (sp, mpz_cmp (sp, sp+1) == 0); }
      break;
  
    case 28:
-
-/* Line 1806 of yacc.c  */
+/* Line 1787 of yacc.c  */
  #line 216 "calc.y"
      { sp--; mpz_set_ui (sp, mpz_cmp (sp, sp+1) != 0); }
      break;
  
    case 29:
-
-/* Line 1806 of yacc.c  */
+/* Line 1787 of yacc.c  */
  #line 217 "calc.y"
      { sp--; mpz_set_ui (sp, mpz_cmp (sp, sp+1) >= 0); }
      break;
  
    case 30:
-
-/* Line 1806 of yacc.c  */
+/* Line 1787 of yacc.c  */
  #line 218 "calc.y"
      { sp--; mpz_set_ui (sp, mpz_cmp (sp, sp+1) >  0); }
      break;
  
    case 31:
-
-/* Line 1806 of yacc.c  */
+/* Line 1787 of yacc.c  */
  #line 220 "calc.y"
      { sp--; mpz_set_ui (sp, mpz_sgn (sp) && mpz_sgn (sp+1)); }
      break;
  
    case 32:
-
-/* Line 1806 of yacc.c  */
+/* Line 1787 of yacc.c  */
  #line 221 "calc.y"
      { sp--; mpz_set_ui (sp, mpz_sgn (sp) || mpz_sgn (sp+1)); }
      break;
  
    case 33:
-
-/* Line 1806 of yacc.c  */
+/* Line 1787 of yacc.c  */
  #line 223 "calc.y"
      { mpz_abs (sp, sp); }
      break;
  
    case 34:
-
-/* Line 1806 of yacc.c  */
+/* Line 1787 of yacc.c  */
  #line 224 "calc.y"
      { sp--; CHECK_UI ("Binomial base", sp+1);
                                     mpz_bin_ui (sp, sp, mpz_get_ui (sp+1)); }
      break;
  
    case 35:
-
-/* Line 1806 of yacc.c  */
+/* Line 1787 of yacc.c  */
  #line 226 "calc.y"
      { CHECK_UI ("Fibonacci", sp);
                                     mpz_fib_ui (sp, mpz_get_ui (sp)); }
      break;
  
    case 37:
-
-/* Line 1806 of yacc.c  */
+/* Line 1787 of yacc.c  */
  #line 229 "calc.y"
      { sp--; mpz_set_si (sp,
                                           mpz_kronecker (sp, sp+1)); }
      break;
  
    case 39:
-
-/* Line 1806 of yacc.c  */
+/* Line 1787 of yacc.c  */
  #line 232 "calc.y"
      { CHECK_UI ("Lucas number", sp);
                                     mpz_lucnum_ui (sp, mpz_get_ui (sp)); }
      break;
  
    case 40:
-
-/* Line 1806 of yacc.c  */
+/* Line 1787 of yacc.c  */
  #line 234 "calc.y"
      { mpz_nextprime (sp, sp); }
      break;
  
    case 41:
-
-/* Line 1806 of yacc.c  */
+/* Line 1787 of yacc.c  */
  #line 235 "calc.y"
      { sp -= 2; mpz_powm (sp, sp, sp+1, sp+2); }
      break;
  
    case 42:
-
-/* Line 1806 of yacc.c  */
+/* Line 1787 of yacc.c  */
  #line 236 "calc.y"
      { sp--; CHECK_UI ("Nth-root", sp+1);
                                     mpz_root (sp, sp, mpz_get_ui (sp+1)); }
      break;
  
    case 43:
-
-/* Line 1806 of yacc.c  */
+/* Line 1787 of yacc.c  */
  #line 238 "calc.y"
      { mpz_sqrt (sp, sp); }
      break;
  
    case 44:
-
-/* Line 1806 of yacc.c  */
+/* Line 1787 of yacc.c  */
  #line 240 "calc.y"
      {
          sp++;
@@ -1987,8 +1937,7 @@ yyreduce:
      break;
  
    case 45:
-
-/* Line 1806 of yacc.c  */
+/* Line 1787 of yacc.c  */
  #line 246 "calc.y"
      {
          sp++;
@@ -2002,23 +1951,20 @@ yyreduce:
      break;
  
    case 47:
-
-/* Line 1806 of yacc.c  */
+/* Line 1787 of yacc.c  */
  #line 258 "calc.y"
      { sp--; mpz_gcd (sp, sp, sp+1); }
      break;
  
    case 49:
-
-/* Line 1806 of yacc.c  */
+/* Line 1787 of yacc.c  */
  #line 262 "calc.y"
      { sp--; mpz_lcm (sp, sp, sp+1); }
      break;
  
  
-
-/* Line 1806 of yacc.c  */
-#line 2022 "calc.c"
+/* Line 1787 of yacc.c  */
+#line 1968 "calc.c"
        default: break;
      }
    /* User semantic actions sometimes alter yychar, and that requires
@@ -2181,7 +2127,9 @@ yyerrlab1:
        YY_STACK_PRINT (yyss, yyssp);
      }
  
+  YY_IGNORE_MAYBE_UNINITIALIZED_BEGIN
    *++yyvsp = yylval;
+  YY_IGNORE_MAYBE_UNINITIALIZED_END
  
  
    /* Shift the error token.  */
@@ -2205,7 +2153,7 @@ yyabortlab:
    yyresult = 1;
    goto yyreturn;
  
-#if !defined(yyoverflow) || YYERROR_VERBOSE
+#if !defined yyoverflow || YYERROR_VERBOSE
  /*-------------------------------------------------.
  | yyexhaustedlab -- memory exhaustion comes here.  |
  `-------------------------------------------------*/
@@ -2247,8 +2195,7 @@ yyreturn:
  }
  
  
-
-/* Line 2067 of yacc.c  */
+/* Line 2050 of yacc.c  */
  #line 264 "calc.y"
  
  
@@ -2305,4 +2252,3 @@ main (int argc, char *argv[])
  
    return yyparse ();
  }
-
diff --git a/demos/calc/calc.h b/demos/calc/calc.h

index 2afcf0f26bb232d8c0f04d7560c3dd223e1a64ef..78e0195d244283b926b860c8c55d9d4bf5bc582c 100644 (file)
--- a/demos/calc/calc.h
+++ b/demos/calc/calc.h
@@ -1,8 +1,8 @@
-/* A Bison parser, made by GNU Bison 2.5.  */
+/* A Bison parser, made by GNU Bison 2.7.12-4996.  */
  
  /* Bison interface for Yacc-like parsers in C
     
-      Copyright (C) 1984, 1989-1990, 2000-2011 Free Software Foundation, Inc.
+      Copyright (C) 1984, 1989-1990, 2000-2013 Free Software Foundation, Inc.
     
     This program is free software: you can redistribute it and/or modify
     it under the terms of the GNU General Public License as published by
@@ -30,6 +30,15 @@
     This special exception was added by the Free Software Foundation in
     version 2.2 of Bison.  */
  
+#ifndef YY_YY_CALC_H_INCLUDED
+# define YY_YY_CALC_H_INCLUDED
+/* Enabling traces.  */
+#ifndef YYDEBUG
+# define YYDEBUG 0
+#endif
+#if YYDEBUG
+extern int yydebug;
+#endif
  
  /* Tokens.  */
  #ifndef YYTOKENTYPE
@@ -99,21 +108,18 @@
  
  
  
-
  #if ! defined YYSTYPE && ! defined YYSTYPE_IS_DECLARED
  typedef union YYSTYPE
  {
-
-/* Line 2068 of yacc.c  */
+/* Line 2053 of yacc.c  */
  #line 142 "calc.y"
  
    char  *str;
    int   var;
  
  
-
-/* Line 2068 of yacc.c  */
-#line 117 "calc.h"
+/* Line 2053 of yacc.c  */
+#line 123 "calc.h"
  } YYSTYPE;
  # define YYSTYPE_IS_TRIVIAL 1
  # define yystype YYSTYPE /* obsolescent; will be withdrawn */
@@ -122,4 +128,18 @@ typedef union YYSTYPE
  
  extern YYSTYPE yylval;
  
+#ifdef YYPARSE_PARAM
+#if defined __STDC__ || defined __cplusplus
+int yyparse (void *YYPARSE_PARAM);
+#else
+int yyparse ();
+#endif
+#else /* ! YYPARSE_PARAM */
+#if defined __STDC__ || defined __cplusplus
+int yyparse (void);
+#else
+int yyparse ();
+#endif
+#endif /* ! YYPARSE_PARAM */
  
+#endif /* !YY_YY_CALC_H_INCLUDED  */
diff --git a/demos/calc/calclex.c b/demos/calc/calclex.c

index df6f33156223c04bff19e1740b5a37521d8c33d6..d61e32b60d2d2fb3cb27a25bbea1ca8d0363d033 100644 (file)
--- a/demos/calc/calclex.c
+++ b/demos/calc/calclex.c
@@ -8,7 +8,7 @@
  #define FLEX_SCANNER
  #define YY_FLEX_MAJOR_VERSION 2
  #define YY_FLEX_MINOR_VERSION 5
-#define YY_FLEX_SUBMINOR_VERSION 35
+#define YY_FLEX_SUBMINOR_VERSION 37
  #if YY_FLEX_SUBMINOR_VERSION > 0
  #define FLEX_BETA
  #endif
@@ -53,7 +53,6 @@ typedef int flex_int32_t;
  typedef unsigned char flex_uint8_t; 
  typedef unsigned short int flex_uint16_t;
  typedef unsigned int flex_uint32_t;
-#endif /* ! C99 */
  
  /* Limits of integral types. */
  #ifndef INT8_MIN
@@ -84,6 +83,8 @@ typedef unsigned int flex_uint32_t;
  #define UINT32_MAX             (4294967295U)
  #endif
  
+#endif /* ! C99 */
+
  #endif /* ! FLEXINT_H */
  
  #ifdef __cplusplus
@@ -152,7 +153,12 @@ typedef unsigned int flex_uint32_t;
  typedef struct yy_buffer_state *YY_BUFFER_STATE;
  #endif
  
-extern int yyleng;
+#ifndef YY_TYPEDEF_YY_SIZE_T
+#define YY_TYPEDEF_YY_SIZE_T
+typedef size_t yy_size_t;
+#endif
+
+extern yy_size_t yyleng;
  
  extern FILE *yyin, *yyout;
  
@@ -178,11 +184,6 @@ extern FILE *yyin, *yyout;
  
  #define unput(c) yyunput( c, (yytext_ptr)  )
  
-#ifndef YY_TYPEDEF_YY_SIZE_T
-#define YY_TYPEDEF_YY_SIZE_T
-typedef size_t yy_size_t;
-#endif
-
  #ifndef YY_STRUCT_YY_BUFFER_STATE
  #define YY_STRUCT_YY_BUFFER_STATE
  struct yy_buffer_state
@@ -200,7 +201,7 @@ struct yy_buffer_state
         /* Number of characters read into yy_ch_buf, not including EOB
          * characters.
          */
-       int yy_n_chars;
+       yy_size_t yy_n_chars;
  
         /* Whether we "own" the buffer - i.e., we know we created it,
          * and can realloc() it to grow it, and should free() it to
@@ -270,8 +271,8 @@ static YY_BUFFER_STATE * yy_buffer_stack = 0; /**< Stack as an array. */
  
  /* yy_hold_char holds the character lost when yytext is formed. */
  static char yy_hold_char;
-static int yy_n_chars;         /* number of characters read into yy_ch_buf */
-int yyleng;
+static yy_size_t yy_n_chars;           /* number of characters read into yy_ch_buf */
+yy_size_t yyleng;
  
  /* Points to current character in buffer. */
  static char *yy_c_buf_p = (char *) 0;
@@ -299,7 +300,7 @@ static void yy_init_buffer (YY_BUFFER_STATE b,FILE *file  );
  
  YY_BUFFER_STATE yy_scan_buffer (char *base,yy_size_t size  );
  YY_BUFFER_STATE yy_scan_string (yyconst char *yy_str  );
-YY_BUFFER_STATE yy_scan_bytes (yyconst char *bytes,int len  );
+YY_BUFFER_STATE yy_scan_bytes (yyconst char *bytes,yy_size_t len  );
  
  void *yyalloc (yy_size_t  );
  void *yyrealloc (void *,yy_size_t  );
@@ -522,7 +523,7 @@ const struct calc_keywords_t  calc_keywords[] = {
    { "sqrt",      SQRT },
    { NULL }
  };
-#line 526 "calclex.c"
+#line 527 "calclex.c"
  
  #define INITIAL 0
  
@@ -561,7 +562,7 @@ FILE *yyget_out (void );
  
  void yyset_out  (FILE * out_str  );
  
-int yyget_leng (void );
+yy_size_t yyget_leng (void );
  
  char *yyget_text (void );
  
@@ -611,7 +612,7 @@ static int input (void );
  /* This used to be an fputs(), but since the string might contain NUL's,
   * we now use fwrite().
   */
-#define ECHO fwrite( yytext, yyleng, 1, yyout )
+#define ECHO do { if (fwrite( yytext, yyleng, 1, yyout )) {} } while (0)
  #endif
  
  /* Gets input and stuffs it into "buf".  number of characters read, or YY_NULL,
@@ -622,7 +623,7 @@ static int input (void );
         if ( YY_CURRENT_BUFFER_LVALUE->yy_is_interactive ) \
                 { \
                 int c = '*'; \
-               int n; \
+               size_t n; \
                 for ( n = 0; n < max_size && \
                              (c = getc( yyin )) != EOF && c != '\n'; ++n ) \
                         buf[n] = (char) c; \
@@ -707,7 +708,7 @@ YY_DECL
  #line 57 "calclex.l"
  
  
-#line 711 "calclex.c"
+#line 712 "calclex.c"
  
         if ( !(yy_init) )
                 {
@@ -910,7 +911,7 @@ YY_RULE_SETUP
  #line 107 "calclex.l"
  ECHO;
         YY_BREAK
-#line 914 "calclex.c"
+#line 915 "calclex.c"
  case YY_STATE_EOF(INITIAL):
         yyterminate();
  
@@ -1096,21 +1097,21 @@ static int yy_get_next_buffer (void)
  
         else
                 {
-                       int num_to_read =
+                       yy_size_t num_to_read =
                         YY_CURRENT_BUFFER_LVALUE->yy_buf_size - number_to_move - 1;
  
                 while ( num_to_read <= 0 )
                         { /* Not enough room in the buffer - grow it. */
  
                         /* just a shorter name for the current buffer */
-                       YY_BUFFER_STATE b = YY_CURRENT_BUFFER;
+                       YY_BUFFER_STATE b = YY_CURRENT_BUFFER_LVALUE;
  
                         int yy_c_buf_p_offset =
                                 (int) ((yy_c_buf_p) - b->yy_ch_buf);
  
                         if ( b->yy_is_our_buffer )
                                 {
-                               int new_size = b->yy_buf_size * 2;
+                               yy_size_t new_size = b->yy_buf_size * 2;
  
                                 if ( new_size <= 0 )
                                         b->yy_buf_size += b->yy_buf_size / 8;
@@ -1141,7 +1142,7 @@ static int yy_get_next_buffer (void)
  
                 /* Read in more data. */
                 YY_INPUT( (&YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[number_to_move]),
-                       (yy_n_chars), (size_t) num_to_read );
+                       (yy_n_chars), num_to_read );
  
                 YY_CURRENT_BUFFER_LVALUE->yy_n_chars = (yy_n_chars);
                 }
@@ -1236,7 +1237,7 @@ static int yy_get_next_buffer (void)
         yy_current_state = yy_nxt[yy_base[yy_current_state] + (unsigned int) yy_c];
         yy_is_jam = (yy_current_state == 38);
  
-       return yy_is_jam ? 0 : yy_current_state;
+               return yy_is_jam ? 0 : yy_current_state;
  }
  
      static void yyunput (int c, register char * yy_bp )
@@ -1251,7 +1252,7 @@ static int yy_get_next_buffer (void)
         if ( yy_cp < YY_CURRENT_BUFFER_LVALUE->yy_ch_buf + 2 )
                 { /* need to shift things up to make room */
                 /* +2 for EOB chars. */
-               register int number_to_move = (yy_n_chars) + 2;
+               register yy_size_t number_to_move = (yy_n_chars) + 2;
                 register char *dest = &YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[
                                         YY_CURRENT_BUFFER_LVALUE->yy_buf_size + 2];
                 register char *source =
@@ -1300,7 +1301,7 @@ static int yy_get_next_buffer (void)
  
                 else
                         { /* need more input */
-                       int offset = (yy_c_buf_p) - (yytext_ptr);
+                       yy_size_t offset = (yy_c_buf_p) - (yytext_ptr);
                         ++(yy_c_buf_p);
  
                         switch ( yy_get_next_buffer(  ) )
@@ -1460,10 +1461,6 @@ static void yy_load_buffer_state  (void)
         yyfree((void *) b  );
  }
  
-#ifndef __cplusplus
-extern int isatty (int );
-#endif /* __cplusplus */
-    
  /* Initializes or reinitializes a buffer.
   * This function is sometimes called more than once on the same buffer,
   * such as during a yyrestart() or at EOF.
@@ -1576,7 +1573,7 @@ void yypop_buffer_state (void)
   */
  static void yyensure_buffer_stack (void)
  {
-       int num_to_alloc;
+       yy_size_t num_to_alloc;
      
         if (!(yy_buffer_stack)) {
  
@@ -1668,12 +1665,12 @@ YY_BUFFER_STATE yy_scan_string (yyconst char * yystr )
  
  /** Setup the input buffer state to scan the given bytes. The next call to yylex() will
   * scan from a @e copy of @a bytes.
- * @param bytes the byte buffer to scan
- * @param len the number of bytes in the buffer pointed to by @a bytes.
+ * @param yybytes the byte buffer to scan
+ * @param _yybytes_len the number of bytes in the buffer pointed to by @a bytes.
   * 
   * @return the newly allocated buffer state object.
   */
-YY_BUFFER_STATE yy_scan_bytes  (yyconst char * yybytes, int  _yybytes_len )
+YY_BUFFER_STATE yy_scan_bytes  (yyconst char * yybytes, yy_size_t  _yybytes_len )
  {
         YY_BUFFER_STATE b;
         char *buf;
@@ -1760,7 +1757,7 @@ FILE *yyget_out  (void)
  /** Get the length of the current token.
   * 
   */
-int yyget_leng  (void)
+yy_size_t yyget_leng  (void)
  {
          return yyleng;
  }
diff --git a/demos/expr/Makefile.in b/demos/expr/Makefile.in

index 5f959479ab9eb5ebca0669cac9a626c8639c7663..f4a000bc56635e3c7bfb1c4d1e53ebe9c1ebc6b5 100644 (file)
--- a/demos/expr/Makefile.in
+++ b/demos/expr/Makefile.in
@@ -1,9 +1,9 @@
-# Makefile.in generated by automake 1.11.1 from Makefile.am.
+# Makefile.in generated by automake 1.11.6 from Makefile.am.
  # @configure_input@
  
  # Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
-# 2003, 2004, 2005, 2006, 2007, 2008, 2009  Free Software Foundation,
-# Inc.
+# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software
+# Foundation, Inc.
  # This Makefile.in is free software; the Free Software Foundation
  # gives unlimited permission to copy and/or distribute it,
  # with or without modifications, as long as this notice is preserved.
@@ -32,6 +32,23 @@
  # You should have received a copy of the GNU Lesser General Public License
  # along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
  VPATH = @srcdir@
+am__make_dryrun = \
+  { \
+    am__dry=no; \
+    case $$MAKEFLAGS in \
+      *\\[\ \  ]*) \
+        echo 'am--echo: ; @echo "AM"  OK' | $(MAKE) -f - 2>/dev/null \
+          | grep '^AM OK$$' >/dev/null || am__dry=yes;; \
+      *) \
+        for am__flg in $$MAKEFLAGS; do \
+          case $$am__flg in \
+            *=*|--*) ;; \
+            *n*) am__dry=yes; break;; \
+          esac; \
+        done;; \
+    esac; \
+    test $$am__dry = yes; \
+  }
  pkgdatadir = $(datadir)/@PACKAGE@
  pkgincludedir = $(includedir)/@PACKAGE@
  pkglibdir = $(libdir)/@PACKAGE@
@@ -50,13 +67,12 @@ PRE_UNINSTALL = :
  POST_UNINSTALL = :
  build_triplet = @build@
  host_triplet = @host@
-ANSI2KNR = $(top_builddir)/ansi2knr
  EXTRA_PROGRAMS = run-expr$(EXEEXT) t-expr$(EXEEXT)
  subdir = demos/expr
  DIST_COMMON = README $(srcdir)/Makefile.am $(srcdir)/Makefile.in
  ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
  am__aclocal_m4_deps = $(top_srcdir)/acinclude.m4 \
-       $(top_srcdir)/configure.in
+       $(top_srcdir)/configure.ac
  am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
         $(ACLOCAL_M4)
  mkinstalldirs = $(install_sh) -d
@@ -65,16 +81,16 @@ CONFIG_CLEAN_FILES =
  CONFIG_CLEAN_VPATH_FILES =
  libexpr_a_AR = $(AR) $(ARFLAGS)
  libexpr_a_LIBADD =
-am_libexpr_a_OBJECTS = expr$U.$(OBJEXT) exprv$U.$(OBJEXT) \
-       exprz$U.$(OBJEXT) exprza$U.$(OBJEXT) exprq$U.$(OBJEXT) \
-       exprqa$U.$(OBJEXT) exprf$U.$(OBJEXT) exprfa$U.$(OBJEXT)
+am_libexpr_a_OBJECTS = expr.$(OBJEXT) exprv.$(OBJEXT) exprz.$(OBJEXT) \
+       exprza.$(OBJEXT) exprq.$(OBJEXT) exprqa.$(OBJEXT) \
+       exprf.$(OBJEXT) exprfa.$(OBJEXT)
  libexpr_a_OBJECTS = $(am_libexpr_a_OBJECTS)
  run_expr_SOURCES = run-expr.c
-run_expr_OBJECTS = run-expr$U.$(OBJEXT)
+run_expr_OBJECTS = run-expr.$(OBJEXT)
  run_expr_LDADD = $(LDADD)
  run_expr_DEPENDENCIES = libexpr.a $(top_builddir)/libgmp.la
  t_expr_SOURCES = t-expr.c
-t_expr_OBJECTS = t-expr$U.$(OBJEXT)
+t_expr_OBJECTS = t-expr.$(OBJEXT)
  t_expr_DEPENDENCIES = $(top_builddir)/tests/libtests.la $(LDADD)
  DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir)
  depcomp =
@@ -90,6 +106,11 @@ LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \
         $(LDFLAGS) -o $@
  SOURCES = $(libexpr_a_SOURCES) run-expr.c t-expr.c
  DIST_SOURCES = $(libexpr_a_SOURCES) run-expr.c t-expr.c
+am__can_run_installinfo = \
+  case $$AM_UPDATE_INFO_DIR in \
+    n|no|NO) false;; \
+    *) (install-info --version) >/dev/null 2>&1;; \
+  esac
  ETAGS = etags
  CTAGS = ctags
  DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
@@ -191,8 +212,8 @@ SHELL = @SHELL@
  SPEED_CYCLECOUNTER_OBJ = @SPEED_CYCLECOUNTER_OBJ@
  STRIP = @STRIP@
  TAL_OBJECT = @TAL_OBJECT@
+TUNE_LIBS = @TUNE_LIBS@
  TUNE_SQR_OBJ = @TUNE_SQR_OBJ@
-U = @U@
  U_FOR_BUILD = @U_FOR_BUILD@
  VERSION = @VERSION@
  WITH_READLINE_01 = @WITH_READLINE_01@
@@ -239,7 +260,6 @@ mandir = @mandir@
  mkdir_p = @mkdir_p@
  mpn_objects = @mpn_objects@
  mpn_objs_in_libgmp = @mpn_objs_in_libgmp@
-mpn_objs_in_libmp = @mpn_objs_in_libmp@
  oldincludedir = @oldincludedir@
  pdfdir = @pdfdir@
  prefix = @prefix@
@@ -302,14 +322,14 @@ $(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps)
  $(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps)
         cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
  $(am__aclocal_m4_deps):
-libexpr.a: $(libexpr_a_OBJECTS) $(libexpr_a_DEPENDENCIES) 
+libexpr.a: $(libexpr_a_OBJECTS) $(libexpr_a_DEPENDENCIES) $(EXTRA_libexpr_a_DEPENDENCIES) 
         -rm -f libexpr.a
         $(libexpr_a_AR) libexpr.a $(libexpr_a_OBJECTS) $(libexpr_a_LIBADD)
         $(RANLIB) libexpr.a
-run-expr$(EXEEXT): $(run_expr_OBJECTS) $(run_expr_DEPENDENCIES) 
+run-expr$(EXEEXT): $(run_expr_OBJECTS) $(run_expr_DEPENDENCIES) $(EXTRA_run_expr_DEPENDENCIES) 
         @rm -f run-expr$(EXEEXT)
         $(LINK) $(run_expr_OBJECTS) $(run_expr_LDADD) $(LIBS)
-t-expr$(EXEEXT): $(t_expr_OBJECTS) $(t_expr_DEPENDENCIES) 
+t-expr$(EXEEXT): $(t_expr_OBJECTS) $(t_expr_DEPENDENCIES) $(EXTRA_t_expr_DEPENDENCIES) 
         @rm -f t-expr$(EXEEXT)
         $(LINK) $(t_expr_OBJECTS) $(t_expr_LDADD) $(LIBS)
  
@@ -318,11 +338,6 @@ mostlyclean-compile:
  
  distclean-compile:
         -rm -f *.tab.c
-$(top_builddir)/ansi2knr:
-       $(am__cd) $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) ./ansi2knr
-
-mostlyclean-kr:
-       -test "$U" = "" || rm -f *_.c
  
  .c.o:
         $(COMPILE) -c $<
@@ -332,31 +347,6 @@ mostlyclean-kr:
  
  .c.lo:
         $(LTCOMPILE) -c -o $@ $<
-expr_.c: expr.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/expr.c; then echo $(srcdir)/expr.c; else echo expr.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-exprf_.c: exprf.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/exprf.c; then echo $(srcdir)/exprf.c; else echo exprf.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-exprfa_.c: exprfa.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/exprfa.c; then echo $(srcdir)/exprfa.c; else echo exprfa.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-exprq_.c: exprq.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/exprq.c; then echo $(srcdir)/exprq.c; else echo exprq.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-exprqa_.c: exprqa.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/exprqa.c; then echo $(srcdir)/exprqa.c; else echo exprqa.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-exprv_.c: exprv.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/exprv.c; then echo $(srcdir)/exprv.c; else echo exprv.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-exprz_.c: exprz.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/exprz.c; then echo $(srcdir)/exprz.c; else echo exprz.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-exprza_.c: exprza.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/exprza.c; then echo $(srcdir)/exprza.c; else echo exprza.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-run-expr_.c: run-expr.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/run-expr.c; then echo $(srcdir)/run-expr.c; else echo run-expr.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-expr_.c: t-expr.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-expr.c; then echo $(srcdir)/t-expr.c; else echo t-expr.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-expr_.$(OBJEXT) expr_.lo exprf_.$(OBJEXT) exprf_.lo exprfa_.$(OBJEXT) \
-exprfa_.lo exprq_.$(OBJEXT) exprq_.lo exprqa_.$(OBJEXT) exprqa_.lo \
-exprv_.$(OBJEXT) exprv_.lo exprz_.$(OBJEXT) exprz_.lo \
-exprza_.$(OBJEXT) exprza_.lo run-expr_.$(OBJEXT) run-expr_.lo \
-t-expr_.$(OBJEXT) t-expr_.lo : $(ANSI2KNR)
  
  mostlyclean-libtool:
         -rm -f *.lo
@@ -460,10 +450,15 @@ install-am: all-am
  
  installcheck: installcheck-am
  install-strip:
-       $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
-         install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
-         `test -z '$(STRIP)' || \
-           echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+       if test -z '$(STRIP)'; then \
+         $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+           install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+             install; \
+       else \
+         $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+           install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+           "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
+       fi
  mostlyclean-generic:
  
  clean-generic:
@@ -531,7 +526,7 @@ maintainer-clean-am: distclean-am maintainer-clean-generic
  
  mostlyclean: mostlyclean-am
  
-mostlyclean-am: mostlyclean-compile mostlyclean-generic mostlyclean-kr \
+mostlyclean-am: mostlyclean-compile mostlyclean-generic \
         mostlyclean-libtool
  
  pdf: pdf-am
@@ -544,7 +539,7 @@ ps-am:
  
  uninstall-am:
  
-.MAKE: $(top_builddir)/ansi2knr install-am install-strip
+.MAKE: install-am install-strip
  
  .PHONY: CTAGS GTAGS all all-am check check-am clean clean-generic \
         clean-libtool ctags distclean distclean-compile \
@@ -556,8 +551,8 @@ uninstall-am:
         install-pdf-am install-ps install-ps-am install-strip \
         installcheck installcheck-am installdirs maintainer-clean \
         maintainer-clean-generic mostlyclean mostlyclean-compile \
-       mostlyclean-generic mostlyclean-kr mostlyclean-libtool pdf \
-       pdf-am ps ps-am tags uninstall uninstall-am
+       mostlyclean-generic mostlyclean-libtool pdf pdf-am ps ps-am \
+       tags uninstall uninstall-am
  
  
  allprogs: $(EXTRA_PROGRAMS)
diff --git a/demos/expr/expr-impl.h b/demos/expr/expr-impl.h

index e6050c5622ecddcdd54a0b71e19b833319484534..84c5bce8812689007397991feb1d648d1414df57 100644 (file)
--- a/demos/expr/expr-impl.h
+++ b/demos/expr/expr-impl.h
@@ -53,83 +53,76 @@ union mpX_t {
  };
  
  typedef union mpX_t *mpX_ptr;
-typedef __gmp_const union mpX_t *mpX_srcptr;
-
-typedef void (*mpexpr_fun_one_t) __GMP_PROTO ((mpX_ptr));
-typedef unsigned long (*mpexpr_fun_ui_one_t) __GMP_PROTO ((mpX_ptr));
-
-typedef void (*mpexpr_fun_0ary_t) __GMP_PROTO ((mpX_ptr));
-typedef int  (*mpexpr_fun_i_0ary_t) __GMP_PROTO ((void));
-
-typedef void (*mpexpr_fun_unary_t) __GMP_PROTO ((mpX_ptr, mpX_srcptr));
-typedef void (*mpexpr_fun_unary_ui_t) __GMP_PROTO ((mpX_ptr, unsigned long));
-typedef int  (*mpexpr_fun_i_unary_t) __GMP_PROTO ((mpX_srcptr));
-typedef int  (*mpexpr_fun_i_unary_ui_t) __GMP_PROTO ((unsigned long));
-
-typedef void (*mpexpr_fun_binary_t) __GMP_PROTO ((mpX_ptr, mpX_srcptr, mpX_srcptr));
-typedef void (*mpexpr_fun_binary_ui_t) __GMP_PROTO ((mpX_ptr, mpX_srcptr, unsigned long));
-typedef int  (*mpexpr_fun_i_binary_t) __GMP_PROTO ((mpX_srcptr, mpX_srcptr));
-typedef int  (*mpexpr_fun_i_binary_ui_t) __GMP_PROTO ((mpX_srcptr, unsigned long));
-
-typedef void (*mpexpr_fun_ternary_t)
-     __GMP_PROTO ((mpX_ptr, mpX_srcptr, mpX_srcptr, mpX_srcptr));
-typedef void (*mpexpr_fun_ternary_ui_t)
-     __GMP_PROTO ((mpX_ptr, mpX_srcptr, mpX_srcptr, unsigned long));
-typedef int (*mpexpr_fun_i_ternary_t)
-     __GMP_PROTO ((mpX_srcptr, mpX_srcptr, mpX_srcptr));
-typedef int (*mpexpr_fun_i_ternary_ui_t)
-     __GMP_PROTO ((mpX_srcptr, mpX_srcptr, unsigned long));
-
-typedef size_t (*mpexpr_fun_number_t)
-     __GMP_PROTO ((mpX_ptr, __gmp_const char *str, size_t len, int base));
-typedef void (*mpexpr_fun_swap_t) __GMP_PROTO ((mpX_ptr, mpX_ptr));
-typedef unsigned long (*mpexpr_fun_get_ui_t) __GMP_PROTO ((mpX_srcptr));
-typedef void (*mpexpr_fun_set_si_t) __GMP_PROTO ((mpX_srcptr, long));
+typedef const union mpX_t *mpX_srcptr;
+
+typedef void (*mpexpr_fun_one_t) (mpX_ptr);
+typedef unsigned long (*mpexpr_fun_ui_one_t) (mpX_ptr);
+
+typedef void (*mpexpr_fun_0ary_t) (mpX_ptr);
+typedef int  (*mpexpr_fun_i_0ary_t) (void);
+
+typedef void (*mpexpr_fun_unary_t) (mpX_ptr, mpX_srcptr);
+typedef void (*mpexpr_fun_unary_ui_t) (mpX_ptr, unsigned long);
+typedef int  (*mpexpr_fun_i_unary_t) (mpX_srcptr);
+typedef int  (*mpexpr_fun_i_unary_ui_t) (unsigned long);
+
+typedef void (*mpexpr_fun_binary_t) (mpX_ptr, mpX_srcptr, mpX_srcptr);
+typedef void (*mpexpr_fun_binary_ui_t) (mpX_ptr, mpX_srcptr, unsigned long);
+typedef int  (*mpexpr_fun_i_binary_t) (mpX_srcptr, mpX_srcptr);
+typedef int  (*mpexpr_fun_i_binary_ui_t) (mpX_srcptr, unsigned long);
+
+typedef void (*mpexpr_fun_ternary_t) (mpX_ptr, mpX_srcptr, mpX_srcptr, mpX_srcptr);
+typedef void (*mpexpr_fun_ternary_ui_t) (mpX_ptr, mpX_srcptr, mpX_srcptr, unsigned long);
+typedef int (*mpexpr_fun_i_ternary_t) (mpX_srcptr, mpX_srcptr, mpX_srcptr);
+typedef int (*mpexpr_fun_i_ternary_ui_t) (mpX_srcptr, mpX_srcptr, unsigned long);
+
+typedef size_t (*mpexpr_fun_number_t) (mpX_ptr, const char *str, size_t len, int base);
+typedef void (*mpexpr_fun_swap_t) (mpX_ptr, mpX_ptr);
+typedef unsigned long (*mpexpr_fun_get_ui_t) (mpX_srcptr);
+typedef void (*mpexpr_fun_set_si_t) (mpX_srcptr, long);
  
  struct mpexpr_control_t {
-  __gmp_const struct mpexpr_operator_t  *op;
-  int                                   argcount;
+  const struct mpexpr_operator_t  *op;
+  int                             argcount;
  };
  
  #define MPEXPR_VARIABLES  26
  
  struct mpexpr_parse_t {
-  __gmp_const struct mpexpr_operator_t  *table;
-
-  mpX_ptr                               res;
-  int                                   base;
-  unsigned long                         prec;
-  __gmp_const char                      *e;
-  size_t                                elen;
-  mpX_srcptr                            *var;
-  int                                   error_code;
-
-  int                                   token;
-  __gmp_const struct mpexpr_operator_t  *token_op;
-
-  union mpX_t                           *data_stack;
-  int                                   data_top;
-  int                                   data_alloc;
-  int                                   data_inited;
-
-  struct mpexpr_control_t               *control_stack;
-  int                                   control_top;
-  int                                   control_alloc;
-
-
-  mpexpr_fun_0ary_t                     mpX_clear;
-  mpexpr_fun_i_unary_t                  mpX_ulong_p;
-  mpexpr_fun_get_ui_t                   mpX_get_ui;
-  mpexpr_fun_unary_ui_t                 mpX_init;
-  mpexpr_fun_number_t                   mpX_number;
-  mpexpr_fun_unary_t                    mpX_set;
-  mpexpr_fun_unary_t                    mpX_set_or_swap;
-  mpexpr_fun_set_si_t                   mpX_set_si;
-  mpexpr_fun_swap_t                     mpX_swap;
+  const struct mpexpr_operator_t  *table;
+
+  mpX_ptr                         res;
+  int                             base;
+  unsigned long                   prec;
+  const char                      *e;
+  size_t                          elen;
+  mpX_srcptr                      *var;
+  int                             error_code;
+
+  int                             token;
+  const struct mpexpr_operator_t  *token_op;
+
+  union mpX_t                     *data_stack;
+  int                             data_top;
+  int                             data_alloc;
+  int                             data_inited;
+
+  struct mpexpr_control_t         *control_stack;
+  int                             control_top;
+  int                             control_alloc;
+
+  mpexpr_fun_0ary_t               mpX_clear;
+  mpexpr_fun_i_unary_t            mpX_ulong_p;
+  mpexpr_fun_get_ui_t             mpX_get_ui;
+  mpexpr_fun_unary_ui_t           mpX_init;
+  mpexpr_fun_number_t             mpX_number;
+  mpexpr_fun_unary_t              mpX_set;
+  mpexpr_fun_unary_t              mpX_set_or_swap;
+  mpexpr_fun_set_si_t             mpX_set_si;
+  mpexpr_fun_swap_t               mpX_swap;
  };
  
  
-int mpexpr_evaluate __GMP_PROTO ((struct mpexpr_parse_t *p));
-int mpexpr_va_to_var __GMP_PROTO ((void *var[], va_list ap));
-size_t mpexpr_mpz_number __GMP_PROTO ((mpz_ptr res,
-                                  __gmp_const char *e, size_t elen, int base));
+int mpexpr_evaluate (struct mpexpr_parse_t *p);
+int mpexpr_va_to_var (void *var[], va_list ap);
+size_t mpexpr_mpz_number (mpz_ptr res, const char *e, size_t elen, int base);
diff --git a/demos/expr/expr.c b/demos/expr/expr.c

index f78c32173853d82fbfccc2b50d22c127f0bb4037..1f4af6cea64f0aecb64319356dbe78e5c4b3a96a 100644 (file)
--- a/demos/expr/expr.c
+++ b/demos/expr/expr.c
@@ -136,7 +136,7 @@ along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
  static int
  lookahead (struct mpexpr_parse_t *p, int prefix)
  {
-  __gmp_const struct mpexpr_operator_t  *op, *op_found;
+  const struct mpexpr_operator_t  *op, *op_found;
    size_t  oplen, oplen_found, wlen;
    int     i;
  
@@ -278,7 +278,7 @@ lookahead (struct mpexpr_parse_t *p, int prefix)
     a reference through CP.  */
  #define CONTROL_PUSH(opptr,args)                        \
    do {                                                  \
-    __gmp_const struct mpexpr_operator_t *op = opptr;   \
+    const struct mpexpr_operator_t *op = opptr;                \
      struct mpexpr_control_t *cp;                        \
      CONTROL_SPACE ();                                   \
      p->control_top++;                                   \
@@ -371,7 +371,7 @@ mpexpr_evaluate (struct mpexpr_parse_t *p)
    /* "done" is a special sentinel at the bottom of the control stack,
       precedence -1 is lower than any normal operator.  */
    {
-    static __gmp_const struct mpexpr_operator_t  operator_done
+    static const struct mpexpr_operator_t  operator_done
        = { "DONE", NULL, MPEXPR_TYPE_DONE, -1 };
  
      p->control_alloc = 20;
diff --git a/demos/expr/expr.h b/demos/expr/expr.h

index c3525b61ee325cef7aa579c10a826853d726207d..0cfda0a3356a1a0d5570f2ed1297ac0ac90f847a 100644 (file)
--- a/demos/expr/expr.h
+++ b/demos/expr/expr.h
@@ -98,32 +98,26 @@ along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
  #define MPEXPR_TYPE_OPERATOR       0x2000
  
  
-typedef void (*mpexpr_fun_t) __GMP_PROTO ((void));
+typedef void (*mpexpr_fun_t) (void);
  
  struct mpexpr_operator_t {
-  __gmp_const char  *name;
-  mpexpr_fun_t      fun;
-  int               type;
-  int               precedence;
+  const char   *name;
+  mpexpr_fun_t fun;
+  int          type;
+  int          precedence;
  };
  
  
-int mpf_expr_a __GMP_PROTO ((__gmp_const struct mpexpr_operator_t *table,
-                       mpf_ptr res, int base, unsigned long prec,
-                       __gmp_const char *e, size_t elen,
-                       mpf_srcptr var[26]));
-int mpf_expr __GMP_PROTO ((mpf_ptr res, int base, __gmp_const char *e, ...));
+int mpf_expr_a (const struct mpexpr_operator_t *, mpf_ptr, int,
+               unsigned long, const char *, size_t, mpf_srcptr [26]);
+int mpf_expr (mpf_ptr, int, const char *, ...);
  
-int mpq_expr_a __GMP_PROTO ((__gmp_const struct mpexpr_operator_t *table,
-                       mpq_ptr res, int base,
-                       __gmp_const char *e, size_t elen,
-                       mpq_srcptr var[26]));
-int mpq_expr __GMP_PROTO ((mpq_ptr res, int base, __gmp_const char *e, ...));
+int mpq_expr_a (const struct mpexpr_operator_t *, mpq_ptr,
+               int, const char *, size_t, mpq_srcptr [26]);
+int mpq_expr (mpq_ptr, int, const char *, ...);
  
-int mpz_expr_a __GMP_PROTO ((__gmp_const struct mpexpr_operator_t *table,
-                       mpz_ptr res, int base,
-                       __gmp_const char *e, size_t elen,
-                       mpz_srcptr var[26]));
-int mpz_expr __GMP_PROTO ((mpz_ptr res, int base, __gmp_const char *e, ...));
+int mpz_expr_a (const struct mpexpr_operator_t *, mpz_ptr, int,
+               const char *, size_t, mpz_srcptr [26]);
+int mpz_expr (mpz_ptr, int, const char *, ...);
  
  #endif
diff --git a/demos/expr/exprf.c b/demos/expr/exprf.c

index c67ee51783b2e0bd64404d1b2da83eb43dc2b9b6..98ce834798acdfe4e1662943f99a4968cfc212cb 100644 (file)
--- a/demos/expr/exprf.c
+++ b/demos/expr/exprf.c
@@ -34,7 +34,7 @@ e_mpf_sgn (mpf_srcptr x)
  }
  
  
-static __gmp_const struct mpexpr_operator_t  _mpf_expr_standard_table[] = {
+static const struct mpexpr_operator_t  _mpf_expr_standard_table[] = {
  
    { "**",  (mpexpr_fun_t) mpf_pow_ui,
      MPEXPR_TYPE_BINARY_UI | MPEXPR_TYPE_RIGHTASSOC,                   220 },
@@ -88,13 +88,13 @@ static __gmp_const struct mpexpr_operator_t  _mpf_expr_standard_table[] = {
    { NULL }
  };
  
-__gmp_const struct mpexpr_operator_t * __gmp_const mpf_expr_standard_table
+const struct mpexpr_operator_t * const mpf_expr_standard_table
  = _mpf_expr_standard_table;
  
  
  int
  #if HAVE_STDARG
-mpf_expr (mpf_ptr res, int base, __gmp_const char *e, ...)
+mpf_expr (mpf_ptr res, int base, const char *e, ...)
  #else
  mpf_expr (va_alist)
       va_dcl
@@ -106,13 +106,13 @@ mpf_expr (va_alist)
  #if HAVE_STDARG
    va_start (ap, e);
  #else
-  mpf_ptr           res;
-  int               base;
-  __gmp_const char  *e;
+  mpf_ptr     res;
+  int         base;
+  const char  *e;
    va_start (ap);
    res  = va_arg (ap, mpf_ptr);
    base = va_arg (ap, int);
-  e    = va_arg (ap, __gmp_const char *);
+  e    = va_arg (ap, const char *);
  #endif
  
    TRACE (printf ("mpf_expr(): base %d, %s\n", base, e));
diff --git a/demos/expr/exprfa.c b/demos/expr/exprfa.c

index 1cc00e0f04279cefccbe977a0ab13deb84130d08..4ab294e08866468494e20e2c1b2eb0095b5b3fa8 100644 (file)
--- a/demos/expr/exprfa.c
+++ b/demos/expr/exprfa.c
@@ -35,7 +35,7 @@ along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
  
  
  static size_t
-e_mpf_number (mpf_ptr res, __gmp_const char *e, size_t elen, int base)
+e_mpf_number (mpf_ptr res, const char *e, size_t elen, int base)
  {
    char    *edup;
    size_t  i, ret, extra=0;
@@ -151,9 +151,9 @@ e_mpf_set_or_swap (mpf_ptr w, mpf_ptr x)
  
  
  int
-mpf_expr_a (__gmp_const struct mpexpr_operator_t *table,
+mpf_expr_a (const struct mpexpr_operator_t *table,
              mpf_ptr res, int base, unsigned long prec,
-            __gmp_const char *e, size_t elen,
+            const char *e, size_t elen,
              mpf_srcptr var[26])
  {
    struct mpexpr_parse_t  p;
diff --git a/demos/expr/exprq.c b/demos/expr/exprq.c

index af91b00167d1a3cf4da29399d8d608b658973178..a16b2165c1ec20f2ae934f9c511e70899d757aab 100644 (file)
--- a/demos/expr/exprq.c
+++ b/demos/expr/exprq.c
@@ -72,7 +72,7 @@ e_mpq_den (mpq_ptr w, mpq_srcptr x)
  }
  
  
-static __gmp_const struct mpexpr_operator_t  _mpq_expr_standard_table[] = {
+static const struct mpexpr_operator_t  _mpq_expr_standard_table[] = {
  
    { "**",  (mpexpr_fun_t) e_mpq_pow_ui,
      MPEXPR_TYPE_BINARY_UI | MPEXPR_TYPE_RIGHTASSOC,                   220 },
@@ -121,13 +121,13 @@ static __gmp_const struct mpexpr_operator_t  _mpq_expr_standard_table[] = {
    { NULL }
  };
  
-__gmp_const struct mpexpr_operator_t * __gmp_const mpq_expr_standard_table
+const struct mpexpr_operator_t * const mpq_expr_standard_table
  = _mpq_expr_standard_table;
  
  
  int
  #if HAVE_STDARG
-mpq_expr (mpq_ptr res, int base, __gmp_const char *e, ...)
+mpq_expr (mpq_ptr res, int base, const char *e, ...)
  #else
  mpq_expr (va_alist)
       va_dcl
@@ -139,13 +139,13 @@ mpq_expr (va_alist)
  #if HAVE_STDARG
    va_start (ap, e);
  #else
-  mpq_ptr           res;
-  int               base;
-  __gmp_const char  *e;
+  mpq_ptr     res;
+  int         base;
+  const char  *e;
    va_start (ap);
    res  = va_arg (ap, mpq_ptr);
    base = va_arg (ap, int);
-  e    = va_arg (ap, __gmp_const char *);
+  e    = va_arg (ap, const char *);
  #endif
  
    TRACE (printf ("mpq_expr(): base %d, %s\n", base, e));
diff --git a/demos/expr/exprqa.c b/demos/expr/exprqa.c

index c56f1ca691563403f5abd1568b6e3b5e20c4525e..8eaf602486e8eb98df216228d9583d3297da97c7 100644 (file)
--- a/demos/expr/exprqa.c
+++ b/demos/expr/exprqa.c
@@ -46,7 +46,7 @@ e_mpq_set_si1 (mpq_ptr q, long num)
  /* The same as mpz, but putting the result in the numerator.  Negatives and
     fractions aren't parsed here because '-' and '/' are operators. */
  static size_t
-e_mpq_number (mpq_ptr res, __gmp_const char *e, size_t elen, int base)
+e_mpq_number (mpq_ptr res, const char *e, size_t elen, int base)
  {
    mpz_set_ui (mpq_denref (res), 1L);
    return mpexpr_mpz_number (mpq_numref (res), e, elen, base);
@@ -61,9 +61,9 @@ e_mpq_init (mpq_ptr q, unsigned long prec)
  }
  
  int
-mpq_expr_a (__gmp_const struct mpexpr_operator_t *table,
+mpq_expr_a (const struct mpexpr_operator_t *table,
              mpq_ptr res, int base,
-            __gmp_const char *e, size_t elen,
+            const char *e, size_t elen,
              mpq_srcptr var[26])
  {
    struct mpexpr_parse_t  p;
diff --git a/demos/expr/exprz.c b/demos/expr/exprz.c

index 7c01b9dc5fdd744ddbd25f591f52d48746a6aa7d..3d4ec6582881d5ebd931937d9cfe68c7eecdd2f8 100644 (file)
--- a/demos/expr/exprz.c
+++ b/demos/expr/exprz.c
@@ -86,7 +86,7 @@ e_mpz_clrbit (mpz_ptr w, mpz_srcptr x, unsigned long n)
    mpz_clrbit (w, n);
  }
  
-static __gmp_const struct mpexpr_operator_t  _mpz_expr_standard_table[] = {
+static const struct mpexpr_operator_t  _mpz_expr_standard_table[] = {
  
    { "**",  (mpexpr_fun_t) mpz_pow_ui,
      MPEXPR_TYPE_BINARY_UI | MPEXPR_TYPE_RIGHTASSOC,                  220 },
@@ -172,13 +172,13 @@ static __gmp_const struct mpexpr_operator_t  _mpz_expr_standard_table[] = {
  
  /* The table is available globally only through a pointer, so the table size
     can change without breaking binary compatibility. */
-__gmp_const struct mpexpr_operator_t * __gmp_const mpz_expr_standard_table
+const struct mpexpr_operator_t * const mpz_expr_standard_table
  = _mpz_expr_standard_table;
  
  
  int
  #if HAVE_STDARG
-mpz_expr (mpz_ptr res, int base, __gmp_const char *e, ...)
+mpz_expr (mpz_ptr res, int base, const char *e, ...)
  #else
  mpz_expr (va_alist)
       va_dcl
@@ -190,13 +190,13 @@ mpz_expr (va_alist)
  #if HAVE_STDARG
    va_start (ap, e);
  #else
-  mpz_ptr           res;
-  int               base;
-  __gmp_const char  *e;
+  mpz_ptr     res;
+  int         base;
+  const char  *e;
    va_start (ap);
    res  = va_arg (ap, mpz_ptr);
    base = va_arg (ap, int);
-  e    = va_arg (ap, __gmp_const char *);
+  e    = va_arg (ap, const char *);
  #endif
  
    TRACE (printf ("mpz_expr(): base %d, %s\n", base, e));
diff --git a/demos/expr/exprza.c b/demos/expr/exprza.c

index 81e16ce28612ece23a584b6920807f65a6e05d5a..dc544228081ad4ce0a4c328a580341b4850e89a9 100644 (file)
--- a/demos/expr/exprza.c
+++ b/demos/expr/exprza.c
@@ -27,7 +27,7 @@ along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
  /* No need to parse '-' since that's handled as an operator.
     This function also by mpq_expr_a, so it's not static.  */
  size_t
-mpexpr_mpz_number (mpz_ptr res, __gmp_const char *e, size_t elen, int base)
+mpexpr_mpz_number (mpz_ptr res, const char *e, size_t elen, int base)
  {
    char    *edup;
    size_t  i, ret;
@@ -69,9 +69,9 @@ e_mpz_init (mpz_ptr z, unsigned long prec)
  }
  
  int
-mpz_expr_a (__gmp_const struct mpexpr_operator_t *table,
+mpz_expr_a (const struct mpexpr_operator_t *table,
              mpz_ptr res, int base,
-            __gmp_const char *e, size_t elen,
+            const char *e, size_t elen,
              mpz_srcptr var[26])
  {
    struct mpexpr_parse_t  p;
diff --git a/demos/factorize.c b/demos/factorize.c

index 67cf0ccb32ac098f05d1be7ef3e0329cb698782a..9c9c6dbffbc6d5d6b14561e47b70fdc0a5342ccd 100644 (file)
--- a/demos/factorize.c
+++ b/demos/factorize.c
@@ -1,6 +1,6 @@
  /* Factoring with Pollard's rho method.
  
-Copyright 1995, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2005, 2009
+Copyright 1995, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2005, 2009, 2012
  Free Software Foundation, Inc.
  
  This program is free software; you can redistribute it and/or modify it under
@@ -19,138 +19,268 @@ this program.  If not, see http://www.gnu.org/licenses/.  */
  #include <stdlib.h>
  #include <stdio.h>
  #include <string.h>
+#include <inttypes.h>
  
  #include "gmp.h"
  
+static unsigned char primes_diff[] = {
+#define P(a,b,c) a,
+#include "primes.h"
+#undef P
+};
+#define PRIMES_PTAB_ENTRIES (sizeof(primes_diff) / sizeof(primes_diff[0]))
+
  int flag_verbose = 0;
  
-static unsigned add[] = {4, 2, 4, 2, 4, 6, 2, 6};
+/* Prove primality or run probabilistic tests.  */
+int flag_prove_primality = 1;
+
+/* Number of Miller-Rabin tests to run when not proving primality. */
+#define MR_REPS 25
+
+struct factors
+{
+  mpz_t         *p;
+  unsigned long *e;
+  long nfactors;
+};
+
+void factor (mpz_t, struct factors *);
  
  void
-factor_using_division (mpz_t t, unsigned int limit)
+factor_init (struct factors *factors)
  {
-  mpz_t q, r;
-  unsigned long int f;
-  int ai;
-  unsigned *addv = add;
-  unsigned int failures;
+  factors->p = malloc (1);
+  factors->e = malloc (1);
+  factors->nfactors = 0;
+}
  
-  if (flag_verbose > 0)
+void
+factor_clear (struct factors *factors)
+{
+  int i;
+
+  for (i = 0; i < factors->nfactors; i++)
+    mpz_clear (factors->p[i]);
+
+  free (factors->p);
+  free (factors->e);
+}
+
+void
+factor_insert (struct factors *factors, mpz_t prime)
+{
+  long    nfactors  = factors->nfactors;
+  mpz_t         *p  = factors->p;
+  unsigned long *e  = factors->e;
+  long i, j;
+
+  /* Locate position for insert new or increment e.  */
+  for (i = nfactors - 1; i >= 0; i--)
      {
-      printf ("[trial division (%u)] ", limit);
-      fflush (stdout);
+      if (mpz_cmp (p[i], prime) <= 0)
+       break;
      }
  
-  mpz_init (q);
-  mpz_init (r);
+  if (i < 0 || mpz_cmp (p[i], prime) != 0)
+    {
+      p = realloc (p, (nfactors + 1) * sizeof p[0]);
+      e = realloc (e, (nfactors + 1) * sizeof e[0]);
+
+      mpz_init (p[nfactors]);
+      for (j = nfactors - 1; j > i; j--)
+       {
+         mpz_set (p[j + 1], p[j]);
+         e[j + 1] = e[j];
+       }
+      mpz_set (p[i + 1], prime);
+      e[i + 1] = 1;
  
-  f = mpz_scan1 (t, 0);
-  mpz_div_2exp (t, t, f);
-  while (f)
+      factors->p = p;
+      factors->e = e;
+      factors->nfactors = nfactors + 1;
+    }
+  else
      {
-      printf ("2 ");
-      fflush (stdout);
-      --f;
+      e[i] += 1;
      }
+}
  
-  for (;;)
+void
+factor_insert_ui (struct factors *factors, unsigned long prime)
+{
+  mpz_t pz;
+
+  mpz_init_set_ui (pz, prime);
+  factor_insert (factors, pz);
+  mpz_clear (pz);
+}
+
+
+void
+factor_using_division (mpz_t t, struct factors *factors)
+{
+  mpz_t q;
+  unsigned long int p;
+  int i;
+
+  if (flag_verbose > 0)
      {
-      mpz_tdiv_qr_ui (q, r, t, 3);
-      if (mpz_cmp_ui (r, 0) != 0)
-       break;
-      mpz_set (t, q);
-      printf ("3 ");
-      fflush (stdout);
+      printf ("[trial division] ");
      }
  
-  for (;;)
+  mpz_init (q);
+
+  p = mpz_scan1 (t, 0);
+  mpz_div_2exp (t, t, p);
+  while (p)
      {
-      mpz_tdiv_qr_ui (q, r, t, 5);
-      if (mpz_cmp_ui (r, 0) != 0)
-       break;
-      mpz_set (t, q);
-      printf ("5 ");
-      fflush (stdout);
+      factor_insert_ui (factors, 2);
+      --p;
      }
  
-  failures = 0;
-  f = 7;
-  ai = 0;
-  while (mpz_cmp_ui (t, 1) != 0)
+  p = 3;
+  for (i = 1; i <= PRIMES_PTAB_ENTRIES;)
      {
-      mpz_tdiv_qr_ui (q, r, t, f);
-      if (mpz_cmp_ui (r, 0) != 0)
+      if (! mpz_divisible_ui_p (t, p))
         {
-         f += addv[ai];
-         if (mpz_cmp_ui (q, f) < 0)
-           break;
-         ai = (ai + 1) & 7;
-         failures++;
-         if (failures > limit)
+         p += primes_diff[i++];
+         if (mpz_cmp_ui (t, p * p) < 0)
             break;
         }
        else
         {
-         mpz_swap (t, q);
-         printf ("%lu ", f);
-         fflush (stdout);
-         failures = 0;
+         mpz_tdiv_q_ui (t, t, p);
+         factor_insert_ui (factors, p);
         }
      }
  
-  mpz_clears (q, r, NULL);
+  mpz_clear (q);
  }
  
-void
-factor_using_division_2kp (mpz_t t, unsigned int limit, unsigned long p)
+static int
+mp_millerrabin (mpz_srcptr n, mpz_srcptr nm1, mpz_ptr x, mpz_ptr y,
+               mpz_srcptr q, unsigned long int k)
  {
-  mpz_t r;
-  mpz_t f;
-  unsigned int k;
+  unsigned long int i;
  
-  if (flag_verbose > 0)
+  mpz_powm (y, x, q, n);
+
+  if (mpz_cmp_ui (y, 1) == 0 || mpz_cmp (y, nm1) == 0)
+    return 1;
+
+  for (i = 1; i < k; i++)
+    {
+      mpz_powm_ui (y, y, 2, n);
+      if (mpz_cmp (y, nm1) == 0)
+       return 1;
+      if (mpz_cmp_ui (y, 1) == 0)
+       return 0;
+    }
+  return 0;
+}
+
+int
+mp_prime_p (mpz_t n)
+{
+  int k, r, is_prime;
+  mpz_t q, a, nm1, tmp;
+  struct factors factors;
+
+  if (mpz_cmp_ui (n, 1) <= 0)
+    return 0;
+
+  /* We have already casted out small primes. */
+  if (mpz_cmp_ui (n, (long) FIRST_OMITTED_PRIME * FIRST_OMITTED_PRIME) < 0)
+    return 1;
+
+  mpz_inits (q, a, nm1, tmp, NULL);
+
+  /* Precomputation for Miller-Rabin.  */
+  mpz_sub_ui (nm1, n, 1);
+
+  /* Find q and k, where q is odd and n = 1 + 2**k * q.  */
+  k = mpz_scan1 (nm1, 0);
+  mpz_tdiv_q_2exp (q, nm1, k);
+
+  mpz_set_ui (a, 2);
+
+  /* Perform a Miller-Rabin test, finds most composites quickly.  */
+  if (!mp_millerrabin (n, nm1, a, tmp, q, k))
      {
-      printf ("[trial division (%u)] ", limit);
-      fflush (stdout);
+      is_prime = 0;
+      goto ret2;
      }
  
-  mpz_init (r);
-  mpz_init_set_ui (f, 2 * p);
-  mpz_add_ui (f, f, 1);
-  for (k = 1; k < limit; k++)
+  if (flag_prove_primality)
      {
-      mpz_tdiv_r (r, t, f);
-      while (mpz_cmp_ui (r, 0) == 0)
+      /* Factor n-1 for Lucas.  */
+      mpz_set (tmp, nm1);
+      factor (tmp, &factors);
+    }
+
+  /* Loop until Lucas proves our number prime, or Miller-Rabin proves our
+     number composite.  */
+  for (r = 0; r < PRIMES_PTAB_ENTRIES; r++)
+    {
+      int i;
+
+      if (flag_prove_primality)
+       {
+         is_prime = 1;
+         for (i = 0; i < factors.nfactors && is_prime; i++)
+           {
+             mpz_divexact (tmp, nm1, factors.p[i]);
+             mpz_powm (tmp, a, tmp, n);
+             is_prime = mpz_cmp_ui (tmp, 1) != 0;
+           }
+       }
+      else
         {
-         mpz_tdiv_q (t, t, f);
-         mpz_tdiv_r (r, t, f);
-         mpz_out_str (stdout, 10, f);
-         fflush (stdout);
-         fputc (' ', stdout);
+         /* After enough Miller-Rabin runs, be content. */
+         is_prime = (r == MR_REPS - 1);
+       }
+
+      if (is_prime)
+       goto ret1;
+
+      mpz_add_ui (a, a, primes_diff[r]);       /* Establish new base.  */
+
+      if (!mp_millerrabin (n, nm1, a, tmp, q, k))
+       {
+         is_prime = 0;
+         goto ret1;
         }
-      mpz_add_ui (f, f, 2 * p);
      }
  
-  mpz_clears (f, r, NULL);
+  fprintf (stderr, "Lucas prime test failure.  This should not happen\n");
+  abort ();
+
+ ret1:
+  if (flag_prove_primality)
+    factor_clear (&factors);
+ ret2:
+  mpz_clears (q, a, nm1, tmp, NULL);
+
+  return is_prime;
  }
  
  void
-factor_using_pollard_rho (mpz_t n, unsigned long a, unsigned long p)
+factor_using_pollard_rho (mpz_t n, unsigned long a, struct factors *factors)
  {
-  mpz_t x, x1, y, P;
-  mpz_t t1, t2;
+  mpz_t x, z, y, P;
+  mpz_t t, t2;
    unsigned long long k, l, i;
  
    if (flag_verbose > 0)
      {
        printf ("[pollard-rho (%lu)] ", a);
-      fflush (stdout);
      }
  
-  mpz_inits (t1, t2, NULL);
+  mpz_inits (t, t2, NULL);
    mpz_init_set_si (y, 2);
    mpz_init_set_si (x, 2);
-  mpz_init_set_si (x1, 2);
+  mpz_init_set_si (z, 2);
    mpz_init_set_ui (P, 1);
    k = 1;
    l = 1;
@@ -161,52 +291,32 @@ factor_using_pollard_rho (mpz_t n, unsigned long a, unsigned long p)
         {
           do
             {
-             if (p != 0)
-               {
-                 mpz_powm_ui (x, x, p, n);
-                 mpz_add_ui (x, x, a);
-               }
-             else
-               {
-                 mpz_mul (t1, x, x);
-                 mpz_mod (x, t1, n);
-                 mpz_add_ui (x, x, a);
-               }
+             mpz_mul (t, x, x);
+             mpz_mod (x, t, n);
+             mpz_add_ui (x, x, a);
  
-             mpz_sub (t1, x1, x);
-             mpz_mul (t2, P, t1);
+             mpz_sub (t, z, x);
+             mpz_mul (t2, P, t);
               mpz_mod (P, t2, n);
  
               if (k % 32 == 1)
                 {
-                 mpz_gcd (t1, P, n);
-                 if (mpz_cmp_ui (t1, 1) != 0)
+                 mpz_gcd (t, P, n);
+                 if (mpz_cmp_ui (t, 1) != 0)
                     goto factor_found;
                   mpz_set (y, x);
                 }
             }
           while (--k != 0);
  
-         mpz_gcd (t1, P, n);
-         if (mpz_cmp_ui (t1, 1) != 0)
-           goto factor_found;
-
-         mpz_set (x1, x);
+         mpz_set (z, x);
           k = l;
           l = 2 * l;
           for (i = 0; i < k; i++)
             {
-             if (p != 0)
-               {
-                 mpz_powm_ui (x, x, p, n);
-                 mpz_add_ui (x, x, a);
-               }
-             else
-               {
-                 mpz_mul (t1, x, x);
-                 mpz_mod (x, t1, n);
-                 mpz_add_ui (x, x, a);
-               }
+             mpz_mul (t, x, x);
+             mpz_mod (x, t, n);
+             mpz_add_ui (x, x, a);
             }
           mpz_set (y, x);
         }
@@ -214,92 +324,64 @@ factor_using_pollard_rho (mpz_t n, unsigned long a, unsigned long p)
      factor_found:
        do
         {
-         if (p != 0)
-           {
-             mpz_powm_ui (y, y, p, n); mpz_add_ui (y, y, a);
-           }
-         else
-           {
-             mpz_mul (t1, y, y);
-             mpz_mod (y, t1, n);
-             mpz_add_ui (y, y, a);
-           }
-         mpz_sub (t1, x1, y);
-         mpz_gcd (t1, t1, n);
+         mpz_mul (t, y, y);
+         mpz_mod (y, t, n);
+         mpz_add_ui (y, y, a);
+
+         mpz_sub (t, z, y);
+         mpz_gcd (t, t, n);
         }
-      while (mpz_cmp_ui (t1, 1) == 0);
+      while (mpz_cmp_ui (t, 1) == 0);
  
-      mpz_divexact (n, n, t1); /* divide by t1, before t1 is overwritten */
+      mpz_divexact (n, n, t);  /* divide by t, before t is overwritten */
  
-      if (!mpz_probab_prime_p (t1, 25))
+      if (!mp_prime_p (t))
         {
-         do
-           {
-             mp_limb_t a_limb;
-             mpn_random (&a_limb, (mp_size_t) 1);
-             a = a_limb;
-           }
-         while (a == 0);
-
           if (flag_verbose > 0)
             {
               printf ("[composite factor--restarting pollard-rho] ");
-             fflush (stdout);
             }
-         factor_using_pollard_rho (t1, a, p);
+         factor_using_pollard_rho (t, a + 1, factors);
         }
        else
         {
-         mpz_out_str (stdout, 10, t1);
-         fflush (stdout);
-         fputc (' ', stdout);
+         factor_insert (factors, t);
         }
-      mpz_mod (x, x, n);
-      mpz_mod (x1, x1, n);
-      mpz_mod (y, y, n);
-      if (mpz_probab_prime_p (n, 25))
+
+      if (mp_prime_p (n))
         {
-         mpz_out_str (stdout, 10, n);
-         fflush (stdout);
-         fputc (' ', stdout);
+         factor_insert (factors, n);
           break;
         }
+
+      mpz_mod (x, x, n);
+      mpz_mod (z, z, n);
+      mpz_mod (y, y, n);
      }
  
-  mpz_clears (P, t2, t1, x1, x, y, NULL);
+  mpz_clears (P, t2, t, z, x, y, NULL);
  }
  
  void
-factor (mpz_t t, unsigned long p)
+factor (mpz_t t, struct factors *factors)
  {
-  unsigned int division_limit;
-
-  if (mpz_sgn (t) == 0)
-    return;
-
-  /* Set the trial division limit according the size of t.  */
-  division_limit = mpz_sizeinbase (t, 2);
-  if (division_limit > 1000)
-    division_limit = 1000 * 1000;
-  else
-    division_limit = division_limit * division_limit;
+  factor_init (factors);
  
-  if (p != 0)
-    factor_using_division_2kp (t, division_limit / 10, p);
-  else
-    factor_using_division (t, division_limit);
-
-  if (mpz_cmp_ui (t, 1) != 0)
+  if (mpz_sgn (t) != 0)
      {
-      if (flag_verbose > 0)
+      factor_using_division (t, factors);
+
+      if (mpz_cmp_ui (t, 1) != 0)
         {
-         printf ("[is number prime?] ");
-         fflush (stdout);
+         if (flag_verbose > 0)
+           {
+             printf ("[is number prime?] ");
+           }
+         if (mp_prime_p (t))
+           factor_insert (factors, t);
+         else
+           factor_using_pollard_rho (t, 1, factors);
         }
-      if (mpz_probab_prime_p (t, 25))
-       mpz_out_str (stdout, 10, t);
-      else
-       factor_using_pollard_rho (t, 1L, p);
      }
  }
  
@@ -307,18 +389,18 @@ int
  main (int argc, char *argv[])
  {
    mpz_t t;
-  unsigned long p;
-  int i;
+  int i, j, k;
+  struct factors factors;
  
-  if (argc > 1 && !strcmp (argv[1], "-v"))
+  while (argc > 1)
      {
-      flag_verbose = 1;
-      argv++;
-      argc--;
-    }
-  if (argc > 1 && !strcmp (argv[1], "-q"))
-    {
-      flag_verbose = -1;
+      if (!strcmp (argv[1], "-v"))
+       flag_verbose = 1;
+      else if (!strcmp (argv[1], "-w"))
+       flag_prove_primality = 0;
+      else
+       break;
+
        argv++;
        argc--;
      }
@@ -326,33 +408,19 @@ main (int argc, char *argv[])
    mpz_init (t);
    if (argc > 1)
      {
-      p = 0;
        for (i = 1; i < argc; i++)
         {
-         if (!strncmp (argv[i], "-Mp", 3))
-           {
-             p = atoi (argv[i] + 3);
-             mpz_set_ui (t, 1);
-             mpz_mul_2exp (t, t, p);
-             mpz_sub_ui (t, t, 1);
-           }
-         else if (!strncmp (argv[i], "-2kp", 4))
-           {
-             p = atoi (argv[i] + 4);
-             continue;
-           }
-         else
-           {
-             mpz_set_str (t, argv[i], 0);
-           }
+         mpz_set_str (t, argv[i], 0);
  
-         if (mpz_cmp_ui (t, 0) == 0)
-           puts ("-");
-         else
-           {
-             factor (t, p);
-             puts ("");
-           }
+         gmp_printf ("%Zd:", t);
+         factor (t, &factors);
+
+         for (j = 0; j < factors.nfactors; j++)
+           for (k = 0; k < factors.e[j]; k++)
+             gmp_printf (" %Zd", factors.p[j]);
+
+         puts ("");
+         factor_clear (&factors);
         }
      }
    else
@@ -362,12 +430,16 @@ main (int argc, char *argv[])
           mpz_inp_str (t, stdin, 0);
           if (feof (stdin))
             break;
-         if (flag_verbose >= 0)
-           {
-             mpz_out_str (stdout, 10, t); printf (" = ");
-           }
-         factor (t, 0);
+
+         gmp_printf ("%Zd:", t);
+         factor (t, &factors);
+
+         for (j = 0; j < factors.nfactors; j++)
+           for (k = 0; k < factors.e[j]; k++)
+             gmp_printf (" %Zd", factors.p[j]);
+
           puts ("");
+         factor_clear (&factors);
         }
      }
  
diff --git a/demos/isprime.c b/demos/isprime.c

index 083866b9fbf27a9c607075a6cc3fa62fb1cc060b..782106c0a0ccaf20e0794705b8f034ef436b3074 100644 (file)
--- a/demos/isprime.c
+++ b/demos/isprime.c
@@ -1,7 +1,7 @@
  /* Classify numbers as probable primes, primes or composites.
     With -q return true if the following argument is a (probable) prime.
  
-Copyright 1999, 2000, 2002, 2005 Free Software Foundation, Inc.
+Copyright 1999, 2000, 2002, 2005, 2012 Free Software Foundation, Inc.
  
  This program is free software; you can redistribute it and/or modify it under
  the terms of the GNU General Public License as published by the Free Software
diff --git a/demos/pexpr.c b/demos/pexpr.c

index ec4eecbb2f693eefb9ec6907b1427bf3f3eea7b7..8ecb634f60100603c6b31360aa13d56b1d6f111f 100644 (file)
--- a/demos/pexpr.c
+++ b/demos/pexpr.c
@@ -1,7 +1,8 @@
  /* Program for computing integer expressions using the GNU Multiple Precision
     Arithmetic Library.
  
-Copyright 1997, 1999, 2000, 2001, 2002, 2005 Free Software Foundation, Inc.
+Copyright 1997, 1999, 2000, 2001, 2002, 2005, 2008, 2012 Free Software
+Foundation, Inc.
  
  This program is free software; you can redistribute it and/or modify it under
  the terms of the GNU General Public License as published by the Free Software
@@ -109,21 +110,21 @@ struct expr
  
  typedef struct expr *expr_t;
  
-void cleanup_and_exit __GMP_PROTO ((int));
-
-char *skipspace __GMP_PROTO ((char *));
-void makeexp __GMP_PROTO ((expr_t *, enum op_t, expr_t, expr_t));
-void free_expr __GMP_PROTO ((expr_t));
-char *expr __GMP_PROTO ((char *, expr_t *));
-char *term __GMP_PROTO ((char *, expr_t *));
-char *power __GMP_PROTO ((char *, expr_t *));
-char *factor __GMP_PROTO ((char *, expr_t *));
-int match __GMP_PROTO ((char *, char *));
-int matchp __GMP_PROTO ((char *, char *));
-int cputime __GMP_PROTO ((void));
-
-void mpz_eval_expr __GMP_PROTO ((mpz_ptr, expr_t));
-void mpz_eval_mod_expr __GMP_PROTO ((mpz_ptr, expr_t, mpz_ptr));
+void cleanup_and_exit (int);
+
+char *skipspace (char *);
+void makeexp (expr_t *, enum op_t, expr_t, expr_t);
+void free_expr (expr_t);
+char *expr (char *, expr_t *);
+char *term (char *, expr_t *);
+char *power (char *, expr_t *);
+char *factor (char *, expr_t *);
+int match (char *, char *);
+int matchp (char *, char *);
+int cputime (void);
+
+void mpz_eval_expr (mpz_ptr, expr_t);
+void mpz_eval_mod_expr (mpz_ptr, expr_t, mpz_ptr);
  
  char *error;
  int flag_print = 1;
diff --git a/demos/primes.c b/demos/primes.c

index 5e078bd3876a43946bd1fa7b29ec7e519eb69a54..61844b9afc87adaa7c78464542f2a77840c7a5d1 100644 (file)
--- a/demos/primes.c
+++ b/demos/primes.c
@@ -2,7 +2,7 @@
     Written by tege while on holiday in Rodupp, August 2001.
     Between 10 and 500 times faster than previous program.
  
-Copyright 2001, 2002, 2006 Free Software Foundation, Inc.
+Copyright 2001, 2002, 2006, 2012 Free Software Foundation, Inc.
  
  This program is free software; you can redistribute it and/or modify it under
  the terms of the GNU General Public License as published by the Free Software
@@ -61,9 +61,9 @@ struct primes
  struct primes *primes;
  unsigned long n_primes;
  
-void find_primes __GMP_PROTO ((unsigned char *, mpz_t, unsigned long, mpz_t));
-void sieve_region __GMP_PROTO ((unsigned char *, mpz_t, unsigned long));
-void make_primelist __GMP_PROTO ((unsigned long));
+void find_primes (unsigned char *, mpz_t, unsigned long, mpz_t);
+void sieve_region (unsigned char *, mpz_t, unsigned long);
+void make_primelist (unsigned long);
  
  int flag_print = 1;
  int flag_count = 0;
diff --git a/demos/primes.h b/demos/primes.h

new file mode 100644 (file)

index 0000000..b85c7e1
--- /dev/null
+++ b/demos/primes.h
@@ -0,0 +1,552 @@
+P( 1, 0xaaaaaaaaaaaaaaabUL, 0x5555555555555555UL) /* 3 */
+P( 2, 0xcccccccccccccccdUL, 0x3333333333333333UL) /* 5 */
+P( 2, 0x6db6db6db6db6db7UL, 0x2492492492492492UL) /* 7 */
+P( 4, 0x2e8ba2e8ba2e8ba3UL, 0x1745d1745d1745d1UL) /* 11 */
+P( 2, 0x4ec4ec4ec4ec4ec5UL, 0x13b13b13b13b13b1UL) /* 13 */
+P( 4, 0xf0f0f0f0f0f0f0f1UL, 0x0f0f0f0f0f0f0f0fUL) /* 17 */
+P( 2, 0x86bca1af286bca1bUL, 0x0d79435e50d79435UL) /* 19 */
+P( 4, 0xd37a6f4de9bd37a7UL, 0x0b21642c8590b216UL) /* 23 */
+P( 6, 0x34f72c234f72c235UL, 0x08d3dcb08d3dcb08UL) /* 29 */
+P( 2, 0xef7bdef7bdef7bdfUL, 0x0842108421084210UL) /* 31 */
+P( 6, 0x14c1bacf914c1badUL, 0x06eb3e45306eb3e4UL) /* 37 */
+P( 4, 0x8f9c18f9c18f9c19UL, 0x063e7063e7063e70UL) /* 41 */
+P( 2, 0x82fa0be82fa0be83UL, 0x05f417d05f417d05UL) /* 43 */
+P( 4, 0x51b3bea3677d46cfUL, 0x0572620ae4c415c9UL) /* 47 */
+P( 6, 0x21cfb2b78c13521dUL, 0x04d4873ecade304dUL) /* 53 */
+P( 6, 0xcbeea4e1a08ad8f3UL, 0x0456c797dd49c341UL) /* 59 */
+P( 2, 0x4fbcda3ac10c9715UL, 0x04325c53ef368eb0UL) /* 61 */
+P( 6, 0xf0b7672a07a44c6bUL, 0x03d226357e16ece5UL) /* 67 */
+P( 4, 0x193d4bb7e327a977UL, 0x039b0ad12073615aUL) /* 71 */
+P( 2, 0x7e3f1f8fc7e3f1f9UL, 0x0381c0e070381c0eUL) /* 73 */
+P( 6, 0x9b8b577e613716afUL, 0x033d91d2a2067b23UL) /* 79 */
+P( 4, 0xa3784a062b2e43dbUL, 0x03159721ed7e7534UL) /* 83 */
+P( 6, 0xf47e8fd1fa3f47e9UL, 0x02e05c0b81702e05UL) /* 89 */
+P( 8, 0xa3a0fd5c5f02a3a1UL, 0x02a3a0fd5c5f02a3UL) /* 97 */
+P( 4, 0x3a4c0a237c32b16dUL, 0x0288df0cac5b3f5dUL) /* 101 */
+P( 2, 0xdab7ec1dd3431b57UL, 0x027c45979c95204fUL) /* 103 */
+P( 4, 0x77a04c8f8d28ac43UL, 0x02647c69456217ecUL) /* 107 */
+P( 2, 0xa6c0964fda6c0965UL, 0x02593f69b02593f6UL) /* 109 */
+P( 4, 0x90fdbc090fdbc091UL, 0x0243f6f0243f6f02UL) /* 113 */
+P(14, 0x7efdfbf7efdfbf7fUL, 0x0204081020408102UL) /* 127 */
+P( 4, 0x03e88cb3c9484e2bUL, 0x01f44659e4a42715UL) /* 131 */
+P( 6, 0xe21a291c077975b9UL, 0x01de5d6e3f8868a4UL) /* 137 */
+P( 2, 0x3aef6ca970586723UL, 0x01d77b654b82c339UL) /* 139 */
+P(10, 0xdf5b0f768ce2cabdUL, 0x01b7d6c3dda338b2UL) /* 149 */
+P( 2, 0x6fe4dfc9bf937f27UL, 0x01b2036406c80d90UL) /* 151 */
+P( 6, 0x5b4fe5e92c0685b5UL, 0x01a16d3f97a4b01aUL) /* 157 */
+P( 6, 0x1f693a1c451ab30bUL, 0x01920fb49d0e228dUL) /* 163 */
+P( 4, 0x8d07aa27db35a717UL, 0x01886e5f0abb0499UL) /* 167 */
+P( 6, 0x882383b30d516325UL, 0x017ad2208e0ecc35UL) /* 173 */
+P( 6, 0xed6866f8d962ae7bUL, 0x016e1f76b4337c6cUL) /* 179 */
+P( 2, 0x3454dca410f8ed9dUL, 0x016a13cd15372904UL) /* 181 */
+P(10, 0x1d7ca632ee936f3fUL, 0x01571ed3c506b39aUL) /* 191 */
+P( 2, 0x70bf015390948f41UL, 0x015390948f40feacUL) /* 193 */
+P( 4, 0xc96bdb9d3d137e0dUL, 0x014cab88725af6e7UL) /* 197 */
+P( 2, 0x2697cc8aef46c0f7UL, 0x0149539e3b2d066eUL) /* 199 */
+P(12, 0xc0e8f2a76e68575bUL, 0x013698df3de07479UL) /* 211 */
+P(12, 0x687763dfdb43bb1fUL, 0x0125e22708092f11UL) /* 223 */
+P( 4, 0x1b10ea929ba144cbUL, 0x0120b470c67c0d88UL) /* 227 */
+P( 2, 0x1d10c4c0478bbcedUL, 0x011e2ef3b3fb8744UL) /* 229 */
+P( 4, 0x63fb9aeb1fdcd759UL, 0x0119453808ca29c0UL) /* 233 */
+P( 6, 0x64afaa4f437b2e0fUL, 0x0112358e75d30336UL) /* 239 */
+P( 2, 0xf010fef010fef011UL, 0x010fef010fef010fUL) /* 241 */
+P(10, 0x28cbfbeb9a020a33UL, 0x0105197f7d734041UL) /* 251 */
+P( 6, 0xff00ff00ff00ff01UL, 0x00ff00ff00ff00ffUL) /* 257 */
+P( 6, 0xd624fd1470e99cb7UL, 0x00f92fb2211855a8UL) /* 263 */
+P( 6, 0x8fb3ddbd6205b5c5UL, 0x00f3a0d52cba8723UL) /* 269 */
+P( 2, 0xd57da36ca27acdefUL, 0x00f1d48bcee0d399UL) /* 271 */
+P( 6, 0xee70c03b25e4463dUL, 0x00ec979118f3fc4dUL) /* 277 */
+P( 4, 0xc5b1a6b80749cb29UL, 0x00e939651fe2d8d3UL) /* 281 */
+P( 2, 0x47768073c9b97113UL, 0x00e79372e225fe30UL) /* 283 */
+P(10, 0x2591e94884ce32adUL, 0x00dfac1f74346c57UL) /* 293 */
+P(14, 0xf02806abc74be1fbUL, 0x00d578e97c3f5fe5UL) /* 307 */
+P( 4, 0x7ec3e8f3a7198487UL, 0x00d2ba083b445250UL) /* 311 */
+P( 2, 0x58550f8a39409d09UL, 0x00d161543e28e502UL) /* 313 */
+P( 4, 0xec9e48ae6f71de15UL, 0x00cebcf8bb5b4169UL) /* 317 */
+P(14, 0x2ff3a018bfce8063UL, 0x00c5fe740317f9d0UL) /* 331 */
+P( 6, 0x7f9ec3fcf61fe7b1UL, 0x00c2780613c0309eUL) /* 337 */
+P(10, 0x89f5abe570e046d3UL, 0x00bcdd535db1cc5bUL) /* 347 */
+P( 2, 0xda971b23f1545af5UL, 0x00bbc8408cd63069UL) /* 349 */
+P( 4, 0x79d5f00b9a7862a1UL, 0x00b9a7862a0ff465UL) /* 353 */
+P( 6, 0x4dba1df32a128a57UL, 0x00b68d31340e4307UL) /* 359 */
+P( 8, 0x87530217b7747d8fUL, 0x00b2927c29da5519UL) /* 367 */
+P( 6, 0x30baae53bb5e06ddUL, 0x00afb321a1496fdfUL) /* 373 */
+P( 6, 0xee70206c12e9b5b3UL, 0x00aceb0f891e6551UL) /* 379 */
+P( 4, 0xcdde9462ec9dbe7fUL, 0x00ab1cbdd3e2970fUL) /* 383 */
+P( 6, 0xafb64b05ec41cf4dUL, 0x00a87917088e262bUL) /* 389 */
+P( 8, 0x02944ff5aec02945UL, 0x00a513fd6bb00a51UL) /* 397 */
+P( 4, 0x2cb033128382df71UL, 0x00a36e71a2cb0331UL) /* 401 */
+P( 8, 0x1ccacc0c84b1c2a9UL, 0x00a03c1688732b30UL) /* 409 */
+P(10, 0x19a93db575eb3a0bUL, 0x009c69169b30446dUL) /* 419 */
+P( 2, 0xcebeef94fa86fe2dUL, 0x009baade8e4a2f6eUL) /* 421 */
+P(10, 0x6faa77fb3f8df54fUL, 0x00980e4156201301UL) /* 431 */
+P( 2, 0x68a58af00975a751UL, 0x00975a750ff68a58UL) /* 433 */
+P( 6, 0xd56e36d0c3efac07UL, 0x009548e4979e0829UL) /* 439 */
+P( 4, 0xd8b44c47a8299b73UL, 0x0093efd1c50e726bUL) /* 443 */
+P( 6, 0x02d9ccaf9ba70e41UL, 0x0091f5bcb8bb02d9UL) /* 449 */
+P( 8, 0x0985e1c023d9e879UL, 0x008f67a1e3fdc261UL) /* 457 */
+P( 4, 0x2a343316c494d305UL, 0x008e2917e0e702c6UL) /* 461 */
+P( 2, 0x70cb7916ab67652fUL, 0x008d8be33f95d715UL) /* 463 */
+P( 4, 0xd398f132fb10fe5bUL, 0x008c55841c815ed5UL) /* 467 */
+P(12, 0x6f2a38a6bf54fa1fUL, 0x0088d180cd3a4133UL) /* 479 */
+P( 8, 0x211df689b98f81d7UL, 0x00869222b1acf1ceUL) /* 487 */
+P( 4, 0x0e994983e90f1ec3UL, 0x0085797b917765abUL) /* 491 */
+P( 8, 0xad671e44bed87f3bUL, 0x008355ace3c897dbUL) /* 499 */
+P( 4, 0xf9623a0516e70fc7UL, 0x00824a4e60b3262bUL) /* 503 */
+P( 6, 0x4b7129be9dece355UL, 0x0080c121b28bd1baUL) /* 509 */
+P(12, 0x190f3b7473f62c39UL, 0x007dc9f3397d4c29UL) /* 521 */
+P( 2, 0x63dacc9aad46f9a3UL, 0x007d4ece8fe88139UL) /* 523 */
+P(18, 0xc1108fda24e8d035UL, 0x0079237d65bcce50UL) /* 541 */
+P( 6, 0xb77578472319bd8bUL, 0x0077cf53c5f7936cUL) /* 547 */
+P(10, 0x473d20a1c7ed9da5UL, 0x0075a8accfbdd11eUL) /* 557 */
+P( 6, 0xfbe85af0fea2c8fbUL, 0x007467ac557c228eUL) /* 563 */
+P( 6, 0x58a1f7e6ce0f4c09UL, 0x00732d70ed8db8e9UL) /* 569 */
+P( 2, 0x1a00e58c544986f3UL, 0x0072c62a24c3797fUL) /* 571 */
+P( 6, 0x7194a17f55a10dc1UL, 0x007194a17f55a10dUL) /* 577 */
+P(10, 0x7084944785e33763UL, 0x006fa549b41da7e7UL) /* 587 */
+P( 6, 0xba10679bd84886b1UL, 0x006e8419e6f61221UL) /* 593 */
+P( 6, 0xebe9c6bb31260967UL, 0x006d68b5356c207bUL) /* 599 */
+P( 2, 0x97a3fe4bd1ff25e9UL, 0x006d0b803685c01bUL) /* 601 */
+P( 6, 0x6c6388395b84d99fUL, 0x006bf790a8b2d207UL) /* 607 */
+P( 6, 0x8c51da6a1335df6dUL, 0x006ae907ef4b96c2UL) /* 613 */
+P( 4, 0x46f3234475d5add9UL, 0x006a37991a23aeadUL) /* 617 */
+P( 2, 0x905605ca3c619a43UL, 0x0069dfbdd4295b66UL) /* 619 */
+P(12, 0xcee8dff304767747UL, 0x0067dc4c45c8033eUL) /* 631 */
+P(10, 0xff99c27f00663d81UL, 0x00663d80ff99c27fUL) /* 641 */
+P( 2, 0xacca407f671ddc2bUL, 0x0065ec17e3559948UL) /* 643 */
+P( 4, 0xe71298bac1e12337UL, 0x00654ac835cfba5cUL) /* 647 */
+P( 6, 0xfa1e94309cd09045UL, 0x00645c854ae10772UL) /* 653 */
+P( 6, 0xbebccb8e91496b9bUL, 0x006372990e5f901fUL) /* 659 */
+P( 2, 0x312fa30cc7d7b8bdUL, 0x006325913c07beefUL) /* 661 */
+P(12, 0x6160ff9e9f006161UL, 0x006160ff9e9f0061UL) /* 673 */
+P( 4, 0x6b03673b5e28152dUL, 0x0060cdb520e5e88eUL) /* 677 */
+P( 6, 0xfe802ffa00bfe803UL, 0x005ff4017fd005ffUL) /* 683 */
+P( 8, 0xe66fe25c9e907c7bUL, 0x005ed79e31a4dccdUL) /* 691 */
+P(10, 0x3f8b236c76528895UL, 0x005d7d42d48ac5efUL) /* 701 */
+P( 8, 0xf6f923bf01ce2c0dUL, 0x005c6f35ccba5028UL) /* 709 */
+P(10, 0x6c3d3d98bed7c42fUL, 0x005b2618ec6ad0a5UL) /* 719 */
+P( 8, 0x30981efcd4b010e7UL, 0x005a2553748e42e7UL) /* 727 */
+P( 6, 0x6f691fc81ebbe575UL, 0x0059686cf744cd5bUL) /* 733 */
+P( 6, 0xb10480ddb47b52cbUL, 0x0058ae97bab79976UL) /* 739 */
+P( 4, 0x74cd59ed64f3f0d7UL, 0x0058345f1876865fUL) /* 743 */
+P( 8, 0x0105cb81316d6c0fUL, 0x005743d5bb24795aUL) /* 751 */
+P( 6, 0x9be64c6d91c1195dUL, 0x005692c4d1ab74abUL) /* 757 */
+P( 4, 0x71b3f945a27b1f49UL, 0x00561e46a4d5f337UL) /* 761 */
+P( 8, 0x77d80d50e508fd01UL, 0x005538ed06533997UL) /* 769 */
+P( 4, 0xa5eb778e133551cdUL, 0x0054c807f2c0bec2UL) /* 773 */
+P(14, 0x18657d3c2d8a3f1bUL, 0x005345efbc572d36UL) /* 787 */
+P(10, 0x2e40e220c34ad735UL, 0x00523a758f941345UL) /* 797 */
+P(12, 0xa76593c70a714919UL, 0x005102370f816c89UL) /* 809 */
+P( 2, 0x1eef452124eea383UL, 0x0050cf129fb94acfUL) /* 811 */
+P(10, 0x38206dc242ba771dUL, 0x004fd31941cafdd1UL) /* 821 */
+P( 2, 0x4cd4c35807772287UL, 0x004fa1704aa75945UL) /* 823 */
+P( 4, 0x83de917d5e69ddf3UL, 0x004f3ed6d45a63adUL) /* 827 */
+P( 2, 0x882ef0403b4a6c15UL, 0x004f0de57154ebedUL) /* 829 */
+P(10, 0xf8fb6c51c606b677UL, 0x004e1cae8815f811UL) /* 839 */
+P(14, 0xb4abaac446d3e1fdUL, 0x004cd47ba5f6ff19UL) /* 853 */
+P( 4, 0xa9f83bbe484a14e9UL, 0x004c78ae734df709UL) /* 857 */
+P( 2, 0x0bebbc0d1ce874d3UL, 0x004c4b19ed85cfb8UL) /* 859 */
+P( 4, 0xbd418eaf0473189fUL, 0x004bf093221d1218UL) /* 863 */
+P(14, 0x44e3af6f372b7e65UL, 0x004aba3c21dc633fUL) /* 877 */
+P( 4, 0xc87fdace4f9e5d91UL, 0x004a6360c344de00UL) /* 881 */
+P( 2, 0xec93479c446bd9bbUL, 0x004a383e9f74d68aUL) /* 883 */
+P( 4, 0xdac4d592e777c647UL, 0x0049e28fbabb9940UL) /* 887 */
+P(20, 0xa63ea8c8f61f0c23UL, 0x0048417b57c78cd7UL) /* 907 */
+P( 4, 0xe476062ea5cbbb6fUL, 0x0047f043713f3a2bUL) /* 911 */
+P( 8, 0xdf68761c69daac27UL, 0x00474ff2a10281cfUL) /* 919 */
+P(10, 0xb813d737637aa061UL, 0x00468b6f9a978f91UL) /* 929 */
+P( 8, 0xa3a77aac1fb15099UL, 0x0045f13f1caff2e2UL) /* 937 */
+P( 4, 0x17f0c3e0712c5825UL, 0x0045a5228cec23e9UL) /* 941 */
+P( 6, 0xfd912a70ff30637bUL, 0x0045342c556c66b9UL) /* 947 */
+P( 6, 0xfbb3b5dc01131289UL, 0x0044c4a23feeced7UL) /* 953 */
+P(14, 0x856d560a0f5acdf7UL, 0x0043c5c20d3c9fe6UL) /* 967 */
+P( 4, 0x96472f314d3f89e3UL, 0x00437e494b239798UL) /* 971 */
+P( 6, 0xa76f5c7ed2253531UL, 0x0043142d118e47cbUL) /* 977 */
+P( 6, 0x816eae7c7bf69fe7UL, 0x0042ab5c73a13458UL) /* 983 */
+P( 8, 0xb6a2bea4cfb1781fUL, 0x004221950db0f3dbUL) /* 991 */
+P( 6, 0xa3900c53318e81edUL, 0x0041bbb2f80a4553UL) /* 997 */
+P(12, 0x60aa7f5d9f148d11UL, 0x0040f391612c6680UL) /* 1009 */
+P( 4, 0x6be8c0102c7a505dUL, 0x0040b1e94173fefdUL) /* 1013 */
+P( 6, 0x8ff3f0ed28728f33UL, 0x004050647d9d0445UL) /* 1019 */
+P( 2, 0x680e0a87e5ec7155UL, 0x004030241b144f3bUL) /* 1021 */
+P(10, 0xbbf70fa49fe829b7UL, 0x003f90c2ab542cb1UL) /* 1031 */
+P( 2, 0xd69d1e7b6a50ca39UL, 0x003f71412d59f597UL) /* 1033 */
+P( 6, 0x1a1e0f46b6d26aefUL, 0x003f137701b98841UL) /* 1039 */
+P(10, 0x7429f9a7a8251829UL, 0x003e79886b60e278UL) /* 1049 */
+P( 2, 0xd9c2219d1b863613UL, 0x003e5b1916a7181dUL) /* 1051 */
+P(10, 0x91406c1820d077adUL, 0x003dc4a50968f524UL) /* 1061 */
+P( 2, 0x521f4ec02e3d2b97UL, 0x003da6e4c9550321UL) /* 1063 */
+P( 6, 0xbb8283b63dc8eba5UL, 0x003d4e4f06f1def3UL) /* 1069 */
+P(18, 0x431eda153229ebbfUL, 0x003c4a6bdd24f9a4UL) /* 1087 */
+P( 4, 0xaf0bf78d7e01686bUL, 0x003c11d54b525c73UL) /* 1091 */
+P( 2, 0xa9ced0742c086e8dUL, 0x003bf5b1c5721065UL) /* 1093 */
+P( 4, 0xc26458ad9f632df9UL, 0x003bbdb9862f23b4UL) /* 1097 */
+P( 6, 0xbbff1255dff892afUL, 0x003b6a8801db5440UL) /* 1103 */
+P( 6, 0xcbd49a333f04d8fdUL, 0x003b183cf0fed886UL) /* 1109 */
+P( 8, 0xec84ed6f9cfdeff5UL, 0x003aabe394bdc3f4UL) /* 1117 */
+P( 6, 0x97980cc40bda9d4bUL, 0x003a5ba3e76156daUL) /* 1123 */
+P( 6, 0x777f34d524f5cbd9UL, 0x003a0c3e953378dbUL) /* 1129 */
+P(22, 0x2797051d94cbbb7fUL, 0x0038f03561320b1eUL) /* 1151 */
+P( 2, 0xea769051b4f43b81UL, 0x0038d6ecaef5908aUL) /* 1153 */
+P(10, 0xce7910f3034d4323UL, 0x003859cf221e6069UL) /* 1163 */
+P( 8, 0x92791d1374f5b99bUL, 0x0037f7415dc9588aUL) /* 1171 */
+P(10, 0x89a5645cc68ea1b5UL, 0x00377df0d3902626UL) /* 1181 */
+P( 6, 0x5f8aacf796c0cf0bUL, 0x00373622136907faUL) /* 1187 */
+P( 6, 0xf2e90a15e33edf99UL, 0x0036ef0c3b39b92fUL) /* 1193 */
+P( 8, 0x8e99e5feb897c451UL, 0x0036915f47d55e6dUL) /* 1201 */
+P(12, 0xaca2eda38fb91695UL, 0x0036072cf3f866fdUL) /* 1213 */
+P( 4, 0x5d9b737be5ea8b41UL, 0x0035d9b737be5ea8UL) /* 1217 */
+P( 6, 0x4aefe1db93fd7cf7UL, 0x0035961559cc81c7UL) /* 1223 */
+P( 6, 0xa0994ef20b3f8805UL, 0x0035531c897a4592UL) /* 1229 */
+P( 2, 0x103890bda912822fUL, 0x00353ceebd3e98a4UL) /* 1231 */
+P( 6, 0xb441659d13a9147dUL, 0x0034fad381585e5eUL) /* 1237 */
+P(12, 0x1e2134440c4c3f21UL, 0x00347884d1103130UL) /* 1249 */
+P(10, 0x263a27727a6883c3UL, 0x00340dd3ac39bf56UL) /* 1259 */
+P(18, 0x78e221472ab33855UL, 0x003351fdfecc140cUL) /* 1277 */
+P( 2, 0x95eac88e82e6faffUL, 0x00333d72b089b524UL) /* 1279 */
+P( 4, 0xf66c258317be8dabUL, 0x0033148d44d6b261UL) /* 1283 */
+P( 6, 0x09ee202c7cb91939UL, 0x0032d7aef8412458UL) /* 1289 */
+P( 2, 0x8d2fca1042a09ea3UL, 0x0032c3850e79c0f1UL) /* 1291 */
+P( 6, 0x82779c856d8b8bf1UL, 0x00328766d59048a2UL) /* 1297 */
+P( 4, 0x3879361cba8a223dUL, 0x00325fa18cb11833UL) /* 1301 */
+P( 2, 0xf23f43639c3182a7UL, 0x00324bd659327e22UL) /* 1303 */
+P( 4, 0xa03868fc474bcd13UL, 0x0032246e784360f4UL) /* 1307 */
+P(12, 0x651e78b8c5311a97UL, 0x0031afa5f1a33a08UL) /* 1319 */
+P( 2, 0x8ffce639c00c6719UL, 0x00319c63ff398e70UL) /* 1321 */
+P( 6, 0xf7b460754b0b61cfUL, 0x003162f7519a86a7UL) /* 1327 */
+P(34, 0x7b03f3359b8e63b1UL, 0x0030271fc9d3fc3cUL) /* 1361 */
+P( 6, 0xa55c5326041eb667UL, 0x002ff104ae89750bUL) /* 1367 */
+P( 6, 0x647f88ab896a76f5UL, 0x002fbb62a236d133UL) /* 1373 */
+P( 8, 0x8fd971434a55a46dUL, 0x002f74997d2070b4UL) /* 1381 */
+P(18, 0x9fbf969958046447UL, 0x002ed84aa8b6fce3UL) /* 1399 */
+P(10, 0x9986feba69be3a81UL, 0x002e832df7a46dbdUL) /* 1409 */
+P(14, 0xa668b3e6d053796fUL, 0x002e0e0846857cabUL) /* 1423 */
+P( 4, 0x97694e6589f4e09bUL, 0x002decfbdfb55ee6UL) /* 1427 */
+P( 2, 0x37890c00b7721dbdUL, 0x002ddc876f3ff488UL) /* 1429 */
+P( 4, 0x5ac094a235f37ea9UL, 0x002dbbc1d4c482c4UL) /* 1433 */
+P( 6, 0x31cff775f2d5d65fUL, 0x002d8af0e0de0556UL) /* 1439 */
+P( 8, 0xddad8e6b36505217UL, 0x002d4a7b7d14b30aUL) /* 1447 */
+P( 4, 0x5a27df897062cd03UL, 0x002d2a85073bcf4eUL) /* 1451 */
+P( 2, 0xe2396fe0fdb5a625UL, 0x002d1a9ab13e8be4UL) /* 1453 */
+P( 6, 0xb352a4957e82317bUL, 0x002ceb1eb4b9fd8bUL) /* 1459 */
+P(12, 0xd8ab3f2c60c2ea3fUL, 0x002c8d503a79794cUL) /* 1471 */
+P(10, 0x6893f702f0452479UL, 0x002c404d708784edUL) /* 1481 */
+P( 2, 0x9686fdc182acf7e3UL, 0x002c31066315ec52UL) /* 1483 */
+P( 4, 0x6854037173dce12fUL, 0x002c1297d80f2664UL) /* 1487 */
+P( 2, 0x7f0ded1685c27331UL, 0x002c037044c55f6bUL) /* 1489 */
+P( 4, 0xeeda72e1fe490b7dUL, 0x002be5404cd13086UL) /* 1493 */
+P( 6, 0x9e7bfc959a8e6e53UL, 0x002bb845adaf0cceUL) /* 1499 */
+P(12, 0x49b314d6d4753dd7UL, 0x002b5f62c639f16dUL) /* 1511 */
+P(12, 0x2e8f8c5ac4aa1b3bUL, 0x002b07e6734f2b88UL) /* 1523 */
+P( 8, 0xb8ef723481163d33UL, 0x002ace569d8342b7UL) /* 1531 */
+P(12, 0x6a2ec96a594287b7UL, 0x002a791d5dbd4dcfUL) /* 1543 */
+P( 6, 0xdba41c6d13aab8c5UL, 0x002a4eff8113017cUL) /* 1549 */
+P( 4, 0xc2adbe648dc3aaf1UL, 0x002a3319e156df32UL) /* 1553 */
+P( 6, 0x87a2bade565f91a7UL, 0x002a0986286526eaUL) /* 1559 */
+P( 8, 0x4d6fe8798c01f5dfUL, 0x0029d29551d91e39UL) /* 1567 */
+P( 4, 0x3791310c8c23d98bUL, 0x0029b7529e109f0aUL) /* 1571 */
+P( 8, 0xf80e446b01228883UL, 0x00298137491ea465UL) /* 1579 */
+P( 4, 0x9aed1436fbf500cfUL, 0x0029665e1eb9f9daUL) /* 1583 */
+P(14, 0x7839b54cc8b24115UL, 0x002909752e019a5eUL) /* 1597 */
+P( 4, 0xc128c646ad0309c1UL, 0x0028ef35e2e5efb0UL) /* 1601 */
+P( 6, 0x14de631624a3c377UL, 0x0028c815aa4b8278UL) /* 1607 */
+P( 2, 0x3f7b9fe68b0ecbf9UL, 0x0028bb1b867199daUL) /* 1609 */
+P( 4, 0x284ffd75ec00a285UL, 0x0028a13ff5d7b002UL) /* 1613 */
+P( 6, 0x37803cb80dea2ddbUL, 0x00287ab3f173e755UL) /* 1619 */
+P( 2, 0x86b63f7c9ac4c6fdUL, 0x00286dead67713bdUL) /* 1621 */
+P( 6, 0x8b6851d1bd99b9d3UL, 0x002847bfcda6503eUL) /* 1627 */
+P(10, 0xb62fda77ca343b6dUL, 0x002808c1ea6b4777UL) /* 1637 */
+P(20, 0x1f0dc009e34383c9UL, 0x00278d0e0f23ff61UL) /* 1657 */
+P( 6, 0x496dc21ddd35b97fUL, 0x002768863c093c7fUL) /* 1663 */
+P( 4, 0xb0e96ce17090f82bUL, 0x0027505115a73ca8UL) /* 1667 */
+P( 2, 0xaadf05acdd7d024dUL, 0x00274441a61dc1b9UL) /* 1669 */
+P(24, 0xcb138196746eafb5UL, 0x0026b5c166113cf0UL) /* 1693 */
+P( 4, 0x347f523736755d61UL, 0x00269e65ad07b18eUL) /* 1697 */
+P( 2, 0xd14a48a051f7dd0bUL, 0x002692c25f877560UL) /* 1699 */
+P(10, 0x474d71b1ce914d25UL, 0x002658fa7523cd11UL) /* 1709 */
+P(12, 0x386063f5e28c1f89UL, 0x0026148710cf0f9eUL) /* 1721 */
+P( 2, 0x1db7325e32d04e73UL, 0x002609363b22524fUL) /* 1723 */
+P(10, 0xfef748d3893b880dUL, 0x0025d1065a1c1122UL) /* 1733 */
+P( 8, 0x2f3351506e935605UL, 0x0025a48a382b863fUL) /* 1741 */
+P( 6, 0x7a3637fa2376415bUL, 0x0025837190eccdbcUL) /* 1747 */
+P( 6, 0x4ac525d2baa21969UL, 0x00256292e95d510cUL) /* 1753 */
+P( 6, 0x3a11c16b42cd351fUL, 0x002541eda98d068cUL) /* 1759 */
+P(18, 0x6c7abde0049c2a11UL, 0x0024e15087fed8f5UL) /* 1777 */
+P( 6, 0x54dad0303e069ac7UL, 0x0024c18b20979e5dUL) /* 1783 */
+P( 4, 0xebf1ac9fdfe91433UL, 0x0024ac7b336de0c5UL) /* 1787 */
+P( 2, 0xfafdda8237cec655UL, 0x0024a1fc478c60bbUL) /* 1789 */
+P(12, 0xdce3ff6e71ffb739UL, 0x002463801231c009UL) /* 1801 */
+P(10, 0xbed5737d6286db1bUL, 0x0024300fd506ed33UL) /* 1811 */
+P(12, 0xe479e431fe08b4dfUL, 0x0023f314a494da81UL) /* 1823 */
+P( 8, 0x9dd9b0dd7742f897UL, 0x0023cadedd2fad3aUL) /* 1831 */
+P(16, 0x8f09d7402c5a5e87UL, 0x00237b7ed2664a03UL) /* 1847 */
+P(14, 0x9216d5c4d958738dUL, 0x0023372967dbaf1dUL) /* 1861 */
+P( 6, 0xb3139ba11d34ca63UL, 0x00231a308a371f20UL) /* 1867 */
+P( 4, 0x47d54f7ed644afafUL, 0x002306fa63e1e600UL) /* 1871 */
+P( 2, 0x92a81d85cf11a1b1UL, 0x0022fd6731575684UL) /* 1873 */
+P( 4, 0x754b26533253bdfdUL, 0x0022ea507805749cUL) /* 1877 */
+P( 2, 0xbbe0efc980bfd467UL, 0x0022e0cce8b3d720UL) /* 1879 */
+P(10, 0xc0d8d594f024dca1UL, 0x0022b1887857d161UL) /* 1889 */
+P(12, 0x8238d43bcaac1a65UL, 0x00227977fcc49cc0UL) /* 1901 */
+P( 6, 0x27779c1fae6175bbUL, 0x00225db37b5e5f4fUL) /* 1907 */
+P( 6, 0xa746ca9af708b2c9UL, 0x0022421b91322ed6UL) /* 1913 */
+P(18, 0x93f3cd9f389be823UL, 0x0021f05b35f52102UL) /* 1931 */
+P( 2, 0x5cb4a4c04c489345UL, 0x0021e75de5c70d60UL) /* 1933 */
+P(16, 0xbf6047743e85b6b5UL, 0x0021a01d6c19be96UL) /* 1949 */
+P( 2, 0x61c147831563545fUL, 0x0021974a6615c81aUL) /* 1951 */
+P(22, 0xedb47c0ae62dee9dUL, 0x00213767697cf36aUL) /* 1973 */
+P( 6, 0x0a3824386673a573UL, 0x00211d9f7fad35f1UL) /* 1979 */
+P( 8, 0xa4a77d19e575a0ebUL, 0x0020fb7d9dd36c18UL) /* 1987 */
+P( 6, 0xa2bee045e066c279UL, 0x0020e2123d661e0eUL) /* 1993 */
+P( 4, 0xc23618de8ab43d05UL, 0x0020d135b66ae990UL) /* 1997 */
+P( 2, 0x266b515216cb9f2fUL, 0x0020c8cded4d7a8eUL) /* 1999 */
+P( 4, 0xe279edd9e9c2e85bUL, 0x0020b80b3f43ddbfUL) /* 2003 */
+P( 8, 0xd0c591c221dc9c53UL, 0x002096b9180f46a6UL) /* 2011 */
+P( 6, 0x06da8ee9c9ee7c21UL, 0x00207de7e28de5daUL) /* 2017 */
+P(10, 0x9dfebcaf4c27e8c3UL, 0x002054dec8cf1fb3UL) /* 2027 */
+P( 2, 0x49aeff9f19dd6de5UL, 0x00204cb630b3aab5UL) /* 2029 */
+P(10, 0x86976a57a296e9c7UL, 0x00202428adc37bebUL) /* 2039 */
+P(14, 0xa3b9abf4872b84cdUL, 0x001fec0c7834def4UL) /* 2053 */
+P(10, 0x34fca6483895e6efUL, 0x001fc46fae98a1d0UL) /* 2063 */
+P( 6, 0x34b5a333988f873dUL, 0x001facda430ff619UL) /* 2069 */
+P(12, 0xd9dd4f19b5f17be1UL, 0x001f7e17dd8e15e5UL) /* 2081 */
+P( 2, 0xb935b507fd0ce78bUL, 0x001f765a3556a4eeUL) /* 2083 */
+P( 4, 0xb450f5540660e797UL, 0x001f66ea49d802f1UL) /* 2087 */
+P( 2, 0x63ff82831ffc1419UL, 0x001f5f3800faf9c0UL) /* 2089 */
+P(10, 0x8992f718c22a32fbUL, 0x001f38f4e6c0f1f9UL) /* 2099 */
+P(12, 0x5f3253ad0d37e7bfUL, 0x001f0b8546752578UL) /* 2111 */
+P( 2, 0x007c0ffe0fc007c1UL, 0x001f03ff83f001f0UL) /* 2113 */
+P(16, 0x4d8ebadc0c0640b1UL, 0x001ec853b0a3883cUL) /* 2129 */
+P( 2, 0xe2729af831037bdbUL, 0x001ec0ee573723ebUL) /* 2131 */
+P( 6, 0xb8f64bf30feebfe9UL, 0x001eaad38e6f6894UL) /* 2137 */
+P( 4, 0xda93124b544c0bf5UL, 0x001e9c28a765fe53UL) /* 2141 */
+P( 2, 0x9cf7ff0b593c539fUL, 0x001e94d8758c2003UL) /* 2143 */
+P(10, 0xd6bd8861fa0e07d9UL, 0x001e707ba8f65e68UL) /* 2153 */
+P( 8, 0x5cfe75c0bd8ab891UL, 0x001e53a2a68f574eUL) /* 2161 */
+P(18, 0x43e808757c2e862bUL, 0x001e1380a56b438dUL) /* 2179 */
+P(24, 0x90caa96d595c9d93UL, 0x001dbf9f513a3802UL) /* 2203 */
+P( 4, 0x8fd550625d07135fUL, 0x001db1d1d58bc600UL) /* 2207 */
+P( 6, 0x76b010a86e209f2dUL, 0x001d9d358f53de38UL) /* 2213 */
+P( 8, 0xecc0426447769b25UL, 0x001d81e6df6165c7UL) /* 2221 */
+P(16, 0xe381339caabe3295UL, 0x001d4bdf7fd40e30UL) /* 2237 */
+P( 2, 0xd1b190a2d0c7673fUL, 0x001d452c7a1c958dUL) /* 2239 */
+P( 4, 0xc3bce3cf26b0e7ebUL, 0x001d37cf9b902659UL) /* 2243 */
+P( 8, 0x5f87e76f56c61ce3UL, 0x001d1d3a5791e97bUL) /* 2251 */
+P(16, 0xc06c6857a124b353UL, 0x001ce89fe6b47416UL) /* 2267 */
+P( 2, 0x38c040fcba630f75UL, 0x001ce219f3235071UL) /* 2269 */
+P( 4, 0xd078bc4fbd533b21UL, 0x001cd516dcf92139UL) /* 2273 */
+P( 8, 0xde8e15c5dd354f59UL, 0x001cbb33bd1c2b8bUL) /* 2281 */
+P( 6, 0xca61d53d7414260fUL, 0x001ca7e7d2546688UL) /* 2287 */
+P( 6, 0xb56bf5ba8eae635dUL, 0x001c94b5c1b3dbd3UL) /* 2293 */
+P( 4, 0x44a72cb0fb6e3949UL, 0x001c87f7f9c241c1UL) /* 2297 */
+P(12, 0x879839a714f45bcdUL, 0x001c6202706c35a9UL) /* 2309 */
+P( 2, 0x02a8994fde5314b7UL, 0x001c5bb8a9437632UL) /* 2311 */
+P(22, 0xb971920cf2b90135UL, 0x001c174343b4111eUL) /* 2333 */
+P( 6, 0x8a8fd0b7df9a6e8bUL, 0x001c04d0d3e46b42UL) /* 2339 */
+P( 2, 0xb31f9a84c1c6eaadUL, 0x001bfeb00fbf4308UL) /* 2341 */
+P( 6, 0x92293b02823c6d83UL, 0x001bec5dce0b202dUL) /* 2347 */
+P( 4, 0xeee77ff20fe5ddcfUL, 0x001be03444620037UL) /* 2351 */
+P( 6, 0x0e1ea0f6c496c11dUL, 0x001bce09c66f6fc3UL) /* 2357 */
+P(14, 0xfdf2d3d6f88ccb6bUL, 0x001ba40228d02b30UL) /* 2371 */
+P( 6, 0xfa9d74a3457738f9UL, 0x001b9225b1cf8919UL) /* 2377 */
+P( 4, 0xefc3ca3db71a5785UL, 0x001b864a2ff3f53fUL) /* 2381 */
+P( 2, 0x8e2071718d0d6dafUL, 0x001b80604150e49bUL) /* 2383 */
+P( 6, 0xbc0fdbfeb6cfabfdUL, 0x001b6eb1aaeaacf3UL) /* 2389 */
+P( 4, 0x1eeab613e5e5aee9UL, 0x001b62f48da3c8ccUL) /* 2393 */
+P( 6, 0x2d2388e90e9e929fUL, 0x001b516babe96092UL) /* 2399 */
+P(12, 0x81dbafba588ddb43UL, 0x001b2e9cef1e0c87UL) /* 2411 */
+P( 6, 0x52eebc51c4799791UL, 0x001b1d56bedc849bUL) /* 2417 */
+P( 6, 0x1c6bc4693b45a047UL, 0x001b0c267546aec0UL) /* 2423 */
+P(14, 0x06eee0974498874dUL, 0x001ae45f62024fa0UL) /* 2437 */
+P( 4, 0xd85b7377a9953cb9UL, 0x001ad917631b5f54UL) /* 2441 */
+P( 6, 0x4b6df412d4caf56fUL, 0x001ac83d18cb608fUL) /* 2447 */
+P(12, 0x6b8afbbb4a053493UL, 0x001aa6c7ad8c063fUL) /* 2459 */
+P( 8, 0xcc5299c96ac7720bUL, 0x001a90a7b1228e2aUL) /* 2467 */
+P( 6, 0xadce84b5c710aa99UL, 0x001a8027c03ba059UL) /* 2473 */
+P( 4, 0x9d673f5aa3804225UL, 0x001a7533289deb89UL) /* 2477 */
+P(26, 0xe6541268efbce7f7UL, 0x001a2ed7ce16b49fUL) /* 2503 */
+P(18, 0xfcf41e76cf5be669UL, 0x0019fefc0a279a73UL) /* 2521 */
+P(10, 0x5c3eb5dc31c383cbUL, 0x0019e4b0cd873b5fUL) /* 2531 */
+P( 8, 0x301832d11d8ad6c3UL, 0x0019cfcdfd60e514UL) /* 2539 */
+P( 4, 0x2e9c0942f1ce450fUL, 0x0019c56932d66c85UL) /* 2543 */
+P( 6, 0x97f3f2be37a39a5dUL, 0x0019b5e1ab6fc7c2UL) /* 2549 */
+P( 2, 0xe8b7d8a9654187c7UL, 0x0019b0b8a62f2a73UL) /* 2551 */
+P( 6, 0xb5d024d7da5b1b55UL, 0x0019a149fc98942cUL) /* 2557 */
+P(22, 0xb8ba9d6e7ae3501bUL, 0x001969517ec25b85UL) /* 2579 */
+P(12, 0xf50865f71b90f1dfUL, 0x00194b3083360ba8UL) /* 2591 */
+P( 2, 0x739c1682847df9e1UL, 0x00194631f4bebdc1UL) /* 2593 */
+P(16, 0xc470a4d842b90ed1UL, 0x00191e84127268fdUL) /* 2609 */
+P( 8, 0x1fb1be11698cc409UL, 0x00190adbb543984fUL) /* 2617 */
+P( 4, 0xd8d5512a7cd35d15UL, 0x001901130bd18200UL) /* 2621 */
+P(12, 0xa5496821723e07f9UL, 0x0018e3e6b889ac94UL) /* 2633 */
+P(14, 0xbcc8c6d7abaa8167UL, 0x0018c233420e1ec1UL) /* 2647 */
+P(10, 0x52c396c95eb619a1UL, 0x0018aa5872d92bd6UL) /* 2657 */
+P( 2, 0x6eb7e380878ec74bUL, 0x0018a5989945ccf9UL) /* 2659 */
+P( 4, 0x3d5513b504537157UL, 0x00189c1e60b57f60UL) /* 2663 */
+P( 8, 0x314391f8862e948fUL, 0x0018893fbc8690b9UL) /* 2671 */
+P( 6, 0xdc0b17cfcd81f5ddUL, 0x00187b2bb3e1041cUL) /* 2677 */
+P( 6, 0x2f6bea3ec89044b3UL, 0x00186d27c9cdcfb8UL) /* 2683 */
+P( 4, 0xce13a05869f1b57fUL, 0x001863d8bf4f2c1cUL) /* 2687 */
+P( 2, 0x7593474e8ace3581UL, 0x00185f33e2ad7593UL) /* 2689 */
+P( 4, 0x07fc329295a05e4dUL, 0x001855ef75973e13UL) /* 2693 */
+P( 6, 0xb05377cba4908d23UL, 0x001848160153f134UL) /* 2699 */
+P( 8, 0xe7b2131a628aa39bUL, 0x001835b72e6f0656UL) /* 2707 */
+P( 4, 0x9031dbed7de01527UL, 0x00182c922d83eb39UL) /* 2711 */
+P( 2, 0x76844b1c670aa9a9UL, 0x0018280243c0365aUL) /* 2713 */
+P( 6, 0x6a03f4533b08915fUL, 0x00181a5cd5898e73UL) /* 2719 */
+P(10, 0x1dbca579db0a3999UL, 0x001803c0961773aaUL) /* 2729 */
+P( 2, 0x002ffe800bffa003UL, 0x0017ff4005ffd001UL) /* 2731 */
+P(10, 0x478ab1a3e936139dUL, 0x0017e8d670433edbUL) /* 2741 */
+P( 8, 0x66e722bc4c5cc095UL, 0x0017d7066cf4bb5dUL) /* 2749 */
+P( 4, 0x7a8f63c717278541UL, 0x0017ce285b806b1fUL) /* 2753 */
+P(14, 0xdf6eee24d292bc2fUL, 0x0017af52cdf27e02UL) /* 2767 */
+P(10, 0x9fc20d17237dd569UL, 0x0017997d47d01039UL) /* 2777 */
+P(12, 0xcdf9932356bda2edUL, 0x00177f7ec2c6d0baUL) /* 2789 */
+P( 2, 0x97b5e332e80f68d7UL, 0x00177b2f3cd00756UL) /* 2791 */
+P( 6, 0x46eee26fd875e2e5UL, 0x00176e4a22f692a0UL) /* 2797 */
+P( 4, 0x3548a8e65157a611UL, 0x001765b94271e11bUL) /* 2801 */
+P( 2, 0xc288d03be9b71e3bUL, 0x001761732b044ae4UL) /* 2803 */
+P(16, 0x8151186db38937abUL, 0x00173f7a5300a2bcUL) /* 2819 */
+P(14, 0x7800b910895a45f1UL, 0x001722112b48be1fUL) /* 2833 */
+P( 4, 0xaee0b024182eec3dUL, 0x001719b7a16eb843UL) /* 2837 */
+P( 6, 0x96323eda173b5713UL, 0x00170d3c99cc5052UL) /* 2843 */
+P( 8, 0x0ed0dbd03ae77c8bUL, 0x0016fcad7aed3bb6UL) /* 2851 */
+P( 6, 0xf73800b7828dc119UL, 0x0016f051b8231ffdUL) /* 2857 */
+P( 4, 0x1b61715ec22b7ca5UL, 0x0016e81beae20643UL) /* 2861 */
+P(18, 0xa8533a991ead64bfUL, 0x0016c3721584c1d8UL) /* 2879 */
+P( 8, 0x7f6c7290e46c2e77UL, 0x0016b34c2ba09663UL) /* 2887 */
+P(10, 0x6325e8d907b01db1UL, 0x00169f3ce292ddcdUL) /* 2897 */
+P( 6, 0x28909f70152a1067UL, 0x00169344b2220a0dUL) /* 2903 */
+P( 6, 0xea7077af0997a0f5UL, 0x001687592593c1b1UL) /* 2909 */
+P( 8, 0x7e605cad10c32e6dUL, 0x00167787f1418ec9UL) /* 2917 */
+P(10, 0x471b33570635b38fUL, 0x001663e190395ff2UL) /* 2927 */
+P(12, 0xab559fa997a61bb3UL, 0x00164c7a4b6eb5b3UL) /* 2939 */
+P(14, 0xad4bdae562bddab9UL, 0x0016316a061182fdUL) /* 2953 */
+P( 4, 0x055e1b2f2ed62f45UL, 0x001629ba914584e4UL) /* 2957 */
+P( 6, 0x03cd328b1a2dca9bUL, 0x00161e3d57de21b2UL) /* 2963 */
+P( 6, 0xd28f4e08733218a9UL, 0x001612cc01b977f0UL) /* 2969 */
+P( 2, 0xb6800b077f186293UL, 0x00160efe30c525ffUL) /* 2971 */
+P(28, 0x6fbd138c3fd9c207UL, 0x0015da45249ec5deUL) /* 2999 */
+P( 2, 0xb117ccd12ae88a89UL, 0x0015d68ab4acff92UL) /* 3001 */
+P(10, 0x2f1a1a044046bcebUL, 0x0015c3f989d1eb15UL) /* 3011 */
+P( 8, 0x548aba0b060541e3UL, 0x0015b535ad11b8f0UL) /* 3019 */
+P( 4, 0xcf4e808cea111b2fUL, 0x0015addb3f424ec1UL) /* 3023 */
+P(14, 0xdbec1b4fa855a475UL, 0x00159445cb91be6bUL) /* 3037 */
+P( 4, 0xe3f794eb600d7821UL, 0x00158d0199771e63UL) /* 3041 */
+P( 8, 0x34fae0d9a11f7c59UL, 0x00157e87d9b69e04UL) /* 3049 */
+P(12, 0xf006b0ccbbac085dUL, 0x001568f58bc01ac3UL) /* 3061 */
+P( 6, 0x3f45076dc3114733UL, 0x00155e3c993fda9bUL) /* 3067 */
+P(12, 0xeef49bfa58a1a1b7UL, 0x001548eacc5e1e6eUL) /* 3079 */
+P( 4, 0x12c4218bea691fa3UL, 0x001541d8f91ba6a7UL) /* 3083 */
+P( 6, 0xbc7504e3bd5e64f1UL, 0x00153747060cc340UL) /* 3089 */
+P(20, 0x4ee21c292bb92fadUL, 0x001514569f93f7c4UL) /* 3109 */
+P(10, 0x34338b7327a4bacfUL, 0x00150309705d3d79UL) /* 3119 */
+P( 2, 0x3fe5c0833d6fccd1UL, 0x0014ff97020cf5bfUL) /* 3121 */
+P(16, 0xb1e70743535203c1UL, 0x0014e42c114cf47eUL) /* 3137 */
+P(26, 0xefbb5dcdfb4e43d3UL, 0x0014b835bdcb6447UL) /* 3163 */
+P( 4, 0xca68467ca5394f9fUL, 0x0014b182b53a9ab7UL) /* 3167 */
+P( 2, 0x8c51c081408b97a1UL, 0x0014ae2ad094a3d3UL) /* 3169 */
+P(12, 0x3275a899dfa5dd65UL, 0x00149a320ea59f96UL) /* 3181 */
+P( 6, 0x9e674cb62e1b78bbUL, 0x001490441de1a2fbUL) /* 3187 */
+P( 4, 0xa37ff5bb2a998d47UL, 0x001489aacce57200UL) /* 3191 */
+P(12, 0x792a999db131a22bUL, 0x001475f82ad6ff99UL) /* 3203 */
+P( 6, 0x1b48841bc30d29b9UL, 0x00146c2cfe53204fUL) /* 3209 */
+P( 8, 0xf06721d2011d3471UL, 0x00145f2ca490d4a1UL) /* 3217 */
+P( 4, 0x93fd2386dff85ebdUL, 0x001458b2aae0ec87UL) /* 3221 */
+P( 8, 0x4ce72f54c07ed9b5UL, 0x00144bcb0a3a3150UL) /* 3229 */
+P(22, 0xd6d0fd3e71dd827bUL, 0x001428a1e65441d4UL) /* 3251 */
+P( 2, 0x856405fb1eed819dUL, 0x00142575a6c210d7UL) /* 3253 */
+P( 4, 0x8ea8aceb7c443989UL, 0x00141f2025ba5c46UL) /* 3257 */
+P( 2, 0x34a13026f62e5873UL, 0x00141bf6e35420fdUL) /* 3259 */
+P(12, 0x1eea0208ec0af4f7UL, 0x001409141d1d313aUL) /* 3271 */
+P(28, 0x63679853cea598cbUL, 0x0013dd8bc19c3513UL) /* 3299 */
+P( 2, 0xc30b3ebd61f2d0edUL, 0x0013da76f714dc8fUL) /* 3301 */
+P( 6, 0x7eb9037bc7f43bc3UL, 0x0013d13e50f8f49eUL) /* 3307 */
+P( 6, 0xa583e6f6ce016411UL, 0x0013c80e37ca3819UL) /* 3313 */
+P( 6, 0xf1938d895f1a74c7UL, 0x0013bee69fa99ccfUL) /* 3319 */
+P( 4, 0x80cf1491c1e81e33UL, 0x0013b8d0ede55835UL) /* 3323 */
+P( 6, 0x3c0f12886ba8f301UL, 0x0013afb7680bb054UL) /* 3329 */
+P( 2, 0x0e4b786e0dfcc5abUL, 0x0013acb0c3841c96UL) /* 3331 */
+P(12, 0x672684c93f2d41efUL, 0x00139a9c5f434fdeUL) /* 3343 */
+P( 4, 0xe00757badb35c51bUL, 0x0013949cf33a0d9dUL) /* 3347 */
+P(12, 0xd6d84afe66472edfUL, 0x001382b4a00c31b0UL) /* 3359 */
+P( 2, 0xfbbc0eedcbbfb6e1UL, 0x00137fbbc0eedcbbUL) /* 3361 */
+P(10, 0x250f43aa08a84983UL, 0x001370ecf047b069UL) /* 3371 */
+P( 2, 0x04400e927b1acaa5UL, 0x00136df9790e3155UL) /* 3373 */
+P(16, 0x56572be34b9d3215UL, 0x0013567dd8defd5bUL) /* 3389 */
+P( 2, 0x87964ef7781c62bfUL, 0x0013539261fdbc34UL) /* 3391 */
+P(16, 0x29ed84051c06e9afUL, 0x00133c564292d28aUL) /* 3407 */
+P( 6, 0xb00acd11ed3f87fdUL, 0x001333ae178d6388UL) /* 3413 */
+P(20, 0x06307881744152d9UL, 0x0013170ad00d1fd7UL) /* 3433 */
+P(16, 0x7a786459f5c1ccc9UL, 0x0013005f01db0947UL) /* 3449 */
+P( 8, 0x1308125d74563281UL, 0x0012f51d40342210UL) /* 3457 */
+P( 4, 0x395310a480b3e34dUL, 0x0012ef815e4ed950UL) /* 3461 */
+P( 2, 0x35985baa8b202837UL, 0x0012ecb4abccd827UL) /* 3463 */
+P( 4, 0x96304a6e052b3223UL, 0x0012e71dc1d3d820UL) /* 3467 */
+P( 2, 0xbd8265fc9af8fd45UL, 0x0012e45389a16495UL) /* 3469 */
+P(22, 0x1b6d0b383ec58e0bUL, 0x0012c5d9226476ccUL) /* 3491 */
+P( 8, 0xc21a7c3b68b28503UL, 0x0012badc391156fdUL) /* 3499 */
+P(12, 0x236fa180fbfd6007UL, 0x0012aa78e412f522UL) /* 3511 */
+P( 6, 0xc42accd440ed9595UL, 0x0012a251f5f47fd1UL) /* 3517 */
+P(10, 0x7acf7128236ba3f7UL, 0x001294cb85c53534UL) /* 3527 */
+P( 2, 0xf909367a987b9c79UL, 0x0012921963beb65eUL) /* 3529 */
+P( 4, 0xb64efb252bfba705UL, 0x00128cb777c69ca8UL) /* 3533 */
+P( 6, 0x980d4f5a7e4cd25bUL, 0x001284aa6cf07294UL) /* 3539 */
+P( 2, 0xe1ecc4ef27b0c37dUL, 0x001281fcf6ac7f87UL) /* 3541 */
+P( 6, 0x9111aebb81d72653UL, 0x001279f937367db9UL) /* 3547 */
+P(10, 0x8951f985cb2c67edUL, 0x00126cad0488be94UL) /* 3557 */
+P( 2, 0xc439d4fc54e0b5d7UL, 0x00126a06794646a2UL) /* 3559 */
+P(12, 0xe857bf31896d533bUL, 0x00125a2f2bcd3e95UL) /* 3571 */
+P(10, 0xb614bb4cb5023755UL, 0x00124d108389e6b1UL) /* 3581 */
+P( 2, 0x938a89e5473bf1ffUL, 0x00124a73083771acUL) /* 3583 */
+P(10, 0xeac481aca34de039UL, 0x00123d6acda0620aUL) /* 3593 */
+P(14, 0x14b961badf4809a7UL, 0x00122b4b2917eafdUL) /* 3607 */
+P( 6, 0x76784fecba352435UL, 0x00122391bfce1e2fUL) /* 3613 */
+P( 4, 0xefa689bb58aef5e1UL, 0x00121e6f1ea579f2UL) /* 3617 */
+P( 6, 0xb2b2c4db9c3a8197UL, 0x001216c09e471568UL) /* 3623 */
+P( 8, 0x2503bc992279f8cfUL, 0x00120c8cb9d93909UL) /* 3631 */
+P( 6, 0xd2ab9aec5ca1541dUL, 0x001204ed58e64ef9UL) /* 3637 */
+P( 6, 0x3e78ba1460f99af3UL, 0x0011fd546578f00cUL) /* 3643 */
+P(16, 0x0a01426572cfcb63UL, 0x0011e9310b8b4c9cUL) /* 3659 */
+P(12, 0xbea857968f3cbd67UL, 0x0011da3405db9911UL) /* 3671 */
+P( 2, 0x78db213eefe659e9UL, 0x0011d7b6f4eb055dUL) /* 3673 */
+P( 4, 0x963e8541a74d35f5UL, 0x0011d2bee748c145UL) /* 3677 */
+P(14, 0x9e22d152776f2e43UL, 0x0011c1706ddce7a7UL) /* 3691 */
+P( 6, 0x05d10d39d1e1f291UL, 0x0011ba0fed2a4f14UL) /* 3697 */
+P( 4, 0x374468dccaced1ddUL, 0x0011b528538ed64aUL) /* 3701 */
+P( 8, 0x8d145c7d110c5ad5UL, 0x0011ab61404242acUL) /* 3709 */
+P(10, 0x3251a39f5acb5737UL, 0x00119f378ce81d2fUL) /* 3719 */
+P( 8, 0xa66e50171443506fUL, 0x001195889ece79daUL) /* 3727 */
+P( 6, 0x124f69ad91dd4cbdUL, 0x00118e4c65387077UL) /* 3733 */
+P( 6, 0xec24f8f2a61a2793UL, 0x001187161d70e725UL) /* 3739 */
+P(22, 0xb472148e656b7a51UL, 0x00116cd6d1c85239UL) /* 3761 */
+P( 6, 0x0adf9570e1142f07UL, 0x001165bbe7ce86b1UL) /* 3767 */
+P( 2, 0x89bf33b065119789UL, 0x0011635ee344ce36UL) /* 3769 */
+P(10, 0x8f0149803cb291ebUL, 0x0011579767b6d679UL) /* 3779 */
+P(14, 0x8334b63afd190a31UL, 0x00114734711e2b54UL) /* 3793 */
+P( 4, 0x920908d50d6aba7dUL, 0x0011428b90147f05UL) /* 3797 */
+P( 6, 0x57d8b018c5a33d53UL, 0x00113b92f3021636UL) /* 3803 */
+P(18, 0xea1773092dc27ee5UL, 0x001126cabc886884UL) /* 3821 */
+P( 2, 0xcae5f38b7bf2e00fUL, 0x0011247eb1b85976UL) /* 3823 */
+P(10, 0x2bd02df34f695349UL, 0x0011190bb01efd65UL) /* 3833 */
+P(14, 0xddfecd5be62e2eb7UL, 0x0011091de0fd679cUL) /* 3847 */
+P( 4, 0xdbf849ebec96c4a3UL, 0x001104963c7e4e0bUL) /* 3851 */
+P( 2, 0xda31d4d0187357c5UL, 0x00110253516420b0UL) /* 3853 */
+P(10, 0xe34e21cc2d5418a7UL, 0x0010f70db7c41797UL) /* 3863 */
+P(14, 0x68ca5137a9e574adUL, 0x0010e75ee2bf9ecdUL) /* 3877 */
+P( 4, 0x3eaa0d0f804bfd19UL, 0x0010e2e91c6e0676UL) /* 3881 */
+P( 8, 0x554fb753cc20e9d1UL, 0x0010da049b9d428dUL) /* 3889 */
+P(18, 0x797afcca1300756bUL, 0x0010c6248fe3b1a2UL) /* 3907 */
+P( 4, 0x8b8d950b52eeea77UL, 0x0010c1c03ed690ebUL) /* 3911 */
+P( 6, 0xfb6cd166acabc185UL, 0x0010bb2e1379e3a2UL) /* 3917 */
+P( 2, 0x4eb6c5ed9437a7afUL, 0x0010b8fe7f61228eUL) /* 3919 */
+P( 4, 0xd1eddbd91b790cdbUL, 0x0010b4a10d60a4f7UL) /* 3923 */
+P( 6, 0x93d714ea4d8948e9UL, 0x0010ae192681ec0fUL) /* 3929 */
+P( 2, 0x3ca13ed8145188d3UL, 0x0010abecfbe5b0aeUL) /* 3931 */
+P(12, 0x829086016da89c57UL, 0x00109eefd568b96dUL) /* 3943 */
+P( 4, 0xd7da1f432124a543UL, 0x00109a9ff178b40cUL) /* 3947 */
+P(20, 0x7ead5581632fb07fUL, 0x00108531e22f9ff9UL) /* 3967 */
+P(22, 0x35443837f63ec3bdUL, 0x00106ddec1af4417UL) /* 3989 */
+
+#undef FIRST_OMITTED_PRIME
+#define FIRST_OMITTED_PRIME 4001
diff --git a/doc/Makefile.in b/doc/Makefile.in

index 1e92f65fc3e6cc9e04aeb023ee62d153974ad6be..145fa08a64adc3beb69fd1018ed94c511270674d 100644 (file)
--- a/doc/Makefile.in
+++ b/doc/Makefile.in
@@ -1,9 +1,9 @@
-# Makefile.in generated by automake 1.11.1 from Makefile.am.
+# Makefile.in generated by automake 1.11.6 from Makefile.am.
  # @configure_input@
  
  # Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
-# 2003, 2004, 2005, 2006, 2007, 2008, 2009  Free Software Foundation,
-# Inc.
+# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software
+# Foundation, Inc.
  # This Makefile.in is free software; the Free Software Foundation
  # gives unlimited permission to copy and/or distribute it,
  # with or without modifications, as long as this notice is preserved.
@@ -32,6 +32,23 @@
  # You should have received a copy of the GNU Lesser General Public License
  # along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
  VPATH = @srcdir@
+am__make_dryrun = \
+  { \
+    am__dry=no; \
+    case $$MAKEFLAGS in \
+      *\\[\ \  ]*) \
+        echo 'am--echo: ; @echo "AM"  OK' | $(MAKE) -f - 2>/dev/null \
+          | grep '^AM OK$$' >/dev/null || am__dry=yes;; \
+      *) \
+        for am__flg in $$MAKEFLAGS; do \
+          case $$am__flg in \
+            *=*|--*) ;; \
+            *n*) am__dry=yes; break;; \
+          esac; \
+        done;; \
+    esac; \
+    test $$am__dry = yes; \
+  }
  pkgdatadir = $(datadir)/@PACKAGE@
  pkgincludedir = $(includedir)/@PACKAGE@
  pkglibdir = $(libdir)/@PACKAGE@
@@ -56,7 +73,7 @@ DIST_COMMON = $(gmp_TEXINFOS) $(srcdir)/Makefile.am \
         $(srcdir)/version.texi mdate-sh texinfo.tex
  ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
  am__aclocal_m4_deps = $(top_srcdir)/acinclude.m4 \
-       $(top_srcdir)/configure.in
+       $(top_srcdir)/configure.ac
  am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
         $(ACLOCAL_M4)
  mkinstalldirs = $(install_sh) -d
@@ -79,6 +96,11 @@ TEXI2PDF = $(TEXI2DVI) --pdf --batch
  MAKEINFOHTML = $(MAKEINFO) --html
  AM_MAKEINFOHTMLFLAGS = $(AM_MAKEINFOFLAGS)
  DVIPS = dvips
+am__can_run_installinfo = \
+  case $$AM_UPDATE_INFO_DIR in \
+    n|no|NO) false;; \
+    *) (install-info --version) >/dev/null 2>&1;; \
+  esac
  am__installdirs = "$(DESTDIR)$(infodir)"
  am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`;
  am__vpath_adj = case $$p in \
@@ -101,6 +123,12 @@ am__nobase_list = $(am__nobase_strip_setup); \
  am__base_list = \
    sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \
    sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g'
+am__uninstall_files_from_dir = { \
+  test -z "$$files" \
+    || { test ! -d "$$dir" && test ! -f "$$dir" && test ! -r "$$dir"; } \
+    || { echo " ( cd '$$dir' && rm -f" $$files ")"; \
+         $(am__cd) "$$dir" && rm -f $$files; }; \
+  }
  DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
  ABI = @ABI@
  ACLOCAL = @ACLOCAL@
@@ -200,8 +228,8 @@ SHELL = @SHELL@
  SPEED_CYCLECOUNTER_OBJ = @SPEED_CYCLECOUNTER_OBJ@
  STRIP = @STRIP@
  TAL_OBJECT = @TAL_OBJECT@
+TUNE_LIBS = @TUNE_LIBS@
  TUNE_SQR_OBJ = @TUNE_SQR_OBJ@
-U = @U@
  U_FOR_BUILD = @U_FOR_BUILD@
  VERSION = @VERSION@
  WITH_READLINE_01 = @WITH_READLINE_01@
@@ -248,7 +276,6 @@ mandir = @mandir@
  mkdir_p = @mkdir_p@
  mpn_objects = @mpn_objects@
  mpn_objs_in_libgmp = @mpn_objs_in_libgmp@
-mpn_objs_in_libmp = @mpn_objs_in_libmp@
  oldincludedir = @oldincludedir@
  pdfdir = @pdfdir@
  prefix = @prefix@
@@ -398,9 +425,7 @@ uninstall-html-am:
  
  uninstall-info-am:
         @$(PRE_UNINSTALL)
-       @if test -d '$(DESTDIR)$(infodir)' && \
-           (install-info --version && \
-            install-info --version 2>&1 | sed 1q | grep -i -v debian) >/dev/null 2>&1; then \
+       @if test -d '$(DESTDIR)$(infodir)' && $(am__can_run_installinfo); then \
           list='$(INFO_DEPS)'; \
           for file in $$list; do \
             relfile=`echo "$$file" | sed 's|^.*/||'`; \
@@ -527,10 +552,15 @@ install-am: all-am
  
  installcheck: installcheck-am
  install-strip:
-       $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
-         install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
-         `test -z '$(STRIP)' || \
-           echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+       if test -z '$(STRIP)'; then \
+         $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+           install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+             install; \
+       else \
+         $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+           install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+           "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
+       fi
  mostlyclean-generic:
  
  clean-generic:
@@ -568,8 +598,11 @@ install-dvi: install-dvi-am
  
  install-dvi-am: $(DVIS)
         @$(NORMAL_INSTALL)
-       test -z "$(dvidir)" || $(MKDIR_P) "$(DESTDIR)$(dvidir)"
         @list='$(DVIS)'; test -n "$(dvidir)" || list=; \
+       if test -n "$$list"; then \
+         echo " $(MKDIR_P) '$(DESTDIR)$(dvidir)'"; \
+         $(MKDIR_P) "$(DESTDIR)$(dvidir)" || exit 1; \
+       fi; \
         for p in $$list; do \
           if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \
           echo "$$d$$p"; \
@@ -584,18 +617,22 @@ install-html: install-html-am
  
  install-html-am: $(HTMLS)
         @$(NORMAL_INSTALL)
-       test -z "$(htmldir)" || $(MKDIR_P) "$(DESTDIR)$(htmldir)"
         @list='$(HTMLS)'; list2=; test -n "$(htmldir)" || list=; \
+       if test -n "$$list"; then \
+         echo " $(MKDIR_P) '$(DESTDIR)$(htmldir)'"; \
+         $(MKDIR_P) "$(DESTDIR)$(htmldir)" || exit 1; \
+       fi; \
         for p in $$list; do \
           if test -f "$$p" || test -d "$$p"; then d=; else d="$(srcdir)/"; fi; \
           $(am__strip_dir) \
-         if test -d "$$d$$p"; then \
+         d2=$$d$$p; \
+         if test -d "$$d2"; then \
             echo " $(MKDIR_P) '$(DESTDIR)$(htmldir)/$$f'"; \
             $(MKDIR_P) "$(DESTDIR)$(htmldir)/$$f" || exit 1; \
-           echo " $(INSTALL_DATA) '$$d$$p'/* '$(DESTDIR)$(htmldir)/$$f'"; \
-           $(INSTALL_DATA) "$$d$$p"/* "$(DESTDIR)$(htmldir)/$$f" || exit $$?; \
+           echo " $(INSTALL_DATA) '$$d2'/* '$(DESTDIR)$(htmldir)/$$f'"; \
+           $(INSTALL_DATA) "$$d2"/* "$(DESTDIR)$(htmldir)/$$f" || exit $$?; \
           else \
-           list2="$$list2 $$d$$p"; \
+           list2="$$list2 $$d2"; \
           fi; \
         done; \
         test -z "$$list2" || { echo "$$list2" | $(am__base_list) | \
@@ -607,9 +644,12 @@ install-info: install-info-am
  
  install-info-am: $(INFO_DEPS)
         @$(NORMAL_INSTALL)
-       test -z "$(infodir)" || $(MKDIR_P) "$(DESTDIR)$(infodir)"
         @srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`; \
         list='$(INFO_DEPS)'; test -n "$(infodir)" || list=; \
+       if test -n "$$list"; then \
+         echo " $(MKDIR_P) '$(DESTDIR)$(infodir)'"; \
+         $(MKDIR_P) "$(DESTDIR)$(infodir)" || exit 1; \
+       fi; \
         for file in $$list; do \
           case $$file in \
             $(srcdir)/*) file=`echo "$$file" | sed "s|^$$srcdirstrip/||"`;; \
@@ -627,8 +667,7 @@ install-info-am: $(INFO_DEPS)
           echo " $(INSTALL_DATA) $$files '$(DESTDIR)$(infodir)'"; \
           $(INSTALL_DATA) $$files "$(DESTDIR)$(infodir)" || exit $$?; done
         @$(POST_INSTALL)
-       @if (install-info --version && \
-            install-info --version 2>&1 | sed 1q | grep -i -v debian) >/dev/null 2>&1; then \
+       @if $(am__can_run_installinfo); then \
           list='$(INFO_DEPS)'; test -n "$(infodir)" || list=; \
           for file in $$list; do \
             relfile=`echo "$$file" | sed 's|^.*/||'`; \
@@ -642,8 +681,11 @@ install-pdf: install-pdf-am
  
  install-pdf-am: $(PDFS)
         @$(NORMAL_INSTALL)
-       test -z "$(pdfdir)" || $(MKDIR_P) "$(DESTDIR)$(pdfdir)"
         @list='$(PDFS)'; test -n "$(pdfdir)" || list=; \
+       if test -n "$$list"; then \
+         echo " $(MKDIR_P) '$(DESTDIR)$(pdfdir)'"; \
+         $(MKDIR_P) "$(DESTDIR)$(pdfdir)" || exit 1; \
+       fi; \
         for p in $$list; do \
           if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \
           echo "$$d$$p"; \
@@ -655,8 +697,11 @@ install-ps: install-ps-am
  
  install-ps-am: $(PSS)
         @$(NORMAL_INSTALL)
-       test -z "$(psdir)" || $(MKDIR_P) "$(DESTDIR)$(psdir)"
         @list='$(PSS)'; test -n "$(psdir)" || list=; \
+       if test -n "$$list"; then \
+         echo " $(MKDIR_P) '$(DESTDIR)$(psdir)'"; \
+         $(MKDIR_P) "$(DESTDIR)$(psdir)" || exit 1; \
+       fi; \
         for p in $$list; do \
           if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \
           echo "$$d$$p"; \
diff --git a/doc/configuration b/doc/configuration

index b28e2177722e140e6ae64406cf0926882b059629..903067cc5af46df826deb0f86b0007794c01a575 100644 (file)
--- a/doc/configuration
+++ b/doc/configuration
@@ -121,14 +121,9 @@ To add a completely new mpn function file, do the following,
    iii) If HAVE_NATIVE_func is going to be used, then add a #undef to
         the AH_VERBATIM([HAVE_NATIVE] block in configure.in.
  
-  iv) Add file.c to nodist_libdummy_la_SOURCES in mpn/Makefile.am (in
-      order to get an ansi2knr rule).  If the file is only in
-      assembler then this step is unnecessary, but do it anyway so as
-      not to forget if later a .c version is added.
-
-  v) If the function can be provided by a multi-function file, then
-     add to the "case" statement in configure.in which lists each
-     multi-function filename and what function files it can provide.
+  iv) If the function can be provided by a multi-function file, then
+      add to the "case" statement in configure.in which lists each
+      multi-function filename and what function files it can provide.
  
  
  ** Adding a test program
@@ -245,10 +240,6 @@ INSTALL.autoconf can be copied from INSTALL in autoconf.
  ltmain.sh comes from libtool.  Remove it and run "libtoolize --copy",
  or just copy the file by hand.
  
-ansi2knr.c, ansi2knr.1, install-sh and doc/mdate-sh come from automake
-and can be updated by copying or by removing and running "automake
---add-missing --copy".
-
  texinfo.tex can be updated from ftp.gnu.org.  Check it still works
  with "make gmp.dvi", "make gmp.ps" and "make gmp.pdf".
  
@@ -332,10 +323,6 @@ errors rather than mysterious failures from a mismatch.
  --disable-shared will make builds go much faster, though of course
  shared or shared+static should be tested too.
  
---enable-mpbsd grabs various bits of mpz, which might need to be
-adjusted if things in those routines are changed.  Building mpbsd all
-the time doesn't cost much.
-
  --prefix to a dummy directory followed by "make install" will show
  what's installed.
  
@@ -367,30 +354,6 @@ they're the same size, which is unfortunate because casts should be
  used in such cases, for the benefit of K&R compilers with int!=long
  and where the difference matters in function calls.
  
-** K&R support
-
-Function definitions must be in the GNU stylized form to work.  See
-the ansi2knr.1 man page (included in the GMP sources).
-
-__GMP_PROTO is used for function prototypes, other ANSI / K&R
-differences are conditionalized in various places.
-
-Proper testing of the K&R support requires a compiler which gives an
-error for ANSI-isms.  Configuring with --host=none is a good idea, to
-test all the generic C code.
-
-When using an ANSI compiler, the ansi2knr setups can be partially
-tested with
-
-       ./configure am_cv_prog_cc_stdc=no ac_cv_prog_cc_stdc=no
-
-This will test the use of $U and the like in the makefiles, but not
-much else.
-
-Forcing the cache variables can be used with a compiler like HP C
-which is K&R by default but to which configure normally adds ANSI mode
-flags.  This then should be a good full K&R test.
-
  * Other Notes
  
  ** Compatibility
@@ -403,12 +366,6 @@ struct __mpz_struct etc - this must be retained for C++ compatibility.
      will get this in the mangled name because C++ "sees though" the
      typedef mpz_t to the underlying struct.
  
-    Incidentally, this probably means for C++ that our mp.h is not
-    compatible with an original BSD mp.h, since we use struct
-    __mpz_struct for MINT in ours.  Maybe we could change to whatever
-    the original did, but it seems unlikely anyone would be using C++
-    with mp.h.
-
  __gmpn - note that glibc defines some __mpn symbols, old versions of
      some mpn routines, which it uses for floating point printfs.
  
diff --git a/doc/gmp.info b/doc/gmp.info

index d11b20b2800d856eef6d6df03437f16c5855d844..38ad326e1f4dffb865a057786b122c910bbe26b0 100644 (file)
--- a/doc/gmp.info
+++ b/doc/gmp.info
@@ -2,11 +2,11 @@ This is ../../gmp/doc/gmp.info, produced by makeinfo version 4.13 from
  ../../gmp/doc/gmp.texi.
  
  This manual describes how to install and use the GNU multiple precision
-arithmetic library, version 5.0.5.
+arithmetic library, version 5.1.3.
  
     Copyright 1991, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
-2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012
-Free Software Foundation, Inc.
+2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012,
+2013 Free Software Foundation, Inc.
  
     Permission is granted to copy, distribute and/or modify this
  document under the terms of the GNU Free Documentation License, Version
@@ -23,158 +23,157 @@ END-INFO-DIR-ENTRY
  
  \1f
  Indirect:
-gmp.info-1: 991
-gmp.info-2: 299801
+gmp.info-1: 997
+gmp.info-2: 300733
  \1f
  Tag Table:
  (Indirect)
-Node: Top\7f991
-Node: Copying\7f3233
-Node: Introduction to GMP\7f5084
-Node: Installing GMP\7f7795
-Node: Build Options\7f8527
-Node: ABI and ISA\7f24620
-Node: Notes for Package Builds\7f34306
-Node: Notes for Particular Systems\7f37393
-Node: Known Build Problems\7f43950
-Node: Performance optimization\7f47484
-Node: GMP Basics\7f48613
-Node: Headers and Libraries\7f49261
-Node: Nomenclature and Types\7f50685
-Node: Function Classes\7f52681
-Node: Variable Conventions\7f54374
-Node: Parameter Conventions\7f55983
-Node: Memory Management\7f58039
-Node: Reentrancy\7f59167
-Node: Useful Macros and Constants\7f61040
-Node: Compatibility with older versions\7f62038
-Node: Demonstration Programs\7f62999
-Node: Efficiency\7f64864
-Node: Debugging\7f72488
-Node: Profiling\7f79386
-Node: Autoconf\7f83377
-Node: Emacs\7f85156
-Node: Reporting Bugs\7f85762
-Node: Integer Functions\7f88305
-Node: Initializing Integers\7f89081
-Node: Assigning Integers\7f91228
-Node: Simultaneous Integer Init & Assign\7f92815
-Node: Converting Integers\7f94440
-Node: Integer Arithmetic\7f97364
-Node: Integer Division\7f98950
-Node: Integer Exponentiation\7f105260
-Node: Integer Roots\7f106700
-Node: Number Theoretic Functions\7f108374
-Node: Integer Comparisons\7f115063
-Node: Integer Logic and Bit Fiddling\7f116441
-Node: I/O of Integers\7f118988
-Node: Integer Random Numbers\7f121957
-Node: Integer Import and Export\7f124568
-Node: Miscellaneous Integer Functions\7f128578
-Node: Integer Special Functions\7f130438
-Node: Rational Number Functions\7f133525
-Node: Initializing Rationals\7f134718
-Node: Rational Conversions\7f137179
-Node: Rational Arithmetic\7f138911
-Node: Comparing Rationals\7f140215
-Node: Applying Integer Functions\7f141582
-Node: I/O of Rationals\7f143065
-Node: Floating-point Functions\7f145107
-Node: Initializing Floats\7f147992
-Node: Assigning Floats\7f152079
-Node: Simultaneous Float Init & Assign\7f154646
-Node: Converting Floats\7f156174
-Node: Float Arithmetic\7f159424
-Node: Float Comparison\7f161437
-Node: I/O of Floats\7f163018
-Node: Miscellaneous Float Functions\7f165701
-Node: Low-level Functions\7f167643
-Node: Random Number Functions\7f192196
-Node: Random State Initialization\7f193264
-Node: Random State Seeding\7f196123
-Node: Random State Miscellaneous\7f197512
-Node: Formatted Output\7f198154
-Node: Formatted Output Strings\7f198399
-Node: Formatted Output Functions\7f203778
-Node: C++ Formatted Output\7f207853
-Node: Formatted Input\7f210535
-Node: Formatted Input Strings\7f210771
-Node: Formatted Input Functions\7f215423
-Node: C++ Formatted Input\7f218392
-Node: C++ Class Interface\7f220295
-Node: C++ Interface General\7f221296
-Node: C++ Interface Integers\7f224366
-Node: C++ Interface Rationals\7f227711
-Node: C++ Interface Floats\7f231298
-Node: C++ Interface Random Numbers\7f237000
-Node: C++ Interface Limitations\7f239404
-Node: BSD Compatible Functions\7f242224
-Node: Custom Allocation\7f246935
-Node: Language Bindings\7f251295
-Node: Algorithms\7f255250
-Node: Multiplication Algorithms\7f255950
-Node: Basecase Multiplication\7f257039
-Node: Karatsuba Multiplication\7f258947
-Node: Toom 3-Way Multiplication\7f262573
-Node: Toom 4-Way Multiplication\7f268992
-Node: Higher degree Toom'n'half\7f270371
-Node: FFT Multiplication\7f271656
-Node: Other Multiplication\7f276991
-Node: Unbalanced Multiplication\7f279465
-Node: Division Algorithms\7f280253
-Node: Single Limb Division\7f280632
-Node: Basecase Division\7f283522
-Node: Divide and Conquer Division\7f284725
-Node: Block-Wise Barrett Division\7f286794
-Node: Exact Division\7f287446
-Node: Exact Remainder\7f290611
-Node: Small Quotient Division\7f292838
-Node: Greatest Common Divisor Algorithms\7f294436
-Node: Binary GCD\7f294733
-Node: Lehmer's Algorithm\7f297582
-Node: Subquadratic GCD\7f299801
-Node: Extended GCD\7f302258
-Node: Jacobi Symbol\7f303570
-Node: Powering Algorithms\7f304486
-Node: Normal Powering Algorithm\7f304749
-Node: Modular Powering Algorithm\7f305277
-Node: Root Extraction Algorithms\7f306057
-Node: Square Root Algorithm\7f306372
-Node: Nth Root Algorithm\7f308513
-Node: Perfect Square Algorithm\7f309298
-Node: Perfect Power Algorithm\7f311385
-Node: Radix Conversion Algorithms\7f312006
-Node: Binary to Radix\7f312382
-Node: Radix to Binary\7f316312
-Node: Other Algorithms\7f318400
-Node: Prime Testing Algorithm\7f318752
-Node: Factorial Algorithm\7f319936
-Node: Binomial Coefficients Algorithm\7f321339
-Node: Fibonacci Numbers Algorithm\7f322233
-Node: Lucas Numbers Algorithm\7f324707
-Node: Random Number Algorithms\7f325428
-Node: Assembly Coding\7f327549
-Node: Assembly Code Organisation\7f328509
-Node: Assembly Basics\7f329476
-Node: Assembly Carry Propagation\7f330626
-Node: Assembly Cache Handling\7f332457
-Node: Assembly Functional Units\7f334618
-Node: Assembly Floating Point\7f336231
-Node: Assembly SIMD Instructions\7f340009
-Node: Assembly Software Pipelining\7f340991
-Node: Assembly Loop Unrolling\7f342053
-Node: Assembly Writing Guide\7f344268
-Node: Internals\7f347033
-Node: Integer Internals\7f347545
-Node: Rational Internals\7f349801
-Node: Float Internals\7f351039
-Node: Raw Output Internals\7f358453
-Node: C++ Interface Internals\7f359647
-Node: Contributors\7f362933
-Node: References\7f367884
-Node: GNU Free Documentation License\7f373639
-Node: Concept Index\7f398808
-Node: Function Index\7f445058
+Node: Top\7f997
+Node: Copying\7f3183
+Node: Introduction to GMP\7f5034
+Node: Installing GMP\7f7745
+Node: Build Options\7f8477
+Node: ABI and ISA\7f24261
+Node: Notes for Package Builds\7f33832
+Node: Notes for Particular Systems\7f36919
+Node: Known Build Problems\7f43516
+Node: Performance optimization\7f47050
+Node: GMP Basics\7f48179
+Node: Headers and Libraries\7f48827
+Node: Nomenclature and Types\7f50251
+Node: Function Classes\7f52247
+Node: Variable Conventions\7f53781
+Node: Parameter Conventions\7f55390
+Node: Memory Management\7f57446
+Node: Reentrancy\7f58574
+Node: Useful Macros and Constants\7f60447
+Node: Compatibility with older versions\7f61438
+Node: Demonstration Programs\7f62349
+Node: Efficiency\7f64214
+Node: Debugging\7f71838
+Node: Profiling\7f78863
+Node: Autoconf\7f82854
+Node: Emacs\7f84633
+Node: Reporting Bugs\7f85239
+Node: Integer Functions\7f87865
+Node: Initializing Integers\7f88641
+Node: Assigning Integers\7f91017
+Node: Simultaneous Integer Init & Assign\7f92628
+Node: Converting Integers\7f94275
+Node: Integer Arithmetic\7f97239
+Node: Integer Division\7f98975
+Node: Integer Exponentiation\7f105727
+Node: Integer Roots\7f107221
+Node: Number Theoretic Functions\7f108941
+Node: Integer Comparisons\7f116416
+Node: Integer Logic and Bit Fiddling\7f117854
+Node: I/O of Integers\7f120499
+Node: Integer Random Numbers\7f123490
+Node: Integer Import and Export\7f126107
+Node: Miscellaneous Integer Functions\7f130123
+Node: Integer Special Functions\7f132037
+Node: Rational Number Functions\7f135136
+Node: Initializing Rationals\7f136329
+Node: Rational Conversions\7f138808
+Node: Rational Arithmetic\7f140558
+Node: Comparing Rationals\7f141970
+Node: Applying Integer Functions\7f143378
+Node: I/O of Rationals\7f144897
+Node: Floating-point Functions\7f146955
+Node: Initializing Floats\7f149840
+Node: Assigning Floats\7f153933
+Node: Simultaneous Float Init & Assign\7f156524
+Node: Converting Floats\7f158074
+Node: Float Arithmetic\7f161364
+Node: Float Comparison\7f163517
+Node: I/O of Floats\7f165177
+Node: Miscellaneous Float Functions\7f167866
+Node: Low-level Functions\7f169868
+Node: Random Number Functions\7f194385
+Node: Random State Initialization\7f195453
+Node: Random State Seeding\7f198318
+Node: Random State Miscellaneous\7f199723
+Node: Formatted Output\7f200365
+Node: Formatted Output Strings\7f200610
+Node: Formatted Output Functions\7f205989
+Node: C++ Formatted Output\7f210064
+Node: Formatted Input\7f212746
+Node: Formatted Input Strings\7f212982
+Node: Formatted Input Functions\7f217634
+Node: C++ Formatted Input\7f220603
+Node: C++ Class Interface\7f222506
+Node: C++ Interface General\7f223500
+Node: C++ Interface Integers\7f226570
+Node: C++ Interface Rationals\7f230285
+Node: C++ Interface Floats\7f234302
+Node: C++ Interface Random Numbers\7f240306
+Node: C++ Interface Limitations\7f242708
+Node: Custom Allocation\7f245528
+Node: Language Bindings\7f249747
+Node: Algorithms\7f253702
+Node: Multiplication Algorithms\7f254402
+Node: Basecase Multiplication\7f255491
+Node: Karatsuba Multiplication\7f257399
+Node: Toom 3-Way Multiplication\7f261025
+Node: Toom 4-Way Multiplication\7f267444
+Node: Higher degree Toom'n'half\7f268823
+Node: FFT Multiplication\7f270108
+Node: Other Multiplication\7f275443
+Node: Unbalanced Multiplication\7f277917
+Node: Division Algorithms\7f278705
+Node: Single Limb Division\7f279084
+Node: Basecase Division\7f281974
+Node: Divide and Conquer Division\7f283177
+Node: Block-Wise Barrett Division\7f285246
+Node: Exact Division\7f285898
+Node: Exact Remainder\7f289063
+Node: Small Quotient Division\7f291313
+Node: Greatest Common Divisor Algorithms\7f292911
+Node: Binary GCD\7f293208
+Node: Lehmer's Algorithm\7f296057
+Node: Subquadratic GCD\7f298276
+Node: Extended GCD\7f300733
+Node: Jacobi Symbol\7f302045
+Node: Powering Algorithms\7f303060
+Node: Normal Powering Algorithm\7f303323
+Node: Modular Powering Algorithm\7f303851
+Node: Root Extraction Algorithms\7f304633
+Node: Square Root Algorithm\7f304948
+Node: Nth Root Algorithm\7f307089
+Node: Perfect Square Algorithm\7f307874
+Node: Perfect Power Algorithm\7f309961
+Node: Radix Conversion Algorithms\7f310582
+Node: Binary to Radix\7f310958
+Node: Radix to Binary\7f314888
+Node: Other Algorithms\7f316976
+Node: Prime Testing Algorithm\7f317328
+Node: Factorial Algorithm\7f318512
+Node: Binomial Coefficients Algorithm\7f320902
+Node: Fibonacci Numbers Algorithm\7f321796
+Node: Lucas Numbers Algorithm\7f324270
+Node: Random Number Algorithms\7f324991
+Node: Assembly Coding\7f327113
+Node: Assembly Code Organisation\7f328073
+Node: Assembly Basics\7f329040
+Node: Assembly Carry Propagation\7f330190
+Node: Assembly Cache Handling\7f332021
+Node: Assembly Functional Units\7f334182
+Node: Assembly Floating Point\7f335795
+Node: Assembly SIMD Instructions\7f339573
+Node: Assembly Software Pipelining\7f340555
+Node: Assembly Loop Unrolling\7f341617
+Node: Assembly Writing Guide\7f343832
+Node: Internals\7f346597
+Node: Integer Internals\7f347109
+Node: Rational Internals\7f349365
+Node: Float Internals\7f350603
+Node: Raw Output Internals\7f358017
+Node: C++ Interface Internals\7f359211
+Node: Contributors\7f362497
+Node: References\7f368247
+Node: GNU Free Documentation License\7f374002
+Node: Concept Index\7f399171
+Node: Function Index\7f445060
  \1f
  End Tag Table
  
diff --git a/doc/gmp.info-1 b/doc/gmp.info-1

index e8d7b72cef0c4fe9fba3aaf680262f10b7ebffc3..416c1d4b79bf0bc81bf7993b576f9fa1afdb45f7 100644 (file)
--- a/doc/gmp.info-1
+++ b/doc/gmp.info-1
@@ -2,11 +2,11 @@ This is ../../gmp/doc/gmp.info, produced by makeinfo version 4.13 from
  ../../gmp/doc/gmp.texi.
  
  This manual describes how to install and use the GNU multiple precision
-arithmetic library, version 5.0.5.
+arithmetic library, version 5.1.3.
  
     Copyright 1991, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
-2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012
-Free Software Foundation, Inc.
+2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012,
+2013 Free Software Foundation, Inc.
  
     Permission is granted to copy, distribute and/or modify this
  document under the terms of the GNU Free Documentation License, Version
@@ -28,11 +28,11 @@ GNU MP
  ******
  
     This manual describes how to install and use the GNU multiple
-precision arithmetic library, version 5.0.5.
+precision arithmetic library, version 5.1.3.
  
     Copyright 1991, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
-2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012
-Free Software Foundation, Inc.
+2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012,
+2013 Free Software Foundation, Inc.
  
     Permission is granted to copy, distribute and/or modify this
  document under the terms of the GNU Free Documentation License, Version
@@ -58,7 +58,6 @@ is included in *note GNU Free Documentation License::.
  * Formatted Output::           `printf' style output.
  * Formatted Input::            `scanf' style input.
  * C++ Class Interface::        Class wrappers around GMP types.
-* BSD Compatible Functions::   All functions found in BSD MP.
  * Custom Allocation::          How to customize the internal allocation.
  * Language Bindings::          Using GMP from other languages.
  * Algorithms::                 What happens behind the scenes.
@@ -230,7 +229,7 @@ Build Directory
       directory.  For example
  
            cd /my/build/dir
-          /my/sources/gmp-5.0.5/configure
+          /my/sources/gmp-5.1.3/configure
  
       Not all `make' programs have the necessary features (`VPATH') to
       support this.  In particular, SunOS and Slowaris `make' have bugs
@@ -321,7 +320,7 @@ CPU types
       The best idea is always to build GMP for the exact machine type
       you intend to run it on.
  
-     The following CPUs have specific support.  See `configure.in' for
+     The following CPUs have specific support.  See `configure.ac' for
       details of what code and compiler options they select.
  
          * Alpha: alpha, alphaev5, alphaev56, alphapca56, alphapca57,
@@ -360,10 +359,8 @@ CPU types
  
  Generic C Build
       If some of the assembly code causes problems, or if otherwise
-     desired, the generic C code can be selected with CPU `none'.  For
-     example,
-
-          ./configure --host=none-unknown-freebsd3.5
+     desired, the generic C code can be selected with the configure
+     option `--disable-assembly'.
  
       Note that this will run quite slowly, but it should be portable
       and should at least make it possible to get something running if
@@ -419,8 +416,7 @@ Fat binary, `--enable-fat'
       Compiling is done with both `CPPFLAGS' and `CFLAGS', but
       preprocessing uses just `CPPFLAGS'.  This distinction is because
       most preprocessors won't accept all the flags the compiler does.
-     Preprocessing is done separately in some configure tests, and in
-     the `ansi2knr' support for K&R compilers.
+     Preprocessing is done separately in some configure tests.
  
  `CC_FOR_BUILD'
       Some build-time programs are compiled and run to generate
@@ -528,11 +524,6 @@ FFT Multiplication, `--disable-fft'
       to very large operands and can be disabled to save code size if
       desired.
  
-Berkeley MP, `--enable-mpbsd'
-     The Berkeley MP compatibility library (`libmp') and header file
-     (`mp.h') are built and installed only if `--enable-mpbsd' is used.
-     *Note BSD Compatible Functions::.
-
  Assertion Checking, `--enable-assert'
       This option enables some consistency checking within the library.
       This can be of use while debugging, *note Debugging::.
@@ -736,7 +727,7 @@ MIPS under IRIX 6 (`mips*-*-irix[6789]')
  
  PowerPC 64 (`powerpc64', `powerpc620', `powerpc630', `powerpc970', `power4', `power5')
  
-    `ABI=aix64'
+    `ABI=mode64'
            The AIX 64 ABI uses 64-bit limbs and pointers and is the
            default on PowerPC 64 `*-*-aix*' systems.  Applications must
            be compiled with
@@ -744,10 +735,8 @@ PowerPC 64 (`powerpc64', `powerpc620', `powerpc630', `powerpc970', `power4', `po
                 gcc  -maix64
                 xlc  -q64
  
-    `ABI=mode64'
-          The `mode64' ABI uses 64-bit limbs and pointers, and is the
-          default on 64-bit GNU/Linux, BSD, and Mac OS X/Darwin
-          systems.  Applications must be compiled with
+          On 64-bit GNU/Linux, BSD, and Mac OS X/Darwin systems, the
+          applications must be compiled with
  
                 gcc  -m64
  
@@ -756,16 +745,16 @@ PowerPC 64 (`powerpc64', `powerpc620', `powerpc630', `powerpc970', `power4', `po
            chip still in 32-bit mode and using 32-bit calling
            conventions.  This is the default for systems where the true
            64-bit ABI is unavailable.  No special compiler options are
-          typically needed for applications.
+          typically needed for applications.  This ABI is not available
+          under AIX.
  
      `ABI=32'
            This is the basic 32-bit PowerPC ABI, with a 32-bit limb.  No
            special compiler options are needed for applications.
  
-     GMP's speed is greatest for `aix64' and `mode64'.  In `ABI=32'
-     only the 32-bit ISA is used and this doesn't make full use of a
-     64-bit chip.  On a suitable system we could perhaps use more of
-     the ISA, but there are no plans to do so.
+     GMP's speed is greatest for the `mode64' ABI; the `mode32' ABI is
+     second best.  In `ABI=32' only the 32-bit ISA is used, and this
+     doesn't make full use of a 64-bit chip.
  
  
  Sparc V9 (`sparc64', `sparcv9', `ultrasparc*')
@@ -980,10 +969,11 @@ Power CPU Types
       choose the right one for the CPU that will be used.  Currently GMP
       has no assembly code support for using just the common instruction
       subset.  To get executables that run on both, the current
-     suggestion is to use the generic C code (CPU `none'), possibly
-     with appropriate compiler options (like `-mcpu=common' for `gcc').
-     CPU `rs6000' (which is not a CPU but a family of workstations) is
-     accepted by `config.sub', but is currently equivalent to `none'.
+     suggestion is to use the generic C code (`--disable-assembly'),
+     possibly with appropriate compiler options (like `-mcpu=common' for
+     `gcc').  CPU `rs6000' (which is not a CPU but a family of
+     workstations) is accepted by `config.sub', but is currently
+     equivalent to `--disable-assembly'.
  
  Sparc CPU Types
       `sparcv8' or `supersparc' on relevant systems will give a
@@ -1000,7 +990,7 @@ Sparc App Regs
       `-mcmodel=embmedany' (which uses `g4' as a data segment pointer),
       and for applications wanting to use those registers for special
       purposes.  In these cases the only suggestion currently is to
-     build GMP with CPU `none' to avoid the assembly code.
+     build GMP with `--disable-assembly' to avoid the assembly code.
  
  SunOS 4
       `/usr/bin/m4' lacks various features needed to process `.asm'
@@ -1288,18 +1278,14 @@ There are six classes of functions in the GMP library:
       `mpf_'.  The associated type is `mpf_t'.  There are about 60
       functions in this class.  (*note Floating-point Functions::)
  
-  4. Functions compatible with Berkeley MP, such as `itom', `madd', and
-     `mult'.  The associated type is `MINT'.  (*note BSD Compatible
-     Functions::)
-
-  5. Fast low-level functions that operate on natural numbers.  These
+  4. Fast low-level functions that operate on natural numbers.  These
       are used by the functions in the preceding groups, and you can
       also call them directly from very time-critical user programs.
       These functions' names begin with `mpn_'.  The associated type is
       array of `mp_limb_t'.  There are about 30 (hard-to-use) functions
       in this class.  (*note Low-level Functions::)
  
-  6. Miscellaneous functions.  Functions for setting up custom
+  5. Miscellaneous functions.  Functions for setting up custom
       allocation and functions for generating random numbers.  (*note
       Custom Allocation::, and *note Random Number Functions::)
  
@@ -1492,8 +1478,8 @@ File: gmp.info,  Node: Useful Macros and Constants,  Next: Compatibility with ol
  
   -- Global Constant: const char * const gmp_version
       The GMP version number, as a null-terminated string, in the form
-     "i.j.k".  This release is "5.0.5".  Note that the format "i.j" was
-     used when k was zero was used before version 4.3.0.
+     "i.j.k".  This release is "5.1.3".  Note that the format "i.j" was
+     used, before version 4.3.0, when k was zero.
  
   -- Macro: __GMP_CC
   -- Macro: __GMP_CFLAGS
@@ -1506,9 +1492,9 @@ File: gmp.info,  Node: Compatibility with older versions,  Next: Demonstration P
  3.9 Compatibility with older versions
  =====================================
  
-This version of GMP is upwardly binary compatible with all 4.x and 3.x
-versions, and upwardly compatible at the source level with all 2.x
-versions, with the following exceptions.
+This version of GMP is upwardly binary compatible with all 5.x, 4.x,
+and 3.x versions, and upwardly compatible at the source level with all
+2.x versions, with the following exceptions.
  
     * `mpn_gcd' had its source arguments swapped as of GMP 3.0, for
       consistency with other `mpn' functions.
@@ -1516,12 +1502,12 @@ versions, with the following exceptions.
     * `mpf_get_prec' counted precision slightly differently in GMP 3.0
       and 3.0.1, but in 3.1 reverted to the 2.x style.
  
+   * `mpn_bdivmod', documented as preliminary in GMP 4, has been
+     removed.
+
     There are a number of compatibility issues between GMP 1 and GMP 2
  that of course also apply when porting applications from GMP 1 to GMP
-4.  Please see the GMP 2 manual for details.
-
-   The Berkeley MP compatibility library (*note BSD Compatible
-Functions::) is source and binary compatible with the standard `libmp'.
+5.  Please see the GMP 2 manual for details.
  
  \1f
  File: gmp.info,  Node: Demonstration Programs,  Next: Efficiency,  Prev: Compatibility with older versions,  Up: GMP Basics
@@ -1808,7 +1794,7 @@ Source File Paths
       path to the source directory.
  
            cd /my/build/dir
-          /my/source/dir/gmp-5.0.5/configure
+          /my/source/dir/gmp-5.1.3/configure
  
       This works via `VPATH', and might require GNU `make'.  Alternately
       it might be possible to change the `.c.lo' rules appropriately.
@@ -1824,8 +1810,8 @@ Assertion Checking
       benefit from `--enable-assert' since it adds checks on the
       parameters of most such functions, many of which have subtle
       restrictions on their usage.  Note however that only the generic C
-     code has checks, not the assembly code, so CPU `none' should be
-     used for maximum checking.
+     code has checks, not the assembly code, so `--disable-assembly'
+     should be used for maximum checking.
  
  Temporary Memory Checking
       The build option `--enable-alloca=debug' arranges that each block
@@ -1845,7 +1831,7 @@ Maximum Debuggability
       would be
  
            ./configure --disable-shared --enable-assert \
-            --enable-alloca=debug --host=none CFLAGS=-g
+            --enable-alloca=debug --disable-assembly CFLAGS=-g
  
       For C++, add `--enable-cxx CXXFLAGS=-g'.
  
@@ -1857,29 +1843,31 @@ Checker
       A build of GMP with checking within GMP itself can be made.  This
       will run very very slowly.  On GNU/Linux for example,
  
-          ./configure --host=none-pc-linux-gnu CC=checkergcc
+          ./configure --disable-assembly CC=checkergcc
  
-     `--host=none' must be used, since the GMP assembly code doesn't
-     support the checking scheme.  The GMP C++ features cannot be used,
-     since current versions of checker (0.9.9.1) don't yet support the
-     standard C++ library.
+     `--disable-assembly' must be used, since the GMP assembly code
+     doesn't support the checking scheme.  The GMP C++ features cannot
+     be used, since current versions of checker (0.9.9.1) don't yet
+     support the standard C++ library.
  
  Valgrind
-     The valgrind program (`http://valgrind.org/') is a memory checker
-     for x86s.  It translates and emulates machine instructions to do
-     strong checks for uninitialized data (at the level of individual
-     bits), memory accesses through bad pointers, and memory leaks.
+     Valgrind (`http://valgrind.org/') is a memory checker for x86,
+     ARM, MIPS, PowerPC, and S/390.  It translates and emulates machine
+     instructions to do strong checks for uninitialized data (at the
+     level of individual bits), memory accesses through bad pointers,
+     and memory leaks.
  
-     Recent versions of Valgrind are getting support for MMX and
-     SSE/SSE2 instructions, for past versions GMP will need to be
-     configured not to use those, i.e. for an x86 without them (for
-     instance plain `i486').
+     Valgrind does not always support every possible instruction, in
+     particular ones recently added to an ISA.  Valgrind might
+     therefore be incompatible with a recent GMP or even a less recent
+     GMP which is compiled using a recent GCC.
  
       GMP's assembly code sometimes promotes a read of the limbs to some
       larger size, for efficiency.  GMP will do this even at the start
-     and end of a multilimb operand, using naturaly aligned operations
+     and end of a multilimb operand, using naturally aligned operations
       on the larger type.  This may lead to benign reads outside of
-     allocated areas, triggering complants from Valgrind.
+     allocated areas, triggering complaints from Valgrind.  Valgrind's
+     option `--partial-loads-ok=yes' should help.
  
  Other Problems
       Any suspected bug in GMP itself should be isolated to make sure
@@ -2073,7 +2061,10 @@ for this release.
  
     * Please do not send core dumps, executables or `strace's.
  
-   * The configuration options you used when building GMP, if any.
+   * The `configure' options you used when building GMP, if any.
+
+   * The output from `configure', as printed to stdout, with any
+     options used.
  
     * The name of the compiler and its version.  For `gcc', get the
       version with `gcc -v', otherwise perhaps `what `which cc`', or
@@ -2178,11 +2169,16 @@ object is initialized.
       necessary; reallocation is handled automatically by GMP when
       needed.
  
-     N is only the initial space, X will grow automatically in the
+     While N defines the initial space, X will grow automatically in the
       normal way, if necessary, for subsequent values stored.
       `mpz_init2' makes it possible to avoid such reallocations if a
       maximum size is known in advance.
  
+     In preparation for an operation, GMP often allocates one limb more
+     than ultimately needed.  To make sure GMP will not perform
+     reallocation for X, you need to add the number of bits in
+     `mp_limb_t' to N.
+
   -- Function: void mpz_clear (mpz_t X)
       Free the space occupied by X.  Call this function for all `mpz_t'
       variables when you are done with them.
@@ -2210,18 +2206,18 @@ File: gmp.info,  Node: Assigning Integers,  Next: Simultaneous Integer Init & As
  These functions assign new values to already initialized integers
  (*note Initializing Integers::).
  
- -- Function: void mpz_set (mpz_t ROP, mpz_t OP)
+ -- Function: void mpz_set (mpz_t ROP, const mpz_t OP)
   -- Function: void mpz_set_ui (mpz_t ROP, unsigned long int OP)
   -- Function: void mpz_set_si (mpz_t ROP, signed long int OP)
   -- Function: void mpz_set_d (mpz_t ROP, double OP)
- -- Function: void mpz_set_q (mpz_t ROP, mpq_t OP)
- -- Function: void mpz_set_f (mpz_t ROP, mpf_t OP)
+ -- Function: void mpz_set_q (mpz_t ROP, const mpq_t OP)
+ -- Function: void mpz_set_f (mpz_t ROP, const mpf_t OP)
       Set the value of ROP from OP.
  
       `mpz_set_d', `mpz_set_q' and `mpz_set_f' truncate OP to make it an
       integer.
  
- -- Function: int mpz_set_str (mpz_t ROP, char *STR, int BASE)
+ -- Function: int mpz_set_str (mpz_t ROP, const char *STR, int BASE)
       Set the value of ROP from STR, a null-terminated C string in base
       BASE.  White space is allowed in the string, and is simply ignored.
  
@@ -2266,14 +2262,15 @@ functions, it can be used as the source or destination operand for the
  ordinary integer functions.  Don't use an initialize-and-set function
  on a variable already initialized!
  
- -- Function: void mpz_init_set (mpz_t ROP, mpz_t OP)
+ -- Function: void mpz_init_set (mpz_t ROP, const mpz_t OP)
   -- Function: void mpz_init_set_ui (mpz_t ROP, unsigned long int OP)
   -- Function: void mpz_init_set_si (mpz_t ROP, signed long int OP)
   -- Function: void mpz_init_set_d (mpz_t ROP, double OP)
       Initialize ROP with limb space and set the initial numeric value
       from OP.
  
- -- Function: int mpz_init_set_str (mpz_t ROP, char *STR, int BASE)
+ -- Function: int mpz_init_set_str (mpz_t ROP, const char *STR, int
+          BASE)
       Initialize ROP and set its value like `mpz_set_str' (see its
       documentation above for details).
  
@@ -2291,14 +2288,14 @@ This section describes functions for converting GMP integers to
  standard C types.  Functions for converting _to_ GMP integers are
  described in *note Assigning Integers:: and *note I/O of Integers::.
  
- -- Function: unsigned long int mpz_get_ui (mpz_t OP)
+ -- Function: unsigned long int mpz_get_ui (const mpz_t OP)
       Return the value of OP as an `unsigned long'.
  
       If OP is too big to fit an `unsigned long' then just the least
       significant bits that do fit are returned.  The sign of OP is
       ignored, only the absolute value is used.
  
- -- Function: signed long int mpz_get_si (mpz_t OP)
+ -- Function: signed long int mpz_get_si (const mpz_t OP)
       If OP fits into a `signed long int' return the value of OP.
       Otherwise return the least significant part of OP, with the same
       sign as OP.
@@ -2307,7 +2304,7 @@ described in *note Assigning Integers:: and *note I/O of Integers::.
       result is probably not very useful.  To find out if the value will
       fit, use the function `mpz_fits_slong_p'.
  
- -- Function: double mpz_get_d (mpz_t OP)
+ -- Function: double mpz_get_d (const mpz_t OP)
       Convert OP to a `double', truncating if necessary (i.e. rounding
       towards zero).
  
@@ -2315,7 +2312,8 @@ described in *note Assigning Integers:: and *note I/O of Integers::.
       system dependent.  An infinity is returned where available.  A
       hardware overflow trap may or may not occur.
  
- -- Function: double mpz_get_d_2exp (signed long int *EXP, mpz_t OP)
+ -- Function: double mpz_get_d_2exp (signed long int *EXP, const mpz_t
+          OP)
       Convert OP to a `double', truncating if necessary (i.e. rounding
       towards zero), and returning the exponent separately.
  
@@ -2326,7 +2324,7 @@ described in *note Assigning Integers:: and *note I/O of Integers::.
       This is similar to the standard C `frexp' function (*note
       Normalization Functions: (libc)Normalization Functions.).
  
- -- Function: char * mpz_get_str (char *STR, int BASE, mpz_t OP)
+ -- Function: char * mpz_get_str (char *STR, int BASE, const mpz_t OP)
       Convert OP to a string of digits in base BASE.  The base argument
       may vary from 2 to 62 or from -2 to -36.
  
@@ -2354,42 +2352,45 @@ File: gmp.info,  Node: Integer Arithmetic,  Next: Integer Division,  Prev: Conve
  5.5 Arithmetic Functions
  ========================
  
- -- Function: void mpz_add (mpz_t ROP, mpz_t OP1, mpz_t OP2)
- -- Function: void mpz_add_ui (mpz_t ROP, mpz_t OP1, unsigned long int
-          OP2)
+ -- Function: void mpz_add (mpz_t ROP, const mpz_t OP1, const mpz_t OP2)
+ -- Function: void mpz_add_ui (mpz_t ROP, const mpz_t OP1, unsigned
+          long int OP2)
       Set ROP to OP1 + OP2.
  
- -- Function: void mpz_sub (mpz_t ROP, mpz_t OP1, mpz_t OP2)
- -- Function: void mpz_sub_ui (mpz_t ROP, mpz_t OP1, unsigned long int
-          OP2)
- -- Function: void mpz_ui_sub (mpz_t ROP, unsigned long int OP1, mpz_t
-          OP2)
+ -- Function: void mpz_sub (mpz_t ROP, const mpz_t OP1, const mpz_t OP2)
+ -- Function: void mpz_sub_ui (mpz_t ROP, const mpz_t OP1, unsigned
+          long int OP2)
+ -- Function: void mpz_ui_sub (mpz_t ROP, unsigned long int OP1, const
+          mpz_t OP2)
       Set ROP to OP1 - OP2.
  
- -- Function: void mpz_mul (mpz_t ROP, mpz_t OP1, mpz_t OP2)
- -- Function: void mpz_mul_si (mpz_t ROP, mpz_t OP1, long int OP2)
- -- Function: void mpz_mul_ui (mpz_t ROP, mpz_t OP1, unsigned long int
-          OP2)
+ -- Function: void mpz_mul (mpz_t ROP, const mpz_t OP1, const mpz_t OP2)
+ -- Function: void mpz_mul_si (mpz_t ROP, const mpz_t OP1, long int OP2)
+ -- Function: void mpz_mul_ui (mpz_t ROP, const mpz_t OP1, unsigned
+          long int OP2)
       Set ROP to OP1 times OP2.
  
- -- Function: void mpz_addmul (mpz_t ROP, mpz_t OP1, mpz_t OP2)
- -- Function: void mpz_addmul_ui (mpz_t ROP, mpz_t OP1, unsigned long
-          int OP2)
+ -- Function: void mpz_addmul (mpz_t ROP, const mpz_t OP1, const mpz_t
+          OP2)
+ -- Function: void mpz_addmul_ui (mpz_t ROP, const mpz_t OP1, unsigned
+          long int OP2)
       Set ROP to ROP + OP1 times OP2.
  
- -- Function: void mpz_submul (mpz_t ROP, mpz_t OP1, mpz_t OP2)
- -- Function: void mpz_submul_ui (mpz_t ROP, mpz_t OP1, unsigned long
-          int OP2)
+ -- Function: void mpz_submul (mpz_t ROP, const mpz_t OP1, const mpz_t
+          OP2)
+ -- Function: void mpz_submul_ui (mpz_t ROP, const mpz_t OP1, unsigned
+          long int OP2)
       Set ROP to ROP - OP1 times OP2.
  
- -- Function: void mpz_mul_2exp (mpz_t ROP, mpz_t OP1, mp_bitcnt_t OP2)
+ -- Function: void mpz_mul_2exp (mpz_t ROP, const mpz_t OP1,
+          mp_bitcnt_t OP2)
       Set ROP to OP1 times 2 raised to OP2.  This operation can also be
       defined as a left shift by OP2 bits.
  
- -- Function: void mpz_neg (mpz_t ROP, mpz_t OP)
+ -- Function: void mpz_neg (mpz_t ROP, const mpz_t OP)
       Set ROP to -OP.
  
- -- Function: void mpz_abs (mpz_t ROP, mpz_t OP)
+ -- Function: void mpz_abs (mpz_t ROP, const mpz_t OP)
       Set ROP to the absolute value of OP.
  
  \1f
@@ -2404,47 +2405,56 @@ functions `mpz_powm' and `mpz_powm_ui'), will cause an intentional
  division by zero.  This lets a program handle arithmetic exceptions in
  these functions the same way as for normal C `int' arithmetic.
  
- -- Function: void mpz_cdiv_q (mpz_t Q, mpz_t N, mpz_t D)
- -- Function: void mpz_cdiv_r (mpz_t R, mpz_t N, mpz_t D)
- -- Function: void mpz_cdiv_qr (mpz_t Q, mpz_t R, mpz_t N, mpz_t D)
- -- Function: unsigned long int mpz_cdiv_q_ui (mpz_t Q, mpz_t N,
+ -- Function: void mpz_cdiv_q (mpz_t Q, const mpz_t N, const mpz_t D)
+ -- Function: void mpz_cdiv_r (mpz_t R, const mpz_t N, const mpz_t D)
+ -- Function: void mpz_cdiv_qr (mpz_t Q, mpz_t R, const mpz_t N, const
+          mpz_t D)
+ -- Function: unsigned long int mpz_cdiv_q_ui (mpz_t Q, const mpz_t N,
            unsigned long int D)
- -- Function: unsigned long int mpz_cdiv_r_ui (mpz_t R, mpz_t N,
+ -- Function: unsigned long int mpz_cdiv_r_ui (mpz_t R, const mpz_t N,
            unsigned long int D)
   -- Function: unsigned long int mpz_cdiv_qr_ui (mpz_t Q, mpz_t R,
-          mpz_t N, unsigned long int D)
- -- Function: unsigned long int mpz_cdiv_ui (mpz_t N,
+          const mpz_t N, unsigned long int D)
+ -- Function: unsigned long int mpz_cdiv_ui (const mpz_t N,
            unsigned long int D)
- -- Function: void mpz_cdiv_q_2exp (mpz_t Q, mpz_t N, mp_bitcnt_t B)
- -- Function: void mpz_cdiv_r_2exp (mpz_t R, mpz_t N, mp_bitcnt_t B)
-
- -- Function: void mpz_fdiv_q (mpz_t Q, mpz_t N, mpz_t D)
- -- Function: void mpz_fdiv_r (mpz_t R, mpz_t N, mpz_t D)
- -- Function: void mpz_fdiv_qr (mpz_t Q, mpz_t R, mpz_t N, mpz_t D)
- -- Function: unsigned long int mpz_fdiv_q_ui (mpz_t Q, mpz_t N,
+ -- Function: void mpz_cdiv_q_2exp (mpz_t Q, const mpz_t N,
+          mp_bitcnt_t B)
+ -- Function: void mpz_cdiv_r_2exp (mpz_t R, const mpz_t N,
+          mp_bitcnt_t B)
+
+ -- Function: void mpz_fdiv_q (mpz_t Q, const mpz_t N, const mpz_t D)
+ -- Function: void mpz_fdiv_r (mpz_t R, const mpz_t N, const mpz_t D)
+ -- Function: void mpz_fdiv_qr (mpz_t Q, mpz_t R, const mpz_t N, const
+          mpz_t D)
+ -- Function: unsigned long int mpz_fdiv_q_ui (mpz_t Q, const mpz_t N,
            unsigned long int D)
- -- Function: unsigned long int mpz_fdiv_r_ui (mpz_t R, mpz_t N,
+ -- Function: unsigned long int mpz_fdiv_r_ui (mpz_t R, const mpz_t N,
            unsigned long int D)
   -- Function: unsigned long int mpz_fdiv_qr_ui (mpz_t Q, mpz_t R,
-          mpz_t N, unsigned long int D)
- -- Function: unsigned long int mpz_fdiv_ui (mpz_t N,
+          const mpz_t N, unsigned long int D)
+ -- Function: unsigned long int mpz_fdiv_ui (const mpz_t N,
            unsigned long int D)
- -- Function: void mpz_fdiv_q_2exp (mpz_t Q, mpz_t N, mp_bitcnt_t B)
- -- Function: void mpz_fdiv_r_2exp (mpz_t R, mpz_t N, mp_bitcnt_t B)
-
- -- Function: void mpz_tdiv_q (mpz_t Q, mpz_t N, mpz_t D)
- -- Function: void mpz_tdiv_r (mpz_t R, mpz_t N, mpz_t D)
- -- Function: void mpz_tdiv_qr (mpz_t Q, mpz_t R, mpz_t N, mpz_t D)
- -- Function: unsigned long int mpz_tdiv_q_ui (mpz_t Q, mpz_t N,
+ -- Function: void mpz_fdiv_q_2exp (mpz_t Q, const mpz_t N,
+          mp_bitcnt_t B)
+ -- Function: void mpz_fdiv_r_2exp (mpz_t R, const mpz_t N,
+          mp_bitcnt_t B)
+
+ -- Function: void mpz_tdiv_q (mpz_t Q, const mpz_t N, const mpz_t D)
+ -- Function: void mpz_tdiv_r (mpz_t R, const mpz_t N, const mpz_t D)
+ -- Function: void mpz_tdiv_qr (mpz_t Q, mpz_t R, const mpz_t N, const
+          mpz_t D)
+ -- Function: unsigned long int mpz_tdiv_q_ui (mpz_t Q, const mpz_t N,
            unsigned long int D)
- -- Function: unsigned long int mpz_tdiv_r_ui (mpz_t R, mpz_t N,
+ -- Function: unsigned long int mpz_tdiv_r_ui (mpz_t R, const mpz_t N,
            unsigned long int D)
   -- Function: unsigned long int mpz_tdiv_qr_ui (mpz_t Q, mpz_t R,
-          mpz_t N, unsigned long int D)
- -- Function: unsigned long int mpz_tdiv_ui (mpz_t N,
+          const mpz_t N, unsigned long int D)
+ -- Function: unsigned long int mpz_tdiv_ui (const mpz_t N,
            unsigned long int D)
- -- Function: void mpz_tdiv_q_2exp (mpz_t Q, mpz_t N, mp_bitcnt_t B)
- -- Function: void mpz_tdiv_r_2exp (mpz_t R, mpz_t N, mp_bitcnt_t B)
+ -- Function: void mpz_tdiv_q_2exp (mpz_t Q, const mpz_t N,
+          mp_bitcnt_t B)
+ -- Function: void mpz_tdiv_r_2exp (mpz_t R, const mpz_t N,
+          mp_bitcnt_t B)
  
       Divide N by D, forming a quotient Q and/or remainder R.  For the
       `2exp' functions, D=2^B.  The rounding is in three styles, each
@@ -2482,8 +2492,8 @@ these functions the same way as for normal C `int' arithmetic.
       the same as the bitwise logical functions do, whereas
       `mpz_tdiv_q_2exp' effectively treats N as sign and magnitude.
  
- -- Function: void mpz_mod (mpz_t R, mpz_t N, mpz_t D)
- -- Function: unsigned long int mpz_mod_ui (mpz_t R, mpz_t N,
+ -- Function: void mpz_mod (mpz_t R, const mpz_t N, const mpz_t D)
+ -- Function: unsigned long int mpz_mod_ui (mpz_t R, const mpz_t N,
            unsigned long int D)
       Set R to N `mod' D.  The sign of the divisor is ignored; the
       result is always non-negative.
@@ -2492,8 +2502,9 @@ these functions the same way as for normal C `int' arithmetic.
       remainder as well as setting R.  See `mpz_fdiv_ui' above if only
       the return value is wanted.
  
- -- Function: void mpz_divexact (mpz_t Q, mpz_t N, mpz_t D)
- -- Function: void mpz_divexact_ui (mpz_t Q, mpz_t N, unsigned long D)
+ -- Function: void mpz_divexact (mpz_t Q, const mpz_t N, const mpz_t D)
+ -- Function: void mpz_divexact_ui (mpz_t Q, const mpz_t N, unsigned
+          long D)
       Set Q to N/D.  These functions produce correct results only when
       it is known in advance that D divides N.
  
@@ -2501,9 +2512,10 @@ these functions the same way as for normal C `int' arithmetic.
       and are the best choice when exact division is known to occur, for
       example reducing a rational to lowest terms.
  
- -- Function: int mpz_divisible_p (mpz_t N, mpz_t D)
- -- Function: int mpz_divisible_ui_p (mpz_t N, unsigned long int D)
- -- Function: int mpz_divisible_2exp_p (mpz_t N, mp_bitcnt_t B)
+ -- Function: int mpz_divisible_p (const mpz_t N, const mpz_t D)
+ -- Function: int mpz_divisible_ui_p (const mpz_t N, unsigned long int
+          D)
+ -- Function: int mpz_divisible_2exp_p (const mpz_t N, mp_bitcnt_t B)
       Return non-zero if N is exactly divisible by D, or in the case of
       `mpz_divisible_2exp_p' by 2^B.
  
@@ -2512,10 +2524,12 @@ these functions the same way as for normal C `int' arithmetic.
       following the rule it can be seen that only 0 is considered
       divisible by 0.
  
- -- Function: int mpz_congruent_p (mpz_t N, mpz_t C, mpz_t D)
- -- Function: int mpz_congruent_ui_p (mpz_t N, unsigned long int C,
-          unsigned long int D)
- -- Function: int mpz_congruent_2exp_p (mpz_t N, mpz_t C, mp_bitcnt_t B)
+ -- Function: int mpz_congruent_p (const mpz_t N, const mpz_t C, const
+          mpz_t D)
+ -- Function: int mpz_congruent_ui_p (const mpz_t N, unsigned long int
+          C, unsigned long int D)
+ -- Function: int mpz_congruent_2exp_p (const mpz_t N, const mpz_t C,
+          mp_bitcnt_t B)
       Return non-zero if N is congruent to C modulo D, or in the case of
       `mpz_congruent_2exp_p' modulo 2^B.
  
@@ -2530,18 +2544,18 @@ File: gmp.info,  Node: Integer Exponentiation,  Next: Integer Roots,  Prev: Inte
  5.7 Exponentiation Functions
  ============================
  
- -- Function: void mpz_powm (mpz_t ROP, mpz_t BASE, mpz_t EXP, mpz_t
-          MOD)
- -- Function: void mpz_powm_ui (mpz_t ROP, mpz_t BASE, unsigned long
-          int EXP, mpz_t MOD)
+ -- Function: void mpz_powm (mpz_t ROP, const mpz_t BASE, const mpz_t
+          EXP, const mpz_t MOD)
+ -- Function: void mpz_powm_ui (mpz_t ROP, const mpz_t BASE, unsigned
+          long int EXP, const mpz_t MOD)
       Set ROP to (BASE raised to EXP) modulo MOD.
  
       Negative EXP is supported if an inverse BASE^-1 mod MOD exists
       (see `mpz_invert' in *note Number Theoretic Functions::).  If an
       inverse doesn't exist then a divide by zero is raised.
  
- -- Function: void mpz_powm_sec (mpz_t ROP, mpz_t BASE, mpz_t EXP,
-          mpz_t MOD)
+ -- Function: void mpz_powm_sec (mpz_t ROP, const mpz_t BASE, const
+          mpz_t EXP, const mpz_t MOD)
       Set ROP to (BASE raised to EXP) modulo MOD.
  
       It is required that EXP > 0 and that MOD is odd.
@@ -2553,8 +2567,8 @@ File: gmp.info,  Node: Integer Exponentiation,  Next: Integer Roots,  Prev: Inte
       is intended for cryptographic purposes, where resilience to
       side-channel attacks is desired.
  
- -- Function: void mpz_pow_ui (mpz_t ROP, mpz_t BASE, unsigned long int
-          EXP)
+ -- Function: void mpz_pow_ui (mpz_t ROP, const mpz_t BASE, unsigned
+          long int EXP)
   -- Function: void mpz_ui_pow_ui (mpz_t ROP, unsigned long int BASE,
            unsigned long int EXP)
       Set ROP to BASE raised to EXP.  The case 0^0 yields 1.
@@ -2565,27 +2579,28 @@ File: gmp.info,  Node: Integer Roots,  Next: Number Theoretic Functions,  Prev:
  5.8 Root Extraction Functions
  =============================
  
- -- Function: int mpz_root (mpz_t ROP, mpz_t OP, unsigned long int N)
+ -- Function: int mpz_root (mpz_t ROP, const mpz_t OP, unsigned long
+          int N)
       Set ROP to the truncated integer part of the Nth root of OP.
       Return non-zero if the computation was exact, i.e., if OP is ROP
       to the Nth power.
  
- -- Function: void mpz_rootrem (mpz_t ROOT, mpz_t REM, mpz_t U,
+ -- Function: void mpz_rootrem (mpz_t ROOT, mpz_t REM, const mpz_t U,
            unsigned long int N)
       Set ROOT to the truncated integer part of the Nth root of U.  Set
       REM to the remainder, U-ROOT**N.
  
- -- Function: void mpz_sqrt (mpz_t ROP, mpz_t OP)
+ -- Function: void mpz_sqrt (mpz_t ROP, const mpz_t OP)
       Set ROP to the truncated integer part of the square root of OP.
  
- -- Function: void mpz_sqrtrem (mpz_t ROP1, mpz_t ROP2, mpz_t OP)
+ -- Function: void mpz_sqrtrem (mpz_t ROP1, mpz_t ROP2, const mpz_t OP)
       Set ROP1 to the truncated integer part of the square root of OP,
       like `mpz_sqrt'.  Set ROP2 to the remainder OP-ROP1*ROP1, which
       will be zero if OP is a perfect square.
  
       If ROP1 and ROP2 are the same variable, the results are undefined.
  
- -- Function: int mpz_perfect_power_p (mpz_t OP)
+ -- Function: int mpz_perfect_power_p (const mpz_t OP)
       Return non-zero if OP is a perfect power, i.e., if there exist
       integers A and B, with B>1, such that OP equals A raised to the
       power B.
@@ -2594,7 +2609,7 @@ File: gmp.info,  Node: Integer Roots,  Next: Number Theoretic Functions,  Prev:
       powers.  Negative values of OP are accepted, but of course can
       only be odd perfect powers.
  
- -- Function: int mpz_perfect_square_p (mpz_t OP)
+ -- Function: int mpz_perfect_square_p (const mpz_t OP)
       Return non-zero if OP is a perfect square, i.e., if the square
       root of OP is an integer.  Under this definition both 0 and 1 are
       considered to be perfect squares.
@@ -2605,15 +2620,17 @@ File: gmp.info,  Node: Number Theoretic Functions,  Next: Integer Comparisons,
  5.9 Number Theoretic Functions
  ==============================
  
- -- Function: int mpz_probab_prime_p (mpz_t N, int REPS)
+ -- Function: int mpz_probab_prime_p (const mpz_t N, int REPS)
       Determine whether N is prime.  Return 2 if N is definitely prime,
       return 1 if N is probably prime (without being certain), or return
       0 if N is definitely composite.
  
       This function does some trial divisions, then some Miller-Rabin
-     probabilistic primality tests.  REPS controls how many such tests
-     are done, 5 to 10 is a reasonable number, more will reduce the
-     chances of a composite being returned as "probably prime".
+     probabilistic primality tests.  The argument REPS controls how
+     many such tests are done; a higher value will reduce the chances
+     of a composite being returned as "probably prime".  25 is a
+     reasonable number; a composite number will then be identified as a
+     prime with a probability of less than 2^(-50).
  
       Miller-Rabin and similar tests can be more properly called
       compositeness tests.  Numbers which fail are known to be composite
@@ -2621,20 +2638,20 @@ File: gmp.info,  Node: Number Theoretic Functions,  Next: Integer Comparisons,
       few composites pass, hence those which pass are considered
       probably prime.
  
- -- Function: void mpz_nextprime (mpz_t ROP, mpz_t OP)
+ -- Function: void mpz_nextprime (mpz_t ROP, const mpz_t OP)
       Set ROP to the next prime greater than OP.
  
       This function uses a probabilistic algorithm to identify primes.
       For practical purposes it's adequate; the chance of a composite
       passing will be extremely small.
  
- -- Function: void mpz_gcd (mpz_t ROP, mpz_t OP1, mpz_t OP2)
+ -- Function: void mpz_gcd (mpz_t ROP, const mpz_t OP1, const mpz_t OP2)
       Set ROP to the greatest common divisor of OP1 and OP2.  The result
       is always positive even if one or both input operands are
       negative, except when both inputs are zero; in that case this
       function defines gcd(0,0) = 0.
  
- -- Function: unsigned long int mpz_gcd_ui (mpz_t ROP, mpz_t OP1,
+ -- Function: unsigned long int mpz_gcd_ui (mpz_t ROP, const mpz_t OP1,
            unsigned long int OP2)
       Compute the greatest common divisor of OP1 and OP2.  If ROP is not
       `NULL', store the result there.
@@ -2644,8 +2661,8 @@ File: gmp.info,  Node: Number Theoretic Functions,  Next: Integer Comparisons,
       result is equal to the argument OP1.  Note that the result will
       always fit if OP2 is non-zero.
  
- -- Function: void mpz_gcdext (mpz_t G, mpz_t S, mpz_t T, mpz_t A,
-          mpz_t B)
+ -- Function: void mpz_gcdext (mpz_t G, mpz_t S, mpz_t T, const mpz_t
+          A, const mpz_t B)
       Set G to the greatest common divisor of A and B, and in addition
       set S and T to coefficients satisfying A*S + B*T = G.  The value
       in G is always positive, even if one or both of A and B are
@@ -2664,32 +2681,34 @@ File: gmp.info,  Node: Number Theoretic Functions,  Next: Integer Comparisons,
  
       If T is `NULL' then that value is not computed.
  
- -- Function: void mpz_lcm (mpz_t ROP, mpz_t OP1, mpz_t OP2)
- -- Function: void mpz_lcm_ui (mpz_t ROP, mpz_t OP1, unsigned long OP2)
+ -- Function: void mpz_lcm (mpz_t ROP, const mpz_t OP1, const mpz_t OP2)
+ -- Function: void mpz_lcm_ui (mpz_t ROP, const mpz_t OP1, unsigned
+          long OP2)
       Set ROP to the least common multiple of OP1 and OP2.  ROP is
       always positive, irrespective of the signs of OP1 and OP2.  ROP
       will be zero if either OP1 or OP2 is zero.
  
- -- Function: int mpz_invert (mpz_t ROP, mpz_t OP1, mpz_t OP2)
+ -- Function: int mpz_invert (mpz_t ROP, const mpz_t OP1, const mpz_t
+          OP2)
       Compute the inverse of OP1 modulo OP2 and put the result in ROP.
       If the inverse exists, the return value is non-zero and ROP will
       satisfy 0 < ROP < abs(OP2).  If an inverse doesn't exist the
       return value is zero and ROP is undefined.  The behaviour of this
       function is undefined when OP2 is zero.
  
- -- Function: int mpz_jacobi (mpz_t A, mpz_t B)
+ -- Function: int mpz_jacobi (const mpz_t A, const mpz_t B)
       Calculate the Jacobi symbol (A/B).  This is defined only for B odd.
  
- -- Function: int mpz_legendre (mpz_t A, mpz_t P)
+ -- Function: int mpz_legendre (const mpz_t A, const mpz_t P)
       Calculate the Legendre symbol (A/P).  This is defined only for P
       an odd positive prime, and for such P it's identical to the Jacobi
       symbol.
  
- -- Function: int mpz_kronecker (mpz_t A, mpz_t B)
- -- Function: int mpz_kronecker_si (mpz_t A, long B)
- -- Function: int mpz_kronecker_ui (mpz_t A, unsigned long B)
- -- Function: int mpz_si_kronecker (long A, mpz_t B)
- -- Function: int mpz_ui_kronecker (unsigned long A, mpz_t B)
+ -- Function: int mpz_kronecker (const mpz_t A, const mpz_t B)
+ -- Function: int mpz_kronecker_si (const mpz_t A, long B)
+ -- Function: int mpz_kronecker_ui (const mpz_t A, unsigned long B)
+ -- Function: int mpz_si_kronecker (long A, const mpz_t B)
+ -- Function: int mpz_ui_kronecker (unsigned long A, const mpz_t B)
       Calculate the Jacobi symbol (A/B) with the Kronecker extension
       (a/2)=(2/a) when a is odd, or (a/2)=0 when a is even.
  
@@ -2701,15 +2720,26 @@ File: gmp.info,  Node: Number Theoretic Functions,  Next: Integer Comparisons,
       References::), or any number theory textbook.  See also the
       example program `demos/qcn.c' which uses `mpz_kronecker_ui'.
  
- -- Function: mp_bitcnt_t mpz_remove (mpz_t ROP, mpz_t OP, mpz_t F)
+ -- Function: mp_bitcnt_t mpz_remove (mpz_t ROP, const mpz_t OP, const
+          mpz_t F)
       Remove all occurrences of the factor F from OP and store the
       result in ROP.  The return value is how many such occurrences were
       removed.
  
- -- Function: void mpz_fac_ui (mpz_t ROP, unsigned long int OP)
-     Set ROP to OP!, the factorial of OP.
+ -- Function: void mpz_fac_ui (mpz_t ROP, unsigned long int N)
+ -- Function: void mpz_2fac_ui (mpz_t ROP, unsigned long int N)
+ -- Function: void mpz_mfac_uiui (mpz_t ROP, unsigned long int N,
+          unsigned long int M)
+     Set ROP to the factorial of N: `mpz_fac_ui' computes the plain
+     factorial N!, `mpz_2fac_ui' computes the double-factorial N!!, and
+     `mpz_mfac_uiui' the M-multi-factorial N!^(M).
  
- -- Function: void mpz_bin_ui (mpz_t ROP, mpz_t N, unsigned long int K)
+ -- Function: void mpz_primorial_ui (mpz_t ROP, unsigned long int N)
+     Set ROP to the primorial of N, i.e. the product of all positive
+     prime numbers <=N.
+
+ -- Function: void mpz_bin_ui (mpz_t ROP, const mpz_t N, unsigned long
+          int K)
   -- Function: void mpz_bin_uiui (mpz_t ROP, unsigned long int N,
            unsigned long int K)
       Compute the binomial coefficient N over K and store the result in
@@ -2751,10 +2781,10 @@ File: gmp.info,  Node: Integer Comparisons,  Next: Integer Logic and Bit Fiddlin
  5.10 Comparison Functions
  =========================
  
- -- Function: int mpz_cmp (mpz_t OP1, mpz_t OP2)
- -- Function: int mpz_cmp_d (mpz_t OP1, double OP2)
- -- Macro: int mpz_cmp_si (mpz_t OP1, signed long int OP2)
- -- Macro: int mpz_cmp_ui (mpz_t OP1, unsigned long int OP2)
+ -- Function: int mpz_cmp (const mpz_t OP1, const mpz_t OP2)
+ -- Function: int mpz_cmp_d (const mpz_t OP1, double OP2)
+ -- Macro: int mpz_cmp_si (const mpz_t OP1, signed long int OP2)
+ -- Macro: int mpz_cmp_ui (const mpz_t OP1, unsigned long int OP2)
       Compare OP1 and OP2.  Return a positive value if OP1 > OP2, zero
       if OP1 = OP2, or a negative value if OP1 < OP2.
  
@@ -2762,9 +2792,9 @@ File: gmp.info,  Node: Integer Comparisons,  Next: Integer Logic and Bit Fiddlin
       arguments more than once.  `mpz_cmp_d' can be called with an
       infinity, but results are undefined for a NaN.
  
- -- Function: int mpz_cmpabs (mpz_t OP1, mpz_t OP2)
- -- Function: int mpz_cmpabs_d (mpz_t OP1, double OP2)
- -- Function: int mpz_cmpabs_ui (mpz_t OP1, unsigned long int OP2)
+ -- Function: int mpz_cmpabs (const mpz_t OP1, const mpz_t OP2)
+ -- Function: int mpz_cmpabs_d (const mpz_t OP1, double OP2)
+ -- Function: int mpz_cmpabs_ui (const mpz_t OP1, unsigned long int OP2)
       Compare the absolute values of OP1 and OP2.  Return a positive
       value if abs(OP1) > abs(OP2), zero if abs(OP1) = abs(OP2), or a
       negative value if abs(OP1) < abs(OP2).
@@ -2772,7 +2802,7 @@ File: gmp.info,  Node: Integer Comparisons,  Next: Integer Logic and Bit Fiddlin
       `mpz_cmpabs_d' can be called with an infinity, but results are
       undefined for a NaN.
  
- -- Macro: int mpz_sgn (mpz_t OP)
+ -- Macro: int mpz_sgn (const mpz_t OP)
       Return +1 if OP > 0, 0 if OP = 0, and -1 if OP < 0.
  
       This function is actually implemented as a macro.  It evaluates
@@ -2788,25 +2818,25 @@ These functions behave as if twos complement arithmetic were used
  (although sign-magnitude is the actual implementation).  The least
  significant bit is number 0.
  
- -- Function: void mpz_and (mpz_t ROP, mpz_t OP1, mpz_t OP2)
+ -- Function: void mpz_and (mpz_t ROP, const mpz_t OP1, const mpz_t OP2)
       Set ROP to OP1 bitwise-and OP2.
  
- -- Function: void mpz_ior (mpz_t ROP, mpz_t OP1, mpz_t OP2)
+ -- Function: void mpz_ior (mpz_t ROP, const mpz_t OP1, const mpz_t OP2)
       Set ROP to OP1 bitwise inclusive-or OP2.
  
- -- Function: void mpz_xor (mpz_t ROP, mpz_t OP1, mpz_t OP2)
+ -- Function: void mpz_xor (mpz_t ROP, const mpz_t OP1, const mpz_t OP2)
       Set ROP to OP1 bitwise exclusive-or OP2.
  
- -- Function: void mpz_com (mpz_t ROP, mpz_t OP)
+ -- Function: void mpz_com (mpz_t ROP, const mpz_t OP)
       Set ROP to the one's complement of OP.
  
- -- Function: mp_bitcnt_t mpz_popcount (mpz_t OP)
+ -- Function: mp_bitcnt_t mpz_popcount (const mpz_t OP)
       If OP>=0, return the population count of OP, which is the number
       of 1 bits in the binary representation.  If OP<0, the number of 1s
       is infinite, and the return value is the largest possible
       `mp_bitcnt_t'.
  
- -- Function: mp_bitcnt_t mpz_hamdist (mpz_t OP1, mpz_t OP2)
+ -- Function: mp_bitcnt_t mpz_hamdist (const mpz_t OP1, const mpz_t OP2)
       If OP1 and OP2 are both >=0 or both <0, return the hamming
       distance between the two operands, which is the number of bit
       positions where OP1 and OP2 have different bit values.  If one
@@ -2814,8 +2844,10 @@ significant bit is number 0.
       is infinite, and the return value is the largest possible
       `mp_bitcnt_t'.
  
- -- Function: mp_bitcnt_t mpz_scan0 (mpz_t OP, mp_bitcnt_t STARTING_BIT)
- -- Function: mp_bitcnt_t mpz_scan1 (mpz_t OP, mp_bitcnt_t STARTING_BIT)
+ -- Function: mp_bitcnt_t mpz_scan0 (const mpz_t OP, mp_bitcnt_t
+          STARTING_BIT)
+ -- Function: mp_bitcnt_t mpz_scan1 (const mpz_t OP, mp_bitcnt_t
+          STARTING_BIT)
       Scan OP, starting from bit STARTING_BIT, towards more significant
       bits, until the first 0 or 1 bit (respectively) is found.  Return
       the index of the found bit.
@@ -2837,7 +2869,7 @@ significant bit is number 0.
   -- Function: void mpz_combit (mpz_t ROP, mp_bitcnt_t BIT_INDEX)
       Complement bit BIT_INDEX in ROP.
  
- -- Function: int mpz_tstbit (mpz_t OP, mp_bitcnt_t BIT_INDEX)
+ -- Function: int mpz_tstbit (const mpz_t OP, mp_bitcnt_t BIT_INDEX)
       Test bit BIT_INDEX in OP and return 0 or 1 accordingly.
  
  \1f
@@ -2857,7 +2889,8 @@ prototypes for these functions.
  
     See also *note Formatted Output:: and *note Formatted Input::.
  
- -- Function: size_t mpz_out_str (FILE *STREAM, int BASE, mpz_t OP)
+ -- Function: size_t mpz_out_str (FILE *STREAM, int BASE, const mpz_t
+          OP)
       Output OP on stdio stream STREAM, as a string of digits in base
       BASE.  The base argument may vary from 2 to 62 or from -2 to -36.
  
@@ -2884,7 +2917,7 @@ prototypes for these functions.
  
       Return the number of bytes read, or if an error occurred, return 0.
  
- -- Function: size_t mpz_out_raw (FILE *STREAM, mpz_t OP)
+ -- Function: size_t mpz_out_raw (FILE *STREAM, const mpz_t OP)
       Output OP on stdio stream STREAM, in raw binary format.  The
       integer is written in a portable format, with 4 bytes of size
       information, and that many bytes of limbs.  Both the size and the
@@ -2931,7 +2964,7 @@ random number functions.
       before invoking this function.
  
   -- Function: void mpz_urandomm (mpz_t ROP, gmp_randstate_t STATE,
-          mpz_t N)
+          const mpz_t N)
       Generate a uniform random integer in the range 0 to N-1, inclusive.
  
       The variable STATE must be initialized by calling one of the
@@ -3012,7 +3045,7 @@ data with the following functions.
       instance `8*sizeof(int)-INT_BIT'.
  
   -- Function: void * mpz_export (void *ROP, size_t *COUNTP, int ORDER,
-          size_t SIZE, int ENDIAN, size_t NAILS, mpz_t OP)
+          size_t SIZE, int ENDIAN, size_t NAILS, const mpz_t OP)
       Fill ROP with word data from OP.
  
       The parameters specify the format of the data produced.  Each word
@@ -3059,23 +3092,23 @@ File: gmp.info,  Node: Miscellaneous Integer Functions,  Next: Integer Special F
  5.15 Miscellaneous Functions
  ============================
  
- -- Function: int mpz_fits_ulong_p (mpz_t OP)
- -- Function: int mpz_fits_slong_p (mpz_t OP)
- -- Function: int mpz_fits_uint_p (mpz_t OP)
- -- Function: int mpz_fits_sint_p (mpz_t OP)
- -- Function: int mpz_fits_ushort_p (mpz_t OP)
- -- Function: int mpz_fits_sshort_p (mpz_t OP)
+ -- Function: int mpz_fits_ulong_p (const mpz_t OP)
+ -- Function: int mpz_fits_slong_p (const mpz_t OP)
+ -- Function: int mpz_fits_uint_p (const mpz_t OP)
+ -- Function: int mpz_fits_sint_p (const mpz_t OP)
+ -- Function: int mpz_fits_ushort_p (const mpz_t OP)
+ -- Function: int mpz_fits_sshort_p (const mpz_t OP)
       Return non-zero iff the value of OP fits in an `unsigned long int',
       `signed long int', `unsigned int', `signed int', `unsigned short
       int', or `signed short int', respectively.  Otherwise, return zero.
  
- -- Macro: int mpz_odd_p (mpz_t OP)
- -- Macro: int mpz_even_p (mpz_t OP)
+ -- Macro: int mpz_odd_p (const mpz_t OP)
+ -- Macro: int mpz_even_p (const mpz_t OP)
       Determine whether OP is odd or even, respectively.  Return
       non-zero if yes, zero if no.  These macros evaluate their argument
       more than once.
  
- -- Function: size_t mpz_sizeinbase (mpz_t OP, int BASE)
+ -- Function: size_t mpz_sizeinbase (const mpz_t OP, int BASE)
       Return the size of OP measured in number of digits in the given
       BASE.  BASE can vary from 2 to 62.  The sign of OP is ignored,
       just the absolute value is used.  The result will be either exact
@@ -3151,7 +3184,7 @@ applications will not need them.
       changes like this.  `mpz_realloc2' and `_mpz_realloc' are the same
       except that `_mpz_realloc' takes its size in limbs.
  
- -- Function: mp_limb_t mpz_getlimbn (mpz_t OP, mp_size_t N)
+ -- Function: mp_limb_t mpz_getlimbn (const mpz_t OP, mp_size_t N)
       Return limb number N from OP.  The sign of OP is ignored, just the
       absolute value is used.  The least significant limb is number 0.
  
@@ -3159,7 +3192,7 @@ applications will not need them.
       `mpz_getlimbn' returns zero if N is outside the range 0 to
       `mpz_size(OP)-1'.
  
- -- Function: size_t mpz_size (mpz_t OP)
+ -- Function: size_t mpz_size (const mpz_t OP)
       Return the size of OP measured in number of limbs.  If OP is zero,
       the returned value will be zero.
  
@@ -3220,8 +3253,8 @@ File: gmp.info,  Node: Initializing Rationals,  Next: Rational Conversions,  Pre
       Free the space occupied by a NULL-terminated list of `mpq_t'
       variables.
  
- -- Function: void mpq_set (mpq_t ROP, mpq_t OP)
- -- Function: void mpq_set_z (mpq_t ROP, mpz_t OP)
+ -- Function: void mpq_set (mpq_t ROP, const mpq_t OP)
+ -- Function: void mpq_set_z (mpq_t ROP, const mpz_t OP)
       Assign ROP from OP.
  
   -- Function: void mpq_set_ui (mpq_t ROP, unsigned long int OP1,
@@ -3232,7 +3265,7 @@ File: gmp.info,  Node: Initializing Rationals,  Next: Rational Conversions,  Pre
       common factors, ROP has to be passed to `mpq_canonicalize' before
       any operations are performed on ROP.
  
- -- Function: int mpq_set_str (mpq_t ROP, char *STR, int BASE)
+ -- Function: int mpq_set_str (mpq_t ROP, const char *STR, int BASE)
       Set ROP from a null-terminated string STR in the given BASE.
  
       The string can be an integer like "41" or a fraction like
@@ -3261,7 +3294,7 @@ File: gmp.info,  Node: Rational Conversions,  Next: Rational Arithmetic,  Prev:
  6.2 Conversion Functions
  ========================
  
- -- Function: double mpq_get_d (mpq_t OP)
+ -- Function: double mpq_get_d (const mpq_t OP)
       Convert OP to a `double', truncating if necessary (i.e. rounding
       towards zero).
  
@@ -3272,11 +3305,11 @@ File: gmp.info,  Node: Rational Conversions,  Next: Rational Arithmetic,  Prev:
       may or may not occur.
  
   -- Function: void mpq_set_d (mpq_t ROP, double OP)
- -- Function: void mpq_set_f (mpq_t ROP, mpf_t OP)
+ -- Function: void mpq_set_f (mpq_t ROP, const mpf_t OP)
       Set ROP to the value of OP.  There is no rounding, this conversion
       is exact.
  
- -- Function: char * mpq_get_str (char *STR, int BASE, mpq_t OP)
+ -- Function: char * mpq_get_str (char *STR, int BASE, const mpq_t OP)
       Convert OP to a string of digits in base BASE.  The base may vary
       from 2 to 36.  The string will be of the form `num/den', or if the
       denominator is 1 then just `num'.
@@ -3304,34 +3337,37 @@ File: gmp.info,  Node: Rational Arithmetic,  Next: Comparing Rationals,  Prev: R
  6.3 Arithmetic Functions
  ========================
  
- -- Function: void mpq_add (mpq_t SUM, mpq_t ADDEND1, mpq_t ADDEND2)
+ -- Function: void mpq_add (mpq_t SUM, const mpq_t ADDEND1, const mpq_t
+          ADDEND2)
       Set SUM to ADDEND1 + ADDEND2.
  
- -- Function: void mpq_sub (mpq_t DIFFERENCE, mpq_t MINUEND, mpq_t
-          SUBTRAHEND)
+ -- Function: void mpq_sub (mpq_t DIFFERENCE, const mpq_t MINUEND,
+          const mpq_t SUBTRAHEND)
       Set DIFFERENCE to MINUEND - SUBTRAHEND.
  
- -- Function: void mpq_mul (mpq_t PRODUCT, mpq_t MULTIPLIER, mpq_t
-          MULTIPLICAND)
+ -- Function: void mpq_mul (mpq_t PRODUCT, const mpq_t MULTIPLIER,
+          const mpq_t MULTIPLICAND)
       Set PRODUCT to MULTIPLIER times MULTIPLICAND.
  
- -- Function: void mpq_mul_2exp (mpq_t ROP, mpq_t OP1, mp_bitcnt_t OP2)
+ -- Function: void mpq_mul_2exp (mpq_t ROP, const mpq_t OP1,
+          mp_bitcnt_t OP2)
       Set ROP to OP1 times 2 raised to OP2.
  
- -- Function: void mpq_div (mpq_t QUOTIENT, mpq_t DIVIDEND, mpq_t
-          DIVISOR)
+ -- Function: void mpq_div (mpq_t QUOTIENT, const mpq_t DIVIDEND, const
+          mpq_t DIVISOR)
       Set QUOTIENT to DIVIDEND/DIVISOR.
  
- -- Function: void mpq_div_2exp (mpq_t ROP, mpq_t OP1, mp_bitcnt_t OP2)
+ -- Function: void mpq_div_2exp (mpq_t ROP, const mpq_t OP1,
+          mp_bitcnt_t OP2)
       Set ROP to OP1 divided by 2 raised to OP2.
  
- -- Function: void mpq_neg (mpq_t NEGATED_OPERAND, mpq_t OPERAND)
+ -- Function: void mpq_neg (mpq_t NEGATED_OPERAND, const mpq_t OPERAND)
       Set NEGATED_OPERAND to -OPERAND.
  
- -- Function: void mpq_abs (mpq_t ROP, mpq_t OP)
+ -- Function: void mpq_abs (mpq_t ROP, const mpq_t OP)
       Set ROP to the absolute value of OP.
  
- -- Function: void mpq_inv (mpq_t INVERTED_NUMBER, mpq_t NUMBER)
+ -- Function: void mpq_inv (mpq_t INVERTED_NUMBER, const mpq_t NUMBER)
       Set INVERTED_NUMBER to 1/NUMBER.  If the new denominator is zero,
       this routine will divide by zero.
  
@@ -3341,17 +3377,17 @@ File: gmp.info,  Node: Comparing Rationals,  Next: Applying Integer Functions,
  6.4 Comparison Functions
  ========================
  
- -- Function: int mpq_cmp (mpq_t OP1, mpq_t OP2)
+ -- Function: int mpq_cmp (const mpq_t OP1, const mpq_t OP2)
       Compare OP1 and OP2.  Return a positive value if OP1 > OP2, zero
       if OP1 = OP2, and a negative value if OP1 < OP2.
  
       To determine if two rationals are equal, `mpq_equal' is faster than
       `mpq_cmp'.
  
- -- Macro: int mpq_cmp_ui (mpq_t OP1, unsigned long int NUM2, unsigned
+ -- Macro: int mpq_cmp_ui (const mpq_t OP1, unsigned long int NUM2,
+          unsigned long int DEN2)
+ -- Macro: int mpq_cmp_si (const mpq_t OP1, long int NUM2, unsigned
            long int DEN2)
- -- Macro: int mpq_cmp_si (mpq_t OP1, long int NUM2, unsigned long int
-          DEN2)
       Compare OP1 and NUM2/DEN2.  Return a positive value if OP1 >
       NUM2/DEN2, zero if OP1 = NUM2/DEN2, and a negative value if OP1 <
       NUM2/DEN2.
@@ -3361,13 +3397,13 @@ File: gmp.info,  Node: Comparing Rationals,  Next: Applying Integer Functions,
       These functions are implemented as macros and evaluate their
       arguments multiple times.
  
- -- Macro: int mpq_sgn (mpq_t OP)
+ -- Macro: int mpq_sgn (const mpq_t OP)
       Return +1 if OP > 0, 0 if OP = 0, and -1 if OP < 0.
  
       This function is actually implemented as a macro.  It evaluates its
-     arguments multiple times.
+     argument multiple times.
  
- -- Function: int mpq_equal (mpq_t OP1, mpq_t OP2)
+ -- Function: int mpq_equal (const mpq_t OP1, const mpq_t OP2)
       Return non-zero if OP1 and OP2 are equal, zero if they are
       non-equal.  Although `mpq_cmp' can be used for the same purpose,
       this function is much faster.
@@ -3388,16 +3424,16 @@ this chapter (*note Rational Number Functions::) then
  `mpq_canonicalize' must be called before any other `mpq' functions are
  applied to that `mpq_t'.
  
- -- Macro: mpz_t mpq_numref (mpq_t OP)
- -- Macro: mpz_t mpq_denref (mpq_t OP)
+ -- Macro: mpz_t mpq_numref (const mpq_t OP)
+ -- Macro: mpz_t mpq_denref (const mpq_t OP)
       Return a reference to the numerator and denominator of OP,
       respectively.  The `mpz' functions can be used on the result of
       these macros.
  
- -- Function: void mpq_get_num (mpz_t NUMERATOR, mpq_t RATIONAL)
- -- Function: void mpq_get_den (mpz_t DENOMINATOR, mpq_t RATIONAL)
- -- Function: void mpq_set_num (mpq_t RATIONAL, mpz_t NUMERATOR)
- -- Function: void mpq_set_den (mpq_t RATIONAL, mpz_t DENOMINATOR)
+ -- Function: void mpq_get_num (mpz_t NUMERATOR, const mpq_t RATIONAL)
+ -- Function: void mpq_get_den (mpz_t DENOMINATOR, const mpq_t RATIONAL)
+ -- Function: void mpq_set_num (mpq_t RATIONAL, const mpz_t NUMERATOR)
+ -- Function: void mpq_set_den (mpq_t RATIONAL, const mpz_t DENOMINATOR)
       Get or set the numerator or denominator of a rational.  These
       functions are equivalent to calling `mpz_set' with an appropriate
       `mpq_numref' or `mpq_denref'.  Direct use of `mpq_numref' or
@@ -3420,7 +3456,8 @@ prototypes for these functions.
  
     See also *note Formatted Output:: and *note Formatted Input::.
  
- -- Function: size_t mpq_out_str (FILE *STREAM, int BASE, mpq_t OP)
+ -- Function: size_t mpq_out_str (FILE *STREAM, int BASE, const mpq_t
+          OP)
       Output OP on stdio stream STREAM, as a string of digits in base
       BASE.  The base may vary from 2 to 36.  Output is in the form
       `num/den' or if the denominator is 1 then just `num'.
@@ -3575,7 +3612,7 @@ precision gradually in iterative algorithms like Newton-Raphson, making
  the computation precision closely match the actual accurate part of the
  numbers.
  
- -- Function: mp_bitcnt_t mpf_get_prec (mpf_t OP)
+ -- Function: mp_bitcnt_t mpf_get_prec (const mpf_t OP)
       Return the current precision of OP, in bits.
  
   -- Function: void mpf_set_prec (mpf_t ROP, mp_bitcnt_t PREC)
@@ -3620,15 +3657,15 @@ File: gmp.info,  Node: Assigning Floats,  Next: Simultaneous Float Init & Assign
  These functions assign new values to already initialized floats (*note
  Initializing Floats::).
  
- -- Function: void mpf_set (mpf_t ROP, mpf_t OP)
+ -- Function: void mpf_set (mpf_t ROP, const mpf_t OP)
   -- Function: void mpf_set_ui (mpf_t ROP, unsigned long int OP)
   -- Function: void mpf_set_si (mpf_t ROP, signed long int OP)
   -- Function: void mpf_set_d (mpf_t ROP, double OP)
- -- Function: void mpf_set_z (mpf_t ROP, mpz_t OP)
- -- Function: void mpf_set_q (mpf_t ROP, mpq_t OP)
+ -- Function: void mpf_set_z (mpf_t ROP, const mpz_t OP)
+ -- Function: void mpf_set_q (mpf_t ROP, const mpq_t OP)
       Set the value of ROP from OP.
  
- -- Function: int mpf_set_str (mpf_t ROP, char *STR, int BASE)
+ -- Function: int mpf_set_str (mpf_t ROP, const char *STR, int BASE)
       Set the value of ROP from the string in STR.  The string is of the
       form `M@N' or, if the base is 10 or less, alternatively `MeN'.
       `M' is the mantissa and `N' is the exponent.  The mantissa is
@@ -3681,7 +3718,7 @@ functions, it can be used as the source or destination operand for the
  ordinary float functions.  Don't use an initialize-and-set function on
  a variable already initialized!
  
- -- Function: void mpf_init_set (mpf_t ROP, mpf_t OP)
+ -- Function: void mpf_init_set (mpf_t ROP, const mpf_t OP)
   -- Function: void mpf_init_set_ui (mpf_t ROP, unsigned long int OP)
   -- Function: void mpf_init_set_si (mpf_t ROP, signed long int OP)
   -- Function: void mpf_init_set_d (mpf_t ROP, double OP)
@@ -3690,7 +3727,8 @@ a variable already initialized!
       The precision of ROP will be taken from the active default
       precision, as set by `mpf_set_default_prec'.
  
- -- Function: int mpf_init_set_str (mpf_t ROP, char *STR, int BASE)
+ -- Function: int mpf_init_set_str (mpf_t ROP, const char *STR, int
+          BASE)
       Initialize ROP and set its value from the string in STR.  See
       `mpf_set_str' above for details on the assignment operation.
  
@@ -3706,7 +3744,7 @@ File: gmp.info,  Node: Converting Floats,  Next: Float Arithmetic,  Prev: Simult
  7.4 Conversion Functions
  ========================
  
- -- Function: double mpf_get_d (mpf_t OP)
+ -- Function: double mpf_get_d (const mpf_t OP)
       Convert OP to a `double', truncating if necessary (i.e. rounding
       towards zero).
  
@@ -3715,7 +3753,8 @@ File: gmp.info,  Node: Converting Floats,  Next: Float Arithmetic,  Prev: Simult
       returned when available.  For too small 0.0 is normally returned.
       Hardware overflow, underflow and denorm traps may or may not occur.
  
- -- Function: double mpf_get_d_2exp (signed long int *EXP, mpf_t OP)
+ -- Function: double mpf_get_d_2exp (signed long int *EXP, const mpf_t
+          OP)
       Convert OP to a `double', truncating if necessary (i.e. rounding
       towards zero), and with an exponent returned separately.
  
@@ -3726,8 +3765,8 @@ File: gmp.info,  Node: Converting Floats,  Next: Float Arithmetic,  Prev: Simult
       This is similar to the standard C `frexp' function (*note
       Normalization Functions: (libc)Normalization Functions.).
  
- -- Function: long mpf_get_si (mpf_t OP)
- -- Function: unsigned long mpf_get_ui (mpf_t OP)
+ -- Function: long mpf_get_si (const mpf_t OP)
+ -- Function: unsigned long mpf_get_ui (const mpf_t OP)
       Convert OP to a `long' or `unsigned long', truncating any fraction
       part.  If OP is too big for the return type, the result is
       undefined.
@@ -3736,7 +3775,7 @@ File: gmp.info,  Node: Converting Floats,  Next: Float Arithmetic,  Prev: Simult
       Miscellaneous Float Functions::).
  
   -- Function: char * mpf_get_str (char *STR, mp_exp_t *EXPPTR, int
-          BASE, size_t N_DIGITS, mpf_t OP)
+          BASE, size_t N_DIGITS, const mpf_t OP)
       Convert OP to a string of digits in base BASE.  The base argument
       may vary from 2 to 62 or from -2 to -36.  Up to N_DIGITS digits
       will be generated.  Trailing zeros are not returned.  No more
@@ -3777,21 +3816,21 @@ File: gmp.info,  Node: Float Arithmetic,  Next: Float Comparison,  Prev: Convert
  7.5 Arithmetic Functions
  ========================
  
- -- Function: void mpf_add (mpf_t ROP, mpf_t OP1, mpf_t OP2)
- -- Function: void mpf_add_ui (mpf_t ROP, mpf_t OP1, unsigned long int
-          OP2)
+ -- Function: void mpf_add (mpf_t ROP, const mpf_t OP1, const mpf_t OP2)
+ -- Function: void mpf_add_ui (mpf_t ROP, const mpf_t OP1, unsigned
+          long int OP2)
       Set ROP to OP1 + OP2.
  
- -- Function: void mpf_sub (mpf_t ROP, mpf_t OP1, mpf_t OP2)
- -- Function: void mpf_ui_sub (mpf_t ROP, unsigned long int OP1, mpf_t
-          OP2)
- -- Function: void mpf_sub_ui (mpf_t ROP, mpf_t OP1, unsigned long int
-          OP2)
+ -- Function: void mpf_sub (mpf_t ROP, const mpf_t OP1, const mpf_t OP2)
+ -- Function: void mpf_ui_sub (mpf_t ROP, unsigned long int OP1, const
+          mpf_t OP2)
+ -- Function: void mpf_sub_ui (mpf_t ROP, const mpf_t OP1, unsigned
+          long int OP2)
       Set ROP to OP1 - OP2.
  
- -- Function: void mpf_mul (mpf_t ROP, mpf_t OP1, mpf_t OP2)
- -- Function: void mpf_mul_ui (mpf_t ROP, mpf_t OP1, unsigned long int
-          OP2)
+ -- Function: void mpf_mul (mpf_t ROP, const mpf_t OP1, const mpf_t OP2)
+ -- Function: void mpf_mul_ui (mpf_t ROP, const mpf_t OP1, unsigned
+          long int OP2)
       Set ROP to OP1 times OP2.
  
     Division is undefined if the divisor is zero, and passing a zero
@@ -3799,31 +3838,33 @@ divisor to the divide functions will make these functions intentionally
  divide by zero.  This lets the user handle arithmetic exceptions in
  these functions in the same manner as other arithmetic exceptions.
  
- -- Function: void mpf_div (mpf_t ROP, mpf_t OP1, mpf_t OP2)
- -- Function: void mpf_ui_div (mpf_t ROP, unsigned long int OP1, mpf_t
-          OP2)
- -- Function: void mpf_div_ui (mpf_t ROP, mpf_t OP1, unsigned long int
-          OP2)
+ -- Function: void mpf_div (mpf_t ROP, const mpf_t OP1, const mpf_t OP2)
+ -- Function: void mpf_ui_div (mpf_t ROP, unsigned long int OP1, const
+          mpf_t OP2)
+ -- Function: void mpf_div_ui (mpf_t ROP, const mpf_t OP1, unsigned
+          long int OP2)
       Set ROP to OP1/OP2.
  
- -- Function: void mpf_sqrt (mpf_t ROP, mpf_t OP)
+ -- Function: void mpf_sqrt (mpf_t ROP, const mpf_t OP)
   -- Function: void mpf_sqrt_ui (mpf_t ROP, unsigned long int OP)
       Set ROP to the square root of OP.
  
- -- Function: void mpf_pow_ui (mpf_t ROP, mpf_t OP1, unsigned long int
-          OP2)
+ -- Function: void mpf_pow_ui (mpf_t ROP, const mpf_t OP1, unsigned
+          long int OP2)
       Set ROP to OP1 raised to the power OP2.
  
- -- Function: void mpf_neg (mpf_t ROP, mpf_t OP)
+ -- Function: void mpf_neg (mpf_t ROP, const mpf_t OP)
       Set ROP to -OP.
  
- -- Function: void mpf_abs (mpf_t ROP, mpf_t OP)
+ -- Function: void mpf_abs (mpf_t ROP, const mpf_t OP)
       Set ROP to the absolute value of OP.
  
- -- Function: void mpf_mul_2exp (mpf_t ROP, mpf_t OP1, mp_bitcnt_t OP2)
+ -- Function: void mpf_mul_2exp (mpf_t ROP, const mpf_t OP1,
+          mp_bitcnt_t OP2)
       Set ROP to OP1 times 2 raised to OP2.
  
- -- Function: void mpf_div_2exp (mpf_t ROP, mpf_t OP1, mp_bitcnt_t OP2)
+ -- Function: void mpf_div_2exp (mpf_t ROP, const mpf_t OP1,
+          mp_bitcnt_t OP2)
       Set ROP to OP1 divided by 2 raised to OP2.
  
  \1f
@@ -3832,17 +3873,18 @@ File: gmp.info,  Node: Float Comparison,  Next: I/O of Floats,  Prev: Float Arit
  7.6 Comparison Functions
  ========================
  
- -- Function: int mpf_cmp (mpf_t OP1, mpf_t OP2)
- -- Function: int mpf_cmp_d (mpf_t OP1, double OP2)
- -- Function: int mpf_cmp_ui (mpf_t OP1, unsigned long int OP2)
- -- Function: int mpf_cmp_si (mpf_t OP1, signed long int OP2)
+ -- Function: int mpf_cmp (const mpf_t OP1, const mpf_t OP2)
+ -- Function: int mpf_cmp_d (const mpf_t OP1, double OP2)
+ -- Function: int mpf_cmp_ui (const mpf_t OP1, unsigned long int OP2)
+ -- Function: int mpf_cmp_si (const mpf_t OP1, signed long int OP2)
       Compare OP1 and OP2.  Return a positive value if OP1 > OP2, zero
       if OP1 = OP2, and a negative value if OP1 < OP2.
  
       `mpf_cmp_d' can be called with an infinity, but results are
       undefined for a NaN.
  
- -- Function: int mpf_eq (mpf_t OP1, mpf_t OP2, mp_bitcnt_t op3)
+ -- Function: int mpf_eq (const mpf_t OP1, const mpf_t OP2, mp_bitcnt_t
+          OP3)
       Return non-zero if the first OP3 bits of OP1 and OP2 are equal,
       zero otherwise.  I.e., test if OP1 and OP2 are approximately equal.
  
@@ -3854,15 +3896,16 @@ File: gmp.info,  Node: Float Comparison,  Next: I/O of Floats,  Prev: Float Arit
       bits.  Such numbers are really just one ulp off, and should be
       considered equal.
  
- -- Function: void mpf_reldiff (mpf_t ROP, mpf_t OP1, mpf_t OP2)
+ -- Function: void mpf_reldiff (mpf_t ROP, const mpf_t OP1, const mpf_t
+          OP2)
       Compute the relative difference between OP1 and OP2 and store the
       result in ROP.  This is abs(OP1-OP2)/OP1.
  
- -- Macro: int mpf_sgn (mpf_t OP)
+ -- Macro: int mpf_sgn (const mpf_t OP)
       Return +1 if OP > 0, 0 if OP = 0, and -1 if OP < 0.
  
       This function is actually implemented as a macro.  It evaluates
-     its arguments multiple times.
+     its argument multiple times.
  
  \1f
  File: gmp.info,  Node: I/O of Floats,  Next: Miscellaneous Float Functions,  Prev: Float Comparison,  Up: Floating-point Functions
@@ -3882,7 +3925,7 @@ prototypes for these functions.
     See also *note Formatted Output:: and *note Formatted Input::.
  
   -- Function: size_t mpf_out_str (FILE *STREAM, int BASE, size_t
-          N_DIGITS, mpf_t OP)
+          N_DIGITS, const mpf_t OP)
       Print OP to STREAM, as a string of digits.  Return the number of
       bytes written, or if an error occurred, return 0.
  
@@ -3927,22 +3970,22 @@ File: gmp.info,  Node: Miscellaneous Float Functions,  Prev: I/O of Floats,  Up:
  7.8 Miscellaneous Functions
  ===========================
  
- -- Function: void mpf_ceil (mpf_t ROP, mpf_t OP)
- -- Function: void mpf_floor (mpf_t ROP, mpf_t OP)
- -- Function: void mpf_trunc (mpf_t ROP, mpf_t OP)
+ -- Function: void mpf_ceil (mpf_t ROP, const mpf_t OP)
+ -- Function: void mpf_floor (mpf_t ROP, const mpf_t OP)
+ -- Function: void mpf_trunc (mpf_t ROP, const mpf_t OP)
       Set ROP to OP rounded to an integer.  `mpf_ceil' rounds to the
       next higher integer, `mpf_floor' to the next lower, and `mpf_trunc'
       to the integer towards zero.
  
- -- Function: int mpf_integer_p (mpf_t OP)
+ -- Function: int mpf_integer_p (const mpf_t OP)
       Return non-zero if OP is an integer.
  
- -- Function: int mpf_fits_ulong_p (mpf_t OP)
- -- Function: int mpf_fits_slong_p (mpf_t OP)
- -- Function: int mpf_fits_uint_p (mpf_t OP)
- -- Function: int mpf_fits_sint_p (mpf_t OP)
- -- Function: int mpf_fits_ushort_p (mpf_t OP)
- -- Function: int mpf_fits_sshort_p (mpf_t OP)
+ -- Function: int mpf_fits_ulong_p (const mpf_t OP)
+ -- Function: int mpf_fits_slong_p (const mpf_t OP)
+ -- Function: int mpf_fits_uint_p (const mpf_t OP)
+ -- Function: int mpf_fits_sint_p (const mpf_t OP)
+ -- Function: int mpf_fits_ushort_p (const mpf_t OP)
+ -- Function: int mpf_fits_sshort_p (const mpf_t OP)
       Return non-zero if OP would fit in the respective C data type, when
       truncated to an integer.
  
@@ -4062,7 +4105,7 @@ For example, {S1P, S1N}.
  
       This function requires that S1N is greater than or equal to S2N.
  
- -- Function: void mpn_neg (mp_limb_t *RP, const mp_limb_t *SP,
+ -- Function: mp_limb_t mpn_neg (mp_limb_t *RP, const mp_limb_t *SP,
            mp_size_t N)
       Perform the negation of {SP, N}, and write the result to {RP, N}.
       Return carry-out.
@@ -4259,9 +4302,9 @@ For example, {S1P, S1N}.
       {YP, YN}.  The result can be up to YN limbs, the return value is
       the actual number produced.  Both source operands are destroyed.
  
-     {XP, XN} must have at least as many bits as {YP, YN}.  {YP, YN}
-     must be odd.  Both operands must have non-zero most significant
-     limbs.  No overlap is permitted between {XP, XN} and {YP, YN}.
+     It is required that XN >= YN > 0, and the most significant limb of
+     {YP, YN} must be non-zero.  No overlap is permitted between {XP,
+     XN} and {YP, YN}.
  
   -- Function: mp_limb_t mpn_gcd_1 (const mp_limb_t *XP, mp_size_t XN,
            mp_limb_t YLIMB)
@@ -4314,7 +4357,7 @@ For example, {S1P, S1N}.
       remainder would have been zero or non-zero.
  
       A return value of zero indicates a perfect square.  See also
-     `mpz_perfect_square_p'.
+     `mpn_perfect_square_p'.
  
   -- Function: mp_size_t mpn_get_str (unsigned char *STR, int BASE,
            mp_limb_t *S1P, mp_size_t S1N)
@@ -4557,8 +4600,8 @@ File: gmp.info,  Node: Random State Initialization,  Next: Random State Seeding,
       Initialize STATE for a Mersenne Twister algorithm.  This algorithm
       is fast and has good randomness properties.
  
- -- Function: void gmp_randinit_lc_2exp (gmp_randstate_t STATE, mpz_t
-          A, unsigned long C, mp_bitcnt_t M2EXP)
+ -- Function: void gmp_randinit_lc_2exp (gmp_randstate_t STATE, const
+          mpz_t A, unsigned long C, mp_bitcnt_t M2EXP)
       Initialize STATE with a linear congruential algorithm X = (A*X +
       C) mod 2^M2EXP.
  
@@ -4611,7 +4654,8 @@ File: gmp.info,  Node: Random State Seeding,  Next: Random State Miscellaneous,
  9.2 Random State Seeding
  ========================
  
- -- Function: void gmp_randseed (gmp_randstate_t STATE, mpz_t SEED)
+ -- Function: void gmp_randseed (gmp_randstate_t STATE, const mpz_t
+          SEED)
   -- Function: void gmp_randseed_ui (gmp_randstate_t STATE,
            unsigned long int SEED)
       Set an initial seed value into STATE.
@@ -5203,7 +5247,7 @@ and that for instance using `+' with an `mpz_t' will have unpredictable
  results.  For classes with overloading, see *note C++ Class Interface::.
  
  \1f
-File: gmp.info,  Node: C++ Class Interface,  Next: BSD Compatible Functions,  Prev: Formatted Input,  Up: Top
+File: gmp.info,  Node: C++ Class Interface,  Next: Custom Allocation,  Prev: Formatted Input,  Up: Top
  
  12 C++ Class Interface
  **********************
@@ -5332,9 +5376,10 @@ File: gmp.info,  Node: C++ Interface Integers,  Next: C++ Interface Rationals,
   -- Function:  mpz_class::mpz_class (type N)
       Construct an `mpz_class'.  All the standard C++ types may be used,
       except `long long' and `long double', and all the GMP C++ classes
-     can be used.  Any necessary conversion follows the corresponding C
-     function, for example `double' follows `mpz_set_d' (*note
-     Assigning Integers::).
+     can be used, although conversions from `mpq_class' and `mpf_class'
+     are `explicit'.  Any necessary conversion follows the
+     corresponding C function, for example `double' follows `mpz_set_d'
+     (*note Assigning Integers::).
  
   -- Function: explicit mpz_class::mpz_class (mpz_t Z)
       Construct an `mpz_class' from an `mpz_t'.  The value in Z is
@@ -5351,6 +5396,10 @@ File: gmp.info,  Node: C++ Interface Integers,  Next: C++ Interface Rationals,
       If the string is not a valid integer, an `std::invalid_argument'
       exception is thrown.  The same applies to `operator='.
  
+ -- Function: mpz_class operator"" _mpz (const char *STR)
+     With C++11 compilers, integers can be constructed with the syntax
+     `123_mpz' which is equivalent to `mpz_class("123")'.
+
   -- Function: mpz_class operator/ (mpz_class A, mpz_class D)
   -- Function: mpz_class operator% (mpz_class A, mpz_class D)
       Divisions involving `mpz_class' round towards zero, as per the
@@ -5364,7 +5413,7 @@ File: gmp.info,  Node: C++ Interface Integers,  Next: C++ Interface Rationals,
            ...
            mpz_fdiv_q (q.get_mpz_t(), a.get_mpz_t(), d.get_mpz_t());
  
- -- Function: mpz_class abs (mpz_class OP1)
+ -- Function: mpz_class abs (mpz_class OP)
   -- Function: int cmp (mpz_class OP1, type OP2)
   -- Function: int cmp (type OP1, mpz_class OP2)
   -- Function: bool mpz_class::fits_sint_p (void)
@@ -5381,6 +5430,8 @@ File: gmp.info,  Node: C++ Interface Integers,  Next: C++ Interface Rationals,
   -- Function: int mpz_class::set_str (const string& STR, int BASE)
   -- Function: int sgn (mpz_class OP)
   -- Function: mpz_class sqrt (mpz_class OP)
+ -- Function: void mpz_class::swap (mpz_class& OP)
+ -- Function: void swap (mpz_class& OP1, mpz_class& OP2)
       These functions provide a C++ class interface to the corresponding
       GMP C routines.
  
@@ -5412,9 +5463,10 @@ called.
   -- Function:  mpq_class::mpq_class (type OP)
   -- Function:  mpq_class::mpq_class (integer NUM, integer DEN)
       Construct an `mpq_class'.  The initial value can be a single value
-     of any type, or a pair of integers (`mpz_class' or standard C++
-     integer types) representing a fraction, except that `long long'
-     and `long double' are not supported.  For example,
+     of any type (conversion from `mpf_class' is `explicit'), or a pair
+     of integers (`mpz_class' or standard C++ integer types)
+     representing a fraction, except that `long long' and `long double'
+     are not supported.  For example,
  
            mpq_class q (99);
            mpq_class q (1.75);
@@ -5435,6 +5487,11 @@ called.
       If the string is not a valid rational, an `std::invalid_argument'
       exception is thrown.  The same applies to `operator='.
  
+ -- Function: mpq_class operator"" _mpq (const char *STR)
+     With C++11 compilers, integral rationals can be constructed with
+     the syntax `123_mpq' which is equivalent to `mpq_class(123_mpz)'.
+     Other rationals can be built as `-1_mpq/2' or `0xb_mpq/123456_mpz'.
+
   -- Function: void mpq_class::canonicalize ()
       Put an `mpq_class' into canonical form, as per *note Rational
       Number Functions::.  All arithmetic operators require their
@@ -5449,6 +5506,8 @@ called.
   -- Function: int mpq_class::set_str (const char *STR, int BASE)
   -- Function: int mpq_class::set_str (const string& STR, int BASE)
   -- Function: int sgn (mpq_class OP)
+ -- Function: void mpq_class::swap (mpq_class& OP)
+ -- Function: void swap (mpq_class& OP1, mpq_class& OP2)
       These functions provide a C++ class interface to the corresponding
       GMP C routines.
  
@@ -5537,6 +5596,10 @@ used if this doesn't suit.
       If the string is not a valid float, an `std::invalid_argument'
       exception is thrown.  The same applies to `operator='.
  
+ -- Function: mpf_class operator"" _mpf (const char *STR)
+     With C++11 compilers, floats can be constructed with the syntax
+     `1.23e-1_mpf' which is equivalent to `mpf_class("1.23e-1")'.
+
   -- Function: mpf_class& mpf_class::operator= (type OP)
       Convert and store the given OP value to an `mpf_class' object.  The
       same types are accepted as for the constructors above.
@@ -5581,6 +5644,8 @@ used if this doesn't suit.
   -- Function: int mpf_class::set_str (const string& STR, int BASE)
   -- Function: int sgn (mpf_class OP)
   -- Function: mpf_class sqrt (mpf_class OP)
+ -- Function: void mpf_class::swap (mpf_class& OP)
+ -- Function: void swap (mpf_class& OP1, mpf_class& OP2)
   -- Function: mpf_class trunc (mpf_class OP)
       These functions provide a C++ class interface to the corresponding
       GMP C routines.
@@ -5638,7 +5703,7 @@ File: gmp.info,  Node: C++ Interface Random Numbers,  Next: C++ Interface Limita
       Seed a random number generator.  See *note Random Number
       Functions::, for how to choose a good seed.
  
- -- Function: mpz_class gmp_randclass::get_z_bits (unsigned long BITS)
+ -- Function: mpz_class gmp_randclass::get_z_bits (mp_bitcnt_t BITS)
   -- Function: mpz_class gmp_randclass::get_z_bits (mpz_class BITS)
       Generate a random integer with a specified number of bits.
  
@@ -5734,117 +5799,9 @@ Templated Expressions
            }
  
  \1f
-File: gmp.info,  Node: BSD Compatible Functions,  Next: Custom Allocation,  Prev: C++ Class Interface,  Up: Top
-
-13 Berkeley MP Compatible Functions
-***********************************
-
-These functions are intended to be fully compatible with the Berkeley MP
-library which is available on many BSD derived U*ix systems.  The
-`--enable-mpbsd' option must be used when building GNU MP to make these
-available (*note Installing GMP::).
-
-   The original Berkeley MP library has a usage restriction: you cannot
-use the same variable as both source and destination in a single
-function call.  The compatible functions in GNU MP do not share this
-restriction--inputs and outputs may overlap.
-
-   It is not recommended that new programs are written using these
-functions.  Apart from the incomplete set of functions, the interface
-for initializing `MINT' objects is more error prone, and the `pow'
-function collides with `pow' in `libm.a'.
-
-   Include the header `mp.h' to get the definition of the necessary
-types and functions.  If you are on a BSD derived system, make sure to
-include GNU `mp.h' if you are going to link the GNU `libmp.a' to your
-program.  This means that you probably need to give the `-I<dir>'
-option to the compiler, where `<dir>' is the directory where you have
-GNU `mp.h'.
-
- -- Function: MINT * itom (signed short int INITIAL_VALUE)
-     Allocate an integer consisting of a `MINT' object and dynamic limb
-     space.  Initialize the integer to INITIAL_VALUE.  Return a pointer
-     to the `MINT' object.
-
- -- Function: MINT * xtom (char *INITIAL_VALUE)
-     Allocate an integer consisting of a `MINT' object and dynamic limb
-     space.  Initialize the integer from INITIAL_VALUE, a hexadecimal,
-     null-terminated C string.  Return a pointer to the `MINT' object.
-
- -- Function: void move (MINT *SRC, MINT *DEST)
-     Set DEST to SRC by copying.  Both variables must be previously
-     initialized.
-
- -- Function: void madd (MINT *SRC_1, MINT *SRC_2, MINT *DESTINATION)
-     Add SRC_1 and SRC_2 and put the sum in DESTINATION.
-
- -- Function: void msub (MINT *SRC_1, MINT *SRC_2, MINT *DESTINATION)
-     Subtract SRC_2 from SRC_1 and put the difference in DESTINATION.
-
- -- Function: void mult (MINT *SRC_1, MINT *SRC_2, MINT *DESTINATION)
-     Multiply SRC_1 and SRC_2 and put the product in DESTINATION.
-
- -- Function: void mdiv (MINT *DIVIDEND, MINT *DIVISOR, MINT *QUOTIENT,
-          MINT *REMAINDER)
- -- Function: void sdiv (MINT *DIVIDEND, signed short int DIVISOR, MINT
-          *QUOTIENT, signed short int *REMAINDER)
-     Set QUOTIENT to DIVIDEND/DIVISOR, and REMAINDER to DIVIDEND mod
-     DIVISOR.  The quotient is rounded towards zero; the remainder has
-     the same sign as the dividend unless it is zero.
-
-     Some implementations of these functions work differently--or not
-     at all--for negative arguments.
-
- -- Function: void msqrt (MINT *OP, MINT *ROOT, MINT *REMAINDER)
-     Set ROOT to the truncated integer part of the square root of OP,
-     like `mpz_sqrt'.  Set REMAINDER to OP-ROOT*ROOT, i.e.  zero if OP
-     is a perfect square.
-
-     If ROOT and REMAINDER are the same variable, the results are
-     undefined.
-
- -- Function: void pow (MINT *BASE, MINT *EXP, MINT *MOD, MINT *DEST)
-     Set DEST to (BASE raised to EXP) modulo MOD.
-
-     Note that the name `pow' clashes with `pow' from the standard C
-     math library (*note Exponentiation and Logarithms: (libc)Exponents
-     and Logarithms.).  An application will only be able to use one or
-     the other.
-
- -- Function: void rpow (MINT *BASE, signed short int EXP, MINT *DEST)
-     Set DEST to BASE raised to EXP.
-
- -- Function: void gcd (MINT *OP1, MINT *OP2, MINT *RES)
-     Set RES to the greatest common divisor of OP1 and OP2.
-
- -- Function: int mcmp (MINT *OP1, MINT *OP2)
-     Compare OP1 and OP2.  Return a positive value if OP1 > OP2, zero
-     if OP1 = OP2, and a negative value if OP1 < OP2.
-
- -- Function: void min (MINT *DEST)
-     Input a decimal string from `stdin', and put the read integer in
-     DEST.  SPC and TAB are allowed in the number string, and are
-     ignored.
-
- -- Function: void mout (MINT *SRC)
-     Output SRC to `stdout', as a decimal string.  Also output a
-     newline.
-
- -- Function: char * mtox (MINT *OP)
-     Convert OP to a hexadecimal string, and return a pointer to the
-     string.  The returned string is allocated using the default memory
-     allocation function, `malloc' by default.  It will be
-     `strlen(str)+1' bytes, that being exactly enough for the string
-     and null-terminator.
-
- -- Function: void mfree (MINT *OP)
-     De-allocate, the space used by OP.  *This function should only be
-     passed a value returned by `itom' or `xtom'.*
-
-\1f
-File: gmp.info,  Node: Custom Allocation,  Next: Language Bindings,  Prev: BSD Compatible Functions,  Up: Top
+File: gmp.info,  Node: Custom Allocation,  Next: Language Bindings,  Prev: C++ Class Interface,  Up: Top
  
-14 Custom Allocation
+13 Custom Allocation
  ********************
  
  By default GMP uses `malloc', `realloc' and `free' for memory
@@ -5855,9 +5812,6 @@ output and terminates the program.
  different way or to have a different error action on running out of
  memory.
  
-   This feature is available in the Berkeley compatibility library
-(*note BSD Compatible Functions::) as well as the main GMP library.
-
   -- Function: void mp_set_memory_functions (
            void *(*ALLOC_FUNC_PTR) (size_t),
            void *(*REALLOC_FUNC_PTR) (void *, size_t, size_t),
@@ -5947,7 +5901,7 @@ this is a problem.
  \1f
  File: gmp.info,  Node: Language Bindings,  Next: Algorithms,  Prev: Custom Allocation,  Up: Top
  
-15 Language Bindings
+14 Language Bindings
  ********************
  
  The following packages and projects offer access to GMP from languages
@@ -6090,7 +6044,7 @@ Other
  \1f
  File: gmp.info,  Node: Algorithms,  Next: Internals,  Prev: Language Bindings,  Up: Top
  
-16 Algorithms
+15 Algorithms
  *************
  
  This chapter is an introduction to some of the algorithms used for
@@ -6115,7 +6069,7 @@ documented functions.
  \1f
  File: gmp.info,  Node: Multiplication Algorithms,  Next: Division Algorithms,  Prev: Algorithms,  Up: Algorithms
  
-16.1 Multiplication
+15.1 Multiplication
  ===================
  
  NxN limb multiplications and squares are done using one of seven
@@ -6151,7 +6105,7 @@ Unbalanced Multiplication::).
  \1f
  File: gmp.info,  Node: Basecase Multiplication,  Next: Karatsuba Multiplication,  Prev: Multiplication Algorithms,  Up: Multiplication Algorithms
  
-16.1.1 Basecase Multiplication
+15.1.1 Basecase Multiplication
  ------------------------------
  
  Basecase NxM multiplication is a straightforward rectangular set of
@@ -6196,7 +6150,7 @@ routine should be used always.
  \1f
  File: gmp.info,  Node: Karatsuba Multiplication,  Next: Toom 3-Way Multiplication,  Prev: Basecase Multiplication,  Up: Multiplication Algorithms
  
-16.1.2 Karatsuba Multiplication
+15.1.2 Karatsuba Multiplication
  -------------------------------
  
  The Karatsuba multiplication algorithm is described in Knuth section
@@ -6281,7 +6235,7 @@ that sense the algorithm thresholds are merely of academic interest.
  \1f
  File: gmp.info,  Node: Toom 3-Way Multiplication,  Next: Toom 4-Way Multiplication,  Prev: Karatsuba Multiplication,  Up: Multiplication Algorithms
  
-16.1.3 Toom 3-Way Multiplication
+15.1.3 Toom 3-Way Multiplication
  --------------------------------
  
  The Karatsuba formula is the simplest case of a general approach to
@@ -6424,7 +6378,7 @@ a bit of rearrangement just one division by 6 can be done.
  \1f
  File: gmp.info,  Node: Toom 4-Way Multiplication,  Next: Higher degree Toom'n'half,  Prev: Toom 3-Way Multiplication,  Up: Multiplication Algorithms
  
-16.1.4 Toom 4-Way Multiplication
+15.1.4 Toom 4-Way Multiplication
  --------------------------------
  
  Karatsuba and Toom-3 split the operands into 2 and 3 coefficients,
@@ -6458,7 +6412,7 @@ size each.
  \1f
  File: gmp.info,  Node: Higher degree Toom'n'half,  Next: FFT Multiplication,  Prev: Toom 4-Way Multiplication,  Up: Multiplication Algorithms
  
-16.1.5 Higher degree Toom'n'half
+15.1.5 Higher degree Toom'n'half
  --------------------------------
  
  The Toom algorithms described above (*note Toom 3-Way Multiplication::,
@@ -6485,7 +6439,7 @@ buffer equanl in size to the result of the product.
  \1f
  File: gmp.info,  Node: FFT Multiplication,  Next: Other Multiplication,  Prev: Higher degree Toom'n'half,  Up: Multiplication Algorithms
  
-16.1.6 FFT Multiplication
+15.1.6 FFT Multiplication
  -------------------------
  
  At large to very large sizes a Fermat style FFT multiplication is used,
@@ -6589,7 +6543,7 @@ will be needed.
  \1f
  File: gmp.info,  Node: Other Multiplication,  Next: Unbalanced Multiplication,  Prev: FFT Multiplication,  Up: Multiplication Algorithms
  
-16.1.7 Other Multiplication
+15.1.7 Other Multiplication
  ---------------------------
  
  The Toom algorithms described above (*note Toom 3-Way Multiplication::,
@@ -6634,7 +6588,7 @@ is of course of vital importance to GMP.
  \1f
  File: gmp.info,  Node: Unbalanced Multiplication,  Prev: Other Multiplication,  Up: Multiplication Algorithms
  
-16.1.8 Unbalanced Multiplication
+15.1.8 Unbalanced Multiplication
  --------------------------------
  
  Multiplication of operands with different sizes, both below
@@ -6653,7 +6607,7 @@ coefficients, i.e., a polynomial of degree 1 to 3.
  \1f
  File: gmp.info,  Node: Division Algorithms,  Next: Greatest Common Divisor Algorithms,  Prev: Multiplication Algorithms,  Up: Algorithms
  
-16.2 Division Algorithms
+15.2 Division Algorithms
  ========================
  
  * Menu:
@@ -6669,7 +6623,7 @@ File: gmp.info,  Node: Division Algorithms,  Next: Greatest Common Divisor Algor
  \1f
  File: gmp.info,  Node: Single Limb Division,  Next: Basecase Division,  Prev: Division Algorithms,  Up: Division Algorithms
  
-16.2.1 Single Limb Division
+15.2.1 Single Limb Division
  ---------------------------
  
  Nx1 division is implemented using repeated 2x1 divisions from high to
@@ -6723,7 +6677,7 @@ pipelined.
  \1f
  File: gmp.info,  Node: Basecase Division,  Next: Divide and Conquer Division,  Prev: Single Limb Division,  Up: Division Algorithms
  
-16.2.2 Basecase Division
+15.2.2 Basecase Division
  ------------------------
  
  Basecase NxM division is like long division done by hand, but in base
@@ -6748,7 +6702,7 @@ for each of the Q quotient limbs.
  \1f
  File: gmp.info,  Node: Divide and Conquer Division,  Next: Block-Wise Barrett Division,  Prev: Basecase Division,  Up: Division Algorithms
  
-16.2.3 Divide and Conquer Division
+15.2.3 Divide and Conquer Division
  ----------------------------------
  
  For divisors larger than `DC_DIV_QR_THRESHOLD', division is done by
@@ -6788,7 +6742,7 @@ In practice, at moderate to large sizes, a 2NxN division is about 2 to
  \1f
  File: gmp.info,  Node: Block-Wise Barrett Division,  Next: Exact Division,  Prev: Divide and Conquer Division,  Up: Division Algorithms
  
-16.2.4 Block-Wise Barrett Division
+15.2.4 Block-Wise Barrett Division
  ----------------------------------
  
  For the largest divisions, a block-wise Barrett division algorithm is
@@ -6804,7 +6758,7 @@ ceil(n/2) limbs.
  \1f
  File: gmp.info,  Node: Exact Division,  Next: Exact Remainder,  Prev: Block-Wise Barrett Division,  Up: Division Algorithms
  
-16.2.5 Exact Division
+15.2.5 Exact Division
  ---------------------
  
  A so-called exact division is when the dividend is known to be an exact
@@ -6863,7 +6817,7 @@ pipelined multipliers.
  \1f
  File: gmp.info,  Node: Exact Remainder,  Next: Small Quotient Division,  Prev: Exact Division,  Up: Division Algorithms
  
-16.2.6 Exact Remainder
+15.2.6 Exact Remainder
  ----------------------
  
  If the exact division algorithm is done with a full subtraction at each
@@ -6885,9 +6839,10 @@ some single limb divisions saved.  When d is a single limb some
  simplifications arise, providing good speedups on a number of
  processors.
  
-   `mpn_divexact_by3', `mpn_modexact_1_odd' and the `mpn_redc_X'
-functions differ subtly in how they return r, leading to some negations
-in the above formula, but all are essentially the same.
+   The functions `mpn_divexact_by3', `mpn_modexact_1_odd' and the
+internal `mpn_redc_X' functions differ subtly in how they return r,
+leading to some negations in the above formula, but all are essentially
+the same.
  
     Clearly r is zero when a is a multiple of d, and this leads to
  divisibility or congruence tests which are potentially more efficient
@@ -6913,7 +6868,7 @@ this.
  \1f
  File: gmp.info,  Node: Small Quotient Division,  Prev: Exact Remainder,  Up: Division Algorithms
  
-16.2.7 Small Quotient Division
+15.2.7 Small Quotient Division
  ------------------------------
  
  An NxM division where the number of quotient limbs Q=N-M is small can
@@ -6945,7 +6900,7 @@ established by following the argument of Knuth section 4.3.1 exercise
  \1f
  File: gmp.info,  Node: Greatest Common Divisor Algorithms,  Next: Powering Algorithms,  Prev: Division Algorithms,  Up: Algorithms
  
-16.3 Greatest Common Divisor
+15.3 Greatest Common Divisor
  ============================
  
  * Menu:
@@ -6959,7 +6914,7 @@ File: gmp.info,  Node: Greatest Common Divisor Algorithms,  Next: Powering Algor
  \1f
  File: gmp.info,  Node: Binary GCD,  Next: Lehmer's Algorithm,  Prev: Greatest Common Divisor Algorithms,  Up: Greatest Common Divisor Algorithms
  
-16.3.1 Binary GCD
+15.3.1 Binary GCD
  -----------------
  
  At small sizes GMP uses an O(N^2) binary style GCD.  This is described
@@ -7021,7 +6976,7 @@ tests into a GCD.
  \1f
  File: gmp.info,  Node: Lehmer's Algorithm,  Next: Subquadratic GCD,  Prev: Binary GCD,  Up: Greatest Common Divisor Algorithms
  
-16.3.2 Lehmer's algorithm
+15.3.2 Lehmer's algorithm
  -------------------------
  
  Lehmer's improvement of the Euclidean algorithms is based on the
@@ -7063,6 +7018,58 @@ optimizations:
       reduced in size from two limbs to one and a half.
  
  
+\1f
+File: gmp.info,  Node: Subquadratic GCD,  Next: Extended GCD,  Prev: Lehmer's Algorithm,  Up: Greatest Common Divisor Algorithms
+
+15.3.3 Subquadratic GCD
+-----------------------
+
+For inputs larger than `GCD_DC_THRESHOLD', GCD is computed via the HGCD
+(Half GCD) function, as a generalization of Lehmer's algorithm.
+
+   Let the inputs a,b be of size N limbs each. Put S = floor(N/2) + 1.
+Then HGCD(a,b) returns a transformation matrix T with non-negative
+elements, and reduced numbers (c;d) = T^-1 (a;b). The reduced numbers
+c,d must be larger than S limbs, while their difference abs(c-d) must
+fit in S limbs. The matrix elements will also be of size roughly N/2.
+
+   The HGCD base case uses Lehmer's algorithm, but with the above stop
+condition that returns reduced numbers and the corresponding
+transformation matrix half-way through. For inputs larger than
+`HGCD_THRESHOLD', HGCD is computed recursively, using the divide and
+conquer algorithm in "On Schönhage's algorithm and subquadratic integer
+GCD computation" by Möller (*note References::). The recursive
+algorithm consists of these main steps.
+
+   * Call HGCD recursively, on the most significant N/2 limbs. Apply the
+     resulting matrix T_1 to the full numbers, reducing them to a size
+     just above 3N/2.
+
+   * Perform a small number of division or subtraction steps to reduce
+     the numbers to size below 3N/2. This is essential mainly for the
+     unlikely case of large quotients.
+
+   * Call HGCD recursively, on the most significant N/2 limbs of the
+     reduced numbers. Apply the resulting matrix T_2 to the full
+     numbers, reducing them to a size just above N/2.
+
+   * Compute T = T_1 T_2.
+
+   * Perform a small number of division and subtraction steps to
+     satisfy the requirements, and return.
+
+   GCD is then implemented as a loop around HGCD, similarly to Lehmer's
+algorithm. Where Lehmer repeatedly chops off the top two limbs, calls
+`mpn_hgcd2', and applies the resulting matrix to the full numbers, the
+subquadratic GCD chops off the most significant third of the limbs (the
+proportion is a tuning parameter, and 1/3 seems to be more efficient
+than, e.g., 1/2), calls `mpn_hgcd', and applies the resulting matrix.
+Once the input numbers are reduced to size below `GCD_DC_THRESHOLD',
+Lehmer's algorithm is used for the rest of the work.
+
+   The asymptotic running time of both HGCD and GCD is O(M(N)*log(N)),
+where M(N) is the time for multiplying two N-limb numbers.
+
  
  \1f
  Local Variables:
diff --git a/doc/gmp.info-2 b/doc/gmp.info-2

index 0536cc96f836991977f377defc83b6856cb673fa..52b884c99b61a03971568a5cc04cd95a3c636b20 100644 (file)
--- a/doc/gmp.info-2
+++ b/doc/gmp.info-2
@@ -2,11 +2,11 @@ This is ../../gmp/doc/gmp.info, produced by makeinfo version 4.13 from
  ../../gmp/doc/gmp.texi.
  
  This manual describes how to install and use the GNU multiple precision
-arithmetic library, version 5.0.5.
+arithmetic library, version 5.1.3.
  
     Copyright 1991, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
-2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012
-Free Software Foundation, Inc.
+2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012,
+2013 Free Software Foundation, Inc.
  
     Permission is granted to copy, distribute and/or modify this
  document under the terms of the GNU Free Documentation License, Version
@@ -21,62 +21,10 @@ START-INFO-DIR-ENTRY
  * gmp: (gmp).                   GNU Multiple Precision Arithmetic Library.
  END-INFO-DIR-ENTRY
  
-\1f
-File: gmp.info,  Node: Subquadratic GCD,  Next: Extended GCD,  Prev: Lehmer's Algorithm,  Up: Greatest Common Divisor Algorithms
-
-16.3.3 Subquadratic GCD
------------------------
-
-For inputs larger than `GCD_DC_THRESHOLD', GCD is computed via the HGCD
-(Half GCD) function, as a generalization to Lehmer's algorithm.
-
-   Let the inputs a,b be of size N limbs each. Put S = floor(N/2) + 1.
-Then HGCD(a,b) returns a transformation matrix T with non-negative
-elements, and reduced numbers (c;d) = T^-1 (a;b). The reduced numbers
-c,d must be larger than S limbs, while their difference abs(c-d) must
-fit in S limbs. The matrix elements will also be of size roughly N/2.
-
-   The HGCD base case uses Lehmer's algorithm, but with the above stop
-condition that returns reduced numbers and the corresponding
-transformation matrix half-way through. For inputs larger than
-`HGCD_THRESHOLD', HGCD is computed recursively, using the divide and
-conquer algorithm in "On Schönhage's algorithm and subquadratic integer
-GCD computation" by Möller (*note References::). The recursive
-algorithm consists of these main steps.
-
-   * Call HGCD recursively, on the most significant N/2 limbs. Apply the
-     resulting matrix T_1 to the full numbers, reducing them to a size
-     just above 3N/2.
-
-   * Perform a small number of division or subtraction steps to reduce
-     the numbers to size below 3N/2. This is essential mainly for the
-     unlikely case of large quotients.
-
-   * Call HGCD recursively, on the most significant N/2 limbs of the
-     reduced numbers. Apply the resulting matrix T_2 to the full
-     numbers, reducing them to a size just above N/2.
-
-   * Compute T = T_1 T_2.
-
-   * Perform a small number of division and subtraction steps to
-     satisfy the requirements, and return.
-
-   GCD is then implemented as a loop around HGCD, similarly to Lehmer's
-algorithm. Where Lehmer repeatedly chops off the top two limbs, calls
-`mpn_hgcd2', and applies the resulting matrix to the full numbers, the
-subquadratic GCD chops off the most significant third of the limbs (the
-proportion is a tuning parameter, and 1/3 seems to be more efficient
-than, e.g, 1/2), calls `mpn_hgcd', and applies the resulting matrix.
-Once the input numbers are reduced to size below `GCD_DC_THRESHOLD',
-Lehmer's algorithm is used for the rest of the work.
-
-   The asymptotic running time of both HGCD and GCD is O(M(N)*log(N)),
-where M(N) is the time for multiplying two N-limb numbers.
-
  \1f
  File: gmp.info,  Node: Extended GCD,  Next: Jacobi Symbol,  Prev: Subquadratic GCD,  Up: Greatest Common Divisor Algorithms
  
-16.3.4 Extended GCD
+15.3.4 Extended GCD
  -------------------
  
  The extended GCD function, or GCDEXT, calculates gcd(a,b) and also
@@ -101,10 +49,13 @@ improved by taking into account the current size of the cofactors.
  \1f
  File: gmp.info,  Node: Jacobi Symbol,  Prev: Extended GCD,  Up: Greatest Common Divisor Algorithms
  
-16.3.5 Jacobi Symbol
+15.3.5 Jacobi Symbol
  --------------------
  
-`mpz_jacobi' and `mpz_kronecker' are currently implemented with a
+[This section is obsolete.  The current Jacobi code actually uses a very
+efficient algorithm.]
+
+   `mpz_jacobi' and `mpz_kronecker' are currently implemented with a
  simple binary algorithm similar to that described for the GCDs (*note
  Binary GCD::).  They're not very fast when both inputs are large.
  Lehmer's multi-step improvement or a binary based multi-step algorithm
@@ -122,7 +73,7 @@ using some bit twiddling, avoiding table lookups or conditional jumps.
  \1f
  File: gmp.info,  Node: Powering Algorithms,  Next: Root Extraction Algorithms,  Prev: Greatest Common Divisor Algorithms,  Up: Algorithms
  
-16.4 Powering Algorithms
+15.4 Powering Algorithms
  ========================
  
  * Menu:
@@ -133,7 +84,7 @@ File: gmp.info,  Node: Powering Algorithms,  Next: Root Extraction Algorithms,
  \1f
  File: gmp.info,  Node: Normal Powering Algorithm,  Next: Modular Powering Algorithm,  Prev: Powering Algorithms,  Up: Powering Algorithms
  
-16.4.1 Normal Powering
+15.4.1 Normal Powering
  ----------------------
  
  Normal `mpz' or `mpf' powering uses a simple binary algorithm,
@@ -145,7 +96,7 @@ just as easy and can be done with somewhat less temporary memory.
  \1f
  File: gmp.info,  Node: Modular Powering Algorithm,  Prev: Normal Powering Algorithm,  Up: Powering Algorithms
  
-16.4.2 Modular Powering
+15.4.2 Modular Powering
  -----------------------
  
  Modular powering is implemented using a 2^k-ary sliding window
@@ -155,7 +106,7 @@ exponent.  Larger exponents use larger values of k, the choice being
  made to minimize the average number of multiplications that must
  supplement the squaring.
  
-   The modular multiplies and squares use either a simple division or
+   The modular multiplies and squarings use either a simple division or
  the REDC method by Montgomery (*note References::).  REDC is a little
  faster, essentially saving N single limb divisions in a fashion similar
  to an exact remainder (*note Exact Remainder::).
@@ -163,7 +114,7 @@ to an exact remainder (*note Exact Remainder::).
  \1f
  File: gmp.info,  Node: Root Extraction Algorithms,  Next: Radix Conversion Algorithms,  Prev: Powering Algorithms,  Up: Algorithms
  
-16.5 Root Extraction Algorithms
+15.5 Root Extraction Algorithms
  ===============================
  
  * Menu:
@@ -176,7 +127,7 @@ File: gmp.info,  Node: Root Extraction Algorithms,  Next: Radix Conversion Algor
  \1f
  File: gmp.info,  Node: Square Root Algorithm,  Next: Nth Root Algorithm,  Prev: Root Extraction Algorithms,  Up: Root Extraction Algorithms
  
-16.5.1 Square Root
+15.5.1 Square Root
  ------------------
  
  Square roots are taken using the "Karatsuba Square Root" algorithm by
@@ -230,7 +181,7 @@ precision given by `mpf_sqrt_ui' is obtained by padding with zero limbs.
  \1f
  File: gmp.info,  Node: Nth Root Algorithm,  Next: Perfect Square Algorithm,  Prev: Square Root Algorithm,  Up: Root Extraction Algorithms
  
-16.5.2 Nth Root
+15.5.2 Nth Root
  ---------------
  
  Integer Nth roots are taken using Newton's method with the following
@@ -250,7 +201,7 @@ particularly well optimized.
  \1f
  File: gmp.info,  Node: Perfect Square Algorithm,  Next: Perfect Power Algorithm,  Prev: Nth Root Algorithm,  Up: Root Extraction Algorithms
  
-16.5.3 Perfect Square
+15.5.3 Perfect Square
  ---------------------
  
  A significant fraction of non-squares can be quickly identified by
@@ -295,7 +246,7 @@ would affect such considerations.
  \1f
  File: gmp.info,  Node: Perfect Power Algorithm,  Prev: Perfect Square Algorithm,  Up: Root Extraction Algorithms
  
-16.5.4 Perfect Power
+15.5.4 Perfect Power
  --------------------
  
  Detecting perfect powers is required by some factorization algorithms.
@@ -311,7 +262,7 @@ checked.
  \1f
  File: gmp.info,  Node: Radix Conversion Algorithms,  Next: Other Algorithms,  Prev: Root Extraction Algorithms,  Up: Algorithms
  
-16.6 Radix Conversion
+15.6 Radix Conversion
  =====================
  
  Radix conversions are less important than other algorithms.  A program
@@ -326,7 +277,7 @@ representation.
  \1f
  File: gmp.info,  Node: Binary to Radix,  Next: Radix to Binary,  Prev: Radix Conversion Algorithms,  Up: Radix Conversion Algorithms
  
-16.6.1 Binary to Radix
+15.6.1 Binary to Radix
  ----------------------
  
  Conversions from binary to a power-of-2 radix use a simple and fast
@@ -401,7 +352,7 @@ be a net speedup.
  \1f
  File: gmp.info,  Node: Radix to Binary,  Prev: Binary to Radix,  Up: Radix Conversion Algorithms
  
-16.6.2 Radix to Binary
+15.6.2 Radix to Binary
  ----------------------
  
  *This section needs to be rewritten, it currently describes the
@@ -446,7 +397,7 @@ or more).
  \1f
  File: gmp.info,  Node: Other Algorithms,  Next: Assembly Coding,  Prev: Radix Conversion Algorithms,  Up: Algorithms
  
-16.7 Other Algorithms
+15.7 Other Algorithms
  =====================
  
  * Menu:
@@ -461,7 +412,7 @@ File: gmp.info,  Node: Other Algorithms,  Next: Assembly Coding,  Prev: Radix Co
  \1f
  File: gmp.info,  Node: Prime Testing Algorithm,  Next: Factorial Algorithm,  Prev: Other Algorithms,  Up: Other Algorithms
  
-16.7.1 Prime Testing
+15.7.1 Prime Testing
  --------------------
  
  The primality testing in `mpz_probab_prime_p' (*note Number Theoretic
@@ -487,43 +438,64 @@ for an arbitrary n.
  \1f
  File: gmp.info,  Node: Factorial Algorithm,  Next: Binomial Coefficients Algorithm,  Prev: Prime Testing Algorithm,  Up: Other Algorithms
  
-16.7.2 Factorial
+15.7.2 Factorial
  ----------------
  
-Factorials are calculated by a combination of removal of twos,
-powering, and binary splitting.  The procedure can be best illustrated
+Factorials are calculated by a combination of two algorithms. Both
+share one idea: compute the odd part of the factorial first; a final
+step then accounts for the power-of-2 factor by shifting.
+
+   For small n, the odd factor of n! is computed with the simple
+observation that it is equal to the product of all positive odd numbers
+not exceeding n times the odd factor of [n/2]!, where [x] is the integer
+part of x, and so on recursively. The procedure can be best illustrated
  with an example,
  
-     23! = 1.2.3.4.5.6.7.8.9.10.11.12.13.14.15.16.17.18.19.20.21.22.23
+     23! = (23.21.19.17.15.13.11.9.7.5.3)(11.9.7.5.3)(5.3)2^19
  
-has factors of two removed,
+   Current code collects all the factors in a single list, with a loop
+and no recursion, and computes the product, with no special care for
+repeated chunks.
  
-     23! = 2^19.1.1.3.1.5.3.7.1.9.5.11.3.13.7.15.1.17.9.19.5.21.11.23
+   When n is larger, the computation passes through prime sieving. A
+helper function is used, as suggested by Peter Luschny:
  
-and the resulting terms collected up according to their multiplicity,
+                                 n
+                               -----
+                    n!          | |   L(p,n)
+     msf(n) = -------------- =  | |  p
+               [n/2]!^2.2^k     p=3
  
-     23! = 2^19.(3.5)^3.(7.9.11)^2.(13.15.17.19.21.23)
+   Where p ranges over the odd primes. The exponent k is chosen to
+obtain an odd integer: k is the number of 1 bits in the binary
+representation of [n/2]. The function L(p,n) can be defined as zero
+when p is composite, and, for any prime p, it is computed with:
  
-   Each sequence such as 13.15.17.19.21.23 is evaluated by splitting
-into every second term, as for instance (13.17.21).(15.19.23), and the
-same recursively on each half.  This is implemented iteratively using
-some bit twiddling.
+               ---
+                \    n
+     L(p,n) =   /  [---] mod 2   <=  log (n) .
+               ---  p^i                p
+               i>0
+
+   With this helper function, we are able to compute the odd part of n!
+using the recursion implied by n!=[n/2]!^2*msf(n)*2^k. The recursion
+stops using the small-n algorithm on some [n/2^i].
+
+   Both the above algorithms use binary splitting to compute the
+product of many small factors. At first as many products as possible
+are accumulated in a single register, generating a list of factors that
+fit in a machine word. This list is then split into halves, and the
+product is computed recursively.
  
     Such splitting is more efficient than repeated Nx1 multiplies since
  it forms big multiplies, allowing Karatsuba and higher algorithms to be
  used.  And even below the Karatsuba threshold a big block of work can
  be more efficient for the basecase algorithm.
  
-   Splitting into subsequences of every second term keeps the resulting
-products more nearly equal in size than would the simpler approach of
-say taking the first half and second half of the sequence.  Nearly
-equal products are more efficient for the current multiply
-implementation.
-
  \1f
  File: gmp.info,  Node: Binomial Coefficients Algorithm,  Next: Fibonacci Numbers Algorithm,  Prev: Factorial Algorithm,  Up: Other Algorithms
  
-16.7.3 Binomial Coefficients
+15.7.3 Binomial Coefficients
  ----------------------------
  
  Binomial coefficients C(n,k) are calculated by first arranging k <= n/2
@@ -545,7 +517,7 @@ and n-k+i in general won't fit in a limb at all.
  \1f
  File: gmp.info,  Node: Fibonacci Numbers Algorithm,  Next: Lucas Numbers Algorithm,  Prev: Binomial Coefficients Algorithm,  Up: Other Algorithms
  
-16.7.4 Fibonacci Numbers
+15.7.4 Fibonacci Numbers
  ------------------------
  
  The Fibonacci functions `mpz_fib_ui' and `mpz_fib2_ui' are designed for
@@ -600,7 +572,7 @@ further limbs, which saves some code size.  See comments with
  \1f
  File: gmp.info,  Node: Lucas Numbers Algorithm,  Next: Random Number Algorithms,  Prev: Fibonacci Numbers Algorithm,  Up: Other Algorithms
  
-16.7.5 Lucas Numbers
+15.7.5 Lucas Numbers
  --------------------
  
  `mpz_lucnum2_ui' derives a pair of Lucas numbers from a pair of
@@ -623,7 +595,7 @@ Fibonacci numbers, similar to what `mpz_fib_ui' does.
  \1f
  File: gmp.info,  Node: Random Number Algorithms,  Prev: Lucas Numbers Algorithm,  Up: Other Algorithms
  
-16.7.6 Random Numbers
+15.7.6 Random Numbers
  ---------------------
  
  For the `urandomb' functions, random numbers are generated simply by
@@ -647,7 +619,7 @@ GMP.
  
     Linear congruential generators are described in many text books, for
  instance Knuth volume 2 (*note References::).  With a modulus M and
-parameters A and C, a integer state S is iterated by the formula S <-
+parameters A and C, an integer state S is iterated by the formula S <-
  A*S+C mod M.  At each step the new state is a linear function of the
  previous, mod M, hence the name of the generator.
  
@@ -667,7 +639,7 @@ the like.
  \1f
  File: gmp.info,  Node: Assembly Coding,  Prev: Other Algorithms,  Up: Algorithms
  
-16.8 Assembly Coding
+15.8 Assembly Coding
  ====================
  
  The assembly subroutines in GMP are the most significant source of
@@ -696,7 +668,7 @@ offers a speedup over generic C by a factor of anything from 2 to 10.
  \1f
  File: gmp.info,  Node: Assembly Code Organisation,  Next: Assembly Basics,  Prev: Assembly Coding,  Up: Assembly Coding
  
-16.8.1 Code Organisation
+15.8.1 Code Organisation
  ------------------------
  
  The various `mpn' subdirectories contain machine-dependent code, written
@@ -718,7 +690,7 @@ given CPU.
  \1f
  File: gmp.info,  Node: Assembly Basics,  Next: Assembly Carry Propagation,  Prev: Assembly Code Organisation,  Up: Assembly Coding
  
-16.8.2 Assembly Basics
+15.8.2 Assembly Basics
  ----------------------
  
  `mpn_addmul_1' and `mpn_submul_1' are the most important routines for
@@ -742,7 +714,7 @@ vector processor, depending on the carry handling.
  \1f
  File: gmp.info,  Node: Assembly Carry Propagation,  Next: Assembly Cache Handling,  Prev: Assembly Basics,  Up: Assembly Coding
  
-16.8.3 Carry Propagation
+15.8.3 Carry Propagation
  ------------------------
  
  The problem that presents most challenges in GMP is propagating carries
@@ -779,7 +751,7 @@ results.
  \1f
  File: gmp.info,  Node: Assembly Cache Handling,  Next: Assembly Functional Units,  Prev: Assembly Carry Propagation,  Up: Assembly Coding
  
-16.8.4 Cache Handling
+15.8.4 Cache Handling
  ---------------------
  
  GMP aims to perform well both on operands that fit entirely in L1 cache
@@ -825,7 +797,7 @@ life easy.
  \1f
  File: gmp.info,  Node: Assembly Functional Units,  Next: Assembly Floating Point,  Prev: Assembly Cache Handling,  Up: Assembly Coding
  
-16.8.5 Functional Units
+15.8.5 Functional Units
  -----------------------
  
  When choosing an approach for an assembly loop, consideration is given
@@ -861,7 +833,7 @@ using bit twiddling.
  \1f
  File: gmp.info,  Node: Assembly Floating Point,  Next: Assembly SIMD Instructions,  Prev: Assembly Functional Units,  Up: Assembly Coding
  
-16.8.6 Floating Point
+15.8.6 Floating Point
  ---------------------
  
  Floating point arithmetic is used in GMP for multiplications on CPUs
@@ -958,7 +930,7 @@ limb, generating a low 64-bit result limb and a high 33-bit carry limb
  \1f
  File: gmp.info,  Node: Assembly SIMD Instructions,  Next: Assembly Software Pipelining,  Prev: Assembly Floating Point,  Up: Assembly Coding
  
-16.8.7 SIMD Instructions
+15.8.7 SIMD Instructions
  ------------------------
  
  The single-instruction multiple-data support in current microprocessors
@@ -979,7 +951,7 @@ the P55 `mpn_mul_1'.  SSE2 is used for Pentium 4 `mpn_mul_1',
  \1f
  File: gmp.info,  Node: Assembly Software Pipelining,  Next: Assembly Loop Unrolling,  Prev: Assembly SIMD Instructions,  Up: Assembly Coding
  
-16.8.8 Software Pipelining
+15.8.8 Software Pipelining
  --------------------------
  
  Software pipelining consists of scheduling instructions around the
@@ -1004,7 +976,7 @@ to use while another (or multiple others) are still in progress.
  \1f
  File: gmp.info,  Node: Assembly Loop Unrolling,  Next: Assembly Writing Guide,  Prev: Assembly Software Pipelining,  Up: Assembly Coding
  
-16.8.9 Loop Unrolling
+15.8.9 Loop Unrolling
  ---------------------
  
  Loop unrolling consists of replicating code so that several limbs are
@@ -1051,7 +1023,7 @@ ways, for example
  \1f
  File: gmp.info,  Node: Assembly Writing Guide,  Prev: Assembly Loop Unrolling,  Up: Assembly Coding
  
-16.8.10 Writing Guide
+15.8.10 Writing Guide
  ---------------------
  
  This is a guide to writing software pipelined loops for processing limb
@@ -1110,7 +1082,7 @@ sizes.
  \1f
  File: gmp.info,  Node: Internals,  Next: Contributors,  Prev: Algorithms,  Up: Top
  
-17 Internals
+16 Internals
  ************
  
  *This chapter is provided only for informational purposes and the
@@ -1129,7 +1101,7 @@ only the documented interfaces described in previous chapters.*
  \1f
  File: gmp.info,  Node: Integer Internals,  Next: Rational Internals,  Prev: Internals,  Up: Internals
  
-17.1 Integer Internals
+16.1 Integer Internals
  ======================
  
  `mpz_t' variables represent integers using sign and magnitude, in space
@@ -1179,7 +1151,7 @@ providing plenty of range.
  \1f
  File: gmp.info,  Node: Rational Internals,  Next: Float Internals,  Prev: Integer Internals,  Up: Internals
  
-17.2 Rational Internals
+16.2 Rational Internals
  =======================
  
  `mpq_t' variables represent rationals using an `mpz_t' numerator and
@@ -1207,7 +1179,7 @@ directly.
  \1f
  File: gmp.info,  Node: Float Internals,  Next: Raw Output Internals,  Prev: Rational Internals,  Up: Internals
  
-17.3 Float Internals
+16.3 Float Internals
  ====================
  
  Efficient calculation is the primary aim of GMP floats and the use of
@@ -1371,7 +1343,7 @@ Application Precisions
  \1f
  File: gmp.info,  Node: Raw Output Internals,  Next: C++ Interface Internals,  Prev: Float Internals,  Up: Internals
  
-17.4 Raw Output Internals
+16.4 Raw Output Internals
  =========================
  
  `mpz_out_raw' uses the following format.
@@ -1401,7 +1373,7 @@ can just read and write `_mp_d'.
  \1f
  File: gmp.info,  Node: C++ Interface Internals,  Prev: Raw Output Internals,  Up: Internals
  
-17.5 C++ Interface Internals
+16.5 C++ Interface Internals
  ============================
  
  A system of expression templates is used to ensure something like
@@ -1571,12 +1543,13 @@ elsewhere.
     Pedro Gimeno implemented the Mersenne Twister and made other random
  number improvements.
  
-   Niels Möller wrote the sub-quadratic GCD and extended GCD code, the
-quadratic Hensel division code, and (with Torbjörn) the new divide and
-conquer division code for GMP 4.3.  Niels also helped implement the new
-Toom multiply code for GMP 4.3 and implemented helper functions to
-simplify Toom evaluations for GMP 5.0.  He wrote the original version
-of mpn_mulmod_bnm1.
+   Niels Möller wrote the sub-quadratic GCD, extended GCD and jacobi
+code, the quadratic Hensel division code, and (with Torbjörn) the new
+divide and conquer division code for GMP 4.3.  Niels also helped
+implement the new Toom multiply code for GMP 4.3 and implemented helper
+functions to simplify Toom evaluations for GMP 5.0.  He wrote the
+original version of mpn_mulmod_bnm1, and he is the main author of the
+mini-gmp package used for gmp bootstrapping.
  
     Alberto Zanoni and Marco Bodrato suggested the unbalanced multiply
  strategy, and found the optimal strategies for evaluation and
@@ -1586,15 +1559,26 @@ interpolation in Toom multiplication.
  4.3 and implemented most of the new Toom multiply and squaring code for
  5.0.  He is the main author of the current mpn_mulmod_bnm1 and
  mpn_mullo_n.  Marco also wrote the functions mpn_invert and
-mpn_invertappr.
+mpn_invertappr.  He is the author of the current combinatorial
+functions: binomial, factorial, multifactorial, primorial.
  
     David Harvey suggested the internal function `mpn_bdiv_dbm1',
  implementing division relevant to Toom multiplication.  He also worked
  on fast assembly sequences, in particular on a fast AMD64
-`mpn_mul_basecase'.
+`mpn_mul_basecase'. He wrote the internal middle product functions
+`mpn_mulmid_basecase', `mpn_toom42_mulmid', `mpn_mulmid_n' and related
+helper routines.
  
     Martin Boij wrote `mpn_perfect_power_p'.
  
+   Marc Glisse improved `gmpxx.h': use fewer temporaries (faster),
+specializations of `numeric_limits' and `common_type', C++11 features
+(move constructors, explicit bool conversion, UDL), make the conversion
+from `mpq_class' to `mpz_class' explicit, optimize operations where one
+argument is a small compile-time constant, replace some heap
+allocations by stack allocations.  He also fixed the eofbit handling of
+C++ streams, and removed one division from `mpq/aors.c'.
+
     (This list is chronological, not ordered after significance.  If you
  have contributed to GMP but are not listed above, please tell
  <gmp-devel@gmplib.org> about the omission!)
@@ -2238,16 +2222,15 @@ Concept Index
  * #include:                              Headers and Libraries.
                                                                (line   6)
  * --build:                               Build Options.       (line  52)
-* --disable-fft:                         Build Options.       (line 317)
+* --disable-fft:                         Build Options.       (line 314)
  * --disable-shared:                      Build Options.       (line  45)
  * --disable-static:                      Build Options.       (line  45)
-* --enable-alloca:                       Build Options.       (line 278)
-* --enable-assert:                       Build Options.       (line 328)
-* --enable-cxx:                          Build Options.       (line 230)
-* --enable-fat:                          Build Options.       (line 164)
-* --enable-mpbsd:                        Build Options.       (line 323)
-* --enable-profiling <1>:                Profiling.           (line   6)
-* --enable-profiling:                    Build Options.       (line 332)
+* --enable-alloca:                       Build Options.       (line 275)
+* --enable-assert:                       Build Options.       (line 320)
+* --enable-cxx:                          Build Options.       (line 227)
+* --enable-fat:                          Build Options.       (line 162)
+* --enable-profiling <1>:                Build Options.       (line 324)
+* --enable-profiling:                    Profiling.           (line   6)
  * --exec-prefix:                         Build Options.       (line  32)
  * --host:                                Build Options.       (line  66)
  * --prefix:                              Build Options.       (line  32)
@@ -2256,23 +2239,23 @@ Concept Index
  * 68000:                                 Notes for Particular Systems.
                                                                (line  80)
  * 80x86:                                 Notes for Particular Systems.
-                                                              (line 126)
-* ABI <1>:                               Build Options.       (line 171)
+                                                              (line 127)
+* ABI <1>:                               Build Options.       (line 169)
  * ABI:                                   ABI and ISA.         (line   6)
  * About this manual:                     Introduction to GMP. (line  58)
  * AC_CHECK_LIB:                          Autoconf.            (line  11)
-* AIX <1>:                               ABI and ISA.         (line 169)
-* AIX:                                   Notes for Particular Systems.
+* AIX <1>:                               Notes for Particular Systems.
                                                                (line   7)
+* AIX:                                   ABI and ISA.         (line 169)
  * Algorithms:                            Algorithms.          (line   6)
-* alloca:                                Build Options.       (line 278)
+* alloca:                                Build Options.       (line 275)
  * Allocation of memory:                  Custom Allocation.   (line   6)
  * AMD64:                                 ABI and ISA.         (line  44)
  * Anonymous FTP of latest version:       Introduction to GMP. (line  38)
  * Application Binary Interface:          ABI and ISA.         (line   6)
-* Arithmetic functions <1>:              Float Arithmetic.    (line   6)
-* Arithmetic functions <2>:              Integer Arithmetic.  (line   6)
-* Arithmetic functions:                  Rational Arithmetic. (line   6)
+* Arithmetic functions <1>:              Rational Arithmetic. (line   6)
+* Arithmetic functions <2>:              Float Arithmetic.    (line   6)
+* Arithmetic functions:                  Integer Arithmetic.  (line   6)
  * ARM:                                   Notes for Particular Systems.
                                                                (line  20)
  * Assembly cache handling:               Assembly Cache Handling.
@@ -2292,38 +2275,32 @@ Concept Index
                                                                (line   6)
  * Assembly writing guide:                Assembly Writing Guide.
                                                                (line   6)
-* Assertion checking <1>:                Debugging.           (line  79)
-* Assertion checking:                    Build Options.       (line 328)
+* Assertion checking <1>:                Build Options.       (line 320)
+* Assertion checking:                    Debugging.           (line  79)
  * Assignment functions <1>:              Assigning Integers.  (line   6)
-* Assignment functions <2>:              Simultaneous Float Init & Assign.
+* Assignment functions <2>:              Initializing Rationals.
                                                                (line   6)
  * Assignment functions <3>:              Assigning Floats.    (line   6)
-* Assignment functions <4>:              Initializing Rationals.
+* Assignment functions <4>:              Simultaneous Float Init & Assign.
                                                                (line   6)
  * Assignment functions:                  Simultaneous Integer Init & Assign.
                                                                (line   6)
  * Autoconf:                              Autoconf.            (line   6)
  * Basics:                                GMP Basics.          (line   6)
-* Berkeley MP compatible functions <1>:  Build Options.       (line 323)
-* Berkeley MP compatible functions:      BSD Compatible Functions.
-                                                              (line   6)
  * Binomial coefficient algorithm:        Binomial Coefficients Algorithm.
                                                                (line   6)
  * Binomial coefficient functions:        Number Theoretic Functions.
-                                                              (line 113)
+                                                              (line 128)
  * Binutils strip:                        Known Build Problems.
                                                                (line  28)
  * Bit manipulation functions:            Integer Logic and Bit Fiddling.
                                                                (line   6)
  * Bit scanning functions:                Integer Logic and Bit Fiddling.
-                                                              (line  38)
-* Bit shift left:                        Integer Arithmetic.  (line  35)
-* Bit shift right:                       Integer Division.    (line  53)
+                                                              (line  40)
+* Bit shift left:                        Integer Arithmetic.  (line  38)
+* Bit shift right:                       Integer Division.    (line  62)
  * Bits per limb:                         Useful Macros and Constants.
                                                                (line   7)
-* BSD MP compatible functions <1>:       BSD Compatible Functions.
-                                                              (line   6)
-* BSD MP compatible functions:           Build Options.       (line 323)
  * Bug reporting:                         Reporting Bugs.      (line   6)
  * Build directory:                       Build Options.       (line  19)
  * Build notes for binary packaging:      Notes for Package Builds.
@@ -2336,33 +2313,33 @@ Concept Index
  * Build system:                          Build Options.       (line  52)
  * Building GMP:                          Installing GMP.      (line   6)
  * Bus error:                             Debugging.           (line   7)
-* C compiler:                            Build Options.       (line 182)
-* C++ compiler:                          Build Options.       (line 254)
+* C compiler:                            Build Options.       (line 180)
+* C++ compiler:                          Build Options.       (line 251)
  * C++ interface:                         C++ Class Interface. (line   6)
  * C++ interface internals:               C++ Interface Internals.
                                                                (line   6)
  * C++ istream input:                     C++ Formatted Input. (line   6)
  * C++ ostream output:                    C++ Formatted Output.
                                                                (line   6)
-* C++ support:                           Build Options.       (line 230)
-* CC:                                    Build Options.       (line 182)
-* CC_FOR_BUILD:                          Build Options.       (line 217)
-* CFLAGS:                                Build Options.       (line 182)
+* C++ support:                           Build Options.       (line 227)
+* CC:                                    Build Options.       (line 180)
+* CC_FOR_BUILD:                          Build Options.       (line 214)
+* CFLAGS:                                Build Options.       (line 180)
  * Checker:                               Debugging.           (line 115)
  * checkergcc:                            Debugging.           (line 122)
  * Code organisation:                     Assembly Code Organisation.
                                                                (line   6)
  * Compaq C++:                            Notes for Particular Systems.
                                                                (line  25)
-* Comparison functions <1>:              Float Comparison.    (line   6)
-* Comparison functions <2>:              Integer Comparisons. (line   6)
-* Comparison functions:                  Comparing Rationals. (line   6)
+* Comparison functions <1>:              Comparing Rationals. (line   6)
+* Comparison functions <2>:              Float Comparison.    (line   6)
+* Comparison functions:                  Integer Comparisons. (line   6)
  * Compatibility with older versions:     Compatibility with older versions.
                                                                (line   6)
  * Conditions for copying GNU MP:         Copying.             (line   6)
  * Configuring GMP:                       Installing GMP.      (line   6)
-* Congruence algorithm:                  Exact Remainder.     (line  29)
-* Congruence functions:                  Integer Division.    (line 124)
+* Congruence algorithm:                  Exact Remainder.     (line  30)
+* Congruence functions:                  Integer Division.    (line 137)
  * Constants:                             Useful Macros and Constants.
                                                                (line   6)
  * Contributors:                          Contributors.        (line   6)
@@ -2370,18 +2347,18 @@ Concept Index
                                                                (line   6)
  * Conventions for variables:             Variable Conventions.
                                                                (line   6)
-* Conversion functions <1>:              Rational Conversions.
+* Conversion functions <1>:              Converting Integers. (line   6)
+* Conversion functions <2>:              Converting Floats.   (line   6)
+* Conversion functions:                  Rational Conversions.
                                                                (line   6)
-* Conversion functions <2>:              Converting Integers. (line   6)
-* Conversion functions:                  Converting Floats.   (line   6)
  * Copying conditions:                    Copying.             (line   6)
-* CPPFLAGS:                              Build Options.       (line 208)
+* CPPFLAGS:                              Build Options.       (line 206)
  * CPU types <1>:                         Introduction to GMP. (line  24)
  * CPU types:                             Build Options.       (line 108)
  * Cross compiling:                       Build Options.       (line  66)
  * Custom allocation:                     Custom Allocation.   (line   6)
-* CXX:                                   Build Options.       (line 254)
-* CXXFLAGS:                              Build Options.       (line 254)
+* CXX:                                   Build Options.       (line 251)
+* CXXFLAGS:                              Build Options.       (line 251)
  * Cygwin:                                Notes for Particular Systems.
                                                                (line  43)
  * Darwin:                                Known Build Problems.
@@ -2391,12 +2368,12 @@ Concept Index
                                                                (line   6)
  * Digits in an integer:                  Miscellaneous Integer Functions.
                                                                (line  23)
-* Divisibility algorithm:                Exact Remainder.     (line  29)
-* Divisibility functions:                Integer Division.    (line 112)
+* Divisibility algorithm:                Exact Remainder.     (line  30)
+* Divisibility functions:                Integer Division.    (line 137)
  * Divisibility testing:                  Efficiency.          (line  91)
  * Division algorithms:                   Division Algorithms. (line   6)
-* Division functions <1>:                Float Arithmetic.    (line  33)
-* Division functions <2>:                Rational Arithmetic. (line  22)
+* Division functions <1>:                Rational Arithmetic. (line  24)
+* Division functions <2>:                Float Arithmetic.    (line  33)
  * Division functions:                    Integer Division.    (line   6)
  * DJGPP <1>:                             Notes for Particular Systems.
                                                                (line  43)
@@ -2404,44 +2381,44 @@ Concept Index
                                                                (line  18)
  * DLLs:                                  Notes for Particular Systems.
                                                                (line  56)
-* DocBook:                               Build Options.       (line 355)
-* Documentation formats:                 Build Options.       (line 348)
+* DocBook:                               Build Options.       (line 347)
+* Documentation formats:                 Build Options.       (line 340)
  * Documentation license:                 GNU Free Documentation License.
                                                                (line   6)
-* DVI:                                   Build Options.       (line 351)
+* DVI:                                   Build Options.       (line 343)
  * Efficiency:                            Efficiency.          (line   6)
  * Emacs:                                 Emacs.               (line   6)
-* Exact division functions:              Integer Division.    (line 102)
+* Exact division functions:              Integer Division.    (line 112)
  * Exact remainder:                       Exact Remainder.     (line   6)
  * Example programs:                      Demonstration Programs.
                                                                (line   6)
  * Exec prefix:                           Build Options.       (line  32)
  * Execution profiling <1>:               Profiling.           (line   6)
-* Execution profiling:                   Build Options.       (line 332)
-* Exponentiation functions <1>:          Integer Exponentiation.
+* Execution profiling:                   Build Options.       (line 324)
+* Exponentiation functions <1>:          Float Arithmetic.    (line  41)
+* Exponentiation functions:              Integer Exponentiation.
                                                                (line   6)
-* Exponentiation functions:              Float Arithmetic.    (line  41)
  * Export:                                Integer Import and Export.
                                                                (line  45)
  * Expression parsing demo:               Demonstration Programs.
-                                                              (line  15)
+                                                              (line  18)
  * Extended GCD:                          Number Theoretic Functions.
-                                                              (line  47)
+                                                              (line  49)
  * Factor removal functions:              Number Theoretic Functions.
-                                                              (line 103)
+                                                              (line 108)
  * Factorial algorithm:                   Factorial Algorithm. (line   6)
  * Factorial functions:                   Number Theoretic Functions.
-                                                              (line 108)
+                                                              (line 116)
  * Factorization demo:                    Demonstration Programs.
                                                                (line  25)
  * Fast Fourier Transform:                FFT Multiplication.  (line   6)
-* Fat binary:                            Build Options.       (line 164)
-* FFT multiplication <1>:                Build Options.       (line 317)
+* Fat binary:                            Build Options.       (line 162)
+* FFT multiplication <1>:                Build Options.       (line 314)
  * FFT multiplication:                    FFT Multiplication.  (line   6)
  * Fibonacci number algorithm:            Fibonacci Numbers Algorithm.
                                                                (line   6)
  * Fibonacci sequence functions:          Number Theoretic Functions.
-                                                              (line 121)
+                                                              (line 136)
  * Float arithmetic functions:            Float Arithmetic.    (line   6)
  * Float assignment functions <1>:        Simultaneous Float Init & Assign.
                                                                (line   6)
@@ -2461,7 +2438,7 @@ Concept Index
                                                                (line  27)
  * Float rounding functions:              Miscellaneous Float Functions.
                                                                (line   9)
-* Float sign tests:                      Float Comparison.    (line  33)
+* Float sign tests:                      Float Comparison.    (line  35)
  * Floating point mode:                   Notes for Particular Systems.
                                                                (line  34)
  * Floating-point functions:              Floating-point Functions.
@@ -2473,8 +2450,8 @@ Concept Index
  * Formatted output:                      Formatted Output.    (line   6)
  * Free Documentation License:            GNU Free Documentation License.
                                                                (line   6)
-* frexp <1>:                             Converting Floats.   (line  23)
-* frexp:                                 Converting Integers. (line  42)
+* frexp <1>:                             Converting Integers. (line  43)
+* frexp:                                 Converting Floats.   (line  24)
  * FTP of latest version:                 Introduction to GMP. (line  38)
  * Function classes:                      Function Classes.    (line   6)
  * FunctionCheck:                         Profiling.           (line  77)
@@ -2482,9 +2459,9 @@ Concept Index
  * GCD algorithms:                        Greatest Common Divisor Algorithms.
                                                                (line   6)
  * GCD extended:                          Number Theoretic Functions.
-                                                              (line  47)
+                                                              (line  49)
  * GCD functions:                         Number Theoretic Functions.
-                                                              (line  30)
+                                                              (line  32)
  * GDB:                                   Debugging.           (line  58)
  * Generic C:                             Build Options.       (line 153)
  * GMP Perl module:                       Demonstration Programs.
@@ -2504,7 +2481,7 @@ Concept Index
  * Greatest common divisor algorithms:    Greatest Common Divisor Algorithms.
                                                                (line   6)
  * Greatest common divisor functions:     Number Theoretic Functions.
-                                                              (line  30)
+                                                              (line  32)
  * Hardware floating point mode:          Notes for Particular Systems.
                                                                (line  34)
  * Headers:                               Headers and Libraries.
@@ -2515,10 +2492,10 @@ Concept Index
  * HP-UX:                                 ABI and ISA.         (line 107)
  * HPPA:                                  ABI and ISA.         (line  68)
  * I/O functions <1>:                     I/O of Floats.       (line   6)
-* I/O functions <2>:                     I/O of Integers.     (line   6)
-* I/O functions:                         I/O of Rationals.    (line   6)
+* I/O functions <2>:                     I/O of Rationals.    (line   6)
+* I/O functions:                         I/O of Integers.     (line   6)
  * i386:                                  Notes for Particular Systems.
-                                                              (line 126)
+                                                              (line 127)
  * IA-64:                                 ABI and ISA.         (line 107)
  * Import:                                Integer Import and Export.
                                                                (line  11)
@@ -2526,23 +2503,23 @@ Concept Index
  * Include files:                         Headers and Libraries.
                                                                (line   6)
  * info-lookup-symbol:                    Emacs.               (line   6)
-* Initialization functions <1>:          Initializing Integers.
+* Initialization functions <1>:          Simultaneous Float Init & Assign.
                                                                (line   6)
  * Initialization functions <2>:          Random State Initialization.
                                                                (line   6)
-* Initialization functions <3>:          Initializing Rationals.
+* Initialization functions <3>:          Initializing Floats. (line   6)
+* Initialization functions <4>:          Simultaneous Integer Init & Assign.
                                                                (line   6)
-* Initialization functions <4>:          Initializing Floats. (line   6)
-* Initialization functions <5>:          Simultaneous Float Init & Assign.
+* Initialization functions <5>:          Initializing Rationals.
                                                                (line   6)
-* Initialization functions:              Simultaneous Integer Init & Assign.
+* Initialization functions:              Initializing Integers.
                                                                (line   6)
  * Initializing and clearing:             Efficiency.          (line  21)
-* Input functions <1>:                   I/O of Floats.       (line   6)
+* Input functions <1>:                   I/O of Integers.     (line   6)
  * Input functions <2>:                   I/O of Rationals.    (line   6)
-* Input functions <3>:                   I/O of Integers.     (line   6)
-* Input functions:                       Formatted Input Functions.
+* Input functions <3>:                   Formatted Input Functions.
                                                                (line   6)
+* Input functions:                       I/O of Floats.       (line   6)
  * Install prefix:                        Build Options.       (line  32)
  * Installing GMP:                        Installing GMP.      (line   6)
  * Instruction Set Architecture:          ABI and ISA.         (line   6)
@@ -2565,9 +2542,9 @@ Concept Index
  * Integer functions:                     Integer Functions.   (line   6)
  * Integer import:                        Integer Import and Export.
                                                                (line  11)
-* Integer initialization functions <1>:  Initializing Integers.
+* Integer initialization functions <1>:  Simultaneous Integer Init & Assign.
                                                                (line   6)
-* Integer initialization functions:      Simultaneous Integer Init & Assign.
+* Integer initialization functions:      Initializing Integers.
                                                                (line   6)
  * Integer input and output functions:    I/O of Integers.     (line   6)
  * Integer internals:                     Integer Internals.   (line   6)
@@ -2586,7 +2563,7 @@ Concept Index
  * Internals:                             Internals.           (line   6)
  * Introduction:                          Introduction to GMP. (line   6)
  * Inverse modulo functions:              Number Theoretic Functions.
-                                                              (line  72)
+                                                              (line  76)
  * IRIX <1>:                              Known Build Problems.
                                                                (line  38)
  * IRIX:                                  ABI and ISA.         (line 132)
@@ -2594,21 +2571,21 @@ Concept Index
  * istream input:                         C++ Formatted Input. (line   6)
  * Jacobi symbol algorithm:               Jacobi Symbol.       (line   6)
  * Jacobi symbol functions:               Number Theoretic Functions.
-                                                              (line  79)
+                                                              (line  83)
  * Karatsuba multiplication:              Karatsuba Multiplication.
                                                                (line   6)
  * Karatsuba square root algorithm:       Square Root Algorithm.
                                                                (line   6)
  * Kronecker symbol functions:            Number Theoretic Functions.
-                                                              (line  91)
+                                                              (line  95)
  * Language bindings:                     Language Bindings.   (line   6)
  * Latest version of GMP:                 Introduction to GMP. (line  38)
  * LCM functions:                         Number Theoretic Functions.
-                                                              (line  67)
+                                                              (line  70)
  * Least common multiple functions:       Number Theoretic Functions.
-                                                              (line  67)
+                                                              (line  70)
  * Legendre symbol functions:             Number Theoretic Functions.
-                                                              (line  82)
+                                                              (line  86)
  * libgmp:                                Headers and Libraries.
                                                                (line  22)
  * libgmpxx:                              Headers and Libraries.
@@ -2627,7 +2604,7 @@ Concept Index
  * Linear congruential algorithm:         Random Number Algorithms.
                                                                (line  25)
  * Linear congruential random numbers:    Random State Initialization.
-                                                              (line  18)
+                                                              (line  32)
  * Linking:                               Headers and Libraries.
                                                                (line  22)
  * Logical functions:                     Integer Logic and Bit Fiddling.
@@ -2636,7 +2613,7 @@ Concept Index
  * Lucas number algorithm:                Lucas Numbers Algorithm.
                                                                (line   6)
  * Lucas number functions:                Number Theoretic Functions.
-                                                              (line 132)
+                                                              (line 147)
  * MacOS X:                               Known Build Problems.
                                                                (line  51)
  * Mailing lists:                         Introduction to GMP. (line  45)
@@ -2656,14 +2633,12 @@ Concept Index
  * Miscellaneous integer functions:       Miscellaneous Integer Functions.
                                                                (line   6)
  * MMX:                                   Notes for Particular Systems.
-                                                              (line 132)
+                                                              (line 133)
  * Modular inverse functions:             Number Theoretic Functions.
-                                                              (line  72)
+                                                              (line  76)
  * Most significant bit:                  Miscellaneous Integer Functions.
                                                                (line  34)
-* mp.h:                                  BSD Compatible Functions.
-                                                              (line  21)
-* MPN_PATH:                              Build Options.       (line 336)
+* MPN_PATH:                              Build Options.       (line 328)
  * MS Windows:                            Notes for Particular Systems.
                                                                (line  43)
  * MS-DOS:                                Notes for Particular Systems.
@@ -2676,7 +2651,7 @@ Concept Index
  * NeXT:                                  Known Build Problems.
                                                                (line  57)
  * Next prime function:                   Number Theoretic Functions.
-                                                              (line  23)
+                                                              (line  25)
  * Nomenclature:                          Nomenclature and Types.
                                                                (line   6)
  * Non-Unix systems:                      Build Options.       (line  11)
@@ -2695,41 +2670,41 @@ Concept Index
  * ostream output:                        C++ Formatted Output.
                                                                (line   6)
  * Other languages:                       Language Bindings.   (line   6)
-* Output functions <1>:                  I/O of Integers.     (line   6)
-* Output functions <2>:                  I/O of Rationals.    (line   6)
-* Output functions <3>:                  Formatted Output Functions.
+* Output functions <1>:                  Formatted Output Functions.
                                                                (line   6)
-* Output functions:                      I/O of Floats.       (line   6)
+* Output functions <2>:                  I/O of Rationals.    (line   6)
+* Output functions <3>:                  I/O of Floats.       (line   6)
+* Output functions:                      I/O of Integers.     (line   6)
  * Packaged builds:                       Notes for Package Builds.
                                                                (line   6)
  * Parameter conventions:                 Parameter Conventions.
                                                                (line   6)
  * Parsing expressions demo:              Demonstration Programs.
-                                                              (line  21)
+                                                              (line  15)
  * Particular systems:                    Notes for Particular Systems.
                                                                (line   6)
  * Past GMP versions:                     Compatibility with older versions.
                                                                (line   6)
-* PDF:                                   Build Options.       (line 351)
+* PDF:                                   Build Options.       (line 343)
  * Perfect power algorithm:               Perfect Power Algorithm.
                                                                (line   6)
-* Perfect power functions:               Integer Roots.       (line  27)
+* Perfect power functions:               Integer Roots.       (line  28)
  * Perfect square algorithm:              Perfect Square Algorithm.
                                                                (line   6)
-* Perfect square functions:              Integer Roots.       (line  36)
+* Perfect square functions:              Integer Roots.       (line  37)
  * perl:                                  Demonstration Programs.
                                                                (line  35)
  * Perl module:                           Demonstration Programs.
                                                                (line  35)
-* Postscript:                            Build Options.       (line 351)
-* Power/PowerPC <1>:                     Known Build Problems.
-                                                              (line  63)
-* Power/PowerPC:                         Notes for Particular Systems.
+* Postscript:                            Build Options.       (line 343)
+* Power/PowerPC <1>:                     Notes for Particular Systems.
                                                                (line  92)
+* Power/PowerPC:                         Known Build Problems.
+                                                              (line  63)
  * Powering algorithms:                   Powering Algorithms. (line   6)
-* Powering functions <1>:                Float Arithmetic.    (line  41)
-* Powering functions:                    Integer Exponentiation.
+* Powering functions <1>:                Integer Exponentiation.
                                                                (line   6)
+* Powering functions:                    Float Arithmetic.    (line  41)
  * PowerPC:                               ABI and ISA.         (line 167)
  * Precision of floats:                   Floating-point Functions.
                                                                (line   6)
@@ -2740,6 +2715,8 @@ Concept Index
                                                                (line   6)
  * Prime testing functions:               Number Theoretic Functions.
                                                                (line   7)
+* Primorial functions:                   Number Theoretic Functions.
+                                                              (line 121)
  * printf formatted output:               Formatted Output.    (line   6)
  * Probable prime testing functions:      Number Theoretic Functions.
                                                                (line   7)
@@ -2749,10 +2726,10 @@ Concept Index
                                                                (line   6)
  * Random number algorithms:              Random Number Algorithms.
                                                                (line   6)
-* Random number functions <1>:           Random Number Functions.
-                                                              (line   6)
-* Random number functions <2>:           Miscellaneous Float Functions.
+* Random number functions <1>:           Miscellaneous Float Functions.
                                                                (line  27)
+* Random number functions <2>:           Random Number Functions.
+                                                              (line   6)
  * Random number functions:               Integer Random Numbers.
                                                                (line   6)
  * Random number seeding:                 Random State Seeding.
@@ -2785,20 +2762,20 @@ Concept Index
  * Reentrancy:                            Reentrancy.          (line   6)
  * References:                            References.          (line   6)
  * Remove factor functions:               Number Theoretic Functions.
-                                                              (line 103)
+                                                              (line 108)
  * Reporting bugs:                        Reporting Bugs.      (line   6)
  * Root extraction algorithm:             Nth Root Algorithm.  (line   6)
  * Root extraction algorithms:            Root Extraction Algorithms.
                                                                (line   6)
-* Root extraction functions <1>:         Float Arithmetic.    (line  37)
-* Root extraction functions:             Integer Roots.       (line   6)
-* Root testing functions:                Integer Roots.       (line  27)
+* Root extraction functions <1>:         Integer Roots.       (line   6)
+* Root extraction functions:             Float Arithmetic.    (line  37)
+* Root testing functions:                Integer Roots.       (line  37)
  * Rounding functions:                    Miscellaneous Float Functions.
                                                                (line   9)
  * Sample programs:                       Demonstration Programs.
                                                                (line   6)
  * Scan bit functions:                    Integer Logic and Bit Fiddling.
-                                                              (line  38)
+                                                              (line  40)
  * scanf formatted input:                 Formatted Input.     (line   6)
  * SCO:                                   Known Build Problems.
                                                                (line  38)
@@ -2813,24 +2790,26 @@ Concept Index
                                                                (line   9)
  * Sign tests <1>:                        Integer Comparisons. (line  28)
  * Sign tests <2>:                        Comparing Rationals. (line  27)
-* Sign tests:                            Float Comparison.    (line  33)
+* Sign tests:                            Float Comparison.    (line  35)
  * Size in digits:                        Miscellaneous Integer Functions.
                                                                (line  23)
  * Small operands:                        Efficiency.          (line   7)
  * Solaris <1>:                           Known Build Problems.
+                                                              (line  72)
+* Solaris <2>:                           ABI and ISA.         (line 199)
+* Solaris:                               Known Build Problems.
                                                                (line  78)
-* Solaris:                               ABI and ISA.         (line 201)
  * Sparc:                                 Notes for Particular Systems.
-                                                              (line 103)
-* Sparc V9:                              ABI and ISA.         (line 201)
+                                                              (line 109)
+* Sparc V9:                              ABI and ISA.         (line 199)
  * Special integer functions:             Integer Special Functions.
                                                                (line   6)
  * Square root algorithm:                 Square Root Algorithm.
                                                                (line   6)
  * SSE2:                                  Notes for Particular Systems.
-                                                              (line 132)
+                                                              (line 133)
  * Stack backtrace:                       Debugging.           (line  50)
-* Stack overflow <1>:                    Build Options.       (line 278)
+* Stack overflow <1>:                    Build Options.       (line 275)
  * Stack overflow:                        Debugging.           (line   7)
  * Static linking:                        Efficiency.          (line  14)
  * stdarg.h:                              Headers and Libraries.
@@ -2839,22 +2818,22 @@ Concept Index
                                                                (line  11)
  * Stripped libraries:                    Known Build Problems.
                                                                (line  28)
-* Sun:                                   ABI and ISA.         (line 201)
+* Sun:                                   ABI and ISA.         (line 199)
  * SunOS:                                 Notes for Particular Systems.
-                                                              (line 120)
+                                                              (line 121)
  * Systems:                               Notes for Particular Systems.
                                                                (line   6)
-* Temporary memory:                      Build Options.       (line 278)
-* Texinfo:                               Build Options.       (line 348)
+* Temporary memory:                      Build Options.       (line 275)
+* Texinfo:                               Build Options.       (line 340)
  * Text input/output:                     Efficiency.          (line 153)
  * Thread safety:                         Reentrancy.          (line   6)
  * Toom multiplication <1>:               Other Multiplication.
                                                                (line   6)
  * Toom multiplication <2>:               Toom 3-Way Multiplication.
                                                                (line   6)
-* Toom multiplication <3>:               Toom 4-Way Multiplication.
+* Toom multiplication <3>:               Higher degree Toom'n'half.
                                                                (line   6)
-* Toom multiplication:                   Higher degree Toom'n'half.
+* Toom multiplication:                   Toom 4-Way Multiplication.
                                                                (line   6)
  * Types:                                 Nomenclature and Types.
                                                                (line   6)
@@ -2874,12 +2853,12 @@ Concept Index
                                                                (line  12)
  * Web page:                              Introduction to GMP. (line  34)
  * Windows:                               Notes for Particular Systems.
-                                                              (line  43)
+                                                              (line  56)
  * x86:                                   Notes for Particular Systems.
-                                                              (line 126)
+                                                              (line 127)
  * x87:                                   Notes for Particular Systems.
                                                                (line  34)
-* XML:                                   Build Options.       (line 355)
+* XML:                                   Build Options.       (line 347)
  
  \1f
  File: gmp.info,  Node: Function Index,  Prev: Concept Index,  Up: Top
@@ -2902,28 +2881,24 @@ Function and Type Index
                                                                (line  12)
  * _mpz_realloc:                          Integer Special Functions.
                                                                (line  51)
-* abs <1>:                               C++ Interface Floats.
-                                                              (line  79)
-* abs <2>:                               C++ Interface Rationals.
-                                                              (line  43)
+* abs <1>:                               C++ Interface Rationals.
+                                                              (line  49)
+* abs <2>:                               C++ Interface Floats.
+                                                              (line  83)
  * abs:                                   C++ Interface Integers.
-                                                              (line  42)
+                                                              (line  47)
  * ceil:                                  C++ Interface Floats.
-                                                              (line  80)
-* cmp <1>:                               C++ Interface Floats.
-                                                              (line  81)
+                                                              (line  84)
+* cmp <1>:                               C++ Interface Rationals.
+                                                              (line  51)
  * cmp <2>:                               C++ Interface Integers.
-                                                              (line  43)
-* cmp <3>:                               C++ Interface Floats.
-                                                              (line  82)
-* cmp <4>:                               C++ Interface Rationals.
-                                                              (line  45)
-* cmp:                                   C++ Interface Integers.
-                                                              (line  44)
+                                                              (line  49)
+* cmp <3>:                               C++ Interface Rationals.
+                                                              (line  50)
+* cmp:                                   C++ Interface Floats.
+                                                              (line  86)
  * floor:                                 C++ Interface Floats.
-                                                              (line  89)
-* gcd:                                   BSD Compatible Functions.
-                                                              (line  82)
+                                                              (line  93)
  * gmp_asprintf:                          Formatted Output Functions.
                                                                (line  65)
  * gmp_errno:                             Random State Initialization.
@@ -2955,15 +2930,15 @@ Function and Type Index
  * gmp_randclass:                         C++ Interface Random Numbers.
                                                                (line   7)
  * gmp_randclass::get_f:                  C++ Interface Random Numbers.
-                                                              (line  45)
+                                                              (line  46)
  * gmp_randclass::get_z_bits:             C++ Interface Random Numbers.
                                                                (line  38)
  * gmp_randclass::get_z_range:            C++ Interface Random Numbers.
                                                                (line  42)
  * gmp_randclass::gmp_randclass:          C++ Interface Random Numbers.
-                                                              (line  27)
+                                                              (line  13)
  * gmp_randclass::seed:                   C++ Interface Random Numbers.
-                                                              (line  34)
+                                                              (line  33)
  * gmp_randclear:                         Random State Initialization.
                                                                (line  62)
  * gmp_randinit:                          Random State Initialization.
@@ -2979,9 +2954,9 @@ Function and Type Index
  * gmp_randinit_set:                      Random State Initialization.
                                                                (line  43)
  * gmp_randseed:                          Random State Seeding.
-                                                              (line   7)
+                                                              (line   8)
  * gmp_randseed_ui:                       Random State Seeding.
-                                                              (line   9)
+                                                              (line  10)
  * gmp_randstate_t:                       Nomenclature and Types.
                                                                (line  46)
  * gmp_scanf:                             Formatted Input Functions.
@@ -3015,35 +2990,17 @@ Function and Type Index
  * gmp_vsscanf:                           Formatted Input Functions.
                                                                (line  31)
  * hypot:                                 C++ Interface Floats.
-                                                              (line  90)
-* itom:                                  BSD Compatible Functions.
-                                                              (line  29)
-* madd:                                  BSD Compatible Functions.
-                                                              (line  43)
-* mcmp:                                  BSD Compatible Functions.
-                                                              (line  85)
-* mdiv:                                  BSD Compatible Functions.
-                                                              (line  53)
-* mfree:                                 BSD Compatible Functions.
-                                                              (line 105)
-* min:                                   BSD Compatible Functions.
-                                                              (line  89)
-* MINT:                                  BSD Compatible Functions.
-                                                              (line  21)
-* mout:                                  BSD Compatible Functions.
                                                                (line  94)
-* move:                                  BSD Compatible Functions.
-                                                              (line  39)
  * mp_bitcnt_t:                           Nomenclature and Types.
                                                                (line  42)
  * mp_bits_per_limb:                      Useful Macros and Constants.
                                                                (line   7)
  * mp_exp_t:                              Nomenclature and Types.
                                                                (line  27)
-* mp_get_memory_functions:               Custom Allocation.   (line  93)
+* mp_get_memory_functions:               Custom Allocation.   (line  90)
  * mp_limb_t:                             Nomenclature and Types.
                                                                (line  31)
-* mp_set_memory_functions:               Custom Allocation.   (line  21)
+* mp_set_memory_functions:               Custom Allocation.   (line  18)
  * mp_size_t:                             Nomenclature and Types.
                                                                (line  37)
  * mpf_abs:                               Float Arithmetic.    (line  47)
@@ -3054,39 +3011,41 @@ Function and Type Index
  * mpf_class:                             C++ Interface General.
                                                                (line  20)
  * mpf_class::fits_sint_p:                C++ Interface Floats.
-                                                              (line  83)
+                                                              (line  87)
  * mpf_class::fits_slong_p:               C++ Interface Floats.
-                                                              (line  84)
+                                                              (line  88)
  * mpf_class::fits_sshort_p:              C++ Interface Floats.
-                                                              (line  85)
+                                                              (line  89)
  * mpf_class::fits_uint_p:                C++ Interface Floats.
-                                                              (line  86)
+                                                              (line  90)
  * mpf_class::fits_ulong_p:               C++ Interface Floats.
-                                                              (line  87)
+                                                              (line  91)
  * mpf_class::fits_ushort_p:              C++ Interface Floats.
-                                                              (line  88)
+                                                              (line  92)
  * mpf_class::get_d:                      C++ Interface Floats.
-                                                              (line  91)
+                                                              (line  95)
  * mpf_class::get_mpf_t:                  C++ Interface General.
                                                                (line  66)
  * mpf_class::get_prec:                   C++ Interface Floats.
-                                                              (line 109)
+                                                              (line 115)
  * mpf_class::get_si:                     C++ Interface Floats.
-                                                              (line  92)
+                                                              (line  96)
  * mpf_class::get_str:                    C++ Interface Floats.
-                                                              (line  94)
+                                                              (line  98)
  * mpf_class::get_ui:                     C++ Interface Floats.
-                                                              (line  95)
+                                                              (line  99)
  * mpf_class::mpf_class:                  C++ Interface Floats.
                                                                (line  12)
  * mpf_class::operator=:                  C++ Interface Floats.
-                                                              (line  56)
+                                                              (line  60)
  * mpf_class::set_prec:                   C++ Interface Floats.
-                                                              (line 110)
+                                                              (line 116)
  * mpf_class::set_prec_raw:               C++ Interface Floats.
-                                                              (line 111)
+                                                              (line 117)
  * mpf_class::set_str:                    C++ Interface Floats.
-                                                              (line  97)
+                                                              (line 101)
+* mpf_class::swap:                       C++ Interface Floats.
+                                                              (line 104)
  * mpf_clear:                             Initializing Floats. (line  37)
  * mpf_clears:                            Initializing Floats. (line  41)
  * mpf_cmp:                               Float Comparison.    (line   7)
@@ -3094,9 +3053,9 @@ Function and Type Index
  * mpf_cmp_si:                            Float Comparison.    (line  10)
  * mpf_cmp_ui:                            Float Comparison.    (line   9)
  * mpf_div:                               Float Arithmetic.    (line  29)
-* mpf_div_2exp:                          Float Arithmetic.    (line  53)
+* mpf_div_2exp:                          Float Arithmetic.    (line  55)
  * mpf_div_ui:                            Float Arithmetic.    (line  33)
-* mpf_eq:                                Float Comparison.    (line  17)
+* mpf_eq:                                Float Comparison.    (line  18)
  * mpf_fits_sint_p:                       Miscellaneous Float Functions.
                                                                (line  20)
  * mpf_fits_slong_p:                      Miscellaneous Float Functions.
@@ -3112,12 +3071,12 @@ Function and Type Index
  * mpf_floor:                             Miscellaneous Float Functions.
                                                                (line   8)
  * mpf_get_d:                             Converting Floats.   (line   7)
-* mpf_get_d_2exp:                        Converting Floats.   (line  16)
+* mpf_get_d_2exp:                        Converting Floats.   (line  17)
  * mpf_get_default_prec:                  Initializing Floats. (line  12)
  * mpf_get_prec:                          Initializing Floats. (line  62)
-* mpf_get_si:                            Converting Floats.   (line  27)
-* mpf_get_str:                           Converting Floats.   (line  37)
-* mpf_get_ui:                            Converting Floats.   (line  28)
+* mpf_get_si:                            Converting Floats.   (line  28)
+* mpf_get_str:                           Converting Floats.   (line  38)
+* mpf_get_ui:                            Converting Floats.   (line  29)
  * mpf_init:                              Initializing Floats. (line  19)
  * mpf_init2:                             Initializing Floats. (line  26)
  * mpf_init_set:                          Simultaneous Float Init & Assign.
@@ -3127,7 +3086,7 @@ Function and Type Index
  * mpf_init_set_si:                       Simultaneous Float Init & Assign.
                                                                (line  18)
  * mpf_init_set_str:                      Simultaneous Float Init & Assign.
-                                                              (line  25)
+                                                              (line  26)
  * mpf_init_set_ui:                       Simultaneous Float Init & Assign.
                                                                (line  17)
  * mpf_inits:                             Initializing Floats. (line  31)
@@ -3135,14 +3094,14 @@ Function and Type Index
  * mpf_integer_p:                         Miscellaneous Float Functions.
                                                                (line  14)
  * mpf_mul:                               Float Arithmetic.    (line  19)
-* mpf_mul_2exp:                          Float Arithmetic.    (line  50)
+* mpf_mul_2exp:                          Float Arithmetic.    (line  51)
  * mpf_mul_ui:                            Float Arithmetic.    (line  21)
  * mpf_neg:                               Float Arithmetic.    (line  44)
  * mpf_out_str:                           I/O of Floats.       (line  19)
  * mpf_pow_ui:                            Float Arithmetic.    (line  41)
  * mpf_random2:                           Miscellaneous Float Functions.
                                                                (line  37)
-* mpf_reldiff:                           Float Comparison.    (line  29)
+* mpf_reldiff:                           Float Comparison.    (line  31)
  * mpf_set:                               Assigning Floats.    (line  10)
  * mpf_set_d:                             Assigning Floats.    (line  13)
  * mpf_set_default_prec:                  Initializing Floats. (line   7)
@@ -3153,7 +3112,7 @@ Function and Type Index
  * mpf_set_str:                           Assigning Floats.    (line  18)
  * mpf_set_ui:                            Assigning Floats.    (line  11)
  * mpf_set_z:                             Assigning Floats.    (line  14)
-* mpf_sgn:                               Float Comparison.    (line  33)
+* mpf_sgn:                               Float Comparison.    (line  35)
  * mpf_sqrt:                              Float Arithmetic.    (line  36)
  * mpf_sqrt_ui:                           Float Arithmetic.    (line  37)
  * mpf_sub:                               Float Arithmetic.    (line  12)
@@ -3216,32 +3175,34 @@ Function and Type Index
  * mpn_xnor_n:                            Low-level Functions. (line 462)
  * mpn_xor_n:                             Low-level Functions. (line 437)
  * mpn_zero:                              Low-level Functions. (line 479)
-* mpq_abs:                               Rational Arithmetic. (line  31)
-* mpq_add:                               Rational Arithmetic. (line   7)
+* mpq_abs:                               Rational Arithmetic. (line  34)
+* mpq_add:                               Rational Arithmetic. (line   8)
  * mpq_canonicalize:                      Rational Number Functions.
                                                                (line  22)
  * mpq_class:                             C++ Interface General.
                                                                (line  19)
  * mpq_class::canonicalize:               C++ Interface Rationals.
-                                                              (line  37)
+                                                              (line  43)
  * mpq_class::get_d:                      C++ Interface Rationals.
-                                                              (line  46)
+                                                              (line  52)
  * mpq_class::get_den:                    C++ Interface Rationals.
-                                                              (line  58)
+                                                              (line  66)
  * mpq_class::get_den_mpz_t:              C++ Interface Rationals.
-                                                              (line  68)
+                                                              (line  76)
  * mpq_class::get_mpq_t:                  C++ Interface General.
                                                                (line  65)
  * mpq_class::get_num:                    C++ Interface Rationals.
-                                                              (line  57)
+                                                              (line  65)
  * mpq_class::get_num_mpz_t:              C++ Interface Rationals.
-                                                              (line  67)
+                                                              (line  75)
  * mpq_class::get_str:                    C++ Interface Rationals.
-                                                              (line  47)
+                                                              (line  53)
  * mpq_class::mpq_class:                  C++ Interface Rationals.
-                                                              (line  30)
+                                                              (line  23)
  * mpq_class::set_str:                    C++ Interface Rationals.
-                                                              (line  48)
+                                                              (line  54)
+* mpq_class::swap:                       C++ Interface Rationals.
+                                                              (line  57)
  * mpq_clear:                             Initializing Rationals.
                                                                (line  16)
  * mpq_clears:                            Initializing Rationals.
@@ -3251,8 +3212,8 @@ Function and Type Index
  * mpq_cmp_ui:                            Comparing Rationals. (line  15)
  * mpq_denref:                            Applying Integer Functions.
                                                                (line  18)
-* mpq_div:                               Rational Arithmetic. (line  22)
-* mpq_div_2exp:                          Rational Arithmetic. (line  25)
+* mpq_div:                               Rational Arithmetic. (line  24)
+* mpq_div_2exp:                          Rational Arithmetic. (line  28)
  * mpq_equal:                             Comparing Rationals. (line  33)
  * mpq_get_d:                             Rational Conversions.
                                                                (line   7)
@@ -3266,14 +3227,14 @@ Function and Type Index
                                                                (line   7)
  * mpq_inits:                             Initializing Rationals.
                                                                (line  12)
-* mpq_inp_str:                           I/O of Rationals.    (line  26)
-* mpq_inv:                               Rational Arithmetic. (line  34)
-* mpq_mul:                               Rational Arithmetic. (line  15)
-* mpq_mul_2exp:                          Rational Arithmetic. (line  18)
-* mpq_neg:                               Rational Arithmetic. (line  28)
+* mpq_inp_str:                           I/O of Rationals.    (line  27)
+* mpq_inv:                               Rational Arithmetic. (line  37)
+* mpq_mul:                               Rational Arithmetic. (line  16)
+* mpq_mul_2exp:                          Rational Arithmetic. (line  20)
+* mpq_neg:                               Rational Arithmetic. (line  31)
  * mpq_numref:                            Applying Integer Functions.
                                                                (line  17)
-* mpq_out_str:                           I/O of Rationals.    (line  18)
+* mpq_out_str:                           I/O of Rationals.    (line  19)
  * mpq_set:                               Initializing Rationals.
                                                                (line  24)
  * mpq_set_d:                             Rational Conversions.
@@ -3293,67 +3254,71 @@ Function and Type Index
  * mpq_set_z:                             Initializing Rationals.
                                                                (line  25)
  * mpq_sgn:                               Comparing Rationals. (line  27)
-* mpq_sub:                               Rational Arithmetic. (line  11)
+* mpq_sub:                               Rational Arithmetic. (line  12)
  * mpq_swap:                              Initializing Rationals.
                                                                (line  56)
  * mpq_t:                                 Nomenclature and Types.
                                                                (line  16)
-* mpz_abs:                               Integer Arithmetic.  (line  42)
+* mpz_2fac_ui:                           Number Theoretic Functions.
+                                                              (line 114)
+* mpz_abs:                               Integer Arithmetic.  (line  45)
  * mpz_add:                               Integer Arithmetic.  (line   7)
  * mpz_add_ui:                            Integer Arithmetic.  (line   9)
-* mpz_addmul:                            Integer Arithmetic.  (line  25)
-* mpz_addmul_ui:                         Integer Arithmetic.  (line  27)
+* mpz_addmul:                            Integer Arithmetic.  (line  26)
+* mpz_addmul_ui:                         Integer Arithmetic.  (line  28)
  * mpz_and:                               Integer Logic and Bit Fiddling.
                                                                (line  11)
  * mpz_array_init:                        Integer Special Functions.
                                                                (line  11)
  * mpz_bin_ui:                            Number Theoretic Functions.
-                                                              (line 111)
+                                                              (line 126)
  * mpz_bin_uiui:                          Number Theoretic Functions.
-                                                              (line 113)
+                                                              (line 128)
  * mpz_cdiv_q:                            Integer Division.    (line  13)
-* mpz_cdiv_q_2exp:                       Integer Division.    (line  24)
-* mpz_cdiv_q_ui:                         Integer Division.    (line  17)
-* mpz_cdiv_qr:                           Integer Division.    (line  15)
-* mpz_cdiv_qr_ui:                        Integer Division.    (line  21)
+* mpz_cdiv_q_2exp:                       Integer Division.    (line  26)
+* mpz_cdiv_q_ui:                         Integer Division.    (line  18)
+* mpz_cdiv_qr:                           Integer Division.    (line  16)
+* mpz_cdiv_qr_ui:                        Integer Division.    (line  22)
  * mpz_cdiv_r:                            Integer Division.    (line  14)
-* mpz_cdiv_r_2exp:                       Integer Division.    (line  25)
-* mpz_cdiv_r_ui:                         Integer Division.    (line  19)
-* mpz_cdiv_ui:                           Integer Division.    (line  23)
+* mpz_cdiv_r_2exp:                       Integer Division.    (line  28)
+* mpz_cdiv_r_ui:                         Integer Division.    (line  20)
+* mpz_cdiv_ui:                           Integer Division.    (line  24)
  * mpz_class:                             C++ Interface General.
                                                                (line  18)
  * mpz_class::fits_sint_p:                C++ Interface Integers.
-                                                              (line  45)
+                                                              (line  50)
  * mpz_class::fits_slong_p:               C++ Interface Integers.
-                                                              (line  46)
+                                                              (line  51)
  * mpz_class::fits_sshort_p:              C++ Interface Integers.
-                                                              (line  47)
+                                                              (line  52)
  * mpz_class::fits_uint_p:                C++ Interface Integers.
-                                                              (line  48)
+                                                              (line  53)
  * mpz_class::fits_ulong_p:               C++ Interface Integers.
-                                                              (line  49)
+                                                              (line  54)
  * mpz_class::fits_ushort_p:              C++ Interface Integers.
-                                                              (line  50)
+                                                              (line  55)
  * mpz_class::get_d:                      C++ Interface Integers.
-                                                              (line  51)
+                                                              (line  56)
  * mpz_class::get_mpz_t:                  C++ Interface General.
                                                                (line  64)
  * mpz_class::get_si:                     C++ Interface Integers.
-                                                              (line  52)
+                                                              (line  57)
  * mpz_class::get_str:                    C++ Interface Integers.
-                                                              (line  53)
+                                                              (line  58)
  * mpz_class::get_ui:                     C++ Interface Integers.
-                                                              (line  54)
+                                                              (line  59)
  * mpz_class::mpz_class:                  C++ Interface Integers.
-                                                              (line  20)
+                                                              (line   7)
  * mpz_class::set_str:                    C++ Interface Integers.
-                                                              (line  55)
+                                                              (line  60)
+* mpz_class::swap:                       C++ Interface Integers.
+                                                              (line  64)
  * mpz_clear:                             Initializing Integers.
-                                                              (line  44)
+                                                              (line  49)
  * mpz_clears:                            Initializing Integers.
-                                                              (line  48)
+                                                              (line  53)
  * mpz_clrbit:                            Integer Logic and Bit Fiddling.
-                                                              (line  54)
+                                                              (line  56)
  * mpz_cmp:                               Integer Comparisons. (line   7)
  * mpz_cmp_d:                             Integer Comparisons. (line   8)
  * mpz_cmp_si:                            Integer Comparisons. (line   9)
@@ -3364,34 +3329,34 @@ Function and Type Index
  * mpz_com:                               Integer Logic and Bit Fiddling.
                                                                (line  20)
  * mpz_combit:                            Integer Logic and Bit Fiddling.
-                                                              (line  57)
-* mpz_congruent_2exp_p:                  Integer Division.    (line 124)
-* mpz_congruent_p:                       Integer Division.    (line 121)
-* mpz_congruent_ui_p:                    Integer Division.    (line 123)
-* mpz_divexact:                          Integer Division.    (line 101)
-* mpz_divexact_ui:                       Integer Division.    (line 102)
-* mpz_divisible_2exp_p:                  Integer Division.    (line 112)
-* mpz_divisible_p:                       Integer Division.    (line 110)
-* mpz_divisible_ui_p:                    Integer Division.    (line 111)
+                                                              (line  59)
+* mpz_congruent_2exp_p:                  Integer Division.    (line 137)
+* mpz_congruent_p:                       Integer Division.    (line 133)
+* mpz_congruent_ui_p:                    Integer Division.    (line 135)
+* mpz_divexact:                          Integer Division.    (line 110)
+* mpz_divexact_ui:                       Integer Division.    (line 112)
+* mpz_divisible_2exp_p:                  Integer Division.    (line 123)
+* mpz_divisible_p:                       Integer Division.    (line 120)
+* mpz_divisible_ui_p:                    Integer Division.    (line 122)
  * mpz_even_p:                            Miscellaneous Integer Functions.
                                                                (line  18)
  * mpz_export:                            Integer Import and Export.
                                                                (line  45)
  * mpz_fac_ui:                            Number Theoretic Functions.
-                                                              (line 108)
-* mpz_fdiv_q:                            Integer Division.    (line  27)
-* mpz_fdiv_q_2exp:                       Integer Division.    (line  38)
-* mpz_fdiv_q_ui:                         Integer Division.    (line  31)
-* mpz_fdiv_qr:                           Integer Division.    (line  29)
-* mpz_fdiv_qr_ui:                        Integer Division.    (line  35)
-* mpz_fdiv_r:                            Integer Division.    (line  28)
-* mpz_fdiv_r_2exp:                       Integer Division.    (line  39)
-* mpz_fdiv_r_ui:                         Integer Division.    (line  33)
-* mpz_fdiv_ui:                           Integer Division.    (line  37)
+                                                              (line 113)
+* mpz_fdiv_q:                            Integer Division.    (line  30)
+* mpz_fdiv_q_2exp:                       Integer Division.    (line  43)
+* mpz_fdiv_q_ui:                         Integer Division.    (line  35)
+* mpz_fdiv_qr:                           Integer Division.    (line  33)
+* mpz_fdiv_qr_ui:                        Integer Division.    (line  39)
+* mpz_fdiv_r:                            Integer Division.    (line  31)
+* mpz_fdiv_r_2exp:                       Integer Division.    (line  45)
+* mpz_fdiv_r_ui:                         Integer Division.    (line  37)
+* mpz_fdiv_ui:                           Integer Division.    (line  41)
  * mpz_fib2_ui:                           Number Theoretic Functions.
-                                                              (line 121)
+                                                              (line 136)
  * mpz_fib_ui:                            Number Theoretic Functions.
-                                                              (line 119)
+                                                              (line 134)
  * mpz_fits_sint_p:                       Miscellaneous Integer Functions.
                                                                (line  10)
  * mpz_fits_slong_p:                      Miscellaneous Integer Functions.
@@ -3405,15 +3370,15 @@ Function and Type Index
  * mpz_fits_ushort_p:                     Miscellaneous Integer Functions.
                                                                (line  11)
  * mpz_gcd:                               Number Theoretic Functions.
-                                                              (line  30)
+                                                              (line  32)
  * mpz_gcd_ui:                            Number Theoretic Functions.
-                                                              (line  37)
+                                                              (line  39)
  * mpz_gcdext:                            Number Theoretic Functions.
-                                                              (line  47)
+                                                              (line  49)
  * mpz_get_d:                             Converting Integers. (line  27)
-* mpz_get_d_2exp:                        Converting Integers. (line  35)
+* mpz_get_d_2exp:                        Converting Integers. (line  36)
  * mpz_get_si:                            Converting Integers. (line  18)
-* mpz_get_str:                           Converting Integers. (line  46)
+* mpz_get_str:                           Converting Integers. (line  47)
  * mpz_get_ui:                            Converting Integers. (line  11)
  * mpz_getlimbn:                          Integer Special Functions.
                                                                (line  60)
@@ -3432,50 +3397,52 @@ Function and Type Index
  * mpz_init_set_si:                       Simultaneous Integer Init & Assign.
                                                                (line  29)
  * mpz_init_set_str:                      Simultaneous Integer Init & Assign.
-                                                              (line  34)
+                                                              (line  35)
  * mpz_init_set_ui:                       Simultaneous Integer Init & Assign.
                                                                (line  28)
  * mpz_inits:                             Initializing Integers.
                                                                (line  29)
-* mpz_inp_raw:                           I/O of Integers.     (line  61)
-* mpz_inp_str:                           I/O of Integers.     (line  30)
+* mpz_inp_raw:                           I/O of Integers.     (line  62)
+* mpz_inp_str:                           I/O of Integers.     (line  31)
  * mpz_invert:                            Number Theoretic Functions.
-                                                              (line  72)
+                                                              (line  76)
  * mpz_ior:                               Integer Logic and Bit Fiddling.
                                                                (line  14)
  * mpz_jacobi:                            Number Theoretic Functions.
-                                                              (line  79)
+                                                              (line  83)
  * mpz_kronecker:                         Number Theoretic Functions.
-                                                              (line  87)
+                                                              (line  91)
  * mpz_kronecker_si:                      Number Theoretic Functions.
-                                                              (line  88)
+                                                              (line  92)
  * mpz_kronecker_ui:                      Number Theoretic Functions.
-                                                              (line  89)
+                                                              (line  93)
  * mpz_lcm:                               Number Theoretic Functions.
-                                                              (line  66)
+                                                              (line  68)
  * mpz_lcm_ui:                            Number Theoretic Functions.
-                                                              (line  67)
+                                                              (line  70)
  * mpz_legendre:                          Number Theoretic Functions.
-                                                              (line  82)
+                                                              (line  86)
  * mpz_lucnum2_ui:                        Number Theoretic Functions.
-                                                              (line 132)
+                                                              (line 147)
  * mpz_lucnum_ui:                         Number Theoretic Functions.
-                                                              (line 130)
-* mpz_mod:                               Integer Division.    (line  91)
-* mpz_mod_ui:                            Integer Division.    (line  93)
+                                                              (line 145)
+* mpz_mfac_uiui:                         Number Theoretic Functions.
+                                                              (line 116)
+* mpz_mod:                               Integer Division.    (line 100)
+* mpz_mod_ui:                            Integer Division.    (line 102)
  * mpz_mul:                               Integer Arithmetic.  (line  19)
-* mpz_mul_2exp:                          Integer Arithmetic.  (line  35)
+* mpz_mul_2exp:                          Integer Arithmetic.  (line  38)
  * mpz_mul_si:                            Integer Arithmetic.  (line  20)
  * mpz_mul_ui:                            Integer Arithmetic.  (line  22)
-* mpz_neg:                               Integer Arithmetic.  (line  39)
+* mpz_neg:                               Integer Arithmetic.  (line  42)
  * mpz_nextprime:                         Number Theoretic Functions.
-                                                              (line  23)
+                                                              (line  25)
  * mpz_odd_p:                             Miscellaneous Integer Functions.
                                                                (line  17)
-* mpz_out_raw:                           I/O of Integers.     (line  45)
-* mpz_out_str:                           I/O of Integers.     (line  18)
-* mpz_perfect_power_p:                   Integer Roots.       (line  27)
-* mpz_perfect_square_p:                  Integer Roots.       (line  36)
+* mpz_out_raw:                           I/O of Integers.     (line  46)
+* mpz_out_str:                           I/O of Integers.     (line  19)
+* mpz_perfect_power_p:                   Integer Roots.       (line  28)
+* mpz_perfect_square_p:                  Integer Roots.       (line  37)
  * mpz_popcount:                          Integer Logic and Bit Fiddling.
                                                                (line  23)
  * mpz_pow_ui:                            Integer Exponentiation.
@@ -3486,6 +3453,8 @@ Function and Type Index
                                                                (line  18)
  * mpz_powm_ui:                           Integer Exponentiation.
                                                                (line  10)
+* mpz_primorial_ui:                      Number Theoretic Functions.
+                                                              (line 121)
  * mpz_probab_prime_p:                    Number Theoretic Functions.
                                                                (line   7)
  * mpz_random:                            Integer Random Numbers.
@@ -3493,17 +3462,17 @@ Function and Type Index
  * mpz_random2:                           Integer Random Numbers.
                                                                (line  51)
  * mpz_realloc2:                          Initializing Integers.
-                                                              (line  52)
+                                                              (line  57)
  * mpz_remove:                            Number Theoretic Functions.
-                                                              (line 103)
-* mpz_root:                              Integer Roots.       (line   7)
-* mpz_rootrem:                           Integer Roots.       (line  13)
+                                                              (line 108)
+* mpz_root:                              Integer Roots.       (line   8)
+* mpz_rootrem:                           Integer Roots.       (line  14)
  * mpz_rrandomb:                          Integer Random Numbers.
                                                                (line  31)
  * mpz_scan0:                             Integer Logic and Bit Fiddling.
-                                                              (line  37)
-* mpz_scan1:                             Integer Logic and Bit Fiddling.
                                                                (line  38)
+* mpz_scan1:                             Integer Logic and Bit Fiddling.
+                                                              (line  40)
  * mpz_set:                               Assigning Integers.  (line  10)
  * mpz_set_d:                             Assigning Integers.  (line  13)
  * mpz_set_f:                             Assigning Integers.  (line  15)
@@ -3512,36 +3481,36 @@ Function and Type Index
  * mpz_set_str:                           Assigning Integers.  (line  21)
  * mpz_set_ui:                            Assigning Integers.  (line  11)
  * mpz_setbit:                            Integer Logic and Bit Fiddling.
-                                                              (line  51)
+                                                              (line  53)
  * mpz_sgn:                               Integer Comparisons. (line  28)
  * mpz_si_kronecker:                      Number Theoretic Functions.
-                                                              (line  90)
+                                                              (line  94)
  * mpz_size:                              Integer Special Functions.
                                                                (line  68)
  * mpz_sizeinbase:                        Miscellaneous Integer Functions.
                                                                (line  23)
-* mpz_sqrt:                              Integer Roots.       (line  17)
-* mpz_sqrtrem:                           Integer Roots.       (line  20)
+* mpz_sqrt:                              Integer Roots.       (line  18)
+* mpz_sqrtrem:                           Integer Roots.       (line  21)
  * mpz_sub:                               Integer Arithmetic.  (line  12)
  * mpz_sub_ui:                            Integer Arithmetic.  (line  14)
-* mpz_submul:                            Integer Arithmetic.  (line  30)
-* mpz_submul_ui:                         Integer Arithmetic.  (line  32)
+* mpz_submul:                            Integer Arithmetic.  (line  32)
+* mpz_submul_ui:                         Integer Arithmetic.  (line  34)
  * mpz_swap:                              Assigning Integers.  (line  37)
  * mpz_t:                                 Nomenclature and Types.
                                                                (line   6)
-* mpz_tdiv_q:                            Integer Division.    (line  41)
-* mpz_tdiv_q_2exp:                       Integer Division.    (line  52)
-* mpz_tdiv_q_ui:                         Integer Division.    (line  45)
-* mpz_tdiv_qr:                           Integer Division.    (line  43)
-* mpz_tdiv_qr_ui:                        Integer Division.    (line  49)
-* mpz_tdiv_r:                            Integer Division.    (line  42)
-* mpz_tdiv_r_2exp:                       Integer Division.    (line  53)
-* mpz_tdiv_r_ui:                         Integer Division.    (line  47)
-* mpz_tdiv_ui:                           Integer Division.    (line  51)
+* mpz_tdiv_q:                            Integer Division.    (line  47)
+* mpz_tdiv_q_2exp:                       Integer Division.    (line  60)
+* mpz_tdiv_q_ui:                         Integer Division.    (line  52)
+* mpz_tdiv_qr:                           Integer Division.    (line  50)
+* mpz_tdiv_qr_ui:                        Integer Division.    (line  56)
+* mpz_tdiv_r:                            Integer Division.    (line  48)
+* mpz_tdiv_r_2exp:                       Integer Division.    (line  62)
+* mpz_tdiv_r_ui:                         Integer Division.    (line  54)
+* mpz_tdiv_ui:                           Integer Division.    (line  58)
  * mpz_tstbit:                            Integer Logic and Bit Fiddling.
-                                                              (line  60)
+                                                              (line  62)
  * mpz_ui_kronecker:                      Number Theoretic Functions.
-                                                              (line  91)
+                                                              (line  95)
  * mpz_ui_pow_ui:                         Integer Exponentiation.
                                                                (line  33)
  * mpz_ui_sub:                            Integer Arithmetic.  (line  16)
@@ -3551,44 +3520,39 @@ Function and Type Index
                                                                (line  23)
  * mpz_xor:                               Integer Logic and Bit Fiddling.
                                                                (line  17)
-* msqrt:                                 BSD Compatible Functions.
-                                                              (line  63)
-* msub:                                  BSD Compatible Functions.
-                                                              (line  46)
-* mtox:                                  BSD Compatible Functions.
-                                                              (line  98)
-* mult:                                  BSD Compatible Functions.
-                                                              (line  49)
-* operator%:                             C++ Interface Integers.
+* operator"" <1>:                        C++ Interface Integers.
                                                                (line  30)
+* operator"" <2>:                        C++ Interface Floats.
+                                                              (line  56)
+* operator"":                            C++ Interface Rationals.
+                                                              (line  38)
+* operator%:                             C++ Interface Integers.
+                                                              (line  35)
  * operator/:                             C++ Interface Integers.
-                                                              (line  29)
+                                                              (line  34)
  * operator<<:                            C++ Formatted Output.
-                                                              (line  11)
-* operator>> <1>:                        C++ Formatted Input. (line  11)
-* operator>> <2>:                        C++ Interface Rationals.
-                                                              (line  77)
-* operator>>:                            C++ Formatted Input. (line  14)
-* pow:                                   BSD Compatible Functions.
-                                                              (line  71)
-* rpow:                                  BSD Compatible Functions.
-                                                              (line  79)
-* sdiv:                                  BSD Compatible Functions.
-                                                              (line  55)
+                                                              (line  20)
+* operator>> <1>:                        C++ Interface Rationals.
+                                                              (line  85)
+* operator>>:                            C++ Formatted Input. (line  25)
  * sgn <1>:                               C++ Interface Rationals.
-                                                              (line  50)
+                                                              (line  56)
  * sgn <2>:                               C++ Interface Integers.
-                                                              (line  57)
+                                                              (line  62)
  * sgn:                                   C++ Interface Floats.
-                                                              (line  98)
-* sqrt <1>:                              C++ Interface Floats.
-                                                              (line  99)
-* sqrt:                                  C++ Interface Integers.
+                                                              (line 102)
+* sqrt <1>:                              C++ Interface Integers.
+                                                              (line  63)
+* sqrt:                                  C++ Interface Floats.
+                                                              (line 103)
+* swap <1>:                              C++ Interface Floats.
+                                                              (line 105)
+* swap <2>:                              C++ Interface Integers.
+                                                              (line  65)
+* swap:                                  C++ Interface Rationals.
                                                                (line  58)
  * trunc:                                 C++ Interface Floats.
-                                                              (line 100)
-* xtom:                                  BSD Compatible Functions.
-                                                              (line  34)
+                                                              (line 106)
  
  
  
diff --git a/doc/gmp.texi b/doc/gmp.texi

index 933df3934ce6ba4f4526ec5058658c05d760bd30..2dce73976d7f215bcb80cf46f7505ee638e9b777 100644 (file)
--- a/doc/gmp.texi
+++ b/doc/gmp.texi
@@ -15,7 +15,7 @@ This manual describes how to install and use the GNU multiple precision
  arithmetic library, version @value{VERSION}.
  
  Copyright 1991, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
-2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012 Free Software
+2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013 Free Software
  Foundation, Inc.
  
  Permission is granted to copy, distribute and/or modify this document under
@@ -137,7 +137,6 @@ How to install and use the GNU multiple precision arithmetic library, version @v
  * Formatted Output::           @code{printf} style output.
  * Formatted Input::            @code{scanf} style input.
  * C++ Class Interface::        Class wrappers around GMP types.
-* BSD Compatible Functions::   All functions found in BSD MP.
  * Custom Allocation::          How to customize the internal allocation.
  * Language Bindings::          Using GMP from other languages.
  * Algorithms::                 What happens behind the scenes.
@@ -714,7 +713,7 @@ the binaries won't run on older members of the family, and might run slower on
  other members, older or newer.  The best idea is always to build GMP for the
  exact machine type you intend to run it on.
  
-The following CPUs have specific support.  See @file{configure.in} for details
+The following CPUs have specific support.  See @file{configure.ac} for details
  of what code and compiler options they select.
  
  @itemize @bullet
@@ -860,11 +859,7 @@ CPUs not listed will use generic C code.
  @item Generic C Build
  @cindex Generic C
  If some of the assembly code causes problems, or if otherwise desired, the
-generic C code can be selected with CPU @samp{none}.  For example,
-
-@example
-./configure --host=none-unknown-freebsd3.5
-@end example
+generic C code can be selected with the configure option @option{--disable-assembly}.
  
  Note that this will run quite slowly, but it should be portable and should at
  least make it possible to get something running if all else fails.
@@ -924,8 +919,7 @@ preprocessor should be set in @samp{CPPFLAGS} rather than @samp{CFLAGS}.
  Compiling is done with both @samp{CPPFLAGS} and @samp{CFLAGS}, but
  preprocessing uses just @samp{CPPFLAGS}.  This distinction is because most
  preprocessors won't accept all the flags the compiler does.  Preprocessing is
-done separately in some configure tests, and in the @samp{ansi2knr} support
-for K&R compilers.
+done separately in some configure tests.
  
  @item @option{CC_FOR_BUILD}
  @cindex @code{CC_FOR_BUILD}
@@ -1044,14 +1038,6 @@ By default multiplications are done using Karatsuba, 3-way Toom, higher degree
  Toom, and Fermat FFT@.  The FFT is only used on large to very large operands
  and can be disabled to save code size if desired.
  
-@item Berkeley MP, @option{--enable-mpbsd}
-@cindex Berkeley MP compatible functions
-@cindex BSD MP compatible functions
-@cindex @code{--enable-mpbsd}
-The Berkeley MP compatibility library (@file{libmp}) and header file
-(@file{mp.h}) are built and installed only if @option{--enable-mpbsd} is used.
-@xref{BSD Compatible Functions}.
-
  @item Assertion Checking, @option{--enable-assert}
  @cindex Assertion checking
  @cindex @code{--enable-assert}
@@ -1299,7 +1285,7 @@ support for n32 or 64 and so only gets a 32-bit limb and the MIPS 2 code.
  @item PowerPC 64 (@samp{powerpc64}, @samp{powerpc620}, @samp{powerpc630}, @samp{powerpc970}, @samp{power4}, @samp{power5})
  @cindex PowerPC
  @table @asis
-@item @samp{ABI=aix64}
+@item @samp{ABI=mode64}
  @cindex AIX
  The AIX 64 ABI uses 64-bit limbs and pointers and is the default on PowerPC 64
  @samp{*-*-aix*} systems.  Applications must be compiled with
@@ -1309,31 +1295,28 @@ gcc  -maix64
  xlc  -q64
  @end example
  
-@item @samp{ABI=mode64}
-The @samp{mode64} ABI uses 64-bit limbs and pointers, and is the default on
-64-bit GNU/Linux, BSD, and Mac OS X/Darwin systems.  Applications must be
-compiled with
+On 64-bit GNU/Linux, BSD, and Mac OS X/Darwin systems, the applications must
+be compiled with
  
  @example
  gcc  -m64
  @end example
  
  @item @samp{ABI=mode32}
-@cindex AIX
  The @samp{mode32} ABI uses a 64-bit @code{long long} limb but with the chip
  still in 32-bit mode and using 32-bit calling conventions.  This is the default
  for systems where the true 64-bit ABI is unavailable.  No special compiler
-options are typically needed for applications.
+options are typically needed for applications.  This ABI is not available under
+AIX.
  
  @item @samp{ABI=32}
  This is the basic 32-bit PowerPC ABI, with a 32-bit limb.  No special compiler
  options are needed for applications.
  @end table
  
-GMP's speed is greatest for @samp{aix64} and @samp{mode64}.  In @samp{ABI=32}
-only the 32-bit ISA is used and this doesn't make full use of a 64-bit chip.
-On a suitable system we could perhaps use more of the ISA, but there are no
-plans to do so.
+GMP's speed is greatest for the @samp{mode64} ABI; the @samp{mode32} ABI is 2nd
+best.  In @samp{ABI=32} only the 32-bit ISA is used and this doesn't make full
+use of a 64-bit chip.
  
  @sp 1
  @need 1000
@@ -1602,11 +1585,11 @@ In GMP, CPU types @samp{power*} and @samp{powerpc*} will each use instructions
  not available on the other, so it's important to choose the right one for the
  CPU that will be used.  Currently GMP has no assembly code support for using
  just the common instruction subset.  To get executables that run on both, the
-current suggestion is to use the generic C code (CPU @samp{none}), possibly
-with appropriate compiler options (like @samp{-mcpu=common} for
+current suggestion is to use the generic C code (@option{--disable-assembly}),
+possibly with appropriate compiler options (like @samp{-mcpu=common} for
  @command{gcc}).  CPU @samp{rs6000} (which is not a CPU but a family of
  workstations) is accepted by @file{config.sub}, but is currently equivalent to
-@samp{none}.
+@option{--disable-assembly}.
  
  @item Sparc CPU Types
  @cindex Sparc
@@ -1624,8 +1607,8 @@ Options, gcc, Using the GNU Compiler Collection (GCC)}).
  This makes that code unsuitable for use with the special V9
  @samp{-mcmodel=embmedany} (which uses @code{g4} as a data segment pointer), and
  for applications wanting to use those registers for special purposes.  In these
-cases the only suggestion currently is to build GMP with CPU @samp{none} to
-avoid the assembly code.
+cases the only suggestion currently is to build GMP with
+@option{--disable-assembly} to avoid the assembly code.
  
  @item SunOS 4
  @cindex SunOS
@@ -1981,11 +1964,6 @@ Functions for floating-point arithmetic, with names beginning with
  @code{mpf_}.  The associated type is @code{mpf_t}.  There are about 60
  functions is this class.  (@pxref{Floating-point Functions})
  
-@item
-Functions compatible with Berkeley MP, such as @code{itom}, @code{madd}, and
-@code{mult}.  The associated type is @code{MINT}.  (@pxref{BSD Compatible
-Functions})
-
  @item
  Fast low-level functions that operate on natural numbers.  These are used by
  the functions in the preceding groups, and you can also call them directly
@@ -2209,7 +2187,7 @@ For GMP i.j.k, these numbers will be i, j, and k, respectively.
  @findex gmp_version
  The GMP version number, as a null-terminated string, in the form ``i.j.k''.
  This release is @nicode{"@value{VERSION}"}.  Note that the format ``i.j'' was
-used when k was zero was used before version 4.3.0.
+used, before version 4.3.0, when k was zero.
  @end deftypevr
  
  @defmac __GMP_CC
@@ -2225,7 +2203,7 @@ strings.
  @cindex Past GMP versions
  @cindex Upward compatibility
  
-This version of GMP is upwardly binary compatible with all 4.x and 3.x
+This version of GMP is upwardly binary compatible with all 5.x, 4.x, and 3.x
  versions, and upwardly compatible at the source level with all 2.x versions,
  with the following exceptions.
  
@@ -2237,16 +2215,15 @@ with other @code{mpn} functions.
  @item
  @code{mpf_get_prec} counted precision slightly differently in GMP 3.0 and
  3.0.1, but in 3.1 reverted to the 2.x style.
+
+@item
+@code{mpn_bdivmod}, documented as preliminary in GMP 4, has been removed.
  @end itemize
  
  There are a number of compatibility issues between GMP 1 and GMP 2 that of
-course also apply when porting applications from GMP 1 to GMP 4.  Please
+course also apply when porting applications from GMP 1 to GMP 5.  Please
  see the GMP 2 manual for details.
  
-The Berkeley MP compatibility library (@pxref{BSD Compatible Functions}) is
-source and binary compatible with the standard @file{libmp}.
-
-@c @enumerate
  @c @item Integer division functions round the result differently.  The obsolete
  @c functions (@code{mpz_div}, @code{mpz_divmod}, @code{mpz_mdiv},
  @c @code{mpz_mdivmod}, etc) now all use floor rounding (i.e., they round the
@@ -2625,7 +2602,7 @@ Applications using the low-level @code{mpn} functions, however, will benefit
  from @option{--enable-assert} since it adds checks on the parameters of most
  such functions, many of which have subtle restrictions on their usage.  Note
  however that only the generic C code has checks, not the assembly code, so
-CPU @samp{none} should be used for maximum checking.
+@option{--disable-assembly} should be used for maximum checking.
  
  @item Temporary Memory Checking
  The build option @option{--enable-alloca=debug} arranges that each block of
@@ -2643,7 +2620,7 @@ To summarize the above, a GMP build for maximum debuggability would be
  
  @example
  ./configure --disable-shared --enable-assert \
-  --enable-alloca=debug --host=none CFLAGS=-g
+  --enable-alloca=debug --disable-assembly CFLAGS=-g
  @end example
  
  For C++, add @samp{--enable-cxx CXXFLAGS=-g}.
@@ -2660,28 +2637,30 @@ very very slowly.  On GNU/Linux for example,
  
  @cindex @command{checkergcc}
  @example
-./configure --host=none-pc-linux-gnu CC=checkergcc
+./configure --disable-assembly CC=checkergcc
  @end example
  
-@samp{--host=none} must be used, since the GMP assembly code doesn't support
-the checking scheme.  The GMP C++ features cannot be used, since current
-versions of checker (0.9.9.1) don't yet support the standard C++ library.
+@option{--disable-assembly} must be used, since the GMP assembly code doesn't
+support the checking scheme.  The GMP C++ features cannot be used, since
+current versions of checker (0.9.9.1) don't yet support the standard C++
+library.
  
  @item Valgrind
  @cindex Valgrind
-The valgrind program (@uref{http://valgrind.org/}) is a memory
-checker for x86s.  It translates and emulates machine instructions to do
+Valgrind (@uref{http://valgrind.org/}) is a memory checker for x86, ARM, MIPS,
+PowerPC, and S/390.  It translates and emulates machine instructions to do
  strong checks for uninitialized data (at the level of individual bits), memory
  accesses through bad pointers, and memory leaks.
  
-Recent versions of Valgrind are getting support for MMX and SSE/SSE2
-instructions, for past versions GMP will need to be configured not to use
-those, i.e.@: for an x86 without them (for instance plain @samp{i486}).
+Valgrind does not always support every possible instruction, in particular
+ones recently added to an ISA.  Valgrind might therefore be incompatible with
+a recent GMP or even a less recent GMP which is compiled using a recent GCC.
  
  GMP's assembly code sometimes promotes a read of the limbs to some larger size,
  for efficiency.  GMP will do this even at the start and end of a multilimb
-operand, using naturaly aligned operations on the larger type.  This may lead
-to benign reads outside of allocated areas, triggering complants from Valgrind.
+operand, using naturally aligned operations on the larger type.  This may lead
+to benign reads outside of allocated areas, triggering complaints from
+Valgrind.  Valgrind's option @samp{--partial-loads-ok=yes} should help.
  
  @item Other Problems
  Any suspected bug in GMP itself should be isolated to make sure it's not an
@@ -2904,7 +2883,10 @@ informative (@samp{where} in @command{gdb}, or @samp{$C} in @command{adb}).
  Please do not send core dumps, executables or @command{strace}s.
  
  @item
-The configuration options you used when building GMP, if any.
+The @samp{configure} options you used when building GMP, if any.
+
+@item
+The output from @samp{configure}, as printed to stdout, with any options used.
  
  @item
  The name of the compiler and its version.  For @command{gcc}, get the version
@@ -3014,10 +2996,13 @@ Initialize @var{x}, with space for @var{n}-bit numbers, and set its value to 0.
  Calling this function instead of @code{mpz_init} or @code{mpz_inits} is never
  necessary; reallocation is handled automatically by GMP when needed.
  
-@var{n} is only the initial space, @var{x} will grow automatically in
-the normal way, if necessary, for subsequent values stored.  @code{mpz_init2}
-makes it possible to avoid such reallocations if a maximum size is known in
-advance.
+While @var{n} defines the initial space, @var{x} will grow automatically in the
+normal way, if necessary, for subsequent values stored.  @code{mpz_init2} makes
+it possible to avoid such reallocations if a maximum size is known in advance.
+
+In preparation for an operation, GMP often allocates one limb more than
+ultimately needed.  To make sure GMP will not perform reallocation for
+@var{x}, you need to add the number of bits in @code{mp_limb_t} to @var{n}.
  @end deftypefun
  
  @deftypefun void mpz_clear (mpz_t @var{x})
@@ -3049,19 +3034,19 @@ to give memory back to the heap.
  These functions assign new values to already initialized integers
  (@pxref{Initializing Integers}).
  
-@deftypefun void mpz_set (mpz_t @var{rop}, mpz_t @var{op})
+@deftypefun void mpz_set (mpz_t @var{rop}, const mpz_t @var{op})
  @deftypefunx void mpz_set_ui (mpz_t @var{rop}, unsigned long int @var{op})
  @deftypefunx void mpz_set_si (mpz_t @var{rop}, signed long int @var{op})
  @deftypefunx void mpz_set_d (mpz_t @var{rop}, double @var{op})
-@deftypefunx void mpz_set_q (mpz_t @var{rop}, mpq_t @var{op})
-@deftypefunx void mpz_set_f (mpz_t @var{rop}, mpf_t @var{op})
+@deftypefunx void mpz_set_q (mpz_t @var{rop}, const mpq_t @var{op})
+@deftypefunx void mpz_set_f (mpz_t @var{rop}, const mpf_t @var{op})
  Set the value of @var{rop} from @var{op}.
  
  @code{mpz_set_d}, @code{mpz_set_q} and @code{mpz_set_f} truncate @var{op} to
  make it an integer.
  @end deftypefun
  
-@deftypefun int mpz_set_str (mpz_t @var{rop}, char *@var{str}, int @var{base})
+@deftypefun int mpz_set_str (mpz_t @var{rop}, const char *@var{str}, int @var{base})
  Set the value of @var{rop} from @var{str}, a null-terminated C string in base
  @var{base}.  White space is allowed in the string, and is simply ignored.
  
@@ -3120,7 +3105,7 @@ functions, it can be used as the source or destination operand for the ordinary
  integer functions.  Don't use an initialize-and-set function on a variable
  already initialized!
  
-@deftypefun void mpz_init_set (mpz_t @var{rop}, mpz_t @var{op})
+@deftypefun void mpz_init_set (mpz_t @var{rop}, const mpz_t @var{op})
  @deftypefunx void mpz_init_set_ui (mpz_t @var{rop}, unsigned long int @var{op})
  @deftypefunx void mpz_init_set_si (mpz_t @var{rop}, signed long int @var{op})
  @deftypefunx void mpz_init_set_d (mpz_t @var{rop}, double @var{op})
@@ -3128,7 +3113,7 @@ Initialize @var{rop} with limb space and set the initial numeric value from
  @var{op}.
  @end deftypefun
  
-@deftypefun int mpz_init_set_str (mpz_t @var{rop}, char *@var{str}, int @var{base})
+@deftypefun int mpz_init_set_str (mpz_t @var{rop}, const char *@var{str}, int @var{base})
  Initialize @var{rop} and set its value like @code{mpz_set_str} (see its
  documentation above for details).
  
@@ -3148,7 +3133,7 @@ This section describes functions for converting GMP integers to standard C
  types.  Functions for converting @emph{to} GMP integers are described in
  @ref{Assigning Integers} and @ref{I/O of Integers}.
  
-@deftypefun {unsigned long int} mpz_get_ui (mpz_t @var{op})
+@deftypefun {unsigned long int} mpz_get_ui (const mpz_t @var{op})
  Return the value of @var{op} as an @code{unsigned long}.
  
  If @var{op} is too big to fit an @code{unsigned long} then just the least
@@ -3156,7 +3141,7 @@ significant bits that do fit are returned.  The sign of @var{op} is ignored,
  only the absolute value is used.
  @end deftypefun
  
-@deftypefun {signed long int} mpz_get_si (mpz_t @var{op})
+@deftypefun {signed long int} mpz_get_si (const mpz_t @var{op})
  If @var{op} fits into a @code{signed long int} return the value of @var{op}.
  Otherwise return the least significant part of @var{op}, with the same sign
  as @var{op}.
@@ -3166,7 +3151,7 @@ result is probably not very useful.  To find out if the value will fit, use
  the function @code{mpz_fits_slong_p}.
  @end deftypefun
  
-@deftypefun double mpz_get_d (mpz_t @var{op})
+@deftypefun double mpz_get_d (const mpz_t @var{op})
  Convert @var{op} to a @code{double}, truncating if necessary (i.e.@: rounding
  towards zero).
  
@@ -3175,7 +3160,7 @@ dependent.  An infinity is returned where available.  A hardware overflow trap
  may or may not occur.
  @end deftypefun
  
-@deftypefun double mpz_get_d_2exp (signed long int *@var{exp}, mpz_t @var{op})
+@deftypefun double mpz_get_d_2exp (signed long int *@var{exp}, const mpz_t @var{op})
  Convert @var{op} to a @code{double}, truncating if necessary (i.e.@: rounding
  towards zero), and returning the exponent separately.
  
@@ -3189,7 +3174,7 @@ This is similar to the standard C @code{frexp} function (@pxref{Normalization
  Functions,,, libc, The GNU C Library Reference Manual}).
  @end deftypefun
  
-@deftypefun {char *} mpz_get_str (char *@var{str}, int @var{base}, mpz_t @var{op})
+@deftypefun {char *} mpz_get_str (char *@var{str}, int @var{base}, const mpz_t @var{op})
  Convert @var{op} to a string of digits in base @var{base}.  The base argument
  may vary from 2 to 62 or from @minus{}2 to @minus{}36.
  
@@ -3220,45 +3205,45 @@ or the given @var{str}.
  @cindex Integer arithmetic functions
  @cindex Arithmetic functions
  
-@deftypefun void mpz_add (mpz_t @var{rop}, mpz_t @var{op1}, mpz_t @var{op2})
-@deftypefunx void mpz_add_ui (mpz_t @var{rop}, mpz_t @var{op1}, unsigned long int @var{op2})
+@deftypefun void mpz_add (mpz_t @var{rop}, const mpz_t @var{op1}, const mpz_t @var{op2})
+@deftypefunx void mpz_add_ui (mpz_t @var{rop}, const mpz_t @var{op1}, unsigned long int @var{op2})
  Set @var{rop} to @math{@var{op1} + @var{op2}}.
  @end deftypefun
  
-@deftypefun void mpz_sub (mpz_t @var{rop}, mpz_t @var{op1}, mpz_t @var{op2})
-@deftypefunx void mpz_sub_ui (mpz_t @var{rop}, mpz_t @var{op1}, unsigned long int @var{op2})
-@deftypefunx void mpz_ui_sub (mpz_t @var{rop}, unsigned long int @var{op1}, mpz_t @var{op2})
+@deftypefun void mpz_sub (mpz_t @var{rop}, const mpz_t @var{op1}, const mpz_t @var{op2})
+@deftypefunx void mpz_sub_ui (mpz_t @var{rop}, const mpz_t @var{op1}, unsigned long int @var{op2})
+@deftypefunx void mpz_ui_sub (mpz_t @var{rop}, unsigned long int @var{op1}, const mpz_t @var{op2})
  Set @var{rop} to @var{op1} @minus{} @var{op2}.
  @end deftypefun
  
-@deftypefun void mpz_mul (mpz_t @var{rop}, mpz_t @var{op1}, mpz_t @var{op2})
-@deftypefunx void mpz_mul_si (mpz_t @var{rop}, mpz_t @var{op1}, long int @var{op2})
-@deftypefunx void mpz_mul_ui (mpz_t @var{rop}, mpz_t @var{op1}, unsigned long int @var{op2})
+@deftypefun void mpz_mul (mpz_t @var{rop}, const mpz_t @var{op1}, const mpz_t @var{op2})
+@deftypefunx void mpz_mul_si (mpz_t @var{rop}, const mpz_t @var{op1}, long int @var{op2})
+@deftypefunx void mpz_mul_ui (mpz_t @var{rop}, const mpz_t @var{op1}, unsigned long int @var{op2})
  Set @var{rop} to @math{@var{op1} @GMPtimes{} @var{op2}}.
  @end deftypefun
  
-@deftypefun void mpz_addmul (mpz_t @var{rop}, mpz_t @var{op1}, mpz_t @var{op2})
-@deftypefunx void mpz_addmul_ui (mpz_t @var{rop}, mpz_t @var{op1}, unsigned long int @var{op2})
+@deftypefun void mpz_addmul (mpz_t @var{rop}, const mpz_t @var{op1}, const mpz_t @var{op2})
+@deftypefunx void mpz_addmul_ui (mpz_t @var{rop}, const mpz_t @var{op1}, unsigned long int @var{op2})
  Set @var{rop} to @math{@var{rop} + @var{op1} @GMPtimes{} @var{op2}}.
  @end deftypefun
  
-@deftypefun void mpz_submul (mpz_t @var{rop}, mpz_t @var{op1}, mpz_t @var{op2})
-@deftypefunx void mpz_submul_ui (mpz_t @var{rop}, mpz_t @var{op1}, unsigned long int @var{op2})
+@deftypefun void mpz_submul (mpz_t @var{rop}, const mpz_t @var{op1}, const mpz_t @var{op2})
+@deftypefunx void mpz_submul_ui (mpz_t @var{rop}, const mpz_t @var{op1}, unsigned long int @var{op2})
  Set @var{rop} to @math{@var{rop} - @var{op1} @GMPtimes{} @var{op2}}.
  @end deftypefun
  
-@deftypefun void mpz_mul_2exp (mpz_t @var{rop}, mpz_t @var{op1}, mp_bitcnt_t @var{op2})
+@deftypefun void mpz_mul_2exp (mpz_t @var{rop}, const mpz_t @var{op1}, mp_bitcnt_t @var{op2})
  @cindex Bit shift left
  Set @var{rop} to @m{@var{op1} \times 2^{op2}, @var{op1} times 2 raised to
  @var{op2}}.  This operation can also be defined as a left shift by @var{op2}
  bits.
  @end deftypefun
  
-@deftypefun void mpz_neg (mpz_t @var{rop}, mpz_t @var{op})
+@deftypefun void mpz_neg (mpz_t @var{rop}, const mpz_t @var{op})
  Set @var{rop} to @minus{}@var{op}.
  @end deftypefun
  
-@deftypefun void mpz_abs (mpz_t @var{rop}, mpz_t @var{op})
+@deftypefun void mpz_abs (mpz_t @var{rop}, const mpz_t @var{op})
  Set @var{rop} to the absolute value of @var{op}.
  @end deftypefun
  
@@ -3279,43 +3264,43 @@ same way as for normal C @code{int} arithmetic.
  @c  between each, and seem to let tex do a better job of page breaks than an
  @c  @sp 1 in the middle of one big set.
  
-@deftypefun void mpz_cdiv_q (mpz_t @var{q}, mpz_t @var{n}, mpz_t @var{d})
-@deftypefunx void mpz_cdiv_r (mpz_t @var{r}, mpz_t @var{n}, mpz_t @var{d})
-@deftypefunx void mpz_cdiv_qr (mpz_t @var{q}, mpz_t @var{r}, mpz_t @var{n}, mpz_t @var{d})
+@deftypefun void mpz_cdiv_q (mpz_t @var{q}, const mpz_t @var{n}, const mpz_t @var{d})
+@deftypefunx void mpz_cdiv_r (mpz_t @var{r}, const mpz_t @var{n}, const mpz_t @var{d})
+@deftypefunx void mpz_cdiv_qr (mpz_t @var{q}, mpz_t @var{r}, const mpz_t @var{n}, const mpz_t @var{d})
  @maybepagebreak
-@deftypefunx {unsigned long int} mpz_cdiv_q_ui (mpz_t @var{q}, mpz_t @var{n}, @w{unsigned long int @var{d}})
-@deftypefunx {unsigned long int} mpz_cdiv_r_ui (mpz_t @var{r}, mpz_t @var{n}, @w{unsigned long int @var{d}})
-@deftypefunx {unsigned long int} mpz_cdiv_qr_ui (mpz_t @var{q}, mpz_t @var{r}, @w{mpz_t @var{n}}, @w{unsigned long int @var{d}})
-@deftypefunx {unsigned long int} mpz_cdiv_ui (mpz_t @var{n}, @w{unsigned long int @var{d}})
+@deftypefunx {unsigned long int} mpz_cdiv_q_ui (mpz_t @var{q}, const mpz_t @var{n}, @w{unsigned long int @var{d}})
+@deftypefunx {unsigned long int} mpz_cdiv_r_ui (mpz_t @var{r}, const mpz_t @var{n}, @w{unsigned long int @var{d}})
+@deftypefunx {unsigned long int} mpz_cdiv_qr_ui (mpz_t @var{q}, mpz_t @var{r}, @w{const mpz_t @var{n}}, @w{unsigned long int @var{d}})
+@deftypefunx {unsigned long int} mpz_cdiv_ui (const mpz_t @var{n}, @w{unsigned long int @var{d}})
  @maybepagebreak
-@deftypefunx void mpz_cdiv_q_2exp (mpz_t @var{q}, mpz_t @var{n}, @w{mp_bitcnt_t @var{b}})
-@deftypefunx void mpz_cdiv_r_2exp (mpz_t @var{r}, mpz_t @var{n}, @w{mp_bitcnt_t @var{b}})
+@deftypefunx void mpz_cdiv_q_2exp (mpz_t @var{q}, const mpz_t @var{n}, @w{mp_bitcnt_t @var{b}})
+@deftypefunx void mpz_cdiv_r_2exp (mpz_t @var{r}, const mpz_t @var{n}, @w{mp_bitcnt_t @var{b}})
  @end deftypefun
  
-@deftypefun void mpz_fdiv_q (mpz_t @var{q}, mpz_t @var{n}, mpz_t @var{d})
-@deftypefunx void mpz_fdiv_r (mpz_t @var{r}, mpz_t @var{n}, mpz_t @var{d})
-@deftypefunx void mpz_fdiv_qr (mpz_t @var{q}, mpz_t @var{r}, mpz_t @var{n}, mpz_t @var{d})
+@deftypefun void mpz_fdiv_q (mpz_t @var{q}, const mpz_t @var{n}, const mpz_t @var{d})
+@deftypefunx void mpz_fdiv_r (mpz_t @var{r}, const mpz_t @var{n}, const mpz_t @var{d})
+@deftypefunx void mpz_fdiv_qr (mpz_t @var{q}, mpz_t @var{r}, const mpz_t @var{n}, const mpz_t @var{d})
  @maybepagebreak
-@deftypefunx {unsigned long int} mpz_fdiv_q_ui (mpz_t @var{q}, mpz_t @var{n}, @w{unsigned long int @var{d}})
-@deftypefunx {unsigned long int} mpz_fdiv_r_ui (mpz_t @var{r}, mpz_t @var{n}, @w{unsigned long int @var{d}})
-@deftypefunx {unsigned long int} mpz_fdiv_qr_ui (mpz_t @var{q}, mpz_t @var{r}, @w{mpz_t @var{n}}, @w{unsigned long int @var{d}})
-@deftypefunx {unsigned long int} mpz_fdiv_ui (mpz_t @var{n}, @w{unsigned long int @var{d}})
+@deftypefunx {unsigned long int} mpz_fdiv_q_ui (mpz_t @var{q}, const mpz_t @var{n}, @w{unsigned long int @var{d}})
+@deftypefunx {unsigned long int} mpz_fdiv_r_ui (mpz_t @var{r}, const mpz_t @var{n}, @w{unsigned long int @var{d}})
+@deftypefunx {unsigned long int} mpz_fdiv_qr_ui (mpz_t @var{q}, mpz_t @var{r}, @w{const mpz_t @var{n}}, @w{unsigned long int @var{d}})
+@deftypefunx {unsigned long int} mpz_fdiv_ui (const mpz_t @var{n}, @w{unsigned long int @var{d}})
  @maybepagebreak
-@deftypefunx void mpz_fdiv_q_2exp (mpz_t @var{q}, mpz_t @var{n}, @w{mp_bitcnt_t @var{b}})
-@deftypefunx void mpz_fdiv_r_2exp (mpz_t @var{r}, mpz_t @var{n}, @w{mp_bitcnt_t @var{b}})
+@deftypefunx void mpz_fdiv_q_2exp (mpz_t @var{q}, const mpz_t @var{n}, @w{mp_bitcnt_t @var{b}})
+@deftypefunx void mpz_fdiv_r_2exp (mpz_t @var{r}, const mpz_t @var{n}, @w{mp_bitcnt_t @var{b}})
  @end deftypefun
  
-@deftypefun void mpz_tdiv_q (mpz_t @var{q}, mpz_t @var{n}, mpz_t @var{d})
-@deftypefunx void mpz_tdiv_r (mpz_t @var{r}, mpz_t @var{n}, mpz_t @var{d})
-@deftypefunx void mpz_tdiv_qr (mpz_t @var{q}, mpz_t @var{r}, mpz_t @var{n}, mpz_t @var{d})
+@deftypefun void mpz_tdiv_q (mpz_t @var{q}, const mpz_t @var{n}, const mpz_t @var{d})
+@deftypefunx void mpz_tdiv_r (mpz_t @var{r}, const mpz_t @var{n}, const mpz_t @var{d})
+@deftypefunx void mpz_tdiv_qr (mpz_t @var{q}, mpz_t @var{r}, const mpz_t @var{n}, const mpz_t @var{d})
  @maybepagebreak
-@deftypefunx {unsigned long int} mpz_tdiv_q_ui (mpz_t @var{q}, mpz_t @var{n}, @w{unsigned long int @var{d}})
-@deftypefunx {unsigned long int} mpz_tdiv_r_ui (mpz_t @var{r}, mpz_t @var{n}, @w{unsigned long int @var{d}})
-@deftypefunx {unsigned long int} mpz_tdiv_qr_ui (mpz_t @var{q}, mpz_t @var{r}, @w{mpz_t @var{n}}, @w{unsigned long int @var{d}})
-@deftypefunx {unsigned long int} mpz_tdiv_ui (mpz_t @var{n}, @w{unsigned long int @var{d}})
+@deftypefunx {unsigned long int} mpz_tdiv_q_ui (mpz_t @var{q}, const mpz_t @var{n}, @w{unsigned long int @var{d}})
+@deftypefunx {unsigned long int} mpz_tdiv_r_ui (mpz_t @var{r}, const mpz_t @var{n}, @w{unsigned long int @var{d}})
+@deftypefunx {unsigned long int} mpz_tdiv_qr_ui (mpz_t @var{q}, mpz_t @var{r}, @w{const mpz_t @var{n}}, @w{unsigned long int @var{d}})
+@deftypefunx {unsigned long int} mpz_tdiv_ui (const mpz_t @var{n}, @w{unsigned long int @var{d}})
  @maybepagebreak
-@deftypefunx void mpz_tdiv_q_2exp (mpz_t @var{q}, mpz_t @var{n}, @w{mp_bitcnt_t @var{b}})
-@deftypefunx void mpz_tdiv_r_2exp (mpz_t @var{r}, mpz_t @var{n}, @w{mp_bitcnt_t @var{b}})
+@deftypefunx void mpz_tdiv_q_2exp (mpz_t @var{q}, const mpz_t @var{n}, @w{mp_bitcnt_t @var{b}})
+@deftypefunx void mpz_tdiv_r_2exp (mpz_t @var{r}, const mpz_t @var{n}, @w{mp_bitcnt_t @var{b}})
  @cindex Bit shift right
  
  @sp 1
@@ -3363,8 +3348,8 @@ the same as the bitwise logical functions do, whereas @code{mpz_tdiv_q_2exp}
  effectively treats @var{n} as sign and magnitude.
  @end deftypefun
  
-@deftypefun void mpz_mod (mpz_t @var{r}, mpz_t @var{n}, mpz_t @var{d})
-@deftypefunx {unsigned long int} mpz_mod_ui (mpz_t @var{r}, mpz_t @var{n}, @w{unsigned long int @var{d}})
+@deftypefun void mpz_mod (mpz_t @var{r}, const mpz_t @var{n}, const mpz_t @var{d})
+@deftypefunx {unsigned long int} mpz_mod_ui (mpz_t @var{r}, const mpz_t @var{n}, @w{unsigned long int @var{d}})
  Set @var{r} to @var{n} @code{mod} @var{d}.  The sign of the divisor is
  ignored; the result is always non-negative.
  
@@ -3373,8 +3358,8 @@ remainder as well as setting @var{r}.  See @code{mpz_fdiv_ui} above if only
  the return value is wanted.
  @end deftypefun
  
-@deftypefun void mpz_divexact (mpz_t @var{q}, mpz_t @var{n}, mpz_t @var{d})
-@deftypefunx void mpz_divexact_ui (mpz_t @var{q}, mpz_t @var{n}, unsigned long @var{d})
+@deftypefun void mpz_divexact (mpz_t @var{q}, const mpz_t @var{n}, const mpz_t @var{d})
+@deftypefunx void mpz_divexact_ui (mpz_t @var{q}, const mpz_t @var{n}, unsigned long @var{d})
  @cindex Exact division functions
  Set @var{q} to @var{n}/@var{d}.  These functions produce correct results only
  when it is known in advance that @var{d} divides @var{n}.
@@ -3384,9 +3369,9 @@ best choice when exact division is known to occur, for example reducing a
  rational to lowest terms.
  @end deftypefun
  
-@deftypefun int mpz_divisible_p (mpz_t @var{n}, mpz_t @var{d})
-@deftypefunx int mpz_divisible_ui_p (mpz_t @var{n}, unsigned long int @var{d})
-@deftypefunx int mpz_divisible_2exp_p (mpz_t @var{n}, mp_bitcnt_t @var{b})
+@deftypefun int mpz_divisible_p (const mpz_t @var{n}, const mpz_t @var{d})
+@deftypefunx int mpz_divisible_ui_p (const mpz_t @var{n}, unsigned long int @var{d})
+@deftypefunx int mpz_divisible_2exp_p (const mpz_t @var{n}, mp_bitcnt_t @var{b})
  @cindex Divisibility functions
  Return non-zero if @var{n} is exactly divisible by @var{d}, or in the case of
  @code{mpz_divisible_2exp_p} by @m{2^b,2^@var{b}}.
@@ -3397,9 +3382,9 @@ functions, @math{@var{d}=0} is accepted and following the rule it can be seen
  that only 0 is considered divisible by 0.
  @end deftypefun
  
-@deftypefun int mpz_congruent_p (mpz_t @var{n}, mpz_t @var{c}, mpz_t @var{d})
-@deftypefunx int mpz_congruent_ui_p (mpz_t @var{n}, unsigned long int @var{c}, unsigned long int @var{d})
-@deftypefunx int mpz_congruent_2exp_p (mpz_t @var{n}, mpz_t @var{c}, mp_bitcnt_t @var{b})
+@deftypefun int mpz_congruent_p (const mpz_t @var{n}, const mpz_t @var{c}, const mpz_t @var{d})
+@deftypefunx int mpz_congruent_ui_p (const mpz_t @var{n}, unsigned long int @var{c}, unsigned long int @var{d})
+@deftypefunx int mpz_congruent_2exp_p (const mpz_t @var{n}, const mpz_t @var{c}, mp_bitcnt_t @var{b})
  @cindex Divisibility functions
  @cindex Congruence functions
  Return non-zero if @var{n} is congruent to @var{c} modulo @var{d}, or in the
@@ -3420,8 +3405,8 @@ only when exactly equal.
  @cindex Exponentiation functions
  @cindex Powering functions
  
-@deftypefun void mpz_powm (mpz_t @var{rop}, mpz_t @var{base}, mpz_t @var{exp}, mpz_t @var{mod})
-@deftypefunx void mpz_powm_ui (mpz_t @var{rop}, mpz_t @var{base}, unsigned long int @var{exp}, mpz_t @var{mod})
+@deftypefun void mpz_powm (mpz_t @var{rop}, const mpz_t @var{base}, const mpz_t @var{exp}, const mpz_t @var{mod})
+@deftypefunx void mpz_powm_ui (mpz_t @var{rop}, const mpz_t @var{base}, unsigned long int @var{exp}, const mpz_t @var{mod})
  Set @var{rop} to @m{base^{exp} \bmod mod, (@var{base} raised to @var{exp})
  modulo @var{mod}}.
  
@@ -3430,7 +3415,7 @@ Negative @var{exp} is supported if an inverse @math{@var{base}^@W{-1} @bmod
  If an inverse doesn't exist then a divide by zero is raised.
  @end deftypefun
  
-@deftypefun void mpz_powm_sec (mpz_t @var{rop}, mpz_t @var{base}, mpz_t @var{exp}, mpz_t @var{mod})
+@deftypefun void mpz_powm_sec (mpz_t @var{rop}, const mpz_t @var{base}, const mpz_t @var{exp}, const mpz_t @var{mod})
  Set @var{rop} to @m{base^{exp} \bmod mod, (@var{base} raised to @var{exp})
  modulo @var{mod}}.
  
@@ -3443,7 +3428,7 @@ function entry.  This function is intended for cryptographic purposes, where
  resilience to side-channel attacks is desired.
  @end deftypefun
  
-@deftypefun void mpz_pow_ui (mpz_t @var{rop}, mpz_t @var{base}, unsigned long int @var{exp})
+@deftypefun void mpz_pow_ui (mpz_t @var{rop}, const mpz_t @var{base}, unsigned long int @var{exp})
  @deftypefunx void mpz_ui_pow_ui (mpz_t @var{rop}, unsigned long int @var{base}, unsigned long int @var{exp})
  Set @var{rop} to @m{base^{exp}, @var{base} raised to @var{exp}}.  The case
  @math{0^0} yields 1.
@@ -3456,25 +3441,25 @@ Set @var{rop} to @m{base^{exp}, @var{base} raised to @var{exp}}.  The case
  @cindex Integer root functions
  @cindex Root extraction functions
  
-@deftypefun int mpz_root (mpz_t @var{rop}, mpz_t @var{op}, unsigned long int @var{n})
+@deftypefun int mpz_root (mpz_t @var{rop}, const mpz_t @var{op}, unsigned long int @var{n})
  Set @var{rop} to @m{\lfloor\root n \of {op}\rfloor@C{},} the truncated integer
  part of the @var{n}th root of @var{op}.  Return non-zero if the computation
  was exact, i.e., if @var{op} is @var{rop} to the @var{n}th power.
  @end deftypefun
  
-@deftypefun void mpz_rootrem (mpz_t @var{root}, mpz_t @var{rem}, mpz_t @var{u}, unsigned long int @var{n})
+@deftypefun void mpz_rootrem (mpz_t @var{root}, mpz_t @var{rem}, const mpz_t @var{u}, unsigned long int @var{n})
  Set @var{root} to @m{\lfloor\root n \of {u}\rfloor@C{},} the truncated
  integer part of the @var{n}th root of @var{u}.  Set @var{rem} to the
  remainder, @m{(@var{u} - @var{root}^n),
  @var{u}@minus{}@var{root}**@var{n}}.
  @end deftypefun
  
-@deftypefun void mpz_sqrt (mpz_t @var{rop}, mpz_t @var{op})
+@deftypefun void mpz_sqrt (mpz_t @var{rop}, const mpz_t @var{op})
  Set @var{rop} to @m{\lfloor\sqrt{@var{op}}\rfloor@C{},} the truncated
  integer part of the square root of @var{op}.
  @end deftypefun
  
-@deftypefun void mpz_sqrtrem (mpz_t @var{rop1}, mpz_t @var{rop2}, mpz_t @var{op})
+@deftypefun void mpz_sqrtrem (mpz_t @var{rop1}, mpz_t @var{rop2}, const mpz_t @var{op})
  Set @var{rop1} to @m{\lfloor\sqrt{@var{op}}\rfloor, the truncated integer part
  of the square root of @var{op}}, like @code{mpz_sqrt}.  Set @var{rop2} to the
  remainder @m{(@var{op} - @var{rop1}^2),
@@ -3485,7 +3470,7 @@ If @var{rop1} and @var{rop2} are the same variable, the results are
  undefined.
  @end deftypefun
  
-@deftypefun int mpz_perfect_power_p (mpz_t @var{op})
+@deftypefun int mpz_perfect_power_p (const mpz_t @var{op})
  @cindex Perfect power functions
  @cindex Root testing functions
  Return non-zero if @var{op} is a perfect power, i.e., if there exist integers
@@ -3497,7 +3482,7 @@ Negative values of @var{op} are accepted, but of course can only be odd
  perfect powers.
  @end deftypefun
  
-@deftypefun int mpz_perfect_square_p (mpz_t @var{op})
+@deftypefun int mpz_perfect_square_p (const mpz_t @var{op})
  @cindex Perfect square functions
  @cindex Root testing functions
  Return non-zero if @var{op} is a perfect square, i.e., if the square root of
@@ -3511,7 +3496,7 @@ be perfect squares.
  @section Number Theoretic Functions
  @cindex Number theoretic functions
  
-@deftypefun int mpz_probab_prime_p (mpz_t @var{n}, int @var{reps})
+@deftypefun int mpz_probab_prime_p (const mpz_t @var{n}, int @var{reps})
  @cindex Prime testing functions
  @cindex Probable prime testing functions
  Determine whether @var{n} is prime.  Return 2 if @var{n} is definitely prime,
@@ -3519,9 +3504,10 @@ return 1 if @var{n} is probably prime (without being certain), or return 0 if
  @var{n} is definitely composite.
  
  This function does some trial divisions, then some Miller-Rabin probabilistic
-primality tests.  @var{reps} controls how many such tests are done, 5 to 10 is
-a reasonable number, more will reduce the chances of a composite being
-returned as ``probably prime''.
+primality tests.  The argument @var{reps} controls how many such tests are
+done; a higher value will reduce the chances of a composite being returned as
+``probably prime''.  25 is a reasonable number; a composite number will then be
+identified as a prime with a probability of less than @m{2^{-50},2^(-50)}.
  
  Miller-Rabin and similar tests can be more properly called compositeness
  tests.  Numbers which fail are known to be composite but those which pass
@@ -3529,7 +3515,7 @@ might be prime or might be composite.  Only a few composites pass, hence those
  which pass are considered probably prime.
  @end deftypefun
  
-@deftypefun void mpz_nextprime (mpz_t @var{rop}, mpz_t @var{op})
+@deftypefun void mpz_nextprime (mpz_t @var{rop}, const mpz_t @var{op})
  @cindex Next prime function
  Set @var{rop} to the next prime greater than @var{op}.
  
@@ -3540,7 +3526,7 @@ extremely small.
  
  @c mpz_prime_p not implemented as of gmp 3.0.
  
-@c @deftypefun int mpz_prime_p (mpz_t @var{n})
+@c @deftypefun int mpz_prime_p (const mpz_t @var{n})
  @c Return non-zero if @var{n} is prime and zero if @var{n} is a non-prime.
  @c This function is far slower than @code{mpz_probab_prime_p}, but then it
  @c never returns non-zero for composite numbers.
@@ -3551,7 +3537,7 @@ extremely small.
  @c prime, if the @var{reps} argument is in the suggested range.)
  @c @end deftypefun
  
-@deftypefun void mpz_gcd (mpz_t @var{rop}, mpz_t @var{op1}, mpz_t @var{op2})
+@deftypefun void mpz_gcd (mpz_t @var{rop}, const mpz_t @var{op1}, const mpz_t @var{op2})
  @cindex Greatest common divisor functions
  @cindex GCD functions
  Set @var{rop} to the greatest common divisor of @var{op1} and @var{op2}.  The
@@ -3559,7 +3545,7 @@ result is always positive even if one or both input operands are negative.
  When both inputs are zero, however, this function defines @math{gcd(0,0) = 0}.
  @end deftypefun
  
-@deftypefun {unsigned long int} mpz_gcd_ui (mpz_t @var{rop}, mpz_t @var{op1}, unsigned long int @var{op2})
+@deftypefun {unsigned long int} mpz_gcd_ui (mpz_t @var{rop}, const mpz_t @var{op1}, unsigned long int @var{op2})
  Compute the greatest common divisor of @var{op1} and @var{op2}.  If
  @var{rop} is not @code{NULL}, store the result there.
  
@@ -3569,7 +3555,7 @@ to the argument @var{op1}.  Note that the result will always fit if @var{op2}
  is non-zero.
  @end deftypefun
  
-@deftypefun void mpz_gcdext (mpz_t @var{g}, mpz_t @var{s}, mpz_t @var{t}, mpz_t @var{a}, mpz_t @var{b})
+@deftypefun void mpz_gcdext (mpz_t @var{g}, mpz_t @var{s}, mpz_t @var{t}, const mpz_t @var{a}, const mpz_t @var{b})
  @cindex Extended GCD
  @cindex GCD extended
  Set @var{g} to the greatest common divisor of @var{a} and @var{b}, and in
@@ -3579,7 +3565,7 @@ The value in @var{g} is always positive, even if one or both of @var{a} and
  @var{b} are negative (or zero if both inputs are zero).  The values in @var{s}
  and @var{t} are chosen such that normally, @math{@GMPabs{@var{s}} <
  @GMPabs{@var{b}} / (2 @var{g})} and @math{@GMPabs{@var{t}} < @GMPabs{@var{a}}
-/ (2 @var{g})}, and these relations define @var{s} and @var{t} uniquely. There
+/ (2 @var{g})}, and these relations define @var{s} and @var{t} uniquely.  There
  are a few exceptional cases:
  
  If @math{@GMPabs{@var{a}} = @GMPabs{@var{b}}}, then @math{@var{s} = 0},
@@ -3596,8 +3582,8 @@ In all cases, @math{@var{s} = 0} if and only if @math{@var{g} =
  If @var{t} is @code{NULL} then that value is not computed.
  @end deftypefun
  
-@deftypefun void mpz_lcm (mpz_t @var{rop}, mpz_t @var{op1}, mpz_t @var{op2})
-@deftypefunx void mpz_lcm_ui (mpz_t @var{rop}, mpz_t @var{op1}, unsigned long @var{op2})
+@deftypefun void mpz_lcm (mpz_t @var{rop}, const mpz_t @var{op1}, const mpz_t @var{op2})
+@deftypefunx void mpz_lcm_ui (mpz_t @var{rop}, const mpz_t @var{op1}, unsigned long @var{op2})
  @cindex Least common multiple functions
  @cindex LCM functions
  Set @var{rop} to the least common multiple of @var{op1} and @var{op2}.
@@ -3605,7 +3591,7 @@ Set @var{rop} to the least common multiple of @var{op1} and @var{op2}.
  @var{op2}.  @var{rop} will be zero if either @var{op1} or @var{op2} is zero.
  @end deftypefun
  
-@deftypefun int mpz_invert (mpz_t @var{rop}, mpz_t @var{op1}, mpz_t @var{op2})
+@deftypefun int mpz_invert (mpz_t @var{rop}, const mpz_t @var{op1}, const mpz_t @var{op2})
  @cindex Modular inverse functions
  @cindex Inverse modulo functions
  Compute the inverse of @var{op1} modulo @var{op2} and put the result in
@@ -3615,24 +3601,24 @@ exist the return value is zero and @var{rop} is undefined.  The behaviour of
  this function is undefined when @var{op2} is zero.
  @end deftypefun
  
-@deftypefun int mpz_jacobi (mpz_t @var{a}, mpz_t @var{b})
+@deftypefun int mpz_jacobi (const mpz_t @var{a}, const mpz_t @var{b})
  @cindex Jacobi symbol functions
  Calculate the Jacobi symbol @m{\left(a \over b\right),
  (@var{a}/@var{b})}.  This is defined only for @var{b} odd.
  @end deftypefun
  
-@deftypefun int mpz_legendre (mpz_t @var{a}, mpz_t @var{p})
+@deftypefun int mpz_legendre (const mpz_t @var{a}, const mpz_t @var{p})
  @cindex Legendre symbol functions
  Calculate the Legendre symbol @m{\left(a \over p\right),
  (@var{a}/@var{p})}.  This is defined only for @var{p} an odd positive
  prime, and for such @var{p} it's identical to the Jacobi symbol.
  @end deftypefun
  
-@deftypefun int mpz_kronecker (mpz_t @var{a}, mpz_t @var{b})
-@deftypefunx int mpz_kronecker_si (mpz_t @var{a}, long @var{b})
-@deftypefunx int mpz_kronecker_ui (mpz_t @var{a}, unsigned long @var{b})
-@deftypefunx int mpz_si_kronecker (long @var{a}, mpz_t @var{b})
-@deftypefunx int mpz_ui_kronecker (unsigned long @var{a}, mpz_t @var{b})
+@deftypefun int mpz_kronecker (const mpz_t @var{a}, const mpz_t @var{b})
+@deftypefunx int mpz_kronecker_si (const mpz_t @var{a}, long @var{b})
+@deftypefunx int mpz_kronecker_ui (const mpz_t @var{a}, unsigned long @var{b})
+@deftypefunx int mpz_si_kronecker (long @var{a}, const mpz_t @var{b})
+@deftypefunx int mpz_ui_kronecker (unsigned long @var{a}, const mpz_t @var{b})
  @cindex Kronecker symbol functions
  Calculate the Jacobi symbol @m{\left(a \over b\right),
  (@var{a}/@var{b})} with the Kronecker extension @m{\left(a \over
@@ -3648,7 +3634,7 @@ or any number theory textbook.  See also the example program
  @file{demos/qcn.c} which uses @code{mpz_kronecker_ui}.
  @end deftypefun
  
-@deftypefun {mp_bitcnt_t} mpz_remove (mpz_t @var{rop}, mpz_t @var{op}, mpz_t @var{f})
+@deftypefun {mp_bitcnt_t} mpz_remove (mpz_t @var{rop}, const mpz_t @var{op}, const mpz_t @var{f})
  @cindex Remove factor functions
  @cindex Factor removal functions
  Remove all occurrences of the factor @var{f} from @var{op} and store the
@@ -3656,12 +3642,22 @@ result in @var{rop}.  The return value is how many such occurrences were
  removed.
  @end deftypefun
  
-@deftypefun void mpz_fac_ui (mpz_t @var{rop}, unsigned long int @var{op})
+@deftypefun void mpz_fac_ui (mpz_t @var{rop}, unsigned long int @var{n})
+@deftypefunx void mpz_2fac_ui (mpz_t @var{rop}, unsigned long int @var{n})
+@deftypefunx void mpz_mfac_uiui (mpz_t @var{rop}, unsigned long int @var{n}, unsigned long int @var{m})
  @cindex Factorial functions
-Set @var{rop} to @var{op}!, the factorial of @var{op}.
+Set @var{rop} to the factorial of @var{n}: @code{mpz_fac_ui} computes the plain factorial @var{n}!,
+@code{mpz_2fac_ui} computes the double-factorial @var{n}!!, and @code{mpz_mfac_uiui} the
+@var{m}-multi-factorial @m{n!^{(m)}, @var{n}!^(@var{m})}.
+@end deftypefun
+
+@deftypefun void mpz_primorial_ui (mpz_t @var{rop}, unsigned long int @var{n})
+@cindex Primorial functions
+Set @var{rop} to the primorial of @var{n}, i.e.@: the product of all positive
+prime numbers @math{@le{}@var{n}}.
  @end deftypefun
  
-@deftypefun void mpz_bin_ui (mpz_t @var{rop}, mpz_t @var{n}, unsigned long int @var{k})
+@deftypefun void mpz_bin_ui (mpz_t @var{rop}, const mpz_t @var{n}, unsigned long int @var{k})
  @deftypefunx void mpz_bin_uiui (mpz_t @var{rop}, unsigned long int @var{n}, @w{unsigned long int @var{k}})
  @cindex Binomial coefficient functions
  Compute the binomial coefficient @m{\left({n}\atop{k}\right), @var{n} over
@@ -3710,10 +3706,10 @@ Algorithm}, the reverse is straightforward too.
  @cindex Integer comparison functions
  @cindex Comparison functions
  
-@deftypefn Function int mpz_cmp (mpz_t @var{op1}, mpz_t @var{op2})
-@deftypefnx Function int mpz_cmp_d (mpz_t @var{op1}, double @var{op2})
-@deftypefnx Macro int mpz_cmp_si (mpz_t @var{op1}, signed long int @var{op2})
-@deftypefnx Macro int mpz_cmp_ui (mpz_t @var{op1}, unsigned long int @var{op2})
+@deftypefn Function int mpz_cmp (const mpz_t @var{op1}, const mpz_t @var{op2})
+@deftypefnx Function int mpz_cmp_d (const mpz_t @var{op1}, double @var{op2})
+@deftypefnx Macro int mpz_cmp_si (const mpz_t @var{op1}, signed long int @var{op2})
+@deftypefnx Macro int mpz_cmp_ui (const mpz_t @var{op1}, unsigned long int @var{op2})
  Compare @var{op1} and @var{op2}.  Return a positive value if @math{@var{op1} >
  @var{op2}}, zero if @math{@var{op1} = @var{op2}}, or a negative value if
  @math{@var{op1} < @var{op2}}.
@@ -3723,9 +3719,9 @@ arguments more than once.  @code{mpz_cmp_d} can be called with an infinity,
  but results are undefined for a NaN.
  @end deftypefn
  
-@deftypefn Function int mpz_cmpabs (mpz_t @var{op1}, mpz_t @var{op2})
-@deftypefnx Function int mpz_cmpabs_d (mpz_t @var{op1}, double @var{op2})
-@deftypefnx Function int mpz_cmpabs_ui (mpz_t @var{op1}, unsigned long int @var{op2})
+@deftypefn Function int mpz_cmpabs (const mpz_t @var{op1}, const mpz_t @var{op2})
+@deftypefnx Function int mpz_cmpabs_d (const mpz_t @var{op1}, double @var{op2})
+@deftypefnx Function int mpz_cmpabs_ui (const mpz_t @var{op1}, unsigned long int @var{op2})
  Compare the absolute values of @var{op1} and @var{op2}.  Return a positive
  value if @math{@GMPabs{@var{op1}} > @GMPabs{@var{op2}}}, zero if
  @math{@GMPabs{@var{op1}} = @GMPabs{@var{op2}}}, or a negative value if
@@ -3735,7 +3731,7 @@ value if @math{@GMPabs{@var{op1}} > @GMPabs{@var{op2}}}, zero if
  for a NaN.
  @end deftypefn
  
-@deftypefn Macro int mpz_sgn (mpz_t @var{op})
+@deftypefn Macro int mpz_sgn (const mpz_t @var{op})
  @cindex Sign tests
  @cindex Integer sign tests
  Return @math{+1} if @math{@var{op} > 0}, 0 if @math{@var{op} = 0}, and
@@ -3758,30 +3754,30 @@ These functions behave as if twos complement arithmetic were used (although
  sign-magnitude is the actual implementation).  The least significant bit is
  number 0.
  
-@deftypefun void mpz_and (mpz_t @var{rop}, mpz_t @var{op1}, mpz_t @var{op2})
+@deftypefun void mpz_and (mpz_t @var{rop}, const mpz_t @var{op1}, const mpz_t @var{op2})
  Set @var{rop} to @var{op1} bitwise-and @var{op2}.
  @end deftypefun
  
-@deftypefun void mpz_ior (mpz_t @var{rop}, mpz_t @var{op1}, mpz_t @var{op2})
+@deftypefun void mpz_ior (mpz_t @var{rop}, const mpz_t @var{op1}, const mpz_t @var{op2})
  Set @var{rop} to @var{op1} bitwise inclusive-or @var{op2}.
  @end deftypefun
  
-@deftypefun void mpz_xor (mpz_t @var{rop}, mpz_t @var{op1}, mpz_t @var{op2})
+@deftypefun void mpz_xor (mpz_t @var{rop}, const mpz_t @var{op1}, const mpz_t @var{op2})
  Set @var{rop} to @var{op1} bitwise exclusive-or @var{op2}.
  @end deftypefun
  
-@deftypefun void mpz_com (mpz_t @var{rop}, mpz_t @var{op})
+@deftypefun void mpz_com (mpz_t @var{rop}, const mpz_t @var{op})
  Set @var{rop} to the one's complement of @var{op}.
  @end deftypefun
  
-@deftypefun {mp_bitcnt_t} mpz_popcount (mpz_t @var{op})
+@deftypefun {mp_bitcnt_t} mpz_popcount (const mpz_t @var{op})
  If @math{@var{op}@ge{}0}, return the population count of @var{op}, which is the
  number of 1 bits in the binary representation.  If @math{@var{op}<0}, the
  number of 1s is infinite, and the return value is the largest possible
  @code{mp_bitcnt_t}.
  @end deftypefun
  
-@deftypefun {mp_bitcnt_t} mpz_hamdist (mpz_t @var{op1}, mpz_t @var{op2})
+@deftypefun {mp_bitcnt_t} mpz_hamdist (const mpz_t @var{op1}, const mpz_t @var{op2})
  If @var{op1} and @var{op2} are both @math{@ge{}0} or both @math{<0}, return the
  Hamming distance between the two operands, which is the number of bit positions
  where @var{op1} and @var{op2} have different bit values.  If one operand is
@@ -3789,8 +3785,8 @@ where @var{op1} and @var{op2} have different bit values.  If one operand is
  infinite, and the return value is the largest possible @code{mp_bitcnt_t}.
  @end deftypefun
  
-@deftypefun {mp_bitcnt_t} mpz_scan0 (mpz_t @var{op}, mp_bitcnt_t @var{starting_bit})
-@deftypefunx {mp_bitcnt_t} mpz_scan1 (mpz_t @var{op}, mp_bitcnt_t @var{starting_bit})
+@deftypefun {mp_bitcnt_t} mpz_scan0 (const mpz_t @var{op}, mp_bitcnt_t @var{starting_bit})
+@deftypefunx {mp_bitcnt_t} mpz_scan1 (const mpz_t @var{op}, mp_bitcnt_t @var{starting_bit})
  @cindex Bit scanning functions
  @cindex Scan bit functions
  Scan @var{op}, starting from bit @var{starting_bit}, towards more significant
@@ -3817,7 +3813,7 @@ Clear bit @var{bit_index} in @var{rop}.
  Complement bit @var{bit_index} in @var{rop}.
  @end deftypefun
  
-@deftypefun int mpz_tstbit (mpz_t @var{op}, mp_bitcnt_t @var{bit_index})
+@deftypefun int mpz_tstbit (const mpz_t @var{op}, mp_bitcnt_t @var{bit_index})
  Test bit @var{bit_index} in @var{op} and return 0 or 1 accordingly.
  @end deftypefun
  
@@ -3840,7 +3836,7 @@ for these functions.
  
  See also @ref{Formatted Output} and @ref{Formatted Input}.
  
-@deftypefun size_t mpz_out_str (FILE *@var{stream}, int @var{base}, mpz_t @var{op})
+@deftypefun size_t mpz_out_str (FILE *@var{stream}, int @var{base}, const mpz_t @var{op})
  Output @var{op} on stdio stream @var{stream}, as a string of digits in base
  @var{base}.  The base argument may vary from 2 to 62 or from @minus{}2 to
  @minus{}36.
@@ -3868,7 +3864,7 @@ the same value.  For bases 37 to 62, upper-case letter represent the usual
  Return the number of bytes read, or if an error occurred, return 0.
  @end deftypefun
  
-@deftypefun size_t mpz_out_raw (FILE *@var{stream}, mpz_t @var{op})
+@deftypefun size_t mpz_out_raw (FILE *@var{stream}, const mpz_t @var{op})
  Output @var{op} on stdio stream @var{stream}, in raw binary format.  The
  integer is written in a portable format, with 4 bytes of size information, and
  that many bytes of limbs.  Both the size and the limbs are written in
@@ -3915,7 +3911,7 @@ The variable @var{state} must be initialized by calling one of the
  invoking this function.
  @end deftypefun
  
-@deftypefun void mpz_urandomm (mpz_t @var{rop}, gmp_randstate_t @var{state}, mpz_t @var{n})
+@deftypefun void mpz_urandomm (mpz_t @var{rop}, gmp_randstate_t @var{state}, const mpz_t @var{n})
  Generate a uniform random integer in the range 0 to @math{@var{n}-1},
  inclusive.
  
@@ -3998,7 +3994,7 @@ feature can account for this, by passing for instance
  @code{8*sizeof(int)-INT_BIT}.
  @end deftypefun
  
-@deftypefun {void *} mpz_export (void *@var{rop}, size_t *@var{countp}, int @var{order}, size_t @var{size}, int @var{endian}, size_t @var{nails}, mpz_t @var{op})
+@deftypefun {void *} mpz_export (void *@var{rop}, size_t *@var{countp}, int @var{order}, size_t @var{size}, int @var{endian}, size_t @var{nails}, const mpz_t @var{op})
  @cindex Integer export
  @cindex Export
  Fill @var{rop} with word data from @var{op}.
@@ -4049,24 +4045,24 @@ p = malloc (count * size);
  @cindex Miscellaneous integer functions
  @cindex Integer miscellaneous functions
  
-@deftypefun int mpz_fits_ulong_p (mpz_t @var{op})
-@deftypefunx int mpz_fits_slong_p (mpz_t @var{op})
-@deftypefunx int mpz_fits_uint_p (mpz_t @var{op})
-@deftypefunx int mpz_fits_sint_p (mpz_t @var{op})
-@deftypefunx int mpz_fits_ushort_p (mpz_t @var{op})
-@deftypefunx int mpz_fits_sshort_p (mpz_t @var{op})
+@deftypefun int mpz_fits_ulong_p (const mpz_t @var{op})
+@deftypefunx int mpz_fits_slong_p (const mpz_t @var{op})
+@deftypefunx int mpz_fits_uint_p (const mpz_t @var{op})
+@deftypefunx int mpz_fits_sint_p (const mpz_t @var{op})
+@deftypefunx int mpz_fits_ushort_p (const mpz_t @var{op})
+@deftypefunx int mpz_fits_sshort_p (const mpz_t @var{op})
  Return non-zero iff the value of @var{op} fits in an @code{unsigned long int},
  @code{signed long int}, @code{unsigned int}, @code{signed int}, @code{unsigned
  short int}, or @code{signed short int}, respectively.  Otherwise, return zero.
  @end deftypefun
  
-@deftypefn Macro int mpz_odd_p (mpz_t @var{op})
-@deftypefnx Macro int mpz_even_p (mpz_t @var{op})
+@deftypefn Macro int mpz_odd_p (const mpz_t @var{op})
+@deftypefnx Macro int mpz_even_p (const mpz_t @var{op})
  Determine whether @var{op} is odd or even, respectively.  Return non-zero if
  yes, zero if no.  These macros evaluate their argument more than once.
  @end deftypefn
  
-@deftypefun size_t mpz_sizeinbase (mpz_t @var{op}, int @var{base})
+@deftypefun size_t mpz_sizeinbase (const mpz_t @var{op}, int @var{base})
  @cindex Size in digits
  @cindex Digits in an integer
  Return the size of @var{op} measured in number of digits in the given
@@ -4160,7 +4156,7 @@ this.  @code{mpz_realloc2} and @code{_mpz_realloc} are the same except that
  @code{_mpz_realloc} takes its size in limbs.
  @end deftypefun
  
-@deftypefun mp_limb_t mpz_getlimbn (mpz_t @var{op}, mp_size_t @var{n})
+@deftypefun mp_limb_t mpz_getlimbn (const mpz_t @var{op}, mp_size_t @var{n})
  Return limb number @var{n} from @var{op}.  The sign of @var{op} is ignored,
  just the absolute value is used.  The least significant limb is number 0.
  
@@ -4169,7 +4165,7 @@ just the absolute value is used.  The least significant limb is number 0.
  @code{mpz_size(@var{op})-1}.
  @end deftypefun
  
-@deftypefun size_t mpz_size (mpz_t @var{op})
+@deftypefun size_t mpz_size (const mpz_t @var{op})
  Return the size of @var{op} measured in number of limbs.  If @var{op} is zero,
  the returned value will be zero.
  @c (@xref{Nomenclature}, for an explanation of the concept @dfn{limb}.)
@@ -4238,8 +4234,8 @@ Free the space occupied by @var{x}.  Make sure to call this function for all
  Free the space occupied by a NULL-terminated list of @code{mpq_t} variables.
  @end deftypefun
  
-@deftypefun void mpq_set (mpq_t @var{rop}, mpq_t @var{op})
-@deftypefunx void mpq_set_z (mpq_t @var{rop}, mpz_t @var{op})
+@deftypefun void mpq_set (mpq_t @var{rop}, const mpq_t @var{op})
+@deftypefunx void mpq_set_z (mpq_t @var{rop}, const mpz_t @var{op})
  Assign @var{rop} from @var{op}.
  @end deftypefun
  
@@ -4250,7 +4246,7 @@ Set the value of @var{rop} to @var{op1}/@var{op2}.  Note that if @var{op1} and
  @code{mpq_canonicalize} before any operations are performed on @var{rop}.
  @end deftypefun
  
-@deftypefun int mpq_set_str (mpq_t @var{rop}, char *@var{str}, int @var{base})
+@deftypefun int mpq_set_str (mpq_t @var{rop}, const char *@var{str}, int @var{base})
  Set @var{rop} from a null-terminated string @var{str} in the given @var{base}.
  
  The string can be an integer like ``41'' or a fraction like ``41/152''.  The
@@ -4282,7 +4278,7 @@ Swap the values @var{rop1} and @var{rop2} efficiently.
  @cindex Rational conversion functions
  @cindex Conversion functions
  
-@deftypefun double mpq_get_d (mpq_t @var{op})
+@deftypefun double mpq_get_d (const mpq_t @var{op})
  Convert @var{op} to a @code{double}, truncating if necessary (i.e.@: rounding
  towards zero).
  
@@ -4293,12 +4289,12 @@ Hardware overflow, underflow and denorm traps may or may not occur.
  @end deftypefun
  
  @deftypefun void mpq_set_d (mpq_t @var{rop}, double @var{op})
-@deftypefunx void mpq_set_f (mpq_t @var{rop}, mpf_t @var{op})
+@deftypefunx void mpq_set_f (mpq_t @var{rop}, const mpf_t @var{op})
  Set @var{rop} to the value of @var{op}.  There is no rounding, this conversion
  is exact.
  @end deftypefun
  
-@deftypefun {char *} mpq_get_str (char *@var{str}, int @var{base}, mpq_t @var{op})
+@deftypefun {char *} mpq_get_str (char *@var{str}, int @var{base}, const mpq_t @var{op})
  Convert @var{op} to a string of digits in base @var{base}.  The base may vary
  from 2 to 36.  The string will be of the form @samp{num/den}, or if the
  denominator is 1 then just @samp{num}.
@@ -4330,42 +4326,42 @@ or the given @var{str}.
  @cindex Rational arithmetic functions
  @cindex Arithmetic functions
  
-@deftypefun void mpq_add (mpq_t @var{sum}, mpq_t @var{addend1}, mpq_t @var{addend2})
+@deftypefun void mpq_add (mpq_t @var{sum}, const mpq_t @var{addend1}, const mpq_t @var{addend2})
  Set @var{sum} to @var{addend1} + @var{addend2}.
  @end deftypefun
  
-@deftypefun void mpq_sub (mpq_t @var{difference}, mpq_t @var{minuend}, mpq_t @var{subtrahend})
+@deftypefun void mpq_sub (mpq_t @var{difference}, const mpq_t @var{minuend}, const mpq_t @var{subtrahend})
  Set @var{difference} to @var{minuend} @minus{} @var{subtrahend}.
  @end deftypefun
  
-@deftypefun void mpq_mul (mpq_t @var{product}, mpq_t @var{multiplier}, mpq_t @var{multiplicand})
+@deftypefun void mpq_mul (mpq_t @var{product}, const mpq_t @var{multiplier}, const mpq_t @var{multiplicand})
  Set @var{product} to @math{@var{multiplier} @GMPtimes{} @var{multiplicand}}.
  @end deftypefun
  
-@deftypefun void mpq_mul_2exp (mpq_t @var{rop}, mpq_t @var{op1}, mp_bitcnt_t @var{op2})
+@deftypefun void mpq_mul_2exp (mpq_t @var{rop}, const mpq_t @var{op1}, mp_bitcnt_t @var{op2})
  Set @var{rop} to @m{@var{op1} \times 2^{op2}, @var{op1} times 2 raised to
  @var{op2}}.
  @end deftypefun
  
-@deftypefun void mpq_div (mpq_t @var{quotient}, mpq_t @var{dividend}, mpq_t @var{divisor})
+@deftypefun void mpq_div (mpq_t @var{quotient}, const mpq_t @var{dividend}, const mpq_t @var{divisor})
  @cindex Division functions
  Set @var{quotient} to @var{dividend}/@var{divisor}.
  @end deftypefun
  
-@deftypefun void mpq_div_2exp (mpq_t @var{rop}, mpq_t @var{op1}, mp_bitcnt_t @var{op2})
+@deftypefun void mpq_div_2exp (mpq_t @var{rop}, const mpq_t @var{op1}, mp_bitcnt_t @var{op2})
  Set @var{rop} to @m{@var{op1}/2^{op2}, @var{op1} divided by 2 raised to
  @var{op2}}.
  @end deftypefun
  
-@deftypefun void mpq_neg (mpq_t @var{negated_operand}, mpq_t @var{operand})
+@deftypefun void mpq_neg (mpq_t @var{negated_operand}, const mpq_t @var{operand})
  Set @var{negated_operand} to @minus{}@var{operand}.
  @end deftypefun
  
-@deftypefun void mpq_abs (mpq_t @var{rop}, mpq_t @var{op})
+@deftypefun void mpq_abs (mpq_t @var{rop}, const mpq_t @var{op})
  Set @var{rop} to the absolute value of @var{op}.
  @end deftypefun
  
-@deftypefun void mpq_inv (mpq_t @var{inverted_number}, mpq_t @var{number})
+@deftypefun void mpq_inv (mpq_t @var{inverted_number}, const mpq_t @var{number})
  Set @var{inverted_number} to 1/@var{number}.  If the new denominator is
  zero, this routine will divide by zero.
  @end deftypefun
@@ -4376,7 +4372,7 @@ zero, this routine will divide by zero.
  @cindex Rational comparison functions
  @cindex Comparison functions
  
-@deftypefun int mpq_cmp (mpq_t @var{op1}, mpq_t @var{op2})
+@deftypefun int mpq_cmp (const mpq_t @var{op1}, const mpq_t @var{op2})
  Compare @var{op1} and @var{op2}.  Return a positive value if @math{@var{op1} >
  @var{op2}}, zero if @math{@var{op1} = @var{op2}}, and a negative value if
  @math{@var{op1} < @var{op2}}.
@@ -4385,8 +4381,8 @@ To determine if two rationals are equal, @code{mpq_equal} is faster than
  @code{mpq_cmp}.
  @end deftypefun
  
-@deftypefn Macro int mpq_cmp_ui (mpq_t @var{op1}, unsigned long int @var{num2}, unsigned long int @var{den2})
-@deftypefnx Macro int mpq_cmp_si (mpq_t @var{op1}, long int @var{num2}, unsigned long int @var{den2})
+@deftypefn Macro int mpq_cmp_ui (const mpq_t @var{op1}, unsigned long int @var{num2}, unsigned long int @var{den2})
+@deftypefnx Macro int mpq_cmp_si (const mpq_t @var{op1}, long int @var{num2}, unsigned long int @var{den2})
  Compare @var{op1} and @var{num2}/@var{den2}.  Return a positive value if
  @math{@var{op1} > @var{num2}/@var{den2}}, zero if @math{@var{op1} =
  @var{num2}/@var{den2}}, and a negative value if @math{@var{op1} <
@@ -4398,17 +4394,17 @@ These functions are implemented as a macros and evaluate their arguments
  multiple times.
  @end deftypefn
  
-@deftypefn Macro int mpq_sgn (mpq_t @var{op})
+@deftypefn Macro int mpq_sgn (const mpq_t @var{op})
  @cindex Sign tests
  @cindex Rational sign tests
  Return @math{+1} if @math{@var{op} > 0}, 0 if @math{@var{op} = 0}, and
  @math{-1} if @math{@var{op} < 0}.
  
  This function is actually implemented as a macro.  It evaluates its
-arguments multiple times.
+argument multiple times.
  @end deftypefn
  
-@deftypefun int mpq_equal (mpq_t @var{op1}, mpq_t @var{op2})
+@deftypefun int mpq_equal (const mpq_t @var{op1}, const mpq_t @var{op2})
  Return non-zero if @var{op1} and @var{op2} are equal, zero if they are
  non-equal.  Although @code{mpq_cmp} can be used for the same purpose, this
  function is much faster.
@@ -4429,16 +4425,16 @@ Note that if an assignment to the numerator and/or denominator could take an
  (@pxref{Rational Number Functions}) then @code{mpq_canonicalize} must be
  called before any other @code{mpq} functions are applied to that @code{mpq_t}.
  
-@deftypefn Macro mpz_t mpq_numref (mpq_t @var{op})
-@deftypefnx Macro mpz_t mpq_denref (mpq_t @var{op})
+@deftypefn Macro mpz_t mpq_numref (const mpq_t @var{op})
+@deftypefnx Macro mpz_t mpq_denref (const mpq_t @var{op})
  Return a reference to the numerator and denominator of @var{op}, respectively.
  The @code{mpz} functions can be used on the result of these macros.
  @end deftypefn
  
-@deftypefun void mpq_get_num (mpz_t @var{numerator}, mpq_t @var{rational})
-@deftypefunx void mpq_get_den (mpz_t @var{denominator}, mpq_t @var{rational})
-@deftypefunx void mpq_set_num (mpq_t @var{rational}, mpz_t @var{numerator})
-@deftypefunx void mpq_set_den (mpq_t @var{rational}, mpz_t @var{denominator})
+@deftypefun void mpq_get_num (mpz_t @var{numerator}, const mpq_t @var{rational})
+@deftypefunx void mpq_get_den (mpz_t @var{denominator}, const mpq_t @var{rational})
+@deftypefunx void mpq_set_num (mpq_t @var{rational}, const mpz_t @var{numerator})
+@deftypefunx void mpq_set_den (mpq_t @var{rational}, const mpz_t @var{denominator})
  Get or set the numerator or denominator of a rational.  These functions are
  equivalent to calling @code{mpz_set} with an appropriate @code{mpq_numref} or
  @code{mpq_denref}.  Direct use of @code{mpq_numref} or @code{mpq_denref} is
@@ -4466,7 +4462,7 @@ for these functions.
  
  See also @ref{Formatted Output} and @ref{Formatted Input}.
  
-@deftypefun size_t mpq_out_str (FILE *@var{stream}, int @var{base}, mpq_t @var{op})
+@deftypefun size_t mpq_out_str (FILE *@var{stream}, int @var{base}, const mpq_t @var{op})
  Output @var{op} on stdio stream @var{stream}, as a string of digits in base
  @var{base}.  The base may vary from 2 to 36.  Output is in the form
  @samp{num/den} or if the denominator is 1 then just @samp{num}.
@@ -4629,7 +4625,7 @@ calculation.  A typical use would be for adjusting the precision gradually in
  iterative algorithms like Newton-Raphson, making the computation precision
  closely match the actual accurate part of the numbers.
  
-@deftypefun {mp_bitcnt_t} mpf_get_prec (mpf_t @var{op})
+@deftypefun {mp_bitcnt_t} mpf_get_prec (const mpf_t @var{op})
  Return the current precision of @var{op}, in bits.
  @end deftypefun
  
@@ -4678,16 +4674,16 @@ different purposes during a calculation.
  These functions assign new values to already initialized floats
  (@pxref{Initializing Floats}).
  
-@deftypefun void mpf_set (mpf_t @var{rop}, mpf_t @var{op})
+@deftypefun void mpf_set (mpf_t @var{rop}, const mpf_t @var{op})
  @deftypefunx void mpf_set_ui (mpf_t @var{rop}, unsigned long int @var{op})
  @deftypefunx void mpf_set_si (mpf_t @var{rop}, signed long int @var{op})
  @deftypefunx void mpf_set_d (mpf_t @var{rop}, double @var{op})
-@deftypefunx void mpf_set_z (mpf_t @var{rop}, mpz_t @var{op})
-@deftypefunx void mpf_set_q (mpf_t @var{rop}, mpq_t @var{op})
+@deftypefunx void mpf_set_z (mpf_t @var{rop}, const mpz_t @var{op})
+@deftypefunx void mpf_set_q (mpf_t @var{rop}, const mpq_t @var{op})
  Set the value of @var{rop} from @var{op}.
  @end deftypefun
  
-@deftypefun int mpf_set_str (mpf_t @var{rop}, char *@var{str}, int @var{base})
+@deftypefun int mpf_set_str (mpf_t @var{rop}, const char *@var{str}, int @var{base})
  Set the value of @var{rop} from the string in @var{str}.  The string is of the
  form @samp{M@@N} or, if the base is 10 or less, alternatively @samp{MeN}.
  @samp{M} is the mantissa and @samp{N} is the exponent.  The mantissa is always
@@ -4742,7 +4738,7 @@ functions, it can be used as the source or destination operand for the ordinary
  float functions.  Don't use an initialize-and-set function on a variable
  already initialized!
  
-@deftypefun void mpf_init_set (mpf_t @var{rop}, mpf_t @var{op})
+@deftypefun void mpf_init_set (mpf_t @var{rop}, const mpf_t @var{op})
  @deftypefunx void mpf_init_set_ui (mpf_t @var{rop}, unsigned long int @var{op})
  @deftypefunx void mpf_init_set_si (mpf_t @var{rop}, signed long int @var{op})
  @deftypefunx void mpf_init_set_d (mpf_t @var{rop}, double @var{op})
@@ -4752,7 +4748,7 @@ The precision of @var{rop} will be taken from the active default precision, as
  set by @code{mpf_set_default_prec}.
  @end deftypefun
  
-@deftypefun int mpf_init_set_str (mpf_t @var{rop}, char *@var{str}, int @var{base})
+@deftypefun int mpf_init_set_str (mpf_t @var{rop}, const char *@var{str}, int @var{base})
  Initialize @var{rop} and set its value from the string in @var{str}.  See
  @code{mpf_set_str} above for details on the assignment operation.
  
@@ -4770,7 +4766,7 @@ set by @code{mpf_set_default_prec}.
  @cindex Float conversion functions
  @cindex Conversion functions
  
-@deftypefun double mpf_get_d (mpf_t @var{op})
+@deftypefun double mpf_get_d (const mpf_t @var{op})
  Convert @var{op} to a @code{double}, truncating if necessary (i.e.@: rounding
  towards zero).
  
@@ -4780,7 +4776,7 @@ available.  For too small @math{0.0} is normally returned.  Hardware overflow,
  underflow and denorm traps may or may not occur.
  @end deftypefun
  
-@deftypefun double mpf_get_d_2exp (signed long int *@var{exp}, mpf_t @var{op})
+@deftypefun double mpf_get_d_2exp (signed long int *@var{exp}, const mpf_t @var{op})
  Convert @var{op} to a @code{double}, truncating if necessary (i.e.@: rounding
  towards zero), and with an exponent returned separately.
  
@@ -4794,8 +4790,8 @@ This is similar to the standard C @code{frexp} function (@pxref{Normalization
  Functions,,, libc, The GNU C Library Reference Manual}).
  @end deftypefun
  
-@deftypefun long mpf_get_si (mpf_t @var{op})
-@deftypefunx {unsigned long} mpf_get_ui (mpf_t @var{op})
+@deftypefun long mpf_get_si (const mpf_t @var{op})
+@deftypefunx {unsigned long} mpf_get_ui (const mpf_t @var{op})
  Convert @var{op} to a @code{long} or @code{unsigned long}, truncating any
  fraction part.  If @var{op} is too big for the return type, the result is
  undefined.
@@ -4804,7 +4800,7 @@ See also @code{mpf_fits_slong_p} and @code{mpf_fits_ulong_p}
  (@pxref{Miscellaneous Float Functions}).
  @end deftypefun
  
-@deftypefun {char *} mpf_get_str (char *@var{str}, mp_exp_t *@var{expptr}, int @var{base}, size_t @var{n_digits}, mpf_t @var{op})
+@deftypefun {char *} mpf_get_str (char *@var{str}, mp_exp_t *@var{expptr}, int @var{base}, size_t @var{n_digits}, const mpf_t @var{op})
  Convert @var{op} to a string of digits in base @var{base}.  The base argument
  may vary from 2 to 62 or from @minus{}2 to @minus{}36.  Up to @var{n_digits}
  digits will be generated.  Trailing zeros are not returned.  No more digits
@@ -4846,19 +4842,19 @@ or the given @var{str}.
  @cindex Float arithmetic functions
  @cindex Arithmetic functions
  
-@deftypefun void mpf_add (mpf_t @var{rop}, mpf_t @var{op1}, mpf_t @var{op2})
-@deftypefunx void mpf_add_ui (mpf_t @var{rop}, mpf_t @var{op1}, unsigned long int @var{op2})
+@deftypefun void mpf_add (mpf_t @var{rop}, const mpf_t @var{op1}, const mpf_t @var{op2})
+@deftypefunx void mpf_add_ui (mpf_t @var{rop}, const mpf_t @var{op1}, unsigned long int @var{op2})
  Set @var{rop} to @math{@var{op1} + @var{op2}}.
  @end deftypefun
  
-@deftypefun void mpf_sub (mpf_t @var{rop}, mpf_t @var{op1}, mpf_t @var{op2})
-@deftypefunx void mpf_ui_sub (mpf_t @var{rop}, unsigned long int @var{op1}, mpf_t @var{op2})
-@deftypefunx void mpf_sub_ui (mpf_t @var{rop}, mpf_t @var{op1}, unsigned long int @var{op2})
+@deftypefun void mpf_sub (mpf_t @var{rop}, const mpf_t @var{op1}, const mpf_t @var{op2})
+@deftypefunx void mpf_ui_sub (mpf_t @var{rop}, unsigned long int @var{op1}, const mpf_t @var{op2})
+@deftypefunx void mpf_sub_ui (mpf_t @var{rop}, const mpf_t @var{op1}, unsigned long int @var{op2})
  Set @var{rop} to @var{op1} @minus{} @var{op2}.
  @end deftypefun
  
-@deftypefun void mpf_mul (mpf_t @var{rop}, mpf_t @var{op1}, mpf_t @var{op2})
-@deftypefunx void mpf_mul_ui (mpf_t @var{rop}, mpf_t @var{op1}, unsigned long int @var{op2})
+@deftypefun void mpf_mul (mpf_t @var{rop}, const mpf_t @var{op1}, const mpf_t @var{op2})
+@deftypefunx void mpf_mul_ui (mpf_t @var{rop}, const mpf_t @var{op1}, unsigned long int @var{op2})
  Set @var{rop} to @math{@var{op1} @GMPtimes{} @var{op2}}.
  @end deftypefun
  
@@ -4867,39 +4863,39 @@ divide functions will make these functions intentionally divide by zero.  This
  lets the user handle arithmetic exceptions in these functions in the same
  manner as other arithmetic exceptions.
  
-@deftypefun void mpf_div (mpf_t @var{rop}, mpf_t @var{op1}, mpf_t @var{op2})
-@deftypefunx void mpf_ui_div (mpf_t @var{rop}, unsigned long int @var{op1}, mpf_t @var{op2})
-@deftypefunx void mpf_div_ui (mpf_t @var{rop}, mpf_t @var{op1}, unsigned long int @var{op2})
+@deftypefun void mpf_div (mpf_t @var{rop}, const mpf_t @var{op1}, const mpf_t @var{op2})
+@deftypefunx void mpf_ui_div (mpf_t @var{rop}, unsigned long int @var{op1}, const mpf_t @var{op2})
+@deftypefunx void mpf_div_ui (mpf_t @var{rop}, const mpf_t @var{op1}, unsigned long int @var{op2})
  @cindex Division functions
  Set @var{rop} to @var{op1}/@var{op2}.
  @end deftypefun
  
-@deftypefun void mpf_sqrt (mpf_t @var{rop}, mpf_t @var{op})
+@deftypefun void mpf_sqrt (mpf_t @var{rop}, const mpf_t @var{op})
  @deftypefunx void mpf_sqrt_ui (mpf_t @var{rop}, unsigned long int @var{op})
  @cindex Root extraction functions
  Set @var{rop} to @m{\sqrt{@var{op}}, the square root of @var{op}}.
  @end deftypefun
  
-@deftypefun void mpf_pow_ui (mpf_t @var{rop}, mpf_t @var{op1}, unsigned long int @var{op2})
+@deftypefun void mpf_pow_ui (mpf_t @var{rop}, const mpf_t @var{op1}, unsigned long int @var{op2})
  @cindex Exponentiation functions
  @cindex Powering functions
  Set @var{rop} to @m{@var{op1}^{op2}, @var{op1} raised to the power @var{op2}}.
  @end deftypefun
  
-@deftypefun void mpf_neg (mpf_t @var{rop}, mpf_t @var{op})
+@deftypefun void mpf_neg (mpf_t @var{rop}, const mpf_t @var{op})
  Set @var{rop} to @minus{}@var{op}.
  @end deftypefun
  
-@deftypefun void mpf_abs (mpf_t @var{rop}, mpf_t @var{op})
+@deftypefun void mpf_abs (mpf_t @var{rop}, const mpf_t @var{op})
  Set @var{rop} to the absolute value of @var{op}.
  @end deftypefun
  
-@deftypefun void mpf_mul_2exp (mpf_t @var{rop}, mpf_t @var{op1}, mp_bitcnt_t @var{op2})
+@deftypefun void mpf_mul_2exp (mpf_t @var{rop}, const mpf_t @var{op1}, mp_bitcnt_t @var{op2})
  Set @var{rop} to @m{@var{op1} \times 2^{op2}, @var{op1} times 2 raised to
  @var{op2}}.
  @end deftypefun
  
-@deftypefun void mpf_div_2exp (mpf_t @var{rop}, mpf_t @var{op1}, mp_bitcnt_t @var{op2})
+@deftypefun void mpf_div_2exp (mpf_t @var{rop}, const mpf_t @var{op1}, mp_bitcnt_t @var{op2})
  Set @var{rop} to @m{@var{op1}/2^{op2}, @var{op1} divided by 2 raised to
  @var{op2}}.
  @end deftypefun
@@ -4910,10 +4906,10 @@ Set @var{rop} to @m{@var{op1}/2^{op2}, @var{op1} divided by 2 raised to
  @cindex Float comparison functions
  @cindex Comparison functions
  
-@deftypefun int mpf_cmp (mpf_t @var{op1}, mpf_t @var{op2})
-@deftypefunx int mpf_cmp_d (mpf_t @var{op1}, double @var{op2})
-@deftypefunx int mpf_cmp_ui (mpf_t @var{op1}, unsigned long int @var{op2})
-@deftypefunx int mpf_cmp_si (mpf_t @var{op1}, signed long int @var{op2})
+@deftypefun int mpf_cmp (const mpf_t @var{op1}, const mpf_t @var{op2})
+@deftypefunx int mpf_cmp_d (const mpf_t @var{op1}, double @var{op2})
+@deftypefunx int mpf_cmp_ui (const mpf_t @var{op1}, unsigned long int @var{op2})
+@deftypefunx int mpf_cmp_si (const mpf_t @var{op1}, signed long int @var{op2})
  Compare @var{op1} and @var{op2}.  Return a positive value if @math{@var{op1} >
  @var{op2}}, zero if @math{@var{op1} = @var{op2}}, and a negative value if
  @math{@var{op1} < @var{op2}}.
@@ -4922,7 +4918,7 @@ Compare @var{op1} and @var{op2}.  Return a positive value if @math{@var{op1} >
  a NaN.
  @end deftypefun
  
-@deftypefun int mpf_eq (mpf_t @var{op1}, mpf_t @var{op2}, mp_bitcnt_t op3)
+@deftypefun int mpf_eq (const mpf_t @var{op1}, const mpf_t @var{op2}, mp_bitcnt_t @var{op3})
  Return non-zero if the first @var{op3} bits of @var{op1} and @var{op2} are
  equal, zero otherwise.  I.e., test if @var{op1} and @var{op2} are approximately
  equal.
@@ -4935,18 +4931,18 @@ even if ... is replaced by a semi-infinite number of bits.  Such numbers are
  really just one ulp off, and should be considered equal.
  @end deftypefun
  
-@deftypefun void mpf_reldiff (mpf_t @var{rop}, mpf_t @var{op1}, mpf_t @var{op2})
+@deftypefun void mpf_reldiff (mpf_t @var{rop}, const mpf_t @var{op1}, const mpf_t @var{op2})
  Compute the relative difference between @var{op1} and @var{op2} and store the
  result in @var{rop}.  This is @math{@GMPabs{@var{op1}-@var{op2}}/@var{op1}}.
  @end deftypefun
  
-@deftypefn Macro int mpf_sgn (mpf_t @var{op})
+@deftypefn Macro int mpf_sgn (const mpf_t @var{op})
  @cindex Sign tests
  @cindex Float sign tests
  Return @math{+1} if @math{@var{op} > 0}, 0 if @math{@var{op} = 0}, and
  @math{-1} if @math{@var{op} < 0}.
  
-This function is actually implemented as a macro.  It evaluates its arguments
+This function is actually implemented as a macro.  It evaluates its argument
  multiple times.
  @end deftypefn
  
@@ -4969,7 +4965,7 @@ for these functions.
  
  See also @ref{Formatted Output} and @ref{Formatted Input}.
  
-@deftypefun size_t mpf_out_str (FILE *@var{stream}, int @var{base}, size_t @var{n_digits}, mpf_t @var{op})
+@deftypefun size_t mpf_out_str (FILE *@var{stream}, int @var{base}, size_t @var{n_digits}, const mpf_t @var{op})
  Print @var{op} to @var{stream}, as a string of digits.  Return the number of
  bytes written, or if an error occurred, return 0.
  
@@ -5009,7 +5005,7 @@ numbers like @samp{0.23} are not interpreted as octal.
  Return the number of bytes read, or if an error occurred, return 0.
  @end deftypefun
  
-@c @deftypefun void mpf_out_raw (FILE *@var{stream}, mpf_t @var{float})
+@c @deftypefun void mpf_out_raw (FILE *@var{stream}, const mpf_t @var{float})
  @c Output @var{float} on stdio stream @var{stream}, in raw binary
  @c format.  The float is written in a portable format, with 4 bytes of
  @c size information, and that many bytes of limbs.  Both the size and the
@@ -5028,9 +5024,9 @@ Return the number of bytes read, or if an error occurred, return 0.
  @cindex Miscellaneous float functions
  @cindex Float miscellaneous functions
  
-@deftypefun void mpf_ceil (mpf_t @var{rop}, mpf_t @var{op})
-@deftypefunx void mpf_floor (mpf_t @var{rop}, mpf_t @var{op})
-@deftypefunx void mpf_trunc (mpf_t @var{rop}, mpf_t @var{op})
+@deftypefun void mpf_ceil (mpf_t @var{rop}, const mpf_t @var{op})
+@deftypefunx void mpf_floor (mpf_t @var{rop}, const mpf_t @var{op})
+@deftypefunx void mpf_trunc (mpf_t @var{rop}, const mpf_t @var{op})
  @cindex Rounding functions
  @cindex Float rounding functions
  Set @var{rop} to @var{op} rounded to an integer.  @code{mpf_ceil} rounds to the
@@ -5038,16 +5034,16 @@ next higher integer, @code{mpf_floor} to the next lower, and @code{mpf_trunc}
  to the integer towards zero.
  @end deftypefun
  
-@deftypefun int mpf_integer_p (mpf_t @var{op})
+@deftypefun int mpf_integer_p (const mpf_t @var{op})
  Return non-zero if @var{op} is an integer.
  @end deftypefun
  
-@deftypefun int mpf_fits_ulong_p (mpf_t @var{op})
-@deftypefunx int mpf_fits_slong_p (mpf_t @var{op})
-@deftypefunx int mpf_fits_uint_p (mpf_t @var{op})
-@deftypefunx int mpf_fits_sint_p (mpf_t @var{op})
-@deftypefunx int mpf_fits_ushort_p (mpf_t @var{op})
-@deftypefunx int mpf_fits_sshort_p (mpf_t @var{op})
+@deftypefun int mpf_fits_ulong_p (const mpf_t @var{op})
+@deftypefunx int mpf_fits_slong_p (const mpf_t @var{op})
+@deftypefunx int mpf_fits_uint_p (const mpf_t @var{op})
+@deftypefunx int mpf_fits_sint_p (const mpf_t @var{op})
+@deftypefunx int mpf_fits_ushort_p (const mpf_t @var{op})
+@deftypefunx int mpf_fits_sshort_p (const mpf_t @var{op})
  Return non-zero if @var{op} would fit in the respective C data type, when
  truncated to an integer.
  @end deftypefun
@@ -5073,7 +5069,7 @@ numbers have proven to be more likely to trigger corner-case bugs.  Negative
  random numbers are generated when @var{max_size} is negative.
  @end deftypefun
  
-@c @deftypefun size_t mpf_size (mpf_t @var{op})
+@c @deftypefun size_t mpf_size (const mpf_t @var{op})
  @c Return the size of @var{op} measured in number of limbs.  If @var{op} is
  @c zero, the returned value will be zero.  (@xref{Nomenclature}, for an
  @c explanation of the concept @dfn{limb}.)
@@ -5195,7 +5191,7 @@ This function requires that @var{s1n} is greater than or equal to
  @var{s2n}.
  @end deftypefun
  
-@deftypefun void mpn_neg (mp_limb_t *@var{rp}, const mp_limb_t *@var{sp}, mp_size_t @var{n})
+@deftypefun mp_limb_t mpn_neg (mp_limb_t *@var{rp}, const mp_limb_t *@var{sp}, mp_size_t @var{n})
  Perform the negation of @{@var{sp}, @var{n}@}, and write the result to
  @{@var{rp}, @var{n}@}.  Return carry-out.
  @end deftypefun
@@ -5393,10 +5389,9 @@ Set @{@var{rp}, @var{retval}@} to the greatest common divisor of @{@var{xp},
  the return value is the actual number produced.  Both source operands are
  destroyed.
  
-@{@var{xp}, @var{xn}@} must have at least as many bits as @{@var{yp},
-@var{yn}@}.  @{@var{yp}, @var{yn}@} must be odd.  Both operands must have
-non-zero most significant limbs.  No overlap is permitted between @{@var{xp},
-@var{xn}@} and @{@var{yp}, @var{yn}@}.
+It is required that @math{@var{xn} @ge{} @var{yn} > 0}, and the most significant
+limb of @{@var{yp}, @var{yn}@} must be non-zero.  No overlap is permitted
+between @{@var{xp}, @var{xn}@} and @{@var{yp}, @var{yn}@}.
  @end deftypefun
  
  @deftypefun mp_limb_t mpn_gcd_1 (const mp_limb_t *@var{xp}, mp_size_t @var{xn}, mp_limb_t @var{ylimb})
@@ -5451,7 +5446,7 @@ case the return value is zero or non-zero according to whether the remainder
  would have been zero or non-zero.
  
  A return value of zero indicates a perfect square.  See also
-@code{mpz_perfect_square_p}.
+@code{mpn_perfect_square_p}.
  @end deftypefun
  
  @deftypefun mp_size_t mpn_get_str (unsigned char *@var{str}, int @var{base}, mp_limb_t *@var{s1p}, mp_size_t @var{s1n})
@@ -5699,7 +5694,7 @@ Initialize @var{state} for a Mersenne Twister algorithm.  This algorithm is
  fast and has good randomness properties.
  @end deftypefun
  
-@deftypefun void gmp_randinit_lc_2exp (gmp_randstate_t @var{state}, mpz_t @var{a}, @w{unsigned long @var{c}}, @w{mp_bitcnt_t @var{m2exp}})
+@deftypefun void gmp_randinit_lc_2exp (gmp_randstate_t @var{state}, const mpz_t @var{a}, @w{unsigned long @var{c}}, @w{mp_bitcnt_t @var{m2exp}})
  @cindex Linear congruential random numbers
  Initialize @var{state} with a linear congruential algorithm @m{X = (@var{a}X +
  @var{c}) @bmod 2^{m2exp}, X = (@var{a}*X + @var{c}) mod 2^@var{m2exp}}.
@@ -5767,7 +5762,7 @@ Free all memory occupied by @var{state}.
  @cindex Random number seeding
  @cindex Seeding random numbers
  
-@deftypefun void gmp_randseed (gmp_randstate_t @var{state}, mpz_t @var{seed})
+@deftypefun void gmp_randseed (gmp_randstate_t @var{state}, const mpz_t @var{seed})
  @deftypefunx void gmp_randseed_ui (gmp_randstate_t @var{state}, @w{unsigned long int @var{seed}})
  Set an initial seed value into @var{state}.
  
@@ -6404,7 +6399,7 @@ results.  For classes with overloading, see @ref{C++ Class Interface}.
  
  
  
-@node C++ Class Interface, BSD Compatible Functions, Formatted Input, Top
+@node C++ Class Interface, Custom Allocation, Formatted Input, Top
  @chapter C++ Class Interface
  @cindex C++ interface
  
@@ -6542,8 +6537,10 @@ anything.
  @deftypefun {} mpz_class::mpz_class (type @var{n})
  Construct an @code{mpz_class}.  All the standard C++ types may be used, except
  @code{long long} and @code{long double}, and all the GMP C++ classes can be
-used.  Any necessary conversion follows the corresponding C function, for
-example @code{double} follows @code{mpz_set_d} (@pxref{Assigning Integers}).
+used, although conversions from @code{mpq_class} and @code{mpf_class} are
+@code{explicit}.  Any necessary conversion follows the corresponding C
+function, for example @code{double} follows @code{mpz_set_d}
+(@pxref{Assigning Integers}).
  @end deftypefun
  
  @deftypefun explicit mpz_class::mpz_class (mpz_t @var{z})
@@ -6561,6 +6558,11 @@ If the string is not a valid integer, an @code{std::invalid_argument}
  exception is thrown.  The same applies to @code{operator=}.
  @end deftypefun
  
+@deftypefun mpz_class operator"" _mpz (const char *@var{str})
+With C++11 compilers, integers can be constructed with the syntax
+@code{123_mpz} which is equivalent to @code{mpz_class("123")}.
+@end deftypefun
+
  @deftypefun mpz_class operator/ (mpz_class @var{a}, mpz_class @var{d})
  @deftypefunx mpz_class operator% (mpz_class @var{a}, mpz_class @var{d})
  Divisions involving @code{mpz_class} round towards zero, as per the
@@ -6577,7 +6579,7 @@ mpz_fdiv_q (q.get_mpz_t(), a.get_mpz_t(), d.get_mpz_t());
  @end example
  @end deftypefun
  
-@deftypefun mpz_class abs (mpz_class @var{op1})
+@deftypefun mpz_class abs (mpz_class @var{op})
  @deftypefunx int cmp (mpz_class @var{op1}, type @var{op2})
  @deftypefunx int cmp (type @var{op1}, mpz_class @var{op2})
  @maybepagebreak
@@ -6598,6 +6600,9 @@ mpz_fdiv_q (q.get_mpz_t(), a.get_mpz_t(), d.get_mpz_t());
  @deftypefunx int mpz_class::set_str (const string& @var{str}, int @var{base})
  @deftypefunx int sgn (mpz_class @var{op})
  @deftypefunx mpz_class sqrt (mpz_class @var{op})
+@maybepagebreak
+@deftypefunx void mpz_class::swap (mpz_class& @var{op})
+@deftypefunx void swap (mpz_class& @var{op1}, mpz_class& @var{op2})
  These functions provide a C++ class interface to the corresponding GMP C
  routines.
  
@@ -6626,9 +6631,10 @@ canonical form, or if not then @code{mpq_class::canonicalize} called.
  @deftypefun {} mpq_class::mpq_class (type @var{op})
  @deftypefunx {} mpq_class::mpq_class (integer @var{num}, integer @var{den})
  Construct an @code{mpq_class}.  The initial value can be a single value of any
-type, or a pair of integers (@code{mpz_class} or standard C++ integer types)
-representing a fraction, except that @code{long long} and @code{long double}
-are not supported.  For example,
+type (conversion from @code{mpf_class} is @code{explicit}), or a pair of
+integers (@code{mpz_class} or standard C++ integer types) representing a
+fraction, except that @code{long long} and @code{long double} are not
+supported.  For example,
  
  @example
  mpq_class q (99);
@@ -6652,6 +6658,12 @@ If the string is not a valid rational, an @code{std::invalid_argument}
  exception is thrown.  The same applies to @code{operator=}.
  @end deftypefun
  
+@deftypefun mpq_class operator"" _mpq (const char *@var{str})
+With C++11 compilers, integral rationals can be constructed with the syntax
+@code{123_mpq} which is equivalent to @code{mpq_class(123_mpz)}. Other
+rationals can be built as @code{-1_mpq/2} or @code{0xb_mpq/123456_mpz}.
+@end deftypefun
+
  @deftypefun void mpq_class::canonicalize ()
  Put an @code{mpq_class} into canonical form, as per @ref{Rational Number
  Functions}.  All arithmetic operators require their operands in canonical
@@ -6668,6 +6680,9 @@ form, and will return results in canonical form.
  @deftypefunx int mpq_class::set_str (const char *@var{str}, int @var{base})
  @deftypefunx int mpq_class::set_str (const string& @var{str}, int @var{base})
  @deftypefunx int sgn (mpq_class @var{op})
+@maybepagebreak
+@deftypefunx void mpq_class::swap (mpq_class& @var{op})
+@deftypefunx void swap (mpq_class& @var{op1}, mpq_class& @var{op2})
  These functions provide a C++ class interface to the corresponding GMP C
  routines.
  
@@ -6760,6 +6775,11 @@ If the string is not a valid float, an @code{std::invalid_argument} exception
  is thrown.  The same applies to @code{operator=}.
  @end deftypefun
  
+@deftypefun mpf_class operator"" _mpf (const char *@var{str})
+With C++11 compilers, floats can be constructed with the syntax
+@code{1.23e-1_mpf} which is equivalent to @code{mpf_class("1.23e-1")}.
+@end deftypefun
+
  @deftypefun {mpf_class&} mpf_class::operator= (type @var{op})
  Convert and store the given @var{op} value to an @code{mpf_class} object.  The
  same types are accepted as for the constructors above.
@@ -6810,6 +6830,9 @@ with the builtin float types.
  @deftypefunx int mpf_class::set_str (const string& @var{str}, int @var{base})
  @deftypefunx int sgn (mpf_class @var{op})
  @deftypefunx mpf_class sqrt (mpf_class @var{op})
+@maybepagebreak
+@deftypefunx void mpf_class::swap (mpf_class& @var{op})
+@deftypefunx void swap (mpf_class& @var{op1}, mpf_class& @var{op2})
  @deftypefunx mpf_class trunc (mpf_class @var{op})
  These functions provide a C++ class interface to the corresponding GMP C
  routines.
@@ -6871,7 +6894,7 @@ Seed a random number generator.  See @pxref{Random Number Functions}, for how
  to choose a good seed.
  @end deftypefun
  
-@deftypefun mpz_class gmp_randclass::get_z_bits (unsigned long @var{bits})
+@deftypefun mpz_class gmp_randclass::get_z_bits (mp_bitcnt_t @var{bits})
  @deftypefunx mpz_class gmp_randclass::get_z_bits (mpz_class @var{bits})
  Generate a random integer with a specified number of bits.
  @end deftypefun
@@ -6981,131 +7004,7 @@ void fun (T f, T g)
  @end table
  
  
-@node BSD Compatible Functions, Custom Allocation, C++ Class Interface, Top
-@comment  node-name,  next,  previous,  up
-@chapter Berkeley MP Compatible Functions
-@cindex Berkeley MP compatible functions
-@cindex BSD MP compatible functions
-
-These functions are intended to be fully compatible with the Berkeley MP
-library which is available on many BSD derived U*ix systems.  The
-@samp{--enable-mpbsd} option must be used when building GNU MP to make these
-available (@pxref{Installing GMP}).
-
-The original Berkeley MP library has a usage restriction: you cannot use the
-same variable as both source and destination in a single function call.  The
-compatible functions in GNU MP do not share this restriction---inputs and
-outputs may overlap.
-
-It is not recommended that new programs are written using these functions.
-Apart from the incomplete set of functions, the interface for initializing
-@code{MINT} objects is more error prone, and the @code{pow} function collides
-with @code{pow} in @file{libm.a}.
-
-@cindex @code{mp.h}
-@tindex MINT
-Include the header @file{mp.h} to get the definition of the necessary types and
-functions.  If you are on a BSD derived system, make sure to include GNU
-@file{mp.h} if you are going to link the GNU @file{libmp.a} to your program.
-This means that you probably need to give the @samp{-I<dir>} option to the
-compiler, where @samp{<dir>} is the directory where you have GNU @file{mp.h}.
-
-@deftypefun {MINT *} itom (signed short int @var{initial_value})
-Allocate an integer consisting of a @code{MINT} object and dynamic limb space.
-Initialize the integer to @var{initial_value}.  Return a pointer to the
-@code{MINT} object.
-@end deftypefun
-
-@deftypefun {MINT *} xtom (char *@var{initial_value})
-Allocate an integer consisting of a @code{MINT} object and dynamic limb space.
-Initialize the integer from @var{initial_value}, a hexadecimal,
-null-terminated C string.  Return a pointer to the @code{MINT} object.
-@end deftypefun
-
-@deftypefun void move (MINT *@var{src}, MINT *@var{dest})
-Set @var{dest} to @var{src} by copying.  Both variables must be previously
-initialized.
-@end deftypefun
-
-@deftypefun void madd (MINT *@var{src_1}, MINT *@var{src_2}, MINT *@var{destination})
-Add @var{src_1} and @var{src_2} and put the sum in @var{destination}.
-@end deftypefun
-
-@deftypefun void msub (MINT *@var{src_1}, MINT *@var{src_2}, MINT *@var{destination})
-Subtract @var{src_2} from @var{src_1} and put the difference in
-@var{destination}.
-@end deftypefun
-
-@deftypefun void mult (MINT *@var{src_1}, MINT *@var{src_2}, MINT *@var{destination})
-Multiply @var{src_1} and @var{src_2} and put the product in @var{destination}.
-@end deftypefun
-
-@deftypefun void mdiv (MINT *@var{dividend}, MINT *@var{divisor}, MINT *@var{quotient}, MINT *@var{remainder})
-@deftypefunx void sdiv (MINT *@var{dividend}, signed short int @var{divisor}, MINT *@var{quotient}, signed short int *@var{remainder})
-Set @var{quotient} to @var{dividend}/@var{divisor}, and @var{remainder} to
-@var{dividend} mod @var{divisor}.  The quotient is rounded towards zero; the
-remainder has the same sign as the dividend unless it is zero.
-
-Some implementations of these functions work differently---or not at all---for
-negative arguments.
-@end deftypefun
-
-@deftypefun void msqrt (MINT *@var{op}, MINT *@var{root}, MINT *@var{remainder})
-Set @var{root} to @m{\lfloor\sqrt{@var{op}}\rfloor, the truncated integer part
-of the square root of @var{op}}, like @code{mpz_sqrt}.  Set @var{remainder} to
-@m{(@var{op} - @var{root}^2), @var{op}@minus{}@var{root}*@var{root}}, i.e.
-zero if @var{op} is a perfect square.
-
-If @var{root} and @var{remainder} are the same variable, the results are
-undefined.
-@end deftypefun
-
-@deftypefun void pow (MINT *@var{base}, MINT *@var{exp}, MINT *@var{mod}, MINT *@var{dest})
-Set @var{dest} to (@var{base} raised to @var{exp}) modulo @var{mod}.
-
-Note that the name @code{pow} clashes with @code{pow} from the standard C math
-library (@pxref{Exponents and Logarithms,, Exponentiation and Logarithms,
-libc, The GNU C Library Reference Manual}).  An application will only be able
-to use one or the other.
-@end deftypefun
-
-@deftypefun void rpow (MINT *@var{base}, signed short int @var{exp}, MINT *@var{dest})
-Set @var{dest} to @var{base} raised to @var{exp}.
-@end deftypefun
-
-@deftypefun void gcd (MINT *@var{op1}, MINT *@var{op2}, MINT *@var{res})
-Set @var{res} to the greatest common divisor of @var{op1} and @var{op2}.
-@end deftypefun
-
-@deftypefun int mcmp (MINT *@var{op1}, MINT *@var{op2})
-Compare @var{op1} and @var{op2}.  Return a positive value if @var{op1} >
-@var{op2}, zero if @var{op1} = @var{op2}, and a negative value if @var{op1} <
-@var{op2}.
-@end deftypefun
-
-@deftypefun void min (MINT *@var{dest})
-Input a decimal string from @code{stdin}, and put the read integer in
-@var{dest}.  SPC and TAB are allowed in the number string, and are ignored.
-@end deftypefun
-
-@deftypefun void mout (MINT *@var{src})
-Output @var{src} to @code{stdout}, as a decimal string.  Also output a newline.
-@end deftypefun
-
-@deftypefun {char *} mtox (MINT *@var{op})
-Convert @var{op} to a hexadecimal string, and return a pointer to the string.
-The returned string is allocated using the default memory allocation function,
-@code{malloc} by default.  It will be @code{strlen(str)+1} bytes, that being
-exactly enough for the string and null-terminator.
-@end deftypefun
-
-@deftypefun void mfree (MINT *@var{op})
-De-allocate, the space used by @var{op}.  @strong{This function should only be
-passed a value returned by @code{itom} or @code{xtom}.}
-@end deftypefun
-
-
-@node Custom Allocation, Language Bindings, BSD Compatible Functions, Top
+@node Custom Allocation, Language Bindings, C++ Class Interface, Top
  @comment  node-name,  next,  previous,  up
  @chapter Custom Allocation
  @cindex Custom allocation
@@ -7119,9 +7018,6 @@ and terminates the program.
  Alternate functions can be specified, to allocate memory in a different way or
  to have a different error action on running out of memory.
  
-This feature is available in the Berkeley compatibility library (@pxref{BSD
-Compatible Functions}) as well as the main GMP library.
-
  @deftypefun void mp_set_memory_functions (@* void *(*@var{alloc_func_ptr}) (size_t), @* void *(*@var{realloc_func_ptr}) (void *, size_t, size_t), @* void (*@var{free_func_ptr}) (void *, size_t))
  Replace the current allocation functions from the arguments.  If an argument
  is @code{NULL}, the corresponding default function is used.
@@ -8458,9 +8354,10 @@ products must be done as a normal division, but there's still some single limb
  divisions saved.  When @math{d} is a single limb some simplifications arise,
  providing good speedups on a number of processors.
  
-@code{mpn_divexact_by3}, @code{mpn_modexact_1_odd} and the @code{mpn_redc_X}
-functions differ subtly in how they return @math{r}, leading to some negations
-in the above formula, but all are essentially the same.
+The functions @code{mpn_divexact_by3}, @code{mpn_modexact_1_odd} and the
+internal @code{mpn_redc_X} functions differ subtly in how they return @math{r},
+leading to some negations in the above formula, but all are essentially the
+same.
  
  @cindex Divisibility algorithm
  @cindex Congruence algorithm
@@ -8724,6 +8621,9 @@ current size of the cofactors.
  @subsection Jacobi Symbol
  @cindex Jacobi symbol algorithm
  
+[This section is obsolete.  The current Jacobi code actually uses a very
+efficient algorithm.]
+
  @code{mpz_jacobi} and @code{mpz_kronecker} are currently implemented with a
  simple binary algorithm similar to that described for the GCDs (@pxref{Binary
  GCD}).  They're not very fast when both inputs are large.  Lehmer's multi-step
@@ -8770,7 +8670,7 @@ exponent.  Larger exponents use larger values of @math{k}, the choice being
  made to minimize the average number of multiplications that must supplement
  the squaring.
  
-The modular multiplies and squares use either a simple division or the REDC
+The modular multiplies and squarings use either a simple division or the REDC
  method by Montgomery (@pxref{References}).  REDC is a little faster,
  essentially saving N single limb divisions in a fashion similar to an exact
  remainder (@pxref{Exact Remainder}).
@@ -9125,42 +9025,75 @@ for an arbitrary @math{n}.
  @subsection Factorial
  @cindex Factorial algorithm
  
-Factorials are calculated by a combination of removal of twos, powering, and
-binary splitting.  The procedure can be best illustrated with an example,
+Factorials are calculated by a combination of two algorithms. An idea is
+shared among them: to compute the odd part of the factorial; a final step
+takes account of the power of @math{2} term, by shifting.
+
+For small @math{n}, the odd factor of @math{n!} is computed with the simple
+observation that it is equal to the product of all positive odd numbers
+not exceeding @math{n}, times the odd factor of @m{\lfloor n/2\rfloor!, [n/2]!},
+where @m{\lfloor x\rfloor, [x]} is the integer part of @math{x}, and so on
+recursively. The procedure can be best illustrated with an example,
  
  @quotation
-@math{23! = 1.2.3.4.5.6.7.8.9.10.11.12.13.14.15.16.17.18.19.20.21.22.23}
+@math{23! = (23.21.19.17.15.13.11.9.7.5.3)(11.9.7.5.3)(5.3)2^{19}}
  @end quotation
  
-@noindent
-has factors of two removed,
+Current code collects all the factors in a single list, with a loop and no
+recursion, and computes the product, with no special care for repeated chunks.
  
-@quotation
-@math{23! = 2^{19}.1.1.3.1.5.3.7.1.9.5.11.3.13.7.15.1.17.9.19.5.21.11.23}
-@end quotation
+When @math{n} is larger, the computation passes through prime sieving.  A
+helper function is used, as suggested by Peter Luschny:
+@tex
+$$\mathop{\rm msf}(n) = {n!\over\lfloor n/2\rfloor!^2\cdot2^k} = \prod_{p=3}^{n}
+p^{\mathop{\rm L}(p,n)} $$
+@end tex
+@ifnottex
  
-@noindent
-and the resulting terms collected up according to their multiplicity,
+@example
+                            n
+                          -----
+               n!          | |   L(p,n)
+msf(n) = -------------- =  | |  p
+          [n/2]!^2.2^k     p=3
+@end example
+@end ifnottex
  
-@quotation
-@math{23! = 2^{19}.(3.5)^3.(7.9.11)^2.(13.15.17.19.21.23)}
-@end quotation
+Where @math{p} ranges over odd prime numbers. The exponent @math{k} is chosen to
+obtain an odd integer number: @math{k} is the number of 1 bits in the binary
+representation of @m{\lfloor n/2\rfloor, [n/2]}. The function L@math{(p,n)}
+can be defined as zero when @math{p} is composite, and, for any prime
+@math{p}, it is computed with:
+@tex
+$$\mathop{\rm L}(p,n) = \sum_{i>0}\left\lfloor{n\over p^i}\right\rfloor\bmod2
+\leq\log_p(n)$$
+@end tex
+@ifnottex
  
-Each sequence such as @math{13.15.17.19.21.23} is evaluated by splitting into
-every second term, as for instance @math{(13.17.21).(15.19.23)}, and the same
-recursively on each half.  This is implemented iteratively using some bit
-twiddling.
+@example
+          ---
+           \    n
+L(p,n) =   /  [---] mod 2   <=  log (n) .
+          ---  p^i                p
+          i>0
+@end example
+@end ifnottex
+
+With this helper function, we are able to compute the odd part of @math{n!}
+using the recursion implied by @m{n!=\lfloor n/2\rfloor!^2\cdot\mathop{\rm
+msf}(n)\cdot2^k , n!=[n/2]!^2*msf(n)*2^k}. The recursion stops using the
+small-@math{n} algorithm on some @m{\lfloor n/2^i\rfloor, [n/2^i]}.
+
+Both the above algorithms use binary splitting to compute the product of many
+small factors. At first as many products as possible are accumulated in a
+single register, generating a list of factors that fit in a machine word. This
+list is then split into halves, and the product is computed recursively.
  
  Such splitting is more efficient than repeated N@cross{}1 multiplies since it
  forms big multiplies, allowing Karatsuba and higher algorithms to be used.
  And even below the Karatsuba threshold a big block of work can be more
  efficient for the basecase algorithm.
  
-Splitting into subsequences of every second term keeps the resulting products
-more nearly equal in size than would the simpler approach of say taking the
-first half and second half of the sequence.  Nearly equal products are more
-efficient for the current multiply implementation.
-
  
  @node Binomial Coefficients Algorithm, Fibonacci Numbers Algorithm, Factorial Algorithm, Other Algorithms
  @subsection Binomial Coefficients
@@ -9340,7 +9273,7 @@ are also very good and this is the default algorithm used by GMP.
  @cindex Linear congruential algorithm
  Linear congruential generators are described in many text books, for instance
  Knuth volume 2 (@pxref{References}).  With a modulus @math{M} and parameters
-@math{A} and @math{C}, a integer state @math{S} is iterated by the formula
+@math{A} and @math{C}, an integer state @math{S} is iterated by the formula
  @math{S @leftarrow{} A@GMPmultiply{}S+C @bmod{} M}.  At each step the new
  state is a linear function of the previous, mod @math{M}, hence the name of
  the generator.
@@ -10446,11 +10379,12 @@ Jason Moxham rewrote @code{mpz_fac_ui}.
  Pedro Gimeno implemented the Mersenne Twister and made other random number
  improvements.
  
-Niels M@"oller wrote the sub-quadratic GCD and extended GCD code, the
+Niels M@"oller wrote the sub-quadratic GCD, extended GCD and jacobi code, the
  quadratic Hensel division code, and (with Torbj@"orn) the new divide and
  conquer division code for GMP 4.3.  Niels also helped implement the new Toom
  multiply code for GMP 4.3 and implemented helper functions to simplify Toom
-evaluations for GMP 5.0.  He wrote the original version of mpn_mulmod_bnm1.
+evaluations for GMP 5.0.  He wrote the original version of mpn_mulmod_bnm1, and
+he is the main author of the mini-gmp package used for gmp bootstrapping.
  
  Alberto Zanoni and Marco Bodrato suggested the unbalanced multiply strategy,
  and found the optimal strategies for evaluation and interpolation in Toom
@@ -10459,14 +10393,26 @@ multiplication.
  Marco Bodrato helped implement the new Toom multiply code for GMP 4.3 and
  implemented most of the new Toom multiply and squaring code for 5.0.
  He is the main author of the current mpn_mulmod_bnm1 and mpn_mullo_n.  Marco
-also wrote the functions mpn_invert and mpn_invertappr.
+also wrote the functions mpn_invert and mpn_invertappr.  He is the author of
+the current combinatorial functions: binomial, factorial, multifactorial,
+primorial.
  
  David Harvey suggested the internal function @code{mpn_bdiv_dbm1}, implementing
  division relevant to Toom multiplication.  He also worked on fast assembly
-sequences, in particular on a fast AMD64 @code{mpn_mul_basecase}.
+sequences, in particular on a fast AMD64 @code{mpn_mul_basecase}. He wrote
+the internal middle product functions @code{mpn_mulmid_basecase},
+@code{mpn_toom42_mulmid}, @code{mpn_mulmid_n} and related helper routines.
  
  Martin Boij wrote @code{mpn_perfect_power_p}.
  
+Marc Glisse improved @file{gmpxx.h}: use fewer temporaries (faster),
+specializations of @code{numeric_limits} and @code{common_type}, C++11
+features (move constructors, explicit bool conversion, UDL), make the
+conversion from @code{mpq_class} to @code{mpz_class} explicit, optimize
+operations where one argument is a small compile-time constant, replace
+some heap allocations by stack allocations.  He also fixed the eofbit
+handling of C++ streams, and removed one division from @file{mpq/aors.c}.
+
  (This list is chronological, not ordered after significance.  If you have
  contributed to GMP but are not listed above, please tell
  @email{gmp-devel@@gmplib.org} about the omission!)
diff --git a/doc/projects.html b/doc/projects.html

index 79e5aa23b13a9d9e2449b5aae65098ebd3cbd920..35caf59fa7e668f9faf624219f478c7e7956ffe4 100644 (file)
--- a/doc/projects.html
+++ b/doc/projects.html
@@ -15,8 +15,8 @@
  
  <font size=-1>
  <pre>
-Copyright 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2008, 2009 Free Software
-Foundation, Inc.
+Copyright 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2008, 2009, 2010, 2011
+Free Software Foundation, Inc.
  
  This file is part of the GNU MP Library.
  
@@ -37,7 +37,7 @@ along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
  
  <hr>
  <!-- NB. timestamp updated automatically by emacs -->
-  This file current as of 15 Nov 2009.  An up-to-date version is available at
+  This file current as of 5 Dec 2011.  An up-to-date version is available at
    <a href="http://gmplib.org/projects.html">http://gmplib.org/projects.html</a>.
    Please send comments about this page to gmp-devel<font>@</font>gmplib.org.
  
@@ -53,27 +53,9 @@ along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
  <ul>
  <li> <strong>Faster multiplication</strong>
  
-  <p> The current multiplication code uses Karatsuba, 3-way and 4-way Toom, and
-      Fermat FFT.  Several new developments are desirable:
-
    <ol>
  
-    <li> Write more toom multiply functions for unbalanced operands.  We now have
-        toom22, toom32, toom42, toom62, toom33, toom53, and toom44.  Most
-        desirable is toom43, which will require a new toom_interpolate_6pts
-        function.  Writing toom52 will then be straightforward.  See also
-        <a href="http://bodrato.it/software/toom.html">Marco Bodrato's
-        site</a>
-
-    <li> Perhaps consider N-way Toom, N > 4.  See Knuth's Seminumerical
-        Algorithms for details on the method, as well as Bodrato's site.  Code
-        implementing it exists.  This is asymptotically inferior to FFTs, but
-        is finer grained.
-
-    <li> The mpn_mul call now (from GMP 4.3) uses toom22, toom32, and toom42
-        for unbalanced operations.  We don't use any of the other new toom
-        functions currently.  Write new clever code for choosing the best toom
-        function from an m-limb and an n-limb operand.
+    <li> Work on the algorithm selection code for unbalanced multiplication.
  
      <li> Implement an FFT variant computing the coefficients mod m different
          limb size primes of the form l*2^k+1. i.e., compute m separate FFTs.
@@ -92,16 +74,8 @@ along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
          <p> [We now have two implementations of this algorithm, one by Tommy
          Färnqvist and one by Niels Möller.]
  
-    <li> Add support for short products, either a given number of low limbs, a
-        given number of high limbs, or perhaps the middle limbs of the result.
-        High short product can be used by <code>mpf_mul</code>, by
-        left-to-right Newton approximations, and for quotient approximation.
-        Low half short product can be of use in sub-quadratic REDC and for
-        right-to-left Newton approximations.  On small sizes a short product
-        will be faster simply through fewer cross-products, similar to the way
-        squaring is faster.  But work by Thom Mulders shows that for Karatsuba
-        and higher order algorithms the advantage is progressively lost, so
-        for large sizes shows products turn out to be no faster.
+    <li> Work on short products.  Our mullo and mulmid are probably OK, but we
+         lack mulhi.
  
    </ol>
  
@@ -121,8 +95,8 @@ along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
  
    <p> Please make sure your new routines are fast for these three situations:
        <ol>
-       <li> Operands that fit into the cache.
         <li> Small operands of less than, say, 10 limbs.
+       <li> Medium size operands, that fit into the cache.
         <li> Huge operands that does not fit into the cache.
        </ol>
  
@@ -145,18 +119,6 @@ along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
        21-bit pieces if one allows the split operands to be negative!)
  
  
-<li> <strong>Math functions for the mpf layer</strong>
-
-  <p> Implement the functions of math.h for the GMP mpf layer! Check the book
-      "Pi and the AGM" by Borwein and Borwein for ideas how to do this.  These
-      functions are desirable: acos, acosh, asin, asinh, atan, atanh, atan2,
-      cos, cosh, exp, log, log10, pow, sin, sinh, tan, tanh.
-
-  <p> Note that the <a href="http://mpfr.org">mpfr</a> functions already
-  provide these functions, and that we usually recommend new programs to use
-  mpfr instead of mpf.
-
-
  <li> <strong>Faster sqrt</strong>
  
    <p> The current code uses divisions, which are reasonably fast, but it'd be
@@ -180,9 +142,24 @@ along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
  
  <li> <strong>Nth root</strong>
  
-  <p> Improve mpn_rootrem.  The current code is not too bad, but its average
-      time complexity is a function of the input, while it is possible to
-      make it a function of the output.
+  <p> Improve mpn_rootrem.  The current code is not too bad, but its time
+      complexity is a function of the input, while it is possible to make
+      the <i>average</i> complexity a function of the output.
+
+
+<li> <strong>Fat binaries</strong>
+
+  <p> Add more functions to the set of fat functions.
+
+  <p> The speed of multiplication is today highly dependent on combination
+  functions like <code>addlsh1_n</code>.  A fat binary will never use any such
+  functions, since they are classified as optional.  Ideally, we should use
+  them, by making the current compile-time selections of optional functions
+  become run-time selections for fat binaries.
+
+  <p> If we make fat binaries work really well, we should move away from the
+  current configure scheme (at least by default) and instead include all code
+  always.
  
  
  <li> <strong>Exceptions</strong>
@@ -343,131 +320,15 @@ along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
        <code>gmp_restrict</code>.
  
  
-<li> <strong>Nx1 Division</strong>
-
-  <p> The limb-by-limb dependencies in the existing Nx1 division (and
-      remainder) code means that chips with multiple execution units or
-      pipelined multipliers are not fully utilized.
-
-  <p> One possibility is to follow the current preinv method but taking two
-      limbs at a time.  That means a 2x2-&gt;4 and a 2x1-&gt;2 multiply for
-      each two limbs processed, and because the 2x2 and 2x1 can each be done in
-      parallel the latency will be not much more than 2 multiplies for two
-      limbs, whereas the single limb method has a 2 multiply latency for just
-      one limb.  A version of <code>mpn_divrem_1</code> doing this has been
-      written in C, but not yet tested on likely chips.  Clearly this scheme
-      would extend to 3x3-&gt;9 and 3x1-&gt;3 etc, though with diminishing
-      returns.
-
-  <p> For <code>mpn_mod_1</code>, Peter L. Montgomery proposes the following
-      scheme.  For a limb R=2^<code>bits_per_mp_limb</code>, pre-calculate
-      values R mod N, R^2 mod N, R^3 mod N, R^4 mod N.  Then take dividend
-      limbs and multiply them by those values, thereby reducing them (moving
-      them down) by the corresponding factor.  The products can be added to
-      produce an intermediate remainder of 2 or 3 limbs to be similarly
-      included in the next step.  The point is that such multiplies can be done
-      in parallel, meaning as little as 1 multiply worth of latency for 4
-      limbs.  If the modulus N is less than R/4 (or is it R/5?) the summed
-      products will fit in 2 limbs, otherwise 3 will be required, but with the
-      high only being small.  Clearly this extends to as many factors of R as a
-      chip can efficiently apply.
-
-  <p> The logical conclusion for powers R^i is a whole array "p[i] = R^i mod N"
-      for i up to k, the size of the dividend.  This could then be applied at
-      multiplier throughput speed like an inner product.  If the powers took
-      roughly k divide steps to calculate then there'd be an advantage any time
-      the same N was used three or more times.  Suggested by Victor Shoup in
-      connection with chinese-remainder style decompositions, but perhaps with
-      other uses.
-
-  <p> <code>mpn_modexact_1_odd</code> calculates an x in the range 0&lt;=x&lt;d
-      satisfying a = q*d + x*b^n, where b=2^bits_per_limb.  The factor b^n
-      needed to get the true remainder r could be calculated by a powering
-      algorithm, allowing <code>mpn_modexact_1_odd</code> to be pressed into
-      service for an <code>mpn_mod_1</code>.  <code>modexact_1</code> is
-      simpler and on some chips can run noticeably faster than plain
-      <code>mod_1</code>, on Athlon for instance 11 cycles/limb instead of 17.
-      Such a difference could soon overcome the time to calculate b^n.  The
-      requirement for an odd divisor in <code>modexact</code> can be handled by
-      some shifting on-the-fly, or perhaps by an extra partial-limb step at the
-      end.
-
  
  <li> <strong>Factorial</strong>
  
-  <p> The removal of twos in the current code could be extended to factors of 3
-      or 5.  Taking this to its logical conclusion would be a complete
-      decomposition into powers of primes.  The power for a prime p is of
-      course floor(n/p)+floor(n/p^2)+...  Conrad Curry found this is quite fast
-      (using simultaneous powering as per Handbook of Applied Cryptography
-      algorithm 14.88).
-
-  <p> A difficulty with using all primes is that quite large n can be
-      calculated on a system with enough memory, larger than we'd probably want
-      for a table of primes, so some sort of sieving would be wanted.  Perhaps
-      just taking out the factors of 3 and 5 would give most of the speedup
-      that a prime decomposition can offer.
+  <p> Rewrite for simplicity and speed.  Work is in progress.
  
  
  <li> <strong>Binomial Coefficients</strong>
  
-  <p> An obvious improvement to the current code would be to strip factors of 2
-      from each multiplier and divisor and count them separately, to be applied
-      with a bit shift at the end.  Factors of 3 and perhaps 5 could even be
-      handled similarly.
-
-  <p> Conrad Curry reports a big speedup for binomial coefficients using a
-      prime powering scheme, at least for k near n/2.  Of course this is only
-      practical for moderate size n since again it requires primes up to n.
-
-  <p> When k is small the current (n-k+1)...n/1...k will be fastest.  Some sort
-      of rule would be needed for when to use this or when to use prime
-      powering.  Such a rule will be a function of both n and k.  Some
-      investigation is needed to see what sort of shape the crossover line will
-      have, the usual parameter tuning can of course find machine dependent
-      constants to fill in where necessary.
-
-  <p> An easier possibility also reported by Conrad Curry is that it may be
-      faster not to divide out the denominator (1...k) one-limb at a time, but
-      do one big division at the end.  Is this because a big divisor in
-      <code>mpn_bdivmod</code> trades the latency of
-      <code>mpn_divexact_1</code> for the throughput of
-      <code>mpn_submul_1</code>?  Overheads must hurt though.
-
-  <p> Another reason a big divisor might help is that
-      <code>mpn_divexact_1</code> won't be getting a full limb in
-      <code>mpz_bin_uiui</code>.  It's called when the n accumulator is full
-      but the k may be far from full.  Perhaps the two could be decoupled so k
-      is applied when full.  It'd be necessary to delay consideration of k
-      terms until the corresponding n terms had been applied though, since
-      otherwise the division won't be exact.
-
-
-<li> <strong>Perfect Power Testing</strong>
-
-  <p> <code>mpz_perfect_power_p</code> could be improved in a number of ways,
-      for instance p-adic arithmetic to find possible roots.
-
-  <p> Non-powers can be quickly identified by checking for Nth power residues
-      modulo small primes, like <code>mpn_perfect_square_p</code> does for
-      squares.  The residues to each power N for a given remainder could be
-      grouped into a bit mask, the masks for the remainders to each divisor
-      would then be "and"ed together to hopefully leave only a few candidate
-      powers.  Need to think about how wide to make such masks, ie. how many
-      powers to examine in this way.
-
-  <p> Any zero remainders found in residue testing reveal factors which can be
-      divided out, with the multiplicity restricting the powers that need to be
-      considered, as per the current code.  Further prime dividing should be
-      grouped into limbs like <code>PP</code>.  Need to think about how much
-      dividing to do like that, probably more for bigger inputs, less for
-      smaller inputs.
-
-  <p> <code>mpn_gcd_1</code> would probably be better than the current private
-      GCD routine.  The use it's put to isn't time-critical, and it might help
-      ensure correctness to just use the main GCD routine.
-
-  <p> [There is work-in-progress with a very fast function.]
+  <p> Rewrite for simplicity and speed.  Work is in progress.
  
  
  <li> <strong>Prime Testing</strong>
@@ -589,6 +450,16 @@ along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
        selecting public symbols (used now for libmp).
  
  
+<li> <strong>Math functions for the mpf layer</strong>
+
+  <p> Implement the functions of math.h for the GMP mpf layer! Check the book
+      "Pi and the AGM" by Borwein and Borwein for ideas how to do this.  These
+      functions are desirable: acos, acosh, asin, asinh, atan, atanh, atan2,
+      cos, cosh, exp, log, log10, pow, sin, sinh, tan, tanh.
+
+  <p> Note that the <a href="http://mpfr.org">mpfr</a> library already
+  provides these functions, and that we usually recommend that new programs
+  use mpfr instead of mpf.
  </ul>
  <hr>
  
diff --git a/doc/stamp-vti b/doc/stamp-vti

index bcfbdca912946b0e4fad4cef1b57eeb83a685708..5d81d9c6ebaa61bc085d9b8a628800c55f263728 100644 (file)
--- a/doc/stamp-vti
+++ b/doc/stamp-vti
@@ -1,4 +1,4 @@
-@set UPDATED 6 May 2012
-@set UPDATED-MONTH May 2012
-@set EDITION 5.0.5
-@set VERSION 5.0.5
+@set UPDATED 30 September 2013
+@set UPDATED-MONTH September 2013
+@set EDITION 5.1.3
+@set VERSION 5.1.3
diff --git a/doc/tasks.html b/doc/tasks.html

index d86e79428dafcb0dbb72f863aad3950df99c15d6..da4dfe0142cce51837486b9a843d209fdb94c289 100644 (file)
--- a/doc/tasks.html
+++ b/doc/tasks.html
@@ -37,7 +37,7 @@ along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
  
  <hr>
  <!-- NB. timestamp updated automatically by emacs -->
-  This file current as of 28 Dec 2009.  An up-to-date version is available at
+  This file current as of 5 Dec 2011.  An up-to-date version is available at
    <a href="http://gmplib.org/tasks.html">http://gmplib.org/tasks.html</a>.
    Please send comments about this page to gmp-devel<font>@</font>gmplib.org.
  
@@ -62,13 +62,6 @@ either already been taken care of, or have become irrelevant.
       <code>_mpz_realloc</code> with a small (1 limb) size.
  <li> One reuse case is missing from mpX/tests/reuse.c:
       <code>mpz_XXX(a,a,a)</code>.
-<li> When printing <code>mpf_t</code> numbers with exponents &gt;2^53 on
-     machines with 64-bit <code>mp_exp_t</code>, the precision of
-     <code>__mp_bases[base].chars_per_bit_exactly</code> is insufficient and
-     <code>mpf_get_str</code> aborts.  Detect and compensate.  Alternately,
-     think seriously about using some sort of fixed-point integer value.
-     Avoiding unnecessary floating point is probably a good thing in general,
-     and it might be faster on some CPUs.
  <li> Make the string reading functions allow the `0x' prefix when the base is
       explicitly 16.  They currently only allow that prefix when the base is
       unspecified (zero).
@@ -122,9 +115,6 @@ either already been taken care of, or have become irrelevant.
       subsequent operations, especially if the value is otherwise only small.
       If low bits of the low limb are zero, use <code>mpn_rshift</code> so as
       to not increase the size.
-<li> <code>mpn_dc_sqrtrem</code>: Don't use <code>mpn_addmul_1</code> with
-     multiplier==2, instead either <code>mpn_addlsh1_n</code> when available,
-     or <code>mpn_lshift</code>+<code>mpn_add_n</code> if not.
  <li> <code>mpn_dc_sqrtrem</code>, <code>mpn_sqrtrem2</code>: Don't use
       <code>mpn_add_1</code> and <code>mpn_sub_1</code> for 1 limb operations,
       instead <code>ADDC_LIMB</code> and <code>SUBC_LIMB</code>.
@@ -133,20 +123,12 @@ either already been taken care of, or have become irrelevant.
       aliasing between <code>sp</code> and <code>rp</code>.
  <li> <code>mpn_sqrtrem</code>: Some work can be saved in the last step when
       the remainder is not required, as noted in Paul's paper.
-<li> <code>mpq_add</code>, <code>mpq_add</code>: The division "op1.den / gcd"
-     is done twice, where of course only once is necessary.  Reported by Larry
-     Lambe.
  <li> <code>mpq_add</code>, <code>mpq_sub</code>: The gcd fits a single limb
-     with high probability and in this case <code>modlimb_invert</code> could
+     with high probability and in this case <code>binvert_limb</code> could
       be used to calculate the inverse just once for the two exact divisions
       "op1.den / gcd" and "op2.den / gcd", rather than letting
-     <code>mpn_divexact_1</code> do it each time.  This would require a new
-     <code>mpn_preinv_divexact_1</code> interface.  Not sure if it'd be worth
-     the trouble.
-<li> <code>mpq_add</code>, <code>mpq_sub</code>: The use of
-     <code>mpz_mul(x,y,x)</code> causes temp allocation or copying in
-     <code>mpz_mul</code> which can probably be avoided.  A rewrite using
-     <code>mpn</code> might be best.
+     <code>mpn_bdiv_q_1</code> do it each time.  This would require calling
+     <code>mpn_pi1_bdiv_q_1</code>.
  <li> <code>mpn_gcdext</code>: Don't test <code>count_leading_zeros</code> for
       zero, instead check the high bit of the operand and avoid invoking
       <code>count_leading_zeros</code>.  This is an optimization on all
@@ -173,26 +155,20 @@ either already been taken care of, or have become irrelevant.
       since there's no apparent way to get <code>SHRT_MAX</code> with an
       expression (since <code>short</code> and <code>unsigned short</code> can
       be different sizes).
-<li> <code>mpz_powm</code> and <code>mpz_powm_ui</code> aren't very
-     fast on one or two limb moduli, due to a lot of function call
-     overheads.  These could perhaps be handled as special cases.
-<li> <code>mpz_powm</code> and <code>mpz_powm_ui</code> want better
-     algorithm selection, and the latter should use REDC.  Both could
-     change to use an <code>mpn_powm</code> and <code>mpn_redc</code>.
+<li> <code>mpz_powm</code> and <code>mpz_powm_ui</code> aren't very fast on one
+     or two limb moduli, due to a lot of function call overheads.  These could
+     perhaps be handled as special cases.
+<li> Make sure <code>mpz_powm_ui</code> is never slower than the corresponding
+     computation using <code>mpz_powm</code>.
  <li> <code>mpz_powm</code> REDC should do multiplications by <code>g[]</code>
       using the division method when they're small, since the REDC form of a
       small multiplier is normally a full size product.  Probably would need a
       new tuned parameter to say what size multiplier is "small", as a function
       of the size of the modulus.
-<li> <code>mpz_powm</code> REDC should handle even moduli if possible.  Maybe
-     this would mean for m=n*2^k doing mod n using REDC and an auxiliary
-     calculation mod 2^k, then putting them together at the end.
-<li> <code>mpn_gcd</code> might be able to be sped up on small to
-     moderate sizes by improving <code>find_a</code>, possibly just by
-     providing an alternate implementation for CPUs with slowish
+<li> <code>mpn_gcd</code> might be able to be sped up on small to moderate
+     sizes by improving <code>find_a</code>, possibly just by providing an
+     alternate implementation for CPUs with slowish
       <code>count_leading_zeros</code>.
-<li> Toom3 could use a low to high cache localized evaluate and interpolate.
-     The necessary <code>mpn_divexact_by3c</code> exists.
  <li> <code>mpf_set_str</code> produces low zero limbs when a string has a
       fraction but is exactly representable, eg. 0.5 in decimal.  These could be
       stripped to save work in later operations.
@@ -371,7 +347,7 @@ either already been taken care of, or have become irrelevant.
  <li> UltraSPARC/32: <code>mpn_divexact_by3c</code> can work 64-bits at a time
       using <code>mulx</code>, in assembler.  This would be the same as for
       sparc64.
-<li> UltraSPARC: <code>modlimb_invert</code> might save a few cycles from
+<li> UltraSPARC: <code>binvert_limb</code> might save a few cycles from
       masking down to just the useful bits at each point in the calculation,
       since <code>mulx</code> speed depends on the highest bit set.  Either
       explicit masks or small types like <code>short</code> and
diff --git a/doc/version.texi b/doc/version.texi

index bcfbdca912946b0e4fad4cef1b57eeb83a685708..5d81d9c6ebaa61bc085d9b8a628800c55f263728 100644 (file)
--- a/doc/version.texi
+++ b/doc/version.texi
@@ -1,4 +1,4 @@
-@set UPDATED 6 May 2012
-@set UPDATED-MONTH May 2012
-@set EDITION 5.0.5
-@set VERSION 5.0.5
+@set UPDATED 30 September 2013
+@set UPDATED-MONTH September 2013
+@set EDITION 5.1.3
+@set VERSION 5.1.3
diff --git a/dumbmp.c b/dumbmp.c

deleted file mode 100644 (file)

index c87aae4..0000000
--- a/dumbmp.c
+++ /dev/null
@@ -1,922 +0,0 @@
-/* dumbmp mini GMP compatible library.
-
-Copyright 2001, 2002, 2004 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-License for more details.
-
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
-
-
-/* The code here implements a subset (a very limited subset) of the main GMP
-   functions.  It's designed for use in a few build-time calculations and
-   will be slow, but highly portable.
-
-   None of the normal GMP configure things are used, nor any of the normal
-   gmp.h or gmp-impl.h.  To use this file in a program just #include
-   "dumbmp.c".
-
-   ANSI function definitions can be used here, since ansi2knr is run if
-   necessary.  But other ANSI-isms like "const" should be avoided.
-
-   mp_limb_t here is an unsigned long, since that's a sensible type
-   everywhere we know of, with 8*sizeof(unsigned long) giving the number of
-   bits in the type (that not being true for instance with int or short on
-   Cray vector systems.)
-
-   Only the low half of each mp_limb_t is used, so as to make carry handling
-   and limb multiplies easy.  GMP_LIMB_BITS is the number of bits used.  */
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-
-
-typedef unsigned long mp_limb_t;
-
-typedef struct {
-  int        _mp_alloc;
-  int        _mp_size;
-  mp_limb_t *_mp_d;
-} mpz_t[1];
-
-#define GMP_LIMB_BITS  (sizeof (mp_limb_t) * 8 / 2)
-
-#define ABS(x)   ((x) >= 0 ? (x) : -(x))
-#define MIN(l,o) ((l) < (o) ? (l) : (o))
-#define MAX(h,i) ((h) > (i) ? (h) : (i))
-
-#define ALLOC(x) ((x)->_mp_alloc)
-#define PTR(x)   ((x)->_mp_d)
-#define SIZ(x)   ((x)->_mp_size)
-#define ABSIZ(x) ABS (SIZ (x))
-#define LOMASK   ((1L << GMP_LIMB_BITS) - 1)
-#define LO(x)    ((x) & LOMASK)
-#define HI(x)    ((x) >> GMP_LIMB_BITS)
-
-#define ASSERT(cond)                                    \
-  do {                                                  \
-    if (! (cond))                                       \
-      {                                                 \
-        fprintf (stderr, "Assertion failure\n");        \
-        abort ();                                       \
-      }                                                 \
-  } while (0)
-
-
-char *
-xmalloc (int n)
-{
-  char  *p;
-  p = malloc (n);
-  if (p == NULL)
-    {
-      fprintf (stderr, "Out of memory (alloc %d bytes)\n", n);
-      abort ();
-    }
-  return p;
-}
-
-mp_limb_t *
-xmalloc_limbs (int n)
-{
-  return (mp_limb_t *) xmalloc (n * sizeof (mp_limb_t));
-}
-
-void
-mem_copyi (char *dst, char *src, int size)
-{
-  int  i;
-  for (i = 0; i < size; i++)
-    dst[i] = src[i];
-}
-
-static int
-isprime (unsigned long int t)
-{
-  unsigned long int q, r, d;
-
-  if (t < 32)
-    return (0xa08a28acUL >> t) & 1;
-  if ((t & 1) == 0)
-    return 0;
-
-  if (t % 3 == 0)
-    return 0;
-  if (t % 5 == 0)
-    return 0;
-  if (t % 7 == 0)
-    return 0;
-
-  for (d = 11;;)
-    {
-      q = t / d;
-      r = t - q * d;
-      if (q < d)
-       return 1;
-      if (r == 0)
-       break;
-      d += 2;
-      q = t / d;
-      r = t - q * d;
-      if (q < d)
-       return 1;
-      if (r == 0)
-       break;
-      d += 4;
-    }
-  return 0;
-}
-
-int
-log2_ceil (int n)
-{
-  int  e;
-  ASSERT (n >= 1);
-  for (e = 0; ; e++)
-    if ((1 << e) >= n)
-      break;
-  return e;
-}
-
-void
-mpz_realloc (mpz_t r, int n)
-{
-  if (n <= ALLOC(r))
-    return;
-
-  ALLOC(r) = n;
-  PTR(r) = (mp_limb_t *) realloc (PTR(r), n * sizeof (mp_limb_t));
-  if (PTR(r) == NULL)
-    {
-      fprintf (stderr, "Out of memory (realloc to %d)\n", n);
-      abort ();
-    }
-}
-
-void
-mpn_normalize (mp_limb_t *rp, int *rnp)
-{
-  int  rn = *rnp;
-  while (rn > 0 && rp[rn-1] == 0)
-    rn--;
-  *rnp = rn;
-}
-
-void
-mpn_copyi (mp_limb_t *dst, mp_limb_t *src, int n)
-{
-  int  i;
-  for (i = 0; i < n; i++)
-    dst[i] = src[i];
-}
-
-void
-mpn_zero (mp_limb_t *rp, int rn)
-{
-  int  i;
-  for (i = 0; i < rn; i++)
-    rp[i] = 0;
-}
-
-void
-mpz_init (mpz_t r)
-{
-  ALLOC(r) = 1;
-  PTR(r) = xmalloc_limbs (ALLOC(r));
-  PTR(r)[0] = 0;
-  SIZ(r) = 0;
-}
-
-void
-mpz_clear (mpz_t r)
-{
-  free (PTR (r));
-  ALLOC(r) = -1;
-  SIZ (r) = 0xbadcafeL;
-  PTR (r) = (mp_limb_t *) 0xdeadbeefL;
-}
-
-int
-mpz_sgn (mpz_t a)
-{
-  return (SIZ(a) > 0 ? 1 : SIZ(a) == 0 ? 0 : -1);
-}
-
-int
-mpz_odd_p (mpz_t a)
-{
-  if (SIZ(a) == 0)
-    return 0;
-  else
-    return (PTR(a)[0] & 1) != 0;
-}
-
-int
-mpz_even_p (mpz_t a)
-{
-  if (SIZ(a) == 0)
-    return 1;
-  else
-    return (PTR(a)[0] & 1) == 0;
-}
-
-size_t
-mpz_sizeinbase (mpz_t a, int base)
-{
-  int an = ABSIZ (a);
-  mp_limb_t *ap = PTR (a);
-  int cnt;
-  mp_limb_t hi;
-
-  if (base != 2)
-    abort ();
-
-  if (an == 0)
-    return 1;
-
-  cnt = 0;
-  for (hi = ap[an - 1]; hi != 0; hi >>= 1)
-    cnt += 1;
-  return (an - 1) * GMP_LIMB_BITS + cnt;
-}
-
-void
-mpz_set (mpz_t r, mpz_t a)
-{
-  mpz_realloc (r, ABSIZ (a));
-  SIZ(r) = SIZ(a);
-  mpn_copyi (PTR(r), PTR(a), ABSIZ (a));
-}
-
-void
-mpz_init_set (mpz_t r, mpz_t a)
-{
-  mpz_init (r);
-  mpz_set (r, a);
-}
-
-void
-mpz_set_ui (mpz_t r, unsigned long ui)
-{
-  int  rn;
-  mpz_realloc (r, 2);
-  PTR(r)[0] = LO(ui);
-  PTR(r)[1] = HI(ui);
-  rn = 2;
-  mpn_normalize (PTR(r), &rn);
-  SIZ(r) = rn;
-}
-
-void
-mpz_init_set_ui (mpz_t r, unsigned long ui)
-{
-  mpz_init (r);
-  mpz_set_ui (r, ui);
-}
-
-void
-mpz_setbit (mpz_t r, unsigned long bit)
-{
-  int        limb, rn, extend;
-  mp_limb_t  *rp;
-
-  rn = SIZ(r);
-  if (rn < 0)
-    abort ();  /* only r>=0 */
-
-  limb = bit / GMP_LIMB_BITS;
-  bit %= GMP_LIMB_BITS;
-
-  mpz_realloc (r, limb+1);
-  rp = PTR(r);
-  extend = (limb+1) - rn;
-  if (extend > 0)
-    mpn_zero (rp + rn, extend);
-
-  rp[limb] |= (mp_limb_t) 1 << bit;
-  SIZ(r) = MAX (rn, limb+1);
-}
-
-int
-mpz_tstbit (mpz_t r, unsigned long bit)
-{
-  int  limb;
-
-  if (SIZ(r) < 0)
-    abort ();  /* only r>=0 */
-
-  limb = bit / GMP_LIMB_BITS;
-  if (SIZ(r) <= limb)
-    return 0;
-
-  bit %= GMP_LIMB_BITS;
-  return (PTR(r)[limb] >> bit) & 1;
-}
-
-int
-popc_limb (mp_limb_t a)
-{
-  int  ret = 0;
-  while (a != 0)
-    {
-      ret += (a & 1);
-      a >>= 1;
-    }
-  return ret;
-}
-
-unsigned long
-mpz_popcount (mpz_t a)
-{
-  unsigned long  ret;
-  int            i;
-
-  if (SIZ(a) < 0)
-    abort ();
-
-  ret = 0;
-  for (i = 0; i < SIZ(a); i++)
-    ret += popc_limb (PTR(a)[i]);
-  return ret;
-}
-
-void
-mpz_add (mpz_t r, mpz_t a, mpz_t b)
-{
-  int an = ABSIZ (a), bn = ABSIZ (b), rn;
-  mp_limb_t *rp, *ap, *bp;
-  int i;
-  mp_limb_t t, cy;
-
-  if ((SIZ (a) ^ SIZ (b)) < 0)
-    abort ();                  /* really subtraction */
-  if (SIZ (a) < 0)
-    abort ();
-
-  mpz_realloc (r, MAX (an, bn) + 1);
-  ap = PTR (a);  bp = PTR (b);  rp = PTR (r);
-  if (an < bn)
-    {
-      mp_limb_t *tp;  int tn;
-      tn = an; an = bn; bn = tn;
-      tp = ap; ap = bp; bp = tp;
-    }
-
-  cy = 0;
-  for (i = 0; i < bn; i++)
-    {
-      t = ap[i] + bp[i] + cy;
-      rp[i] = LO (t);
-      cy = HI (t);
-    }
-  for (i = bn; i < an; i++)
-    {
-      t = ap[i] + cy;
-      rp[i] = LO (t);
-      cy = HI (t);
-    }
-  rp[an] = cy;
-  rn = an + 1;
-
-  mpn_normalize (rp, &rn);
-  SIZ (r) = rn;
-}
-
-void
-mpz_add_ui (mpz_t r, mpz_t a, unsigned long int ui)
-{
-  mpz_t b;
-
-  mpz_init (b);
-  mpz_set_ui (b, ui);
-  mpz_add (r, a, b);
-  mpz_clear (b);
-}
-
-void
-mpz_sub (mpz_t r, mpz_t a, mpz_t b)
-{
-  int an = ABSIZ (a), bn = ABSIZ (b), rn;
-  mp_limb_t *rp, *ap, *bp;
-  int i;
-  mp_limb_t t, cy;
-
-  if ((SIZ (a) ^ SIZ (b)) < 0)
-    abort ();                  /* really addition */
-  if (SIZ (a) < 0)
-    abort ();
-
-  mpz_realloc (r, MAX (an, bn) + 1);
-  ap = PTR (a);  bp = PTR (b);  rp = PTR (r);
-  if (an < bn)
-    {
-      mp_limb_t *tp;  int tn;
-      tn = an; an = bn; bn = tn;
-      tp = ap; ap = bp; bp = tp;
-    }
-
-  cy = 0;
-  for (i = 0; i < bn; i++)
-    {
-      t = ap[i] - bp[i] - cy;
-      rp[i] = LO (t);
-      cy = LO (-HI (t));
-    }
-  for (i = bn; i < an; i++)
-    {
-      t = ap[i] - cy;
-      rp[i] = LO (t);
-      cy = LO (-HI (t));
-    }
-  rp[an] = cy;
-  rn = an + 1;
-
-  if (cy != 0)
-    {
-      cy = 0;
-      for (i = 0; i < rn; i++)
-       {
-         t = -rp[i] - cy;
-         rp[i] = LO (t);
-         cy = LO (-HI (t));
-       }
-      SIZ (r) = -rn;
-      return;
-    }
-
-  mpn_normalize (rp, &rn);
-  SIZ (r) = rn;
-}
-
-void
-mpz_sub_ui (mpz_t r, mpz_t a, unsigned long int ui)
-{
-  mpz_t b;
-
-  mpz_init (b);
-  mpz_set_ui (b, ui);
-  mpz_sub (r, a, b);
-  mpz_clear (b);
-}
-
-void
-mpz_mul (mpz_t r, mpz_t a, mpz_t b)
-{
-  int an = ABSIZ (a), bn = ABSIZ (b), rn;
-  mp_limb_t *scratch, *tmp, *ap = PTR (a), *bp = PTR (b);
-  int ai, bi;
-  mp_limb_t t, cy;
-
-  scratch = xmalloc_limbs (an + bn);
-  tmp = scratch;
-
-  for (bi = 0; bi < bn; bi++)
-    tmp[bi] = 0;
-
-  for (ai = 0; ai < an; ai++)
-    {
-      tmp = scratch + ai;
-      cy = 0;
-      for (bi = 0; bi < bn; bi++)
-       {
-         t = ap[ai] * bp[bi] + tmp[bi] + cy;
-         tmp[bi] = LO (t);
-         cy = HI (t);
-       }
-      tmp[bn] = cy;
-    }
-
-  rn = an + bn;
-  mpn_normalize (scratch, &rn);
-  free (PTR (r));
-  PTR (r) = scratch;
-  SIZ (r) = (SIZ (a) ^ SIZ (b)) >= 0 ? rn : -rn;
-  ALLOC (r) = an + bn;
-}
-
-void
-mpz_mul_ui (mpz_t r, mpz_t a, unsigned long int ui)
-{
-  mpz_t b;
-
-  mpz_init (b);
-  mpz_set_ui (b, ui);
-  mpz_mul (r, a, b);
-  mpz_clear (b);
-}
-
-void
-mpz_mul_2exp (mpz_t r, mpz_t a, unsigned long int bcnt)
-{
-  mpz_set (r, a);
-  while (bcnt)
-    {
-      mpz_add (r, r, r);
-      bcnt -= 1;
-    }
-}
-
-void
-mpz_ui_pow_ui (mpz_t r, unsigned long b, unsigned long e)
-{
-  unsigned long  i;
-  mpz_t          bz;
-
-  mpz_init (bz);
-  mpz_set_ui (bz, b);
-
-  mpz_set_ui (r, 1L);
-  for (i = 0; i < e; i++)
-    mpz_mul (r, r, bz);
-
-  mpz_clear (bz);
-}
-
-void
-mpz_tdiv_q_2exp (mpz_t r, mpz_t a, unsigned long int bcnt)
-{
-  int as, rn;
-  int cnt, tnc;
-  int lcnt;
-  mp_limb_t high_limb, low_limb;
-  int i;
-
-  as = SIZ (a);
-  lcnt = bcnt / GMP_LIMB_BITS;
-  rn = ABS (as) - lcnt;
-  if (rn <= 0)
-    SIZ (r) = 0;
-  else
-    {
-      mp_limb_t *rp, *ap;
-
-      mpz_realloc (r, rn);
-
-      rp = PTR (r);
-      ap = PTR (a);
-
-      cnt = bcnt % GMP_LIMB_BITS;
-      if (cnt != 0)
-        {
-         ap += lcnt;
-         tnc = GMP_LIMB_BITS - cnt;
-         high_limb = *ap++;
-         low_limb = high_limb >> cnt;
-
-         for (i = rn - 1; i != 0; i--)
-           {
-             high_limb = *ap++;
-             *rp++ = low_limb | LO (high_limb << tnc);
-             low_limb = high_limb >> cnt;
-           }
-         *rp = low_limb;
-          rn -= low_limb == 0;
-        }
-      else
-        {
-         ap += lcnt;
-          mpn_copyi (rp, ap, rn);
-        }
-
-      SIZ (r) = as >= 0 ? rn : -rn;
-    }
-}
-
-void
-mpz_tdiv_r_2exp (mpz_t r, mpz_t a, unsigned long int bcnt)
-{
-  int    rn, bwhole;
-
-  mpz_set (r, a);
-  rn = ABSIZ(r);
-
-  bwhole = bcnt / GMP_LIMB_BITS;
-  bcnt %= GMP_LIMB_BITS;
-  if (rn > bwhole)
-    {
-      rn = bwhole+1;
-      PTR(r)[rn-1] &= ((mp_limb_t) 1 << bcnt) - 1;
-      mpn_normalize (PTR(r), &rn);
-      SIZ(r) = (SIZ(r) >= 0 ? rn : -rn);
-    }
-}
-
-int
-mpz_cmp (mpz_t a, mpz_t b)
-{
-  mp_limb_t *ap, *bp, al, bl;
-  int as = SIZ (a), bs = SIZ (b);
-  int i;
-  int sign;
-
-  if (as != bs)
-    return as > bs ? 1 : -1;
-
-  sign = as > 0 ? 1 : -1;
-
-  ap = PTR (a);
-  bp = PTR (b);
-  for (i = ABS (as) - 1; i >= 0; i--)
-    {
-      al = ap[i];
-      bl = bp[i];
-      if (al != bl)
-       return al > bl ? sign : -sign;
-    }
-  return 0;
-}
-
-int
-mpz_cmp_ui (mpz_t a, unsigned long b)
-{
-  mpz_t  bz;
-  int    ret;
-  mpz_init_set_ui (bz, b);
-  ret = mpz_cmp (a, bz);
-  mpz_clear (bz);
-  return ret;
-}
-
-void
-mpz_tdiv_qr (mpz_t q, mpz_t r, mpz_t a, mpz_t b)
-{
-  mpz_t          tmpr, tmpb;
-  unsigned long  cnt;
-
-  ASSERT (mpz_sgn(a) >= 0);
-  ASSERT (mpz_sgn(b) > 0);
-
-  mpz_init_set (tmpr, a);
-  mpz_init_set (tmpb, b);
-  mpz_set_ui (q, 0L);
-
-  if (mpz_cmp (tmpr, tmpb) > 0)
-    {
-      cnt = mpz_sizeinbase (tmpr, 2) - mpz_sizeinbase (tmpb, 2) + 1;
-      mpz_mul_2exp (tmpb, tmpb, cnt);
-
-      for ( ; cnt > 0; cnt--)
-        {
-          mpz_mul_2exp (q, q, 1);
-          mpz_tdiv_q_2exp (tmpb, tmpb, 1L);
-          if (mpz_cmp (tmpr, tmpb) >= 0)
-            {
-              mpz_sub (tmpr, tmpr, tmpb);
-              mpz_add_ui (q, q, 1L);
-              ASSERT (mpz_cmp (tmpr, tmpb) < 0);
-            }
-        }
-    }
-
-  mpz_set (r, tmpr);
-  mpz_clear (tmpr);
-  mpz_clear (tmpb);
-}
-
-void
-mpz_tdiv_qr_ui (mpz_t q, mpz_t r, mpz_t a, unsigned long b)
-{
-  mpz_t  bz;
-  mpz_init_set_ui (bz, b);
-  mpz_tdiv_qr (q, r, a, bz);
-  mpz_clear (bz);
-}
-
-void
-mpz_tdiv_q (mpz_t q, mpz_t a, mpz_t b)
-{
-  mpz_t  r;
-
-  mpz_init (r);
-  mpz_tdiv_qr (q, r, a, b);
-  mpz_clear (r);
-}
-
-void
-mpz_tdiv_r (mpz_t r, mpz_t a, mpz_t b)
-{
-  mpz_t  q;
-
-  mpz_init (q);
-  mpz_tdiv_qr (q, r, a, b);
-  mpz_clear (q);
-}
-
-void
-mpz_tdiv_q_ui (mpz_t q, mpz_t n, unsigned long d)
-{
-  mpz_t  dz;
-  mpz_init_set_ui (dz, d);
-  mpz_tdiv_q (q, n, dz);
-  mpz_clear (dz);
-}
-
-/* Set inv to the inverse of d, in the style of invert_limb, ie. for
-   udiv_qrnnd_preinv.  */
-void
-mpz_preinv_invert (mpz_t inv, mpz_t d, int numb_bits)
-{
-  mpz_t  t;
-  int    norm;
-  ASSERT (SIZ(d) > 0);
-
-  norm = numb_bits - mpz_sizeinbase (d, 2);
-  ASSERT (norm >= 0);
-  mpz_init_set_ui (t, 1L);
-  mpz_mul_2exp (t, t, 2*numb_bits - norm);
-  mpz_tdiv_q (inv, t, d);
-  mpz_set_ui (t, 1L);
-  mpz_mul_2exp (t, t, numb_bits);
-  mpz_sub (inv, inv, t);
-
-  mpz_clear (t);
-}
-
-/* Remove leading '0' characters from the start of a string, by copying the
-   remainder down. */
-void
-strstrip_leading_zeros (char *s)
-{
-  char  c, *p;
-
-  p = s;
-  while (*s == '0')
-    s++;
-
-  do
-    {
-      c = *s++;
-      *p++ = c;
-    }
-  while (c != '\0');
-}
-
-char *
-mpz_get_str (char *buf, int base, mpz_t a)
-{
-  static char  tohex[] = "0123456789abcdef";
-
-  mp_limb_t  alimb, *ap;
-  int        an, bn, i, j;
-  char       *bp;
-
-  if (base != 16)
-    abort ();
-  if (SIZ (a) < 0)
-    abort ();
-
-  if (buf == 0)
-    buf = xmalloc (ABSIZ (a) * (GMP_LIMB_BITS / 4) + 3);
-
-  an = ABSIZ (a);
-  if (an == 0)
-    {
-      buf[0] = '0';
-      buf[1] = '\0';
-      return buf;
-    }
-
-  ap = PTR (a);
-  bn = an * (GMP_LIMB_BITS / 4);
-  bp = buf + bn;
-
-  for (i = 0; i < an; i++)
-    {
-      alimb = ap[i];
-      for (j = 0; j < GMP_LIMB_BITS / 4; j++)
-        {
-          bp--;
-          *bp = tohex [alimb & 0xF];
-          alimb >>= 4;
-        }
-      ASSERT (alimb == 0);
-    }
-  ASSERT (bp == buf);
-
-  buf[bn] = '\0';
-
-  strstrip_leading_zeros (buf);
-  return buf;
-}
-
-void
-mpz_out_str (FILE *file, int base, mpz_t a)
-{
-  char *str;
-
-  if (file == 0)
-    file = stdout;
-
-  str = mpz_get_str (0, 16, a);
-  fputs (str, file);
-  free (str);
-}
-
-/* Calculate r satisfying r*d == 1 mod 2^n. */
-void
-mpz_invert_2exp (mpz_t r, mpz_t a, unsigned long n)
-{
-  unsigned long  i;
-  mpz_t  inv, prod;
-
-  ASSERT (mpz_odd_p (a));
-
-  mpz_init_set_ui (inv, 1L);
-  mpz_init (prod);
-
-  for (i = 1; i < n; i++)
-    {
-      mpz_mul (prod, inv, a);
-      if (mpz_tstbit (prod, i) != 0)
-        mpz_setbit (inv, i);
-    }
-
-  mpz_mul (prod, inv, a);
-  mpz_tdiv_r_2exp (prod, prod, n);
-  ASSERT (mpz_cmp_ui (prod, 1L) == 0);
-
-  mpz_set (r, inv);
-
-  mpz_clear (inv);
-  mpz_clear (prod);
-}
-
-/* Calculate inv satisfying r*a == 1 mod 2^n. */
-void
-mpz_invert_ui_2exp (mpz_t r, unsigned long a, unsigned long n)
-{
-  mpz_t  az;
-  mpz_init_set_ui (az, a);
-  mpz_invert_2exp (r, az, n);
-  mpz_clear (az);
-}
-
-/* x=y^z */
-void
-mpz_pow_ui (mpz_t x, mpz_t y, unsigned long z)
-{
-  mpz_t t;
-
-  mpz_init_set_ui (t, 1);
-  for (; z != 0; z--)
-    mpz_mul (t, t, y);
-  mpz_set (x, t);
-  mpz_clear (t);
-}
-
-/* x=x+y*z */
-void
-mpz_addmul_ui (mpz_t x, mpz_t y, unsigned long z)
-{
-  mpz_t t;
-
-  mpz_init (t);
-  mpz_mul_ui (t, y, z);
-  mpz_add (x, x, t);
-  mpz_clear (t);
-}
-
-/* x=floor(y^(1/z)) */
-void
-mpz_root (mpz_t x, mpz_t y, unsigned long z)
-{
-  mpz_t t, u;
-
-  if (mpz_sgn (y) < 0)
-    {
-      fprintf (stderr, "mpz_root does not accept negative values\n");
-      abort ();
-    }
-  if (mpz_cmp_ui (y, 1) <= 0)
-    {
-      mpz_set (x, y);
-      return;
-    }
-  mpz_init (t);
-  mpz_init_set (u, y);
-  do
-    {
-      mpz_pow_ui (t, u, z - 1);
-      mpz_tdiv_q (t, y, t);
-      mpz_addmul_ui (t, u, z - 1);
-      mpz_tdiv_q_ui (t, t, z);
-      if (mpz_cmp (t, u) >= 0)
-       break;
-      mpz_set (u, t);
-    }
-  while (1);
-  mpz_set (x, u);
-  mpz_clear (t);
-  mpz_clear (u);
-}
diff --git a/extract-dbl.c b/extract-dbl.c

index 9c2ae9b7c08cdbd0328b63cc2a161fbf1fc64652..f81789a4bfca4249722fb05e027d116d75f78c71 100644 (file)
--- a/extract-dbl.c
+++ b/extract-dbl.c
@@ -1,6 +1,7 @@
  /* __gmp_extract_double -- convert from double to array of mp_limb_t.
  
-Copyright 1996, 1999, 2000, 2001, 2002, 2006 Free Software Foundation, Inc.
+Copyright 1996, 1999, 2000, 2001, 2002, 2006, 2012 Free Software Foundation,
+Inc.
  
  This file is part of the GNU MP Library.
  
@@ -28,8 +29,6 @@ along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
  #define _GMP_IEEE_FLOATS 0
  #endif
  
-#define BITS_IN_MANTISSA 53
-
  /* Extract a non-negative double in d.  */
  
  int
diff --git a/gen-bases.c b/gen-bases.c

index 31895e8b240939c1bacc634c4fad19be941e7aae..60cbd3f2fce55c21e109bbd5120d92b5533b89fa 100644 (file)
--- a/gen-bases.c
+++ b/gen-bases.c
@@ -1,7 +1,7 @@
  /* Generate mp_bases data.
  
-Copyright 1991, 1993, 1994, 1996, 2000, 2002, 2004 Free Software Foundation,
-Inc.
+Copyright 1991, 1993, 1994, 1996, 2000, 2002, 2004, 2011, 2012 Free Software
+Foundation, Inc.
  
  This file is part of the GNU MP Library.
  
@@ -18,13 +18,10 @@ License for more details.
  You should have received a copy of the GNU Lesser General Public License
  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
  
-#include <math.h>
-
-#include "dumbmp.c"
+#include "bootstrap.c"
  
  
  int    chars_per_limb;
-double chars_per_bit_exactly;
  mpz_t  big_base;
  int    normalization_steps;
  mpz_t  big_base_inverted;
@@ -59,8 +56,6 @@ generate (int limb_bits, int nail_bits, int base)
        chars_per_limb++;
      }
  
-  chars_per_bit_exactly = 0.69314718055994530942 / log ((double) base);
-
    mpz_ui_pow_ui (big_base, (long) base, (long) chars_per_limb);
  
    normalization_steps = limb_bits - mpz_sizeinbase (big_base, 2);
@@ -97,11 +92,61 @@ header (int limb_bits, int nail_bits)
    printf ("#define MP_BASES_NORMALIZATION_STEPS_10 %d\n", normalization_steps);
  }
  
+
+#define EXTRA 16
+
+/* Compute log(2)/log(b) as a fixnum. */
+void
+mp_2logb (mpz_t r, int bi, int prec)
+{
+  mpz_t t, t2, two, b;
+  int i;
+
+  mpz_init_set_ui (t, 1);
+  mpz_mul_2exp (t, t, prec+EXTRA);
+
+  mpz_init (t2);
+
+  mpz_init_set_ui (two, 2);
+  mpz_mul_2exp (two, two, prec+EXTRA);
+
+  mpz_set_ui (r, 0);
+
+  mpz_init_set_ui (b, bi);
+  mpz_mul_2exp (b, b, prec+EXTRA);
+
+  for (i = prec-1; i >= 0; i--)
+    {
+      mpz_mul_2exp (b, b, prec+EXTRA);
+      mpz_sqrt (b, b);
+
+      mpz_mul (t2, t, b);
+      mpz_tdiv_q_2exp (t2, t2, prec+EXTRA);
+
+      if (mpz_cmp (t2, two) < 0)       /* not too large? */
+       {
+         mpz_setbit (r, i);            /* set next less significant bit */
+         mpz_set (t, t2);              /* new value acceptable */
+       }
+    }
+
+  mpz_clear (t);
+  mpz_clear (t2);
+  mpz_clear (two);
+  mpz_clear (b);
+}
+
  void
  table (int limb_bits, int nail_bits)
  {
    int  numb_bits = limb_bits - nail_bits;
    int  base;
+  mpz_t r, t, logb2, log2b;
+
+  mpz_init (r);
+  mpz_init (t);
+  mpz_init (logb2);
+  mpz_init (log2b);
  
    printf ("/* This file generated by gen-bases.c - DO NOT EDIT. */\n");
    printf ("\n");
@@ -113,30 +158,45 @@ table (int limb_bits, int nail_bits)
    printf ("#endif\n");
    printf ("\n");
    puts ("const struct bases mp_bases[257] =\n{");
-  puts ("  /*   0 */ { 0, 0.0, 0 },");
-  puts ("  /*   1 */ { 0, 1e37, 0 },");
+  puts ("  /*   0 */ { 0, 0, 0, 0, 0 },");
+  puts ("  /*   1 */ { 0, 0, 0, 0, 0 },");
    for (base = 2; base <= 256; base++)
      {
        generate (limb_bits, nail_bits, base);
+      mp_2logb (r, base, limb_bits + 8);
+      mpz_tdiv_q_2exp (logb2, r, 8);
+      mpz_set_ui (t, 1);
+      mpz_mul_2exp (t, t, 2*limb_bits + 5);
+      mpz_sub_ui (t, t, 1);
+      mpz_add_ui (r, r, 1);
+      mpz_tdiv_q (log2b, t, r);
  
        printf ("  /* %3u */ { ", base);
        if (POW2_P (base))
         {
-          printf ("%u, %.16f, 0x%x },\n",
-                  chars_per_limb, chars_per_bit_exactly, ulog2 (base) - 1);
-       }
-      else
-       {
-          printf ("%u, %.16f, CNST_LIMB(0x",
-                  chars_per_limb, chars_per_bit_exactly);
-         mpz_out_str (stdout, 16, big_base);
-          printf ("), CNST_LIMB(0x");
-         mpz_out_str (stdout, 16, big_base_inverted);
-          printf (") },\n");
+          mpz_set_ui (big_base, ulog2 (base) - 1);
+         mpz_set_ui (big_base_inverted, 0);
         }
+
+      printf ("%u,", chars_per_limb);
+      printf (" CNST_LIMB(0x");
+      mpz_out_str (stdout, 16, logb2);
+      printf ("), CNST_LIMB(0x");
+      mpz_out_str (stdout, 16, log2b);
+      printf ("), CNST_LIMB(0x");
+      mpz_out_str (stdout, 16, big_base);
+      printf ("), CNST_LIMB(0x");
+      mpz_out_str (stdout, 16, big_base_inverted);
+      printf (") },\n");
      }
  
    puts ("};");
+
+  mpz_clear (r);
+  mpz_clear (t);
+  mpz_clear (logb2);
+  mpz_clear (log2b);
+
  }
  
  int
diff --git a/gen-fac.c b/gen-fac.c

new file mode 100644 (file)

index 0000000..522e573
--- /dev/null
+++ b/gen-fac.c
@@ -0,0 +1,330 @@
+/* Generate data for combinatorics: fac_ui, bin_uiui, ...
+
+Copyright 2002, 2011, 2012 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "bootstrap.c"
+
+/* Divide x in place by the largest power of two that divides it, and
+   return the exponent of that power.
+
+   Zero stays even however often it is halved, so it is left untouched and
+   0 is returned instead of looping forever.  */
+int
+mpz_remove_twos (mpz_t x)
+{
+  int r = 0;
+
+  if (mpz_sgn (x) == 0)
+    return 0;
+
+  for (; mpz_even_p (x); r++)
+    mpz_tdiv_q_2exp (x, x, 1);
+  return r;
+}
+
+/* Print to stdout the preprocessor tables used by the combinatorial code:
+   factorials, odd factorials (plain, continued modulo 2^GMP_NUMB_BITS, and
+   their inverses), odd double factorials, n-th root bounds, 2n-popc(2n),
+   and odd central binomials with their inverses and removed powers of two.
+
+   numb is the GMP_NUMB_BITS value the tables are generated for; every
+   "fits in a limb" test below compares a bit size against it.  nail is not
+   used, and limb is referenced only by code disabled with #if 0.
+
+   returns 0 on success (there is no failure path).  */
+int
+gen_consts (int numb, int nail, int limb)
+{
+  mpz_t x, mask, y, last;
+  unsigned long a, b;
+  unsigned long ofl, ofe;
+
+  printf ("/* This file is automatically generated by gen-fac.c */\n\n");
+  printf ("#if GMP_NUMB_BITS != %d\n", numb);
+  printf ("Error , error this data is for %d GMP_NUMB_BITS only\n", numb);
+  printf ("#endif\n");
+#if 0
+  printf ("#if GMP_LIMB_BITS != %d\n", limb);
+  printf ("Error , error this data is for %d GMP_LIMB_BITS only\n", limb);
+  printf ("#endif\n");
+#endif
+
+  printf
+    ("/* This table is 0!,1!,2!,3!,...,n! where n! has <= GMP_NUMB_BITS bits */\n");
+  printf
+    ("#define ONE_LIMB_FACTORIAL_TABLE CNST_LIMB(0x1),CNST_LIMB(0x1");
+  mpz_init_set_ui (x, 1);
+  mpz_init (last);
+  for (b = 2;; b++)
+    {
+      mpz_mul_ui (x, x, b);    /* now x = b!    */
+      if (mpz_sizeinbase (x, 2) > numb)
+       break;
+      printf ("),CNST_LIMB(0x");
+      mpz_out_str (stdout, 16, x);
+    }
+  printf (")\n");
+
+  printf
+    ("\n/* This table is 0!,1!,2!/2,3!/2,...,n!/2^sn where n!/2^sn is an */\n");
+  printf
+    ("/* odd integer for each n, and n!/2^sn has <= GMP_NUMB_BITS bits */\n");
+  printf
+    ("#define ONE_LIMB_ODD_FACTORIAL_TABLE CNST_LIMB(0x1),CNST_LIMB(0x1),CNST_LIMB(0x1");
+  mpz_set_ui (x, 1);
+  for (b = 3;; b++)
+    {
+      /* a = b with all factors of two shifted out */
+      for (a = b; (a & 1) == 0; a >>= 1);
+      /* remember the previous product: it is the last one that fitted */
+      mpz_set (last, x);
+      mpz_mul_ui (x, x, a);
+      if (mpz_sizeinbase (x, 2) > numb)
+       break;
+      printf ("),CNST_LIMB(0x");
+      mpz_out_str (stdout, 16, x);
+    }
+  printf (")\n");
+  printf
+    ("#define ODD_FACTORIAL_TABLE_MAX CNST_LIMB(0x");
+  mpz_out_str (stdout, 16, last);
+  printf (")\n");
+
+  /* b is the first index whose value did not fit */
+  ofl = b - 1;
+  printf
+    ("#define ODD_FACTORIAL_TABLE_LIMIT (%lu)\n", ofl);
+  /* mask = 2^numb - 1 */
+  mpz_init (mask);
+  mpz_setbit (mask, numb);
+  mpz_sub_ui (mask, mask, 1);
+  printf
+    ("\n/* Previous table, continued, values modulo 2^GMP_NUMB_BITS */\n");
+  printf
+    ("#define ONE_LIMB_ODD_FACTORIAL_EXTTABLE CNST_LIMB(0x");
+  mpz_and (x, x, mask);
+  mpz_out_str (stdout, 16, x);
+  /* y starts as binomial(b, b/2) and is updated step by step below; the
+     extended table stops once y needs more than numb bits */
+  mpz_init (y);
+  mpz_bin_uiui (y, b, b/2);
+  b++;
+  for (;; b++)
+    {
+      for (a = b; (a & 1) == 0; a >>= 1);
+      if (a == b) {
+       mpz_divexact_ui (y, y, a/2+1);
+       mpz_mul_ui (y, y, a);
+      } else
+       mpz_mul_2exp (y, y, 1);
+      if (mpz_sizeinbase (y, 2) > numb)
+       break;
+      mpz_mul_ui (x, x, a);
+      mpz_and (x, x, mask);
+      printf ("),CNST_LIMB(0x");
+      mpz_out_str (stdout, 16, x);
+    }
+  printf (")\n");
+  ofe = b - 1;
+  printf
+    ("#define ODD_FACTORIAL_EXTTABLE_LIMIT (%lu)\n", ofe);
+
+  printf
+    ("\n/* This table is 1!!,3!!,...,(2n+1)!! where (2n+1)!! has <= GMP_NUMB_BITS bits */\n");
+  printf
+    ("#define ONE_LIMB_ODD_DOUBLEFACTORIAL_TABLE CNST_LIMB(0x1");
+  mpz_set_ui (x, 1);
+  for (b = 3;; b+=2)
+    {
+      mpz_set (last, x);
+      mpz_mul_ui (x, x, b);
+      if (mpz_sizeinbase (x, 2) > numb)
+       break;
+      printf ("),CNST_LIMB(0x");
+      mpz_out_str (stdout, 16, x);
+    }
+  printf (")\n");
+  printf
+    ("#define ODD_DOUBLEFACTORIAL_TABLE_MAX CNST_LIMB(0x");
+  mpz_out_str (stdout, 16, last);
+  printf (")\n");
+
+  printf
+    ("#define ODD_DOUBLEFACTORIAL_TABLE_LIMIT (%lu)\n", b - 2);
+
+  printf
+    ("\n/* This table x_1, x_2,... contains values s.t. x_n^n has <= GMP_NUMB_BITS bits */\n");
+  printf
+    ("#define NTH_ROOT_NUMB_MASK_TABLE (GMP_NUMB_MASK");
+  for (b = 2;b <= 8; b++)
+    {
+      mpz_root (x, mask, b);
+      printf ("),CNST_LIMB(0x");
+      mpz_out_str (stdout, 16, x);
+    }
+  printf (")\n");
+
+  /* mask = 2^numb from here on: it is the modulus passed to mpz_invert */
+  mpz_add_ui (mask, mask, 1);
+  printf
+    ("\n/* This table contains inverses of odd factorials, modulo 2^GMP_NUMB_BITS */\n");
+  printf
+    ("\n/* It begins with (2!/2)^-1=1 */\n");
+  printf
+    ("#define ONE_LIMB_ODD_FACTORIAL_INVERSES_TABLE CNST_LIMB(0x1");
+  mpz_set_ui (x, 1);
+  for (b = 3;b <= ofe - 2; b++)
+    {
+      for (a = b; (a & 1) == 0; a >>= 1);
+      mpz_mul_ui (x, x, a);
+      mpz_invert (y, x, mask);
+      printf ("),CNST_LIMB(0x");
+      mpz_out_str (stdout, 16, y);
+    }
+  printf (")\n");
+
+  ofe = (ofe / 16 + 1) * 16;
+
+  printf
+    ("\n/* This table contains 2n-popc(2n) for small n */\n");
+  printf
+    ("\n/* It begins with 2-1=1 (n=1) */\n");
+  printf
+    ("#define TABLE_2N_MINUS_POPC_2N 1");
+  for (b = 4; b <= ofe; b += 2)
+    {
+      mpz_set_ui (x, b);
+      printf (",%lu",b - mpz_popcount (x));
+    }
+  printf ("\n");
+  printf
+    ("#define TABLE_LIMIT_2N_MINUS_POPC_2N %lu\n", ofe + 1);
+
+
+  ofl = (ofl + 1) / 2;
+  printf
+    ("#define ODD_CENTRAL_BINOMIAL_OFFSET (%lu)\n", ofl);
+  printf
+    ("\n/* This table contains binomial(2k,k)/2^t */\n");
+  printf
+    ("\n/* It begins with ODD_CENTRAL_BINOMIAL_TABLE_MIN */\n");
+  printf
+    ("#define ONE_LIMB_ODD_CENTRAL_BINOMIAL_TABLE ");
+  for (b = ofl;; b++)
+    {
+      mpz_bin_uiui (x, 2 * b, b);
+      mpz_remove_twos (x);
+      if (mpz_sizeinbase (x, 2) > numb)
+       break;
+      if (b != ofl)
+       printf ("),");
+      printf("CNST_LIMB(0x");
+      mpz_out_str (stdout, 16, x);
+    }
+  printf (")\n");
+
+  ofe = b - 1;
+  printf
+    ("#define ODD_CENTRAL_BINOMIAL_TABLE_LIMIT (%lu)\n", ofe);
+
+  printf
+    ("\n/* This table contains the inverses of elements in the previous table. */\n");
+  printf
+    ("#define ONE_LIMB_ODD_CENTRAL_BINOMIAL_INVERSE_TABLE CNST_LIMB(0x");
+  for (b = ofl; b <= ofe; b++)
+    {
+      mpz_bin_uiui (x, 2 * b, b);
+      mpz_remove_twos (x);
+      mpz_invert (x, x, mask);
+      mpz_out_str (stdout, 16, x);
+      if (b != ofe)
+       printf ("),CNST_LIMB(0x");
+    }
+  printf (")\n");
+
+  printf
+    ("\n/* This table contains the values t in the formula binomial(2k,k)/2^t */\n");
+  printf
+    ("#define CENTRAL_BINOMIAL_2FAC_TABLE ");
+  for (b = ofl; b <= ofe; b++)
+    {
+      mpz_bin_uiui (x, 2 * b, b);
+      printf ("%d", mpz_remove_twos (x));
+      if (b != ofe)
+       printf (",");
+    }
+  printf ("\n");
+
+#if 0
+  mpz_set_ui (x, 1);
+  mpz_mul_2exp (x, x, limb + 1);       /* x=2^(limb+1)        */
+  mpz_init (y);
+  mpz_set_ui (y, 10000);
+  mpz_mul (x, x, y);           /* x=2^(limb+1)*10^4     */
+  mpz_set_ui (y, 27182);       /* exp(1)*10^4      */
+  mpz_tdiv_q (x, x, y);                /* x=2^(limb+1)/exp(1)        */
+  printf ("\n/* is 2^(GMP_LIMB_BITS+1)/exp(1) */\n");
+  printf ("#define FAC2OVERE CNST_LIMB(0x");
+  mpz_out_str (stdout, 16, x);
+  printf (")\n");
+
+
+  printf
+    ("\n/* FACMULn is largest odd x such that x*(x+2)*...*(x+2(n-1))<=2^GMP_NUMB_BITS-1 */\n\n");
+  mpz_init (z);
+  mpz_init (t);
+  for (a = 2; a <= 4; a++)
+    {
+      mpz_set_ui (x, 1);
+      mpz_mul_2exp (x, x, numb);
+      mpz_root (x, x, a);
+      /* so x is approx sol       */
+      if (mpz_even_p (x))
+       mpz_sub_ui (x, x, 1);
+      mpz_set_ui (y, 1);
+      mpz_mul_2exp (y, y, numb);
+      mpz_sub_ui (y, y, 1);
+      /* decrement x until we are <= real sol     */
+      do
+       {
+         mpz_sub_ui (x, x, 2);
+         odd_products (t, x, a);
+         if (mpz_cmp (t, y) <= 0)
+           break;
+       }
+      while (1);
+      /* increment x until > real sol     */
+      do
+       {
+         mpz_add_ui (x, x, 2);
+         odd_products (t, x, a);
+         if (mpz_cmp (t, y) > 0)
+           break;
+       }
+      while (1);
+      /* dec once to get real sol */
+      mpz_sub_ui (x, x, 2);
+      printf ("#define FACMUL%lu CNST_LIMB(0x", a);
+      mpz_out_str (stdout, 16, x);
+      printf (")\n");
+    }
+#endif
+
+  /* release the temporaries initialised above */
+  mpz_clear (x);
+  mpz_clear (mask);
+  mpz_clear (y);
+  mpz_clear (last);
+
+  return 0;
+}
+
+/* Usage: gen-fac limbbits nailbits
+
+   Checks the two bit counts given on the command line and writes the
+   generated tables to stdout through gen_consts.  Exits with status 1 when
+   the argument count is wrong or the bit counts are invalid; otherwise the
+   exit status is whatever gen_consts returns (0 on success).  */
+int
+main (int argc, char *argv[])
+{
+  int nail_bits, limb_bits, numb_bits;
+
+  if (argc != 3)
+    {
+      /* name the program as it is called now, not the old gen-fac_ui */
+      fprintf (stderr, "Usage: gen-fac limbbits nailbits\n");
+      exit (1);
+    }
+  limb_bits = atoi (argv[1]);
+  nail_bits = atoi (argv[2]);
+  numb_bits = limb_bits - nail_bits;
+  if (limb_bits < 2 || nail_bits < 0 || numb_bits < 1)
+    {
+      fprintf (stderr, "Invalid limb/nail bits %d,%d\n", limb_bits,
+              nail_bits);
+      exit (1);
+    }
+  return gen_consts (numb_bits, nail_bits, limb_bits);
+}
diff --git a/gen-fac_ui.c b/gen-fac_ui.c

deleted file mode 100644 (file)

index a9521ba..0000000
--- a/gen-fac_ui.c
+++ /dev/null
@@ -1,159 +0,0 @@
-/* Generate mpz_fac_ui data.
-
-Copyright 2002 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-License for more details.
-
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
-
-#include <stdio.h>
-#include <stdlib.h>
-
-#include "dumbmp.c"
-
-
-/* sets x=y*(y+2)*(y+4)*....*(y+2*(z-1))       */
-void
-odd_products (mpz_t x, mpz_t y, int z)
-{
-  mpz_t t;
-
-  mpz_init_set (t, y);
-  mpz_set_ui (x, 1);
-  for (; z != 0; z--)
-    {
-      mpz_mul (x, x, t);
-      mpz_add_ui (t, t, 2);
-    }
-  mpz_clear (t);
-  return;
-}
-
-/* returns 0 on success                */
-int
-gen_consts (int numb, int nail, int limb)
-{
-  mpz_t x, y, z, t;
-  unsigned long a, b, first = 1;
-
-  printf ("/* This file is automatically generated by gen-fac_ui.c */\n\n");
-  printf ("#if GMP_NUMB_BITS != %d\n", numb);
-  printf ("Error , error this data is for %d GMP_NUMB_BITS only\n", numb);
-  printf ("#endif\n");
-  printf ("#if GMP_LIMB_BITS != %d\n", limb);
-  printf ("Error , error this data is for %d GMP_LIMB_BITS only\n", limb);
-  printf ("#endif\n");
-
-  printf
-    ("/* This table is 0!,1!,2!,3!,...,n! where n! has <= GMP_NUMB_BITS bits */\n");
-  printf
-    ("#define ONE_LIMB_FACTORIAL_TABLE CNST_LIMB(0x1),CNST_LIMB(0x1),CNST_LIMB(0x2),");
-  mpz_init_set_ui (x, 2);
-  for (b = 3;; b++)
-    {
-      mpz_mul_ui (x, x, b);    /* so b!=a       */
-      if (mpz_sizeinbase (x, 2) > numb)
-       break;
-      if (first)
-       {
-         first = 0;
-       }
-      else
-       {
-         printf ("),");
-       }
-      printf ("CNST_LIMB(0x");
-      mpz_out_str (stdout, 16, x);
-    }
-  printf (")\n");
-
-
-  mpz_set_ui (x, 1);
-  mpz_mul_2exp (x, x, limb + 1);       /* x=2^(limb+1)        */
-  mpz_init (y);
-  mpz_set_ui (y, 10000);
-  mpz_mul (x, x, y);           /* x=2^(limb+1)*10^4     */
-  mpz_set_ui (y, 27182);       /* exp(1)*10^4      */
-  mpz_tdiv_q (x, x, y);                /* x=2^(limb+1)/exp(1)        */
-  printf ("\n/* is 2^(GMP_LIMB_BITS+1)/exp(1) */\n");
-  printf ("#define FAC2OVERE CNST_LIMB(0x");
-  mpz_out_str (stdout, 16, x);
-  printf (")\n");
-
-
-  printf
-    ("\n/* FACMULn is largest odd x such that x*(x+2)*...*(x+2(n-1))<=2^GMP_NUMB_BITS-1 */\n\n");
-  mpz_init (z);
-  mpz_init (t);
-  for (a = 2; a <= 4; a++)
-    {
-      mpz_set_ui (x, 1);
-      mpz_mul_2exp (x, x, numb);
-      mpz_root (x, x, a);
-      /* so x is approx sol       */
-      if (mpz_even_p (x))
-       mpz_sub_ui (x, x, 1);
-      mpz_set_ui (y, 1);
-      mpz_mul_2exp (y, y, numb);
-      mpz_sub_ui (y, y, 1);
-      /* decrement x until we are <= real sol     */
-      do
-       {
-         mpz_sub_ui (x, x, 2);
-         odd_products (t, x, a);
-         if (mpz_cmp (t, y) <= 0)
-           break;
-       }
-      while (1);
-      /* increment x until > real sol     */
-      do
-       {
-         mpz_add_ui (x, x, 2);
-         odd_products (t, x, a);
-         if (mpz_cmp (t, y) > 0)
-           break;
-       }
-      while (1);
-      /* dec once to get real sol */
-      mpz_sub_ui (x, x, 2);
-      printf ("#define FACMUL%lu CNST_LIMB(0x", a);
-      mpz_out_str (stdout, 16, x);
-      printf (")\n");
-    }
-
-  return 0;
-}
-
-int
-main (int argc, char *argv[])
-{
-  int nail_bits, limb_bits, numb_bits;
-
-  if (argc != 3)
-    {
-      fprintf (stderr, "Usage: gen-fac_ui limbbits nailbits\n");
-      exit (1);
-    }
-  limb_bits = atoi (argv[1]);
-  nail_bits = atoi (argv[2]);
-  numb_bits = limb_bits - nail_bits;
-  if (limb_bits < 0 || nail_bits < 0 || numb_bits < 0)
-    {
-      fprintf (stderr, "Invalid limb/nail bits %d,%d\n", limb_bits,
-              nail_bits);
-      exit (1);
-    }
-  gen_consts (numb_bits, nail_bits, limb_bits);
-  return 0;
-}
diff --git a/gen-fib.c b/gen-fib.c

index fd7bb96a77b87eb15a3e8b02f9cf765df7a578b7..b6c77459237c21967476e7250754c1f82c40b010 100644 (file)
--- a/gen-fib.c
+++ b/gen-fib.c
@@ -1,6 +1,6 @@
  /* Generate Fibonacci table data.
  
-Copyright 2001, 2002, 2004 Free Software Foundation, Inc.
+Copyright 2001, 2002, 2004, 2012 Free Software Foundation, Inc.
  
  This file is part of the GNU MP Library.
  
@@ -18,7 +18,7 @@ You should have received a copy of the GNU Lesser General Public License
  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
  
  #include <stdio.h>
-#include "dumbmp.c"
+#include "bootstrap.c"
  
  mpz_t  *f;
  int    fnum, fib_limit, luc_limit;
@@ -34,7 +34,7 @@ generate (int numb_bits)
  
    /* fib(2n) > 2^n, so use 2n as a limit for the table size */
    falloc = 2 * numb_bits;
-  f = (mpz_t *) xmalloc (falloc * sizeof (*f));
+  f = xmalloc (falloc * sizeof (*f));
  
    mpz_init_set_ui (f[0], 1L);  /* F[-1] */
    mpz_init_set_ui (f[1], 0L);  /* F[0] */
@@ -43,7 +43,7 @@ generate (int numb_bits)
  
    for (i = 2; ; i++)
      {
-      ASSERT (i < falloc);
+      assert (i < falloc);
  
        /* F[i] = F[i-1] + F[i-2] */
        mpz_init (f[i]);
diff --git a/gen-jacobitab.c b/gen-jacobitab.c

new file mode 100644 (file)

index 0000000..a37ff22
--- /dev/null
+++ b/gen-jacobitab.c
@@ -0,0 +1,117 @@
+/* gen-jacobitab.c
+
+   Contributed to the GNU project by Niels Möller.
+
+Copyright 2010 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+/* Generate the lookup table needed for fast left-to-right computation
+   of the Jacobi symbol. */
+
+#include <assert.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+/* State decoding for the table generator.  A state is an index into
+   decode_table (0..12) with one extra bit e appended below it; the
+   JACOBI_* macros after the table take a state in that form.
+
+   decode_table[i] holds the pair (a, b) that index i stands for.  The pair
+   (3, 3) is stored twice: index 7 is used when d = 1 and index 12 when
+   d = 0 (see encode).  */
+static const struct
+{
+  unsigned char a;
+  unsigned char b;
+} decode_table[13] = {
+  /*  0 */ { 0, 1 },
+  /*  1 */ { 0, 3 },
+  /*  2 */ { 1, 1 },
+  /*  3 */ { 1, 3 },
+  /*  4 */ { 2, 1 },
+  /*  5 */ { 2, 3 },
+  /*  6 */ { 3, 1 },
+  /*  7 */ { 3, 3 }, /* d = 1 */
+  /*  8 */ { 1, 0 },
+  /*  9 */ { 1, 2 },
+  /* 10 */ { 3, 0 },
+  /* 11 */ { 3, 2 },
+  /* 12 */ { 3, 3 }, /* d = 0 */
+
+};
+/* Fields a and b of the table entry selected by the upper bits of a state.  */
+#define JACOBI_A(bits) (decode_table[(bits)>>1].a)
+#define JACOBI_B(bits) (decode_table[(bits)>>1].b)
+
+/* Lowest bit of a state.  */
+#define JACOBI_E(bits) ((bits) & 1)
+/* 1 only when the table index is 7, the d = 1 copy of (3, 3); 0 otherwise.  */
+#define JACOBI_D(bits) (((bits)>>1) == 7) /* Gives 0 for don't care states. */
+
+/* Inverse of decode_table: return the table index that stands for the
+   pair (a, b).  d only matters for the pair (3, 3), which has one entry
+   per value of d.
+
+   Requires d < 2, a < 4, b < 4 and at least one of a, b odd; these are
+   checked with assert.  */
+static unsigned
+encode (unsigned a, unsigned b, unsigned d)
+{
+  unsigned idx = 0;
+
+  assert (d < 2);
+  assert (a < 4);
+  assert (b < 4);
+  assert ( (a | b ) & 1);
+
+  /* (3, 3) is the one pair stored twice; d picks the copy */
+  if (a == 3 && b == 3)
+    {
+      if (d)
+       return 7;
+      return 12;
+    }
+
+  /* linear search of the remaining entries */
+  while (idx < 12)
+    {
+      if (decode_table[idx].a == a && decode_table[idx].b == b)
+       return idx;
+      idx++;
+    }
+
+  /* no entry matched; cannot happen for arguments that pass the
+     assertions above */
+  abort ();
+}
+
+/* Print the lookup table to stdout: 208 entries, each written as a
+   width-2 number followed by a comma, 16 entries per line.
+
+   The loop counter is the table index and is split as
+     bits 0-1   q
+     bit  2     d
+     bits 3-7   the current state, in the form the JACOBI_* macros read.
+   Each entry is the state that results from applying q and d to that
+   current state, again as (table index << 1) | e.
+
+   The command line is ignored.  Always returns 0.  */
+int
+main (int argc, char **argv)
+{
+  unsigned bits;
+
+  for (bits = 0; bits < 208; bits++)
+    {
+      unsigned e, a, b, d_old, d, q;
+
+      /* line break before every 16th entry except the first */
+      if (bits && !(bits & 0xf))
+       printf("\n");
+
+      q = bits & 3;
+      d = (bits >> 2) & 1;
+
+      e = JACOBI_E (bits >> 3);
+      a = JACOBI_A (bits >> 3);
+      b = JACOBI_B (bits >> 3);
+      d_old = JACOBI_D (bits >> 3);
+
+      if (d != d_old && a == 3 && b == 3)
+       e ^= 1;
+
+      if (d == 1)
+       {
+         if (b == 2)
+           e ^= (q & (a >> 1)) ^ (q >> 1);
+         a = (a - q * b) & 3;
+       }
+      else
+       {
+         if (a == 2)
+           e ^= (q & (b >> 1)) ^ (q >> 1);
+         b = (b - q * a) & 3;
+       }
+
+      /* the value is unsigned, so print it with %u; the output is the
+         same as before because every entry is at most 25 */
+      printf("%2u,", (encode (a, b, d) << 1) | e);
+    }
+  printf("\n");
+
+  return 0;
+}
diff --git a/gen-psqr.c b/gen-psqr.c

index 9c33d7a681056e7849b6037073497e0ab5bd6520..513286c012334abbc3b319b3ec7c19ced125423c 100644 (file)
--- a/gen-psqr.c
+++ b/gen-psqr.c
@@ -1,6 +1,6 @@
  /* Generate perfect square testing data.
  
-Copyright 2002, 2003, 2004 Free Software Foundation, Inc.
+Copyright 2002, 2003, 2004, 2012 Free Software Foundation, Inc.
  
  This file is part of the GNU MP Library.
  
@@ -20,7 +20,7 @@ along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
  #include <stdio.h>
  #include <stdlib.h>
  
-#include "dumbmp.c"
+#include "bootstrap.c"
  
  
  /* The aim of this program is to choose either mpn_mod_34lsub1 or mpn_mod_1
@@ -152,9 +152,9 @@ f_cmp_fraction (const void *parg, const void *qarg)
     accordingly.  */
  #define COLLAPSE_ELEMENT(array, idx, narray)                    \
    do {                                                          \
-    mem_copyi ((char *) &(array)[idx],                          \
-               (char *) &(array)[idx+1],                        \
-               ((narray)-((idx)+1)) * sizeof (array[0]));       \
+    memmove (&(array)[idx],                                    \
+            &(array)[idx+1],                                   \
+            ((narray)-((idx)+1)) * sizeof (array[0]));         \
      (narray)--;                                                 \
    } while (0)
  
@@ -173,7 +173,7 @@ mul_2exp_mod (int n, int p, int m)
  int
  neg_mod (int n, int m)
  {
-  ASSERT (n >= 0 && n < m);
+  assert (n >= 0 && n < m);
    return (n == 0 ? 0 : m-n);
  }
  
@@ -202,7 +202,7 @@ generate_sq_res_0x100 (int limb_bits)
    int  i, res;
  
    nsq_res_0x100 = (0x100 + limb_bits - 1) / limb_bits;
-  sq_res_0x100 = (mpz_t *) xmalloc (nsq_res_0x100 * sizeof (*sq_res_0x100));
+  sq_res_0x100 = xmalloc (nsq_res_0x100 * sizeof (*sq_res_0x100));
  
    for (i = 0; i < nsq_res_0x100; i++)
      mpz_init_set_ui (sq_res_0x100[i], 0L);
@@ -233,9 +233,8 @@ generate_mod (int limb_bits, int nail_bits)
    /* no more than limb_bits many factors in a one limb modulus (and of
       course in reality nothing like that many) */
    factor_alloc = limb_bits;
-  factor = (struct factor_t *) xmalloc (factor_alloc * sizeof (*factor));
-  rawfactor = (struct rawfactor_t *)
-    xmalloc (factor_alloc * sizeof (*rawfactor));
+  factor = xmalloc (factor_alloc * sizeof (*factor));
+  rawfactor = xmalloc (factor_alloc * sizeof (*rawfactor));
  
    if (numb_bits % 4 != 0)
      {
@@ -301,7 +300,7 @@ generate_mod (int limb_bits, int nail_bits)
            }
          while (mpz_sgn (r) == 0);
  
-        ASSERT (nrawfactor < factor_alloc);
+        assert (nrawfactor < factor_alloc);
          rawfactor[nrawfactor].divisor = i;
          rawfactor[nrawfactor].multiplicity = multiplicity;
          nrawfactor++;
@@ -341,7 +340,7 @@ generate_mod (int limb_bits, int nail_bits)
              break;
            mpz_set (pp, new_pp);
  
-          ASSERT (nrawfactor < factor_alloc);
+          assert (nrawfactor < factor_alloc);
            rawfactor[nrawfactor].divisor = i;
            rawfactor[nrawfactor].multiplicity = 1;
            nrawfactor++;
@@ -377,7 +376,7 @@ generate_mod (int limb_bits, int nail_bits)
    for (i = 0; i < nrawfactor; i++)
      {
        int  j;
-      ASSERT (nfactor < factor_alloc);
+      assert (nfactor < factor_alloc);
        factor[nfactor].divisor = 1;
        for (j = 0; j < rawfactor[i].multiplicity; j++)
          factor[nfactor].divisor *= rawfactor[i].divisor;
diff --git a/gen-trialdivtab.c b/gen-trialdivtab.c

index 708253926fcbf2c00b5c3d19db1cdb14793279ae..d87265ec41c1cfe2aa8ae7a54817e6504163d955 100644 (file)
--- a/gen-trialdivtab.c
+++ b/gen-trialdivtab.c
@@ -2,7 +2,7 @@
  
     Contributed to the GNU project by Torbjorn Granlund.
  
-Copyright 2009 Free Software Foundation, Inc.
+Copyright 2009, 2012 Free Software Foundation, Inc.
  
  This file is part of the GNU MP Library.
  
@@ -17,7 +17,7 @@ or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
  License for more details.
  
  You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.  */
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
  
  /*
    Generate tables for fast, division-free trial division for GMP.
@@ -36,7 +36,7 @@ along with the GNU MP Library.        If not, see http://www.gnu.org/licenses/.  */
  
  #include <stdlib.h>
  #include <stdio.h>
-#include "dumbmp.c"
+#include "bootstrap.c"
  
  int sumspills (mpz_t, mpz_t *, int);
  void mpn_mod_1s_4p_cps (mpz_t [7], mpz_t);
@@ -203,20 +203,7 @@ main (int argc, char *argv[])
  unsigned long
  mpz_log2 (mpz_t x)
  {
-  mpz_t y;
-  unsigned long cnt;
-
-  mpz_init (y);
-  mpz_set (y, x);
-  cnt = 0;
-  while (mpz_sgn (y) != 0)
-    {
-      mpz_tdiv_q_2exp (y, y, 1);
-      cnt++;
-    }
-  mpz_clear (y);
-
-  return cnt;
+  return mpz_sgn (x) ? mpz_sizeinbase (x, 2) : 0;
  }
  
  void
diff --git a/gmp-h.in b/gmp-h.in

index 01757dfbc7a2d8771382529c03064218ded06de2..7deb67ad49755c3a98b23201be0b7adbce02baa0 100644 (file)
--- a/gmp-h.in
+++ b/gmp-h.in
@@ -1,8 +1,8 @@
  /* Definitions for GNU multiple precision functions.   -*- mode: c -*-
  
-Copyright 1991, 1993, 1994, 1995, 1996, 1997, 1999, 2000, 2001, 2002, 2003,
-2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012 Free Software Foundation,
-Inc.
+Copyright 1991, 1993, 1994, 1995, 1996, 1997, 1999, 2000, 2001, 2002,
+2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012 Free
+Software Foundation, Inc.
  
  This file is part of the GNU MP Library.
  
@@ -61,57 +61,6 @@ along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
  #endif
  
  
-/* __STDC__ - some ANSI compilers define this only to 0, hence the use of
-       "defined" and not "__STDC__-0".  In particular Sun workshop C 5.0
-       sets __STDC__ to 0, but requires "##" for token pasting.
-
-   _AIX - gnu ansidecl.h asserts that all known AIX compilers are ANSI but
-       don't always define __STDC__.
-
-   __DECC - current versions of DEC C (5.9 for instance) for alpha are ANSI,
-       but don't define __STDC__ in their default mode.  Don't know if old
-       versions might have been K&R, but let's not worry about that unless
-       someone is still using one.
-
-   _mips - gnu ansidecl.h says the RISC/OS MIPS compiler is ANSI in SVR4
-       mode, but doesn't define __STDC__.
-
-   _MSC_VER - Microsoft C is ANSI, but __STDC__ is undefined unless the /Za
-       option is given (in which case it's 1).
-
-   _WIN32 - tested for by gnu ansidecl.h, no doubt on the assumption that
-      all w32 compilers are ansi.
-
-   Note: This same set of tests is used by gen-psqr.c and
-   demos/expr/expr-impl.h, so if anything needs adding, then be sure to
-   update those too.  */
-
-#if  defined (__STDC__)                                 \
-  || defined (__cplusplus)                              \
-  || defined (_AIX)                                     \
-  || defined (__DECC)                                   \
-  || (defined (__mips) && defined (_SYSTYPE_SVR4))      \
-  || defined (_MSC_VER)                                 \
-  || defined (_WIN32)
-#define __GMP_HAVE_CONST        1
-#define __GMP_HAVE_PROTOTYPES   1
-#define __GMP_HAVE_TOKEN_PASTE  1
-#else
-#define __GMP_HAVE_CONST        0
-#define __GMP_HAVE_PROTOTYPES   0
-#define __GMP_HAVE_TOKEN_PASTE  0
-#endif
-
-
-#if __GMP_HAVE_CONST
-#define __gmp_const   const
-#define __gmp_signed  signed
-#else
-#define __gmp_const
-#define __gmp_signed
-#endif
-
-
  /* __GMP_DECLSPEC supports Windows DLL versions of libgmp, and is empty in
     all other circumstances.
  
@@ -167,7 +116,7 @@ along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
  #endif
  
  #if __GMP_LIBGMP_DLL
-#if __GMP_WITHIN_GMP
+#ifdef __GMP_WITHIN_GMP
  /* compiling to go into a DLL libgmp */
  #define __GMP_DECLSPEC  __GMP_DECLSPEC_EXPORT
  #else
@@ -214,7 +163,7 @@ typedef __mpz_struct MP_INT;    /* gmp 1 source compatibility */
  typedef __mpz_struct mpz_t[1];
  
  typedef mp_limb_t *            mp_ptr;
-typedef __gmp_const mp_limb_t *        mp_srcptr;
+typedef const mp_limb_t *      mp_srcptr;
  #if defined (_CRAY) && ! defined (_CRAYMPP)
  /* plain `int' is much faster (48 bits) */
  #define __GMP_MP_SIZE_T_INT     1
@@ -271,18 +220,18 @@ typedef __gmp_randstate_struct gmp_randstate_t[1];
  
  /* Types for function declarations in gmp files.  */
  /* ??? Should not pollute user name space with these ??? */
-typedef __gmp_const __mpz_struct *mpz_srcptr;
+typedef const __mpz_struct *mpz_srcptr;
  typedef __mpz_struct *mpz_ptr;
-typedef __gmp_const __mpf_struct *mpf_srcptr;
+typedef const __mpf_struct *mpf_srcptr;
  typedef __mpf_struct *mpf_ptr;
-typedef __gmp_const __mpq_struct *mpq_srcptr;
+typedef const __mpq_struct *mpq_srcptr;
  typedef __mpq_struct *mpq_ptr;
  
  
  /* This is not wanted in mp.h, so put it outside the __GNU_MP__ common
     section. */
  #if __GMP_LIBGMP_DLL
-#if __GMP_WITHIN_GMPXX
+#ifdef __GMP_WITHIN_GMPXX
  /* compiling to go into a DLL libgmpxx */
  #define __GMP_DECLSPEC_XX  __GMP_DECLSPEC_EXPORT
  #else
@@ -295,18 +244,8 @@ typedef __mpq_struct *mpq_ptr;
  #endif
  
  
-#if __GMP_HAVE_PROTOTYPES
-#define __GMP_PROTO(x) x
-#else
-#define __GMP_PROTO(x) ()
-#endif
-
  #ifndef __MPN
-#if __GMP_HAVE_TOKEN_PASTE
  #define __MPN(x) __gmpn_##x
-#else
-#define __MPN(x) __gmpn_/**/x
-#endif
  #endif
  
  /* For reference, "defined(EOF)" cannot be used here.  In g++ 2.95.4,
@@ -420,7 +359,8 @@ typedef __mpq_struct *mpq_ptr;
      GCC 4.3 and above with -std=c99 or -std=gnu99 implements ISO C99
      inline semantics, unless -fgnu89-inline is used.  */
  #ifdef __GNUC__
-#if (defined __GNUC_STDC_INLINE__) || (__GNUC__ == 4 && __GNUC_MINOR__ == 2)
+#if (defined __GNUC_STDC_INLINE__) || (__GNUC__ == 4 && __GNUC_MINOR__ == 2) \
+  || (defined __GNUC_GNU_INLINE__ && defined __cplusplus)
  #define __GMP_EXTERN_INLINE extern __inline__ __attribute__ ((__gnu_inline__))
  #else
  #define __GMP_EXTERN_INLINE      extern __inline__
@@ -538,116 +478,114 @@ using std::FILE;
  #endif
  
  #define mp_set_memory_functions __gmp_set_memory_functions
-__GMP_DECLSPEC void mp_set_memory_functions __GMP_PROTO ((void *(*) (size_t),
+__GMP_DECLSPEC void mp_set_memory_functions (void *(*) (size_t),
                                       void *(*) (void *, size_t, size_t),
-                                     void (*) (void *, size_t))) __GMP_NOTHROW;
+                                     void (*) (void *, size_t)) __GMP_NOTHROW;
  
  #define mp_get_memory_functions __gmp_get_memory_functions
-__GMP_DECLSPEC void mp_get_memory_functions __GMP_PROTO ((void *(**) (size_t),
+__GMP_DECLSPEC void mp_get_memory_functions (void *(**) (size_t),
                                        void *(**) (void *, size_t, size_t),
-                                      void (**) (void *, size_t))) __GMP_NOTHROW;
+                                      void (**) (void *, size_t)) __GMP_NOTHROW;
  
  #define mp_bits_per_limb __gmp_bits_per_limb
-__GMP_DECLSPEC extern __gmp_const int mp_bits_per_limb;
+__GMP_DECLSPEC extern const int mp_bits_per_limb;
  
  #define gmp_errno __gmp_errno
  __GMP_DECLSPEC extern int gmp_errno;
  
  #define gmp_version __gmp_version
-__GMP_DECLSPEC extern __gmp_const char * __gmp_const gmp_version;
+__GMP_DECLSPEC extern const char * const gmp_version;
  
  
  /**************** Random number routines.  ****************/
  
  /* obsolete */
  #define gmp_randinit __gmp_randinit
-__GMP_DECLSPEC void gmp_randinit __GMP_PROTO ((gmp_randstate_t, gmp_randalg_t, ...));
+__GMP_DECLSPEC void gmp_randinit (gmp_randstate_t, gmp_randalg_t, ...);
  
  #define gmp_randinit_default __gmp_randinit_default
-__GMP_DECLSPEC void gmp_randinit_default __GMP_PROTO ((gmp_randstate_t));
+__GMP_DECLSPEC void gmp_randinit_default (gmp_randstate_t);
  
  #define gmp_randinit_lc_2exp __gmp_randinit_lc_2exp
-__GMP_DECLSPEC void gmp_randinit_lc_2exp __GMP_PROTO ((gmp_randstate_t,
-                                                      mpz_srcptr, unsigned long int,
-                                                      mp_bitcnt_t));
+__GMP_DECLSPEC void gmp_randinit_lc_2exp (gmp_randstate_t, mpz_srcptr, unsigned long int, mp_bitcnt_t);
  
  #define gmp_randinit_lc_2exp_size __gmp_randinit_lc_2exp_size
-__GMP_DECLSPEC int gmp_randinit_lc_2exp_size __GMP_PROTO ((gmp_randstate_t, mp_bitcnt_t));
+__GMP_DECLSPEC int gmp_randinit_lc_2exp_size (gmp_randstate_t, mp_bitcnt_t);
  
  #define gmp_randinit_mt __gmp_randinit_mt
-__GMP_DECLSPEC void gmp_randinit_mt __GMP_PROTO ((gmp_randstate_t));
+__GMP_DECLSPEC void gmp_randinit_mt (gmp_randstate_t);
  
  #define gmp_randinit_set __gmp_randinit_set
-__GMP_DECLSPEC void gmp_randinit_set __GMP_PROTO ((gmp_randstate_t, __gmp_const __gmp_randstate_struct *));
+__GMP_DECLSPEC void gmp_randinit_set (gmp_randstate_t, const __gmp_randstate_struct *);
  
  #define gmp_randseed __gmp_randseed
-__GMP_DECLSPEC void gmp_randseed __GMP_PROTO ((gmp_randstate_t, mpz_srcptr));
+__GMP_DECLSPEC void gmp_randseed (gmp_randstate_t, mpz_srcptr);
  
  #define gmp_randseed_ui __gmp_randseed_ui
-__GMP_DECLSPEC void gmp_randseed_ui __GMP_PROTO ((gmp_randstate_t, unsigned long int));
+__GMP_DECLSPEC void gmp_randseed_ui (gmp_randstate_t, unsigned long int);
  
  #define gmp_randclear __gmp_randclear
-__GMP_DECLSPEC void gmp_randclear __GMP_PROTO ((gmp_randstate_t));
+__GMP_DECLSPEC void gmp_randclear (gmp_randstate_t);
  
  #define gmp_urandomb_ui __gmp_urandomb_ui
-__GMP_DECLSPEC unsigned long gmp_urandomb_ui __GMP_PROTO ((gmp_randstate_t, unsigned long));
+__GMP_DECLSPEC unsigned long gmp_urandomb_ui (gmp_randstate_t, unsigned long);
  
  #define gmp_urandomm_ui __gmp_urandomm_ui
-__GMP_DECLSPEC unsigned long gmp_urandomm_ui __GMP_PROTO ((gmp_randstate_t, unsigned long));
+__GMP_DECLSPEC unsigned long gmp_urandomm_ui (gmp_randstate_t, unsigned long);
  
  
  /**************** Formatted output routines.  ****************/
  
  #define gmp_asprintf __gmp_asprintf
-__GMP_DECLSPEC int gmp_asprintf __GMP_PROTO ((char **, __gmp_const char *, ...));
+__GMP_DECLSPEC int gmp_asprintf (char **, const char *, ...);
  
  #define gmp_fprintf __gmp_fprintf
  #ifdef _GMP_H_HAVE_FILE
-__GMP_DECLSPEC int gmp_fprintf __GMP_PROTO ((FILE *, __gmp_const char *, ...));
+__GMP_DECLSPEC int gmp_fprintf (FILE *, const char *, ...);
  #endif
  
  #define gmp_obstack_printf __gmp_obstack_printf
  #if defined (_GMP_H_HAVE_OBSTACK)
-__GMP_DECLSPEC int gmp_obstack_printf __GMP_PROTO ((struct obstack *, __gmp_const char *, ...));
+__GMP_DECLSPEC int gmp_obstack_printf (struct obstack *, const char *, ...);
  #endif
  
  #define gmp_obstack_vprintf __gmp_obstack_vprintf
  #if defined (_GMP_H_HAVE_OBSTACK) && defined (_GMP_H_HAVE_VA_LIST)
-__GMP_DECLSPEC int gmp_obstack_vprintf __GMP_PROTO ((struct obstack *, __gmp_const char *, va_list));
+__GMP_DECLSPEC int gmp_obstack_vprintf (struct obstack *, const char *, va_list);
  #endif
  
  #define gmp_printf __gmp_printf
-__GMP_DECLSPEC int gmp_printf __GMP_PROTO ((__gmp_const char *, ...));
+__GMP_DECLSPEC int gmp_printf (const char *, ...);
  
  #define gmp_snprintf __gmp_snprintf
-__GMP_DECLSPEC int gmp_snprintf __GMP_PROTO ((char *, size_t, __gmp_const char *, ...));
+__GMP_DECLSPEC int gmp_snprintf (char *, size_t, const char *, ...);
  
  #define gmp_sprintf __gmp_sprintf
-__GMP_DECLSPEC int gmp_sprintf __GMP_PROTO ((char *, __gmp_const char *, ...));
+__GMP_DECLSPEC int gmp_sprintf (char *, const char *, ...);
  
  #define gmp_vasprintf __gmp_vasprintf
  #if defined (_GMP_H_HAVE_VA_LIST)
-__GMP_DECLSPEC int gmp_vasprintf __GMP_PROTO ((char **, __gmp_const char *, va_list));
+__GMP_DECLSPEC int gmp_vasprintf (char **, const char *, va_list);
  #endif
  
  #define gmp_vfprintf __gmp_vfprintf
  #if defined (_GMP_H_HAVE_FILE) && defined (_GMP_H_HAVE_VA_LIST)
-__GMP_DECLSPEC int gmp_vfprintf __GMP_PROTO ((FILE *, __gmp_const char *, va_list));
+__GMP_DECLSPEC int gmp_vfprintf (FILE *, const char *, va_list);
  #endif
  
  #define gmp_vprintf __gmp_vprintf
  #if defined (_GMP_H_HAVE_VA_LIST)
-__GMP_DECLSPEC int gmp_vprintf __GMP_PROTO ((__gmp_const char *, va_list));
+__GMP_DECLSPEC int gmp_vprintf (const char *, va_list);
  #endif
  
  #define gmp_vsnprintf __gmp_vsnprintf
  #if defined (_GMP_H_HAVE_VA_LIST)
-__GMP_DECLSPEC int gmp_vsnprintf __GMP_PROTO ((char *, size_t, __gmp_const char *, va_list));
+__GMP_DECLSPEC int gmp_vsnprintf (char *, size_t, const char *, va_list);
  #endif
  
  #define gmp_vsprintf __gmp_vsprintf
  #if defined (_GMP_H_HAVE_VA_LIST)
-__GMP_DECLSPEC int gmp_vsprintf __GMP_PROTO ((char *, __gmp_const char *, va_list));
+__GMP_DECLSPEC int gmp_vsprintf (char *, const char *, va_list);
  #endif
  
  
@@ -655,28 +593,28 @@ __GMP_DECLSPEC int gmp_vsprintf __GMP_PROTO ((char *, __gmp_const char *, va_lis
  
  #define gmp_fscanf __gmp_fscanf
  #ifdef _GMP_H_HAVE_FILE
-__GMP_DECLSPEC int gmp_fscanf __GMP_PROTO ((FILE *, __gmp_const char *, ...));
+__GMP_DECLSPEC int gmp_fscanf (FILE *, const char *, ...);
  #endif
  
  #define gmp_scanf __gmp_scanf
-__GMP_DECLSPEC int gmp_scanf __GMP_PROTO ((__gmp_const char *, ...));
+__GMP_DECLSPEC int gmp_scanf (const char *, ...);
  
  #define gmp_sscanf __gmp_sscanf
-__GMP_DECLSPEC int gmp_sscanf __GMP_PROTO ((__gmp_const char *, __gmp_const char *, ...));
+__GMP_DECLSPEC int gmp_sscanf (const char *, const char *, ...);
  
  #define gmp_vfscanf __gmp_vfscanf
  #if defined (_GMP_H_HAVE_FILE) && defined (_GMP_H_HAVE_VA_LIST)
-__GMP_DECLSPEC int gmp_vfscanf __GMP_PROTO ((FILE *, __gmp_const char *, va_list));
+__GMP_DECLSPEC int gmp_vfscanf (FILE *, const char *, va_list);
  #endif
  
  #define gmp_vscanf __gmp_vscanf
  #if defined (_GMP_H_HAVE_VA_LIST)
-__GMP_DECLSPEC int gmp_vscanf __GMP_PROTO ((__gmp_const char *, va_list));
+__GMP_DECLSPEC int gmp_vscanf (const char *, va_list);
  #endif
  
  #define gmp_vsscanf __gmp_vsscanf
  #if defined (_GMP_H_HAVE_VA_LIST)
-__GMP_DECLSPEC int gmp_vsscanf __GMP_PROTO ((__gmp_const char *, __gmp_const char *, va_list));
+__GMP_DECLSPEC int gmp_vsscanf (const char *, const char *, va_list);
  #endif
  
  
@@ -684,811 +622,820 @@ __GMP_DECLSPEC int gmp_vsscanf __GMP_PROTO ((__gmp_const char *, __gmp_const cha
  
  #define _mpz_realloc __gmpz_realloc
  #define mpz_realloc __gmpz_realloc
-__GMP_DECLSPEC void *_mpz_realloc __GMP_PROTO ((mpz_ptr, mp_size_t));
+__GMP_DECLSPEC void *_mpz_realloc (mpz_ptr, mp_size_t);
  
  #define mpz_abs __gmpz_abs
  #if __GMP_INLINE_PROTOTYPES || defined (__GMP_FORCE_mpz_abs)
-__GMP_DECLSPEC void mpz_abs __GMP_PROTO ((mpz_ptr, mpz_srcptr));
+__GMP_DECLSPEC void mpz_abs (mpz_ptr, mpz_srcptr);
  #endif
  
  #define mpz_add __gmpz_add
-__GMP_DECLSPEC void mpz_add __GMP_PROTO ((mpz_ptr, mpz_srcptr, mpz_srcptr));
+__GMP_DECLSPEC void mpz_add (mpz_ptr, mpz_srcptr, mpz_srcptr);
  
  #define mpz_add_ui __gmpz_add_ui
-__GMP_DECLSPEC void mpz_add_ui __GMP_PROTO ((mpz_ptr, mpz_srcptr, unsigned long int));
+__GMP_DECLSPEC void mpz_add_ui (mpz_ptr, mpz_srcptr, unsigned long int);
  
  #define mpz_addmul __gmpz_addmul
-__GMP_DECLSPEC void mpz_addmul __GMP_PROTO ((mpz_ptr, mpz_srcptr, mpz_srcptr));
+__GMP_DECLSPEC void mpz_addmul (mpz_ptr, mpz_srcptr, mpz_srcptr);
  
  #define mpz_addmul_ui __gmpz_addmul_ui
-__GMP_DECLSPEC void mpz_addmul_ui __GMP_PROTO ((mpz_ptr, mpz_srcptr, unsigned long int));
+__GMP_DECLSPEC void mpz_addmul_ui (mpz_ptr, mpz_srcptr, unsigned long int);
  
  #define mpz_and __gmpz_and
-__GMP_DECLSPEC void mpz_and __GMP_PROTO ((mpz_ptr, mpz_srcptr, mpz_srcptr));
+__GMP_DECLSPEC void mpz_and (mpz_ptr, mpz_srcptr, mpz_srcptr);
  
  #define mpz_array_init __gmpz_array_init
-__GMP_DECLSPEC void mpz_array_init __GMP_PROTO ((mpz_ptr, mp_size_t, mp_size_t));
+__GMP_DECLSPEC void mpz_array_init (mpz_ptr, mp_size_t, mp_size_t);
  
  #define mpz_bin_ui __gmpz_bin_ui
-__GMP_DECLSPEC void mpz_bin_ui __GMP_PROTO ((mpz_ptr, mpz_srcptr, unsigned long int));
+__GMP_DECLSPEC void mpz_bin_ui (mpz_ptr, mpz_srcptr, unsigned long int);
  
  #define mpz_bin_uiui __gmpz_bin_uiui
-__GMP_DECLSPEC void mpz_bin_uiui __GMP_PROTO ((mpz_ptr, unsigned long int, unsigned long int));
+__GMP_DECLSPEC void mpz_bin_uiui (mpz_ptr, unsigned long int, unsigned long int);
  
  #define mpz_cdiv_q __gmpz_cdiv_q
-__GMP_DECLSPEC void mpz_cdiv_q __GMP_PROTO ((mpz_ptr, mpz_srcptr, mpz_srcptr));
+__GMP_DECLSPEC void mpz_cdiv_q (mpz_ptr, mpz_srcptr, mpz_srcptr);
  
  #define mpz_cdiv_q_2exp __gmpz_cdiv_q_2exp
-__GMP_DECLSPEC void mpz_cdiv_q_2exp __GMP_PROTO ((mpz_ptr, mpz_srcptr, mp_bitcnt_t));
+__GMP_DECLSPEC void mpz_cdiv_q_2exp (mpz_ptr, mpz_srcptr, mp_bitcnt_t);
  
  #define mpz_cdiv_q_ui __gmpz_cdiv_q_ui
-__GMP_DECLSPEC unsigned long int mpz_cdiv_q_ui __GMP_PROTO ((mpz_ptr, mpz_srcptr, unsigned long int));
+__GMP_DECLSPEC unsigned long int mpz_cdiv_q_ui (mpz_ptr, mpz_srcptr, unsigned long int);
  
  #define mpz_cdiv_qr __gmpz_cdiv_qr
-__GMP_DECLSPEC void mpz_cdiv_qr __GMP_PROTO ((mpz_ptr, mpz_ptr, mpz_srcptr, mpz_srcptr));
+__GMP_DECLSPEC void mpz_cdiv_qr (mpz_ptr, mpz_ptr, mpz_srcptr, mpz_srcptr);
  
  #define mpz_cdiv_qr_ui __gmpz_cdiv_qr_ui
-__GMP_DECLSPEC unsigned long int mpz_cdiv_qr_ui __GMP_PROTO ((mpz_ptr, mpz_ptr, mpz_srcptr, unsigned long int));
+__GMP_DECLSPEC unsigned long int mpz_cdiv_qr_ui (mpz_ptr, mpz_ptr, mpz_srcptr, unsigned long int);
  
  #define mpz_cdiv_r __gmpz_cdiv_r
-__GMP_DECLSPEC void mpz_cdiv_r __GMP_PROTO ((mpz_ptr, mpz_srcptr, mpz_srcptr));
+__GMP_DECLSPEC void mpz_cdiv_r (mpz_ptr, mpz_srcptr, mpz_srcptr);
  
  #define mpz_cdiv_r_2exp __gmpz_cdiv_r_2exp
-__GMP_DECLSPEC void mpz_cdiv_r_2exp __GMP_PROTO ((mpz_ptr, mpz_srcptr, mp_bitcnt_t));
+__GMP_DECLSPEC void mpz_cdiv_r_2exp (mpz_ptr, mpz_srcptr, mp_bitcnt_t);
  
  #define mpz_cdiv_r_ui __gmpz_cdiv_r_ui
-__GMP_DECLSPEC unsigned long int mpz_cdiv_r_ui __GMP_PROTO ((mpz_ptr, mpz_srcptr, unsigned long int));
+__GMP_DECLSPEC unsigned long int mpz_cdiv_r_ui (mpz_ptr, mpz_srcptr, unsigned long int);
  
  #define mpz_cdiv_ui __gmpz_cdiv_ui
-__GMP_DECLSPEC unsigned long int mpz_cdiv_ui __GMP_PROTO ((mpz_srcptr, unsigned long int)) __GMP_ATTRIBUTE_PURE;
+__GMP_DECLSPEC unsigned long int mpz_cdiv_ui (mpz_srcptr, unsigned long int) __GMP_ATTRIBUTE_PURE;
  
  #define mpz_clear __gmpz_clear
-__GMP_DECLSPEC void mpz_clear __GMP_PROTO ((mpz_ptr));
+__GMP_DECLSPEC void mpz_clear (mpz_ptr);
  
  #define mpz_clears __gmpz_clears
-__GMP_DECLSPEC void mpz_clears __GMP_PROTO ((mpz_ptr, ...));
+__GMP_DECLSPEC void mpz_clears (mpz_ptr, ...);
  
  #define mpz_clrbit __gmpz_clrbit
-__GMP_DECLSPEC void mpz_clrbit __GMP_PROTO ((mpz_ptr, mp_bitcnt_t));
+__GMP_DECLSPEC void mpz_clrbit (mpz_ptr, mp_bitcnt_t);
  
  #define mpz_cmp __gmpz_cmp
-__GMP_DECLSPEC int mpz_cmp __GMP_PROTO ((mpz_srcptr, mpz_srcptr)) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE;
+__GMP_DECLSPEC int mpz_cmp (mpz_srcptr, mpz_srcptr) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE;
  
  #define mpz_cmp_d __gmpz_cmp_d
-__GMP_DECLSPEC int mpz_cmp_d __GMP_PROTO ((mpz_srcptr, double)) __GMP_ATTRIBUTE_PURE;
+__GMP_DECLSPEC int mpz_cmp_d (mpz_srcptr, double) __GMP_ATTRIBUTE_PURE;
  
  #define _mpz_cmp_si __gmpz_cmp_si
-__GMP_DECLSPEC int _mpz_cmp_si __GMP_PROTO ((mpz_srcptr, signed long int)) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE;
+__GMP_DECLSPEC int _mpz_cmp_si (mpz_srcptr, signed long int) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE;
  
  #define _mpz_cmp_ui __gmpz_cmp_ui
-__GMP_DECLSPEC int _mpz_cmp_ui __GMP_PROTO ((mpz_srcptr, unsigned long int)) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE;
+__GMP_DECLSPEC int _mpz_cmp_ui (mpz_srcptr, unsigned long int) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE;
  
  #define mpz_cmpabs __gmpz_cmpabs
-__GMP_DECLSPEC int mpz_cmpabs __GMP_PROTO ((mpz_srcptr, mpz_srcptr)) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE;
+__GMP_DECLSPEC int mpz_cmpabs (mpz_srcptr, mpz_srcptr) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE;
  
  #define mpz_cmpabs_d __gmpz_cmpabs_d
-__GMP_DECLSPEC int mpz_cmpabs_d __GMP_PROTO ((mpz_srcptr, double)) __GMP_ATTRIBUTE_PURE;
+__GMP_DECLSPEC int mpz_cmpabs_d (mpz_srcptr, double) __GMP_ATTRIBUTE_PURE;
  
  #define mpz_cmpabs_ui __gmpz_cmpabs_ui
-__GMP_DECLSPEC int mpz_cmpabs_ui __GMP_PROTO ((mpz_srcptr, unsigned long int)) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE;
+__GMP_DECLSPEC int mpz_cmpabs_ui (mpz_srcptr, unsigned long int) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE;
  
  #define mpz_com __gmpz_com
-__GMP_DECLSPEC void mpz_com __GMP_PROTO ((mpz_ptr, mpz_srcptr));
+__GMP_DECLSPEC void mpz_com (mpz_ptr, mpz_srcptr);
  
  #define mpz_combit __gmpz_combit
-__GMP_DECLSPEC void mpz_combit __GMP_PROTO ((mpz_ptr, mp_bitcnt_t));
+__GMP_DECLSPEC void mpz_combit (mpz_ptr, mp_bitcnt_t);
  
  #define mpz_congruent_p __gmpz_congruent_p
-__GMP_DECLSPEC int mpz_congruent_p __GMP_PROTO ((mpz_srcptr, mpz_srcptr, mpz_srcptr)) __GMP_ATTRIBUTE_PURE;
+__GMP_DECLSPEC int mpz_congruent_p (mpz_srcptr, mpz_srcptr, mpz_srcptr) __GMP_ATTRIBUTE_PURE;
  
  #define mpz_congruent_2exp_p __gmpz_congruent_2exp_p
-__GMP_DECLSPEC int mpz_congruent_2exp_p __GMP_PROTO ((mpz_srcptr, mpz_srcptr, mp_bitcnt_t)) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE;
+__GMP_DECLSPEC int mpz_congruent_2exp_p (mpz_srcptr, mpz_srcptr, mp_bitcnt_t) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE;
  
  #define mpz_congruent_ui_p __gmpz_congruent_ui_p
-__GMP_DECLSPEC int mpz_congruent_ui_p __GMP_PROTO ((mpz_srcptr, unsigned long, unsigned long)) __GMP_ATTRIBUTE_PURE;
+__GMP_DECLSPEC int mpz_congruent_ui_p (mpz_srcptr, unsigned long, unsigned long) __GMP_ATTRIBUTE_PURE;
  
  #define mpz_divexact __gmpz_divexact
-__GMP_DECLSPEC void mpz_divexact __GMP_PROTO ((mpz_ptr, mpz_srcptr, mpz_srcptr));
+__GMP_DECLSPEC void mpz_divexact (mpz_ptr, mpz_srcptr, mpz_srcptr);
  
  #define mpz_divexact_ui __gmpz_divexact_ui
-__GMP_DECLSPEC void mpz_divexact_ui __GMP_PROTO ((mpz_ptr, mpz_srcptr, unsigned long));
+__GMP_DECLSPEC void mpz_divexact_ui (mpz_ptr, mpz_srcptr, unsigned long);
  
  #define mpz_divisible_p __gmpz_divisible_p
-__GMP_DECLSPEC int mpz_divisible_p __GMP_PROTO ((mpz_srcptr, mpz_srcptr)) __GMP_ATTRIBUTE_PURE;
+__GMP_DECLSPEC int mpz_divisible_p (mpz_srcptr, mpz_srcptr) __GMP_ATTRIBUTE_PURE;
  
  #define mpz_divisible_ui_p __gmpz_divisible_ui_p
-__GMP_DECLSPEC int mpz_divisible_ui_p __GMP_PROTO ((mpz_srcptr, unsigned long)) __GMP_ATTRIBUTE_PURE;
+__GMP_DECLSPEC int mpz_divisible_ui_p (mpz_srcptr, unsigned long) __GMP_ATTRIBUTE_PURE;
  
  #define mpz_divisible_2exp_p __gmpz_divisible_2exp_p
-__GMP_DECLSPEC int mpz_divisible_2exp_p __GMP_PROTO ((mpz_srcptr, mp_bitcnt_t)) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE;
+__GMP_DECLSPEC int mpz_divisible_2exp_p (mpz_srcptr, mp_bitcnt_t) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE;
  
  #define mpz_dump __gmpz_dump
-__GMP_DECLSPEC void mpz_dump __GMP_PROTO ((mpz_srcptr));
+__GMP_DECLSPEC void mpz_dump (mpz_srcptr);
  
  #define mpz_export __gmpz_export
-__GMP_DECLSPEC void *mpz_export __GMP_PROTO ((void *, size_t *, int, size_t, int, size_t, mpz_srcptr));
+__GMP_DECLSPEC void *mpz_export (void *, size_t *, int, size_t, int, size_t, mpz_srcptr);
  
  #define mpz_fac_ui __gmpz_fac_ui
-__GMP_DECLSPEC void mpz_fac_ui __GMP_PROTO ((mpz_ptr, unsigned long int));
+__GMP_DECLSPEC void mpz_fac_ui (mpz_ptr, unsigned long int);
+
+#define mpz_2fac_ui __gmpz_2fac_ui
+__GMP_DECLSPEC void mpz_2fac_ui (mpz_ptr, unsigned long int);
+
+#define mpz_mfac_uiui __gmpz_mfac_uiui
+__GMP_DECLSPEC void mpz_mfac_uiui (mpz_ptr, unsigned long int, unsigned long int);
+
+#define mpz_primorial_ui __gmpz_primorial_ui
+__GMP_DECLSPEC void mpz_primorial_ui (mpz_ptr, unsigned long int);
  
  #define mpz_fdiv_q __gmpz_fdiv_q
-__GMP_DECLSPEC void mpz_fdiv_q __GMP_PROTO ((mpz_ptr, mpz_srcptr, mpz_srcptr));
+__GMP_DECLSPEC void mpz_fdiv_q (mpz_ptr, mpz_srcptr, mpz_srcptr);
  
  #define mpz_fdiv_q_2exp __gmpz_fdiv_q_2exp
-__GMP_DECLSPEC void mpz_fdiv_q_2exp __GMP_PROTO ((mpz_ptr, mpz_srcptr, mp_bitcnt_t));
+__GMP_DECLSPEC void mpz_fdiv_q_2exp (mpz_ptr, mpz_srcptr, mp_bitcnt_t);
  
  #define mpz_fdiv_q_ui __gmpz_fdiv_q_ui
-__GMP_DECLSPEC unsigned long int mpz_fdiv_q_ui __GMP_PROTO ((mpz_ptr, mpz_srcptr, unsigned long int));
+__GMP_DECLSPEC unsigned long int mpz_fdiv_q_ui (mpz_ptr, mpz_srcptr, unsigned long int);
  
  #define mpz_fdiv_qr __gmpz_fdiv_qr
-__GMP_DECLSPEC void mpz_fdiv_qr __GMP_PROTO ((mpz_ptr, mpz_ptr, mpz_srcptr, mpz_srcptr));
+__GMP_DECLSPEC void mpz_fdiv_qr (mpz_ptr, mpz_ptr, mpz_srcptr, mpz_srcptr);
  
  #define mpz_fdiv_qr_ui __gmpz_fdiv_qr_ui
-__GMP_DECLSPEC unsigned long int mpz_fdiv_qr_ui __GMP_PROTO ((mpz_ptr, mpz_ptr, mpz_srcptr, unsigned long int));
+__GMP_DECLSPEC unsigned long int mpz_fdiv_qr_ui (mpz_ptr, mpz_ptr, mpz_srcptr, unsigned long int);
  
  #define mpz_fdiv_r __gmpz_fdiv_r
-__GMP_DECLSPEC void mpz_fdiv_r __GMP_PROTO ((mpz_ptr, mpz_srcptr, mpz_srcptr));
+__GMP_DECLSPEC void mpz_fdiv_r (mpz_ptr, mpz_srcptr, mpz_srcptr);
  
  #define mpz_fdiv_r_2exp __gmpz_fdiv_r_2exp
-__GMP_DECLSPEC void mpz_fdiv_r_2exp __GMP_PROTO ((mpz_ptr, mpz_srcptr, mp_bitcnt_t));
+__GMP_DECLSPEC void mpz_fdiv_r_2exp (mpz_ptr, mpz_srcptr, mp_bitcnt_t);
  
  #define mpz_fdiv_r_ui __gmpz_fdiv_r_ui
-__GMP_DECLSPEC unsigned long int mpz_fdiv_r_ui __GMP_PROTO ((mpz_ptr, mpz_srcptr, unsigned long int));
+__GMP_DECLSPEC unsigned long int mpz_fdiv_r_ui (mpz_ptr, mpz_srcptr, unsigned long int);
  
  #define mpz_fdiv_ui __gmpz_fdiv_ui
-__GMP_DECLSPEC unsigned long int mpz_fdiv_ui __GMP_PROTO ((mpz_srcptr, unsigned long int)) __GMP_ATTRIBUTE_PURE;
+__GMP_DECLSPEC unsigned long int mpz_fdiv_ui (mpz_srcptr, unsigned long int) __GMP_ATTRIBUTE_PURE;
  
  #define mpz_fib_ui __gmpz_fib_ui
-__GMP_DECLSPEC void mpz_fib_ui __GMP_PROTO ((mpz_ptr, unsigned long int));
+__GMP_DECLSPEC void mpz_fib_ui (mpz_ptr, unsigned long int);
  
  #define mpz_fib2_ui __gmpz_fib2_ui
-__GMP_DECLSPEC void mpz_fib2_ui __GMP_PROTO ((mpz_ptr, mpz_ptr, unsigned long int));
+__GMP_DECLSPEC void mpz_fib2_ui (mpz_ptr, mpz_ptr, unsigned long int);
  
  #define mpz_fits_sint_p __gmpz_fits_sint_p
-__GMP_DECLSPEC int mpz_fits_sint_p __GMP_PROTO ((mpz_srcptr)) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE;
+__GMP_DECLSPEC int mpz_fits_sint_p (mpz_srcptr) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE;
  
  #define mpz_fits_slong_p __gmpz_fits_slong_p
-__GMP_DECLSPEC int mpz_fits_slong_p __GMP_PROTO ((mpz_srcptr)) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE;
+__GMP_DECLSPEC int mpz_fits_slong_p (mpz_srcptr) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE;
  
  #define mpz_fits_sshort_p __gmpz_fits_sshort_p
-__GMP_DECLSPEC int mpz_fits_sshort_p __GMP_PROTO ((mpz_srcptr)) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE;
+__GMP_DECLSPEC int mpz_fits_sshort_p (mpz_srcptr) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE;
  
  #define mpz_fits_uint_p __gmpz_fits_uint_p
  #if __GMP_INLINE_PROTOTYPES || defined (__GMP_FORCE_mpz_fits_uint_p)
-__GMP_DECLSPEC int mpz_fits_uint_p __GMP_PROTO ((mpz_srcptr)) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE;
+__GMP_DECLSPEC int mpz_fits_uint_p (mpz_srcptr) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE;
  #endif
  
  #define mpz_fits_ulong_p __gmpz_fits_ulong_p
  #if __GMP_INLINE_PROTOTYPES || defined (__GMP_FORCE_mpz_fits_ulong_p)
-__GMP_DECLSPEC int mpz_fits_ulong_p __GMP_PROTO ((mpz_srcptr)) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE;
+__GMP_DECLSPEC int mpz_fits_ulong_p (mpz_srcptr) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE;
  #endif
  
  #define mpz_fits_ushort_p __gmpz_fits_ushort_p
  #if __GMP_INLINE_PROTOTYPES || defined (__GMP_FORCE_mpz_fits_ushort_p)
-__GMP_DECLSPEC int mpz_fits_ushort_p __GMP_PROTO ((mpz_srcptr)) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE;
+__GMP_DECLSPEC int mpz_fits_ushort_p (mpz_srcptr) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE;
  #endif
  
  #define mpz_gcd __gmpz_gcd
-__GMP_DECLSPEC void mpz_gcd __GMP_PROTO ((mpz_ptr, mpz_srcptr, mpz_srcptr));
+__GMP_DECLSPEC void mpz_gcd (mpz_ptr, mpz_srcptr, mpz_srcptr);
  
  #define mpz_gcd_ui __gmpz_gcd_ui
-__GMP_DECLSPEC unsigned long int mpz_gcd_ui __GMP_PROTO ((mpz_ptr, mpz_srcptr, unsigned long int));
+__GMP_DECLSPEC unsigned long int mpz_gcd_ui (mpz_ptr, mpz_srcptr, unsigned long int);
  
  #define mpz_gcdext __gmpz_gcdext
-__GMP_DECLSPEC void mpz_gcdext __GMP_PROTO ((mpz_ptr, mpz_ptr, mpz_ptr, mpz_srcptr, mpz_srcptr));
+__GMP_DECLSPEC void mpz_gcdext (mpz_ptr, mpz_ptr, mpz_ptr, mpz_srcptr, mpz_srcptr);
  
  #define mpz_get_d __gmpz_get_d
-__GMP_DECLSPEC double mpz_get_d __GMP_PROTO ((mpz_srcptr)) __GMP_ATTRIBUTE_PURE;
+__GMP_DECLSPEC double mpz_get_d (mpz_srcptr) __GMP_ATTRIBUTE_PURE;
  
  #define mpz_get_d_2exp __gmpz_get_d_2exp
-__GMP_DECLSPEC double mpz_get_d_2exp __GMP_PROTO ((signed long int *, mpz_srcptr));
+__GMP_DECLSPEC double mpz_get_d_2exp (signed long int *, mpz_srcptr);
  
  #define mpz_get_si __gmpz_get_si
-__GMP_DECLSPEC /* signed */ long int mpz_get_si __GMP_PROTO ((mpz_srcptr)) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE;
+__GMP_DECLSPEC /* signed */ long int mpz_get_si (mpz_srcptr) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE;
  
  #define mpz_get_str __gmpz_get_str
-__GMP_DECLSPEC char *mpz_get_str __GMP_PROTO ((char *, int, mpz_srcptr));
+__GMP_DECLSPEC char *mpz_get_str (char *, int, mpz_srcptr);
  
  #define mpz_get_ui __gmpz_get_ui
  #if __GMP_INLINE_PROTOTYPES || defined (__GMP_FORCE_mpz_get_ui)
-__GMP_DECLSPEC unsigned long int mpz_get_ui __GMP_PROTO ((mpz_srcptr)) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE;
+__GMP_DECLSPEC unsigned long int mpz_get_ui (mpz_srcptr) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE;
  #endif
  
  #define mpz_getlimbn __gmpz_getlimbn
  #if __GMP_INLINE_PROTOTYPES || defined (__GMP_FORCE_mpz_getlimbn)
-__GMP_DECLSPEC mp_limb_t mpz_getlimbn __GMP_PROTO ((mpz_srcptr, mp_size_t)) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE;
+__GMP_DECLSPEC mp_limb_t mpz_getlimbn (mpz_srcptr, mp_size_t) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE;
  #endif
  
  #define mpz_hamdist __gmpz_hamdist
-__GMP_DECLSPEC mp_bitcnt_t mpz_hamdist __GMP_PROTO ((mpz_srcptr, mpz_srcptr)) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE;
+__GMP_DECLSPEC mp_bitcnt_t mpz_hamdist (mpz_srcptr, mpz_srcptr) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE;
  
  #define mpz_import __gmpz_import
-__GMP_DECLSPEC void mpz_import __GMP_PROTO ((mpz_ptr, size_t, int, size_t, int, size_t, __gmp_const void *));
+__GMP_DECLSPEC void mpz_import (mpz_ptr, size_t, int, size_t, int, size_t, const void *);
  
  #define mpz_init __gmpz_init
-__GMP_DECLSPEC void mpz_init __GMP_PROTO ((mpz_ptr));
+__GMP_DECLSPEC void mpz_init (mpz_ptr);
  
  #define mpz_init2 __gmpz_init2
-__GMP_DECLSPEC void mpz_init2 __GMP_PROTO ((mpz_ptr, mp_bitcnt_t));
+__GMP_DECLSPEC void mpz_init2 (mpz_ptr, mp_bitcnt_t);
  
  #define mpz_inits __gmpz_inits
-__GMP_DECLSPEC void mpz_inits __GMP_PROTO ((mpz_ptr, ...));
+__GMP_DECLSPEC void mpz_inits (mpz_ptr, ...);
  
  #define mpz_init_set __gmpz_init_set
-__GMP_DECLSPEC void mpz_init_set __GMP_PROTO ((mpz_ptr, mpz_srcptr));
+__GMP_DECLSPEC void mpz_init_set (mpz_ptr, mpz_srcptr);
  
  #define mpz_init_set_d __gmpz_init_set_d
-__GMP_DECLSPEC void mpz_init_set_d __GMP_PROTO ((mpz_ptr, double));
+__GMP_DECLSPEC void mpz_init_set_d (mpz_ptr, double);
  
  #define mpz_init_set_si __gmpz_init_set_si
-__GMP_DECLSPEC void mpz_init_set_si __GMP_PROTO ((mpz_ptr, signed long int));
+__GMP_DECLSPEC void mpz_init_set_si (mpz_ptr, signed long int);
  
  #define mpz_init_set_str __gmpz_init_set_str
-__GMP_DECLSPEC int mpz_init_set_str __GMP_PROTO ((mpz_ptr, __gmp_const char *, int));
+__GMP_DECLSPEC int mpz_init_set_str (mpz_ptr, const char *, int);
  
  #define mpz_init_set_ui __gmpz_init_set_ui
-__GMP_DECLSPEC void mpz_init_set_ui __GMP_PROTO ((mpz_ptr, unsigned long int));
+__GMP_DECLSPEC void mpz_init_set_ui (mpz_ptr, unsigned long int);
  
  #define mpz_inp_raw __gmpz_inp_raw
  #ifdef _GMP_H_HAVE_FILE
-__GMP_DECLSPEC size_t mpz_inp_raw __GMP_PROTO ((mpz_ptr, FILE *));
+__GMP_DECLSPEC size_t mpz_inp_raw (mpz_ptr, FILE *);
  #endif
  
  #define mpz_inp_str __gmpz_inp_str
  #ifdef _GMP_H_HAVE_FILE
-__GMP_DECLSPEC size_t mpz_inp_str __GMP_PROTO ((mpz_ptr, FILE *, int));
+__GMP_DECLSPEC size_t mpz_inp_str (mpz_ptr, FILE *, int);
  #endif
  
  #define mpz_invert __gmpz_invert
-__GMP_DECLSPEC int mpz_invert __GMP_PROTO ((mpz_ptr, mpz_srcptr, mpz_srcptr));
+__GMP_DECLSPEC int mpz_invert (mpz_ptr, mpz_srcptr, mpz_srcptr);
  
  #define mpz_ior __gmpz_ior
-__GMP_DECLSPEC void mpz_ior __GMP_PROTO ((mpz_ptr, mpz_srcptr, mpz_srcptr));
+__GMP_DECLSPEC void mpz_ior (mpz_ptr, mpz_srcptr, mpz_srcptr);
  
  #define mpz_jacobi __gmpz_jacobi
-__GMP_DECLSPEC int mpz_jacobi __GMP_PROTO ((mpz_srcptr, mpz_srcptr)) __GMP_ATTRIBUTE_PURE;
+__GMP_DECLSPEC int mpz_jacobi (mpz_srcptr, mpz_srcptr) __GMP_ATTRIBUTE_PURE;
  
  #define mpz_kronecker mpz_jacobi  /* alias */
  
  #define mpz_kronecker_si __gmpz_kronecker_si
-__GMP_DECLSPEC int mpz_kronecker_si __GMP_PROTO ((mpz_srcptr, long)) __GMP_ATTRIBUTE_PURE;
+__GMP_DECLSPEC int mpz_kronecker_si (mpz_srcptr, long) __GMP_ATTRIBUTE_PURE;
  
  #define mpz_kronecker_ui __gmpz_kronecker_ui
-__GMP_DECLSPEC int mpz_kronecker_ui __GMP_PROTO ((mpz_srcptr, unsigned long)) __GMP_ATTRIBUTE_PURE;
+__GMP_DECLSPEC int mpz_kronecker_ui (mpz_srcptr, unsigned long) __GMP_ATTRIBUTE_PURE;
  
  #define mpz_si_kronecker __gmpz_si_kronecker
-__GMP_DECLSPEC int mpz_si_kronecker __GMP_PROTO ((long, mpz_srcptr)) __GMP_ATTRIBUTE_PURE;
+__GMP_DECLSPEC int mpz_si_kronecker (long, mpz_srcptr) __GMP_ATTRIBUTE_PURE;
  
  #define mpz_ui_kronecker __gmpz_ui_kronecker
-__GMP_DECLSPEC int mpz_ui_kronecker __GMP_PROTO ((unsigned long, mpz_srcptr)) __GMP_ATTRIBUTE_PURE;
+__GMP_DECLSPEC int mpz_ui_kronecker (unsigned long, mpz_srcptr) __GMP_ATTRIBUTE_PURE;
  
  #define mpz_lcm __gmpz_lcm
-__GMP_DECLSPEC void mpz_lcm __GMP_PROTO ((mpz_ptr, mpz_srcptr, mpz_srcptr));
+__GMP_DECLSPEC void mpz_lcm (mpz_ptr, mpz_srcptr, mpz_srcptr);
  
  #define mpz_lcm_ui __gmpz_lcm_ui
-__GMP_DECLSPEC void mpz_lcm_ui __GMP_PROTO ((mpz_ptr, mpz_srcptr, unsigned long));
+__GMP_DECLSPEC void mpz_lcm_ui (mpz_ptr, mpz_srcptr, unsigned long);
  
  #define mpz_legendre mpz_jacobi  /* alias */
  
  #define mpz_lucnum_ui __gmpz_lucnum_ui
-__GMP_DECLSPEC void mpz_lucnum_ui __GMP_PROTO ((mpz_ptr, unsigned long int));
+__GMP_DECLSPEC void mpz_lucnum_ui (mpz_ptr, unsigned long int);
  
  #define mpz_lucnum2_ui __gmpz_lucnum2_ui
-__GMP_DECLSPEC void mpz_lucnum2_ui __GMP_PROTO ((mpz_ptr, mpz_ptr, unsigned long int));
+__GMP_DECLSPEC void mpz_lucnum2_ui (mpz_ptr, mpz_ptr, unsigned long int);
  
  #define mpz_millerrabin __gmpz_millerrabin
-__GMP_DECLSPEC int mpz_millerrabin __GMP_PROTO ((mpz_srcptr, int)) __GMP_ATTRIBUTE_PURE;
+__GMP_DECLSPEC int mpz_millerrabin (mpz_srcptr, int) __GMP_ATTRIBUTE_PURE;
  
  #define mpz_mod __gmpz_mod
-__GMP_DECLSPEC void mpz_mod __GMP_PROTO ((mpz_ptr, mpz_srcptr, mpz_srcptr));
+__GMP_DECLSPEC void mpz_mod (mpz_ptr, mpz_srcptr, mpz_srcptr);
  
  #define mpz_mod_ui mpz_fdiv_r_ui /* same as fdiv_r because divisor unsigned */
  
  #define mpz_mul __gmpz_mul
-__GMP_DECLSPEC void mpz_mul __GMP_PROTO ((mpz_ptr, mpz_srcptr, mpz_srcptr));
+__GMP_DECLSPEC void mpz_mul (mpz_ptr, mpz_srcptr, mpz_srcptr);
  
  #define mpz_mul_2exp __gmpz_mul_2exp
-__GMP_DECLSPEC void mpz_mul_2exp __GMP_PROTO ((mpz_ptr, mpz_srcptr, mp_bitcnt_t));
+__GMP_DECLSPEC void mpz_mul_2exp (mpz_ptr, mpz_srcptr, mp_bitcnt_t);
  
  #define mpz_mul_si __gmpz_mul_si
-__GMP_DECLSPEC void mpz_mul_si __GMP_PROTO ((mpz_ptr, mpz_srcptr, long int));
+__GMP_DECLSPEC void mpz_mul_si (mpz_ptr, mpz_srcptr, long int);
  
  #define mpz_mul_ui __gmpz_mul_ui
-__GMP_DECLSPEC void mpz_mul_ui __GMP_PROTO ((mpz_ptr, mpz_srcptr, unsigned long int));
+__GMP_DECLSPEC void mpz_mul_ui (mpz_ptr, mpz_srcptr, unsigned long int);
  
  #define mpz_neg __gmpz_neg
  #if __GMP_INLINE_PROTOTYPES || defined (__GMP_FORCE_mpz_neg)
-__GMP_DECLSPEC void mpz_neg __GMP_PROTO ((mpz_ptr, mpz_srcptr));
+__GMP_DECLSPEC void mpz_neg (mpz_ptr, mpz_srcptr);
  #endif
  
  #define mpz_nextprime __gmpz_nextprime
-__GMP_DECLSPEC void mpz_nextprime __GMP_PROTO ((mpz_ptr, mpz_srcptr));
+__GMP_DECLSPEC void mpz_nextprime (mpz_ptr, mpz_srcptr);
  
  #define mpz_out_raw __gmpz_out_raw
  #ifdef _GMP_H_HAVE_FILE
-__GMP_DECLSPEC size_t mpz_out_raw __GMP_PROTO ((FILE *, mpz_srcptr));
+__GMP_DECLSPEC size_t mpz_out_raw (FILE *, mpz_srcptr);
  #endif
  
  #define mpz_out_str __gmpz_out_str
  #ifdef _GMP_H_HAVE_FILE
-__GMP_DECLSPEC size_t mpz_out_str __GMP_PROTO ((FILE *, int, mpz_srcptr));
+__GMP_DECLSPEC size_t mpz_out_str (FILE *, int, mpz_srcptr);
  #endif
  
  #define mpz_perfect_power_p __gmpz_perfect_power_p
-__GMP_DECLSPEC int mpz_perfect_power_p __GMP_PROTO ((mpz_srcptr)) __GMP_ATTRIBUTE_PURE;
+__GMP_DECLSPEC int mpz_perfect_power_p (mpz_srcptr) __GMP_ATTRIBUTE_PURE;
  
  #define mpz_perfect_square_p __gmpz_perfect_square_p
  #if __GMP_INLINE_PROTOTYPES || defined (__GMP_FORCE_mpz_perfect_square_p)
-__GMP_DECLSPEC int mpz_perfect_square_p __GMP_PROTO ((mpz_srcptr)) __GMP_ATTRIBUTE_PURE;
+__GMP_DECLSPEC int mpz_perfect_square_p (mpz_srcptr) __GMP_ATTRIBUTE_PURE;
  #endif
  
  #define mpz_popcount __gmpz_popcount
  #if __GMP_INLINE_PROTOTYPES || defined (__GMP_FORCE_mpz_popcount)
-__GMP_DECLSPEC mp_bitcnt_t mpz_popcount __GMP_PROTO ((mpz_srcptr)) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE;
+__GMP_DECLSPEC mp_bitcnt_t mpz_popcount (mpz_srcptr) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE;
  #endif
  
  #define mpz_pow_ui __gmpz_pow_ui
-__GMP_DECLSPEC void mpz_pow_ui __GMP_PROTO ((mpz_ptr, mpz_srcptr, unsigned long int));
+__GMP_DECLSPEC void mpz_pow_ui (mpz_ptr, mpz_srcptr, unsigned long int);
  
  #define mpz_powm __gmpz_powm
-__GMP_DECLSPEC void mpz_powm __GMP_PROTO ((mpz_ptr, mpz_srcptr, mpz_srcptr, mpz_srcptr));
+__GMP_DECLSPEC void mpz_powm (mpz_ptr, mpz_srcptr, mpz_srcptr, mpz_srcptr);
  
  #define mpz_powm_sec __gmpz_powm_sec
-__GMP_DECLSPEC void mpz_powm_sec __GMP_PROTO ((mpz_ptr, mpz_srcptr, mpz_srcptr, mpz_srcptr));
+__GMP_DECLSPEC void mpz_powm_sec (mpz_ptr, mpz_srcptr, mpz_srcptr, mpz_srcptr);
  
  #define mpz_powm_ui __gmpz_powm_ui
-__GMP_DECLSPEC void mpz_powm_ui __GMP_PROTO ((mpz_ptr, mpz_srcptr, unsigned long int, mpz_srcptr));
+__GMP_DECLSPEC void mpz_powm_ui (mpz_ptr, mpz_srcptr, unsigned long int, mpz_srcptr);
  
  #define mpz_probab_prime_p __gmpz_probab_prime_p
-__GMP_DECLSPEC int mpz_probab_prime_p __GMP_PROTO ((mpz_srcptr, int)) __GMP_ATTRIBUTE_PURE;
+__GMP_DECLSPEC int mpz_probab_prime_p (mpz_srcptr, int) __GMP_ATTRIBUTE_PURE;
  
  #define mpz_random __gmpz_random
-__GMP_DECLSPEC void mpz_random __GMP_PROTO ((mpz_ptr, mp_size_t));
+__GMP_DECLSPEC void mpz_random (mpz_ptr, mp_size_t);
  
  #define mpz_random2 __gmpz_random2
-__GMP_DECLSPEC void mpz_random2 __GMP_PROTO ((mpz_ptr, mp_size_t));
+__GMP_DECLSPEC void mpz_random2 (mpz_ptr, mp_size_t);
  
  #define mpz_realloc2 __gmpz_realloc2
-__GMP_DECLSPEC void mpz_realloc2 __GMP_PROTO ((mpz_ptr, mp_bitcnt_t));
+__GMP_DECLSPEC void mpz_realloc2 (mpz_ptr, mp_bitcnt_t);
  
  #define mpz_remove __gmpz_remove
-__GMP_DECLSPEC mp_bitcnt_t mpz_remove __GMP_PROTO ((mpz_ptr, mpz_srcptr, mpz_srcptr));
+__GMP_DECLSPEC mp_bitcnt_t mpz_remove (mpz_ptr, mpz_srcptr, mpz_srcptr);
  
  #define mpz_root __gmpz_root
-__GMP_DECLSPEC int mpz_root __GMP_PROTO ((mpz_ptr, mpz_srcptr, unsigned long int));
+__GMP_DECLSPEC int mpz_root (mpz_ptr, mpz_srcptr, unsigned long int);
  
  #define mpz_rootrem __gmpz_rootrem
-__GMP_DECLSPEC void mpz_rootrem __GMP_PROTO ((mpz_ptr,mpz_ptr, mpz_srcptr, unsigned long int));
+__GMP_DECLSPEC void mpz_rootrem (mpz_ptr, mpz_ptr, mpz_srcptr, unsigned long int);
  
  #define mpz_rrandomb __gmpz_rrandomb
-__GMP_DECLSPEC void mpz_rrandomb __GMP_PROTO ((mpz_ptr, gmp_randstate_t, mp_bitcnt_t));
+__GMP_DECLSPEC void mpz_rrandomb (mpz_ptr, gmp_randstate_t, mp_bitcnt_t);
  
  #define mpz_scan0 __gmpz_scan0
-__GMP_DECLSPEC mp_bitcnt_t mpz_scan0 __GMP_PROTO ((mpz_srcptr, mp_bitcnt_t)) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE;
+__GMP_DECLSPEC mp_bitcnt_t mpz_scan0 (mpz_srcptr, mp_bitcnt_t) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE;
  
  #define mpz_scan1 __gmpz_scan1
-__GMP_DECLSPEC mp_bitcnt_t mpz_scan1 __GMP_PROTO ((mpz_srcptr, mp_bitcnt_t)) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE;
+__GMP_DECLSPEC mp_bitcnt_t mpz_scan1 (mpz_srcptr, mp_bitcnt_t) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE;
  
  #define mpz_set __gmpz_set
-__GMP_DECLSPEC void mpz_set __GMP_PROTO ((mpz_ptr, mpz_srcptr));
+__GMP_DECLSPEC void mpz_set (mpz_ptr, mpz_srcptr);
  
  #define mpz_set_d __gmpz_set_d
-__GMP_DECLSPEC void mpz_set_d __GMP_PROTO ((mpz_ptr, double));
+__GMP_DECLSPEC void mpz_set_d (mpz_ptr, double);
  
  #define mpz_set_f __gmpz_set_f
-__GMP_DECLSPEC void mpz_set_f __GMP_PROTO ((mpz_ptr, mpf_srcptr));
+__GMP_DECLSPEC void mpz_set_f (mpz_ptr, mpf_srcptr);
  
  #define mpz_set_q __gmpz_set_q
  #if __GMP_INLINE_PROTOTYPES || defined (__GMP_FORCE_mpz_set_q)
-__GMP_DECLSPEC void mpz_set_q __GMP_PROTO ((mpz_ptr, mpq_srcptr));
+__GMP_DECLSPEC void mpz_set_q (mpz_ptr, mpq_srcptr);
  #endif
  
  #define mpz_set_si __gmpz_set_si
-__GMP_DECLSPEC void mpz_set_si __GMP_PROTO ((mpz_ptr, signed long int));
+__GMP_DECLSPEC void mpz_set_si (mpz_ptr, signed long int);
  
  #define mpz_set_str __gmpz_set_str
-__GMP_DECLSPEC int mpz_set_str __GMP_PROTO ((mpz_ptr, __gmp_const char *, int));
+__GMP_DECLSPEC int mpz_set_str (mpz_ptr, const char *, int);
  
  #define mpz_set_ui __gmpz_set_ui
-__GMP_DECLSPEC void mpz_set_ui __GMP_PROTO ((mpz_ptr, unsigned long int));
+__GMP_DECLSPEC void mpz_set_ui (mpz_ptr, unsigned long int);
  
  #define mpz_setbit __gmpz_setbit
-__GMP_DECLSPEC void mpz_setbit __GMP_PROTO ((mpz_ptr, mp_bitcnt_t));
+__GMP_DECLSPEC void mpz_setbit (mpz_ptr, mp_bitcnt_t);
  
  #define mpz_size __gmpz_size
  #if __GMP_INLINE_PROTOTYPES || defined (__GMP_FORCE_mpz_size)
-__GMP_DECLSPEC size_t mpz_size __GMP_PROTO ((mpz_srcptr)) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE;
+__GMP_DECLSPEC size_t mpz_size (mpz_srcptr) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE;
  #endif
  
  #define mpz_sizeinbase __gmpz_sizeinbase
-__GMP_DECLSPEC size_t mpz_sizeinbase __GMP_PROTO ((mpz_srcptr, int)) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE;
+__GMP_DECLSPEC size_t mpz_sizeinbase (mpz_srcptr, int) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE;
  
  #define mpz_sqrt __gmpz_sqrt
-__GMP_DECLSPEC void mpz_sqrt __GMP_PROTO ((mpz_ptr, mpz_srcptr));
+__GMP_DECLSPEC void mpz_sqrt (mpz_ptr, mpz_srcptr);
  
  #define mpz_sqrtrem __gmpz_sqrtrem
-__GMP_DECLSPEC void mpz_sqrtrem __GMP_PROTO ((mpz_ptr, mpz_ptr, mpz_srcptr));
+__GMP_DECLSPEC void mpz_sqrtrem (mpz_ptr, mpz_ptr, mpz_srcptr);
  
  #define mpz_sub __gmpz_sub
-__GMP_DECLSPEC void mpz_sub __GMP_PROTO ((mpz_ptr, mpz_srcptr, mpz_srcptr));
+__GMP_DECLSPEC void mpz_sub (mpz_ptr, mpz_srcptr, mpz_srcptr);
  
  #define mpz_sub_ui __gmpz_sub_ui
-__GMP_DECLSPEC void mpz_sub_ui __GMP_PROTO ((mpz_ptr, mpz_srcptr, unsigned long int));
+__GMP_DECLSPEC void mpz_sub_ui (mpz_ptr, mpz_srcptr, unsigned long int);
  
  #define mpz_ui_sub __gmpz_ui_sub
-__GMP_DECLSPEC void mpz_ui_sub __GMP_PROTO ((mpz_ptr, unsigned long int, mpz_srcptr));
+__GMP_DECLSPEC void mpz_ui_sub (mpz_ptr, unsigned long int, mpz_srcptr);
  
  #define mpz_submul __gmpz_submul
-__GMP_DECLSPEC void mpz_submul __GMP_PROTO ((mpz_ptr, mpz_srcptr, mpz_srcptr));
+__GMP_DECLSPEC void mpz_submul (mpz_ptr, mpz_srcptr, mpz_srcptr);
  
  #define mpz_submul_ui __gmpz_submul_ui
-__GMP_DECLSPEC void mpz_submul_ui __GMP_PROTO ((mpz_ptr, mpz_srcptr, unsigned long int));
+__GMP_DECLSPEC void mpz_submul_ui (mpz_ptr, mpz_srcptr, unsigned long int);
  
  #define mpz_swap __gmpz_swap
-__GMP_DECLSPEC void mpz_swap __GMP_PROTO ((mpz_ptr, mpz_ptr)) __GMP_NOTHROW;
+__GMP_DECLSPEC void mpz_swap (mpz_ptr, mpz_ptr) __GMP_NOTHROW;
  
  #define mpz_tdiv_ui __gmpz_tdiv_ui
-__GMP_DECLSPEC unsigned long int mpz_tdiv_ui __GMP_PROTO ((mpz_srcptr, unsigned long int)) __GMP_ATTRIBUTE_PURE;
+__GMP_DECLSPEC unsigned long int mpz_tdiv_ui (mpz_srcptr, unsigned long int) __GMP_ATTRIBUTE_PURE;
  
  #define mpz_tdiv_q __gmpz_tdiv_q
-__GMP_DECLSPEC void mpz_tdiv_q __GMP_PROTO ((mpz_ptr, mpz_srcptr, mpz_srcptr));
+__GMP_DECLSPEC void mpz_tdiv_q (mpz_ptr, mpz_srcptr, mpz_srcptr);
  
  #define mpz_tdiv_q_2exp __gmpz_tdiv_q_2exp
-__GMP_DECLSPEC void mpz_tdiv_q_2exp __GMP_PROTO ((mpz_ptr, mpz_srcptr, mp_bitcnt_t));
+__GMP_DECLSPEC void mpz_tdiv_q_2exp (mpz_ptr, mpz_srcptr, mp_bitcnt_t);
  
  #define mpz_tdiv_q_ui __gmpz_tdiv_q_ui
-__GMP_DECLSPEC unsigned long int mpz_tdiv_q_ui __GMP_PROTO ((mpz_ptr, mpz_srcptr, unsigned long int));
+__GMP_DECLSPEC unsigned long int mpz_tdiv_q_ui (mpz_ptr, mpz_srcptr, unsigned long int);
  
  #define mpz_tdiv_qr __gmpz_tdiv_qr
-__GMP_DECLSPEC void mpz_tdiv_qr __GMP_PROTO ((mpz_ptr, mpz_ptr, mpz_srcptr, mpz_srcptr));
+__GMP_DECLSPEC void mpz_tdiv_qr (mpz_ptr, mpz_ptr, mpz_srcptr, mpz_srcptr);
  
  #define mpz_tdiv_qr_ui __gmpz_tdiv_qr_ui
-__GMP_DECLSPEC unsigned long int mpz_tdiv_qr_ui __GMP_PROTO ((mpz_ptr, mpz_ptr, mpz_srcptr, unsigned long int));
+__GMP_DECLSPEC unsigned long int mpz_tdiv_qr_ui (mpz_ptr, mpz_ptr, mpz_srcptr, unsigned long int);
  
  #define mpz_tdiv_r __gmpz_tdiv_r
-__GMP_DECLSPEC void mpz_tdiv_r __GMP_PROTO ((mpz_ptr, mpz_srcptr, mpz_srcptr));
+__GMP_DECLSPEC void mpz_tdiv_r (mpz_ptr, mpz_srcptr, mpz_srcptr);
  
  #define mpz_tdiv_r_2exp __gmpz_tdiv_r_2exp
-__GMP_DECLSPEC void mpz_tdiv_r_2exp __GMP_PROTO ((mpz_ptr, mpz_srcptr, mp_bitcnt_t));
+__GMP_DECLSPEC void mpz_tdiv_r_2exp (mpz_ptr, mpz_srcptr, mp_bitcnt_t);
  
  #define mpz_tdiv_r_ui __gmpz_tdiv_r_ui
-__GMP_DECLSPEC unsigned long int mpz_tdiv_r_ui __GMP_PROTO ((mpz_ptr, mpz_srcptr, unsigned long int));
+__GMP_DECLSPEC unsigned long int mpz_tdiv_r_ui (mpz_ptr, mpz_srcptr, unsigned long int);
  
  #define mpz_tstbit __gmpz_tstbit
-__GMP_DECLSPEC int mpz_tstbit __GMP_PROTO ((mpz_srcptr, mp_bitcnt_t)) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE;
+__GMP_DECLSPEC int mpz_tstbit (mpz_srcptr, mp_bitcnt_t) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE;
  
  #define mpz_ui_pow_ui __gmpz_ui_pow_ui
-__GMP_DECLSPEC void mpz_ui_pow_ui __GMP_PROTO ((mpz_ptr, unsigned long int, unsigned long int));
+__GMP_DECLSPEC void mpz_ui_pow_ui (mpz_ptr, unsigned long int, unsigned long int);
  
  #define mpz_urandomb __gmpz_urandomb
-__GMP_DECLSPEC void mpz_urandomb __GMP_PROTO ((mpz_ptr, gmp_randstate_t, mp_bitcnt_t));
+__GMP_DECLSPEC void mpz_urandomb (mpz_ptr, gmp_randstate_t, mp_bitcnt_t);
  
  #define mpz_urandomm __gmpz_urandomm
-__GMP_DECLSPEC void mpz_urandomm __GMP_PROTO ((mpz_ptr, gmp_randstate_t, mpz_srcptr));
+__GMP_DECLSPEC void mpz_urandomm (mpz_ptr, gmp_randstate_t, mpz_srcptr);
  
  #define mpz_xor __gmpz_xor
  #define mpz_eor __gmpz_xor
-__GMP_DECLSPEC void mpz_xor __GMP_PROTO ((mpz_ptr, mpz_srcptr, mpz_srcptr));
+__GMP_DECLSPEC void mpz_xor (mpz_ptr, mpz_srcptr, mpz_srcptr);
  
  
  /**************** Rational (i.e. Q) routines.  ****************/
  
  #define mpq_abs __gmpq_abs
  #if __GMP_INLINE_PROTOTYPES || defined (__GMP_FORCE_mpq_abs)
-__GMP_DECLSPEC void mpq_abs __GMP_PROTO ((mpq_ptr, mpq_srcptr));
+__GMP_DECLSPEC void mpq_abs (mpq_ptr, mpq_srcptr);
  #endif
  
  #define mpq_add __gmpq_add
-__GMP_DECLSPEC void mpq_add __GMP_PROTO ((mpq_ptr, mpq_srcptr, mpq_srcptr));
+__GMP_DECLSPEC void mpq_add (mpq_ptr, mpq_srcptr, mpq_srcptr);
  
  #define mpq_canonicalize __gmpq_canonicalize
-__GMP_DECLSPEC void mpq_canonicalize __GMP_PROTO ((mpq_ptr));
+__GMP_DECLSPEC void mpq_canonicalize (mpq_ptr);
  
  #define mpq_clear __gmpq_clear
-__GMP_DECLSPEC void mpq_clear __GMP_PROTO ((mpq_ptr));
+__GMP_DECLSPEC void mpq_clear (mpq_ptr);
  
  #define mpq_clears __gmpq_clears
-__GMP_DECLSPEC void mpq_clears __GMP_PROTO ((mpq_ptr, ...));
+__GMP_DECLSPEC void mpq_clears (mpq_ptr, ...);
  
  #define mpq_cmp __gmpq_cmp
-__GMP_DECLSPEC int mpq_cmp __GMP_PROTO ((mpq_srcptr, mpq_srcptr)) __GMP_ATTRIBUTE_PURE;
+__GMP_DECLSPEC int mpq_cmp (mpq_srcptr, mpq_srcptr) __GMP_ATTRIBUTE_PURE;
  
  #define _mpq_cmp_si __gmpq_cmp_si
-__GMP_DECLSPEC int _mpq_cmp_si __GMP_PROTO ((mpq_srcptr, long, unsigned long)) __GMP_ATTRIBUTE_PURE;
+__GMP_DECLSPEC int _mpq_cmp_si (mpq_srcptr, long, unsigned long) __GMP_ATTRIBUTE_PURE;
  
  #define _mpq_cmp_ui __gmpq_cmp_ui
-__GMP_DECLSPEC int _mpq_cmp_ui __GMP_PROTO ((mpq_srcptr, unsigned long int, unsigned long int)) __GMP_ATTRIBUTE_PURE;
+__GMP_DECLSPEC int _mpq_cmp_ui (mpq_srcptr, unsigned long int, unsigned long int) __GMP_ATTRIBUTE_PURE;
  
  #define mpq_div __gmpq_div
-__GMP_DECLSPEC void mpq_div __GMP_PROTO ((mpq_ptr, mpq_srcptr, mpq_srcptr));
+__GMP_DECLSPEC void mpq_div (mpq_ptr, mpq_srcptr, mpq_srcptr);
  
  #define mpq_div_2exp __gmpq_div_2exp
-__GMP_DECLSPEC void mpq_div_2exp __GMP_PROTO ((mpq_ptr, mpq_srcptr, mp_bitcnt_t));
+__GMP_DECLSPEC void mpq_div_2exp (mpq_ptr, mpq_srcptr, mp_bitcnt_t);
  
  #define mpq_equal __gmpq_equal
-__GMP_DECLSPEC int mpq_equal __GMP_PROTO ((mpq_srcptr, mpq_srcptr)) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE;
+__GMP_DECLSPEC int mpq_equal (mpq_srcptr, mpq_srcptr) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE;
  
  #define mpq_get_num __gmpq_get_num
-__GMP_DECLSPEC void mpq_get_num __GMP_PROTO ((mpz_ptr, mpq_srcptr));
+__GMP_DECLSPEC void mpq_get_num (mpz_ptr, mpq_srcptr);
  
  #define mpq_get_den __gmpq_get_den
-__GMP_DECLSPEC void mpq_get_den __GMP_PROTO ((mpz_ptr, mpq_srcptr));
+__GMP_DECLSPEC void mpq_get_den (mpz_ptr, mpq_srcptr);
  
  #define mpq_get_d __gmpq_get_d
-__GMP_DECLSPEC double mpq_get_d __GMP_PROTO ((mpq_srcptr)) __GMP_ATTRIBUTE_PURE;
+__GMP_DECLSPEC double mpq_get_d (mpq_srcptr) __GMP_ATTRIBUTE_PURE;
  
  #define mpq_get_str __gmpq_get_str
-__GMP_DECLSPEC char *mpq_get_str __GMP_PROTO ((char *, int, mpq_srcptr));
+__GMP_DECLSPEC char *mpq_get_str (char *, int, mpq_srcptr);
  
  #define mpq_init __gmpq_init
-__GMP_DECLSPEC void mpq_init __GMP_PROTO ((mpq_ptr));
+__GMP_DECLSPEC void mpq_init (mpq_ptr);
  
  #define mpq_inits __gmpq_inits
-__GMP_DECLSPEC void mpq_inits __GMP_PROTO ((mpq_ptr, ...));
+__GMP_DECLSPEC void mpq_inits (mpq_ptr, ...);
  
  #define mpq_inp_str __gmpq_inp_str
  #ifdef _GMP_H_HAVE_FILE
-__GMP_DECLSPEC size_t mpq_inp_str __GMP_PROTO ((mpq_ptr, FILE *, int));
+__GMP_DECLSPEC size_t mpq_inp_str (mpq_ptr, FILE *, int);
  #endif
  
  #define mpq_inv __gmpq_inv
-__GMP_DECLSPEC void mpq_inv __GMP_PROTO ((mpq_ptr, mpq_srcptr));
+__GMP_DECLSPEC void mpq_inv (mpq_ptr, mpq_srcptr);
  
  #define mpq_mul __gmpq_mul
-__GMP_DECLSPEC void mpq_mul __GMP_PROTO ((mpq_ptr, mpq_srcptr, mpq_srcptr));
+__GMP_DECLSPEC void mpq_mul (mpq_ptr, mpq_srcptr, mpq_srcptr);
  
  #define mpq_mul_2exp __gmpq_mul_2exp
-__GMP_DECLSPEC void mpq_mul_2exp __GMP_PROTO ((mpq_ptr, mpq_srcptr, mp_bitcnt_t));
+__GMP_DECLSPEC void mpq_mul_2exp (mpq_ptr, mpq_srcptr, mp_bitcnt_t);
  
  #define mpq_neg __gmpq_neg
  #if __GMP_INLINE_PROTOTYPES || defined (__GMP_FORCE_mpq_neg)
-__GMP_DECLSPEC void mpq_neg __GMP_PROTO ((mpq_ptr, mpq_srcptr));
+__GMP_DECLSPEC void mpq_neg (mpq_ptr, mpq_srcptr);
  #endif
  
  #define mpq_out_str __gmpq_out_str
  #ifdef _GMP_H_HAVE_FILE
-__GMP_DECLSPEC size_t mpq_out_str __GMP_PROTO ((FILE *, int, mpq_srcptr));
+__GMP_DECLSPEC size_t mpq_out_str (FILE *, int, mpq_srcptr);
  #endif
  
  #define mpq_set __gmpq_set
-__GMP_DECLSPEC void mpq_set __GMP_PROTO ((mpq_ptr, mpq_srcptr));
+__GMP_DECLSPEC void mpq_set (mpq_ptr, mpq_srcptr);
  
  #define mpq_set_d __gmpq_set_d
-__GMP_DECLSPEC void mpq_set_d __GMP_PROTO ((mpq_ptr, double));
+__GMP_DECLSPEC void mpq_set_d (mpq_ptr, double);
  
  #define mpq_set_den __gmpq_set_den
-__GMP_DECLSPEC void mpq_set_den __GMP_PROTO ((mpq_ptr, mpz_srcptr));
+__GMP_DECLSPEC void mpq_set_den (mpq_ptr, mpz_srcptr);
  
  #define mpq_set_f __gmpq_set_f
-__GMP_DECLSPEC void mpq_set_f __GMP_PROTO ((mpq_ptr, mpf_srcptr));
+__GMP_DECLSPEC void mpq_set_f (mpq_ptr, mpf_srcptr);
  
  #define mpq_set_num __gmpq_set_num
-__GMP_DECLSPEC void mpq_set_num __GMP_PROTO ((mpq_ptr, mpz_srcptr));
+__GMP_DECLSPEC void mpq_set_num (mpq_ptr, mpz_srcptr);
  
  #define mpq_set_si __gmpq_set_si
-__GMP_DECLSPEC void mpq_set_si __GMP_PROTO ((mpq_ptr, signed long int, unsigned long int));
+__GMP_DECLSPEC void mpq_set_si (mpq_ptr, signed long int, unsigned long int);
  
  #define mpq_set_str __gmpq_set_str
-__GMP_DECLSPEC int mpq_set_str __GMP_PROTO ((mpq_ptr, __gmp_const char *, int));
+__GMP_DECLSPEC int mpq_set_str (mpq_ptr, const char *, int);
  
  #define mpq_set_ui __gmpq_set_ui
-__GMP_DECLSPEC void mpq_set_ui __GMP_PROTO ((mpq_ptr, unsigned long int, unsigned long int));
+__GMP_DECLSPEC void mpq_set_ui (mpq_ptr, unsigned long int, unsigned long int);
  
  #define mpq_set_z __gmpq_set_z
-__GMP_DECLSPEC void mpq_set_z __GMP_PROTO ((mpq_ptr, mpz_srcptr));
+__GMP_DECLSPEC void mpq_set_z (mpq_ptr, mpz_srcptr);
  
  #define mpq_sub __gmpq_sub
-__GMP_DECLSPEC void mpq_sub __GMP_PROTO ((mpq_ptr, mpq_srcptr, mpq_srcptr));
+__GMP_DECLSPEC void mpq_sub (mpq_ptr, mpq_srcptr, mpq_srcptr);
  
  #define mpq_swap __gmpq_swap
-__GMP_DECLSPEC void mpq_swap __GMP_PROTO ((mpq_ptr, mpq_ptr)) __GMP_NOTHROW;
+__GMP_DECLSPEC void mpq_swap (mpq_ptr, mpq_ptr) __GMP_NOTHROW;
  
  
  /**************** Float (i.e. F) routines.  ****************/
  
  #define mpf_abs __gmpf_abs
-__GMP_DECLSPEC void mpf_abs __GMP_PROTO ((mpf_ptr, mpf_srcptr));
+__GMP_DECLSPEC void mpf_abs (mpf_ptr, mpf_srcptr);
  
  #define mpf_add __gmpf_add
-__GMP_DECLSPEC void mpf_add __GMP_PROTO ((mpf_ptr, mpf_srcptr, mpf_srcptr));
+__GMP_DECLSPEC void mpf_add (mpf_ptr, mpf_srcptr, mpf_srcptr);
  
  #define mpf_add_ui __gmpf_add_ui
-__GMP_DECLSPEC void mpf_add_ui __GMP_PROTO ((mpf_ptr, mpf_srcptr, unsigned long int));
+__GMP_DECLSPEC void mpf_add_ui (mpf_ptr, mpf_srcptr, unsigned long int);
  #define mpf_ceil __gmpf_ceil
-__GMP_DECLSPEC void mpf_ceil __GMP_PROTO ((mpf_ptr, mpf_srcptr));
+__GMP_DECLSPEC void mpf_ceil (mpf_ptr, mpf_srcptr);
  
  #define mpf_clear __gmpf_clear
-__GMP_DECLSPEC void mpf_clear __GMP_PROTO ((mpf_ptr));
+__GMP_DECLSPEC void mpf_clear (mpf_ptr);
  
  #define mpf_clears __gmpf_clears
-__GMP_DECLSPEC void mpf_clears __GMP_PROTO ((mpf_ptr, ...));
+__GMP_DECLSPEC void mpf_clears (mpf_ptr, ...);
  
  #define mpf_cmp __gmpf_cmp
-__GMP_DECLSPEC int mpf_cmp __GMP_PROTO ((mpf_srcptr, mpf_srcptr)) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE;
+__GMP_DECLSPEC int mpf_cmp (mpf_srcptr, mpf_srcptr) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE;
  
  #define mpf_cmp_d __gmpf_cmp_d
-__GMP_DECLSPEC int mpf_cmp_d __GMP_PROTO ((mpf_srcptr, double)) __GMP_ATTRIBUTE_PURE;
+__GMP_DECLSPEC int mpf_cmp_d (mpf_srcptr, double) __GMP_ATTRIBUTE_PURE;
  
  #define mpf_cmp_si __gmpf_cmp_si
-__GMP_DECLSPEC int mpf_cmp_si __GMP_PROTO ((mpf_srcptr, signed long int)) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE;
+__GMP_DECLSPEC int mpf_cmp_si (mpf_srcptr, signed long int) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE;
  
  #define mpf_cmp_ui __gmpf_cmp_ui
-__GMP_DECLSPEC int mpf_cmp_ui __GMP_PROTO ((mpf_srcptr, unsigned long int)) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE;
+__GMP_DECLSPEC int mpf_cmp_ui (mpf_srcptr, unsigned long int) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE;
  
  #define mpf_div __gmpf_div
-__GMP_DECLSPEC void mpf_div __GMP_PROTO ((mpf_ptr, mpf_srcptr, mpf_srcptr));
+__GMP_DECLSPEC void mpf_div (mpf_ptr, mpf_srcptr, mpf_srcptr);
  
  #define mpf_div_2exp __gmpf_div_2exp
-__GMP_DECLSPEC void mpf_div_2exp __GMP_PROTO ((mpf_ptr, mpf_srcptr, mp_bitcnt_t));
+__GMP_DECLSPEC void mpf_div_2exp (mpf_ptr, mpf_srcptr, mp_bitcnt_t);
  
  #define mpf_div_ui __gmpf_div_ui
-__GMP_DECLSPEC void mpf_div_ui __GMP_PROTO ((mpf_ptr, mpf_srcptr, unsigned long int));
+__GMP_DECLSPEC void mpf_div_ui (mpf_ptr, mpf_srcptr, unsigned long int);
  
  #define mpf_dump __gmpf_dump
-__GMP_DECLSPEC void mpf_dump __GMP_PROTO ((mpf_srcptr));
+__GMP_DECLSPEC void mpf_dump (mpf_srcptr);
  
  #define mpf_eq __gmpf_eq
-__GMP_DECLSPEC int mpf_eq __GMP_PROTO ((mpf_srcptr, mpf_srcptr, mp_bitcnt_t)) __GMP_ATTRIBUTE_PURE;
+__GMP_DECLSPEC int mpf_eq (mpf_srcptr, mpf_srcptr, mp_bitcnt_t) __GMP_ATTRIBUTE_PURE;
  
  #define mpf_fits_sint_p __gmpf_fits_sint_p
-__GMP_DECLSPEC int mpf_fits_sint_p __GMP_PROTO ((mpf_srcptr)) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE;
+__GMP_DECLSPEC int mpf_fits_sint_p (mpf_srcptr) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE;
  
  #define mpf_fits_slong_p __gmpf_fits_slong_p
-__GMP_DECLSPEC int mpf_fits_slong_p __GMP_PROTO ((mpf_srcptr)) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE;
+__GMP_DECLSPEC int mpf_fits_slong_p (mpf_srcptr) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE;
  
  #define mpf_fits_sshort_p __gmpf_fits_sshort_p
-__GMP_DECLSPEC int mpf_fits_sshort_p __GMP_PROTO ((mpf_srcptr)) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE;
+__GMP_DECLSPEC int mpf_fits_sshort_p (mpf_srcptr) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE;
  
  #define mpf_fits_uint_p __gmpf_fits_uint_p
-__GMP_DECLSPEC int mpf_fits_uint_p __GMP_PROTO ((mpf_srcptr)) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE;
+__GMP_DECLSPEC int mpf_fits_uint_p (mpf_srcptr) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE;
  
  #define mpf_fits_ulong_p __gmpf_fits_ulong_p
-__GMP_DECLSPEC int mpf_fits_ulong_p __GMP_PROTO ((mpf_srcptr)) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE;
+__GMP_DECLSPEC int mpf_fits_ulong_p (mpf_srcptr) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE;
  
  #define mpf_fits_ushort_p __gmpf_fits_ushort_p
-__GMP_DECLSPEC int mpf_fits_ushort_p __GMP_PROTO ((mpf_srcptr)) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE;
+__GMP_DECLSPEC int mpf_fits_ushort_p (mpf_srcptr) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE;
  
  #define mpf_floor __gmpf_floor
-__GMP_DECLSPEC void mpf_floor __GMP_PROTO ((mpf_ptr, mpf_srcptr));
+__GMP_DECLSPEC void mpf_floor (mpf_ptr, mpf_srcptr);
  
  #define mpf_get_d __gmpf_get_d
-__GMP_DECLSPEC double mpf_get_d __GMP_PROTO ((mpf_srcptr)) __GMP_ATTRIBUTE_PURE;
+__GMP_DECLSPEC double mpf_get_d (mpf_srcptr) __GMP_ATTRIBUTE_PURE;
  
  #define mpf_get_d_2exp __gmpf_get_d_2exp
-__GMP_DECLSPEC double mpf_get_d_2exp __GMP_PROTO ((signed long int *, mpf_srcptr));
+__GMP_DECLSPEC double mpf_get_d_2exp (signed long int *, mpf_srcptr);
  
  #define mpf_get_default_prec __gmpf_get_default_prec
-__GMP_DECLSPEC mp_bitcnt_t mpf_get_default_prec __GMP_PROTO ((void)) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE;
+__GMP_DECLSPEC mp_bitcnt_t mpf_get_default_prec (void) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE;
  
  #define mpf_get_prec __gmpf_get_prec
-__GMP_DECLSPEC mp_bitcnt_t mpf_get_prec __GMP_PROTO ((mpf_srcptr)) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE;
+__GMP_DECLSPEC mp_bitcnt_t mpf_get_prec (mpf_srcptr) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE;
  
  #define mpf_get_si __gmpf_get_si
-__GMP_DECLSPEC long mpf_get_si __GMP_PROTO ((mpf_srcptr)) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE;
+__GMP_DECLSPEC long mpf_get_si (mpf_srcptr) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE;
  
  #define mpf_get_str __gmpf_get_str
-__GMP_DECLSPEC char *mpf_get_str __GMP_PROTO ((char *, mp_exp_t *, int, size_t, mpf_srcptr));
+__GMP_DECLSPEC char *mpf_get_str (char *, mp_exp_t *, int, size_t, mpf_srcptr);
  
  #define mpf_get_ui __gmpf_get_ui
-__GMP_DECLSPEC unsigned long mpf_get_ui __GMP_PROTO ((mpf_srcptr)) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE;
+__GMP_DECLSPEC unsigned long mpf_get_ui (mpf_srcptr) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE;
  
  #define mpf_init __gmpf_init
-__GMP_DECLSPEC void mpf_init __GMP_PROTO ((mpf_ptr));
+__GMP_DECLSPEC void mpf_init (mpf_ptr);
  
  #define mpf_init2 __gmpf_init2
-__GMP_DECLSPEC void mpf_init2 __GMP_PROTO ((mpf_ptr, mp_bitcnt_t));
+__GMP_DECLSPEC void mpf_init2 (mpf_ptr, mp_bitcnt_t);
  
  #define mpf_inits __gmpf_inits
-__GMP_DECLSPEC void mpf_inits __GMP_PROTO ((mpf_ptr, ...));
+__GMP_DECLSPEC void mpf_inits (mpf_ptr, ...);
  
  #define mpf_init_set __gmpf_init_set
-__GMP_DECLSPEC void mpf_init_set __GMP_PROTO ((mpf_ptr, mpf_srcptr));
+__GMP_DECLSPEC void mpf_init_set (mpf_ptr, mpf_srcptr);
  
  #define mpf_init_set_d __gmpf_init_set_d
-__GMP_DECLSPEC void mpf_init_set_d __GMP_PROTO ((mpf_ptr, double));
+__GMP_DECLSPEC void mpf_init_set_d (mpf_ptr, double);
  
  #define mpf_init_set_si __gmpf_init_set_si
-__GMP_DECLSPEC void mpf_init_set_si __GMP_PROTO ((mpf_ptr, signed long int));
+__GMP_DECLSPEC void mpf_init_set_si (mpf_ptr, signed long int);
  
  #define mpf_init_set_str __gmpf_init_set_str
-__GMP_DECLSPEC int mpf_init_set_str __GMP_PROTO ((mpf_ptr, __gmp_const char *, int));
+__GMP_DECLSPEC int mpf_init_set_str (mpf_ptr, const char *, int);
  
  #define mpf_init_set_ui __gmpf_init_set_ui
-__GMP_DECLSPEC void mpf_init_set_ui __GMP_PROTO ((mpf_ptr, unsigned long int));
+__GMP_DECLSPEC void mpf_init_set_ui (mpf_ptr, unsigned long int);
  
  #define mpf_inp_str __gmpf_inp_str
  #ifdef _GMP_H_HAVE_FILE
-__GMP_DECLSPEC size_t mpf_inp_str __GMP_PROTO ((mpf_ptr, FILE *, int));
+__GMP_DECLSPEC size_t mpf_inp_str (mpf_ptr, FILE *, int);
  #endif
  
  #define mpf_integer_p __gmpf_integer_p
-__GMP_DECLSPEC int mpf_integer_p __GMP_PROTO ((mpf_srcptr)) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE;
+__GMP_DECLSPEC int mpf_integer_p (mpf_srcptr) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE;
  
  #define mpf_mul __gmpf_mul
-__GMP_DECLSPEC void mpf_mul __GMP_PROTO ((mpf_ptr, mpf_srcptr, mpf_srcptr));
+__GMP_DECLSPEC void mpf_mul (mpf_ptr, mpf_srcptr, mpf_srcptr);
  
  #define mpf_mul_2exp __gmpf_mul_2exp
-__GMP_DECLSPEC void mpf_mul_2exp __GMP_PROTO ((mpf_ptr, mpf_srcptr, mp_bitcnt_t));
+__GMP_DECLSPEC void mpf_mul_2exp (mpf_ptr, mpf_srcptr, mp_bitcnt_t);
  
  #define mpf_mul_ui __gmpf_mul_ui
-__GMP_DECLSPEC void mpf_mul_ui __GMP_PROTO ((mpf_ptr, mpf_srcptr, unsigned long int));
+__GMP_DECLSPEC void mpf_mul_ui (mpf_ptr, mpf_srcptr, unsigned long int);
  
  #define mpf_neg __gmpf_neg
-__GMP_DECLSPEC void mpf_neg __GMP_PROTO ((mpf_ptr, mpf_srcptr));
+__GMP_DECLSPEC void mpf_neg (mpf_ptr, mpf_srcptr);
  
  #define mpf_out_str __gmpf_out_str
  #ifdef _GMP_H_HAVE_FILE
-__GMP_DECLSPEC size_t mpf_out_str __GMP_PROTO ((FILE *, int, size_t, mpf_srcptr));
+__GMP_DECLSPEC size_t mpf_out_str (FILE *, int, size_t, mpf_srcptr);
  #endif
  
  #define mpf_pow_ui __gmpf_pow_ui
-__GMP_DECLSPEC void mpf_pow_ui __GMP_PROTO ((mpf_ptr, mpf_srcptr, unsigned long int));
+__GMP_DECLSPEC void mpf_pow_ui (mpf_ptr, mpf_srcptr, unsigned long int);
  
  #define mpf_random2 __gmpf_random2
-__GMP_DECLSPEC void mpf_random2 __GMP_PROTO ((mpf_ptr, mp_size_t, mp_exp_t));
+__GMP_DECLSPEC void mpf_random2 (mpf_ptr, mp_size_t, mp_exp_t);
  
  #define mpf_reldiff __gmpf_reldiff
-__GMP_DECLSPEC void mpf_reldiff __GMP_PROTO ((mpf_ptr, mpf_srcptr, mpf_srcptr));
+__GMP_DECLSPEC void mpf_reldiff (mpf_ptr, mpf_srcptr, mpf_srcptr);
  
  #define mpf_set __gmpf_set
-__GMP_DECLSPEC void mpf_set __GMP_PROTO ((mpf_ptr, mpf_srcptr));
+__GMP_DECLSPEC void mpf_set (mpf_ptr, mpf_srcptr);
  
  #define mpf_set_d __gmpf_set_d
-__GMP_DECLSPEC void mpf_set_d __GMP_PROTO ((mpf_ptr, double));
+__GMP_DECLSPEC void mpf_set_d (mpf_ptr, double);
  
  #define mpf_set_default_prec __gmpf_set_default_prec
-__GMP_DECLSPEC void mpf_set_default_prec __GMP_PROTO ((mp_bitcnt_t)) __GMP_NOTHROW;
+__GMP_DECLSPEC void mpf_set_default_prec (mp_bitcnt_t) __GMP_NOTHROW;
  
  #define mpf_set_prec __gmpf_set_prec
-__GMP_DECLSPEC void mpf_set_prec __GMP_PROTO ((mpf_ptr, mp_bitcnt_t));
+__GMP_DECLSPEC void mpf_set_prec (mpf_ptr, mp_bitcnt_t);
  
  #define mpf_set_prec_raw __gmpf_set_prec_raw
-__GMP_DECLSPEC void mpf_set_prec_raw __GMP_PROTO ((mpf_ptr, mp_bitcnt_t)) __GMP_NOTHROW;
+__GMP_DECLSPEC void mpf_set_prec_raw (mpf_ptr, mp_bitcnt_t) __GMP_NOTHROW;
  
  #define mpf_set_q __gmpf_set_q
-__GMP_DECLSPEC void mpf_set_q __GMP_PROTO ((mpf_ptr, mpq_srcptr));
+__GMP_DECLSPEC void mpf_set_q (mpf_ptr, mpq_srcptr);
  
  #define mpf_set_si __gmpf_set_si
-__GMP_DECLSPEC void mpf_set_si __GMP_PROTO ((mpf_ptr, signed long int));
+__GMP_DECLSPEC void mpf_set_si (mpf_ptr, signed long int);
  
  #define mpf_set_str __gmpf_set_str
-__GMP_DECLSPEC int mpf_set_str __GMP_PROTO ((mpf_ptr, __gmp_const char *, int));
+__GMP_DECLSPEC int mpf_set_str (mpf_ptr, const char *, int);
  
  #define mpf_set_ui __gmpf_set_ui
-__GMP_DECLSPEC void mpf_set_ui __GMP_PROTO ((mpf_ptr, unsigned long int));
+__GMP_DECLSPEC void mpf_set_ui (mpf_ptr, unsigned long int);
  
  #define mpf_set_z __gmpf_set_z
-__GMP_DECLSPEC void mpf_set_z __GMP_PROTO ((mpf_ptr, mpz_srcptr));
+__GMP_DECLSPEC void mpf_set_z (mpf_ptr, mpz_srcptr);
  
  #define mpf_size __gmpf_size
-__GMP_DECLSPEC size_t mpf_size __GMP_PROTO ((mpf_srcptr)) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE;
+__GMP_DECLSPEC size_t mpf_size (mpf_srcptr) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE;
  
  #define mpf_sqrt __gmpf_sqrt
-__GMP_DECLSPEC void mpf_sqrt __GMP_PROTO ((mpf_ptr, mpf_srcptr));
+__GMP_DECLSPEC void mpf_sqrt (mpf_ptr, mpf_srcptr);
  
  #define mpf_sqrt_ui __gmpf_sqrt_ui
-__GMP_DECLSPEC void mpf_sqrt_ui __GMP_PROTO ((mpf_ptr, unsigned long int));
+__GMP_DECLSPEC void mpf_sqrt_ui (mpf_ptr, unsigned long int);
  
  #define mpf_sub __gmpf_sub
-__GMP_DECLSPEC void mpf_sub __GMP_PROTO ((mpf_ptr, mpf_srcptr, mpf_srcptr));
+__GMP_DECLSPEC void mpf_sub (mpf_ptr, mpf_srcptr, mpf_srcptr);
  
  #define mpf_sub_ui __gmpf_sub_ui
-__GMP_DECLSPEC void mpf_sub_ui __GMP_PROTO ((mpf_ptr, mpf_srcptr, unsigned long int));
+__GMP_DECLSPEC void mpf_sub_ui (mpf_ptr, mpf_srcptr, unsigned long int);
  
  #define mpf_swap __gmpf_swap
-__GMP_DECLSPEC void mpf_swap __GMP_PROTO ((mpf_ptr, mpf_ptr)) __GMP_NOTHROW;
+__GMP_DECLSPEC void mpf_swap (mpf_ptr, mpf_ptr) __GMP_NOTHROW;
  
  #define mpf_trunc __gmpf_trunc
-__GMP_DECLSPEC void mpf_trunc __GMP_PROTO ((mpf_ptr, mpf_srcptr));
+__GMP_DECLSPEC void mpf_trunc (mpf_ptr, mpf_srcptr);
  
  #define mpf_ui_div __gmpf_ui_div
-__GMP_DECLSPEC void mpf_ui_div __GMP_PROTO ((mpf_ptr, unsigned long int, mpf_srcptr));
+__GMP_DECLSPEC void mpf_ui_div (mpf_ptr, unsigned long int, mpf_srcptr);
  
  #define mpf_ui_sub __gmpf_ui_sub
-__GMP_DECLSPEC void mpf_ui_sub __GMP_PROTO ((mpf_ptr, unsigned long int, mpf_srcptr));
+__GMP_DECLSPEC void mpf_ui_sub (mpf_ptr, unsigned long int, mpf_srcptr);
  
  #define mpf_urandomb __gmpf_urandomb
-__GMP_DECLSPEC void mpf_urandomb __GMP_PROTO ((mpf_t, gmp_randstate_t, mp_bitcnt_t));
+__GMP_DECLSPEC void mpf_urandomb (mpf_t, gmp_randstate_t, mp_bitcnt_t);
  
  
  /************ Low level positive-integer (i.e. N) routines.  ************/
@@ -1497,168 +1444,171 @@ __GMP_DECLSPEC void mpf_urandomb __GMP_PROTO ((mpf_t, gmp_randstate_t, mp_bitcnt
  
  #define mpn_add __MPN(add)
  #if __GMP_INLINE_PROTOTYPES || defined (__GMP_FORCE_mpn_add)
-__GMP_DECLSPEC mp_limb_t mpn_add __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr,mp_size_t));
+__GMP_DECLSPEC mp_limb_t mpn_add (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t);
  #endif
  
  #define mpn_add_1 __MPN(add_1)
  #if __GMP_INLINE_PROTOTYPES || defined (__GMP_FORCE_mpn_add_1)
-__GMP_DECLSPEC mp_limb_t mpn_add_1 __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_limb_t)) __GMP_NOTHROW;
+__GMP_DECLSPEC mp_limb_t mpn_add_1 (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t) __GMP_NOTHROW;
  #endif
  
  #define mpn_add_n __MPN(add_n)
-__GMP_DECLSPEC mp_limb_t mpn_add_n __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
+__GMP_DECLSPEC mp_limb_t mpn_add_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);
  
  #define mpn_addmul_1 __MPN(addmul_1)
-__GMP_DECLSPEC mp_limb_t mpn_addmul_1 __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_limb_t));
+__GMP_DECLSPEC mp_limb_t mpn_addmul_1 (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t);
  
  #define mpn_cmp __MPN(cmp)
  #if __GMP_INLINE_PROTOTYPES || defined (__GMP_FORCE_mpn_cmp)
-__GMP_DECLSPEC int mpn_cmp __GMP_PROTO ((mp_srcptr, mp_srcptr, mp_size_t)) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE;
+__GMP_DECLSPEC int mpn_cmp (mp_srcptr, mp_srcptr, mp_size_t) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE;
  #endif
  
  #define mpn_divexact_by3(dst,src,size) \
    mpn_divexact_by3c (dst, src, size, __GMP_CAST (mp_limb_t, 0))
  
  #define mpn_divexact_by3c __MPN(divexact_by3c)
-__GMP_DECLSPEC mp_limb_t mpn_divexact_by3c __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_limb_t));
+__GMP_DECLSPEC mp_limb_t mpn_divexact_by3c (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t);
  
  #define mpn_divmod_1(qp,np,nsize,dlimb) \
    mpn_divrem_1 (qp, __GMP_CAST (mp_size_t, 0), np, nsize, dlimb)
  
  #define mpn_divrem __MPN(divrem)
-__GMP_DECLSPEC mp_limb_t mpn_divrem __GMP_PROTO ((mp_ptr, mp_size_t, mp_ptr, mp_size_t, mp_srcptr, mp_size_t));
+__GMP_DECLSPEC mp_limb_t mpn_divrem (mp_ptr, mp_size_t, mp_ptr, mp_size_t, mp_srcptr, mp_size_t);
  
  #define mpn_divrem_1 __MPN(divrem_1)
-__GMP_DECLSPEC mp_limb_t mpn_divrem_1 __GMP_PROTO ((mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t));
+__GMP_DECLSPEC mp_limb_t mpn_divrem_1 (mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t);
  
  #define mpn_divrem_2 __MPN(divrem_2)
-__GMP_DECLSPEC mp_limb_t mpn_divrem_2 __GMP_PROTO ((mp_ptr, mp_size_t, mp_ptr, mp_size_t, mp_srcptr));
+__GMP_DECLSPEC mp_limb_t mpn_divrem_2 (mp_ptr, mp_size_t, mp_ptr, mp_size_t, mp_srcptr);
+
+#define mpn_div_qr_2 __MPN(div_qr_2)
+__GMP_DECLSPEC mp_limb_t mpn_div_qr_2 (mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_srcptr);
  
  #define mpn_gcd __MPN(gcd)
-__GMP_DECLSPEC mp_size_t mpn_gcd __GMP_PROTO ((mp_ptr, mp_ptr, mp_size_t, mp_ptr, mp_size_t));
+__GMP_DECLSPEC mp_size_t mpn_gcd (mp_ptr, mp_ptr, mp_size_t, mp_ptr, mp_size_t);
  
  #define mpn_gcd_1 __MPN(gcd_1)
-__GMP_DECLSPEC mp_limb_t mpn_gcd_1 __GMP_PROTO ((mp_srcptr, mp_size_t, mp_limb_t)) __GMP_ATTRIBUTE_PURE;
+__GMP_DECLSPEC mp_limb_t mpn_gcd_1 (mp_srcptr, mp_size_t, mp_limb_t) __GMP_ATTRIBUTE_PURE;
  
  #define mpn_gcdext_1 __MPN(gcdext_1)
-__GMP_DECLSPEC mp_limb_t mpn_gcdext_1 __GMP_PROTO ((mp_limb_signed_t *, mp_limb_signed_t *, mp_limb_t, mp_limb_t));
+__GMP_DECLSPEC mp_limb_t mpn_gcdext_1 (mp_limb_signed_t *, mp_limb_signed_t *, mp_limb_t, mp_limb_t);
  
  #define mpn_gcdext __MPN(gcdext)
-__GMP_DECLSPEC mp_size_t mpn_gcdext __GMP_PROTO ((mp_ptr, mp_ptr, mp_size_t *, mp_ptr, mp_size_t, mp_ptr, mp_size_t));
+__GMP_DECLSPEC mp_size_t mpn_gcdext (mp_ptr, mp_ptr, mp_size_t *, mp_ptr, mp_size_t, mp_ptr, mp_size_t);
  
  #define mpn_get_str __MPN(get_str)
-__GMP_DECLSPEC size_t mpn_get_str __GMP_PROTO ((unsigned char *, int, mp_ptr, mp_size_t));
+__GMP_DECLSPEC size_t mpn_get_str (unsigned char *, int, mp_ptr, mp_size_t);
  
  #define mpn_hamdist __MPN(hamdist)
-__GMP_DECLSPEC mp_bitcnt_t mpn_hamdist __GMP_PROTO ((mp_srcptr, mp_srcptr, mp_size_t)) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE;
+__GMP_DECLSPEC mp_bitcnt_t mpn_hamdist (mp_srcptr, mp_srcptr, mp_size_t) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE;
  
  #define mpn_lshift __MPN(lshift)
-__GMP_DECLSPEC mp_limb_t mpn_lshift __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, unsigned int));
+__GMP_DECLSPEC mp_limb_t mpn_lshift (mp_ptr, mp_srcptr, mp_size_t, unsigned int);
  
  #define mpn_mod_1 __MPN(mod_1)
-__GMP_DECLSPEC mp_limb_t mpn_mod_1 __GMP_PROTO ((mp_srcptr, mp_size_t, mp_limb_t)) __GMP_ATTRIBUTE_PURE;
+__GMP_DECLSPEC mp_limb_t mpn_mod_1 (mp_srcptr, mp_size_t, mp_limb_t) __GMP_ATTRIBUTE_PURE;
  
  #define mpn_mul __MPN(mul)
-__GMP_DECLSPEC mp_limb_t mpn_mul __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t));
+__GMP_DECLSPEC mp_limb_t mpn_mul (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t);
  
  #define mpn_mul_1 __MPN(mul_1)
-__GMP_DECLSPEC mp_limb_t mpn_mul_1 __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_limb_t));
+__GMP_DECLSPEC mp_limb_t mpn_mul_1 (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t);
  
  #define mpn_mul_n __MPN(mul_n)
-__GMP_DECLSPEC void mpn_mul_n __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
+__GMP_DECLSPEC void mpn_mul_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);
  
  #define mpn_sqr __MPN(sqr)
-__GMP_DECLSPEC void mpn_sqr __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t));
+__GMP_DECLSPEC void mpn_sqr (mp_ptr, mp_srcptr, mp_size_t);
  
  #define mpn_neg __MPN(neg)
  #if __GMP_INLINE_PROTOTYPES || defined (__GMP_FORCE_mpn_neg)
-__GMP_DECLSPEC mp_limb_t mpn_neg __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t));
+__GMP_DECLSPEC mp_limb_t mpn_neg (mp_ptr, mp_srcptr, mp_size_t);
  #endif
  
  #define mpn_com __MPN(com)
  #if __GMP_INLINE_PROTOTYPES || defined (__GMP_FORCE_mpn_com)
-__GMP_DECLSPEC void mpn_com __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t));
+__GMP_DECLSPEC void mpn_com (mp_ptr, mp_srcptr, mp_size_t);
  #endif
  
  #define mpn_perfect_square_p __MPN(perfect_square_p)
-__GMP_DECLSPEC int mpn_perfect_square_p __GMP_PROTO ((mp_srcptr, mp_size_t)) __GMP_ATTRIBUTE_PURE;
+__GMP_DECLSPEC int mpn_perfect_square_p (mp_srcptr, mp_size_t) __GMP_ATTRIBUTE_PURE;
  
  #define mpn_perfect_power_p __MPN(perfect_power_p)
-__GMP_DECLSPEC int mpn_perfect_power_p __GMP_PROTO ((mp_srcptr, mp_size_t)) __GMP_ATTRIBUTE_PURE;
+__GMP_DECLSPEC int mpn_perfect_power_p (mp_srcptr, mp_size_t) __GMP_ATTRIBUTE_PURE;
  
  #define mpn_popcount __MPN(popcount)
-__GMP_DECLSPEC mp_bitcnt_t mpn_popcount __GMP_PROTO ((mp_srcptr, mp_size_t)) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE;
+__GMP_DECLSPEC mp_bitcnt_t mpn_popcount (mp_srcptr, mp_size_t) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE;
  
  #define mpn_pow_1 __MPN(pow_1)
-__GMP_DECLSPEC mp_size_t mpn_pow_1 __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_ptr));
+__GMP_DECLSPEC mp_size_t mpn_pow_1 (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_ptr);
  
  /* undocumented now, but retained here for upward compatibility */
  #define mpn_preinv_mod_1 __MPN(preinv_mod_1)
-__GMP_DECLSPEC mp_limb_t mpn_preinv_mod_1 __GMP_PROTO ((mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t)) __GMP_ATTRIBUTE_PURE;
+__GMP_DECLSPEC mp_limb_t mpn_preinv_mod_1 (mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t) __GMP_ATTRIBUTE_PURE;
  
  #define mpn_random __MPN(random)
-__GMP_DECLSPEC void mpn_random __GMP_PROTO ((mp_ptr, mp_size_t));
+__GMP_DECLSPEC void mpn_random (mp_ptr, mp_size_t);
  
  #define mpn_random2 __MPN(random2)
-__GMP_DECLSPEC void mpn_random2 __GMP_PROTO ((mp_ptr, mp_size_t));
+__GMP_DECLSPEC void mpn_random2 (mp_ptr, mp_size_t);
  
  #define mpn_rshift __MPN(rshift)
-__GMP_DECLSPEC mp_limb_t mpn_rshift __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, unsigned int));
+__GMP_DECLSPEC mp_limb_t mpn_rshift (mp_ptr, mp_srcptr, mp_size_t, unsigned int);
  
  #define mpn_scan0 __MPN(scan0)
-__GMP_DECLSPEC mp_bitcnt_t mpn_scan0 __GMP_PROTO ((mp_srcptr, mp_bitcnt_t)) __GMP_ATTRIBUTE_PURE;
+__GMP_DECLSPEC mp_bitcnt_t mpn_scan0 (mp_srcptr, mp_bitcnt_t) __GMP_ATTRIBUTE_PURE;
  
  #define mpn_scan1 __MPN(scan1)
-__GMP_DECLSPEC mp_bitcnt_t mpn_scan1 __GMP_PROTO ((mp_srcptr, mp_bitcnt_t)) __GMP_ATTRIBUTE_PURE;
+__GMP_DECLSPEC mp_bitcnt_t mpn_scan1 (mp_srcptr, mp_bitcnt_t) __GMP_ATTRIBUTE_PURE;
  
  #define mpn_set_str __MPN(set_str)
-__GMP_DECLSPEC mp_size_t mpn_set_str __GMP_PROTO ((mp_ptr, __gmp_const unsigned char *, size_t, int));
+__GMP_DECLSPEC mp_size_t mpn_set_str (mp_ptr, const unsigned char *, size_t, int);
  
  #define mpn_sqrtrem __MPN(sqrtrem)
-__GMP_DECLSPEC mp_size_t mpn_sqrtrem __GMP_PROTO ((mp_ptr, mp_ptr, mp_srcptr, mp_size_t));
+__GMP_DECLSPEC mp_size_t mpn_sqrtrem (mp_ptr, mp_ptr, mp_srcptr, mp_size_t);
  
  #define mpn_sub __MPN(sub)
  #if __GMP_INLINE_PROTOTYPES || defined (__GMP_FORCE_mpn_sub)
-__GMP_DECLSPEC mp_limb_t mpn_sub __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr,mp_size_t));
+__GMP_DECLSPEC mp_limb_t mpn_sub (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t);
  #endif
  
  #define mpn_sub_1 __MPN(sub_1)
  #if __GMP_INLINE_PROTOTYPES || defined (__GMP_FORCE_mpn_sub_1)
-__GMP_DECLSPEC mp_limb_t mpn_sub_1 __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_limb_t)) __GMP_NOTHROW;
+__GMP_DECLSPEC mp_limb_t mpn_sub_1 (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t) __GMP_NOTHROW;
  #endif
  
  #define mpn_sub_n __MPN(sub_n)
-__GMP_DECLSPEC mp_limb_t mpn_sub_n __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
+__GMP_DECLSPEC mp_limb_t mpn_sub_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);
  
  #define mpn_submul_1 __MPN(submul_1)
-__GMP_DECLSPEC mp_limb_t mpn_submul_1 __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_limb_t));
+__GMP_DECLSPEC mp_limb_t mpn_submul_1 (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t);
  
  #define mpn_tdiv_qr __MPN(tdiv_qr)
-__GMP_DECLSPEC void mpn_tdiv_qr __GMP_PROTO ((mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t));
+__GMP_DECLSPEC void mpn_tdiv_qr (mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t);
  
  #define mpn_and_n __MPN(and_n)
-__GMP_DECLSPEC void mpn_and_n __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
+__GMP_DECLSPEC void mpn_and_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);
  #define mpn_andn_n __MPN(andn_n)
-__GMP_DECLSPEC void mpn_andn_n __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
+__GMP_DECLSPEC void mpn_andn_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);
  #define mpn_nand_n __MPN(nand_n)
-__GMP_DECLSPEC void mpn_nand_n __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
+__GMP_DECLSPEC void mpn_nand_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);
  #define mpn_ior_n __MPN(ior_n)
-__GMP_DECLSPEC void mpn_ior_n __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
+__GMP_DECLSPEC void mpn_ior_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);
  #define mpn_iorn_n __MPN(iorn_n)
-__GMP_DECLSPEC void mpn_iorn_n __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
+__GMP_DECLSPEC void mpn_iorn_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);
  #define mpn_nior_n __MPN(nior_n)
-__GMP_DECLSPEC void mpn_nior_n __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
+__GMP_DECLSPEC void mpn_nior_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);
  #define mpn_xor_n __MPN(xor_n)
-__GMP_DECLSPEC void mpn_xor_n __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
+__GMP_DECLSPEC void mpn_xor_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);
  #define mpn_xnor_n __MPN(xnor_n)
-__GMP_DECLSPEC void mpn_xnor_n __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
+__GMP_DECLSPEC void mpn_xnor_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);
  
  #define mpn_copyi __MPN(copyi)
-__GMP_DECLSPEC void mpn_copyi __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t));
+__GMP_DECLSPEC void mpn_copyi (mp_ptr, mp_srcptr, mp_size_t);
  #define mpn_copyd __MPN(copyd)
-__GMP_DECLSPEC void mpn_copyd __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t));
+__GMP_DECLSPEC void mpn_copyd (mp_ptr, mp_srcptr, mp_size_t);
  #define mpn_zero __MPN(zero)
-__GMP_DECLSPEC void mpn_zero __GMP_PROTO ((mp_ptr, mp_size_t));
+__GMP_DECLSPEC void mpn_zero (mp_ptr, mp_size_t);
  
  /**************** mpz inlines ****************/
  
@@ -2270,9 +2220,9 @@ enum
  #define __GMP_CFLAGS "@CFLAGS@"
  
  /* Major version number is the value of __GNU_MP__ too, above and in mp.h. */
-#define __GNU_MP_VERSION 5
-#define __GNU_MP_VERSION_MINOR 0
-#define __GNU_MP_VERSION_PATCHLEVEL 5
+#define __GNU_MP_VERSION            5
+#define __GNU_MP_VERSION_MINOR      1
+#define __GNU_MP_VERSION_PATCHLEVEL 3
  #define __GNU_MP_RELEASE (__GNU_MP_VERSION * 10000 + __GNU_MP_VERSION_MINOR * 100 + __GNU_MP_VERSION_PATCHLEVEL)
  
  #define __GMP_H__
diff --git a/gmp-impl.h b/gmp-impl.h
index b424f9dd0a26800b736be00094a013fe16b52630..24cfc0631e95b66c646bdd3ebd9ce772e46dc711 100644
--- a/gmp-impl.h
+++ b/gmp-impl.h
@@ -4,8 +4,8 @@
     BE SUBJECT TO INCOMPATIBLE CHANGES IN FUTURE GNU MP RELEASES.
  
  Copyright 1991, 1993, 1994, 1995, 1996, 1997, 1999, 2000, 2001, 2002, 2003,
-2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012 Free Software Foundation,
-Inc.
+2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013 Free Software
+Foundation, Inc.
  
  This file is part of the GNU MP Library.
  
@@ -54,53 +54,86 @@ along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
  
  /* For fat.h and other fat binary stuff.
     No need for __GMP_ATTRIBUTE_PURE or __GMP_NOTHROW, since functions
-   declared this way are only used to set function pointers in __gmp_cpuvec,
+   declared this way are only used to set function pointers in __gmpn_cpuvec,
     they're not called directly.  */
  #define DECL_add_n(name) \
-  __GMP_DECLSPEC mp_limb_t name __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t))
+  __GMP_DECLSPEC mp_limb_t name (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t)
+#define DECL_addlsh1_n(name) \
+  DECL_add_n (name)
+#define DECL_addlsh2_n(name) \
+  DECL_add_n (name)
  #define DECL_addmul_1(name) \
-  __GMP_DECLSPEC mp_limb_t name __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_limb_t))
+  __GMP_DECLSPEC mp_limb_t name (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t)
+#define DECL_addmul_2(name) \
+  __GMP_DECLSPEC mp_limb_t name (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr)
+#define DECL_bdiv_dbm1c(name) \
+  __GMP_DECLSPEC mp_limb_t name (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t)
+#define DECL_com(name) \
+  __GMP_DECLSPEC void name (mp_ptr, mp_srcptr, mp_size_t)
  #define DECL_copyd(name) \
-  __GMP_DECLSPEC void name __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t))
+  __GMP_DECLSPEC void name (mp_ptr, mp_srcptr, mp_size_t)
  #define DECL_copyi(name) \
    DECL_copyd (name)
  #define DECL_divexact_1(name) \
-  __GMP_DECLSPEC void name __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_limb_t))
+  __GMP_DECLSPEC void name (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t)
  #define DECL_divexact_by3c(name) \
-  __GMP_DECLSPEC mp_limb_t name __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_limb_t))
+  __GMP_DECLSPEC mp_limb_t name (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t)
  #define DECL_divrem_1(name) \
-  __GMP_DECLSPEC mp_limb_t name __GMP_PROTO ((mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t))
+  __GMP_DECLSPEC mp_limb_t name (mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t)
  #define DECL_gcd_1(name) \
-  __GMP_DECLSPEC mp_limb_t name __GMP_PROTO ((mp_srcptr, mp_size_t, mp_limb_t))
+  __GMP_DECLSPEC mp_limb_t name (mp_srcptr, mp_size_t, mp_limb_t)
  #define DECL_lshift(name) \
-  __GMP_DECLSPEC mp_limb_t name __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, unsigned))
+  __GMP_DECLSPEC mp_limb_t name (mp_ptr, mp_srcptr, mp_size_t, unsigned)
+#define DECL_lshiftc(name) \
+  DECL_lshift (name)
  #define DECL_mod_1(name) \
-  __GMP_DECLSPEC mp_limb_t name __GMP_PROTO ((mp_srcptr, mp_size_t, mp_limb_t))
+  __GMP_DECLSPEC mp_limb_t name (mp_srcptr, mp_size_t, mp_limb_t)
+#define DECL_mod_1_1p(name) \
+  __GMP_DECLSPEC mp_limb_t name (mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t [])
+#define DECL_mod_1_1p_cps(name) \
+  __GMP_DECLSPEC void name (mp_limb_t cps[], mp_limb_t b)
+#define DECL_mod_1s_2p(name) \
+  DECL_mod_1_1p (name)
+#define DECL_mod_1s_2p_cps(name) \
+  DECL_mod_1_1p_cps (name)
+#define DECL_mod_1s_4p(name) \
+  DECL_mod_1_1p (name)
+#define DECL_mod_1s_4p_cps(name) \
+  DECL_mod_1_1p_cps (name)
  #define DECL_mod_34lsub1(name) \
-  __GMP_DECLSPEC mp_limb_t name __GMP_PROTO ((mp_srcptr, mp_size_t))
+  __GMP_DECLSPEC mp_limb_t name (mp_srcptr, mp_size_t)
  #define DECL_modexact_1c_odd(name) \
-  __GMP_DECLSPEC mp_limb_t name __GMP_PROTO ((mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t))
+  __GMP_DECLSPEC mp_limb_t name (mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t)
  #define DECL_mul_1(name) \
    DECL_addmul_1 (name)
  #define DECL_mul_basecase(name) \
-  __GMP_DECLSPEC void name __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t))
+  __GMP_DECLSPEC void name (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t)
+#define DECL_mullo_basecase(name) \
+  __GMP_DECLSPEC void name (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t)
  #define DECL_preinv_divrem_1(name) \
-  __GMP_DECLSPEC mp_limb_t name __GMP_PROTO ((mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t, int))
+  __GMP_DECLSPEC mp_limb_t name (mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t, int)
  #define DECL_preinv_mod_1(name) \
-  __GMP_DECLSPEC mp_limb_t name __GMP_PROTO ((mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t))
+  __GMP_DECLSPEC mp_limb_t name (mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t)
+#define DECL_redc_1(name) \
+  __GMP_DECLSPEC mp_limb_t name (mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_limb_t)
+#define DECL_redc_2(name) \
+  __GMP_DECLSPEC mp_limb_t name (mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_srcptr)
  #define DECL_rshift(name) \
    DECL_lshift (name)
  #define DECL_sqr_basecase(name) \
-  __GMP_DECLSPEC void name __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t))
+  __GMP_DECLSPEC void name (mp_ptr, mp_srcptr, mp_size_t)
  #define DECL_sub_n(name) \
    DECL_add_n (name)
+#define DECL_sublsh1_n(name) \
+  DECL_add_n (name)
  #define DECL_submul_1(name) \
    DECL_addmul_1 (name)
  
-#if ! __GMP_WITHIN_CONFIGURE
+#if ! defined (__GMP_WITHIN_CONFIGURE)
  #include "config.h"
  #include "gmp-mparam.h"
  #include "fib_table.h"
+#include "fac_table.h"
  #include "mp_bases.h"
  #if WANT_FAT_BINARY
  #include "fat.h"
@@ -213,13 +246,6 @@ typedef struct {mp_limb_t inv32;} gmp_pi1_t;
  typedef struct {mp_limb_t inv21, inv32, inv53;} gmp_pi2_t;
  
  
-/* const and signed must match __gmp_const and __gmp_signed, so follow the
-   decision made for those in gmp.h.    */
-#if ! __GMP_HAVE_CONST
-#define const   /* empty */
-#define signed  /* empty */
-#endif
-
  /* "const" basically means a function does nothing but examine its arguments
     and give a return value, it doesn't read or write any memory (neither
     global nor pointed to by arguments), and has no other side-effects.  This
@@ -252,13 +278,13 @@ typedef struct {mp_limb_t inv21, inv32, inv53;} gmp_pi2_t;
  #endif
  
  #if ! HAVE_MEMSET
-#define memset(p, c, n)                 \
-  do {                                  \
-    ASSERT ((n) >= 0);                  \
-    char *__memset__p = (p);            \
-    int  __i;                           \
-    for (__i = 0; __i < (n); __i++)     \
-      __memset__p[__i] = (c);           \
+#define memset(p, c, n)                        \
+  do {                                 \
+    ASSERT ((n) >= 0);                 \
+    char *__memset__p = (p);           \
+    int         __i;                           \
+    for (__i = 0; __i < (n); __i++)    \
+      __memset__p[__i] = (c);          \
    } while (0)
  #endif
  
@@ -288,9 +314,9 @@ extern "C" {
  
  
  /* Usage: TMP_DECL;
-          TMP_MARK;
-          ptr = TMP_ALLOC (bytes);
-          TMP_FREE;
+         TMP_MARK;
+         ptr = TMP_ALLOC (bytes);
+         TMP_FREE;
  
     Small allocations should use TMP_SALLOC, big allocations should use
     TMP_BALLOC.  Allocations that might be small or big should use TMP_ALLOC.
@@ -327,8 +353,8 @@ struct tmp_reentrant_t {
    struct tmp_reentrant_t  *next;
    size_t                 size;   /* bytes, including header */
  };
-__GMP_DECLSPEC void *__gmp_tmp_reentrant_alloc __GMP_PROTO ((struct tmp_reentrant_t **, size_t)) ATTRIBUTE_MALLOC;
-__GMP_DECLSPEC void  __gmp_tmp_reentrant_free __GMP_PROTO ((struct tmp_reentrant_t *));
+__GMP_DECLSPEC void *__gmp_tmp_reentrant_alloc (struct tmp_reentrant_t **, size_t) ATTRIBUTE_MALLOC;
+__GMP_DECLSPEC void  __gmp_tmp_reentrant_free (struct tmp_reentrant_t *);
  #endif
  
  #if WANT_TMP_ALLOCA
@@ -341,9 +367,10 @@ __GMP_DECLSPEC void  __gmp_tmp_reentrant_free __GMP_PROTO ((struct tmp_reentrant
  #define TMP_ALLOC(n)                                                   \
    (LIKELY ((n) < 65536) ? TMP_SALLOC(n) : TMP_BALLOC(n))
  #define TMP_SFREE
-#define TMP_FREE                                                          \
-  do {                                                                    \
-    if (UNLIKELY (__tmp_marker != 0)) __gmp_tmp_reentrant_free (__tmp_marker); \
+#define TMP_FREE                                                       \
+  do {                                                                 \
+    if (UNLIKELY (__tmp_marker != 0))                                  \
+      __gmp_tmp_reentrant_free (__tmp_marker);                         \
    } while (0)
  #endif
  
@@ -365,9 +392,9 @@ struct tmp_marker
    struct tmp_stack *which_chunk;
    void *alloc_point;
  };
-__GMP_DECLSPEC void *__gmp_tmp_alloc __GMP_PROTO ((unsigned long)) ATTRIBUTE_MALLOC;
-__GMP_DECLSPEC void __gmp_tmp_mark __GMP_PROTO ((struct tmp_marker *));
-__GMP_DECLSPEC void __gmp_tmp_free __GMP_PROTO ((struct tmp_marker *));
+__GMP_DECLSPEC void *__gmp_tmp_alloc (unsigned long) ATTRIBUTE_MALLOC;
+__GMP_DECLSPEC void __gmp_tmp_mark (struct tmp_marker *);
+__GMP_DECLSPEC void __gmp_tmp_free (struct tmp_marker *);
  #define TMP_SDECL              TMP_DECL
  #define TMP_DECL               struct tmp_marker __tmp_marker
  #define TMP_SMARK              TMP_MARK
@@ -392,15 +419,15 @@ struct tmp_debug_entry_t {
    char                      *block;
    size_t                    size;
  };
-__GMP_DECLSPEC void  __gmp_tmp_debug_mark  __GMP_PROTO ((const char *, int, struct tmp_debug_t **,
-                                                        struct tmp_debug_t *,
-                                                        const char *, const char *));
-__GMP_DECLSPEC void *__gmp_tmp_debug_alloc __GMP_PROTO ((const char *, int, int,
-                                                        struct tmp_debug_t **, const char *,
-                                                        size_t)) ATTRIBUTE_MALLOC;
-__GMP_DECLSPEC void  __gmp_tmp_debug_free  __GMP_PROTO ((const char *, int, int,
-                                                        struct tmp_debug_t **,
-                                                        const char *, const char *));
+__GMP_DECLSPEC void  __gmp_tmp_debug_mark (const char *, int, struct tmp_debug_t **,
+                                          struct tmp_debug_t *,
+                                          const char *, const char *);
+__GMP_DECLSPEC void *__gmp_tmp_debug_alloc (const char *, int, int,
+                                           struct tmp_debug_t **, const char *,
+                                           size_t) ATTRIBUTE_MALLOC;
+__GMP_DECLSPEC void  __gmp_tmp_debug_free (const char *, int, int,
+                                          struct tmp_debug_t **,
+                                          const char *, const char *);
  #define TMP_SDECL TMP_DECL_NAME(__tmp_xmarker, "__tmp_marker")
  #define TMP_DECL TMP_DECL_NAME(__tmp_xmarker, "__tmp_marker")
  #define TMP_SMARK TMP_MARK_NAME(__tmp_xmarker, "__tmp_marker")
@@ -411,32 +438,32 @@ __GMP_DECLSPEC void  __gmp_tmp_debug_free  __GMP_PROTO ((const char *, int, int,
     warning from the compiler if TMP_FREE is used without a TMP_MARK.
     __tmp_marker_inscope does the same for TMP_ALLOC.  Runtime tests pick
     these things up too.  */
-#define TMP_DECL_NAME(marker, marker_name)                      \
-  int marker;                                                   \
-  int __tmp_marker_inscope;                                     \
-  const char *__tmp_marker_name = marker_name;                  \
-  struct tmp_debug_t  __tmp_marker_struct;                      \
-  /* don't demand NULL, just cast a zero */                     \
+#define TMP_DECL_NAME(marker, marker_name)                             \
+  int marker;                                                          \
+  int __tmp_marker_inscope;                                            \
+  const char *__tmp_marker_name = marker_name;                         \
+  struct tmp_debug_t  __tmp_marker_struct;                             \
+  /* don't demand NULL, just cast a zero */                            \
    struct tmp_debug_t  *__tmp_marker = (struct tmp_debug_t *) 0
-#define TMP_MARK_NAME(marker, marker_name)                      \
-  do {                                                          \
-    marker = 1;                                                 \
-    __tmp_marker_inscope = 1;                                   \
-    __gmp_tmp_debug_mark  (ASSERT_FILE, ASSERT_LINE,            \
-                           &__tmp_marker, &__tmp_marker_struct, \
-                           __tmp_marker_name, marker_name);     \
+#define TMP_MARK_NAME(marker, marker_name)                             \
+  do {                                                                 \
+    marker = 1;                                                                \
+    __tmp_marker_inscope = 1;                                          \
+    __gmp_tmp_debug_mark  (ASSERT_FILE, ASSERT_LINE,                   \
+                          &__tmp_marker, &__tmp_marker_struct,         \
+                          __tmp_marker_name, marker_name);             \
    } while (0)
  #define TMP_SALLOC(n)          TMP_ALLOC(n)
  #define TMP_BALLOC(n)          TMP_ALLOC(n)
-#define TMP_ALLOC(size)                                                 \
-  __gmp_tmp_debug_alloc (ASSERT_FILE, ASSERT_LINE,                      \
-                         __tmp_marker_inscope,                          \
-                         &__tmp_marker, __tmp_marker_name, size)
-#define TMP_FREE_NAME(marker, marker_name)                      \
-  do {                                                          \
-    __gmp_tmp_debug_free  (ASSERT_FILE, ASSERT_LINE,            \
-                           marker, &__tmp_marker,               \
-                           __tmp_marker_name, marker_name);     \
+#define TMP_ALLOC(size)                                                        \
+  __gmp_tmp_debug_alloc (ASSERT_FILE, ASSERT_LINE,                     \
+                        __tmp_marker_inscope,                          \
+                        &__tmp_marker, __tmp_marker_name, size)
+#define TMP_FREE_NAME(marker, marker_name)                             \
+  do {                                                                 \
+    __gmp_tmp_debug_free  (ASSERT_FILE, ASSERT_LINE,                   \
+                          marker, &__tmp_marker,                       \
+                          __tmp_marker_name, marker_name);             \
    } while (0)
  #endif /* WANT_TMP_DEBUG */
  
@@ -457,18 +484,18 @@ __GMP_DECLSPEC void  __gmp_tmp_debug_free  __GMP_PROTO ((const char *, int, int,
     involves copying a chunk of stack (various RISCs), or a call to a stack
     bounds check (mingw).  In any case, when debugging keep separate blocks
     so a redzoning malloc debugger can protect each individually.  */
-#define TMP_ALLOC_LIMBS_2(xp,xsize, yp,ysize)           \
-  do {                                                  \
-    if (WANT_TMP_DEBUG)                                 \
-      {                                                 \
-        (xp) = TMP_ALLOC_LIMBS (xsize);                 \
-        (yp) = TMP_ALLOC_LIMBS (ysize);                 \
-      }                                                 \
-    else                                                \
-      {                                                 \
-        (xp) = TMP_ALLOC_LIMBS ((xsize) + (ysize));     \
-        (yp) = (xp) + (xsize);                          \
-      }                                                 \
+#define TMP_ALLOC_LIMBS_2(xp,xsize, yp,ysize)                          \
+  do {                                                                 \
+    if (WANT_TMP_DEBUG)                                                        \
+      {                                                                        \
+       (xp) = TMP_ALLOC_LIMBS (xsize);                                 \
+       (yp) = TMP_ALLOC_LIMBS (ysize);                                 \
+      }                                                                        \
+    else                                                               \
+      {                                                                        \
+       (xp) = TMP_ALLOC_LIMBS ((xsize) + (ysize));                     \
+       (yp) = (xp) + (xsize);                                          \
+      }                                                                        \
    } while (0)
  
  
@@ -490,16 +517,24 @@ __GMP_DECLSPEC void  __gmp_tmp_debug_free  __GMP_PROTO ((const char *, int, int,
  #define SIZ(x) ((x)->_mp_size)
  #define ABSIZ(x) ABS (SIZ (x))
  #define PTR(x) ((x)->_mp_d)
-#define LIMBS(x) ((x)->_mp_d)
  #define EXP(x) ((x)->_mp_exp)
  #define PREC(x) ((x)->_mp_prec)
  #define ALLOC(x) ((x)->_mp_alloc)
+#define NUM(x) mpq_numref(x)
+#define DEN(x) mpq_denref(x)
  
  /* n-1 inverts any low zeros and the lowest one bit.  If n&(n-1) leaves zero
     then that lowest one bit must have been the only bit set.  n==0 will
     return true though, so avoid that.  */
  #define POW2_P(n)  (((n) & ((n) - 1)) == 0)
  
+/* This is intended for constant THRESHOLDs only, where the compiler
+   can completely fold the result.  */
+#define LOG2C(n) \
+ (((n) >=    0x1) + ((n) >=    0x2) + ((n) >=    0x4) + ((n) >=    0x8) + \
+  ((n) >=   0x10) + ((n) >=   0x20) + ((n) >=   0x40) + ((n) >=   0x80) + \
+  ((n) >=  0x100) + ((n) >=  0x200) + ((n) >=  0x400) + ((n) >=  0x800) + \
+  ((n) >= 0x1000) + ((n) >= 0x2000) + ((n) >= 0x4000) + ((n) >= 0x8000))
  
  /* The "short" defines are a bit different because shorts are promoted to
     ints by ~ or >> etc.
@@ -603,91 +638,91 @@ __GMP_DECLSPEC void  __gmp_tmp_debug_free  __GMP_PROTO ((const char *, int, int,
  
  /* Swap macros. */
  
-#define MP_LIMB_T_SWAP(x, y)                    \
-  do {                                          \
-    mp_limb_t __mp_limb_t_swap__tmp = (x);      \
-    (x) = (y);                                  \
-    (y) = __mp_limb_t_swap__tmp;                \
+#define MP_LIMB_T_SWAP(x, y)                                           \
+  do {                                                                 \
+    mp_limb_t __mp_limb_t_swap__tmp = (x);                             \
+    (x) = (y);                                                         \
+    (y) = __mp_limb_t_swap__tmp;                                       \
    } while (0)
-#define MP_SIZE_T_SWAP(x, y)                    \
-  do {                                          \
-    mp_size_t __mp_size_t_swap__tmp = (x);      \
-    (x) = (y);                                  \
-    (y) = __mp_size_t_swap__tmp;                \
+#define MP_SIZE_T_SWAP(x, y)                                           \
+  do {                                                                 \
+    mp_size_t __mp_size_t_swap__tmp = (x);                             \
+    (x) = (y);                                                         \
+    (y) = __mp_size_t_swap__tmp;                                       \
    } while (0)
  
-#define MP_PTR_SWAP(x, y)               \
-  do {                                  \
-    mp_ptr __mp_ptr_swap__tmp = (x);    \
-    (x) = (y);                          \
-    (y) = __mp_ptr_swap__tmp;           \
+#define MP_PTR_SWAP(x, y)                                              \
+  do {                                                                 \
+    mp_ptr __mp_ptr_swap__tmp = (x);                                   \
+    (x) = (y);                                                         \
+    (y) = __mp_ptr_swap__tmp;                                          \
    } while (0)
-#define MP_SRCPTR_SWAP(x, y)                    \
-  do {                                          \
-    mp_srcptr __mp_srcptr_swap__tmp = (x);      \
-    (x) = (y);                                  \
-    (y) = __mp_srcptr_swap__tmp;                \
+#define MP_SRCPTR_SWAP(x, y)                                           \
+  do {                                                                 \
+    mp_srcptr __mp_srcptr_swap__tmp = (x);                             \
+    (x) = (y);                                                         \
+    (y) = __mp_srcptr_swap__tmp;                                       \
    } while (0)
  
-#define MPN_PTR_SWAP(xp,xs, yp,ys)      \
-  do {                                  \
-    MP_PTR_SWAP (xp, yp);               \
-    MP_SIZE_T_SWAP (xs, ys);            \
+#define MPN_PTR_SWAP(xp,xs, yp,ys)                                     \
+  do {                                                                 \
+    MP_PTR_SWAP (xp, yp);                                              \
+    MP_SIZE_T_SWAP (xs, ys);                                           \
    } while(0)
-#define MPN_SRCPTR_SWAP(xp,xs, yp,ys)   \
-  do {                                  \
-    MP_SRCPTR_SWAP (xp, yp);            \
-    MP_SIZE_T_SWAP (xs, ys);            \
+#define MPN_SRCPTR_SWAP(xp,xs, yp,ys)                                  \
+  do {                                                                 \
+    MP_SRCPTR_SWAP (xp, yp);                                           \
+    MP_SIZE_T_SWAP (xs, ys);                                           \
    } while(0)
  
-#define MPZ_PTR_SWAP(x, y)              \
-  do {                                  \
-    mpz_ptr __mpz_ptr_swap__tmp = (x);  \
-    (x) = (y);                          \
-    (y) = __mpz_ptr_swap__tmp;          \
+#define MPZ_PTR_SWAP(x, y)                                             \
+  do {                                                                 \
+    mpz_ptr __mpz_ptr_swap__tmp = (x);                                 \
+    (x) = (y);                                                         \
+    (y) = __mpz_ptr_swap__tmp;                                         \
    } while (0)
-#define MPZ_SRCPTR_SWAP(x, y)                   \
-  do {                                          \
-    mpz_srcptr __mpz_srcptr_swap__tmp = (x);    \
-    (x) = (y);                                  \
-    (y) = __mpz_srcptr_swap__tmp;               \
+#define MPZ_SRCPTR_SWAP(x, y)                                          \
+  do {                                                                 \
+    mpz_srcptr __mpz_srcptr_swap__tmp = (x);                           \
+    (x) = (y);                                                         \
+    (y) = __mpz_srcptr_swap__tmp;                                      \
    } while (0)
  
  
  /* Enhancement: __gmp_allocate_func could have "__attribute__ ((malloc))",
     but current gcc (3.0) doesn't seem to support that.  */
-__GMP_DECLSPEC extern void * (*__gmp_allocate_func) __GMP_PROTO ((size_t));
-__GMP_DECLSPEC extern void * (*__gmp_reallocate_func) __GMP_PROTO ((void *, size_t, size_t));
-__GMP_DECLSPEC extern void   (*__gmp_free_func) __GMP_PROTO ((void *, size_t));
+__GMP_DECLSPEC extern void * (*__gmp_allocate_func) (size_t);
+__GMP_DECLSPEC extern void * (*__gmp_reallocate_func) (void *, size_t, size_t);
+__GMP_DECLSPEC extern void   (*__gmp_free_func) (void *, size_t);
  
-__GMP_DECLSPEC void *__gmp_default_allocate __GMP_PROTO ((size_t));
-__GMP_DECLSPEC void *__gmp_default_reallocate __GMP_PROTO ((void *, size_t, size_t));
-__GMP_DECLSPEC void __gmp_default_free __GMP_PROTO ((void *, size_t));
+__GMP_DECLSPEC void *__gmp_default_allocate (size_t);
+__GMP_DECLSPEC void *__gmp_default_reallocate (void *, size_t, size_t);
+__GMP_DECLSPEC void __gmp_default_free (void *, size_t);
  
  #define __GMP_ALLOCATE_FUNC_TYPE(n,type) \
    ((type *) (*__gmp_allocate_func) ((n) * sizeof (type)))
  #define __GMP_ALLOCATE_FUNC_LIMBS(n)   __GMP_ALLOCATE_FUNC_TYPE (n, mp_limb_t)
  
-#define __GMP_REALLOCATE_FUNC_TYPE(p, old_size, new_size, type) \
-  ((type *) (*__gmp_reallocate_func)                            \
+#define __GMP_REALLOCATE_FUNC_TYPE(p, old_size, new_size, type)                \
+  ((type *) (*__gmp_reallocate_func)                                   \
     (p, (old_size) * sizeof (type), (new_size) * sizeof (type)))
-#define __GMP_REALLOCATE_FUNC_LIMBS(p, old_size, new_size) \
+#define __GMP_REALLOCATE_FUNC_LIMBS(p, old_size, new_size)             \
    __GMP_REALLOCATE_FUNC_TYPE(p, old_size, new_size, mp_limb_t)
  
  #define __GMP_FREE_FUNC_TYPE(p,n,type) (*__gmp_free_func) (p, (n) * sizeof (type))
  #define __GMP_FREE_FUNC_LIMBS(p,n)     __GMP_FREE_FUNC_TYPE (p, n, mp_limb_t)
  
-#define __GMP_REALLOCATE_FUNC_MAYBE(ptr, oldsize, newsize)      \
-  do {                                                          \
-    if ((oldsize) != (newsize))                                 \
-      (ptr) = (*__gmp_reallocate_func) (ptr, oldsize, newsize); \
+#define __GMP_REALLOCATE_FUNC_MAYBE(ptr, oldsize, newsize)             \
+  do {                                                                 \
+    if ((oldsize) != (newsize))                                                \
+      (ptr) = (*__gmp_reallocate_func) (ptr, oldsize, newsize);                \
    } while (0)
  
-#define __GMP_REALLOCATE_FUNC_MAYBE_TYPE(ptr, oldsize, newsize, type)   \
-  do {                                                                  \
-    if ((oldsize) != (newsize))                                         \
-      (ptr) = (type *) (*__gmp_reallocate_func)                         \
-        (ptr, (oldsize) * sizeof (type), (newsize) * sizeof (type));    \
+#define __GMP_REALLOCATE_FUNC_MAYBE_TYPE(ptr, oldsize, newsize, type)  \
+  do {                                                                 \
+    if ((oldsize) != (newsize))                                                \
+      (ptr) = (type *) (*__gmp_reallocate_func)                                \
+       (ptr, (oldsize) * sizeof (type), (newsize) * sizeof (type));    \
    } while (0)
  
  
@@ -763,193 +798,374 @@ __GMP_DECLSPEC void __gmp_default_free __GMP_PROTO ((void *, size_t));
  #endif
  
  
-__GMP_DECLSPEC void __gmpz_aorsmul_1 __GMP_PROTO ((REGPARM_3_1 (mpz_ptr, mpz_srcptr, mp_limb_t, mp_size_t))) REGPARM_ATTR(1);
+__GMP_DECLSPEC void __gmpz_aorsmul_1 (REGPARM_3_1 (mpz_ptr, mpz_srcptr, mp_limb_t, mp_size_t)) REGPARM_ATTR(1);
  #define mpz_aorsmul_1(w,u,v,sub)  __gmpz_aorsmul_1 (REGPARM_3_1 (w, u, v, sub))
  
  #define mpz_n_pow_ui __gmpz_n_pow_ui
-__GMP_DECLSPEC void    mpz_n_pow_ui __GMP_PROTO ((mpz_ptr, mp_srcptr, mp_size_t, unsigned long));
+__GMP_DECLSPEC void    mpz_n_pow_ui (mpz_ptr, mp_srcptr, mp_size_t, unsigned long);
  
  
  #define mpn_addmul_1c __MPN(addmul_1c)
-__GMP_DECLSPEC mp_limb_t mpn_addmul_1c __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t));
+__GMP_DECLSPEC mp_limb_t mpn_addmul_1c (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t);
  
+#ifndef mpn_addmul_2  /* if not done with cpuvec in a fat binary */
  #define mpn_addmul_2 __MPN(addmul_2)
-__GMP_DECLSPEC mp_limb_t mpn_addmul_2 __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr));
+__GMP_DECLSPEC mp_limb_t mpn_addmul_2 (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr);
+#endif
  
  #define mpn_addmul_3 __MPN(addmul_3)
-__GMP_DECLSPEC mp_limb_t mpn_addmul_3 __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr));
+__GMP_DECLSPEC mp_limb_t mpn_addmul_3 (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr);
  
  #define mpn_addmul_4 __MPN(addmul_4)
-__GMP_DECLSPEC mp_limb_t mpn_addmul_4 __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr));
+__GMP_DECLSPEC mp_limb_t mpn_addmul_4 (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr);
  
  #define mpn_addmul_5 __MPN(addmul_5)
-__GMP_DECLSPEC mp_limb_t mpn_addmul_5 __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr));
+__GMP_DECLSPEC mp_limb_t mpn_addmul_5 (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr);
  
  #define mpn_addmul_6 __MPN(addmul_6)
-__GMP_DECLSPEC mp_limb_t mpn_addmul_6 __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr));
+__GMP_DECLSPEC mp_limb_t mpn_addmul_6 (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr);
  
  #define mpn_addmul_7 __MPN(addmul_7)
-__GMP_DECLSPEC mp_limb_t mpn_addmul_7 __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr));
+__GMP_DECLSPEC mp_limb_t mpn_addmul_7 (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr);
  
  #define mpn_addmul_8 __MPN(addmul_8)
-__GMP_DECLSPEC mp_limb_t mpn_addmul_8 __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr));
+__GMP_DECLSPEC mp_limb_t mpn_addmul_8 (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr);
+
+/* Alternative entry point in mpn_addmul_2 for the benefit of mpn_sqr_basecase.  */
+#define mpn_addmul_2s __MPN(addmul_2s)
+__GMP_DECLSPEC mp_limb_t mpn_addmul_2s (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr);
  
  /* mpn_addlsh1_n(c,a,b,n), when it exists, sets {c,n} to {a,n}+2*{b,n}, and
-   returns the carry out (0, 1 or 2).  */
+   returns the carry out (0, 1 or 2). Use _ip1 when a=c. */
+#ifndef mpn_addlsh1_n  /* if not done with cpuvec in a fat binary */
  #define mpn_addlsh1_n __MPN(addlsh1_n)
-__GMP_DECLSPEC mp_limb_t mpn_addlsh1_n __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
+__GMP_DECLSPEC mp_limb_t mpn_addlsh1_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);
+#endif
+#define mpn_addlsh1_nc __MPN(addlsh1_nc)
+__GMP_DECLSPEC mp_limb_t mpn_addlsh1_nc (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t);
+#if HAVE_NATIVE_mpn_addlsh1_n && ! HAVE_NATIVE_mpn_addlsh1_n_ip1
+#define mpn_addlsh1_n_ip1(dst,src,n) mpn_addlsh1_n(dst,dst,src,n)
+#define HAVE_NATIVE_mpn_addlsh1_n_ip1 1
+#else
+#define mpn_addlsh1_n_ip1 __MPN(addlsh1_n_ip1)
+__GMP_DECLSPEC mp_limb_t mpn_addlsh1_n_ip1 (mp_ptr, mp_srcptr, mp_size_t);
+#endif
+#if HAVE_NATIVE_mpn_addlsh1_nc && ! HAVE_NATIVE_mpn_addlsh1_nc_ip1
+#define mpn_addlsh1_nc_ip1(dst,src,n,c) mpn_addlsh1_nc(dst,dst,src,n,c)
+#define HAVE_NATIVE_mpn_addlsh1_nc_ip1 1
+#else
+#define mpn_addlsh1_nc_ip1 __MPN(addlsh1_nc_ip1)
+__GMP_DECLSPEC mp_limb_t mpn_addlsh1_nc_ip1 (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t);
+#endif
  
+#ifndef mpn_addlsh2_n  /* if not done with cpuvec in a fat binary */
  /* mpn_addlsh2_n(c,a,b,n), when it exists, sets {c,n} to {a,n}+4*{b,n}, and
-   returns the carry out (0, ..., 4).  */
+   returns the carry out (0, ..., 4). Use _ip1 when a=c. */
  #define mpn_addlsh2_n __MPN(addlsh2_n)
-__GMP_DECLSPEC mp_limb_t mpn_addlsh2_n __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
+__GMP_DECLSPEC mp_limb_t mpn_addlsh2_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);
+#endif
+#define mpn_addlsh2_nc __MPN(addlsh2_nc)
+__GMP_DECLSPEC mp_limb_t mpn_addlsh2_nc (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t);
+#if HAVE_NATIVE_mpn_addlsh2_n && ! HAVE_NATIVE_mpn_addlsh2_n_ip1
+#define mpn_addlsh2_n_ip1(dst,src,n) mpn_addlsh2_n(dst,dst,src,n)
+#define HAVE_NATIVE_mpn_addlsh2_n_ip1 1
+#else
+#define mpn_addlsh2_n_ip1 __MPN(addlsh2_n_ip1)
+__GMP_DECLSPEC mp_limb_t mpn_addlsh2_n_ip1 (mp_ptr, mp_srcptr, mp_size_t);
+#endif
+#if HAVE_NATIVE_mpn_addlsh2_nc && ! HAVE_NATIVE_mpn_addlsh2_nc_ip1
+#define mpn_addlsh2_nc_ip1(dst,src,n,c) mpn_addlsh2_nc(dst,dst,src,n,c)
+#define HAVE_NATIVE_mpn_addlsh2_nc_ip1 1
+#else
+#define mpn_addlsh2_nc_ip1 __MPN(addlsh2_nc_ip1)
+__GMP_DECLSPEC mp_limb_t mpn_addlsh2_nc_ip1 (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t);
+#endif
  
  /* mpn_addlsh_n(c,a,b,n,k), when it exists, sets {c,n} to {a,n}+2^k*{b,n}, and
-   returns the carry out (0, ..., 2^k).  */
+   returns the carry out (0, ..., 2^k). Use _ip1 when a=c. */
  #define mpn_addlsh_n __MPN(addlsh_n)
-  __GMP_DECLSPEC mp_limb_t mpn_addlsh_n __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, unsigned int));
+__GMP_DECLSPEC mp_limb_t mpn_addlsh_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, unsigned int);
+#define mpn_addlsh_nc __MPN(addlsh_nc)
+__GMP_DECLSPEC mp_limb_t mpn_addlsh_nc (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, unsigned int, mp_limb_t);
+#if HAVE_NATIVE_mpn_addlsh_n && ! HAVE_NATIVE_mpn_addlsh_n_ip1
+#define mpn_addlsh_n_ip1(dst,src,n,s) mpn_addlsh_n(dst,dst,src,n,s)
+#define HAVE_NATIVE_mpn_addlsh_n_ip1 1
+#else
+#define mpn_addlsh_n_ip1 __MPN(addlsh_n_ip1)
+  __GMP_DECLSPEC mp_limb_t mpn_addlsh_n_ip1 (mp_ptr, mp_srcptr, mp_size_t, unsigned int);
+#endif
+#if HAVE_NATIVE_mpn_addlsh_nc && ! HAVE_NATIVE_mpn_addlsh_nc_ip1
+#define mpn_addlsh_nc_ip1(dst,src,n,s,c) mpn_addlsh_nc(dst,dst,src,n,s,c)
+#define HAVE_NATIVE_mpn_addlsh_nc_ip1 1
+#else
+#define mpn_addlsh_nc_ip1 __MPN(addlsh_nc_ip1)
+__GMP_DECLSPEC mp_limb_t mpn_addlsh_nc_ip1 (mp_ptr, mp_srcptr, mp_size_t, unsigned int, mp_limb_t);
+#endif
  
+#ifndef mpn_sublsh1_n  /* if not done with cpuvec in a fat binary */
  /* mpn_sublsh1_n(c,a,b,n), when it exists, sets {c,n} to {a,n}-2*{b,n}, and
-   returns the borrow out (0, 1 or 2).  */
+   returns the borrow out (0, 1 or 2). Use _ip1 when a=c. */
  #define mpn_sublsh1_n __MPN(sublsh1_n)
-__GMP_DECLSPEC mp_limb_t mpn_sublsh1_n __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
+__GMP_DECLSPEC mp_limb_t mpn_sublsh1_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);
+#endif
+#define mpn_sublsh1_nc __MPN(sublsh1_nc)
+__GMP_DECLSPEC mp_limb_t mpn_sublsh1_nc (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t);
+#if HAVE_NATIVE_mpn_sublsh1_n && ! HAVE_NATIVE_mpn_sublsh1_n_ip1
+#define mpn_sublsh1_n_ip1(dst,src,n) mpn_sublsh1_n(dst,dst,src,n)
+#define HAVE_NATIVE_mpn_sublsh1_n_ip1 1
+#else
+#define mpn_sublsh1_n_ip1 __MPN(sublsh1_n_ip1)
+__GMP_DECLSPEC mp_limb_t mpn_sublsh1_n_ip1 (mp_ptr, mp_srcptr, mp_size_t);
+#endif
+#if HAVE_NATIVE_mpn_sublsh1_nc && ! HAVE_NATIVE_mpn_sublsh1_nc_ip1
+#define mpn_sublsh1_nc_ip1(dst,src,n,c) mpn_sublsh1_nc(dst,dst,src,n,c)
+#define HAVE_NATIVE_mpn_sublsh1_nc_ip1 1
+#else
+#define mpn_sublsh1_nc_ip1 __MPN(sublsh1_nc_ip1)
+__GMP_DECLSPEC mp_limb_t mpn_sublsh1_nc_ip1 (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t);
+#endif
  
  /* mpn_rsblsh1_n(c,a,b,n), when it exists, sets {c,n} to 2*{b,n}-{a,n}, and
     returns the carry out (-1, 0, 1).  */
  #define mpn_rsblsh1_n __MPN(rsblsh1_n)
-__GMP_DECLSPEC mp_limb_signed_t mpn_rsblsh1_n __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
+__GMP_DECLSPEC mp_limb_signed_t mpn_rsblsh1_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);
+#define mpn_rsblsh1_nc __MPN(rsblsh1_nc)
+__GMP_DECLSPEC mp_limb_signed_t mpn_rsblsh1_nc (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t);
  
  /* mpn_sublsh2_n(c,a,b,n), when it exists, sets {c,n} to {a,n}-4*{b,n}, and
-   returns the borrow out (FIXME 0, 1, 2 or 3).  */
+   returns the borrow out (0, ..., 4). Use _ip1 when a=c. */
  #define mpn_sublsh2_n __MPN(sublsh2_n)
-__GMP_DECLSPEC mp_limb_t mpn_sublsh2_n __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
+__GMP_DECLSPEC mp_limb_t mpn_sublsh2_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);
+#define mpn_sublsh2_nc __MPN(sublsh2_nc)
+__GMP_DECLSPEC mp_limb_t mpn_sublsh2_nc (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t);
+#if HAVE_NATIVE_mpn_sublsh2_n && ! HAVE_NATIVE_mpn_sublsh2_n_ip1
+#define mpn_sublsh2_n_ip1(dst,src,n) mpn_sublsh2_n(dst,dst,src,n)
+#define HAVE_NATIVE_mpn_sublsh2_n_ip1 1
+#else
+#define mpn_sublsh2_n_ip1 __MPN(sublsh2_n_ip1)
+__GMP_DECLSPEC mp_limb_t mpn_sublsh2_n_ip1 (mp_ptr, mp_srcptr, mp_size_t);
+#endif
+#if HAVE_NATIVE_mpn_sublsh2_nc && ! HAVE_NATIVE_mpn_sublsh2_nc_ip1
+#define mpn_sublsh2_nc_ip1(dst,src,n,c) mpn_sublsh2_nc(dst,dst,src,n,c)
+#define HAVE_NATIVE_mpn_sublsh2_nc_ip1 1
+#else
+#define mpn_sublsh2_nc_ip1 __MPN(sublsh2_nc_ip1)
+__GMP_DECLSPEC mp_limb_t mpn_sublsh2_nc_ip1 (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t);
+#endif
+
+/* mpn_sublsh_n(c,a,b,n,k), when it exists, sets {c,n} to {a,n}-2^k*{b,n}, and
+   returns the borrow out (0, ..., 2^k). Use _ip1 when a=c. */
+#define mpn_sublsh_n __MPN(sublsh_n)
+__GMP_DECLSPEC mp_limb_t mpn_sublsh_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, unsigned int);
+#if HAVE_NATIVE_mpn_sublsh_n && ! HAVE_NATIVE_mpn_sublsh_n_ip1
+#define mpn_sublsh_n_ip1(dst,src,n,s) mpn_sublsh_n(dst,dst,src,n,s)
+#define HAVE_NATIVE_mpn_sublsh_n_ip1 1
+#else
+#define mpn_sublsh_n_ip1 __MPN(sublsh_n_ip1)
+__GMP_DECLSPEC mp_limb_t mpn_sublsh_n_ip1 (mp_ptr, mp_srcptr, mp_size_t, unsigned int);
+#endif
+#if HAVE_NATIVE_mpn_sublsh_nc && ! HAVE_NATIVE_mpn_sublsh_nc_ip1
+#define mpn_sublsh_nc_ip1(dst,src,n,s,c) mpn_sublsh_nc(dst,dst,src,n,s,c)
+#define HAVE_NATIVE_mpn_sublsh_nc_ip1 1
+#else
+#define mpn_sublsh_nc_ip1 __MPN(sublsh_nc_ip1)
+__GMP_DECLSPEC mp_limb_t mpn_sublsh_nc_ip1 (mp_ptr, mp_srcptr, mp_size_t, unsigned int, mp_limb_t);
+#endif
  
  /* mpn_rsblsh2_n(c,a,b,n), when it exists, sets {c,n} to 4*{b,n}-{a,n}, and
     returns the carry out (-1, ..., 3).  */
  #define mpn_rsblsh2_n __MPN(rsblsh2_n)
-__GMP_DECLSPEC mp_limb_signed_t mpn_rsblsh2_n __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
+__GMP_DECLSPEC mp_limb_signed_t mpn_rsblsh2_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);
+#define mpn_rsblsh2_nc __MPN(rsblsh2_nc)
+__GMP_DECLSPEC mp_limb_signed_t mpn_rsblsh2_nc (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t);
  
  /* mpn_rsblsh_n(c,a,b,n,k), when it exists, sets {c,n} to 2^k*{b,n}-{a,n}, and
     returns the carry out (-1, 0, ..., 2^k-1).  */
  #define mpn_rsblsh_n __MPN(rsblsh_n)
-__GMP_DECLSPEC mp_limb_signed_t mpn_rsblsh_n __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, unsigned int));
+__GMP_DECLSPEC mp_limb_signed_t mpn_rsblsh_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, unsigned int);
+#define mpn_rsblsh_nc __MPN(rsblsh_nc)
+__GMP_DECLSPEC mp_limb_signed_t mpn_rsblsh_nc (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, unsigned int, mp_limb_t);
  
  /* mpn_rsh1add_n(c,a,b,n), when it exists, sets {c,n} to ({a,n} + {b,n}) >> 1,
     and returns the bit rshifted out (0 or 1).  */
  #define mpn_rsh1add_n __MPN(rsh1add_n)
-__GMP_DECLSPEC mp_limb_t mpn_rsh1add_n __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
+__GMP_DECLSPEC mp_limb_t mpn_rsh1add_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);
  #define mpn_rsh1add_nc __MPN(rsh1add_nc)
-__GMP_DECLSPEC mp_limb_t mpn_rsh1add_nc __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t));
+__GMP_DECLSPEC mp_limb_t mpn_rsh1add_nc (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t);
  
  /* mpn_rsh1sub_n(c,a,b,n), when it exists, sets {c,n} to ({a,n} - {b,n}) >> 1,
     and returns the bit rshifted out (0 or 1).  If there's a borrow from the
     subtract, it's stored as a 1 in the high bit of c[n-1], like a twos
     complement negative.  */
  #define mpn_rsh1sub_n __MPN(rsh1sub_n)
-__GMP_DECLSPEC mp_limb_t mpn_rsh1sub_n __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
+__GMP_DECLSPEC mp_limb_t mpn_rsh1sub_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);
  #define mpn_rsh1sub_nc __MPN(rsh1sub_nc)
-__GMP_DECLSPEC mp_limb_t mpn_rsh1sub_nc __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t));
+__GMP_DECLSPEC mp_limb_t mpn_rsh1sub_nc (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t);
  
+#ifndef mpn_lshiftc  /* if not done with cpuvec in a fat binary */
  #define mpn_lshiftc __MPN(lshiftc)
-__GMP_DECLSPEC mp_limb_t mpn_lshiftc __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, unsigned int));
+__GMP_DECLSPEC mp_limb_t mpn_lshiftc (mp_ptr, mp_srcptr, mp_size_t, unsigned int);
+#endif
+
+#define mpn_add_err1_n  __MPN(add_err1_n)
+__GMP_DECLSPEC mp_limb_t mpn_add_err1_n (mp_ptr, mp_srcptr, mp_srcptr, mp_ptr, mp_srcptr, mp_size_t, mp_limb_t);
+
+#define mpn_add_err2_n  __MPN(add_err2_n)
+__GMP_DECLSPEC mp_limb_t mpn_add_err2_n (mp_ptr, mp_srcptr, mp_srcptr, mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t);
+
+#define mpn_add_err3_n  __MPN(add_err3_n)
+__GMP_DECLSPEC mp_limb_t mpn_add_err3_n (mp_ptr, mp_srcptr, mp_srcptr, mp_ptr, mp_srcptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t);
+
+#define mpn_sub_err1_n  __MPN(sub_err1_n)
+__GMP_DECLSPEC mp_limb_t mpn_sub_err1_n (mp_ptr, mp_srcptr, mp_srcptr, mp_ptr, mp_srcptr, mp_size_t, mp_limb_t);
+
+#define mpn_sub_err2_n  __MPN(sub_err2_n)
+__GMP_DECLSPEC mp_limb_t mpn_sub_err2_n (mp_ptr, mp_srcptr, mp_srcptr, mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t);
+
+#define mpn_sub_err3_n  __MPN(sub_err3_n)
+__GMP_DECLSPEC mp_limb_t mpn_sub_err3_n (mp_ptr, mp_srcptr, mp_srcptr, mp_ptr, mp_srcptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t);
  
  #define mpn_add_n_sub_n __MPN(add_n_sub_n)
-__GMP_DECLSPEC mp_limb_t mpn_add_n_sub_n __GMP_PROTO ((mp_ptr, mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
+__GMP_DECLSPEC mp_limb_t mpn_add_n_sub_n (mp_ptr, mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);
  
  #define mpn_add_n_sub_nc __MPN(add_n_sub_nc)
-__GMP_DECLSPEC mp_limb_t mpn_add_n_sub_nc __GMP_PROTO ((mp_ptr, mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t));
+__GMP_DECLSPEC mp_limb_t mpn_add_n_sub_nc (mp_ptr, mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t);
  
  #define mpn_addaddmul_1msb0 __MPN(addaddmul_1msb0)
-__GMP_DECLSPEC mp_limb_t mpn_addaddmul_1msb0 __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t));
+__GMP_DECLSPEC mp_limb_t mpn_addaddmul_1msb0 (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t);
  
  #define mpn_divrem_1c __MPN(divrem_1c)
-__GMP_DECLSPEC mp_limb_t mpn_divrem_1c __GMP_PROTO ((mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t));
+__GMP_DECLSPEC mp_limb_t mpn_divrem_1c (mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t);
  
  #define mpn_dump __MPN(dump)
-__GMP_DECLSPEC void mpn_dump __GMP_PROTO ((mp_srcptr, mp_size_t));
+__GMP_DECLSPEC void mpn_dump (mp_srcptr, mp_size_t);
  
  #define mpn_fib2_ui __MPN(fib2_ui)
-__GMP_DECLSPEC mp_size_t mpn_fib2_ui __GMP_PROTO ((mp_ptr, mp_ptr, unsigned long));
+__GMP_DECLSPEC mp_size_t mpn_fib2_ui (mp_ptr, mp_ptr, unsigned long);
  
  /* Remap names of internal mpn functions.  */
  #define __clz_tab               __MPN(clz_tab)
  #define mpn_udiv_w_sdiv                __MPN(udiv_w_sdiv)
  
  #define mpn_jacobi_base __MPN(jacobi_base)
-__GMP_DECLSPEC int mpn_jacobi_base __GMP_PROTO ((mp_limb_t, mp_limb_t, int)) ATTRIBUTE_CONST;
+__GMP_DECLSPEC int mpn_jacobi_base (mp_limb_t, mp_limb_t, int) ATTRIBUTE_CONST;
+
+#define mpn_jacobi_2 __MPN(jacobi_2)
+__GMP_DECLSPEC int mpn_jacobi_2 (mp_srcptr, mp_srcptr, unsigned);
+
+#define mpn_jacobi_n __MPN(jacobi_n)
+__GMP_DECLSPEC int mpn_jacobi_n (mp_ptr, mp_ptr, mp_size_t, unsigned);
  
  #define mpn_mod_1c __MPN(mod_1c)
-__GMP_DECLSPEC mp_limb_t mpn_mod_1c __GMP_PROTO ((mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t)) __GMP_ATTRIBUTE_PURE;
+__GMP_DECLSPEC mp_limb_t mpn_mod_1c (mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t) __GMP_ATTRIBUTE_PURE;
  
  #define mpn_mul_1c __MPN(mul_1c)
-__GMP_DECLSPEC mp_limb_t mpn_mul_1c __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t));
+__GMP_DECLSPEC mp_limb_t mpn_mul_1c (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t);
  
  #define mpn_mul_2 __MPN(mul_2)
-__GMP_DECLSPEC mp_limb_t mpn_mul_2 __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr));
+__GMP_DECLSPEC mp_limb_t mpn_mul_2 (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr);
  
  #define mpn_mul_3 __MPN(mul_3)
-__GMP_DECLSPEC mp_limb_t mpn_mul_3 __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr));
+__GMP_DECLSPEC mp_limb_t mpn_mul_3 (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr);
  
  #define mpn_mul_4 __MPN(mul_4)
-__GMP_DECLSPEC mp_limb_t mpn_mul_4 __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr));
+__GMP_DECLSPEC mp_limb_t mpn_mul_4 (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr);
+
+#define mpn_mul_5 __MPN(mul_5)
+__GMP_DECLSPEC mp_limb_t mpn_mul_5 (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr);
+
+#define mpn_mul_6 __MPN(mul_6)
+__GMP_DECLSPEC mp_limb_t mpn_mul_6 (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr);
  
  #ifndef mpn_mul_basecase  /* if not done with cpuvec in a fat binary */
  #define mpn_mul_basecase __MPN(mul_basecase)
-__GMP_DECLSPEC void mpn_mul_basecase __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t));
+__GMP_DECLSPEC void mpn_mul_basecase (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t);
  #endif
  
  #define mpn_mullo_n __MPN(mullo_n)
-__GMP_DECLSPEC void mpn_mullo_n __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
+__GMP_DECLSPEC void mpn_mullo_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);
  
+#ifndef mpn_mullo_basecase  /* if not done with cpuvec in a fat binary */
  #define mpn_mullo_basecase __MPN(mullo_basecase)
-__GMP_DECLSPEC void mpn_mullo_basecase __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
+__GMP_DECLSPEC void mpn_mullo_basecase (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);
+#endif
  
  #define mpn_sqr __MPN(sqr)
-__GMP_DECLSPEC void mpn_sqr __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t));
+__GMP_DECLSPEC void mpn_sqr (mp_ptr, mp_srcptr, mp_size_t);
  
  #ifndef mpn_sqr_basecase  /* if not done with cpuvec in a fat binary */
  #define mpn_sqr_basecase __MPN(sqr_basecase)
-__GMP_DECLSPEC void mpn_sqr_basecase __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t));
+__GMP_DECLSPEC void mpn_sqr_basecase (mp_ptr, mp_srcptr, mp_size_t);
  #endif
  
+#define mpn_mulmid_basecase __MPN(mulmid_basecase)
+__GMP_DECLSPEC void mpn_mulmid_basecase (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t);
+
+#define mpn_mulmid_n __MPN(mulmid_n)
+__GMP_DECLSPEC void mpn_mulmid_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);
+
+#define mpn_mulmid __MPN(mulmid)
+__GMP_DECLSPEC void mpn_mulmid (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t);
+
  #define mpn_submul_1c __MPN(submul_1c)
-__GMP_DECLSPEC mp_limb_t mpn_submul_1c __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t));
+__GMP_DECLSPEC mp_limb_t mpn_submul_1c (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t);
  
+#ifndef mpn_redc_1  /* if not done with cpuvec in a fat binary */
  #define mpn_redc_1 __MPN(redc_1)
-__GMP_DECLSPEC void mpn_redc_1 __GMP_PROTO ((mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_limb_t));
+__GMP_DECLSPEC mp_limb_t mpn_redc_1 (mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_limb_t);
+#endif
  
+#ifndef mpn_redc_2  /* if not done with cpuvec in a fat binary */
  #define mpn_redc_2 __MPN(redc_2)
-__GMP_DECLSPEC void mpn_redc_2 __GMP_PROTO ((mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_srcptr));
+__GMP_DECLSPEC mp_limb_t mpn_redc_2 (mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_srcptr);
+#endif
+
  #define mpn_redc_n __MPN(redc_n)
-__GMP_DECLSPEC void mpn_redc_n __GMP_PROTO ((mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_srcptr));
+__GMP_DECLSPEC void mpn_redc_n (mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_srcptr);
  
  
+#ifndef mpn_mod_1_1p_cps  /* if not done with cpuvec in a fat binary */
  #define mpn_mod_1_1p_cps __MPN(mod_1_1p_cps)
-__GMP_DECLSPEC void mpn_mod_1_1p_cps __GMP_PROTO ((mp_limb_t [4], mp_limb_t));
+__GMP_DECLSPEC void mpn_mod_1_1p_cps (mp_limb_t [4], mp_limb_t);
+#endif
+#ifndef mpn_mod_1_1p  /* if not done with cpuvec in a fat binary */
  #define mpn_mod_1_1p __MPN(mod_1_1p)
-__GMP_DECLSPEC mp_limb_t mpn_mod_1_1p __GMP_PROTO ((mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t [4])) __GMP_ATTRIBUTE_PURE;
+__GMP_DECLSPEC mp_limb_t mpn_mod_1_1p (mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t [4]) __GMP_ATTRIBUTE_PURE;
+#endif
  
+#ifndef mpn_mod_1s_2p_cps  /* if not done with cpuvec in a fat binary */
  #define mpn_mod_1s_2p_cps __MPN(mod_1s_2p_cps)
-__GMP_DECLSPEC void mpn_mod_1s_2p_cps __GMP_PROTO ((mp_limb_t [5], mp_limb_t));
+__GMP_DECLSPEC void mpn_mod_1s_2p_cps (mp_limb_t [5], mp_limb_t);
+#endif
+#ifndef mpn_mod_1s_2p  /* if not done with cpuvec in a fat binary */
  #define mpn_mod_1s_2p __MPN(mod_1s_2p)
-__GMP_DECLSPEC mp_limb_t mpn_mod_1s_2p __GMP_PROTO ((mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t [5])) __GMP_ATTRIBUTE_PURE;
+__GMP_DECLSPEC mp_limb_t mpn_mod_1s_2p (mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t [5]) __GMP_ATTRIBUTE_PURE;
+#endif
  
+#ifndef mpn_mod_1s_3p_cps  /* if not done with cpuvec in a fat binary */
  #define mpn_mod_1s_3p_cps __MPN(mod_1s_3p_cps)
-__GMP_DECLSPEC void mpn_mod_1s_3p_cps __GMP_PROTO ((mp_limb_t [6], mp_limb_t));
+__GMP_DECLSPEC void mpn_mod_1s_3p_cps (mp_limb_t [6], mp_limb_t);
+#endif
+#ifndef mpn_mod_1s_3p  /* if not done with cpuvec in a fat binary */
  #define mpn_mod_1s_3p __MPN(mod_1s_3p)
-__GMP_DECLSPEC mp_limb_t mpn_mod_1s_3p __GMP_PROTO ((mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t [6])) __GMP_ATTRIBUTE_PURE;
+__GMP_DECLSPEC mp_limb_t mpn_mod_1s_3p (mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t [6]) __GMP_ATTRIBUTE_PURE;
+#endif
  
+#ifndef mpn_mod_1s_4p_cps  /* if not done with cpuvec in a fat binary */
  #define mpn_mod_1s_4p_cps __MPN(mod_1s_4p_cps)
-__GMP_DECLSPEC void mpn_mod_1s_4p_cps __GMP_PROTO ((mp_limb_t [7], mp_limb_t));
+__GMP_DECLSPEC void mpn_mod_1s_4p_cps (mp_limb_t [7], mp_limb_t);
+#endif
+#ifndef mpn_mod_1s_4p  /* if not done with cpuvec in a fat binary */
  #define mpn_mod_1s_4p __MPN(mod_1s_4p)
-__GMP_DECLSPEC mp_limb_t mpn_mod_1s_4p __GMP_PROTO ((mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t [7])) __GMP_ATTRIBUTE_PURE;
+__GMP_DECLSPEC mp_limb_t mpn_mod_1s_4p (mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t [7]) __GMP_ATTRIBUTE_PURE;
+#endif
  
  #define mpn_bc_mulmod_bnm1 __MPN(bc_mulmod_bnm1)
-__GMP_DECLSPEC void mpn_bc_mulmod_bnm1 __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_ptr));
+__GMP_DECLSPEC void mpn_bc_mulmod_bnm1 (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_ptr);
  #define mpn_mulmod_bnm1 __MPN(mulmod_bnm1)
-__GMP_DECLSPEC void mpn_mulmod_bnm1 __GMP_PROTO ((mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr));
+__GMP_DECLSPEC void mpn_mulmod_bnm1 (mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);
  #define mpn_mulmod_bnm1_next_size __MPN(mulmod_bnm1_next_size)
-__GMP_DECLSPEC mp_size_t mpn_mulmod_bnm1_next_size __GMP_PROTO ((mp_size_t)) ATTRIBUTE_CONST;
+__GMP_DECLSPEC mp_size_t mpn_mulmod_bnm1_next_size (mp_size_t) ATTRIBUTE_CONST;
  static inline mp_size_t
  mpn_mulmod_bnm1_itch (mp_size_t rn, mp_size_t an, mp_size_t bn) {
    mp_size_t n, itch;
@@ -960,9 +1176,9 @@ mpn_mulmod_bnm1_itch (mp_size_t rn, mp_size_t an, mp_size_t bn) {
  }
  
  #define mpn_sqrmod_bnm1 __MPN(sqrmod_bnm1)
-__GMP_DECLSPEC void mpn_sqrmod_bnm1 __GMP_PROTO ((mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr));
+__GMP_DECLSPEC void mpn_sqrmod_bnm1 (mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);
  #define mpn_sqrmod_bnm1_next_size __MPN(sqrmod_bnm1_next_size)
-__GMP_DECLSPEC mp_size_t mpn_sqrmod_bnm1_next_size __GMP_PROTO ((mp_size_t)) ATTRIBUTE_CONST;
+__GMP_DECLSPEC mp_size_t mpn_sqrmod_bnm1_next_size (mp_size_t) ATTRIBUTE_CONST;
  static inline mp_size_t
  mpn_sqrmod_bnm1_itch (mp_size_t rn, mp_size_t an) {
    mp_size_t n, itch;
@@ -977,10 +1193,10 @@ typedef const __gmp_randstate_struct *gmp_randstate_srcptr;
  
  /* Pseudo-random number generator function pointers structure.  */
  typedef struct {
-  void (*randseed_fn) __GMP_PROTO ((gmp_randstate_t, mpz_srcptr));
-  void (*randget_fn) __GMP_PROTO ((gmp_randstate_t, mp_ptr, unsigned long int));
-  void (*randclear_fn) __GMP_PROTO ((gmp_randstate_t));
-  void (*randiset_fn) __GMP_PROTO ((gmp_randstate_ptr, gmp_randstate_srcptr));
+  void (*randseed_fn) (gmp_randstate_t, mpz_srcptr);
+  void (*randget_fn) (gmp_randstate_t, mp_ptr, unsigned long int);
+  void (*randclear_fn) (gmp_randstate_t);
+  void (*randiset_fn) (gmp_randstate_ptr, gmp_randstate_srcptr);
  } gmp_randfnptr_t;
  
  /* Macro to obtain a void pointer to the function pointers structure.  */
@@ -991,14 +1207,14 @@ typedef struct {
  #define RNG_STATE(rstate) ((rstate)->_mp_seed->_mp_d)
  
  /* Write a given number of random bits to rp.  */
-#define _gmp_rand(rp, state, bits)                              \
-  do {                                                          \
-    gmp_randstate_ptr  __rstate = (state);                      \
-    (*((gmp_randfnptr_t *) RNG_FNPTR (__rstate))->randget_fn)   \
-       (__rstate, rp, bits);                                    \
+#define _gmp_rand(rp, state, bits)                                     \
+  do {                                                                 \
+    gmp_randstate_ptr  __rstate = (state);                             \
+    (*((gmp_randfnptr_t *) RNG_FNPTR (__rstate))->randget_fn)          \
+      (__rstate, rp, bits);                                            \
    } while (0)
  
-__GMP_DECLSPEC void __gmp_randinit_mt_noseed __GMP_PROTO ((gmp_randstate_t));
+__GMP_DECLSPEC void __gmp_randinit_mt_noseed (gmp_randstate_t);
  
  
  /* __gmp_rands is the global state for the old-style random functions, and
@@ -1016,20 +1232,20 @@ __GMP_DECLSPEC void __gmp_randinit_mt_noseed __GMP_PROTO ((gmp_randstate_t));
  __GMP_DECLSPEC extern char             __gmp_rands_initialized;
  __GMP_DECLSPEC extern gmp_randstate_t  __gmp_rands;
  
-#define RANDS                                       \
-  ((__gmp_rands_initialized ? 0                     \
-    : (__gmp_rands_initialized = 1,                 \
-       __gmp_randinit_mt_noseed (__gmp_rands), 0)), \
+#define RANDS                                                          \
+  ((__gmp_rands_initialized ? 0                                                \
+    : (__gmp_rands_initialized = 1,                                    \
+       __gmp_randinit_mt_noseed (__gmp_rands), 0)),                    \
     __gmp_rands)
  
  /* this is used by the test programs, to free memory */
-#define RANDS_CLEAR()                   \
-  do {                                  \
-    if (__gmp_rands_initialized)        \
-      {                                 \
-        __gmp_rands_initialized = 0;    \
-        gmp_randclear (__gmp_rands);    \
-      }                                 \
+#define RANDS_CLEAR()                                                  \
+  do {                                                                 \
+    if (__gmp_rands_initialized)                                       \
+      {                                                                        \
+       __gmp_rands_initialized = 0;                                    \
+       gmp_randclear (__gmp_rands);                                    \
+      }                                                                        \
    } while (0)
  
  
@@ -1039,10 +1255,17 @@ __GMP_DECLSPEC extern gmp_randstate_t  __gmp_rands;
     be compile-time constants, so the compiler should be able to eliminate
     the code for the unwanted algorithm.  */
  
-#define ABOVE_THRESHOLD(size,thresh)    \
-  ((thresh) == 0                        \
-   || ((thresh) != MP_SIZE_T_MAX        \
+#if ! defined (__GNUC__) || __GNUC__ < 2
+#define ABOVE_THRESHOLD(size,thresh)                                   \
+  ((thresh) == 0                                                       \
+   || ((thresh) != MP_SIZE_T_MAX                                       \
+       && (size) >= (thresh)))
+#else
+#define ABOVE_THRESHOLD(size,thresh)                                   \
+  ((__builtin_constant_p (thresh) && (thresh) == 0)                    \
+   || (!(__builtin_constant_p (thresh) && (thresh) == MP_SIZE_T_MAX)   \
         && (size) >= (thresh)))
+#endif
  #define BELOW_THRESHOLD(size,thresh)  (! ABOVE_THRESHOLD (size, thresh))
  
  #define MPN_TOOM22_MUL_MINSIZE    4
@@ -1062,258 +1285,289 @@ __GMP_DECLSPEC extern gmp_randstate_t  __gmp_rands;
  
  #define MPN_TOOM32_MUL_MINSIZE   10
  #define MPN_TOOM42_MUL_MINSIZE   10
-#define MPN_TOOM43_MUL_MINSIZE   49 /* ??? */
-#define MPN_TOOM53_MUL_MINSIZE   49 /* ??? */
+#define MPN_TOOM43_MUL_MINSIZE   25
+#define MPN_TOOM53_MUL_MINSIZE   17
+#define MPN_TOOM54_MUL_MINSIZE   31
  #define MPN_TOOM63_MUL_MINSIZE   49
  
+#define MPN_TOOM42_MULMID_MINSIZE    4
+
  #define   mpn_sqr_diagonal __MPN(sqr_diagonal)
-__GMP_DECLSPEC void      mpn_sqr_diagonal __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t));
+__GMP_DECLSPEC void      mpn_sqr_diagonal (mp_ptr, mp_srcptr, mp_size_t);
+
+#define mpn_sqr_diag_addlsh1 __MPN(sqr_diag_addlsh1)
+__GMP_DECLSPEC void      mpn_sqr_diag_addlsh1 (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);
  
  #define   mpn_toom_interpolate_5pts __MPN(toom_interpolate_5pts)
-__GMP_DECLSPEC void      mpn_toom_interpolate_5pts __GMP_PROTO ((mp_ptr, mp_ptr, mp_ptr, mp_size_t, mp_size_t, int, mp_limb_t));
+__GMP_DECLSPEC void      mpn_toom_interpolate_5pts (mp_ptr, mp_ptr, mp_ptr, mp_size_t, mp_size_t, int, mp_limb_t);
  
  enum toom6_flags {toom6_all_pos = 0, toom6_vm1_neg = 1, toom6_vm2_neg = 2};
  #define   mpn_toom_interpolate_6pts __MPN(toom_interpolate_6pts)
-__GMP_DECLSPEC void      mpn_toom_interpolate_6pts __GMP_PROTO ((mp_ptr, mp_size_t, enum toom6_flags, mp_ptr, mp_ptr, mp_ptr, mp_size_t));
+__GMP_DECLSPEC void      mpn_toom_interpolate_6pts (mp_ptr, mp_size_t, enum toom6_flags, mp_ptr, mp_ptr, mp_ptr, mp_size_t);
  
  enum toom7_flags { toom7_w1_neg = 1, toom7_w3_neg = 2 };
  #define   mpn_toom_interpolate_7pts __MPN(toom_interpolate_7pts)
-__GMP_DECLSPEC void      mpn_toom_interpolate_7pts __GMP_PROTO ((mp_ptr, mp_size_t, enum toom7_flags, mp_ptr, mp_ptr, mp_ptr, mp_ptr, mp_size_t, mp_ptr));
+__GMP_DECLSPEC void      mpn_toom_interpolate_7pts (mp_ptr, mp_size_t, enum toom7_flags, mp_ptr, mp_ptr, mp_ptr, mp_ptr, mp_size_t, mp_ptr);
  
  #define mpn_toom_interpolate_8pts __MPN(toom_interpolate_8pts)
-__GMP_DECLSPEC void      mpn_toom_interpolate_8pts __GMP_PROTO ((mp_ptr, mp_size_t, mp_ptr, mp_ptr, mp_size_t, mp_ptr));
+__GMP_DECLSPEC void      mpn_toom_interpolate_8pts (mp_ptr, mp_size_t, mp_ptr, mp_ptr, mp_size_t, mp_ptr);
  
  #define mpn_toom_interpolate_12pts __MPN(toom_interpolate_12pts)
-__GMP_DECLSPEC void      mpn_toom_interpolate_12pts __GMP_PROTO ((mp_ptr, mp_ptr, mp_ptr, mp_ptr, mp_size_t, mp_size_t, int, mp_ptr));
+__GMP_DECLSPEC void      mpn_toom_interpolate_12pts (mp_ptr, mp_ptr, mp_ptr, mp_ptr, mp_size_t, mp_size_t, int, mp_ptr);
  
  #define mpn_toom_interpolate_16pts __MPN(toom_interpolate_16pts)
-__GMP_DECLSPEC void      mpn_toom_interpolate_16pts __GMP_PROTO ((mp_ptr, mp_ptr, mp_ptr, mp_ptr, mp_ptr, mp_size_t, mp_size_t, int, mp_ptr));
+__GMP_DECLSPEC void      mpn_toom_interpolate_16pts (mp_ptr, mp_ptr, mp_ptr, mp_ptr, mp_ptr, mp_size_t, mp_size_t, int, mp_ptr);
  
  #define   mpn_toom_couple_handling __MPN(toom_couple_handling)
-__GMP_DECLSPEC void mpn_toom_couple_handling __GMP_PROTO ((mp_ptr, mp_size_t, mp_ptr, int, mp_size_t, int, int));
+__GMP_DECLSPEC void mpn_toom_couple_handling (mp_ptr, mp_size_t, mp_ptr, int, mp_size_t, int, int);
  
  #define   mpn_toom_eval_dgr3_pm1 __MPN(toom_eval_dgr3_pm1)
-__GMP_DECLSPEC int mpn_toom_eval_dgr3_pm1 __GMP_PROTO ((mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_size_t, mp_ptr));
+__GMP_DECLSPEC int mpn_toom_eval_dgr3_pm1 (mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_size_t, mp_ptr);
  
  #define   mpn_toom_eval_dgr3_pm2 __MPN(toom_eval_dgr3_pm2)
-__GMP_DECLSPEC int mpn_toom_eval_dgr3_pm2 __GMP_PROTO ((mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_size_t, mp_ptr));
+__GMP_DECLSPEC int mpn_toom_eval_dgr3_pm2 (mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_size_t, mp_ptr);
  
  #define   mpn_toom_eval_pm1 __MPN(toom_eval_pm1)
-__GMP_DECLSPEC int mpn_toom_eval_pm1 __GMP_PROTO ((mp_ptr, mp_ptr, unsigned, mp_srcptr, mp_size_t, mp_size_t, mp_ptr));
+__GMP_DECLSPEC int mpn_toom_eval_pm1 (mp_ptr, mp_ptr, unsigned, mp_srcptr, mp_size_t, mp_size_t, mp_ptr);
  
  #define   mpn_toom_eval_pm2 __MPN(toom_eval_pm2)
-__GMP_DECLSPEC int mpn_toom_eval_pm2 __GMP_PROTO ((mp_ptr, mp_ptr, unsigned, mp_srcptr, mp_size_t, mp_size_t, mp_ptr));
+__GMP_DECLSPEC int mpn_toom_eval_pm2 (mp_ptr, mp_ptr, unsigned, mp_srcptr, mp_size_t, mp_size_t, mp_ptr);
  
  #define   mpn_toom_eval_pm2exp __MPN(toom_eval_pm2exp)
-__GMP_DECLSPEC int mpn_toom_eval_pm2exp __GMP_PROTO ((mp_ptr, mp_ptr, unsigned, mp_srcptr, mp_size_t, mp_size_t, unsigned, mp_ptr));
+__GMP_DECLSPEC int mpn_toom_eval_pm2exp (mp_ptr, mp_ptr, unsigned, mp_srcptr, mp_size_t, mp_size_t, unsigned, mp_ptr);
  
  #define   mpn_toom_eval_pm2rexp __MPN(toom_eval_pm2rexp)
-__GMP_DECLSPEC int mpn_toom_eval_pm2rexp __GMP_PROTO ((mp_ptr, mp_ptr, unsigned, mp_srcptr, mp_size_t, mp_size_t, unsigned, mp_ptr));
+__GMP_DECLSPEC int mpn_toom_eval_pm2rexp (mp_ptr, mp_ptr, unsigned, mp_srcptr, mp_size_t, mp_size_t, unsigned, mp_ptr);
  
  #define   mpn_toom22_mul __MPN(toom22_mul)
-__GMP_DECLSPEC void      mpn_toom22_mul __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr));
+__GMP_DECLSPEC void      mpn_toom22_mul (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);
  
  #define   mpn_toom32_mul __MPN(toom32_mul)
-__GMP_DECLSPEC void      mpn_toom32_mul __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr));
+__GMP_DECLSPEC void      mpn_toom32_mul (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);
  
  #define   mpn_toom42_mul __MPN(toom42_mul)
-__GMP_DECLSPEC void      mpn_toom42_mul __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr));
+__GMP_DECLSPEC void      mpn_toom42_mul (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);
  
  #define   mpn_toom52_mul __MPN(toom52_mul)
-__GMP_DECLSPEC void      mpn_toom52_mul __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr));
+__GMP_DECLSPEC void      mpn_toom52_mul (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);
  
  #define   mpn_toom62_mul __MPN(toom62_mul)
-__GMP_DECLSPEC void      mpn_toom62_mul __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr));
+__GMP_DECLSPEC void      mpn_toom62_mul (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);
  
  #define   mpn_toom2_sqr __MPN(toom2_sqr)
-__GMP_DECLSPEC void      mpn_toom2_sqr __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_ptr));
+__GMP_DECLSPEC void      mpn_toom2_sqr (mp_ptr, mp_srcptr, mp_size_t, mp_ptr);
  
  #define   mpn_toom33_mul __MPN(toom33_mul)
-__GMP_DECLSPEC void      mpn_toom33_mul __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr));
+__GMP_DECLSPEC void      mpn_toom33_mul (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);
  
  #define   mpn_toom43_mul __MPN(toom43_mul)
-__GMP_DECLSPEC void      mpn_toom43_mul __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr));
+__GMP_DECLSPEC void      mpn_toom43_mul (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);
  
  #define   mpn_toom53_mul __MPN(toom53_mul)
-__GMP_DECLSPEC void      mpn_toom53_mul __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr));
+__GMP_DECLSPEC void      mpn_toom53_mul (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);
+
+#define   mpn_toom54_mul __MPN(toom54_mul)
+__GMP_DECLSPEC void      mpn_toom54_mul (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);
  
  #define   mpn_toom63_mul __MPN(toom63_mul)
-__GMP_DECLSPEC void      mpn_toom63_mul __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr));
+__GMP_DECLSPEC void      mpn_toom63_mul (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);
  
  #define   mpn_toom3_sqr __MPN(toom3_sqr)
-__GMP_DECLSPEC void      mpn_toom3_sqr __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_ptr));
+__GMP_DECLSPEC void      mpn_toom3_sqr (mp_ptr, mp_srcptr, mp_size_t, mp_ptr);
  
  #define   mpn_toom44_mul __MPN(toom44_mul)
-__GMP_DECLSPEC void      mpn_toom44_mul __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr));
+__GMP_DECLSPEC void      mpn_toom44_mul (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);
  
  #define   mpn_toom4_sqr __MPN(toom4_sqr)
-__GMP_DECLSPEC void      mpn_toom4_sqr __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_ptr));
+__GMP_DECLSPEC void      mpn_toom4_sqr (mp_ptr, mp_srcptr, mp_size_t, mp_ptr);
  
  #define   mpn_toom6h_mul __MPN(toom6h_mul)
-__GMP_DECLSPEC void      mpn_toom6h_mul __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr));
+__GMP_DECLSPEC void      mpn_toom6h_mul (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);
  
  #define   mpn_toom6_sqr __MPN(toom6_sqr)
-__GMP_DECLSPEC void      mpn_toom6_sqr __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_ptr));
+__GMP_DECLSPEC void      mpn_toom6_sqr (mp_ptr, mp_srcptr, mp_size_t, mp_ptr);
  
  #define   mpn_toom8h_mul __MPN(toom8h_mul)
-__GMP_DECLSPEC void      mpn_toom8h_mul __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr));
+__GMP_DECLSPEC void      mpn_toom8h_mul (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);
  
  #define   mpn_toom8_sqr __MPN(toom8_sqr)
-__GMP_DECLSPEC void      mpn_toom8_sqr __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_ptr));
+__GMP_DECLSPEC void      mpn_toom8_sqr (mp_ptr, mp_srcptr, mp_size_t, mp_ptr);
+
+#define   mpn_toom42_mulmid __MPN(toom42_mulmid)
+__GMP_DECLSPEC void      mpn_toom42_mulmid (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_ptr);
  
  #define   mpn_fft_best_k __MPN(fft_best_k)
-__GMP_DECLSPEC int       mpn_fft_best_k __GMP_PROTO ((mp_size_t, int)) ATTRIBUTE_CONST;
+__GMP_DECLSPEC int       mpn_fft_best_k (mp_size_t, int) ATTRIBUTE_CONST;
  
  #define   mpn_mul_fft __MPN(mul_fft)
-__GMP_DECLSPEC mp_limb_t mpn_mul_fft __GMP_PROTO ((mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, int));
+__GMP_DECLSPEC mp_limb_t mpn_mul_fft (mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, int);
  
  #define   mpn_mul_fft_full __MPN(mul_fft_full)
-__GMP_DECLSPEC void      mpn_mul_fft_full __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t));
+__GMP_DECLSPEC void      mpn_mul_fft_full (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t);
  
  #define   mpn_nussbaumer_mul __MPN(nussbaumer_mul)
-__GMP_DECLSPEC void      mpn_nussbaumer_mul __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t));
+__GMP_DECLSPEC void      mpn_nussbaumer_mul (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t);
  
  #define   mpn_fft_next_size __MPN(fft_next_size)
-__GMP_DECLSPEC mp_size_t mpn_fft_next_size __GMP_PROTO ((mp_size_t, int)) ATTRIBUTE_CONST;
+__GMP_DECLSPEC mp_size_t mpn_fft_next_size (mp_size_t, int) ATTRIBUTE_CONST;
+
+#define   mpn_div_qr_2n_pi1 __MPN(div_qr_2n_pi1)
+  __GMP_DECLSPEC mp_limb_t mpn_div_qr_2n_pi1 (mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t, mp_limb_t);
+
+#define   mpn_div_qr_2u_pi1 __MPN(div_qr_2u_pi1)
+  __GMP_DECLSPEC mp_limb_t mpn_div_qr_2u_pi1 (mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t, int, mp_limb_t);
  
  #define   mpn_sbpi1_div_qr __MPN(sbpi1_div_qr)
-__GMP_DECLSPEC mp_limb_t mpn_sbpi1_div_qr __GMP_PROTO ((mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t));
+__GMP_DECLSPEC mp_limb_t mpn_sbpi1_div_qr (mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t);
  
  #define   mpn_sbpi1_div_q __MPN(sbpi1_div_q)
-__GMP_DECLSPEC mp_limb_t mpn_sbpi1_div_q __GMP_PROTO ((mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t));
+__GMP_DECLSPEC mp_limb_t mpn_sbpi1_div_q (mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t);
  
  #define   mpn_sbpi1_divappr_q __MPN(sbpi1_divappr_q)
-__GMP_DECLSPEC mp_limb_t mpn_sbpi1_divappr_q __GMP_PROTO ((mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t));
+__GMP_DECLSPEC mp_limb_t mpn_sbpi1_divappr_q (mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t);
  
  #define   mpn_dcpi1_div_qr __MPN(dcpi1_div_qr)
-__GMP_DECLSPEC mp_limb_t mpn_dcpi1_div_qr __GMP_PROTO ((mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t, gmp_pi1_t *));
+__GMP_DECLSPEC mp_limb_t mpn_dcpi1_div_qr (mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t, gmp_pi1_t *);
  #define   mpn_dcpi1_div_qr_n __MPN(dcpi1_div_qr_n)
-__GMP_DECLSPEC mp_limb_t mpn_dcpi1_div_qr_n __GMP_PROTO ((mp_ptr, mp_ptr, mp_srcptr, mp_size_t, gmp_pi1_t *, mp_ptr));
+__GMP_DECLSPEC mp_limb_t mpn_dcpi1_div_qr_n (mp_ptr, mp_ptr, mp_srcptr, mp_size_t, gmp_pi1_t *, mp_ptr);
  
  #define   mpn_dcpi1_div_q __MPN(dcpi1_div_q)
-__GMP_DECLSPEC mp_limb_t mpn_dcpi1_div_q __GMP_PROTO ((mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t, gmp_pi1_t *));
+__GMP_DECLSPEC mp_limb_t mpn_dcpi1_div_q (mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t, gmp_pi1_t *);
  
  #define   mpn_dcpi1_divappr_q __MPN(dcpi1_divappr_q)
-__GMP_DECLSPEC mp_limb_t mpn_dcpi1_divappr_q __GMP_PROTO ((mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t, gmp_pi1_t *));
+__GMP_DECLSPEC mp_limb_t mpn_dcpi1_divappr_q (mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t, gmp_pi1_t *);
  #define   mpn_dcpi1_divappr_q_n __MPN(dcpi1_divappr_q_n)
-__GMP_DECLSPEC mp_limb_t mpn_dcpi1_divappr_q_n __GMP_PROTO ((mp_ptr, mp_ptr, mp_srcptr, mp_size_t, gmp_pi1_t *, mp_ptr));
+__GMP_DECLSPEC mp_limb_t mpn_dcpi1_divappr_q_n (mp_ptr, mp_ptr, mp_srcptr, mp_size_t, gmp_pi1_t *, mp_ptr);
  
  #define   mpn_mu_div_qr __MPN(mu_div_qr)
-__GMP_DECLSPEC mp_limb_t mpn_mu_div_qr __GMP_PROTO ((mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr));
+__GMP_DECLSPEC mp_limb_t mpn_mu_div_qr (mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);
  #define   mpn_mu_div_qr_itch __MPN(mu_div_qr_itch)
-__GMP_DECLSPEC mp_size_t mpn_mu_div_qr_itch __GMP_PROTO ((mp_size_t, mp_size_t, int));
+__GMP_DECLSPEC mp_size_t mpn_mu_div_qr_itch (mp_size_t, mp_size_t, int);
  #define   mpn_mu_div_qr_choose_in __MPN(mu_div_qr_choose_in)
-__GMP_DECLSPEC mp_size_t mpn_mu_div_qr_choose_in __GMP_PROTO ((mp_size_t, mp_size_t, int));
+__GMP_DECLSPEC mp_size_t mpn_mu_div_qr_choose_in (mp_size_t, mp_size_t, int);
  
  #define   mpn_preinv_mu_div_qr __MPN(preinv_mu_div_qr)
-__GMP_DECLSPEC mp_limb_t mpn_preinv_mu_div_qr __GMP_PROTO ((mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr));
+__GMP_DECLSPEC mp_limb_t mpn_preinv_mu_div_qr (mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);
  #define   mpn_preinv_mu_div_qr_itch __MPN(preinv_mu_div_qr_itch)
-__GMP_DECLSPEC mp_size_t mpn_preinv_mu_div_qr_itch __GMP_PROTO ((mp_size_t, mp_size_t, mp_size_t));
+__GMP_DECLSPEC mp_size_t mpn_preinv_mu_div_qr_itch (mp_size_t, mp_size_t, mp_size_t);
  
  #define   mpn_mu_divappr_q __MPN(mu_divappr_q)
-__GMP_DECLSPEC mp_limb_t mpn_mu_divappr_q __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr));
+__GMP_DECLSPEC mp_limb_t mpn_mu_divappr_q (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);
  #define   mpn_mu_divappr_q_itch __MPN(mu_divappr_q_itch)
-__GMP_DECLSPEC mp_size_t mpn_mu_divappr_q_itch __GMP_PROTO ((mp_size_t, mp_size_t, int));
+__GMP_DECLSPEC mp_size_t mpn_mu_divappr_q_itch (mp_size_t, mp_size_t, int);
  #define   mpn_mu_divappr_q_choose_in __MPN(mu_divappr_q_choose_in)
-__GMP_DECLSPEC mp_size_t mpn_mu_divappr_q_choose_in __GMP_PROTO ((mp_size_t, mp_size_t, int));
+__GMP_DECLSPEC mp_size_t mpn_mu_divappr_q_choose_in (mp_size_t, mp_size_t, int);
  
  #define   mpn_preinv_mu_divappr_q __MPN(preinv_mu_divappr_q)
-__GMP_DECLSPEC mp_limb_t mpn_preinv_mu_divappr_q __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr));
+__GMP_DECLSPEC mp_limb_t mpn_preinv_mu_divappr_q (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);
  
  #define   mpn_mu_div_q __MPN(mu_div_q)
-__GMP_DECLSPEC mp_limb_t mpn_mu_div_q __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr));
+__GMP_DECLSPEC mp_limb_t mpn_mu_div_q (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);
  #define   mpn_mu_div_q_itch __MPN(mu_div_q_itch)
-__GMP_DECLSPEC mp_size_t mpn_mu_div_q_itch __GMP_PROTO ((mp_size_t, mp_size_t, int));
+__GMP_DECLSPEC mp_size_t mpn_mu_div_q_itch (mp_size_t, mp_size_t, int);
  
  #define  mpn_div_q __MPN(div_q)
-__GMP_DECLSPEC void mpn_div_q __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr));
+__GMP_DECLSPEC void mpn_div_q (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);
  
  #define   mpn_invert __MPN(invert)
-__GMP_DECLSPEC void      mpn_invert __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_ptr));
+__GMP_DECLSPEC void      mpn_invert (mp_ptr, mp_srcptr, mp_size_t, mp_ptr);
  #define mpn_invert_itch(n)  mpn_invertappr_itch(n)
  
  #define   mpn_ni_invertappr __MPN(ni_invertappr)
-__GMP_DECLSPEC mp_limb_t mpn_ni_invertappr __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_ptr));
+__GMP_DECLSPEC mp_limb_t mpn_ni_invertappr (mp_ptr, mp_srcptr, mp_size_t, mp_ptr);
  #define   mpn_invertappr __MPN(invertappr)
-__GMP_DECLSPEC mp_limb_t mpn_invertappr __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_ptr));
+__GMP_DECLSPEC mp_limb_t mpn_invertappr (mp_ptr, mp_srcptr, mp_size_t, mp_ptr);
  #define mpn_invertappr_itch(n)  (3 * (n) + 2)
  
  #define   mpn_binvert __MPN(binvert)
-__GMP_DECLSPEC void      mpn_binvert __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_ptr));
+__GMP_DECLSPEC void      mpn_binvert (mp_ptr, mp_srcptr, mp_size_t, mp_ptr);
  #define   mpn_binvert_itch __MPN(binvert_itch)
-__GMP_DECLSPEC mp_size_t mpn_binvert_itch __GMP_PROTO ((mp_size_t));
+__GMP_DECLSPEC mp_size_t mpn_binvert_itch (mp_size_t);
  
  #define mpn_bdiv_q_1 __MPN(bdiv_q_1)
-__GMP_DECLSPEC mp_limb_t mpn_bdiv_q_1 __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_limb_t));
+__GMP_DECLSPEC mp_limb_t mpn_bdiv_q_1 (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t);
  
  #define mpn_pi1_bdiv_q_1 __MPN(pi1_bdiv_q_1)
-__GMP_DECLSPEC mp_limb_t mpn_pi1_bdiv_q_1 __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t, int));
+__GMP_DECLSPEC mp_limb_t mpn_pi1_bdiv_q_1 (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t, int);
  
  #define   mpn_sbpi1_bdiv_qr __MPN(sbpi1_bdiv_qr)
-__GMP_DECLSPEC mp_limb_t mpn_sbpi1_bdiv_qr __GMP_PROTO ((mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t));
+__GMP_DECLSPEC mp_limb_t mpn_sbpi1_bdiv_qr (mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t);
  
  #define   mpn_sbpi1_bdiv_q __MPN(sbpi1_bdiv_q)
-__GMP_DECLSPEC void      mpn_sbpi1_bdiv_q __GMP_PROTO ((mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t));
+__GMP_DECLSPEC void      mpn_sbpi1_bdiv_q (mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t);
  
  #define   mpn_dcpi1_bdiv_qr __MPN(dcpi1_bdiv_qr)
-__GMP_DECLSPEC mp_limb_t mpn_dcpi1_bdiv_qr __GMP_PROTO ((mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t));
+__GMP_DECLSPEC mp_limb_t mpn_dcpi1_bdiv_qr (mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t);
  #define   mpn_dcpi1_bdiv_qr_n_itch __MPN(dcpi1_bdiv_qr_n_itch)
-__GMP_DECLSPEC mp_size_t mpn_dcpi1_bdiv_qr_n_itch __GMP_PROTO ((mp_size_t));
+__GMP_DECLSPEC mp_size_t mpn_dcpi1_bdiv_qr_n_itch (mp_size_t);
  
  #define   mpn_dcpi1_bdiv_qr_n __MPN(dcpi1_bdiv_qr_n)
-__GMP_DECLSPEC mp_limb_t mpn_dcpi1_bdiv_qr_n __GMP_PROTO ((mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_ptr));
+__GMP_DECLSPEC mp_limb_t mpn_dcpi1_bdiv_qr_n (mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_ptr);
  #define   mpn_dcpi1_bdiv_q __MPN(dcpi1_bdiv_q)
-__GMP_DECLSPEC void      mpn_dcpi1_bdiv_q __GMP_PROTO ((mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t));
+__GMP_DECLSPEC void      mpn_dcpi1_bdiv_q (mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t);
  
  #define   mpn_dcpi1_bdiv_q_n_itch __MPN(dcpi1_bdiv_q_n_itch)
-__GMP_DECLSPEC mp_size_t mpn_dcpi1_bdiv_q_n_itch __GMP_PROTO ((mp_size_t));
+__GMP_DECLSPEC mp_size_t mpn_dcpi1_bdiv_q_n_itch (mp_size_t);
  #define   mpn_dcpi1_bdiv_q_n __MPN(dcpi1_bdiv_q_n)
-__GMP_DECLSPEC void      mpn_dcpi1_bdiv_q_n __GMP_PROTO ((mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_ptr));
+__GMP_DECLSPEC void      mpn_dcpi1_bdiv_q_n (mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_ptr);
  
  #define   mpn_mu_bdiv_qr __MPN(mu_bdiv_qr)
-__GMP_DECLSPEC mp_limb_t mpn_mu_bdiv_qr __GMP_PROTO ((mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr));
+__GMP_DECLSPEC mp_limb_t mpn_mu_bdiv_qr (mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);
  #define   mpn_mu_bdiv_qr_itch __MPN(mu_bdiv_qr_itch)
-__GMP_DECLSPEC mp_size_t mpn_mu_bdiv_qr_itch __GMP_PROTO ((mp_size_t, mp_size_t));
+__GMP_DECLSPEC mp_size_t mpn_mu_bdiv_qr_itch (mp_size_t, mp_size_t);
  
  #define   mpn_mu_bdiv_q __MPN(mu_bdiv_q)
-__GMP_DECLSPEC void      mpn_mu_bdiv_q __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr));
+__GMP_DECLSPEC void      mpn_mu_bdiv_q (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);
  #define   mpn_mu_bdiv_q_itch __MPN(mu_bdiv_q_itch)
-__GMP_DECLSPEC mp_size_t mpn_mu_bdiv_q_itch __GMP_PROTO ((mp_size_t, mp_size_t));
+__GMP_DECLSPEC mp_size_t mpn_mu_bdiv_q_itch (mp_size_t, mp_size_t);
  
  #define   mpn_bdiv_qr __MPN(bdiv_qr)
-__GMP_DECLSPEC mp_limb_t mpn_bdiv_qr __GMP_PROTO ((mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr));
+__GMP_DECLSPEC mp_limb_t mpn_bdiv_qr (mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);
  #define   mpn_bdiv_qr_itch __MPN(bdiv_qr_itch)
-__GMP_DECLSPEC mp_size_t mpn_bdiv_qr_itch __GMP_PROTO ((mp_size_t, mp_size_t));
+__GMP_DECLSPEC mp_size_t mpn_bdiv_qr_itch (mp_size_t, mp_size_t);
  
  #define   mpn_bdiv_q __MPN(bdiv_q)
-__GMP_DECLSPEC void      mpn_bdiv_q __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr));
+__GMP_DECLSPEC void      mpn_bdiv_q (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);
  #define   mpn_bdiv_q_itch __MPN(bdiv_q_itch)
-__GMP_DECLSPEC mp_size_t mpn_bdiv_q_itch __GMP_PROTO ((mp_size_t, mp_size_t));
+__GMP_DECLSPEC mp_size_t mpn_bdiv_q_itch (mp_size_t, mp_size_t);
  
  #define   mpn_divexact __MPN(divexact)
-__GMP_DECLSPEC void      mpn_divexact __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t));
+__GMP_DECLSPEC void      mpn_divexact (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t);
  #define   mpn_divexact_itch __MPN(divexact_itch)
-__GMP_DECLSPEC mp_size_t mpn_divexact_itch __GMP_PROTO ((mp_size_t, mp_size_t));
+__GMP_DECLSPEC mp_size_t mpn_divexact_itch (mp_size_t, mp_size_t);
  
+#ifndef mpn_bdiv_dbm1c  /* if not done with cpuvec in a fat binary */
  #define   mpn_bdiv_dbm1c __MPN(bdiv_dbm1c)
-__GMP_DECLSPEC mp_limb_t mpn_bdiv_dbm1c __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t));
+__GMP_DECLSPEC mp_limb_t mpn_bdiv_dbm1c (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t);
+#endif
+
  #define   mpn_bdiv_dbm1(dst, src, size, divisor) \
    mpn_bdiv_dbm1c (dst, src, size, divisor, __GMP_CAST (mp_limb_t, 0))
  
  #define   mpn_powm __MPN(powm)
-__GMP_DECLSPEC void      mpn_powm __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr));
+__GMP_DECLSPEC void      mpn_powm (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);
  #define   mpn_powlo __MPN(powlo)
-__GMP_DECLSPEC void      mpn_powlo __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_size_t, mp_ptr));
+__GMP_DECLSPEC void      mpn_powlo (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_size_t, mp_ptr);
  #define   mpn_powm_sec __MPN(powm_sec)
-__GMP_DECLSPEC void      mpn_powm_sec __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr));
+__GMP_DECLSPEC void      mpn_powm_sec (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);
  #define   mpn_powm_sec_itch __MPN(powm_sec_itch)
-__GMP_DECLSPEC mp_size_t mpn_powm_sec_itch __GMP_PROTO ((mp_size_t, mp_size_t, mp_size_t));
-#define   mpn_subcnd_n __MPN(subcnd_n)
-__GMP_DECLSPEC mp_limb_t mpn_subcnd_n __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t));
+__GMP_DECLSPEC mp_size_t mpn_powm_sec_itch (mp_size_t, mp_size_t, mp_size_t);
  #define   mpn_tabselect __MPN(tabselect)
-__GMP_DECLSPEC void      mpn_tabselect __GMP_PROTO ((volatile mp_limb_t *, volatile mp_limb_t *, mp_size_t, mp_size_t, mp_size_t));
-#define mpn_redc_1_sec __MPN(redc_1_sec)
-__GMP_DECLSPEC void mpn_redc_1_sec __GMP_PROTO ((mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_limb_t));
+__GMP_DECLSPEC void      mpn_tabselect (volatile mp_limb_t *, volatile mp_limb_t *, mp_size_t, mp_size_t, mp_size_t);
+#define   mpn_addcnd_n __MPN(addcnd_n)
+__GMP_DECLSPEC mp_limb_t mpn_addcnd_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t);
+#define   mpn_subcnd_n __MPN(subcnd_n)
+__GMP_DECLSPEC mp_limb_t mpn_subcnd_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t);
+
+#define mpn_sb_div_qr_sec __MPN(sb_div_qr_sec)
+__GMP_DECLSPEC void mpn_sb_div_qr_sec (mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);
+#define mpn_sbpi1_div_qr_sec __MPN(sbpi1_div_qr_sec)
+__GMP_DECLSPEC mp_limb_t mpn_sbpi1_div_qr_sec (mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t, mp_ptr);
+#define mpn_sb_div_r_sec __MPN(sb_div_r_sec)
+__GMP_DECLSPEC void mpn_sb_div_r_sec (mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);
+#define mpn_sbpi1_div_r_sec __MPN(sbpi1_div_r_sec)
+__GMP_DECLSPEC void mpn_sbpi1_div_r_sec (mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t, mp_ptr);
+
  
  #ifndef DIVEXACT_BY3_METHOD
  #if GMP_NUMB_BITS % 2 == 0 && ! defined (HAVE_NATIVE_mpn_divexact_by3c)
@@ -1340,7 +1594,7 @@ __GMP_DECLSPEC void mpn_redc_1_sec __GMP_PROTO ((mp_ptr, mp_ptr, mp_srcptr, mp_s
    (7 & 3 * mpn_bdiv_dbm1 (dst, src, size, __GMP_CAST (mp_limb_t, GMP_NUMB_MASK / 5)))
  #endif
  
-#if GMP_NUMB_BITS % 6 == 0
+#if GMP_NUMB_BITS % 3 == 0
  #define mpn_divexact_by7(dst,src,size) \
    (7 & 1 * mpn_bdiv_dbm1 (dst, src, size, __GMP_CAST (mp_limb_t, GMP_NUMB_MASK / 7)))
  #endif
@@ -1366,19 +1620,39 @@ __GMP_DECLSPEC void mpn_redc_1_sec __GMP_PROTO ((mp_ptr, mp_ptr, mp_srcptr, mp_s
  #endif
  
  #define mpz_divexact_gcd  __gmpz_divexact_gcd
-__GMP_DECLSPEC void    mpz_divexact_gcd __GMP_PROTO ((mpz_ptr, mpz_srcptr, mpz_srcptr));
+__GMP_DECLSPEC void    mpz_divexact_gcd (mpz_ptr, mpz_srcptr, mpz_srcptr);
+
+#define mpz_prodlimbs  __gmpz_prodlimbs
+__GMP_DECLSPEC mp_size_t mpz_prodlimbs (mpz_ptr, mp_ptr, mp_size_t);
+
+#define mpz_oddfac_1  __gmpz_oddfac_1
+__GMP_DECLSPEC void mpz_oddfac_1 (mpz_ptr, mp_limb_t, unsigned);
  
  #define mpz_inp_str_nowhite __gmpz_inp_str_nowhite
  #ifdef _GMP_H_HAVE_FILE
-__GMP_DECLSPEC size_t  mpz_inp_str_nowhite __GMP_PROTO ((mpz_ptr, FILE *, int, int, size_t));
+__GMP_DECLSPEC size_t  mpz_inp_str_nowhite (mpz_ptr, FILE *, int, int, size_t);
  #endif
  
  #define mpn_divisible_p __MPN(divisible_p)
-__GMP_DECLSPEC int     mpn_divisible_p __GMP_PROTO ((mp_srcptr, mp_size_t, mp_srcptr, mp_size_t)) __GMP_ATTRIBUTE_PURE;
+__GMP_DECLSPEC int     mpn_divisible_p (mp_srcptr, mp_size_t, mp_srcptr, mp_size_t) __GMP_ATTRIBUTE_PURE;
  
  #define   mpn_rootrem __MPN(rootrem)
-__GMP_DECLSPEC mp_size_t mpn_rootrem __GMP_PROTO ((mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_limb_t));
+__GMP_DECLSPEC mp_size_t mpn_rootrem (mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_limb_t);
+
+#define mpn_broot __MPN(broot)
+__GMP_DECLSPEC void mpn_broot (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t);
+
+#define mpn_broot_invm1 __MPN(broot_invm1)
+__GMP_DECLSPEC void mpn_broot_invm1 (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t);
+
+#define mpn_brootinv __MPN(brootinv)
+__GMP_DECLSPEC void mpn_brootinv (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_ptr);
  
+#define mpn_bsqrt __MPN(bsqrt)
+__GMP_DECLSPEC void mpn_bsqrt (mp_ptr, mp_srcptr, mp_bitcnt_t, mp_ptr);
+
+#define mpn_bsqrtinv __MPN(bsqrtinv)
+__GMP_DECLSPEC int mpn_bsqrtinv (mp_ptr, mp_srcptr, mp_bitcnt_t, mp_ptr);
  
  #if defined (_CRAY)
  #define MPN_COPY_INCR(dst, src, n)                                     \
@@ -1393,42 +1667,42 @@ __GMP_DECLSPEC mp_size_t mpn_rootrem __GMP_PROTO ((mp_ptr, mp_ptr, mp_srcptr, mp
  /* used by test programs, hence __GMP_DECLSPEC */
  #ifndef mpn_copyi  /* if not done with cpuvec in a fat binary */
  #define mpn_copyi __MPN(copyi)
-__GMP_DECLSPEC void mpn_copyi __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t));
+__GMP_DECLSPEC void mpn_copyi (mp_ptr, mp_srcptr, mp_size_t);
  #endif
  
  #if ! defined (MPN_COPY_INCR) && HAVE_NATIVE_mpn_copyi
-#define MPN_COPY_INCR(dst, src, size)                   \
-  do {                                                  \
-    ASSERT ((size) >= 0);                               \
-    ASSERT (MPN_SAME_OR_INCR_P (dst, src, size));       \
-    mpn_copyi (dst, src, size);                         \
+#define MPN_COPY_INCR(dst, src, size)                                  \
+  do {                                                                 \
+    ASSERT ((size) >= 0);                                              \
+    ASSERT (MPN_SAME_OR_INCR_P (dst, src, size));                      \
+    mpn_copyi (dst, src, size);                                                \
    } while (0)
  #endif
  
  /* Copy N limbs from SRC to DST incrementing, N==0 allowed.  */
  #if ! defined (MPN_COPY_INCR)
-#define MPN_COPY_INCR(dst, src, n)                      \
-  do {                                                  \
-    ASSERT ((n) >= 0);                                  \
-    ASSERT (MPN_SAME_OR_INCR_P (dst, src, n));          \
-    if ((n) != 0)                                       \
-      {                                                 \
-       mp_size_t __n = (n) - 1;                        \
-       mp_ptr __dst = (dst);                           \
-       mp_srcptr __src = (src);                        \
-       mp_limb_t __x;                                  \
-       __x = *__src++;                                 \
-       if (__n != 0)                                   \
-         {                                             \
-           do                                          \
-             {                                         \
-               *__dst++ = __x;                         \
-               __x = *__src++;                         \
-             }                                         \
-           while (--__n);                              \
-         }                                             \
-       *__dst++ = __x;                                 \
-      }                                                 \
+#define MPN_COPY_INCR(dst, src, n)                                     \
+  do {                                                                 \
+    ASSERT ((n) >= 0);                                                 \
+    ASSERT (MPN_SAME_OR_INCR_P (dst, src, n));                         \
+    if ((n) != 0)                                                      \
+      {                                                                        \
+       mp_size_t __n = (n) - 1;                                        \
+       mp_ptr __dst = (dst);                                           \
+       mp_srcptr __src = (src);                                        \
+       mp_limb_t __x;                                                  \
+       __x = *__src++;                                                 \
+       if (__n != 0)                                                   \
+         {                                                             \
+           do                                                          \
+             {                                                         \
+               *__dst++ = __x;                                         \
+               __x = *__src++;                                         \
+             }                                                         \
+           while (--__n);                                              \
+         }                                                             \
+       *__dst++ = __x;                                                 \
+      }                                                                        \
    } while (0)
  #endif
  
@@ -1446,71 +1720,71 @@ __GMP_DECLSPEC void mpn_copyi __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t));
  /* used by test programs, hence __GMP_DECLSPEC */
  #ifndef mpn_copyd  /* if not done with cpuvec in a fat binary */
  #define mpn_copyd __MPN(copyd)
-__GMP_DECLSPEC void mpn_copyd __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t));
+__GMP_DECLSPEC void mpn_copyd (mp_ptr, mp_srcptr, mp_size_t);
  #endif
  
  #if ! defined (MPN_COPY_DECR) && HAVE_NATIVE_mpn_copyd
-#define MPN_COPY_DECR(dst, src, size)                   \
-  do {                                                  \
-    ASSERT ((size) >= 0);                               \
-    ASSERT (MPN_SAME_OR_DECR_P (dst, src, size));       \
-    mpn_copyd (dst, src, size);                         \
+#define MPN_COPY_DECR(dst, src, size)                                  \
+  do {                                                                 \
+    ASSERT ((size) >= 0);                                              \
+    ASSERT (MPN_SAME_OR_DECR_P (dst, src, size));                      \
+    mpn_copyd (dst, src, size);                                                \
    } while (0)
  #endif
  
  /* Copy N limbs from SRC to DST decrementing, N==0 allowed.  */
  #if ! defined (MPN_COPY_DECR)
-#define MPN_COPY_DECR(dst, src, n)                      \
-  do {                                                  \
-    ASSERT ((n) >= 0);                                  \
-    ASSERT (MPN_SAME_OR_DECR_P (dst, src, n));          \
-    if ((n) != 0)                                       \
-      {                                                 \
-       mp_size_t __n = (n) - 1;                        \
-       mp_ptr __dst = (dst) + __n;                     \
-       mp_srcptr __src = (src) + __n;                  \
-       mp_limb_t __x;                                  \
-       __x = *__src--;                                 \
-       if (__n != 0)                                   \
-         {                                             \
-           do                                          \
-             {                                         \
-               *__dst-- = __x;                         \
-               __x = *__src--;                         \
-             }                                         \
-           while (--__n);                              \
-         }                                             \
-       *__dst-- = __x;                                 \
-      }                                                 \
+#define MPN_COPY_DECR(dst, src, n)                                     \
+  do {                                                                 \
+    ASSERT ((n) >= 0);                                                 \
+    ASSERT (MPN_SAME_OR_DECR_P (dst, src, n));                         \
+    if ((n) != 0)                                                      \
+      {                                                                        \
+       mp_size_t __n = (n) - 1;                                        \
+       mp_ptr __dst = (dst) + __n;                                     \
+       mp_srcptr __src = (src) + __n;                                  \
+       mp_limb_t __x;                                                  \
+       __x = *__src--;                                                 \
+       if (__n != 0)                                                   \
+         {                                                             \
+           do                                                          \
+             {                                                         \
+               *__dst-- = __x;                                         \
+               __x = *__src--;                                         \
+             }                                                         \
+           while (--__n);                                              \
+         }                                                             \
+       *__dst-- = __x;                                                 \
+      }                                                                        \
    } while (0)
  #endif
  
  
  #ifndef MPN_COPY
-#define MPN_COPY(d,s,n)                         \
-  do {                                          \
-    ASSERT (MPN_SAME_OR_SEPARATE_P (d, s, n));  \
-    MPN_COPY_INCR (d, s, n);                    \
+#define MPN_COPY(d,s,n)                                                        \
+  do {                                                                 \
+    ASSERT (MPN_SAME_OR_SEPARATE_P (d, s, n));                         \
+    MPN_COPY_INCR (d, s, n);                                           \
    } while (0)
  #endif
  
  
  /* Set {dst,size} to the limbs of {src,size} in reverse order. */
-#define MPN_REVERSE(dst, src, size)                     \
-  do {                                                  \
-    mp_ptr     __dst = (dst);                           \
-    mp_size_t  __size = (size);                         \
-    mp_srcptr  __src = (src) + __size - 1;              \
-    mp_size_t  __i;                                     \
-    ASSERT ((size) >= 0);                               \
-    ASSERT (! MPN_OVERLAP_P (dst, size, src, size));    \
-    CRAY_Pragma ("_CRI ivdep");                         \
-    for (__i = 0; __i < __size; __i++)                  \
-      {                                                 \
-        *__dst = *__src;                                \
-        __dst++;                                        \
-        __src--;                                        \
-      }                                                 \
+#define MPN_REVERSE(dst, src, size)                                    \
+  do {                                                                 \
+    mp_ptr     __dst = (dst);                                          \
+    mp_size_t  __size = (size);                                                \
+    mp_srcptr  __src = (src) + __size - 1;                             \
+    mp_size_t  __i;                                                    \
+    ASSERT ((size) >= 0);                                              \
+    ASSERT (! MPN_OVERLAP_P (dst, size, src, size));                   \
+    CRAY_Pragma ("_CRI ivdep");                                                \
+    for (__i = 0; __i < __size; __i++)                                 \
+      {                                                                        \
+       *__dst = *__src;                                                \
+       __dst++;                                                        \
+       __src--;                                                        \
+      }                                                                        \
    } while (0)
  
  
@@ -1534,32 +1808,32 @@ __GMP_DECLSPEC void mpn_copyd __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t));
     would be good when on a GNU system.  */
  
  #if HAVE_HOST_CPU_FAMILY_power || HAVE_HOST_CPU_FAMILY_powerpc
-#define MPN_ZERO(dst, n)                       \
-  do {                                         \
-    ASSERT ((n) >= 0);                         \
-    if ((n) != 0)                              \
-      {                                                \
-       mp_ptr __dst = (dst) - 1;               \
-       mp_size_t __n = (n);                    \
-       do                                      \
-         *++__dst = 0;                         \
-       while (--__n);                          \
-      }                                                \
+#define MPN_ZERO(dst, n)                                               \
+  do {                                                                 \
+    ASSERT ((n) >= 0);                                                 \
+    if ((n) != 0)                                                      \
+      {                                                                        \
+       mp_ptr __dst = (dst) - 1;                                       \
+       mp_size_t __n = (n);                                            \
+       do                                                              \
+         *++__dst = 0;                                                 \
+       while (--__n);                                                  \
+      }                                                                        \
    } while (0)
  #endif
  
  #ifndef MPN_ZERO
-#define MPN_ZERO(dst, n)                       \
-  do {                                         \
-    ASSERT ((n) >= 0);                         \
-    if ((n) != 0)                              \
-      {                                                \
-       mp_ptr __dst = (dst);                   \
-       mp_size_t __n = (n);                    \
-       do                                      \
-         *__dst++ = 0;                         \
-       while (--__n);                          \
-      }                                                \
+#define MPN_ZERO(dst, n)                                               \
+  do {                                                                 \
+    ASSERT ((n) >= 0);                                                 \
+    if ((n) != 0)                                                      \
+      {                                                                        \
+       mp_ptr __dst = (dst);                                           \
+       mp_size_t __n = (n);                                            \
+       do                                                              \
+         *__dst++ = 0;                                                 \
+       while (--__n);                                                  \
+      }                                                                        \
    } while (0)
  #endif
  
@@ -1570,16 +1844,16 @@ __GMP_DECLSPEC void mpn_copyd __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t));
     std/repe/scasl/cld and cld/repe/scasl (the latter would be for stripping
     low zeros).
  
-                std   cld
-           P5    18    16
-           P6    46    38
-           K6    36    13
-           K7    21    20
+               std   cld
+          P5    18    16
+          P6    46    38
+          K6    36    13
+          K7    21    20
  */
  #ifndef MPN_NORMALIZE
  #define MPN_NORMALIZE(DST, NLIMBS) \
    do {                                                                 \
-    while ((NLIMBS) > 0)                                                \
+    while ((NLIMBS) > 0)                                               \
        {                                                                        \
         if ((DST)[(NLIMBS) - 1] != 0)                                   \
           break;                                                        \
@@ -1588,15 +1862,15 @@ __GMP_DECLSPEC void mpn_copyd __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t));
    } while (0)
  #endif
  #ifndef MPN_NORMALIZE_NOT_ZERO
-#define MPN_NORMALIZE_NOT_ZERO(DST, NLIMBS)     \
-  do {                                          \
-    ASSERT ((NLIMBS) >= 1);                     \
-    while (1)                                   \
-      {                                         \
-       if ((DST)[(NLIMBS) - 1] != 0)           \
-         break;                                \
-       (NLIMBS)--;                             \
-      }                                         \
+#define MPN_NORMALIZE_NOT_ZERO(DST, NLIMBS)                            \
+  do {                                                                 \
+    while (1)                                                          \
+      {                                                                        \
+       ASSERT ((NLIMBS) >= 1);                                         \
+       if ((DST)[(NLIMBS) - 1] != 0)                                   \
+         break;                                                        \
+       (NLIMBS)--;                                                     \
+      }                                                                        \
    } while (0)
  #endif
  
@@ -1604,36 +1878,51 @@ __GMP_DECLSPEC void mpn_copyd __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t));
     and decrementing size.  low should be ptr[0], and will be the new ptr[0]
     on returning.  The number in {ptr,size} must be non-zero, ie. size!=0 and
     somewhere a non-zero limb.  */
-#define MPN_STRIP_LOW_ZEROS_NOT_ZERO(ptr, size, low)    \
-  do {                                                  \
-    ASSERT ((size) >= 1);                               \
-    ASSERT ((low) == (ptr)[0]);                         \
-                                                        \
-    while ((low) == 0)                                  \
-      {                                                 \
-        (size)--;                                       \
-        ASSERT ((size) >= 1);                           \
-        (ptr)++;                                        \
-        (low) = *(ptr);                                 \
-      }                                                 \
+#define MPN_STRIP_LOW_ZEROS_NOT_ZERO(ptr, size, low)                   \
+  do {                                                                 \
+    ASSERT ((size) >= 1);                                              \
+    ASSERT ((low) == (ptr)[0]);                                                \
+                                                                       \
+    while ((low) == 0)                                                 \
+      {                                                                        \
+       (size)--;                                                       \
+       ASSERT ((size) >= 1);                                           \
+       (ptr)++;                                                        \
+       (low) = *(ptr);                                                 \
+      }                                                                        \
    } while (0)
  
  /* Initialize X of type mpz_t with space for NLIMBS limbs.  X should be a
     temporary variable; it will be automatically cleared out at function
     return.  We use __x here to make it possible to accept both mpz_ptr and
     mpz_t arguments.  */
-#define MPZ_TMP_INIT(X, NLIMBS)                                         \
-  do {                                                                  \
-    mpz_ptr __x = (X);                                                  \
-    ASSERT ((NLIMBS) >= 1);                                             \
-    __x->_mp_alloc = (NLIMBS);                                          \
+#define MPZ_TMP_INIT(X, NLIMBS)                                                \
+  do {                                                                 \
+    mpz_ptr __x = (X);                                                 \
+    ASSERT ((NLIMBS) >= 1);                                            \
+    __x->_mp_alloc = (NLIMBS);                                         \
      __x->_mp_d = TMP_ALLOC_LIMBS (NLIMBS);                             \
    } while (0)
  
+#if WANT_ASSERT
+static inline void *
+_mpz_newalloc (mpz_ptr z, mp_size_t n)
+{
+  void * res = _mpz_realloc(z,n);
+  /* Assert-checking builds (WANT_ASSERT) only: after the realloc, deliberately overwrite limb 0 with the complement of the limb at index ALLOC (z) - 1.  The value is not random; presumably the point is to make callers that wrongly rely on old limb contents surviving fail visibly (TODO confirm intent). */
+  ((mp_ptr) res)[0] = ~ ((mp_ptr) res)[ALLOC (z) - 1];
+  return res;
+}
+#else
+#define _mpz_newalloc _mpz_realloc
+#endif
  /* Realloc for an mpz_t WHAT if it has less than NEEDED limbs.  */
-#define MPZ_REALLOC(z,n) (UNLIKELY ((n) > ALLOC(z))     \
-                          ? (mp_ptr) _mpz_realloc(z,n)  \
-                          : PTR(z))
+#define MPZ_REALLOC(z,n) (UNLIKELY ((n) > ALLOC(z))                    \
+                         ? (mp_ptr) _mpz_realloc(z,n)                  \
+                         : PTR(z))
+#define MPZ_NEWALLOC(z,n) (UNLIKELY ((n) > ALLOC(z))                   \
+                          ? (mp_ptr) _mpz_newalloc(z,n)                \
+                          : PTR(z))  /* Yields PTR(z) unless n exceeds ALLOC(z), in which case _mpz_newalloc is called; z and n may each be evaluated twice. */
  
  #define MPZ_EQUAL_1_P(z)  (SIZ(z)==1 && PTR(z)[0] == 1)
  
@@ -1671,6 +1960,20 @@ __GMP_DECLSPEC void mpn_copyd __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t));
  __GMP_DECLSPEC extern const mp_limb_t __gmp_fib_table[];
  #define FIB_TABLE(n)  (__gmp_fib_table[(n)+1])
  
+extern const mp_limb_t __gmp_oddfac_table[];
+extern const mp_limb_t __gmp_odd2fac_table[];
+extern const unsigned char __gmp_fac2cnt_table[];
+extern const mp_limb_t __gmp_limbroots_table[];
+
+/* Return the largest log <= 8 such that n <= __gmp_limbroots_table[log - 1]; the intent is n^log <= GMP_NUMB_MAX, i.e. a limb can store the product of log factors none larger than n.  NOTE(review): assumes entry 0 of the table bounds every n, otherwise the scan runs off the start of the table -- confirm against the table definition. */
+static inline unsigned
+log_n_max (mp_limb_t n)
+{
+  unsigned log;
+  for (log = 8; n > __gmp_limbroots_table[log - 1]; log--);  /* empty body: scan downward from 8 until the table entry is >= n */
+  return log;
+}
+
  #define SIEVESIZE 512          /* FIXME: Allow gmp_init_primesieve to choose */
  typedef struct
  {
@@ -1686,6 +1989,9 @@ __GMP_DECLSPEC void gmp_init_primesieve (gmp_primesieve_t *);
  #define gmp_nextprime __gmp_nextprime
  __GMP_DECLSPEC unsigned long int gmp_nextprime (gmp_primesieve_t *);
  
+#define gmp_primesieve __gmp_primesieve
+__GMP_DECLSPEC mp_limb_t gmp_primesieve (mp_ptr, mp_limb_t);
+
  
  #ifndef MUL_TOOM22_THRESHOLD
  #define MUL_TOOM22_THRESHOLD             30
@@ -1731,6 +2037,10 @@ __GMP_DECLSPEC unsigned long int gmp_nextprime (gmp_primesieve_t *);
  #define MUL_TOOM42_TO_TOOM63_THRESHOLD  110
  #endif
  
+#ifndef MUL_TOOM43_TO_TOOM54_THRESHOLD
+#define MUL_TOOM43_TO_TOOM54_THRESHOLD  150
+#endif
+
  /* MUL_TOOM22_THRESHOLD_LIMIT is the maximum for MUL_TOOM22_THRESHOLD.  In a
     normal build MUL_TOOM22_THRESHOLD is a constant and we use that.  In a fat
     binary or tune program build MUL_TOOM22_THRESHOLD is a variable and a
@@ -1776,6 +2086,10 @@ __GMP_DECLSPEC unsigned long int gmp_nextprime (gmp_primesieve_t *);
  #define SQR_TOOM3_THRESHOLD_LIMIT  SQR_TOOM3_THRESHOLD
  #endif
  
+#ifndef MULMID_TOOM42_THRESHOLD
+#define MULMID_TOOM42_THRESHOLD     MUL_TOOM22_THRESHOLD
+#endif
+
  #ifndef DC_DIV_QR_THRESHOLD
  #define DC_DIV_QR_THRESHOLD              50
  #endif
@@ -1891,23 +2205,23 @@ __GMP_DECLSPEC unsigned long int gmp_nextprime (gmp_primesieve_t *);
     where FFT_FIRST_K+1 should be used, the second FFT_FIRST_K+2,
     etc.  See mpn_fft_best_k(). */
  #ifndef MUL_FFT_TABLE
-#define MUL_FFT_TABLE                           \
-  { MUL_TOOM33_THRESHOLD * 4,   /* k=5 */        \
-    MUL_TOOM33_THRESHOLD * 8,   /* k=6 */        \
-    MUL_TOOM33_THRESHOLD * 16,  /* k=7 */        \
-    MUL_TOOM33_THRESHOLD * 32,  /* k=8 */        \
-    MUL_TOOM33_THRESHOLD * 96,  /* k=9 */        \
-    MUL_TOOM33_THRESHOLD * 288, /* k=10 */       \
+#define MUL_FFT_TABLE                                                  \
+  { MUL_TOOM33_THRESHOLD * 4,   /* k=5 */                              \
+    MUL_TOOM33_THRESHOLD * 8,   /* k=6 */                              \
+    MUL_TOOM33_THRESHOLD * 16,  /* k=7 */                              \
+    MUL_TOOM33_THRESHOLD * 32,  /* k=8 */                              \
+    MUL_TOOM33_THRESHOLD * 96,  /* k=9 */                              \
+    MUL_TOOM33_THRESHOLD * 288, /* k=10 */                             \
      0 }
  #endif
  #ifndef SQR_FFT_TABLE
-#define SQR_FFT_TABLE                           \
-  { SQR_TOOM3_THRESHOLD * 4,   /* k=5 */        \
-    SQR_TOOM3_THRESHOLD * 8,   /* k=6 */        \
-    SQR_TOOM3_THRESHOLD * 16,  /* k=7 */        \
-    SQR_TOOM3_THRESHOLD * 32,  /* k=8 */        \
-    SQR_TOOM3_THRESHOLD * 96,  /* k=9 */        \
-    SQR_TOOM3_THRESHOLD * 288, /* k=10 */       \
+#define SQR_FFT_TABLE                                                  \
+  { SQR_TOOM3_THRESHOLD * 4,   /* k=5 */                               \
+    SQR_TOOM3_THRESHOLD * 8,   /* k=6 */                               \
+    SQR_TOOM3_THRESHOLD * 16,  /* k=7 */                               \
+    SQR_TOOM3_THRESHOLD * 32,  /* k=8 */                               \
+    SQR_TOOM3_THRESHOLD * 96,  /* k=9 */                               \
+    SQR_TOOM3_THRESHOLD * 288, /* k=10 */                              \
      0 }
  #endif
  
@@ -1944,32 +2258,40 @@ struct fft_table_nk
  #define SET_STR_PRECOMPUTE_THRESHOLD   2000
  #endif
  
+#ifndef FAC_ODD_THRESHOLD
+#define FAC_ODD_THRESHOLD    35
+#endif
+
+#ifndef FAC_DSC_THRESHOLD
+#define FAC_DSC_THRESHOLD   400
+#endif
+
  /* Return non-zero if xp,xsize and yp,ysize overlap.
     If xp+xsize<=yp there's no overlap, or if yp+ysize<=xp there's no
     overlap.  If both these are false, there's an overlap. */
-#define MPN_OVERLAP_P(xp, xsize, yp, ysize) \
+#define MPN_OVERLAP_P(xp, xsize, yp, ysize)                            \
    ((xp) + (xsize) > (yp) && (yp) + (ysize) > (xp))
-#define MEM_OVERLAP_P(xp, xsize, yp, ysize)     \
-  (   (char *) (xp) + (xsize) > (char *) (yp)   \
+#define MEM_OVERLAP_P(xp, xsize, yp, ysize)                            \
+  (   (char *) (xp) + (xsize) > (char *) (yp)                          \
     && (char *) (yp) + (ysize) > (char *) (xp))
  
  /* Return non-zero if xp,xsize and yp,ysize are either identical or not
     overlapping.  Return zero if they're partially overlapping. */
-#define MPN_SAME_OR_SEPARATE_P(xp, yp, size)    \
+#define MPN_SAME_OR_SEPARATE_P(xp, yp, size)                           \
    MPN_SAME_OR_SEPARATE2_P(xp, size, yp, size)
-#define MPN_SAME_OR_SEPARATE2_P(xp, xsize, yp, ysize)           \
+#define MPN_SAME_OR_SEPARATE2_P(xp, xsize, yp, ysize)                  \
    ((xp) == (yp) || ! MPN_OVERLAP_P (xp, xsize, yp, ysize))
  
  /* Return non-zero if dst,dsize and src,ssize are either identical or
     overlapping in a way suitable for an incrementing/decrementing algorithm.
     Return zero if they're partially overlapping in an unsuitable fashion. */
-#define MPN_SAME_OR_INCR2_P(dst, dsize, src, ssize)             \
+#define MPN_SAME_OR_INCR2_P(dst, dsize, src, ssize)                    \
    ((dst) <= (src) || ! MPN_OVERLAP_P (dst, dsize, src, ssize))
-#define MPN_SAME_OR_INCR_P(dst, src, size)      \
+#define MPN_SAME_OR_INCR_P(dst, src, size)                             \
    MPN_SAME_OR_INCR2_P(dst, size, src, size)
-#define MPN_SAME_OR_DECR2_P(dst, dsize, src, ssize)             \
+#define MPN_SAME_OR_DECR2_P(dst, dsize, src, ssize)                    \
    ((dst) >= (src) || ! MPN_OVERLAP_P (dst, dsize, src, ssize))
-#define MPN_SAME_OR_DECR_P(dst, src, size)      \
+#define MPN_SAME_OR_DECR_P(dst, src, size)                             \
    MPN_SAME_OR_DECR2_P(dst, size, src, size)
  
  
@@ -1994,8 +2316,8 @@ struct fft_table_nk
  #define ASSERT_FILE  ""
  #endif
  
-__GMP_DECLSPEC void __gmp_assert_header __GMP_PROTO ((const char *, int));
-__GMP_DECLSPEC void __gmp_assert_fail __GMP_PROTO ((const char *, int, const char *)) ATTRIBUTE_NORETURN;
+__GMP_DECLSPEC void __gmp_assert_header (const char *, int);
+__GMP_DECLSPEC void __gmp_assert_fail (const char *, int, const char *) ATTRIBUTE_NORETURN;
  
  #if HAVE_STRINGIZE
  #define ASSERT_FAIL(expr)  __gmp_assert_fail (ASSERT_FILE, ASSERT_LINE, #expr)
@@ -2003,10 +2325,10 @@ __GMP_DECLSPEC void __gmp_assert_fail __GMP_PROTO ((const char *, int, const cha
  #define ASSERT_FAIL(expr)  __gmp_assert_fail (ASSERT_FILE, ASSERT_LINE, "expr")
  #endif
  
-#define ASSERT_ALWAYS(expr)     \
-  do {                          \
-    if (!(expr))                \
-      ASSERT_FAIL (expr);       \
+#define ASSERT_ALWAYS(expr)                                            \
+  do {                                                                 \
+    if (UNLIKELY (!(expr)))                                            \
+      ASSERT_FAIL (expr);                                              \
    } while (0)
  
  #if WANT_ASSERT
@@ -2044,43 +2366,43 @@ __GMP_DECLSPEC void __gmp_assert_fail __GMP_PROTO ((const char *, int, const cha
     protection on routines like mpq_equal which give wrong results on
     non-canonical inputs.  */
  #if WANT_ASSERT
-#define ASSERT_MPQ_CANONICAL(q)                         \
-  do {                                                  \
-    ASSERT (q->_mp_den._mp_size > 0);                   \
-    if (q->_mp_num._mp_size == 0)                       \
-      {                                                 \
-        /* zero should be 0/1 */                        \
-        ASSERT (mpz_cmp_ui (mpq_denref(q), 1L) == 0);   \
-      }                                                 \
-    else                                                \
-      {                                                 \
-        /* no common factors */                         \
-        mpz_t  __g;                                     \
-        mpz_init (__g);                                 \
-        mpz_gcd (__g, mpq_numref(q), mpq_denref(q));    \
-        ASSERT (mpz_cmp_ui (__g, 1) == 0);              \
-        mpz_clear (__g);                                \
-      }                                                 \
+#define ASSERT_MPQ_CANONICAL(q)                                                \
+  do {                                                                 \
+    ASSERT (q->_mp_den._mp_size > 0);                                  \
+    if (q->_mp_num._mp_size == 0)                                      \
+      {                                                                        \
+       /* zero should be 0/1 */                                        \
+       ASSERT (mpz_cmp_ui (mpq_denref(q), 1L) == 0);                   \
+      }                                                                        \
+    else                                                               \
+      {                                                                        \
+       /* no common factors */                                         \
+       mpz_t  __g;                                                     \
+       mpz_init (__g);                                                 \
+       mpz_gcd (__g, mpq_numref(q), mpq_denref(q));                    \
+       ASSERT (mpz_cmp_ui (__g, 1) == 0);                              \
+       mpz_clear (__g);                                                \
+      }                                                                        \
    } while (0)
  #else
-#define ASSERT_MPQ_CANONICAL(q)  do {} while (0)
+#define ASSERT_MPQ_CANONICAL(q)         do {} while (0)
  #endif
  
  /* Check that the nail parts are zero. */
-#define ASSERT_ALWAYS_LIMB(limb)                \
-  do {                                          \
-    mp_limb_t  __nail = (limb) & GMP_NAIL_MASK; \
-    ASSERT_ALWAYS (__nail == 0);                \
+#define ASSERT_ALWAYS_LIMB(limb)                                       \
+  do {                                                                 \
+    mp_limb_t  __nail = (limb) & GMP_NAIL_MASK;                                \
+    ASSERT_ALWAYS (__nail == 0);                                       \
    } while (0)
-#define ASSERT_ALWAYS_MPN(ptr, size)            \
-  do {                                          \
-    /* let whole loop go dead when no nails */  \
-    if (GMP_NAIL_BITS != 0)                     \
-      {                                         \
-        mp_size_t  __i;                         \
-        for (__i = 0; __i < (size); __i++)      \
-          ASSERT_ALWAYS_LIMB ((ptr)[__i]);      \
-      }                                         \
+#define ASSERT_ALWAYS_MPN(ptr, size)                                   \
+  do {                                                                 \
+    /* let whole loop go dead when no nails */                         \
+    if (GMP_NAIL_BITS != 0)                                            \
+      {                                                                        \
+       mp_size_t  __i;                                                 \
+       for (__i = 0; __i < (size); __i++)                              \
+         ASSERT_ALWAYS_LIMB ((ptr)[__i]);                              \
+      }                                                                        \
    } while (0)
  #if WANT_ASSERT
  #define ASSERT_LIMB(limb)       ASSERT_ALWAYS_LIMB (limb)
@@ -2094,25 +2416,25 @@ __GMP_DECLSPEC void __gmp_assert_fail __GMP_PROTO ((const char *, int, const cha
  /* Assert that an mpn region {ptr,size} is zero, or non-zero.
     size==0 is allowed, and in that case {ptr,size} considered to be zero.  */
  #if WANT_ASSERT
-#define ASSERT_MPN_ZERO_P(ptr,size)     \
-  do {                                  \
-    mp_size_t  __i;                     \
-    ASSERT ((size) >= 0);               \
-    for (__i = 0; __i < (size); __i++)  \
-      ASSERT ((ptr)[__i] == 0);         \
+#define ASSERT_MPN_ZERO_P(ptr,size)                                    \
+  do {                                                                 \
+    mp_size_t  __i;                                                    \
+    ASSERT ((size) >= 0);                                              \
+    for (__i = 0; __i < (size); __i++)                                 \
+      ASSERT ((ptr)[__i] == 0);                                                \
    } while (0)
-#define ASSERT_MPN_NONZERO_P(ptr,size)  \
-  do {                                  \
-    mp_size_t  __i;                     \
-    int        __nonzero = 0;           \
-    ASSERT ((size) >= 0);               \
-    for (__i = 0; __i < (size); __i++)  \
-      if ((ptr)[__i] != 0)              \
-        {                               \
-          __nonzero = 1;                \
-          break;                        \
-        }                               \
-    ASSERT (__nonzero);                 \
+#define ASSERT_MPN_NONZERO_P(ptr,size)                                 \
+  do {                                                                 \
+    mp_size_t  __i;                                                    \
+    int               __nonzero = 0;                                           \
+    ASSERT ((size) >= 0);                                              \
+    for (__i = 0; __i < (size); __i++)                                 \
+      if ((ptr)[__i] != 0)                                             \
+       {                                                               \
+         __nonzero = 1;                                                \
+         break;                                                        \
+       }                                                               \
+    ASSERT (__nonzero);                                                        \
    } while (0)
  #else
  #define ASSERT_MPN_ZERO_P(ptr,size)     do {} while (0)
@@ -2122,16 +2444,16 @@ __GMP_DECLSPEC void __gmp_assert_fail __GMP_PROTO ((const char *, int, const cha
  
  #if ! HAVE_NATIVE_mpn_com
  #undef mpn_com
-#define mpn_com(d,s,n)                                  \
-  do {                                                  \
-    mp_ptr     __d = (d);                               \
-    mp_srcptr  __s = (s);                               \
-    mp_size_t  __n = (n);                               \
-    ASSERT (__n >= 1);                                  \
-    ASSERT (MPN_SAME_OR_SEPARATE_P (__d, __s, __n));    \
-    do                                                  \
-      *__d++ = (~ *__s++) & GMP_NUMB_MASK;              \
-    while (--__n);                                      \
+#define mpn_com(d,s,n)                                                 \
+  do {                                                                 \
+    mp_ptr     __d = (d);                                              \
+    mp_srcptr  __s = (s);                                              \
+    mp_size_t  __n = (n);                                              \
+    ASSERT (__n >= 1);                                                 \
+    ASSERT (MPN_SAME_OR_SEPARATE_P (__d, __s, __n));                   \
+    do                                                                 \
+      *__d++ = (~ *__s++) & GMP_NUMB_MASK;                             \
+    while (--__n);                                                     \
    } while (0)
  #endif
  
@@ -2206,51 +2528,51 @@ __GMP_DECLSPEC void __gmp_assert_fail __GMP_PROTO ((const char *, int, const cha
  #endif
  
  #define mpn_trialdiv __MPN(trialdiv)
-__GMP_DECLSPEC mp_limb_t mpn_trialdiv __GMP_PROTO ((mp_srcptr, mp_size_t, mp_size_t, int *));
+__GMP_DECLSPEC mp_limb_t mpn_trialdiv (mp_srcptr, mp_size_t, mp_size_t, int *);
  
  #define mpn_remove __MPN(remove)
-__GMP_DECLSPEC mp_bitcnt_t mpn_remove __GMP_PROTO ((mp_ptr, mp_size_t *, mp_ptr, mp_size_t, mp_ptr, mp_size_t, mp_bitcnt_t));
+__GMP_DECLSPEC mp_bitcnt_t mpn_remove (mp_ptr, mp_size_t *, mp_ptr, mp_size_t, mp_ptr, mp_size_t, mp_bitcnt_t);
  
  
  /* ADDC_LIMB sets w=x+y and cout to 0 or 1 for a carry from that addition. */
  #if GMP_NAIL_BITS == 0
-#define ADDC_LIMB(cout, w, x, y)        \
-  do {                                  \
-    mp_limb_t  __x = (x);               \
-    mp_limb_t  __y = (y);               \
-    mp_limb_t  __w = __x + __y;         \
-    (w) = __w;                          \
-    (cout) = __w < __x;                 \
+#define ADDC_LIMB(cout, w, x, y)                                       \
+  do {                                                                 \
+    mp_limb_t  __x = (x);                                              \
+    mp_limb_t  __y = (y);                                              \
+    mp_limb_t  __w = __x + __y;                                                \
+    (w) = __w;                                                         \
+    (cout) = __w < __x;                                                        \
    } while (0)
  #else
-#define ADDC_LIMB(cout, w, x, y)        \
-  do {                                  \
-    mp_limb_t  __w;                     \
-    ASSERT_LIMB (x);                    \
-    ASSERT_LIMB (y);                    \
-    __w = (x) + (y);                    \
-    (w) = __w & GMP_NUMB_MASK;          \
-    (cout) = __w >> GMP_NUMB_BITS;      \
+#define ADDC_LIMB(cout, w, x, y)                                       \
+  do {                                                                 \
+    mp_limb_t  __w;                                                    \
+    ASSERT_LIMB (x);                                                   \
+    ASSERT_LIMB (y);                                                   \
+    __w = (x) + (y);                                                   \
+    (w) = __w & GMP_NUMB_MASK;                                         \
+    (cout) = __w >> GMP_NUMB_BITS;                                     \
    } while (0)
  #endif
  
  /* SUBC_LIMB sets w=x-y and cout to 0 or 1 for a borrow from that
     subtract.  */
  #if GMP_NAIL_BITS == 0
-#define SUBC_LIMB(cout, w, x, y)        \
-  do {                                  \
-    mp_limb_t  __x = (x);               \
-    mp_limb_t  __y = (y);               \
-    mp_limb_t  __w = __x - __y;         \
-    (w) = __w;                          \
-    (cout) = __w > __x;                 \
+#define SUBC_LIMB(cout, w, x, y)                                       \
+  do {                                                                 \
+    mp_limb_t  __x = (x);                                              \
+    mp_limb_t  __y = (y);                                              \
+    mp_limb_t  __w = __x - __y;                                                \
+    (w) = __w;                                                         \
+    (cout) = __w > __x;                                                        \
    } while (0)
  #else
-#define SUBC_LIMB(cout, w, x, y)        \
-  do {                                  \
-    mp_limb_t  __w = (x) - (y);         \
-    (w) = __w & GMP_NUMB_MASK;          \
-    (cout) = __w >> (GMP_LIMB_BITS-1);  \
+#define SUBC_LIMB(cout, w, x, y)                                       \
+  do {                                                                 \
+    mp_limb_t  __w = (x) - (y);                                                \
+    (w) = __w & GMP_NUMB_MASK;                                         \
+    (cout) = __w >> (GMP_LIMB_BITS-1);                                 \
    } while (0)
  #endif
  
@@ -2270,160 +2592,170 @@ __GMP_DECLSPEC mp_bitcnt_t mpn_remove __GMP_PROTO ((mp_ptr, mp_size_t *, mp_ptr,
     declaring their operand sizes, then remove the former.  This is purely
     for the benefit of assertion checking.  */
  
-#if defined (__GNUC__) && HAVE_HOST_CPU_FAMILY_x86 && GMP_NAIL_BITS == 0      \
-  && GMP_LIMB_BITS == 32 && ! defined (NO_ASM) && ! WANT_ASSERT
+#if defined (__GNUC__) && GMP_NAIL_BITS == 0 && ! defined (NO_ASM)     \
+  && (defined(HAVE_HOST_CPU_FAMILY_x86) || defined(HAVE_HOST_CPU_FAMILY_x86_64)) \
+  && ! WANT_ASSERT
  /* Better flags handling than the generic C gives on i386, saving a few
     bytes of code and maybe a cycle or two.  */
  
  #define MPN_IORD_U(ptr, incr, aors)                                    \
    do {                                                                 \
      mp_ptr  __ptr_dummy;                                               \
-    if (__builtin_constant_p (incr) && (incr) == 1)                    \
+    if (__builtin_constant_p (incr) && (incr) == 0)                    \
        {                                                                        \
-        __asm__ __volatile__                                           \
-          ("\n" ASM_L(top) ":\n"                                       \
-           "\t" aors " $1, (%0)\n"                                     \
-           "\tleal 4(%0),%0\n"                                         \
-           "\tjc " ASM_L(top)                                          \
-           : "=r" (__ptr_dummy)                                                \
-           : "0"  (ptr)                                                        \
-           : "memory");                                                        \
+      }                                                                        \
+    else if (__builtin_constant_p (incr) && (incr) == 1)               \
+      {                                                                        \
+       __asm__ __volatile__                                            \
+         ("\n" ASM_L(top) ":\n"                                        \
+          "\t" aors "\t$1, (%0)\n"                                     \
+          "\tlea\t%c2(%0), %0\n"                                       \
+          "\tjc\t" ASM_L(top)                                          \
+          : "=r" (__ptr_dummy)                                         \
+          : "0"  (ptr), "n" (sizeof(mp_limb_t))                        \
+          : "memory");                                                 \
        }                                                                        \
      else                                                               \
        {                                                                        \
-        __asm__ __volatile__                                           \
-          (   aors  " %2,(%0)\n"                                       \
-           "\tjnc " ASM_L(done) "\n"                                   \
-           ASM_L(top) ":\n"                                            \
-           "\t" aors " $1,4(%0)\n"                                     \
-           "\tleal 4(%0),%0\n"                                         \
-           "\tjc " ASM_L(top) "\n"                                     \
-           ASM_L(done) ":\n"                                           \
-           : "=r" (__ptr_dummy)                                                \
-           : "0"  (ptr),                                               \
-             "ri" (incr)                                               \
-           : "memory");                                                        \
+       __asm__ __volatile__                                            \
+         (   aors  "\t%2, (%0)\n"                                      \
+          "\tjnc\t" ASM_L(done) "\n"                                   \
+          ASM_L(top) ":\n"                                             \
+          "\t" aors "\t$1, %c3(%0)\n"                                  \
+          "\tlea\t%c3(%0), %0\n"                                       \
+          "\tjc\t" ASM_L(top) "\n"                                     \
+          ASM_L(done) ":\n"                                            \
+          : "=r" (__ptr_dummy)                                         \
+          : "0"  (ptr),                                                \
+            "ri" ((mp_limb_t) (incr)), "n" (sizeof(mp_limb_t))         \
+          : "memory");                                                 \
        }                                                                        \
    } while (0)
  
+#if GMP_LIMB_BITS == 32
  #define MPN_INCR_U(ptr, size, incr)  MPN_IORD_U (ptr, incr, "addl")
  #define MPN_DECR_U(ptr, size, incr)  MPN_IORD_U (ptr, incr, "subl")
+#endif
+#if GMP_LIMB_BITS == 64
+#define MPN_INCR_U(ptr, size, incr)  MPN_IORD_U (ptr, incr, "addq")
+#define MPN_DECR_U(ptr, size, incr)  MPN_IORD_U (ptr, incr, "subq")
+#endif
  #define mpn_incr_u(ptr, incr)  MPN_INCR_U (ptr, 0, incr)
  #define mpn_decr_u(ptr, incr)  MPN_DECR_U (ptr, 0, incr)
  #endif
  
  #if GMP_NAIL_BITS == 0
  #ifndef mpn_incr_u
-#define mpn_incr_u(p,incr)                              \
-  do {                                                  \
-    mp_limb_t __x;                                      \
-    mp_ptr __p = (p);                                   \
-    if (__builtin_constant_p (incr) && (incr) == 1)     \
-      {                                                 \
-        while (++(*(__p++)) == 0)                       \
-          ;                                             \
-      }                                                 \
-    else                                                \
-      {                                                 \
-        __x = *__p + (incr);                            \
-        *__p = __x;                                     \
-        if (__x < (incr))                               \
-          while (++(*(++__p)) == 0)                     \
-            ;                                           \
-      }                                                 \
+#define mpn_incr_u(p,incr)                                             \
+  do {                                                                 \
+    mp_limb_t __x;                                                     \
+    mp_ptr __p = (p);                                                  \
+    if (__builtin_constant_p (incr) && (incr) == 1)                    \
+      {                                                                        \
+       while (++(*(__p++)) == 0)                                       \
+         ;                                                             \
+      }                                                                        \
+    else                                                               \
+      {                                                                        \
+       __x = *__p + (incr);                                            \
+       *__p = __x;                                                     \
+       if (__x < (incr))                                               \
+         while (++(*(++__p)) == 0)                                     \
+           ;                                                           \
+      }                                                                        \
    } while (0)
  #endif
  #ifndef mpn_decr_u
-#define mpn_decr_u(p,incr)                              \
-  do {                                                  \
-    mp_limb_t __x;                                      \
-    mp_ptr __p = (p);                                   \
-    if (__builtin_constant_p (incr) && (incr) == 1)     \
-      {                                                 \
-        while ((*(__p++))-- == 0)                       \
-          ;                                             \
-      }                                                 \
-    else                                                \
-      {                                                 \
-        __x = *__p;                                     \
-        *__p = __x - (incr);                            \
-        if (__x < (incr))                               \
-          while ((*(++__p))-- == 0)                     \
-            ;                                           \
-      }                                                 \
+#define mpn_decr_u(p,incr)                                             \
+  do {                                                                 \
+    mp_limb_t __x;                                                     \
+    mp_ptr __p = (p);                                                  \
+    if (__builtin_constant_p (incr) && (incr) == 1)                    \
+      {                                                                        \
+       while ((*(__p++))-- == 0)                                       \
+         ;                                                             \
+      }                                                                        \
+    else                                                               \
+      {                                                                        \
+       __x = *__p;                                                     \
+       *__p = __x - (incr);                                            \
+       if (__x < (incr))                                               \
+         while ((*(++__p))-- == 0)                                     \
+           ;                                                           \
+      }                                                                        \
    } while (0)
  #endif
  #endif
  
  #if GMP_NAIL_BITS >= 1
  #ifndef mpn_incr_u
-#define mpn_incr_u(p,incr)                              \
-  do {                                                 \
-    mp_limb_t __x;                                     \
-    mp_ptr __p = (p);                                  \
-    if (__builtin_constant_p (incr) && (incr) == 1)    \
-      {                                                        \
-       do                                              \
-         {                                             \
-           __x = (*__p + 1) & GMP_NUMB_MASK;           \
-           *__p++ = __x;                               \
-         }                                             \
-       while (__x == 0);                               \
-      }                                                        \
-    else                                               \
-      {                                                        \
-       __x = (*__p + (incr));                          \
-       *__p++ = __x & GMP_NUMB_MASK;                   \
-       if (__x >> GMP_NUMB_BITS != 0)                  \
-         {                                             \
-           do                                          \
-             {                                         \
-               __x = (*__p + 1) & GMP_NUMB_MASK;       \
-               *__p++ = __x;                           \
-             }                                         \
-           while (__x == 0);                           \
-         }                                             \
-      }                                                        \
+#define mpn_incr_u(p,incr)                                             \
+  do {                                                                 \
+    mp_limb_t __x;                                                     \
+    mp_ptr __p = (p);                                                  \
+    if (__builtin_constant_p (incr) && (incr) == 1)                    \
+      {                                                                        \
+       do                                                              \
+         {                                                             \
+           __x = (*__p + 1) & GMP_NUMB_MASK;                           \
+           *__p++ = __x;                                               \
+         }                                                             \
+       while (__x == 0);                                               \
+      }                                                                        \
+    else                                                               \
+      {                                                                        \
+       __x = (*__p + (incr));                                          \
+       *__p++ = __x & GMP_NUMB_MASK;                                   \
+       if (__x >> GMP_NUMB_BITS != 0)                                  \
+         {                                                             \
+           do                                                          \
+             {                                                         \
+               __x = (*__p + 1) & GMP_NUMB_MASK;                       \
+               *__p++ = __x;                                           \
+             }                                                         \
+           while (__x == 0);                                           \
+         }                                                             \
+      }                                                                        \
    } while (0)
  #endif
  #ifndef mpn_decr_u
-#define mpn_decr_u(p,incr)                             \
-  do {                                                 \
-    mp_limb_t __x;                                     \
-    mp_ptr __p = (p);                                  \
-    if (__builtin_constant_p (incr) && (incr) == 1)    \
-      {                                                        \
-       do                                              \
-         {                                             \
-           __x = *__p;                                 \
-           *__p++ = (__x - 1) & GMP_NUMB_MASK;         \
-         }                                             \
-       while (__x == 0);                               \
-      }                                                        \
-    else                                               \
-      {                                                        \
-       __x = *__p - (incr);                            \
-       *__p++ = __x & GMP_NUMB_MASK;                   \
-       if (__x >> GMP_NUMB_BITS != 0)                  \
-         {                                             \
-           do                                          \
-             {                                         \
-               __x = *__p;                             \
-               *__p++ = (__x - 1) & GMP_NUMB_MASK;     \
-             }                                         \
-           while (__x == 0);                           \
-         }                                             \
-      }                                                        \
+#define mpn_decr_u(p,incr)                                             \
+  do {                                                                 \
+    mp_limb_t __x;                                                     \
+    mp_ptr __p = (p);                                                  \
+    if (__builtin_constant_p (incr) && (incr) == 1)                    \
+      {                                                                        \
+       do                                                              \
+         {                                                             \
+           __x = *__p;                                                 \
+           *__p++ = (__x - 1) & GMP_NUMB_MASK;                         \
+         }                                                             \
+       while (__x == 0);                                               \
+      }                                                                        \
+    else                                                               \
+      {                                                                        \
+       __x = *__p - (incr);                                            \
+       *__p++ = __x & GMP_NUMB_MASK;                                   \
+       if (__x >> GMP_NUMB_BITS != 0)                                  \
+         {                                                             \
+           do                                                          \
+             {                                                         \
+               __x = *__p;                                             \
+               *__p++ = (__x - 1) & GMP_NUMB_MASK;                     \
+             }                                                         \
+           while (__x == 0);                                           \
+         }                                                             \
+      }                                                                        \
    } while (0)
  #endif
  #endif
  
  #ifndef MPN_INCR_U
  #if WANT_ASSERT
-#define MPN_INCR_U(ptr, size, n)                        \
-  do {                                                  \
-    ASSERT ((size) >= 1);                               \
-    ASSERT_NOCARRY (mpn_add_1 (ptr, ptr, size, n));     \
+#define MPN_INCR_U(ptr, size, n)                                       \
+  do {                                                                 \
+    ASSERT ((size) >= 1);                                              \
+    ASSERT_NOCARRY (mpn_add_1 (ptr, ptr, size, n));                    \
    } while (0)
  #else
  #define MPN_INCR_U(ptr, size, n)   mpn_incr_u (ptr, n)
@@ -2432,10 +2764,10 @@ __GMP_DECLSPEC mp_bitcnt_t mpn_remove __GMP_PROTO ((mp_ptr, mp_size_t *, mp_ptr,
  
  #ifndef MPN_DECR_U
  #if WANT_ASSERT
-#define MPN_DECR_U(ptr, size, n)                        \
-  do {                                                  \
-    ASSERT ((size) >= 1);                               \
-    ASSERT_NOCARRY (mpn_sub_1 (ptr, ptr, size, n));     \
+#define MPN_DECR_U(ptr, size, n)                                       \
+  do {                                                                 \
+    ASSERT ((size) >= 1);                                              \
+    ASSERT_NOCARRY (mpn_sub_1 (ptr, ptr, size, n));                    \
    } while (0)
  #else
  #define MPN_DECR_U(ptr, size, n)   mpn_decr_u (ptr, n)
@@ -2443,8 +2775,7 @@ __GMP_DECLSPEC mp_bitcnt_t mpn_remove __GMP_PROTO ((mp_ptr, mp_size_t *, mp_ptr,
  #endif
  
  
-/* Structure for conversion between internal binary format and
-   strings in base 2..36.  */
+/* Structure for conversion between internal binary format and strings.  */
  struct bases
  {
    /* Number of digits in the conversion base that always fits in an mp_limb_t.
@@ -2453,7 +2784,10 @@ struct bases
    int chars_per_limb;
  
    /* log(2)/log(conversion_base) */
-  double chars_per_bit_exactly;
+  mp_limb_t logb2;
+
+  /* log(conversion_base)/log(2) */
+  mp_limb_t log2b;
  
    /* base**chars_per_limb, i.e. the biggest number that fits a word, built by
       factors of base.  Exception: For 2, 4, 8, etc, big_base is log2(base),
@@ -2470,6 +2804,25 @@ struct bases
  __GMP_DECLSPEC extern const struct bases mp_bases[257];
  
  
+/* Compute the number of digits in base for nbits bits, making sure the result
+   is never too small.  The two variants of the macro implement the same
+   function; the GT2 variant below works just for bases > 2.  */
+#define DIGITS_IN_BASE_FROM_BITS(res, nbits, b)                                \
+  do {                                                                 \
+    mp_limb_t _ph, _dummy;                                             \
+    size_t _nbits = (nbits);                                           \
+    umul_ppmm (_ph, _dummy, mp_bases[b].logb2, _nbits);                        \
+    _ph += (_dummy + _nbits < _dummy);                                 \
+    res = _ph + 1;                                                     \
+  } while (0)
+#define DIGITS_IN_BASEGT2_FROM_BITS(res, nbits, b)                     \
+  do {                                                                 \
+    mp_limb_t _ph, _dummy;                                             \
+    size_t _nbits = (nbits);                                           \
+    umul_ppmm (_ph, _dummy, mp_bases[b].logb2 + 1, _nbits);            \
+    res = _ph + 1;                                                     \
+  } while (0)
+
  /* For power of 2 bases this is exact.  For other bases the result is either
     exact or one too big.
  
@@ -2479,55 +2832,48 @@ __GMP_DECLSPEC extern const struct bases mp_bases[257];
     limbs to increase the probability of being exact, but that doesn't seem
     worth bothering with.  */
  
-#define MPN_SIZEINBASE(result, ptr, size, base)                         \
-  do {                                                                  \
-    int       __lb_base, __cnt;                                         \
-    size_t __totbits;                                                   \
-                                                                        \
-    ASSERT ((size) >= 0);                                               \
-    ASSERT ((base) >= 2);                                               \
-    ASSERT ((base) < numberof (mp_bases));                              \
-                                                                        \
-    /* Special case for X == 0.  */                                     \
-    if ((size) == 0)                                                    \
-      (result) = 1;                                                     \
-    else                                                                \
-      {                                                                 \
-        /* Calculate the total number of significant bits of X.  */     \
-        count_leading_zeros (__cnt, (ptr)[(size)-1]);                   \
-        __totbits = (size_t) (size) * GMP_NUMB_BITS - (__cnt - GMP_NAIL_BITS);\
-                                                                        \
-        if (POW2_P (base))                                              \
-          {                                                             \
-            __lb_base = mp_bases[base].big_base;                        \
-            (result) = (__totbits + __lb_base - 1) / __lb_base;         \
-          }                                                             \
-        else                                                            \
-          (result) = (size_t)                                           \
-            (__totbits * mp_bases[base].chars_per_bit_exactly) + 1;     \
-      }                                                                 \
+#define MPN_SIZEINBASE(result, ptr, size, base)                                \
+  do {                                                                 \
+    int           __lb_base, __cnt;                                            \
+    size_t __totbits;                                                  \
+                                                                       \
+    ASSERT ((size) >= 0);                                              \
+    ASSERT ((base) >= 2);                                              \
+    ASSERT ((base) < numberof (mp_bases));                             \
+                                                                       \
+    /* Special case for X == 0.  */                                    \
+    if ((size) == 0)                                                   \
+      (result) = 1;                                                    \
+    else                                                               \
+      {                                                                        \
+       /* Calculate the total number of significant bits of X.  */     \
+       count_leading_zeros (__cnt, (ptr)[(size)-1]);                   \
+       __totbits = (size_t) (size) * GMP_NUMB_BITS - (__cnt - GMP_NAIL_BITS);\
+                                                                       \
+       if (POW2_P (base))                                              \
+         {                                                             \
+           __lb_base = mp_bases[base].big_base;                        \
+           (result) = (__totbits + __lb_base - 1) / __lb_base;         \
+         }                                                             \
+       else                                                            \
+         {                                                             \
+           DIGITS_IN_BASEGT2_FROM_BITS (result, __totbits, base);      \
+         }                                                             \
+      }                                                                        \
    } while (0)
  
-/* eliminate mp_bases lookups for base==16 */
-#define MPN_SIZEINBASE_16(result, ptr, size)                            \
-  do {                                                                  \
-    int       __cnt;                                                    \
-    mp_size_t __totbits;                                                \
-                                                                        \
-    ASSERT ((size) >= 0);                                               \
-                                                                        \
-    /* Special case for X == 0.  */                                     \
-    if ((size) == 0)                                                    \
-      (result) = 1;                                                     \
-    else                                                                \
-      {                                                                 \
-        /* Calculate the total number of significant bits of X.  */     \
-        count_leading_zeros (__cnt, (ptr)[(size)-1]);                   \
-        __totbits = (size_t) (size) * GMP_NUMB_BITS - (__cnt - GMP_NAIL_BITS);\
-        (result) = (__totbits + 4 - 1) / 4;                             \
-      }                                                                 \
+#define MPN_SIZEINBASE_2EXP(result, ptr, size, base2exp)                       \
+  do {                                                                         \
+    int          __cnt;                                                                \
+    mp_bitcnt_t  __totbits;                                                    \
+    ASSERT ((size) > 0);                                                       \
+    ASSERT ((ptr)[(size)-1] != 0);                                             \
+    count_leading_zeros (__cnt, (ptr)[(size)-1]);                              \
+    __totbits = (mp_bitcnt_t) (size) * GMP_NUMB_BITS - (__cnt - GMP_NAIL_BITS);        \
+    (result) = (__totbits + (base2exp)-1) / (base2exp);                                \
    } while (0)
  
+
  /* bit count to limb count, rounding up */
  #define BITS_TO_LIMBS(n)  (((n) + (GMP_NUMB_BITS - 1)) / GMP_NUMB_BITS)
  
@@ -2537,27 +2883,27 @@ __GMP_DECLSPEC extern const struct bases mp_bases[257];
  #if BITS_PER_ULONG <= GMP_NUMB_BITS /* need one limb per ulong */
  
  #define LIMBS_PER_ULONG 1
-#define MPN_SET_UI(zp, zn, u)   \
-  (zp)[0] = (u);                \
+#define MPN_SET_UI(zp, zn, u)                                          \
+  (zp)[0] = (u);                                                       \
    (zn) = ((zp)[0] != 0);
-#define MPZ_FAKE_UI(z, zp, u)   \
-  (zp)[0] = (u);                \
-  PTR (z) = (zp);               \
-  SIZ (z) = ((zp)[0] != 0);     \
+#define MPZ_FAKE_UI(z, zp, u)                                          \
+  (zp)[0] = (u);                                                       \
+  PTR (z) = (zp);                                                      \
+  SIZ (z) = ((zp)[0] != 0);                                            \
    ASSERT_CODE (ALLOC (z) = 1);
  
  #else /* need two limbs per ulong */
  
  #define LIMBS_PER_ULONG 2
-#define MPN_SET_UI(zp, zn, u)                          \
-  (zp)[0] = (u) & GMP_NUMB_MASK;                       \
-  (zp)[1] = (u) >> GMP_NUMB_BITS;                      \
+#define MPN_SET_UI(zp, zn, u)                                          \
+  (zp)[0] = (u) & GMP_NUMB_MASK;                                       \
+  (zp)[1] = (u) >> GMP_NUMB_BITS;                                      \
    (zn) = ((zp)[1] != 0 ? 2 : (zp)[0] != 0 ? 1 : 0);
-#define MPZ_FAKE_UI(z, zp, u)                          \
-  (zp)[0] = (u) & GMP_NUMB_MASK;                       \
-  (zp)[1] = (u) >> GMP_NUMB_BITS;                      \
-  SIZ (z) = ((zp)[1] != 0 ? 2 : (zp)[0] != 0 ? 1 : 0); \
-  PTR (z) = (zp);                                      \
+#define MPZ_FAKE_UI(z, zp, u)                                          \
+  (zp)[0] = (u) & GMP_NUMB_MASK;                                       \
+  (zp)[1] = (u) >> GMP_NUMB_BITS;                                      \
+  SIZ (z) = ((zp)[1] != 0 ? 2 : (zp)[0] != 0 ? 1 : 0);                 \
+  PTR (z) = (zp);                                                      \
    ASSERT_CODE (ALLOC (z) = 2);
  
  #endif
@@ -2583,193 +2929,132 @@ __GMP_DECLSPEC extern const struct bases mp_bases[257];
     shift on past versions too (in particular since an important use of
     LIMB_HIGHBIT_TO_MASK is in udiv_qrnnd_preinv).  */
  
-#define LIMB_HIGHBIT_TO_MASK(n)                                 \
-  (((mp_limb_signed_t) -1 >> 1) < 0                             \
-   ? (mp_limb_signed_t) (n) >> (GMP_LIMB_BITS - 1)              \
+#define LIMB_HIGHBIT_TO_MASK(n)                                                \
+  (((mp_limb_signed_t) -1 >> 1) < 0                                    \
+   ? (mp_limb_signed_t) (n) >> (GMP_LIMB_BITS - 1)                     \
     : (n) & GMP_LIMB_HIGHBIT ? MP_LIMB_T_MAX : CNST_LIMB(0))
  
  
  /* Use a library function for invert_limb, if available. */
-#define   mpn_invert_limb __MPN(invert_limb)
-__GMP_DECLSPEC mp_limb_t mpn_invert_limb __GMP_PROTO ((mp_limb_t)) ATTRIBUTE_CONST;
+#define  mpn_invert_limb __MPN(invert_limb)
+__GMP_DECLSPEC mp_limb_t mpn_invert_limb (mp_limb_t) ATTRIBUTE_CONST;
  #if ! defined (invert_limb) && HAVE_NATIVE_mpn_invert_limb
-#define invert_limb(invxl,xl)           \
-  do {                                  \
-    (invxl) = mpn_invert_limb (xl);     \
+#define invert_limb(invxl,xl)                                          \
+  do {                                                                 \
+    (invxl) = mpn_invert_limb (xl);                                    \
    } while (0)
  #endif
  
  #ifndef invert_limb
-#define invert_limb(invxl,xl)                   \
-  do {                                          \
-    mp_limb_t dummy;                            \
-    ASSERT ((xl) != 0);                         \
-    udiv_qrnnd (invxl, dummy, ~(xl), ~CNST_LIMB(0), xl);  \
-  } while (0)
-#endif
-
-#define invert_pi1(dinv, d1, d0)                               \
-  do {                                                         \
-    mp_limb_t v, p, t1, t0, mask;                              \
-    invert_limb (v, d1);                                       \
-    p = d1 * v;                                                        \
-    p += d0;                                                   \
-    if (p < d0)                                                        \
-      {                                                                \
-       v--;                                                    \
-       mask = -(p >= d1);                                      \
-       p -= d1;                                                \
-       v += mask;                                              \
-       p -= mask & d1;                                         \
-      }                                                                \
-    umul_ppmm (t1, t0, d0, v);                                 \
-    p += t1;                                                   \
-    if (p < t1)                                                        \
-      {                                                                \
-        v--;                                                   \
-       if (UNLIKELY (p >= d1))                                 \
-         {                                                     \
-           if (p > d1 || t0 >= d0)                             \
-             v--;                                              \
-         }                                                     \
-      }                                                                \
-    (dinv).inv32 = v;                                          \
+#define invert_limb(invxl,xl)                                          \
+  do {                                                                 \
+    mp_limb_t _dummy;                                                  \
+    ASSERT ((xl) != 0);                                                        \
+    udiv_qrnnd (invxl, _dummy, ~(xl), ~CNST_LIMB(0), xl);              \
    } while (0)
-
-
-#ifndef udiv_qrnnd_preinv
-#define udiv_qrnnd_preinv udiv_qrnnd_preinv3
  #endif
  
-/* Divide the two-limb number in (NH,,NL) by D, with DI being the largest
-   limb not larger than (2**(2*GMP_LIMB_BITS))/D - (2**GMP_LIMB_BITS).
-   If this would yield overflow, DI should be the largest possible number
-   (i.e., only ones).  For correct operation, the most significant bit of D
-   has to be set.  Put the quotient in Q and the remainder in R.  */
-#define udiv_qrnnd_preinv1(q, r, nh, nl, d, di)                                \
+#define invert_pi1(dinv, d1, d0)                                       \
    do {                                                                 \
-    mp_limb_t _q, _ql, _r;                                             \
-    mp_limb_t _xh, _xl;                                                        \
-    ASSERT ((d) != 0);                                                 \
-    umul_ppmm (_q, _ql, (nh), (di));                                   \
-    _q += (nh);        /* Compensate, di is 2**GMP_LIMB_BITS too small */      \
-    umul_ppmm (_xh, _xl, _q, (d));                                     \
-    sub_ddmmss (_xh, _r, (nh), (nl), _xh, _xl);                                \
-    if (_xh != 0)                                                      \
+    mp_limb_t _v, _p, _t1, _t0, _mask;                                 \
+    invert_limb (_v, d1);                                              \
+    _p = (d1) * _v;                                                    \
+    _p += (d0);                                                                \
+    if (_p < (d0))                                                     \
        {                                                                        \
-       sub_ddmmss (_xh, _r, _xh, _r, 0, (d));                          \
-       _q += 1;                                                        \
-       if (_xh != 0)                                                   \
-         {                                                             \
-           _r -= (d);                                                  \
-           _q += 1;                                                    \
-         }                                                             \
+       _v--;                                                           \
+       _mask = -(mp_limb_t) (_p >= (d1));                              \
+       _p -= (d1);                                                     \
+       _v += _mask;                                                    \
+       _p -= _mask & (d1);                                             \
        }                                                                        \
-    if (_r >= (d))                                                     \
+    umul_ppmm (_t1, _t0, d0, _v);                                      \
+    _p += _t1;                                                         \
+    if (_p < _t1)                                                      \
        {                                                                        \
-       _r -= (d);                                                      \
-       _q += 1;                                                        \
+       _v--;                                                           \
+       if (UNLIKELY (_p >= (d1)))                                      \
+         {                                                             \
+           if (_p > (d1) || _t0 >= (d0))                               \
+             _v--;                                                     \
+         }                                                             \
        }                                                                        \
-    (r) = _r;                                                          \
-    (q) = _q;                                                          \
-  } while (0)
-
-/* Like udiv_qrnnd_preinv, but branch-free. */
-#define udiv_qrnnd_preinv2(q, r, nh, nl, d, di)                                \
-  do {                                                                 \
-    mp_limb_t _n2, _n10, _nmask, _nadj, _q1;                           \
-    mp_limb_t _xh, _xl;                                                        \
-    _n2 = (nh);                                                                \
-    _n10 = (nl);                                                       \
-    _nmask = LIMB_HIGHBIT_TO_MASK (_n10);                              \
-    _nadj = _n10 + (_nmask & (d));                                     \
-    umul_ppmm (_xh, _xl, di, _n2 - _nmask);                            \
-    add_ssaaaa (_xh, _xl, _xh, _xl, _n2, _nadj);                       \
-    _q1 = ~_xh;                                                                \
-    umul_ppmm (_xh, _xl, _q1, d);                                      \
-    add_ssaaaa (_xh, _xl, _xh, _xl, nh, nl);                           \
-    _xh -= (d);                                        /* xh = 0 or -1 */      \
-    (r) = _xl + ((d) & _xh);                                           \
-    (q) = _xh - _q1;                                                   \
-  } while (0)
-
-/* Like udiv_qrnnd_preinv2, but for for any value D.  DNORM is D shifted left
-   so that its most significant bit is set.  LGUP is ceil(log2(D)).  */
-#define udiv_qrnnd_preinv2gen(q, r, nh, nl, d, di, dnorm, lgup) \
-  do {                                                                 \
-    mp_limb_t _n2, _n10, _nmask, _nadj, _q1;                           \
-    mp_limb_t _xh, _xl;                                                        \
-    _n2 = ((nh) << (GMP_LIMB_BITS - (lgup))) + ((nl) >> 1 >> (l - 1)); \
-    _n10 = (nl) << (GMP_LIMB_BITS - (lgup));                           \
-    _nmask = LIMB_HIGHBIT_TO_MASK (_n10);                              \
-    _nadj = _n10 + (_nmask & (dnorm));                                 \
-    umul_ppmm (_xh, _xl, di, _n2 - _nmask);                            \
-    add_ssaaaa (_xh, _xl, _xh, _xl, _n2, _nadj);                       \
-    _q1 = ~_xh;                                                                \
-    umul_ppmm (_xh, _xl, _q1, d);                                      \
-    add_ssaaaa (_xh, _xl, _xh, _xl, nh, nl);                           \
-    _xh -= (d);                                                                \
-    (r) = _xl + ((d) & _xh);                                           \
-    (q) = _xh - _q1;                                                   \
+    (dinv).inv32 = _v;                                                 \
    } while (0)
  
-/* udiv_qrnnd_preinv3 -- Based on work by Niels Möller and Torbjörn Granlund.
  
+/* udiv_qrnnd_preinv -- Based on work by Niels Möller and Torbjörn Granlund.
     We write things strangely below, to help gcc.  A more straightforward
     version:
-
-   _r = (nl) - _qh * (d);
-   _t = _r + (d);
-   if (_r >= _ql)
-     {
-       _qh--;
-       _r = _t;
-     }
-
+       _r = (nl) - _qh * (d);
+       _t = _r + (d);
+       if (_r >= _ql)
+         {
+           _qh--;
+           _r = _t;
+         }
     For one operation shorter critical path, one may want to use this form:
-
-   _p = _qh * (d)
-   _s = (nl) + (d);
-   _r = (nl) - _p;
-   _t = _s - _p;
-   if (_r >= _ql)
-     {
-       _qh--;
-       _r = _t;
-     }
+       _p = _qh * (d)
+       _s = (nl) + (d);
+       _r = (nl) - _p;
+       _t = _s - _p;
+       if (_r >= _ql)
+         {
+           _qh--;
+           _r = _t;
+         }
  */
-#define udiv_qrnnd_preinv3(q, r, nh, nl, d, di)                                \
+#define udiv_qrnnd_preinv(q, r, nh, nl, d, di)                         \
    do {                                                                 \
-    mp_limb_t _qh, _ql, _r;                                            \
+    mp_limb_t _qh, _ql, _r, _mask;                                     \
      umul_ppmm (_qh, _ql, (nh), (di));                                  \
      if (__builtin_constant_p (nl) && (nl) == 0)                                \
-      _qh += (nh) + 1;                                                 \
-    else                                                               \
-      add_ssaaaa (_qh, _ql, _qh, _ql, (nh) + 1, (nl));                 \
-    _r = (nl) - _qh * (d);                                             \
-    if (_r > _ql)      /* both > and >= should be OK */                \
        {                                                                        \
-       _r += (d);                                                      \
-       _qh--;                                                          \
+       _qh += (nh) + 1;                                                \
+       _r = - _qh * (d);                                               \
+       _mask = -(mp_limb_t) (_r > _ql); /* both > and >= are OK */     \
+       _qh += _mask;                                                   \
+       _r += _mask & (d);                                              \
        }                                                                        \
-    if (UNLIKELY (_r >= (d)))                                          \
+    else                                                               \
        {                                                                        \
-       _r -= (d);                                                      \
-       _qh++;                                                          \
+       add_ssaaaa (_qh, _ql, _qh, _ql, (nh) + 1, (nl));                \
+       _r = (nl) - _qh * (d);                                          \
+       _mask = -(mp_limb_t) (_r > _ql); /* both > and >= are OK */     \
+       _qh += _mask;                                                   \
+       _r += _mask & (d);                                              \
+       if (UNLIKELY (_r >= (d)))                                       \
+         {                                                             \
+           _r -= (d);                                                  \
+           _qh++;                                                      \
+         }                                                             \
        }                                                                        \
      (r) = _r;                                                          \
      (q) = _qh;                                                         \
    } while (0)
  
-/* Compute r = nh*B mod d, where di is the inverse of d.  */
-#define udiv_rnd_preinv(r, nh, d, di)                                  \
+/* Dividing (NH, NL) by D, returning the remainder only. Unlike
+   udiv_qrnnd_preinv, works also for the case NH == D, where the
+   quotient doesn't quite fit in a single limb. */
+#define udiv_rnnd_preinv(r, nh, nl, d, di)                             \
    do {                                                                 \
-    mp_limb_t _qh, _ql, _r;                                            \
+    mp_limb_t _qh, _ql, _r, _mask;                                     \
      umul_ppmm (_qh, _ql, (nh), (di));                                  \
-    _qh += (nh) + 1;                                                   \
-    _r = - _qh * (d);                                                  \
-    if (_r > _ql)                                                      \
-      _r += (d);                                                       \
+    if (__builtin_constant_p (nl) && (nl) == 0)                                \
+      {                                                                        \
+       _r = ~(_qh + (nh)) * (d);                                       \
+       _mask = -(mp_limb_t) (_r > _ql); /* both > and >= are OK */     \
+       _r += _mask & (d);                                              \
+      }                                                                        \
+    else                                                               \
+      {                                                                        \
+       add_ssaaaa (_qh, _ql, _qh, _ql, (nh) + 1, (nl));                \
+       _r = (nl) - _qh * (d);                                          \
+       _mask = -(mp_limb_t) (_r > _ql); /* both > and >= are OK */     \
+       _r += _mask & (d);                                              \
+       if (UNLIKELY (_r >= (d)))                                       \
+         _r -= (d);                                                    \
+      }                                                                        \
      (r) = _r;                                                          \
    } while (0)
  
@@ -2789,8 +3074,7 @@ __GMP_DECLSPEC mp_limb_t mpn_invert_limb __GMP_PROTO ((mp_limb_t)) ATTRIBUTE_CON
                                                                         \
      /* Compute the two most significant limbs of n - q'd */            \
      (r1) = (n1) - (d1) * (q);                                          \
-    (r0) = (n0);                                                       \
-    sub_ddmmss ((r1), (r0), (r1), (r0), (d1), (d0));                   \
+    sub_ddmmss ((r1), (r0), (r1), (n0), (d1), (d0));                   \
      umul_ppmm (_t1, _t0, (d0), (q));                                   \
      sub_ddmmss ((r1), (r0), (r1), (r0), _t1, _t0);                     \
      (q)++;                                                             \
@@ -2811,7 +3095,7 @@ __GMP_DECLSPEC mp_limb_t mpn_invert_limb __GMP_PROTO ((mp_limb_t)) ATTRIBUTE_CON
  
  #ifndef mpn_preinv_divrem_1  /* if not done with cpuvec in a fat binary */
  #define   mpn_preinv_divrem_1 __MPN(preinv_divrem_1)
-__GMP_DECLSPEC mp_limb_t mpn_preinv_divrem_1 __GMP_PROTO ((mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t, int));
+__GMP_DECLSPEC mp_limb_t mpn_preinv_divrem_1 (mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t, int);
  #endif
  
  
@@ -2836,15 +3120,15 @@ __GMP_DECLSPEC mp_limb_t mpn_preinv_divrem_1 __GMP_PROTO ((mp_ptr, mp_size_t, mp
  
  /* This selection may seem backwards.  The reason mpn_mod_1 typically takes
     over for larger sizes is that it uses the mod_1_1 function.  */
-#define MPN_MOD_OR_PREINV_MOD_1(src,size,divisor,inverse)              \
+#define MPN_MOD_OR_PREINV_MOD_1(src,size,divisor,inverse)              \
    (BELOW_THRESHOLD (size, PREINV_MOD_1_TO_MOD_1_THRESHOLD)             \
     ? mpn_preinv_mod_1 (src, size, divisor, inverse)                    \
     : mpn_mod_1 (src, size, divisor))
  
  
  #ifndef mpn_mod_34lsub1  /* if not done with cpuvec in a fat binary */
-#define   mpn_mod_34lsub1 __MPN(mod_34lsub1)
-__GMP_DECLSPEC mp_limb_t mpn_mod_34lsub1 __GMP_PROTO ((mp_srcptr, mp_size_t)) __GMP_ATTRIBUTE_PURE;
+#define mpn_mod_34lsub1 __MPN(mod_34lsub1)
+__GMP_DECLSPEC mp_limb_t mpn_mod_34lsub1 (mp_srcptr, mp_size_t) __GMP_ATTRIBUTE_PURE;
  #endif
  
  
@@ -2862,28 +3146,28 @@ __GMP_DECLSPEC mp_limb_t mpn_mod_34lsub1 __GMP_PROTO ((mp_srcptr, mp_size_t)) __
  
  #ifndef mpn_divexact_1  /* if not done with cpuvec in a fat binary */
  #define mpn_divexact_1 __MPN(divexact_1)
-__GMP_DECLSPEC void    mpn_divexact_1 __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_limb_t));
-#endif
-
-#define MPN_DIVREM_OR_DIVEXACT_1(dst, src, size, divisor)                     \
-  do {                                                                        \
-    if (BELOW_THRESHOLD (size, DIVEXACT_1_THRESHOLD))                         \
-      ASSERT_NOCARRY (mpn_divrem_1 (dst, (mp_size_t) 0, src, size, divisor)); \
-    else                                                                      \
-      {                                                                       \
-        ASSERT (mpn_mod_1 (src, size, divisor) == 0);                         \
-        mpn_divexact_1 (dst, src, size, divisor);                             \
-      }                                                                       \
+__GMP_DECLSPEC void    mpn_divexact_1 (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t);
+#endif
+
+#define MPN_DIVREM_OR_DIVEXACT_1(rp, up, n, d)                         \
+  do {                                                                 \
+    if (BELOW_THRESHOLD (n, DIVEXACT_1_THRESHOLD))                     \
+      ASSERT_NOCARRY (mpn_divrem_1 (rp, (mp_size_t) 0, up, n, d));     \
+    else                                                               \
+      {                                                                        \
+       ASSERT (mpn_mod_1 (up, n, d) == 0);                             \
+       mpn_divexact_1 (rp, up, n, d);                                  \
+      }                                                                        \
    } while (0)
  
  #ifndef mpn_modexact_1c_odd  /* if not done with cpuvec in a fat binary */
-#define   mpn_modexact_1c_odd __MPN(modexact_1c_odd)
-__GMP_DECLSPEC mp_limb_t mpn_modexact_1c_odd __GMP_PROTO ((mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t)) __GMP_ATTRIBUTE_PURE;
+#define mpn_modexact_1c_odd __MPN(modexact_1c_odd)
+__GMP_DECLSPEC mp_limb_t mpn_modexact_1c_odd (mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t) __GMP_ATTRIBUTE_PURE;
  #endif
  
  #if HAVE_NATIVE_mpn_modexact_1_odd
  #define   mpn_modexact_1_odd  __MPN(modexact_1_odd)
-__GMP_DECLSPEC mp_limb_t mpn_modexact_1_odd __GMP_PROTO ((mp_srcptr, mp_size_t, mp_limb_t)) __GMP_ATTRIBUTE_PURE;
+__GMP_DECLSPEC mp_limb_t mpn_modexact_1_odd (mp_srcptr, mp_size_t, mp_limb_t) __GMP_ATTRIBUTE_PURE;
  #else
  #define mpn_modexact_1_odd(src,size,divisor) \
    mpn_modexact_1c_odd (src, size, divisor, CNST_LIMB(0))
@@ -2968,17 +3252,17 @@ __GMP_DECLSPEC extern const unsigned char  binvert_limb_table[128];
                                                                         \
      if ((a) <= (d))                                                    \
        {                                                                        \
-        /* small a is reasonably likely */                             \
-        (r) = (d) - (a);                                               \
+       /* small a is reasonably likely */                              \
+       (r) = (d) - (a);                                                \
        }                                                                        \
      else                                                               \
        {                                                                        \
-        unsigned   __twos;                                             \
-        mp_limb_t  __dnorm;                                            \
-        count_leading_zeros (__twos, d);                               \
-        __twos -= GMP_NAIL_BITS;                                       \
-        __dnorm = (d) << __twos;                                       \
-        (r) = ((a) <= __dnorm ? __dnorm : 2*__dnorm) - (a);            \
+       unsigned   __twos;                                              \
+       mp_limb_t  __dnorm;                                             \
+       count_leading_zeros (__twos, d);                                \
+       __twos -= GMP_NAIL_BITS;                                        \
+       __dnorm = (d) << __twos;                                        \
+       (r) = ((a) <= __dnorm ? __dnorm : 2*__dnorm) - (a);             \
        }                                                                        \
                                                                         \
      ASSERT_LIMB (r);                                                   \
@@ -3051,8 +3335,8 @@ __GMP_DECLSPEC extern const unsigned char  binvert_limb_table[128];
      int  __p = 0;                                                      \
      do                                                                 \
        {                                                                        \
-        __p ^= 0x96696996L >> (__n & 0x1F);                            \
-        __n >>= 5;                                                     \
+       __p ^= 0x96696996L >> (__n & 0x1F);                             \
+       __n >>= 5;                                                      \
        }                                                                        \
      while (__n != 0);                                                  \
                                                                         \
@@ -3125,64 +3409,64 @@ __GMP_DECLSPEC extern const unsigned char  binvert_limb_table[128];
  
  #if ! defined (BSWAP_LIMB)
  #if GMP_LIMB_BITS == 8
-#define BSWAP_LIMB(dst, src)            \
+#define BSWAP_LIMB(dst, src)                           \
    do { (dst) = (src); } while (0)
  #endif
  #if GMP_LIMB_BITS == 16
-#define BSWAP_LIMB(dst, src)                    \
-  do {                                          \
-    (dst) = ((src) << 8) + ((src) >> 8);        \
+#define BSWAP_LIMB(dst, src)                                           \
+  do {                                                                 \
+    (dst) = ((src) << 8) + ((src) >> 8);                               \
    } while (0)
  #endif
  #if GMP_LIMB_BITS == 32
-#define BSWAP_LIMB(dst, src)    \
-  do {                          \
-    (dst) =                     \
-      ((src) << 24)             \
-      + (((src) & 0xFF00) << 8) \
-      + (((src) >> 8) & 0xFF00) \
-      + ((src) >> 24);          \
+#define BSWAP_LIMB(dst, src)                                           \
+  do {                                                                 \
+    (dst) =                                                            \
+      ((src) << 24)                                                    \
+      + (((src) & 0xFF00) << 8)                                                \
+      + (((src) >> 8) & 0xFF00)                                                \
+      + ((src) >> 24);                                                 \
    } while (0)
  #endif
  #if GMP_LIMB_BITS == 64
-#define BSWAP_LIMB(dst, src)            \
-  do {                                  \
-    (dst) =                             \
-      ((src) << 56)                     \
-      + (((src) & 0xFF00) << 40)        \
-      + (((src) & 0xFF0000) << 24)      \
-      + (((src) & 0xFF000000) << 8)     \
-      + (((src) >> 8) & 0xFF000000)     \
-      + (((src) >> 24) & 0xFF0000)      \
-      + (((src) >> 40) & 0xFF00)        \
-      + ((src) >> 56);                  \
+#define BSWAP_LIMB(dst, src)                                           \
+  do {                                                                 \
+    (dst) =                                                            \
+      ((src) << 56)                                                    \
+      + (((src) & 0xFF00) << 40)                                       \
+      + (((src) & 0xFF0000) << 24)                                     \
+      + (((src) & 0xFF000000) << 8)                                    \
+      + (((src) >> 8) & 0xFF000000)                                    \
+      + (((src) >> 24) & 0xFF0000)                                     \
+      + (((src) >> 40) & 0xFF00)                                       \
+      + ((src) >> 56);                                                 \
    } while (0)
  #endif
  #endif
  
  #if ! defined (BSWAP_LIMB)
-#define BSWAP_LIMB(dst, src)                            \
-  do {                                                  \
-    mp_limb_t  __bswapl_src = (src);                    \
-    mp_limb_t  __dst = 0;                               \
-    int        __i;                                     \
-    for (__i = 0; __i < BYTES_PER_MP_LIMB; __i++)       \
-      {                                                 \
-        __dst = (__dst << 8) | (__bswapl_src & 0xFF);   \
-        __bswapl_src >>= 8;                             \
-      }                                                 \
-    (dst) = __dst;                                      \
+#define BSWAP_LIMB(dst, src)                                           \
+  do {                                                                 \
+    mp_limb_t  __bswapl_src = (src);                                   \
+    mp_limb_t  __dstl = 0;                                             \
+    int               __i;                                                     \
+    for (__i = 0; __i < BYTES_PER_MP_LIMB; __i++)                      \
+      {                                                                        \
+       __dstl = (__dstl << 8) | (__bswapl_src & 0xFF);                 \
+       __bswapl_src >>= 8;                                             \
+      }                                                                        \
+    (dst) = __dstl;                                                    \
    } while (0)
  #endif
  
  
  /* Apparently lwbrx might be slow on some PowerPC chips, so restrict it to
     those we know are fast.  */
-#if defined (__GNUC__) && ! defined (NO_ASM)                            \
-  && GMP_LIMB_BITS == 32 && HAVE_LIMB_BIG_ENDIAN                        \
-  && (HAVE_HOST_CPU_powerpc604                                          \
-      || HAVE_HOST_CPU_powerpc604e                                      \
-      || HAVE_HOST_CPU_powerpc750                                       \
+#if defined (__GNUC__) && ! defined (NO_ASM)                           \
+  && GMP_LIMB_BITS == 32 && HAVE_LIMB_BIG_ENDIAN                       \
+  && (HAVE_HOST_CPU_powerpc604                                         \
+      || HAVE_HOST_CPU_powerpc604e                                     \
+      || HAVE_HOST_CPU_powerpc750                                      \
        || HAVE_HOST_CPU_powerpc7400)
  #define BSWAP_LIMB_FETCH(limb, src)                                    \
    do {                                                                 \
@@ -3203,11 +3487,11 @@ __GMP_DECLSPEC extern const unsigned char  binvert_limb_table[128];
  
  /* On the same basis that lwbrx might be slow, restrict stwbrx to those we
     know are fast.  FIXME: Is this necessary?  */
-#if defined (__GNUC__) && ! defined (NO_ASM)                            \
-  && GMP_LIMB_BITS == 32 && HAVE_LIMB_BIG_ENDIAN                        \
-  && (HAVE_HOST_CPU_powerpc604                                          \
-      || HAVE_HOST_CPU_powerpc604e                                      \
-      || HAVE_HOST_CPU_powerpc750                                       \
+#if defined (__GNUC__) && ! defined (NO_ASM)                           \
+  && GMP_LIMB_BITS == 32 && HAVE_LIMB_BIG_ENDIAN                       \
+  && (HAVE_HOST_CPU_powerpc604                                         \
+      || HAVE_HOST_CPU_powerpc604e                                     \
+      || HAVE_HOST_CPU_powerpc750                                      \
        || HAVE_HOST_CPU_powerpc7400)
  #define BSWAP_LIMB_STORE(dst, limb)                                    \
    do {                                                                 \
@@ -3226,39 +3510,39 @@ __GMP_DECLSPEC extern const unsigned char  binvert_limb_table[128];
  
  
  /* Byte swap limbs from {src,size} and store at {dst,size}. */
-#define MPN_BSWAP(dst, src, size)                       \
-  do {                                                  \
-    mp_ptr     __dst = (dst);                           \
-    mp_srcptr  __src = (src);                           \
-    mp_size_t  __size = (size);                         \
-    mp_size_t  __i;                                     \
-    ASSERT ((size) >= 0);                               \
-    ASSERT (MPN_SAME_OR_SEPARATE_P (dst, src, size));   \
-    CRAY_Pragma ("_CRI ivdep");                         \
-    for (__i = 0; __i < __size; __i++)                  \
-      {                                                 \
-        BSWAP_LIMB_FETCH (*__dst, __src);               \
-        __dst++;                                        \
-        __src++;                                        \
-      }                                                 \
+#define MPN_BSWAP(dst, src, size)                                      \
+  do {                                                                 \
+    mp_ptr     __dst = (dst);                                          \
+    mp_srcptr  __src = (src);                                          \
+    mp_size_t  __size = (size);                                                \
+    mp_size_t  __i;                                                    \
+    ASSERT ((size) >= 0);                                              \
+    ASSERT (MPN_SAME_OR_SEPARATE_P (dst, src, size));                  \
+    CRAY_Pragma ("_CRI ivdep");                                                \
+    for (__i = 0; __i < __size; __i++)                                 \
+      {                                                                        \
+       BSWAP_LIMB_FETCH (*__dst, __src);                               \
+       __dst++;                                                        \
+       __src++;                                                        \
+      }                                                                        \
    } while (0)
  
  /* Byte swap limbs from {dst,size} and store in reverse order at {src,size}. */
-#define MPN_BSWAP_REVERSE(dst, src, size)               \
-  do {                                                  \
-    mp_ptr     __dst = (dst);                           \
-    mp_size_t  __size = (size);                         \
-    mp_srcptr  __src = (src) + __size - 1;              \
-    mp_size_t  __i;                                     \
-    ASSERT ((size) >= 0);                               \
-    ASSERT (! MPN_OVERLAP_P (dst, size, src, size));    \
-    CRAY_Pragma ("_CRI ivdep");                         \
-    for (__i = 0; __i < __size; __i++)                  \
-      {                                                 \
-        BSWAP_LIMB_FETCH (*__dst, __src);               \
-        __dst++;                                        \
-        __src--;                                        \
-      }                                                 \
+#define MPN_BSWAP_REVERSE(dst, src, size)                              \
+  do {                                                                 \
+    mp_ptr     __dst = (dst);                                          \
+    mp_size_t  __size = (size);                                                \
+    mp_srcptr  __src = (src) + __size - 1;                             \
+    mp_size_t  __i;                                                    \
+    ASSERT ((size) >= 0);                                              \
+    ASSERT (! MPN_OVERLAP_P (dst, size, src, size));                   \
+    CRAY_Pragma ("_CRI ivdep");                                                \
+    for (__i = 0; __i < __size; __i++)                                 \
+      {                                                                        \
+       BSWAP_LIMB_FETCH (*__dst, __src);                               \
+       __dst++;                                                        \
+       __src--;                                                        \
+      }                                                                        \
    } while (0)
  
  
@@ -3283,9 +3567,9 @@ __GMP_DECLSPEC extern const unsigned char  binvert_limb_table[128];
  
  /* Cray intrinsic. */
  #ifdef _CRAY
-#define popc_limb(result, input)        \
-  do {                                  \
-    (result) = _popcnt (input);         \
+#define popc_limb(result, input)                                       \
+  do {                                                                 \
+    (result) = _popcnt (input);                                                \
    } while (0)
  #endif
  
@@ -3312,8 +3596,8 @@ __GMP_DECLSPEC extern const unsigned char  binvert_limb_table[128];
      mp_limb_t  __x = (input);                                          \
      __x -= (__x >> 1) & MP_LIMB_T_MAX/3;                               \
      __x = ((__x >> 2) & MP_LIMB_T_MAX/5) + (__x & MP_LIMB_T_MAX/5);    \
-    __x = ((__x >> 4) + __x) & MP_LIMB_T_MAX/17;                       \
-    (result) = __x & 0xff;                                             \
+    __x = ((__x >> 4) + __x);                                          \
+    (result) = __x & 0x0f;                                             \
    } while (0)
  #endif
  
@@ -3438,6 +3722,21 @@ union ieee_double_extract
  };
  #endif
  
+#if HAVE_DOUBLE_VAX_D
+union double_extract
+{
+  struct
+    {
+      gmp_uint_least32_t man3:7;       /* highest 7 bits */
+      gmp_uint_least32_t exp:8;                /* excess-128 exponent */
+      gmp_uint_least32_t sig:1;
+      gmp_uint_least32_t man2:16;
+      gmp_uint_least32_t man1:16;
+      gmp_uint_least32_t man0:16;      /* lowest 16 bits */
+    } s;
+  double d;
+};
+#endif
  
  /* Use (4.0 * ...) instead of (2.0 * ...) to work around buggy compilers
     that don't convert ulong->double correctly (eg. SunOS 4 native cc).  */
@@ -3446,10 +3745,10 @@ union ieee_double_extract
     We assume doubles have 53 mantissa bits.  */
  #define LIMBS_PER_DOUBLE ((53 + GMP_NUMB_BITS - 2) / GMP_NUMB_BITS + 1)
  
-__GMP_DECLSPEC int __gmp_extract_double __GMP_PROTO ((mp_ptr, double));
+__GMP_DECLSPEC int __gmp_extract_double (mp_ptr, double);
  
  #define mpn_get_d __gmpn_get_d
-__GMP_DECLSPEC double mpn_get_d __GMP_PROTO ((mp_srcptr, mp_size_t, mp_size_t, long)) __GMP_ATTRIBUTE_PURE;
+__GMP_DECLSPEC double mpn_get_d (mp_srcptr, mp_size_t, mp_size_t, long) __GMP_ATTRIBUTE_PURE;
  
  
  /* DOUBLE_NAN_INF_ACTION executes code a_nan if x is a NaN, or executes
@@ -3457,17 +3756,17 @@ __GMP_DECLSPEC double mpn_get_d __GMP_PROTO ((mp_srcptr, mp_size_t, mp_size_t, l
     branch prediction.  */
  
  #if _GMP_IEEE_FLOATS
-#define DOUBLE_NAN_INF_ACTION(x, a_nan, a_inf)  \
-  do {                                          \
-    union ieee_double_extract  u;               \
-    u.d = (x);                                  \
-    if (UNLIKELY (u.s.exp == 0x7FF))            \
-      {                                         \
-        if (u.s.manl == 0 && u.s.manh == 0)     \
-          { a_inf; }                            \
-        else                                    \
-          { a_nan; }                            \
-      }                                         \
+#define DOUBLE_NAN_INF_ACTION(x, a_nan, a_inf)                         \
+  do {                                                                 \
+    union ieee_double_extract  u;                                      \
+    u.d = (x);                                                         \
+    if (UNLIKELY (u.s.exp == 0x7FF))                                   \
+      {                                                                        \
+       if (u.s.manl == 0 && u.s.manh == 0)                             \
+         { a_inf; }                                                    \
+       else                                                            \
+         { a_nan; }                                                    \
+      }                                                                        \
    } while (0)
  #endif
  
@@ -3481,14 +3780,14 @@ __GMP_DECLSPEC double mpn_get_d __GMP_PROTO ((mp_srcptr, mp_size_t, mp_size_t, l
  /* Unknown format, try something generic.
     NaN should be "unordered", so x!=x.
     Inf should be bigger than DBL_MAX.  */
-#define DOUBLE_NAN_INF_ACTION(x, a_nan, a_inf)                  \
-  do {                                                          \
-    {                                                           \
-      if (UNLIKELY ((x) != (x)))                                \
-        { a_nan; }                                              \
-      else if (UNLIKELY ((x) > DBL_MAX || (x) < -DBL_MAX))      \
-        { a_inf; }                                              \
-    }                                                           \
+#define DOUBLE_NAN_INF_ACTION(x, a_nan, a_inf)                         \
+  do {                                                                 \
+    {                                                                  \
+      if (UNLIKELY ((x) != (x)))                                       \
+       { a_nan; }                                                      \
+      else if (UNLIKELY ((x) > DBL_MAX || (x) < -DBL_MAX))             \
+       { a_inf; }                                                      \
+    }                                                                  \
    } while (0)
  #endif
  
@@ -3518,28 +3817,22 @@ __GMP_DECLSPEC double mpn_get_d __GMP_PROTO ((mp_srcptr, mp_size_t, mp_size_t, l
  #endif
  
  
+__GMP_DECLSPEC extern const unsigned char __gmp_digit_value_tab[];
+
  __GMP_DECLSPEC extern int __gmp_junk;
  __GMP_DECLSPEC extern const int __gmp_0;
-__GMP_DECLSPEC void __gmp_exception __GMP_PROTO ((int)) ATTRIBUTE_NORETURN;
-__GMP_DECLSPEC void __gmp_divide_by_zero __GMP_PROTO ((void)) ATTRIBUTE_NORETURN;
-__GMP_DECLSPEC void __gmp_sqrt_of_negative __GMP_PROTO ((void)) ATTRIBUTE_NORETURN;
-__GMP_DECLSPEC void __gmp_invalid_operation __GMP_PROTO ((void)) ATTRIBUTE_NORETURN;
+__GMP_DECLSPEC void __gmp_exception (int) ATTRIBUTE_NORETURN;
+__GMP_DECLSPEC void __gmp_divide_by_zero (void) ATTRIBUTE_NORETURN;
+__GMP_DECLSPEC void __gmp_sqrt_of_negative (void) ATTRIBUTE_NORETURN;
+__GMP_DECLSPEC void __gmp_invalid_operation (void) ATTRIBUTE_NORETURN;
  #define GMP_ERROR(code)   __gmp_exception (code)
  #define DIVIDE_BY_ZERO    __gmp_divide_by_zero ()
  #define SQRT_OF_NEGATIVE  __gmp_sqrt_of_negative ()
  
  #if defined _LONG_LONG_LIMB
-#if __GMP_HAVE_TOKEN_PASTE
  #define CNST_LIMB(C) ((mp_limb_t) C##LL)
-#else
-#define CNST_LIMB(C) ((mp_limb_t) C/**/LL)
-#endif
  #else /* not _LONG_LONG_LIMB */
-#if __GMP_HAVE_TOKEN_PASTE
  #define CNST_LIMB(C) ((mp_limb_t) C##L)
-#else
-#define CNST_LIMB(C) ((mp_limb_t) C/**/L)
-#endif
  #endif /* _LONG_LONG_LIMB */
  
  /* Stuff used by mpn/generic/perfsqr.c and mpz/prime_p.c */
@@ -3573,8 +3866,6 @@ __GMP_DECLSPEC void __gmp_invalid_operation __GMP_PROTO ((void)) ATTRIBUTE_NORET
  #define PP_FIRST_OMITTED 3
  #endif
  
-
-
  /* BIT1 means a result value in bit 1 (second least significant bit), with a
     zero bit representing +1 and a one bit representing -1.  Bits other than
     bit 1 are garbage.  These are meant to be kept in "int"s, and casts are
@@ -3592,6 +3883,9 @@ __GMP_DECLSPEC void __gmp_invalid_operation __GMP_PROTO ((void)) ATTRIBUTE_NORET
  /* (a/0), with a unsigned; is 1 if a=+/-1, 0 otherwise */
  #define JACOBI_U0(a)   ((a) == 1)
  
+/* FIXME: JACOBI_LS0 and JACOBI_0LS are the same, so delete one and
+   come up with a better name. */
+
  /* (a/0), with a given by low and size;
     is 1 if a=+/-1, 0 otherwise */
  #define JACOBI_LS0(alow,asize) \
@@ -3666,22 +3960,22 @@ __GMP_DECLSPEC void __gmp_invalid_operation __GMP_PROTO ((void)) ATTRIBUTE_NORET
     decrementing b_size.  b_low should be b_ptr[0] on entry, and will be
     updated for the new b_ptr.  result_bit1 is updated according to the
     factors of 2 stripped, as per (a/2).  */
-#define JACOBI_STRIP_LOW_ZEROS(result_bit1, a, b_ptr, b_size, b_low)    \
-  do {                                                                  \
-    ASSERT ((b_size) >= 1);                                             \
-    ASSERT ((b_low) == (b_ptr)[0]);                                     \
-                                                                        \
-    while (UNLIKELY ((b_low) == 0))                                     \
-      {                                                                 \
-        (b_size)--;                                                     \
-        ASSERT ((b_size) >= 1);                                         \
-        (b_ptr)++;                                                      \
-        (b_low) = *(b_ptr);                                             \
-                                                                        \
-        ASSERT (((a) & 1) != 0);                                        \
-        if ((GMP_NUMB_BITS % 2) == 1)                                   \
-          (result_bit1) ^= JACOBI_TWO_U_BIT1(a);                        \
-      }                                                                 \
+#define JACOBI_STRIP_LOW_ZEROS(result_bit1, a, b_ptr, b_size, b_low)   \
+  do {                                                                 \
+    ASSERT ((b_size) >= 1);                                            \
+    ASSERT ((b_low) == (b_ptr)[0]);                                    \
+                                                                       \
+    while (UNLIKELY ((b_low) == 0))                                    \
+      {                                                                        \
+       (b_size)--;                                                     \
+       ASSERT ((b_size) >= 1);                                         \
+       (b_ptr)++;                                                      \
+       (b_low) = *(b_ptr);                                             \
+                                                                       \
+       ASSERT (((a) & 1) != 0);                                        \
+       if ((GMP_NUMB_BITS % 2) == 1)                                   \
+         (result_bit1) ^= JACOBI_TWO_U_BIT1(a);                        \
+      }                                                                        \
    } while (0)
  
  /* Set a_rem to {a_ptr,a_size} reduced modulo b, either using mod_1 or
@@ -3699,33 +3993,97 @@ __GMP_DECLSPEC void __gmp_invalid_operation __GMP_PROTO ((void)) ATTRIBUTE_NORET
     or not skip a divide step, or something. */
  
  #define JACOBI_MOD_OR_MODEXACT_1_ODD(result_bit1, a_rem, a_ptr, a_size, b) \
-  do {                                                                     \
-    mp_srcptr  __a_ptr  = (a_ptr);                                         \
-    mp_size_t  __a_size = (a_size);                                        \
-    mp_limb_t  __b      = (b);                                             \
-                                                                           \
-    ASSERT (__a_size >= 1);                                                \
-    ASSERT (__b & 1);                                                      \
-                                                                           \
-    if ((GMP_NUMB_BITS % 2) != 0                                           \
-        || ABOVE_THRESHOLD (__a_size, BMOD_1_TO_MOD_1_THRESHOLD))          \
-      {                                                                    \
-        (a_rem) = mpn_mod_1 (__a_ptr, __a_size, __b);                      \
-      }                                                                    \
-    else                                                                   \
-      {                                                                    \
-        (result_bit1) ^= JACOBI_N1B_BIT1 (__b);                            \
-        (a_rem) = mpn_modexact_1_odd (__a_ptr, __a_size, __b);             \
-      }                                                                    \
+  do {                                                                    \
+    mp_srcptr  __a_ptr = (a_ptr);                                         \
+    mp_size_t  __a_size = (a_size);                                       \
+    mp_limb_t  __b     = (b);                                             \
+                                                                          \
+    ASSERT (__a_size >= 1);                                               \
+    ASSERT (__b & 1);                                                     \
+                                                                          \
+    if ((GMP_NUMB_BITS % 2) != 0                                          \
+       || ABOVE_THRESHOLD (__a_size, BMOD_1_TO_MOD_1_THRESHOLD))          \
+      {                                                                           \
+       (a_rem) = mpn_mod_1 (__a_ptr, __a_size, __b);                      \
+      }                                                                           \
+    else                                                                  \
+      {                                                                           \
+       (result_bit1) ^= JACOBI_N1B_BIT1 (__b);                            \
+       (a_rem) = mpn_modexact_1_odd (__a_ptr, __a_size, __b);             \
+      }                                                                           \
    } while (0)
  
+/* State for the Jacobi computation using Lehmer. */
+#define jacobi_table __gmp_jacobi_table
+__GMP_DECLSPEC extern const unsigned char jacobi_table[208];
+
+/* Bit layout for the initial state. b must be odd.
+
+      3  2  1 0
+   +--+--+--+--+
+   |a1|a0|b1| s|
+   +--+--+--+--+
+
+ */
+static inline unsigned
+mpn_jacobi_init (unsigned a, unsigned b, unsigned s)
+{
+  ASSERT (b & 1);
+  ASSERT (s <= 1);
+  return ((a & 3) << 2) + (b & 2) + s;
+}
+
+static inline int
+mpn_jacobi_finish (unsigned bits)
+{
+  /* (a, b) = (1,0) or (0,1) */
+  ASSERT ( (bits & 14) == 0);
+
+  return 1-2*(bits & 1);
+}
+
+static inline unsigned
+mpn_jacobi_update (unsigned bits, unsigned denominator, unsigned q)
+{
+  /* FIXME: Could halve table size by not including the e bit in the
+   * index, and instead xor when updating. Then the lookup would be
+   * like
+   *
+   *   bits ^= table[((bits & 30) << 2) + (denominator << 2) + q];
+   */
+
+  ASSERT (bits < 26);
+  ASSERT (denominator < 2);
+  ASSERT (q < 4);
+
+  /* For almost all calls, denominator is constant and quite often q
+     is constant too. So use addition rather than or, so the compiler
+     can put the constant part into the offset of an indexed
+     addressing instruction.
+
+     With constant denominator, the below table lookup is compiled to
+
+       C Constant q = 1, constant denominator = 1
+       movzbl table+5(%eax,8), %eax
+
+     or
+
+       C q in %edx, constant denominator = 1
+       movzbl table+4(%edx,%eax,8), %eax
+
+     One could maintain the state preshifted 3 bits, to save a shift
+     here, but at least on x86, that's no real saving.
+  */
+  return bits = jacobi_table[(bits << 3) + (denominator << 2) + q];
+}
+
  /* Matrix multiplication */
  #define   mpn_matrix22_mul __MPN(matrix22_mul)
-__GMP_DECLSPEC void      mpn_matrix22_mul __GMP_PROTO ((mp_ptr, mp_ptr, mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_srcptr, mp_srcptr, mp_srcptr, mp_size_t, mp_ptr));
+__GMP_DECLSPEC void      mpn_matrix22_mul (mp_ptr, mp_ptr, mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_srcptr, mp_srcptr, mp_srcptr, mp_size_t, mp_ptr);
  #define   mpn_matrix22_mul_strassen __MPN(matrix22_mul_strassen)
-__GMP_DECLSPEC void      mpn_matrix22_mul_strassen __GMP_PROTO ((mp_ptr, mp_ptr, mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_srcptr, mp_srcptr, mp_srcptr, mp_size_t, mp_ptr));
+__GMP_DECLSPEC void      mpn_matrix22_mul_strassen (mp_ptr, mp_ptr, mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_srcptr, mp_srcptr, mp_srcptr, mp_size_t, mp_ptr);
  #define   mpn_matrix22_mul_itch __MPN(matrix22_mul_itch)
-__GMP_DECLSPEC mp_size_t mpn_matrix22_mul_itch __GMP_PROTO ((mp_size_t, mp_size_t));
+__GMP_DECLSPEC mp_size_t mpn_matrix22_mul_itch (mp_size_t, mp_size_t);
  
  #ifndef MATRIX22_STRASSEN_THRESHOLD
  #define MATRIX22_STRASSEN_THRESHOLD 30
@@ -3763,13 +4121,16 @@ struct hgcd_matrix1
  };
  
  #define mpn_hgcd2 __MPN (hgcd2)
-__GMP_DECLSPEC int mpn_hgcd2 __GMP_PROTO ((mp_limb_t, mp_limb_t, mp_limb_t, mp_limb_t, struct hgcd_matrix1 *));
+__GMP_DECLSPEC int mpn_hgcd2 (mp_limb_t, mp_limb_t, mp_limb_t, mp_limb_t,      struct hgcd_matrix1 *);
  
  #define mpn_hgcd_mul_matrix1_vector __MPN (hgcd_mul_matrix1_vector)
-__GMP_DECLSPEC mp_size_t mpn_hgcd_mul_matrix1_vector __GMP_PROTO ((const struct hgcd_matrix1 *, mp_ptr, mp_srcptr, mp_ptr, mp_size_t));
+__GMP_DECLSPEC mp_size_t mpn_hgcd_mul_matrix1_vector (const struct hgcd_matrix1 *, mp_ptr, mp_srcptr, mp_ptr, mp_size_t);
+
+#define mpn_matrix22_mul1_inverse_vector __MPN (matrix22_mul1_inverse_vector)
+__GMP_DECLSPEC mp_size_t mpn_matrix22_mul1_inverse_vector (const struct hgcd_matrix1 *, mp_ptr, mp_srcptr, mp_ptr, mp_size_t);
  
-#define mpn_hgcd_mul_matrix1_inverse_vector __MPN (hgcd_mul_matrix1_inverse_vector)
-__GMP_DECLSPEC mp_size_t mpn_hgcd_mul_matrix1_inverse_vector __GMP_PROTO ((const struct hgcd_matrix1 *, mp_ptr, mp_srcptr, mp_ptr, mp_size_t));
+#define mpn_hgcd2_jacobi __MPN (hgcd2_jacobi)
+__GMP_DECLSPEC int mpn_hgcd2_jacobi (mp_limb_t, mp_limb_t, mp_limb_t, mp_limb_t, struct hgcd_matrix1 *, unsigned *);
  
  struct hgcd_matrix
  {
@@ -3781,43 +4142,72 @@ struct hgcd_matrix
  #define MPN_HGCD_MATRIX_INIT_ITCH(n) (4 * ((n+1)/2 + 1))
  
  #define mpn_hgcd_matrix_init __MPN (hgcd_matrix_init)
-__GMP_DECLSPEC void mpn_hgcd_matrix_init __GMP_PROTO ((struct hgcd_matrix *, mp_size_t, mp_ptr));
+__GMP_DECLSPEC void mpn_hgcd_matrix_init (struct hgcd_matrix *, mp_size_t, mp_ptr);
+
+#define mpn_hgcd_matrix_update_q __MPN (hgcd_matrix_update_q)
+__GMP_DECLSPEC void mpn_hgcd_matrix_update_q (struct hgcd_matrix *, mp_srcptr, mp_size_t, unsigned, mp_ptr);
+
+#define mpn_hgcd_matrix_mul_1 __MPN (hgcd_matrix_mul_1)
+__GMP_DECLSPEC void mpn_hgcd_matrix_mul_1 (struct hgcd_matrix *, const struct hgcd_matrix1 *, mp_ptr);
  
  #define mpn_hgcd_matrix_mul __MPN (hgcd_matrix_mul)
-__GMP_DECLSPEC void mpn_hgcd_matrix_mul __GMP_PROTO ((struct hgcd_matrix *, const struct hgcd_matrix *, mp_ptr));
+__GMP_DECLSPEC void mpn_hgcd_matrix_mul (struct hgcd_matrix *, const struct hgcd_matrix *, mp_ptr);
  
  #define mpn_hgcd_matrix_adjust __MPN (hgcd_matrix_adjust)
-__GMP_DECLSPEC mp_size_t mpn_hgcd_matrix_adjust __GMP_PROTO ((struct hgcd_matrix *, mp_size_t, mp_ptr, mp_ptr, mp_size_t, mp_ptr));
+__GMP_DECLSPEC mp_size_t mpn_hgcd_matrix_adjust (const struct hgcd_matrix *, mp_size_t, mp_ptr, mp_ptr, mp_size_t, mp_ptr);
+
+#define mpn_hgcd_step __MPN(hgcd_step)
+__GMP_DECLSPEC mp_size_t mpn_hgcd_step (mp_size_t, mp_ptr, mp_ptr, mp_size_t, struct hgcd_matrix *, mp_ptr);
+
+#define mpn_hgcd_reduce __MPN(hgcd_reduce)
+__GMP_DECLSPEC mp_size_t mpn_hgcd_reduce (struct hgcd_matrix *, mp_ptr, mp_ptr, mp_size_t, mp_size_t, mp_ptr);
+
+#define mpn_hgcd_reduce_itch __MPN(hgcd_reduce_itch)
+__GMP_DECLSPEC mp_size_t mpn_hgcd_reduce_itch (mp_size_t, mp_size_t);
  
  #define mpn_hgcd_itch __MPN (hgcd_itch)
-__GMP_DECLSPEC mp_size_t mpn_hgcd_itch __GMP_PROTO ((mp_size_t));
+__GMP_DECLSPEC mp_size_t mpn_hgcd_itch (mp_size_t);
  
  #define mpn_hgcd __MPN (hgcd)
-__GMP_DECLSPEC mp_size_t mpn_hgcd __GMP_PROTO ((mp_ptr, mp_ptr, mp_size_t, struct hgcd_matrix *, mp_ptr));
+__GMP_DECLSPEC mp_size_t mpn_hgcd (mp_ptr, mp_ptr, mp_size_t, struct hgcd_matrix *, mp_ptr);
+
+#define mpn_hgcd_appr_itch __MPN (hgcd_appr_itch)
+__GMP_DECLSPEC mp_size_t mpn_hgcd_appr_itch (mp_size_t);
+
+#define mpn_hgcd_appr __MPN (hgcd_appr)
+__GMP_DECLSPEC int mpn_hgcd_appr (mp_ptr, mp_ptr, mp_size_t, struct hgcd_matrix *, mp_ptr);
  
-#define MPN_HGCD_LEHMER_ITCH(n) (n)
+#define mpn_hgcd_jacobi __MPN (hgcd_jacobi)
+__GMP_DECLSPEC mp_size_t mpn_hgcd_jacobi (mp_ptr, mp_ptr, mp_size_t, struct hgcd_matrix *, unsigned *, mp_ptr);
  
-#define mpn_hgcd_lehmer __MPN (hgcd_lehmer)
-__GMP_DECLSPEC mp_size_t mpn_hgcd_lehmer __GMP_PROTO ((mp_ptr, mp_ptr, mp_size_t, struct hgcd_matrix *, mp_ptr));
+typedef void gcd_subdiv_step_hook(void *, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, int);
  
  /* Needs storage for the quotient */
  #define MPN_GCD_SUBDIV_STEP_ITCH(n) (n)
  
  #define mpn_gcd_subdiv_step __MPN(gcd_subdiv_step)
-__GMP_DECLSPEC mp_size_t mpn_gcd_subdiv_step __GMP_PROTO ((mp_ptr, mp_size_t *, mp_ptr, mp_ptr, mp_size_t, mp_ptr));
+__GMP_DECLSPEC mp_size_t mpn_gcd_subdiv_step (mp_ptr, mp_ptr, mp_size_t, mp_size_t, gcd_subdiv_step_hook *, void *, mp_ptr);
  
-#define MPN_GCD_LEHMER_N_ITCH(n) (n)
-
-#define mpn_gcd_lehmer_n __MPN(gcd_lehmer_n)
-__GMP_DECLSPEC mp_size_t mpn_gcd_lehmer_n __GMP_PROTO ((mp_ptr, mp_ptr, mp_ptr, mp_size_t, mp_ptr));
+struct gcdext_ctx
+{
+  /* Result parameters. */
+  mp_ptr gp;
+  mp_size_t gn;
+  mp_ptr up;
+  mp_size_t *usize;
+
+  /* Cofactors updated in each step. */
+  mp_size_t un;
+  mp_ptr u0, u1, tp;
+};
  
-#define mpn_gcdext_subdiv_step __MPN(gcdext_subdiv_step)
-__GMP_DECLSPEC mp_size_t mpn_gcdext_subdiv_step __GMP_PROTO ((mp_ptr, mp_size_t *, mp_ptr, mp_size_t *, mp_ptr, mp_ptr, mp_size_t, mp_ptr, mp_ptr, mp_size_t *, mp_ptr, mp_ptr));
+#define mpn_gcdext_hook __MPN (gcdext_hook)
+gcd_subdiv_step_hook mpn_gcdext_hook;
  
  #define MPN_GCDEXT_LEHMER_N_ITCH(n) (4*(n) + 3)
  
  #define mpn_gcdext_lehmer_n __MPN(gcdext_lehmer_n)
-__GMP_DECLSPEC mp_size_t mpn_gcdext_lehmer_n __GMP_PROTO ((mp_ptr, mp_ptr, mp_size_t *, mp_ptr, mp_ptr, mp_size_t, mp_ptr));
+__GMP_DECLSPEC mp_size_t mpn_gcdext_lehmer_n (mp_ptr, mp_ptr, mp_size_t *, mp_ptr, mp_ptr, mp_size_t, mp_ptr);
  
  /* 4*(an + 1) + 4*(bn + 1) + an */
  #define MPN_GCDEXT_LEHMER_ITCH(an, bn) (5*(an) + 4*(bn) + 8)
@@ -3826,6 +4216,14 @@ __GMP_DECLSPEC mp_size_t mpn_gcdext_lehmer_n __GMP_PROTO ((mp_ptr, mp_ptr, mp_si
  #define HGCD_THRESHOLD 400
  #endif
  
+#ifndef HGCD_APPR_THRESHOLD
+#define HGCD_APPR_THRESHOLD 400
+#endif
+
+#ifndef HGCD_REDUCE_THRESHOLD
+#define HGCD_REDUCE_THRESHOLD 1000
+#endif
+
  #ifndef GCD_DC_THRESHOLD
  #define GCD_DC_THRESHOLD 1000
  #endif
@@ -3850,11 +4248,11 @@ typedef struct powers powers_t;
  #define mpn_dc_get_str_itch(n) ((n) + GMP_LIMB_BITS)
  
  #define   mpn_dc_set_str __MPN(dc_set_str)
-__GMP_DECLSPEC mp_size_t mpn_dc_set_str __GMP_PROTO ((mp_ptr, const unsigned char *, size_t, const powers_t *, mp_ptr));
+__GMP_DECLSPEC mp_size_t mpn_dc_set_str (mp_ptr, const unsigned char *, size_t, const powers_t *, mp_ptr);
  #define   mpn_bc_set_str __MPN(bc_set_str)
-__GMP_DECLSPEC mp_size_t mpn_bc_set_str __GMP_PROTO ((mp_ptr, const unsigned char *, size_t, int));
+__GMP_DECLSPEC mp_size_t mpn_bc_set_str (mp_ptr, const unsigned char *, size_t, int);
  #define   mpn_set_str_compute_powtab __MPN(set_str_compute_powtab)
-__GMP_DECLSPEC void      mpn_set_str_compute_powtab __GMP_PROTO ((powers_t *, mp_ptr, mp_size_t, int));
+__GMP_DECLSPEC void      mpn_set_str_compute_powtab (powers_t *, mp_ptr, mp_size_t, int);
  
  
  /* __GMPF_BITS_TO_PREC applies a minimum 53 bits, rounds upwards to a whole
@@ -3868,6 +4266,25 @@ __GMP_DECLSPEC void      mpn_set_str_compute_powtab __GMP_PROTO ((powers_t *, mp
  
  __GMP_DECLSPEC extern mp_size_t __gmp_default_fp_limb_precision;
  
+/* Compute the number of base-b digits corresponding to nlimbs limbs, rounding
+   down.  */
+#define DIGITS_IN_BASE_PER_LIMB(res, nlimbs, b)                                \
+  do {                                                                 \
+    mp_limb_t _ph, _pl;                                                        \
+    umul_ppmm (_ph, _pl,                                               \
+              mp_bases[b].logb2, GMP_NUMB_BITS * (mp_limb_t) (nlimbs));\
+    res = _ph;                                                         \
+  } while (0)
+
+/* Compute the number of limbs corresponding to ndigits base-b digits, rounding
+   up.  */
+#define LIMBS_PER_DIGIT_IN_BASE(res, ndigits, b)                       \
+  do {                                                                 \
+    mp_limb_t _ph, _dummy;                                             \
+    umul_ppmm (_ph, _dummy, mp_bases[b].log2b, (mp_limb_t) (ndigits)); \
+    res = 8 * _ph / GMP_NUMB_BITS + 2;                                 \
+  } while (0)
+
  
  /* Set n to the number of significant digits an mpf of the given _mp_prec
     field, in the given base.  This is a rounded up value, designed to ensure
@@ -3881,11 +4298,12 @@ __GMP_DECLSPEC extern mp_size_t __gmp_default_fp_limb_precision;
     GMP_LIMB_BITS then the +2 is unnecessary.  This happens always for
     base==2, and in base==16 with the current 32 or 64 bit limb sizes. */
  
-#define MPF_SIGNIFICANT_DIGITS(n, base, prec)                           \
-  do {                                                                  \
-    ASSERT (base >= 2 && base < numberof (mp_bases));                   \
-    (n) = 2 + (size_t) ((((size_t) (prec) - 1) * GMP_NUMB_BITS)         \
-                        * mp_bases[(base)].chars_per_bit_exactly);      \
+#define MPF_SIGNIFICANT_DIGITS(n, base, prec)                          \
+  do {                                                                 \
+    size_t rawn;                                                       \
+    ASSERT (base >= 2 && base < numberof (mp_bases));                  \
+    DIGITS_IN_BASE_PER_LIMB (rawn, (prec) - 1, base);                  \
+    n = rawn + 2;                                                      \
    } while (0)
  
  
@@ -3944,10 +4362,10 @@ struct doprnt_params_t {
  
  #if _GMP_H_HAVE_VA_LIST
  
-__GMP_DECLSPEC typedef int (*doprnt_format_t) __GMP_PROTO ((void *, const char *, va_list));
-__GMP_DECLSPEC typedef int (*doprnt_memory_t) __GMP_PROTO ((void *, const char *, size_t));
-__GMP_DECLSPEC typedef int (*doprnt_reps_t)   __GMP_PROTO ((void *, int, int));
-__GMP_DECLSPEC typedef int (*doprnt_final_t)  __GMP_PROTO ((void *));
+typedef int (*doprnt_format_t) (void *, const char *, va_list);
+typedef int (*doprnt_memory_t) (void *, const char *, size_t);
+typedef int (*doprnt_reps_t)   (void *, int, int);
+typedef int (*doprnt_final_t)  (void *);
  
  struct doprnt_funs_t {
    doprnt_format_t  format;
@@ -3973,35 +4391,35 @@ struct gmp_asprintf_t {
    size_t  alloc;
  };
  
-#define GMP_ASPRINTF_T_INIT(d, output)                          \
-  do {                                                          \
-    (d).result = (output);                                      \
-    (d).alloc = 256;                                            \
-    (d).buf = (char *) (*__gmp_allocate_func) ((d).alloc);      \
-    (d).size = 0;                                               \
+#define GMP_ASPRINTF_T_INIT(d, output)                                 \
+  do {                                                                 \
+    (d).result = (output);                                             \
+    (d).alloc = 256;                                                   \
+    (d).buf = (char *) (*__gmp_allocate_func) ((d).alloc);             \
+    (d).size = 0;                                                      \
    } while (0)
  
  /* If a realloc is necessary, use twice the size actually required, so as to
     avoid repeated small reallocs.  */
-#define GMP_ASPRINTF_T_NEED(d, n)                                       \
-  do {                                                                  \
-    size_t  alloc, newsize, newalloc;                                   \
-    ASSERT ((d)->alloc >= (d)->size + 1);                               \
-                                                                        \
-    alloc = (d)->alloc;                                                 \
-    newsize = (d)->size + (n);                                          \
-    if (alloc <= newsize)                                               \
-      {                                                                 \
-        newalloc = 2*newsize;                                           \
-        (d)->alloc = newalloc;                                          \
-        (d)->buf = __GMP_REALLOCATE_FUNC_TYPE ((d)->buf,                \
-                                               alloc, newalloc, char);  \
-      }                                                                 \
+#define GMP_ASPRINTF_T_NEED(d, n)                                      \
+  do {                                                                 \
+    size_t  alloc, newsize, newalloc;                                  \
+    ASSERT ((d)->alloc >= (d)->size + 1);                              \
+                                                                       \
+    alloc = (d)->alloc;                                                        \
+    newsize = (d)->size + (n);                                         \
+    if (alloc <= newsize)                                              \
+      {                                                                        \
+       newalloc = 2*newsize;                                           \
+       (d)->alloc = newalloc;                                          \
+       (d)->buf = __GMP_REALLOCATE_FUNC_TYPE ((d)->buf,                \
+                                              alloc, newalloc, char);  \
+      }                                                                        \
    } while (0)
  
-__GMP_DECLSPEC int __gmp_asprintf_memory __GMP_PROTO ((struct gmp_asprintf_t *, const char *, size_t));
-__GMP_DECLSPEC int __gmp_asprintf_reps __GMP_PROTO ((struct gmp_asprintf_t *, int, int));
-__GMP_DECLSPEC int __gmp_asprintf_final __GMP_PROTO ((struct gmp_asprintf_t *));
+__GMP_DECLSPEC int __gmp_asprintf_memory (struct gmp_asprintf_t *, const char *, size_t);
+__GMP_DECLSPEC int __gmp_asprintf_reps (struct gmp_asprintf_t *, int, int);
+__GMP_DECLSPEC int __gmp_asprintf_final (struct gmp_asprintf_t *);
  
  /* buf is where to write the next output, and size is how much space is left
     there.  If the application passed size==0 then that's what we'll have
@@ -4013,54 +4431,54 @@ struct gmp_snprintf_t {
  
  /* Add the bytes printed by the call to the total retval, or bail out on an
     error.  */
-#define DOPRNT_ACCUMULATE(call) \
-  do {                          \
-    int  __ret;                 \
-    __ret = call;               \
-    if (__ret == -1)            \
-      goto error;               \
-    retval += __ret;            \
+#define DOPRNT_ACCUMULATE(call)                                                \
+  do {                                                                 \
+    int  __ret;                                                                \
+    __ret = call;                                                      \
+    if (__ret == -1)                                                   \
+      goto error;                                                      \
+    retval += __ret;                                                   \
    } while (0)
-#define DOPRNT_ACCUMULATE_FUN(fun, params)      \
-  do {                                          \
-    ASSERT ((fun) != NULL);                     \
-    DOPRNT_ACCUMULATE ((*(fun)) params);        \
+#define DOPRNT_ACCUMULATE_FUN(fun, params)                             \
+  do {                                                                 \
+    ASSERT ((fun) != NULL);                                            \
+    DOPRNT_ACCUMULATE ((*(fun)) params);                               \
    } while (0)
  
-#define DOPRNT_FORMAT(fmt, ap)                          \
+#define DOPRNT_FORMAT(fmt, ap)                                         \
    DOPRNT_ACCUMULATE_FUN (funs->format, (data, fmt, ap))
-#define DOPRNT_MEMORY(ptr, len)                                 \
+#define DOPRNT_MEMORY(ptr, len)                                                \
    DOPRNT_ACCUMULATE_FUN (funs->memory, (data, ptr, len))
-#define DOPRNT_REPS(c, n)                               \
+#define DOPRNT_REPS(c, n)                                              \
    DOPRNT_ACCUMULATE_FUN (funs->reps, (data, c, n))
  
  #define DOPRNT_STRING(str)      DOPRNT_MEMORY (str, strlen (str))
  
-#define DOPRNT_REPS_MAYBE(c, n) \
-  do {                          \
-    if ((n) != 0)               \
-      DOPRNT_REPS (c, n);       \
+#define DOPRNT_REPS_MAYBE(c, n)                                                \
+  do {                                                                 \
+    if ((n) != 0)                                                      \
+      DOPRNT_REPS (c, n);                                              \
    } while (0)
-#define DOPRNT_MEMORY_MAYBE(ptr, len)   \
-  do {                                  \
-    if ((len) != 0)                     \
-      DOPRNT_MEMORY (ptr, len);         \
+#define DOPRNT_MEMORY_MAYBE(ptr, len)                                  \
+  do {                                                                 \
+    if ((len) != 0)                                                    \
+      DOPRNT_MEMORY (ptr, len);                                                \
    } while (0)
  
-__GMP_DECLSPEC int __gmp_doprnt __GMP_PROTO ((const struct doprnt_funs_t *, void *, const char *, va_list));
-__GMP_DECLSPEC int __gmp_doprnt_integer __GMP_PROTO ((const struct doprnt_funs_t *, void *, const struct doprnt_params_t *, const char *));
+__GMP_DECLSPEC int __gmp_doprnt (const struct doprnt_funs_t *, void *, const char *, va_list);
+__GMP_DECLSPEC int __gmp_doprnt_integer (const struct doprnt_funs_t *, void *, const struct doprnt_params_t *, const char *);
  
  #define __gmp_doprnt_mpf __gmp_doprnt_mpf2
-__GMP_DECLSPEC int __gmp_doprnt_mpf __GMP_PROTO ((const struct doprnt_funs_t *, void *, const struct doprnt_params_t *, const char *, mpf_srcptr));
+__GMP_DECLSPEC int __gmp_doprnt_mpf (const struct doprnt_funs_t *, void *, const struct doprnt_params_t *, const char *, mpf_srcptr);
  
-__GMP_DECLSPEC int __gmp_replacement_vsnprintf __GMP_PROTO ((char *, size_t, const char *, va_list));
+__GMP_DECLSPEC int __gmp_replacement_vsnprintf (char *, size_t, const char *, va_list);
  #endif /* _GMP_H_HAVE_VA_LIST */
  
  
-typedef int (*gmp_doscan_scan_t)  __GMP_PROTO ((void *, const char *, ...));
-typedef void *(*gmp_doscan_step_t) __GMP_PROTO ((void *, int));
-typedef int (*gmp_doscan_get_t)   __GMP_PROTO ((void *));
-typedef int (*gmp_doscan_unget_t) __GMP_PROTO ((int, void *));
+typedef int (*gmp_doscan_scan_t)  (void *, const char *, ...);
+typedef void *(*gmp_doscan_step_t) (void *, int);
+typedef int (*gmp_doscan_get_t)   (void *);
+typedef int (*gmp_doscan_unget_t) (int, void *);
  
  struct gmp_doscan_funs_t {
    gmp_doscan_scan_t   scan;
@@ -4072,53 +4490,53 @@ extern const struct gmp_doscan_funs_t  __gmp_fscanf_funs;
  extern const struct gmp_doscan_funs_t  __gmp_sscanf_funs;
  
  #if _GMP_H_HAVE_VA_LIST
-__GMP_DECLSPEC int __gmp_doscan __GMP_PROTO ((const struct gmp_doscan_funs_t *, void *, const char *, va_list));
+__GMP_DECLSPEC int __gmp_doscan (const struct gmp_doscan_funs_t *, void *, const char *, va_list);
  #endif
  
  
  /* For testing and debugging.  */
-#define MPZ_CHECK_FORMAT(z)                                    \
-  do {                                                          \
-    ASSERT_ALWAYS (SIZ(z) == 0 || PTR(z)[ABSIZ(z) - 1] != 0);  \
-    ASSERT_ALWAYS (ALLOC(z) >= ABSIZ(z));                      \
-    ASSERT_ALWAYS_MPN (PTR(z), ABSIZ(z));                       \
+#define MPZ_CHECK_FORMAT(z)                                            \
+  do {                                                                 \
+    ASSERT_ALWAYS (SIZ(z) == 0 || PTR(z)[ABSIZ(z) - 1] != 0);          \
+    ASSERT_ALWAYS (ALLOC(z) >= ABSIZ(z));                              \
+    ASSERT_ALWAYS_MPN (PTR(z), ABSIZ(z));                              \
    } while (0)
  
-#define MPQ_CHECK_FORMAT(q)                             \
-  do {                                                  \
-    MPZ_CHECK_FORMAT (mpq_numref (q));                  \
-    MPZ_CHECK_FORMAT (mpq_denref (q));                  \
-    ASSERT_ALWAYS (SIZ(mpq_denref(q)) >= 1);            \
-                                                        \
-    if (SIZ(mpq_numref(q)) == 0)                        \
-      {                                                 \
-        /* should have zero as 0/1 */                   \
-        ASSERT_ALWAYS (SIZ(mpq_denref(q)) == 1          \
-                       && PTR(mpq_denref(q))[0] == 1);  \
-      }                                                 \
-    else                                                \
-      {                                                 \
-        /* should have no common factors */             \
-        mpz_t  g;                                       \
-        mpz_init (g);                                   \
-        mpz_gcd (g, mpq_numref(q), mpq_denref(q));      \
-        ASSERT_ALWAYS (mpz_cmp_ui (g, 1) == 0);         \
-        mpz_clear (g);                                  \
-      }                                                 \
+#define MPQ_CHECK_FORMAT(q)                                            \
+  do {                                                                 \
+    MPZ_CHECK_FORMAT (mpq_numref (q));                                 \
+    MPZ_CHECK_FORMAT (mpq_denref (q));                                 \
+    ASSERT_ALWAYS (SIZ(mpq_denref(q)) >= 1);                           \
+                                                                       \
+    if (SIZ(mpq_numref(q)) == 0)                                       \
+      {                                                                        \
+       /* should have zero as 0/1 */                                   \
+       ASSERT_ALWAYS (SIZ(mpq_denref(q)) == 1                          \
+                      && PTR(mpq_denref(q))[0] == 1);                  \
+      }                                                                        \
+    else                                                               \
+      {                                                                        \
+       /* should have no common factors */                             \
+       mpz_t  g;                                                       \
+       mpz_init (g);                                                   \
+       mpz_gcd (g, mpq_numref(q), mpq_denref(q));                      \
+       ASSERT_ALWAYS (mpz_cmp_ui (g, 1) == 0);                         \
+       mpz_clear (g);                                                  \
+      }                                                                        \
    } while (0)
  
-#define MPF_CHECK_FORMAT(f)                             \
-  do {                                                  \
-    ASSERT_ALWAYS (PREC(f) >= __GMPF_BITS_TO_PREC(53)); \
-    ASSERT_ALWAYS (ABSIZ(f) <= PREC(f)+1);              \
-    if (SIZ(f) == 0)                                    \
-      ASSERT_ALWAYS (EXP(f) == 0);                      \
-    if (SIZ(f) != 0)                                    \
-      ASSERT_ALWAYS (PTR(f)[ABSIZ(f) - 1] != 0);        \
+#define MPF_CHECK_FORMAT(f)                                            \
+  do {                                                                 \
+    ASSERT_ALWAYS (PREC(f) >= __GMPF_BITS_TO_PREC(53));                        \
+    ASSERT_ALWAYS (ABSIZ(f) <= PREC(f)+1);                             \
+    if (SIZ(f) == 0)                                                   \
+      ASSERT_ALWAYS (EXP(f) == 0);                                     \
+    if (SIZ(f) != 0)                                                   \
+      ASSERT_ALWAYS (PTR(f)[ABSIZ(f) - 1] != 0);                       \
    } while (0)
  
  
-#define MPZ_PROVOKE_REALLOC(z)                                 \
+#define MPZ_PROVOKE_REALLOC(z)                                         \
    do { ALLOC(z) = ABSIZ(z); } while (0)
  
  
@@ -4133,46 +4551,62 @@ __GMP_DECLSPEC int __gmp_doscan __GMP_PROTO ((const struct gmp_doscan_funs_t *,
     in mpn/x86/x86-defs.m4.  Be sure to update that when changing here.  */
  struct cpuvec_t {
    DECL_add_n           ((*add_n));
+  DECL_addlsh1_n       ((*addlsh1_n));
+  DECL_addlsh2_n       ((*addlsh2_n));
    DECL_addmul_1        ((*addmul_1));
+  DECL_addmul_2        ((*addmul_2));
+  DECL_bdiv_dbm1c      ((*bdiv_dbm1c));
+  DECL_com             ((*com));
    DECL_copyd           ((*copyd));
    DECL_copyi           ((*copyi));
    DECL_divexact_1      ((*divexact_1));
-  DECL_divexact_by3c   ((*divexact_by3c));
    DECL_divrem_1        ((*divrem_1));
    DECL_gcd_1           ((*gcd_1));
    DECL_lshift          ((*lshift));
+  DECL_lshiftc         ((*lshiftc));
    DECL_mod_1           ((*mod_1));
+  DECL_mod_1_1p        ((*mod_1_1p));
+  DECL_mod_1_1p_cps    ((*mod_1_1p_cps));
+  DECL_mod_1s_2p       ((*mod_1s_2p));
+  DECL_mod_1s_2p_cps   ((*mod_1s_2p_cps));
+  DECL_mod_1s_4p       ((*mod_1s_4p));
+  DECL_mod_1s_4p_cps   ((*mod_1s_4p_cps));
    DECL_mod_34lsub1     ((*mod_34lsub1));
    DECL_modexact_1c_odd ((*modexact_1c_odd));
    DECL_mul_1           ((*mul_1));
    DECL_mul_basecase    ((*mul_basecase));
+  DECL_mullo_basecase  ((*mullo_basecase));
    DECL_preinv_divrem_1 ((*preinv_divrem_1));
    DECL_preinv_mod_1    ((*preinv_mod_1));
+  DECL_redc_1          ((*redc_1));
+  DECL_redc_2          ((*redc_2));
    DECL_rshift          ((*rshift));
    DECL_sqr_basecase    ((*sqr_basecase));
    DECL_sub_n           ((*sub_n));
+  DECL_sublsh1_n       ((*sublsh1_n));
    DECL_submul_1        ((*submul_1));
-  int                  initialized;
    mp_size_t            mul_toom22_threshold;
    mp_size_t            mul_toom33_threshold;
    mp_size_t            sqr_toom2_threshold;
    mp_size_t            sqr_toom3_threshold;
+  mp_size_t            bmod_1_to_mod_1_threshold;
  };
  __GMP_DECLSPEC extern struct cpuvec_t __gmpn_cpuvec;
+__GMP_DECLSPEC extern int __gmpn_cpuvec_initialized;
  #endif /* x86 fat binary */
  
-__GMP_DECLSPEC void __gmpn_cpuvec_init __GMP_PROTO ((void));
+__GMP_DECLSPEC void __gmpn_cpuvec_init (void);
  
  /* Get a threshold "field" from __gmpn_cpuvec, running __gmpn_cpuvec_init()
     if that hasn't yet been done (to establish the right values).  */
-#define CPUVEC_THRESHOLD(field)                                               \
-  ((LIKELY (__gmpn_cpuvec.initialized) ? 0 : (__gmpn_cpuvec_init (), 0)),     \
+#define CPUVEC_THRESHOLD(field)                                                      \
+  ((LIKELY (__gmpn_cpuvec_initialized) ? 0 : (__gmpn_cpuvec_init (), 0)),     \
     __gmpn_cpuvec.field)
  
  
  #if HAVE_NATIVE_mpn_add_nc
  #define mpn_add_nc __MPN(add_nc)
-__GMP_DECLSPEC mp_limb_t mpn_add_nc __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t));
+__GMP_DECLSPEC mp_limb_t mpn_add_nc (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t);
  #else
  static inline
  mp_limb_t
@@ -4187,7 +4621,7 @@ mpn_add_nc (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n, mp_limb_t ci)
  
  #if HAVE_NATIVE_mpn_sub_nc
  #define mpn_sub_nc __MPN(sub_nc)
-__GMP_DECLSPEC mp_limb_t mpn_sub_nc __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t));
+__GMP_DECLSPEC mp_limb_t mpn_sub_nc (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t);
  #else
  static inline mp_limb_t
  mpn_sub_nc (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n, mp_limb_t ci)
@@ -4256,6 +4690,10 @@ extern mp_size_t                 mul_toom42_to_toom53_threshold;
  #define MUL_TOOM42_TO_TOOM63_THRESHOLD mul_toom42_to_toom63_threshold
  extern mp_size_t                       mul_toom42_to_toom63_threshold;
  
+#undef  MUL_TOOM43_TO_TOOM54_THRESHOLD
+#define MUL_TOOM43_TO_TOOM54_THRESHOLD mul_toom43_to_toom54_threshold /* no ';': used in expressions */
+extern mp_size_t                       mul_toom43_to_toom54_threshold;
+
  #undef MUL_FFT_THRESHOLD
  #define MUL_FFT_THRESHOLD              mul_fft_threshold
  extern mp_size_t                       mul_fft_threshold;
@@ -4329,6 +4767,14 @@ extern mp_size_t                 mullo_dc_threshold;
  #define MULLO_MUL_N_THRESHOLD          mullo_mul_n_threshold
  extern mp_size_t                       mullo_mul_n_threshold;
  
+#undef MULMID_TOOM42_THRESHOLD
+#define MULMID_TOOM42_THRESHOLD                mulmid_toom42_threshold
+extern mp_size_t                       mulmid_toom42_threshold;
+
+#undef DIV_QR_2_PI2_THRESHOLD
+#define DIV_QR_2_PI2_THRESHOLD         div_qr_2_pi2_threshold
+extern mp_size_t                       div_qr_2_pi2_threshold;
+
  #undef DC_DIV_QR_THRESHOLD
  #define DC_DIV_QR_THRESHOLD            dc_div_qr_threshold
  extern mp_size_t                       dc_div_qr_threshold;
@@ -4401,6 +4847,14 @@ extern mp_size_t                 matrix22_strassen_threshold;
  #define HGCD_THRESHOLD                 hgcd_threshold
  extern mp_size_t                       hgcd_threshold;
  
+#undef HGCD_APPR_THRESHOLD
+#define HGCD_APPR_THRESHOLD            hgcd_appr_threshold
+extern mp_size_t                       hgcd_appr_threshold;
+
+#undef HGCD_REDUCE_THRESHOLD
+#define HGCD_REDUCE_THRESHOLD          hgcd_reduce_threshold
+extern mp_size_t                       hgcd_reduce_threshold;
+
  #undef GCD_DC_THRESHOLD
  #define GCD_DC_THRESHOLD               gcd_dc_threshold
  extern mp_size_t                       gcd_dc_threshold;
@@ -4425,6 +4879,10 @@ extern mp_size_t                 mod_1_norm_threshold;
  #define MOD_1_UNNORM_THRESHOLD         mod_1_unnorm_threshold
  extern mp_size_t                       mod_1_unnorm_threshold;
  
+#undef  MOD_1_1P_METHOD
+#define MOD_1_1P_METHOD                        mod_1_1p_method
+extern int                             mod_1_1p_method;
+
  #undef MOD_1N_TO_MOD_1_1_THRESHOLD
  #define MOD_1N_TO_MOD_1_1_THRESHOLD    mod_1n_to_mod_1_1_threshold
  extern mp_size_t                       mod_1n_to_mod_1_1_threshold;
@@ -4475,6 +4933,14 @@ extern mp_size_t                 set_str_dc_threshold;
  #define SET_STR_PRECOMPUTE_THRESHOLD   set_str_precompute_threshold
  extern mp_size_t                       set_str_precompute_threshold;
  
+#undef  FAC_ODD_THRESHOLD
+#define FAC_ODD_THRESHOLD              fac_odd_threshold
+extern  mp_size_t                      fac_odd_threshold;
+
+#undef  FAC_DSC_THRESHOLD
+#define FAC_DSC_THRESHOLD              fac_dsc_threshold
+extern  mp_size_t                      fac_dsc_threshold;
+
  #undef  FFT_TABLE_ATTRS
  #define FFT_TABLE_ATTRS
  extern mp_size_t  mpn_fft_table[2][MPN_FFT_TABLE_SIZE];
@@ -4498,6 +4964,7 @@ extern struct fft_table_nk mpn_fft_table3[2][FFT_TABLE3_SIZE];
  #define SQR_TOOM8_THRESHOLD_LIMIT      1200
  #define MULLO_BASECASE_THRESHOLD_LIMIT  200
  #define GET_STR_THRESHOLD_LIMIT         150
+#define FAC_DSC_THRESHOLD_LIMIT        2048
  
  #endif /* TUNE_PROGRAM_BUILD */
  
@@ -4538,14 +5005,17 @@ extern struct fft_table_nk mpn_fft_table3[2][FFT_TABLE3_SIZE];
    (3 * (an) + GMP_NUMB_BITS)
  
  #define mpn_toom6_sqr_itch(n)                                          \
-( ((n) - SQR_TOOM6_THRESHOLD)*2 +                                      \
-   MAX(SQR_TOOM6_THRESHOLD*2 + GMP_NUMB_BITS*6,                        \
-       mpn_toom4_sqr_itch(SQR_TOOM6_THRESHOLD)) )
+  (((n) - SQR_TOOM6_THRESHOLD)*2 +                                     \
+   MAX(SQR_TOOM6_THRESHOLD*2 + GMP_NUMB_BITS*6,                                \
+       mpn_toom4_sqr_itch(SQR_TOOM6_THRESHOLD)))
  
+#define MUL_TOOM6H_MIN                                                 \
+  ((MUL_TOOM6H_THRESHOLD > MUL_TOOM44_THRESHOLD) ?                     \
+    MUL_TOOM6H_THRESHOLD : MUL_TOOM44_THRESHOLD)
  #define mpn_toom6_mul_n_itch(n)                                                \
-( ((n) - MUL_TOOM6H_THRESHOLD)*2 +                                     \
-   MAX(MUL_TOOM6H_THRESHOLD*2 + GMP_NUMB_BITS*6,                       \
-       mpn_toom44_mul_itch(MUL_TOOM6H_THRESHOLD,MUL_TOOM6H_THRESHOLD)) )
+  (((n) - MUL_TOOM6H_MIN)*2 +                                          \
+   MAX(MUL_TOOM6H_MIN*2 + GMP_NUMB_BITS*6,                             \
+       mpn_toom44_mul_itch(MUL_TOOM6H_MIN,MUL_TOOM6H_MIN)))
  
  static inline mp_size_t
  mpn_toom6h_mul_itch (mp_size_t an, mp_size_t bn) {
@@ -4555,14 +5025,17 @@ mpn_toom6h_mul_itch (mp_size_t an, mp_size_t bn) {
  }
  
  #define mpn_toom8_sqr_itch(n)                                          \
-( (((n)*15)>>3) - ((SQR_TOOM8_THRESHOLD*15)>>3) +                      \
-   MAX(((SQR_TOOM8_THRESHOLD*15)>>3) + GMP_NUMB_BITS*6,                \
-       mpn_toom6_sqr_itch(SQR_TOOM8_THRESHOLD)) )
+  ((((n)*15)>>3) - ((SQR_TOOM8_THRESHOLD*15)>>3) +                     \
+   MAX(((SQR_TOOM8_THRESHOLD*15)>>3) + GMP_NUMB_BITS*6,                        \
+       mpn_toom6_sqr_itch(SQR_TOOM8_THRESHOLD)))
  
+#define MUL_TOOM8H_MIN                                                 \
+  ((MUL_TOOM8H_THRESHOLD > MUL_TOOM6H_MIN) ?                           \
+    MUL_TOOM8H_THRESHOLD : MUL_TOOM6H_MIN)
  #define mpn_toom8_mul_n_itch(n)                                                \
-( (((n)*15)>>3) - ((MUL_TOOM8H_THRESHOLD*15)>>3) +                     \
-   MAX(((MUL_TOOM8H_THRESHOLD*15)>>3) + GMP_NUMB_BITS*6,               \
-       mpn_toom6_mul_n_itch(MUL_TOOM8H_THRESHOLD)) )
+  ((((n)*15)>>3) - ((MUL_TOOM8H_MIN*15)>>3) +                          \
+   MAX(((MUL_TOOM8H_MIN*15)>>3) + GMP_NUMB_BITS*6,                     \
+       mpn_toom6_mul_n_itch(MUL_TOOM8H_MIN)))
  
  static inline mp_size_t
  mpn_toom8h_mul_itch (mp_size_t an, mp_size_t bn) {
@@ -4623,6 +5096,18 @@ mpn_toom63_mul_itch (mp_size_t an, mp_size_t bn)
    return 9 * n + 3;
  }
  
+static inline mp_size_t
+mpn_toom54_mul_itch (mp_size_t an, mp_size_t bn)
+{ /* Returns 9*n + 3; presumably the toom54 scratch size in limbs -- TODO confirm against the toom54 code. */
+  mp_size_t n = 1 + (4 * an >= 5 * bn ? (an - 1) / (size_t) 5 : (bn - 1) / (size_t) 4); /* 1 + (an-1)/5 or 1 + (bn-1)/4; the size_t cast makes the division unsigned */
+  return 9 * n + 3;
+}
+
+/* let S(n) = space required for input size n,
+   then S(n) = 3 floor(n/2) + 1 + S(floor(n/2)).   */
+#define mpn_toom42_mulmid_itch(n) \
+  (3 * (n) + GMP_NUMB_BITS)
+
  #if 0
  #define mpn_fft_mul mpn_mul_fft_full
  #else
@@ -4658,8 +5143,8 @@ class gmp_allocated_string {
  std::istream &__gmpz_operator_in_nowhite (std::istream &, mpz_ptr, char);
  int __gmp_istream_set_base (std::istream &, char &, bool &, bool &);
  void __gmp_istream_set_digits (std::string &, std::istream &, char &, bool &, int);
-void __gmp_doprnt_params_from_ios (struct doprnt_params_t *p, std::ios &o);
-std::ostream& __gmp_doprnt_integer_ostream (std::ostream &o, struct doprnt_params_t *p, char *s);
+void __gmp_doprnt_params_from_ios (struct doprnt_params_t *, std::ios &);
+std::ostream& __gmp_doprnt_integer_ostream (std::ostream &, struct doprnt_params_t *, char *);
  extern const struct doprnt_funs_t  __gmp_asprintf_funs_noformat;
  
  #endif /* __cplusplus */
diff --git a/gmpxx.h b/gmpxx.h

index 7490312d3075ae82d5e2500981419ad5af7a9aba..c27a2f47fa0c2430b6b69fb72bf02e207b343d68 100644 (file)
--- a/gmpxx.h
+++ b/gmpxx.h
@@ -1,6 +1,7 @@
  /* gmpxx.h -- C++ class wrapper for GMP types.  -*- C++ -*-
  
-Copyright 2001, 2002, 2003, 2006, 2008 Free Software Foundation, Inc.
+Copyright 2001, 2002, 2003, 2006, 2008, 2011, 2012 Free Software Foundation,
+Inc.
  
  This file is part of the GNU MP Library.
  
@@ -17,29 +18,117 @@ License for more details.
  You should have received a copy of the GNU Lesser General Public License
  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
  
-/* the C++ compiler must implement the following features:
-   - member templates
-   - partial specialization of templates
-   - namespace support
-   for g++, this means version 2.91 or higher
-   for other compilers, I don't know */
-#ifdef __GNUC__
-#if __GNUC__ < 2 || (__GNUC__ == 2 && __GNUC_MINOR__ < 91)
-#error gmpxx.h requires g++ version 2.91 (egcs 1.1.2) or higher
-#endif
-#endif
-
  #ifndef __GMP_PLUSPLUS__
  #define __GMP_PLUSPLUS__
  
  #include <iosfwd>
  
  #include <cstring>  /* for strlen */
+#include <limits>  /* numeric_limits */
+#include <utility>
+#include <algorithm>  /* swap */
  #include <string>
  #include <stdexcept>
  #include <cfloat>
  #include <gmp.h>
  
+// wrapper for gcc's __builtin_constant_p
+// __builtin_constant_p has been in gcc since forever,
+// but g++-3.4 miscompiles it.
+#if __GMP_GNUC_PREREQ(4, 2)
+#define __GMPXX_CONSTANT(X) __builtin_constant_p(X)
+#else
+#define __GMPXX_CONSTANT(X) false
+#endif
+
+// Use C++11 features
+#ifndef __GMPXX_USE_CXX11
+#if __cplusplus >= 201103L
+#define __GMPXX_USE_CXX11 1
+#else
+#define __GMPXX_USE_CXX11 0
+#endif
+#endif
+
+#if __GMPXX_USE_CXX11
+#define __GMPXX_NOEXCEPT noexcept
+#include <type_traits> // for common_type
+#else
+#define __GMPXX_NOEXCEPT
+#endif
+
+// Max allocations for plain types when converted to mpz_t
+#define __GMPZ_DBL_LIMBS (2 + DBL_MAX_EXP / GMP_NUMB_BITS)
+
+#if GMP_NAIL_BITS != 0 && ! defined _LONG_LONG_LIMB
+#define __GMPZ_ULI_LIMBS 2
+#else
+#define __GMPZ_ULI_LIMBS 1
+#endif
+
+inline void __mpz_set_ui_safe(mpz_ptr p, unsigned long l)
+{ // Stores l directly into p->_mp_d with no allocation call; p->_mp_d must already provide __GMPZ_ULI_LIMBS limbs.
+  p->_mp_size = (l != 0);            // size 0 when l is zero, otherwise 1 (so far)
+  p->_mp_d[0] = l & GMP_NUMB_MASK;   // first limb gets the bits selected by GMP_NUMB_MASK
+#if __GMPZ_ULI_LIMBS > 1
+  l >>= GMP_NUMB_BITS;               // whatever did not fit in the first limb
+  p->_mp_d[1] = l;
+  p->_mp_size += (l != 0);           // count the second limb only when it is non-zero
+#endif
+}
+
+inline void __mpz_set_si_safe(mpz_ptr p, long l)
+{ // Signed counterpart of __mpz_set_ui_safe: stores the magnitude of l, then negates p when l < 0.
+  if(l < 0)
+  {
+    __mpz_set_ui_safe(p, -static_cast<unsigned long>(l));  // negate after the cast: unsigned negation cannot overflow
+    mpz_neg(p, p);                                         // restore the sign on the value just stored
+  }
+  else
+    __mpz_set_ui_safe(p, l);
+    // Note: we know the high bit of l is 0 so we could do slightly better
+}
+
+// Fake temporary variables
+#define __GMPXX_TMPZ_UI                                                        \
+  mpz_t temp;                                                          \
+  mp_limb_t limbs[__GMPZ_ULI_LIMBS];                                   \
+  temp->_mp_d = limbs;                                                 \
+  __mpz_set_ui_safe (temp, l)
+#define __GMPXX_TMPZ_SI                                                        \
+  mpz_t temp;                                                          \
+  mp_limb_t limbs[__GMPZ_ULI_LIMBS];                                   \
+  temp->_mp_d = limbs;                                                 \
+  __mpz_set_si_safe (temp, l)
+#define __GMPXX_TMPZ_D                                                 \
+  mpz_t temp;                                                          \
+  mp_limb_t limbs[__GMPZ_DBL_LIMBS];                                   \
+  temp->_mp_d = limbs;                                                 \
+  temp->_mp_alloc = __GMPZ_DBL_LIMBS;                                  \
+  mpz_set_d (temp, d)
+
+#define __GMPXX_TMPQ_UI                                                        \
+  mpq_t temp;                                                          \
+  mp_limb_t limbs[__GMPZ_ULI_LIMBS+1];                                 \
+  mpq_numref(temp)->_mp_d = limbs;                                     \
+  __mpz_set_ui_safe (mpq_numref(temp), l);                             \
+  mpq_denref(temp)->_mp_d = limbs + __GMPZ_ULI_LIMBS;                  \
+  mpq_denref(temp)->_mp_size = 1;                                      \
+  mpq_denref(temp)->_mp_d[0] = 1
+#define __GMPXX_TMPQ_SI                                                        \
+  mpq_t temp;                                                          \
+  mp_limb_t limbs[__GMPZ_ULI_LIMBS+1];                                 \
+  mpq_numref(temp)->_mp_d = limbs;                                     \
+  __mpz_set_si_safe (mpq_numref(temp), l);                             \
+  mpq_denref(temp)->_mp_d = limbs + __GMPZ_ULI_LIMBS;                  \
+  mpq_denref(temp)->_mp_size = 1;                                      \
+  mpq_denref(temp)->_mp_d[0] = 1
+
+inline unsigned long __gmpxx_abs_ui (signed long l)
+{ // Returns the magnitude of l as an unsigned long.
+  return l >= 0 ? static_cast<unsigned long>(l)
+         : -static_cast<unsigned long>(l);  // negation is done on the unsigned value, so it is well defined for every l
+}
  
  /**************** Function objects ****************/
  /* Any evaluation of a __gmp_expr ends up calling one of these functions
@@ -72,61 +161,59 @@ struct __gmp_binary_plus
    { mpz_add(z, w, v); }
  
    static void eval(mpz_ptr z, mpz_srcptr w, unsigned long int l)
-  { mpz_add_ui(z, w, l); }
-  static void eval(mpz_ptr z, unsigned long int l, mpz_srcptr w)
-  { mpz_add_ui(z, w, l); }
-  static void eval(mpz_ptr z, mpz_srcptr w, signed long int l)
    {
-    if (l >= 0)
-      mpz_add_ui(z, w, l);
+    // Ideally, those checks should happen earlier so that the tree
+    // generated for a+0+b would just be sum(a,b).
+    if (__GMPXX_CONSTANT(l) && l == 0)
+    {
+      if (z != w) mpz_set(z, w);
+    }
      else
-      mpz_sub_ui(z, w, -l);
+      mpz_add_ui(z, w, l);
    }
-  static void eval(mpz_ptr z, signed long int l, mpz_srcptr w)
+  static void eval(mpz_ptr z, unsigned long int l, mpz_srcptr w)
+  { eval(z, w, l); }
+  static void eval(mpz_ptr z, mpz_srcptr w, signed long int l)
    {
      if (l >= 0)
-      mpz_add_ui(z, w, l);
+      eval(z, w, static_cast<unsigned long>(l));
      else
-      mpz_sub_ui(z, w, -l);
+      mpz_sub_ui(z, w, -static_cast<unsigned long>(l));
    }
+  static void eval(mpz_ptr z, signed long int l, mpz_srcptr w)
+  { eval(z, w, l); }
    static void eval(mpz_ptr z, mpz_srcptr w, double d)
-  {
-    mpz_t temp;
-    mpz_init_set_d(temp, d);
-    mpz_add(z, w, temp);
-    mpz_clear(temp);
-  }
+  {  __GMPXX_TMPZ_D;    mpz_add (z, w, temp); }
    static void eval(mpz_ptr z, double d, mpz_srcptr w)
-  {
-    mpz_t temp;
-    mpz_init_set_d(temp, d);
-    mpz_add(z, temp, w);
-    mpz_clear(temp);
-  }
+  { eval(z, w, d); }
  
    static void eval(mpq_ptr q, mpq_srcptr r, mpq_srcptr s)
    { mpq_add(q, r, s); }
  
    static void eval(mpq_ptr q, mpq_srcptr r, unsigned long int l)
-  { mpq_set(q, r); mpz_addmul_ui(mpq_numref(q), mpq_denref(q), l); }
-  static void eval(mpq_ptr q, unsigned long int l, mpq_srcptr r)
-  { mpq_set(q, r); mpz_addmul_ui(mpq_numref(q), mpq_denref(q), l); }
-  static void eval(mpq_ptr q, mpq_srcptr r, signed long int l)
    {
-    mpq_set(q, r);
-    if (l >= 0)
-      mpz_addmul_ui(mpq_numref(q), mpq_denref(q), l);
+    if (__GMPXX_CONSTANT(l) && l == 0)
+    {
+      if (q != r) mpq_set(q, r);
+    }
      else
-      mpz_submul_ui(mpq_numref(q), mpq_denref(q), -l);
+    {
+      if (q == r)
+        mpz_addmul_ui(mpq_numref(q), mpq_denref(q), l);
+      else
+      {
+        mpz_mul_ui(mpq_numref(q), mpq_denref(r), l);
+        mpz_add(mpq_numref(q), mpq_numref(q), mpq_numref(r));
+        mpz_set(mpq_denref(q), mpq_denref(r));
+      }
+    }
    }
+  static void eval(mpq_ptr q, unsigned long int l, mpq_srcptr r)
+  { eval(q, r, l); }
+  static inline void eval(mpq_ptr q, mpq_srcptr r, signed long int l);
+  // defined after __gmp_binary_minus
    static void eval(mpq_ptr q, signed long int l, mpq_srcptr r)
-  {
-    mpq_set(q, r);
-    if (l >= 0)
-      mpz_addmul_ui(mpq_numref(q), mpq_denref(q), l);
-    else
-      mpz_submul_ui(mpq_numref(q), mpq_denref(q), -l);
-  }
+  { eval(q, r, l); }
    static void eval(mpq_ptr q, mpq_srcptr r, double d)
    {
      mpq_t temp;
@@ -136,18 +223,21 @@ struct __gmp_binary_plus
      mpq_clear(temp);
    }
    static void eval(mpq_ptr q, double d, mpq_srcptr r)
-  {
-    mpq_t temp;
-    mpq_init(temp);
-    mpq_set_d(temp, d);
-    mpq_add(q, temp, r);
-    mpq_clear(temp);
-  }
+  { eval(q, r, d); }
  
    static void eval(mpq_ptr q, mpq_srcptr r, mpz_srcptr z)
-  { mpq_set(q, r); mpz_addmul(mpq_numref(q), mpq_denref(q), z); }
+  {
+    if (q == r)
+      mpz_addmul(mpq_numref(q), mpq_denref(q), z);
+    else
+    {
+      mpz_mul(mpq_numref(q), mpq_denref(r), z);
+      mpz_add(mpq_numref(q), mpq_numref(q), mpq_numref(r));
+      mpz_set(mpq_denref(q), mpq_denref(r));
+    }
+  }
    static void eval(mpq_ptr q, mpz_srcptr z, mpq_srcptr r)
-  { mpq_set(q, r); mpz_addmul(mpq_numref(q), mpq_denref(q), z); }
+  { eval(q, r, z); }
  
    static void eval(mpf_ptr f, mpf_srcptr g, mpf_srcptr h)
    { mpf_add(f, g, h); }
@@ -161,15 +251,10 @@ struct __gmp_binary_plus
      if (l >= 0)
        mpf_add_ui(f, g, l);
      else
-      mpf_sub_ui(f, g, -l);
+      mpf_sub_ui(f, g, -static_cast<unsigned long>(l));
    }
    static void eval(mpf_ptr f, signed long int l, mpf_srcptr g)
-  {
-    if (l >= 0)
-      mpf_add_ui(f, g, l);
-    else
-      mpf_sub_ui(f, g, -l);
-  }
+  { eval(f, g, l); }
    static void eval(mpf_ptr f, mpf_srcptr g, double d)
    {
      mpf_t temp;
@@ -179,13 +264,7 @@ struct __gmp_binary_plus
      mpf_clear(temp);
    }
    static void eval(mpf_ptr f, double d, mpf_srcptr g)
-  {
-    mpf_t temp;
-    mpf_init2(temp, 8*sizeof(double));
-    mpf_set_d(temp, d);
-    mpf_add(f, temp, g);
-    mpf_clear(temp);
-  }
+  { eval(f, g, d); }
  };
  
  struct __gmp_binary_minus
@@ -194,64 +273,77 @@ struct __gmp_binary_minus
    { mpz_sub(z, w, v); }
  
    static void eval(mpz_ptr z, mpz_srcptr w, unsigned long int l)
-  { mpz_sub_ui(z, w, l); }
+  {
+    if (__GMPXX_CONSTANT(l) && l == 0)
+    {
+      if (z != w) mpz_set(z, w);
+    }
+    else
+      mpz_sub_ui(z, w, l);
+  }
    static void eval(mpz_ptr z, unsigned long int l, mpz_srcptr w)
-  { mpz_ui_sub(z, l, w); }
+  {
+    if (__GMPXX_CONSTANT(l) && l == 0)
+    {
+      mpz_neg(z, w);
+    }
+    else
+      mpz_ui_sub(z, l, w);
+  }
    static void eval(mpz_ptr z, mpz_srcptr w, signed long int l)
    {
      if (l >= 0)
-      mpz_sub_ui(z, w, l);
+      eval(z, w, static_cast<unsigned long>(l));
      else
-      mpz_add_ui(z, w, -l);
+      mpz_add_ui(z, w, -static_cast<unsigned long>(l));
    }
    static void eval(mpz_ptr z, signed long int l, mpz_srcptr w)
    {
      if (l >= 0)
-      mpz_ui_sub(z, l, w);
+      eval(z, static_cast<unsigned long>(l), w);
      else
        {
-        mpz_add_ui(z, w, -l);
+        mpz_add_ui(z, w, -static_cast<unsigned long>(l));
          mpz_neg(z, z);
        }
    }
    static void eval(mpz_ptr z, mpz_srcptr w, double d)
-  {
-    mpz_t temp;
-    mpz_init_set_d(temp, d);
-    mpz_sub(z, w, temp);
-    mpz_clear(temp);
-  }
+  {  __GMPXX_TMPZ_D;    mpz_sub (z, w, temp); }
    static void eval(mpz_ptr z, double d, mpz_srcptr w)
-  {
-    mpz_t temp;
-    mpz_init_set_d(temp, d);
-    mpz_sub(z, temp, w);
-    mpz_clear(temp);
-  }
+  {  __GMPXX_TMPZ_D;    mpz_sub (z, temp, w); }
  
    static void eval(mpq_ptr q, mpq_srcptr r, mpq_srcptr s)
    { mpq_sub(q, r, s); }
  
    static void eval(mpq_ptr q, mpq_srcptr r, unsigned long int l)
-  { mpq_set(q, r); mpz_submul_ui(mpq_numref(q), mpq_denref(q), l); }
-  static void eval(mpq_ptr q, unsigned long int l, mpq_srcptr r)
-  { mpq_neg(q, r); mpz_addmul_ui(mpq_numref(q), mpq_denref(q), l); }
-  static void eval(mpq_ptr q, mpq_srcptr r, signed long int l)
    {
-    mpq_set(q, r);
-    if (l >= 0)
-      mpz_submul_ui(mpq_numref(q), mpq_denref(q), l);
+    if (__GMPXX_CONSTANT(l) && l == 0)
+    {
+      if (q != r) mpq_set(q, r);
+    }
      else
-      mpz_addmul_ui(mpq_numref(q), mpq_denref(q), -l);
+    {
+      if (q == r)
+        mpz_submul_ui(mpq_numref(q), mpq_denref(q), l);
+      else
+      {
+        mpz_mul_ui(mpq_numref(q), mpq_denref(r), l);
+        mpz_sub(mpq_numref(q), mpq_numref(r), mpq_numref(q));
+        mpz_set(mpq_denref(q), mpq_denref(r));
+      }
+    }
    }
-  static void eval(mpq_ptr q, signed long int l, mpq_srcptr r)
+  static void eval(mpq_ptr q, unsigned long int l, mpq_srcptr r)
+  { eval(q, r, l); mpq_neg(q, q); }
+  static void eval(mpq_ptr q, mpq_srcptr r, signed long int l)
    {
-    mpq_neg(q, r);
      if (l >= 0)
-      mpz_addmul_ui(mpq_numref(q), mpq_denref(q), l);
+      eval(q, r, static_cast<unsigned long>(l));
      else
-      mpz_submul_ui(mpq_numref(q), mpq_denref(q), -l);
+      __gmp_binary_plus::eval(q, r, -static_cast<unsigned long>(l));
    }
+  static void eval(mpq_ptr q, signed long int l, mpq_srcptr r)
+  { eval(q, r, l); mpq_neg(q, q); }
    static void eval(mpq_ptr q, mpq_srcptr r, double d)
    {
      mpq_t temp;
@@ -270,9 +362,18 @@ struct __gmp_binary_minus
    }
  
    static void eval(mpq_ptr q, mpq_srcptr r, mpz_srcptr z)
-  { mpq_set(q, r); mpz_submul(mpq_numref(q), mpq_denref(q), z); }
+  {
+    if (q == r)
+      mpz_submul(mpq_numref(q), mpq_denref(q), z);
+    else
+    {
+      mpz_mul(mpq_numref(q), mpq_denref(r), z);
+      mpz_sub(mpq_numref(q), mpq_numref(r), mpq_numref(q));
+      mpz_set(mpq_denref(q), mpq_denref(r));
+    }
+  }
    static void eval(mpq_ptr q, mpz_srcptr z, mpq_srcptr r)
-  { mpq_neg(q, r); mpz_addmul(mpq_numref(q), mpq_denref(q), z); }
+  { eval(q, r, z); mpq_neg(q, q); }
  
    static void eval(mpf_ptr f, mpf_srcptr g, mpf_srcptr h)
    { mpf_sub(f, g, h); }
@@ -286,14 +387,14 @@ struct __gmp_binary_minus
      if (l >= 0)
        mpf_sub_ui(f, g, l);
      else
-      mpf_add_ui(f, g, -l);
+      mpf_add_ui(f, g, -static_cast<unsigned long>(l));
    }
    static void eval(mpf_ptr f, signed long int l, mpf_srcptr g)
    {
      if (l >= 0)
        mpf_sub_ui(f, g, l);
      else
-      mpf_add_ui(f, g, -l);
+      mpf_add_ui(f, g, -static_cast<unsigned long>(l));
      mpf_neg(f, f);
    }
    static void eval(mpf_ptr f, mpf_srcptr g, double d)
@@ -314,69 +415,158 @@ struct __gmp_binary_minus
    }
  };
  
+// defined here so it can reference __gmp_binary_minus
+inline void
+__gmp_binary_plus::eval(mpq_ptr q, mpq_srcptr r, signed long int l)
+{
+  if (l >= 0)
+    eval(q, r, static_cast<unsigned long>(l));
+  else
+    __gmp_binary_minus::eval(q, r, -static_cast<unsigned long>(l));
+}
+
+struct __gmp_binary_lshift
+{
+  static void eval(mpz_ptr z, mpz_srcptr w, mp_bitcnt_t l)
+  {
+    if (__GMPXX_CONSTANT(l) && (l == 0))
+    {
+      if (z != w) mpz_set(z, w);
+    }
+    else
+      mpz_mul_2exp(z, w, l);
+  }
+  static void eval(mpq_ptr q, mpq_srcptr r, mp_bitcnt_t l)
+  {
+    if (__GMPXX_CONSTANT(l) && (l == 0))
+    {
+      if (q != r) mpq_set(q, r);
+    }
+    else
+      mpq_mul_2exp(q, r, l);
+  }
+  static void eval(mpf_ptr f, mpf_srcptr g, mp_bitcnt_t l)
+  { mpf_mul_2exp(f, g, l); }
+};
+
+struct __gmp_binary_rshift
+{
+  static void eval(mpz_ptr z, mpz_srcptr w, mp_bitcnt_t l)
+  {
+    if (__GMPXX_CONSTANT(l) && (l == 0))
+    {
+      if (z != w) mpz_set(z, w);
+    }
+    else
+      mpz_fdiv_q_2exp(z, w, l);
+  }
+  static void eval(mpq_ptr q, mpq_srcptr r, mp_bitcnt_t l)
+  {
+    if (__GMPXX_CONSTANT(l) && (l == 0))
+    {
+      if (q != r) mpq_set(q, r);
+    }
+    else
+      mpq_div_2exp(q, r, l);
+  }
+  static void eval(mpf_ptr f, mpf_srcptr g, mp_bitcnt_t l)
+  { mpf_div_2exp(f, g, l); }
+};
+
  struct __gmp_binary_multiplies
  {
    static void eval(mpz_ptr z, mpz_srcptr w, mpz_srcptr v)
    { mpz_mul(z, w, v); }
  
    static void eval(mpz_ptr z, mpz_srcptr w, unsigned long int l)
-  { mpz_mul_ui(z, w, l); }
+  {
+// gcc-3.3 doesn't have __builtin_ctzl. Don't bother optimizing for old gcc.
+#if __GMP_GNUC_PREREQ(3, 4)
+    if (__GMPXX_CONSTANT(l) && (l & (l-1)) == 0)
+    {
+      if (l == 0)
+      {
+        z->_mp_size = 0;
+      }
+      else
+      {
+        __gmp_binary_lshift::eval(z, w, __builtin_ctzl(l));
+      }
+    }
+    else
+#endif
+      mpz_mul_ui(z, w, l);
+  }
    static void eval(mpz_ptr z, unsigned long int l, mpz_srcptr w)
-  { mpz_mul_ui(z, w, l); }
+  { eval(z, w, l); }
    static void eval(mpz_ptr z, mpz_srcptr w, signed long int l)
-  { mpz_mul_si (z, w, l); }
-  static void eval(mpz_ptr z, signed long int l, mpz_srcptr w)
-  { mpz_mul_si (z, w, l); }
-  static void eval(mpz_ptr z, mpz_srcptr w, double d)
    {
-    mpz_t temp;
-    mpz_init_set_d(temp, d);
-    mpz_mul(z, w, temp);
-    mpz_clear(temp);
+    if (__GMPXX_CONSTANT(l))
+    {
+      if (l >= 0)
+        eval(z, w, static_cast<unsigned long>(l));
+      else
+      {
+        eval(z, w, -static_cast<unsigned long>(l));
+       mpz_neg(z, z);
+      }
+    }
+    else
+      mpz_mul_si (z, w, l);
    }
+  static void eval(mpz_ptr z, signed long int l, mpz_srcptr w)
+  { eval(z, w, l); }
+  static void eval(mpz_ptr z, mpz_srcptr w, double d)
+  {  __GMPXX_TMPZ_D;    mpz_mul (z, w, temp); }
    static void eval(mpz_ptr z, double d, mpz_srcptr w)
-  {
-    mpz_t temp;
-    mpz_init_set_d(temp, d);
-    mpz_mul(z, temp, w);
-    mpz_clear(temp);
-  }
+  { eval(z, w, d); }
  
    static void eval(mpq_ptr q, mpq_srcptr r, mpq_srcptr s)
    { mpq_mul(q, r, s); }
  
    static void eval(mpq_ptr q, mpq_srcptr r, unsigned long int l)
    {
-    mpq_t temp;
-    mpq_init(temp);
-    mpq_set_ui(temp, l, 1);
-    mpq_mul(q, r, temp);
-    mpq_clear(temp);
+#if __GMP_GNUC_PREREQ(3, 4)
+    if (__GMPXX_CONSTANT(l) && (l & (l-1)) == 0)
+    {
+      if (l == 0)
+      {
+       mpq_set_ui(q, 0, 1);
+      }
+      else
+      {
+        __gmp_binary_lshift::eval(q, r, __builtin_ctzl(l));
+      }
+    }
+    else
+#endif
+    {
+      __GMPXX_TMPQ_UI;
+      mpq_mul (q, r, temp);
+    }
    }
    static void eval(mpq_ptr q, unsigned long int l, mpq_srcptr r)
-  {
-    mpq_t temp;
-    mpq_init(temp);
-    mpq_set_ui(temp, l, 1);
-    mpq_mul(q, temp, r);
-    mpq_clear(temp);
-  }
+  { eval(q, r, l); }
    static void eval(mpq_ptr q, mpq_srcptr r, signed long int l)
    {
-    mpq_t temp;
-    mpq_init(temp);
-    mpq_set_si(temp, l, 1);
-    mpq_mul(q, r, temp);
-    mpq_clear(temp);
+    if (__GMPXX_CONSTANT(l))
+    {
+      if (l >= 0)
+        eval(q, r, static_cast<unsigned long>(l));
+      else
+      {
+        eval(q, r, -static_cast<unsigned long>(l));
+       mpq_neg(q, q);
+      }
+    }
+    else
+    {
+      __GMPXX_TMPQ_SI;
+      mpq_mul (q, r, temp);
+    }
    }
    static void eval(mpq_ptr q, signed long int l, mpq_srcptr r)
-  {
-    mpq_t temp;
-    mpq_init(temp);
-    mpq_set_si(temp, l, 1);
-    mpq_mul(q, temp, r);
-    mpq_clear(temp);
-  }
+  { eval(q, r, l); }
    static void eval(mpq_ptr q, mpq_srcptr r, double d)
    {
      mpq_t temp;
@@ -386,13 +576,7 @@ struct __gmp_binary_multiplies
      mpq_clear(temp);
    }
    static void eval(mpq_ptr q, double d, mpq_srcptr r)
-  {
-    mpq_t temp;
-    mpq_init(temp);
-    mpq_set_d(temp, d);
-    mpq_mul(q, temp, r);
-    mpq_clear(temp);
-  }
+  { eval(q, r, d); }
  
    static void eval(mpf_ptr f, mpf_srcptr g, mpf_srcptr h)
    { mpf_mul(f, g, h); }
@@ -407,20 +591,12 @@ struct __gmp_binary_multiplies
        mpf_mul_ui(f, g, l);
      else
        {
-       mpf_mul_ui(f, g, -l);
+       mpf_mul_ui(f, g, -static_cast<unsigned long>(l));
         mpf_neg(f, f);
        }
    }
    static void eval(mpf_ptr f, signed long int l, mpf_srcptr g)
-  {
-    if (l >= 0)
-      mpf_mul_ui(f, g, l);
-    else
-      {
-       mpf_mul_ui(f, g, -l);
-       mpf_neg(f, f);
-      }
-  }
+  { eval(f, g, l); }
    static void eval(mpf_ptr f, mpf_srcptr g, double d)
    {
      mpf_t temp;
@@ -430,13 +606,7 @@ struct __gmp_binary_multiplies
      mpf_clear(temp);
    }
    static void eval(mpf_ptr f, double d, mpf_srcptr g)
-  {
-    mpf_t temp;
-    mpf_init2(temp, 8*sizeof(double));
-    mpf_set_d(temp, d);
-    mpf_mul(f, temp, g);
-    mpf_clear(temp);
-  }
+  { eval(f, g, d); }
  };
  
  struct __gmp_binary_divides
@@ -445,7 +615,23 @@ struct __gmp_binary_divides
    { mpz_tdiv_q(z, w, v); }
  
    static void eval(mpz_ptr z, mpz_srcptr w, unsigned long int l)
-  { mpz_tdiv_q_ui(z, w, l); }
+  {
+#if __GMP_GNUC_PREREQ(3, 4)
+    // Don't optimize division by 0...
+    if (__GMPXX_CONSTANT(l) && (l & (l-1)) == 0 && l != 0)
+    {
+      if (l == 1)
+      {
+        if (z != w) mpz_set(z, w);
+      }
+      else
+        mpz_tdiv_q_2exp(z, w, __builtin_ctzl(l));
+        // warning: do not use rshift (fdiv)
+    }
+    else
+#endif
+      mpz_tdiv_q_ui(z, w, l);
+  }
    static void eval(mpz_ptr z, unsigned long int l, mpz_srcptr w)
    {
      if (mpz_sgn(w) >= 0)
@@ -470,10 +656,10 @@ struct __gmp_binary_divides
    static void eval(mpz_ptr z, mpz_srcptr w, signed long int l)
    {
      if (l >= 0)
-      mpz_tdiv_q_ui(z, w, l);
+      eval(z, w, static_cast<unsigned long>(l));
      else
        {
-       mpz_tdiv_q_ui(z, w, -l);
+       eval(z, w, -static_cast<unsigned long>(l));
         mpz_neg(z, z);
        }
    }
@@ -485,59 +671,51 @@ struct __gmp_binary_divides
        {
          /* if w is bigger than a long then the quotient must be zero, unless
             l==LONG_MIN and w==-LONG_MIN in which case the quotient is -1 */
-        mpz_set_si (z, (mpz_cmpabs_ui (w, (l >= 0 ? l : -l)) == 0 ? -1 : 0));
+        mpz_set_si (z, (mpz_cmpabs_ui (w, __gmpxx_abs_ui(l)) == 0 ? -1 : 0));
        }
    }
    static void eval(mpz_ptr z, mpz_srcptr w, double d)
-  {
-    mpz_t temp;
-    mpz_init_set_d(temp, d);
-    mpz_tdiv_q(z, w, temp);
-    mpz_clear(temp);
-  }
+  {  __GMPXX_TMPZ_D;    mpz_tdiv_q (z, w, temp); }
    static void eval(mpz_ptr z, double d, mpz_srcptr w)
-  {
-    mpz_t temp;
-    mpz_init_set_d(temp, d);
-    mpz_tdiv_q(z, temp, w);
-    mpz_clear(temp);
-  }
+  {  __GMPXX_TMPZ_D;    mpz_tdiv_q (z, temp, w); }
  
    static void eval(mpq_ptr q, mpq_srcptr r, mpq_srcptr s)
    { mpq_div(q, r, s); }
  
    static void eval(mpq_ptr q, mpq_srcptr r, unsigned long int l)
    {
-    mpq_t temp;
-    mpq_init(temp);
-    mpq_set_ui(temp, l, 1);
-    mpq_div(q, r, temp);
-    mpq_clear(temp);
+#if __GMP_GNUC_PREREQ(3, 4)
+    if (__GMPXX_CONSTANT(l) && (l & (l-1)) == 0 && l != 0)
+      __gmp_binary_rshift::eval(q, r, __builtin_ctzl(l));
+    else
+#endif
+    {
+      __GMPXX_TMPQ_UI;
+      mpq_div (q, r, temp);
+    }
    }
    static void eval(mpq_ptr q, unsigned long int l, mpq_srcptr r)
-  {
-    mpq_t temp;
-    mpq_init(temp);
-    mpq_set_ui(temp, l, 1);
-    mpq_div(q, temp, r);
-    mpq_clear(temp);
-  }
+  {  __GMPXX_TMPQ_UI;   mpq_div (q, temp, r); }
    static void eval(mpq_ptr q, mpq_srcptr r, signed long int l)
    {
-    mpq_t temp;
-    mpq_init(temp);
-    mpq_set_si(temp, l, 1);
-    mpq_div(q, r, temp);
-    mpq_clear(temp);
+    if (__GMPXX_CONSTANT(l))
+    {
+      if (l >= 0)
+        eval(q, r, static_cast<unsigned long>(l));
+      else
+      {
+        eval(q, r, -static_cast<unsigned long>(l));
+       mpq_neg(q, q);
+      }
+    }
+    else
+    {
+      __GMPXX_TMPQ_SI;
+      mpq_div (q, r, temp);
+    }
    }
    static void eval(mpq_ptr q, signed long int l, mpq_srcptr r)
-  {
-    mpq_t temp;
-    mpq_init(temp);
-    mpq_set_si(temp, l, 1);
-    mpq_div(q, temp, r);
-    mpq_clear(temp);
-  }
+  {  __GMPXX_TMPQ_SI;   mpq_div (q, temp, r); }
    static void eval(mpq_ptr q, mpq_srcptr r, double d)
    {
      mpq_t temp;
@@ -568,7 +746,7 @@ struct __gmp_binary_divides
        mpf_div_ui(f, g, l);
      else
        {
-       mpf_div_ui(f, g, -l);
+       mpf_div_ui(f, g, -static_cast<unsigned long>(l));
         mpf_neg(f, f);
        }
    }
@@ -578,7 +756,7 @@ struct __gmp_binary_divides
        mpf_ui_div(f, l, g);
      else
        {
-       mpf_ui_div(f, -l, g);
+       mpf_ui_div(f, -static_cast<unsigned long>(l), g);
         mpf_neg(f, f);
        }
    }
@@ -627,7 +805,7 @@ struct __gmp_binary_modulus
    }
    static void eval(mpz_ptr z, mpz_srcptr w, signed long int l)
    {
-    mpz_tdiv_r_ui (z, w, (l >= 0 ? l : -l));
+    mpz_tdiv_r_ui (z, w, __gmpxx_abs_ui(l));
    }
    static void eval(mpz_ptr z, signed long int l, mpz_srcptr w)
    {
@@ -637,65 +815,32 @@ struct __gmp_binary_modulus
        {
          /* if w is bigger than a long then the remainder is l unchanged,
             unless l==LONG_MIN and w==-LONG_MIN in which case it's 0 */
-        mpz_set_si (z, mpz_cmpabs_ui (w, (l >= 0 ? l : -l)) == 0 ? 0 : l);
+        mpz_set_si (z, mpz_cmpabs_ui (w, __gmpxx_abs_ui(l)) == 0 ? 0 : l);
        }
    }
    static void eval(mpz_ptr z, mpz_srcptr w, double d)
-  {
-    mpz_t temp;
-    mpz_init_set_d(temp, d);
-    mpz_tdiv_r(z, w, temp);
-    mpz_clear(temp);
-  }
+  {  __GMPXX_TMPZ_D;    mpz_tdiv_r (z, w, temp); }
    static void eval(mpz_ptr z, double d, mpz_srcptr w)
-  {
-    mpz_t temp;
-    mpz_init_set_d(temp, d);
-    mpz_tdiv_r(z, temp, w);
-    mpz_clear(temp);
-  }
+  {  __GMPXX_TMPZ_D;    mpz_tdiv_r (z, temp, w); }
  };
  
-// Max allocations for plain types when converted to mpz_t
-#define __GMP_DBL_LIMBS (2 + DBL_MAX_EXP / GMP_NUMB_BITS)
-#define __GMP_ULI_LIMBS (1 + (8 * sizeof (long) - 1) / GMP_NUMB_BITS)
-
-#define __GMPXX_TMP_UI                                                 \
-  mpz_t temp;                                                          \
-  mp_limb_t limbs[__GMP_ULI_LIMBS];                                    \
-  temp->_mp_d = limbs;                                                 \
-  temp->_mp_alloc = __GMP_ULI_LIMBS;                                   \
-  mpz_set_ui (temp, l)
-#define __GMPXX_TMP_SI                                                 \
-  mpz_t temp;                                                          \
-  mp_limb_t limbs[__GMP_ULI_LIMBS];                                    \
-  temp->_mp_d = limbs;                                                 \
-  temp->_mp_alloc = __GMP_ULI_LIMBS;                                   \
-  mpz_set_si (temp, l)
-#define __GMPXX_TMP_D                                                  \
-  mpz_t temp;                                                          \
-  mp_limb_t limbs[__GMP_DBL_LIMBS];                                    \
-  temp->_mp_d = limbs;                                                 \
-  temp->_mp_alloc = __GMP_DBL_LIMBS;                                   \
-  mpz_set_d (temp, d)
-
  struct __gmp_binary_and
  {
    static void eval(mpz_ptr z, mpz_srcptr w, mpz_srcptr v)
    { mpz_and(z, w, v); }
  
    static void eval(mpz_ptr z, mpz_srcptr w, unsigned long int l)
-  {  __GMPXX_TMP_UI;   mpz_and (z, w, temp);  }
+  {  __GMPXX_TMPZ_UI;   mpz_and (z, w, temp);  }
    static void eval(mpz_ptr z, unsigned long int l, mpz_srcptr w)
-  {  __GMPXX_TMP_UI;   mpz_and (z, w, temp);  }
+  { eval(z, w, l);  }
    static void eval(mpz_ptr z, mpz_srcptr w, signed long int l)
-  {  __GMPXX_TMP_SI;   mpz_and (z, w, temp);  }
+  {  __GMPXX_TMPZ_SI;   mpz_and (z, w, temp);  }
    static void eval(mpz_ptr z, signed long int l, mpz_srcptr w)
-  {  __GMPXX_TMP_SI;   mpz_and (z, w, temp);  }
+  { eval(z, w, l);  }
    static void eval(mpz_ptr z, mpz_srcptr w, double d)
-  {  __GMPXX_TMP_D;    mpz_and (z, w, temp); }
+  {  __GMPXX_TMPZ_D;    mpz_and (z, w, temp); }
    static void eval(mpz_ptr z, double d, mpz_srcptr w)
-  {  __GMPXX_TMP_D;    mpz_and (z, w, temp); }
+  { eval(z, w, d);  }
  };
  
  struct __gmp_binary_ior
@@ -703,17 +848,17 @@ struct __gmp_binary_ior
    static void eval(mpz_ptr z, mpz_srcptr w, mpz_srcptr v)
    { mpz_ior(z, w, v); }
    static void eval(mpz_ptr z, mpz_srcptr w, unsigned long int l)
-  {  __GMPXX_TMP_UI;   mpz_ior (z, w, temp);  }
+  {  __GMPXX_TMPZ_UI;   mpz_ior (z, w, temp);  }
    static void eval(mpz_ptr z, unsigned long int l, mpz_srcptr w)
-  {  __GMPXX_TMP_UI;   mpz_ior (z, w, temp);  }
+  { eval(z, w, l);  }
    static void eval(mpz_ptr z, mpz_srcptr w, signed long int l)
-  {  __GMPXX_TMP_SI;   mpz_ior (z, w, temp);  }
+  {  __GMPXX_TMPZ_SI;   mpz_ior (z, w, temp);  }
    static void eval(mpz_ptr z, signed long int l, mpz_srcptr w)
-  {  __GMPXX_TMP_SI;   mpz_ior (z, w, temp);  }
+  { eval(z, w, l);  }
    static void eval(mpz_ptr z, mpz_srcptr w, double d)
-  {  __GMPXX_TMP_D;    mpz_ior (z, w, temp); }
+  {  __GMPXX_TMPZ_D;    mpz_ior (z, w, temp); }
    static void eval(mpz_ptr z, double d, mpz_srcptr w)
-  {  __GMPXX_TMP_D;    mpz_ior (z, w, temp); }
+  { eval(z, w, d);  }
  };
  
  struct __gmp_binary_xor
@@ -721,37 +866,17 @@ struct __gmp_binary_xor
    static void eval(mpz_ptr z, mpz_srcptr w, mpz_srcptr v)
    { mpz_xor(z, w, v); }
    static void eval(mpz_ptr z, mpz_srcptr w, unsigned long int l)
-  {  __GMPXX_TMP_UI;   mpz_xor (z, w, temp);  }
+  {  __GMPXX_TMPZ_UI;   mpz_xor (z, w, temp);  }
    static void eval(mpz_ptr z, unsigned long int l, mpz_srcptr w)
-  {  __GMPXX_TMP_UI;   mpz_xor (z, w, temp);  }
+  { eval(z, w, l);  }
    static void eval(mpz_ptr z, mpz_srcptr w, signed long int l)
-  {  __GMPXX_TMP_SI;   mpz_xor (z, w, temp);  }
+  {  __GMPXX_TMPZ_SI;   mpz_xor (z, w, temp);  }
    static void eval(mpz_ptr z, signed long int l, mpz_srcptr w)
-  {  __GMPXX_TMP_SI;   mpz_xor (z, w, temp);  }
+  { eval(z, w, l);  }
    static void eval(mpz_ptr z, mpz_srcptr w, double d)
-  {  __GMPXX_TMP_D;    mpz_xor (z, w, temp); }
+  {  __GMPXX_TMPZ_D;    mpz_xor (z, w, temp); }
    static void eval(mpz_ptr z, double d, mpz_srcptr w)
-  {  __GMPXX_TMP_D;    mpz_xor (z, w, temp); }
-};
-
-struct __gmp_binary_lshift
-{
-  static void eval(mpz_ptr z, mpz_srcptr w, unsigned long int l)
-  { mpz_mul_2exp(z, w, l); }
-  static void eval(mpq_ptr q, mpq_srcptr r, unsigned long int l)
-  { mpq_mul_2exp(q, r, l); }
-  static void eval(mpf_ptr f, mpf_srcptr g, unsigned long int l)
-  { mpf_mul_2exp(f, g, l); }
-};
-
-struct __gmp_binary_rshift
-{
-  static void eval(mpz_ptr z, mpz_srcptr w, unsigned long int l)
-  { mpz_fdiv_q_2exp(z, w, l); }
-  static void eval(mpq_ptr q, mpq_srcptr r, unsigned long int l)
-  { mpq_div_2exp(q, r, l); }
-  static void eval(mpf_ptr f, mpf_srcptr g, unsigned long int l)
-  { mpf_div_2exp(f, g, l); }
+  { eval(z, w, d);  }
  };
  
  struct __gmp_binary_equal
@@ -794,13 +919,7 @@ struct __gmp_binary_equal
    }
    static bool eval(double d, mpq_srcptr q)
    {
-    bool b;
-    mpq_t temp;
-    mpq_init(temp);
-    mpq_set_d(temp, d);
-    b = (mpq_equal(temp, q) != 0);
-    mpq_clear(temp);
-    return b;
+    return eval(q, d);
    }
  
    static bool eval(mpf_srcptr f, mpf_srcptr g) { return mpf_cmp(f, g) == 0; }
@@ -810,178 +929,49 @@ struct __gmp_binary_equal
    static bool eval(unsigned long int l, mpf_srcptr f)
    { return mpf_cmp_ui(f, l) == 0; }
    static bool eval(mpf_srcptr f, signed long int l)
-  { return mpf_cmp_si(f, l) == 0; }
-  static bool eval(signed long int l, mpf_srcptr f)
-  { return mpf_cmp_si(f, l) == 0; }
-  static bool eval(mpf_srcptr f, double d)
-  { return mpf_cmp_d(f, d) == 0; }
-  static bool eval(double d, mpf_srcptr f)
-  { return mpf_cmp_d(f, d) == 0; }
-};
-
-struct __gmp_binary_not_equal
-{
-  static bool eval(mpz_srcptr z, mpz_srcptr w) { return mpz_cmp(z, w) != 0; }
-
-  static bool eval(mpz_srcptr z, unsigned long int l)
-  { return mpz_cmp_ui(z, l) != 0; }
-  static bool eval(unsigned long int l, mpz_srcptr z)
-  { return mpz_cmp_ui(z, l) != 0; }
-  static bool eval(mpz_srcptr z, signed long int l)
-  { return mpz_cmp_si(z, l) != 0; }
-  static bool eval(signed long int l, mpz_srcptr z)
-  { return mpz_cmp_si(z, l) != 0; }
-  static bool eval(mpz_srcptr z, double d)
-  { return mpz_cmp_d(z, d) != 0; }
-  static bool eval(double d, mpz_srcptr z)
-  { return mpz_cmp_d(z, d) != 0; }
-
-  static bool eval(mpq_srcptr q, mpq_srcptr r)
-  { return mpq_equal(q, r) == 0; }
-
-  static bool eval(mpq_srcptr q, unsigned long int l)
-  { return mpq_cmp_ui(q, l, 1) != 0; }
-  static bool eval(unsigned long int l, mpq_srcptr q)
-  { return mpq_cmp_ui(q, l, 1) != 0; }
-  static bool eval(mpq_srcptr q, signed long int l)
-  { return mpq_cmp_si(q, l, 1) != 0; }
-  static bool eval(signed long int l, mpq_srcptr q)
-  { return mpq_cmp_si(q, l, 1) != 0; }
-  static bool eval(mpq_srcptr q, double d)
-  {
-    bool b;
-    mpq_t temp;
-    mpq_init(temp);
-    mpq_set_d(temp, d);
-    b = (mpq_equal(q, temp) == 0);
-    mpq_clear(temp);
-    return b;
-  }
-  static bool eval(double d, mpq_srcptr q)
-  {
-    bool b;
-    mpq_t temp;
-    mpq_init(temp);
-    mpq_set_d(temp, d);
-    b = (mpq_equal(temp, q) == 0);
-    mpq_clear(temp);
-    return b;
-  }
-
-  static bool eval(mpf_srcptr f, mpf_srcptr g) { return mpf_cmp(f, g) != 0; }
-
-  static bool eval(mpf_srcptr f, unsigned long int l)
-  { return mpf_cmp_ui(f, l) != 0; }
-  static bool eval(unsigned long int l, mpf_srcptr f)
-  { return mpf_cmp_ui(f, l) != 0; }
-  static bool eval(mpf_srcptr f, signed long int l)
-  { return mpf_cmp_si(f, l) != 0; }
-  static bool eval(signed long int l, mpf_srcptr f)
-  { return mpf_cmp_si(f, l) != 0; }
-  static bool eval(mpf_srcptr f, double d)
-  { return mpf_cmp_d(f, d) != 0; }
-  static bool eval(double d, mpf_srcptr f)
-  { return mpf_cmp_d(f, d) != 0; }
-};
-
-struct __gmp_binary_less
-{
-  static bool eval(mpz_srcptr z, mpz_srcptr w) { return mpz_cmp(z, w) < 0; }
-
-  static bool eval(mpz_srcptr z, unsigned long int l)
-  { return mpz_cmp_ui(z, l) < 0; }
-  static bool eval(unsigned long int l, mpz_srcptr z)
-  { return mpz_cmp_ui(z, l) > 0; }
-  static bool eval(mpz_srcptr z, signed long int l)
-  { return mpz_cmp_si(z, l) < 0; }
-  static bool eval(signed long int l, mpz_srcptr z)
-  { return mpz_cmp_si(z, l) > 0; }
-  static bool eval(mpz_srcptr z, double d)
-  { return mpz_cmp_d(z, d) < 0; }
-  static bool eval(double d, mpz_srcptr z)
-  { return mpz_cmp_d(z, d) > 0; }
-
-  static bool eval(mpq_srcptr q, mpq_srcptr r) { return mpq_cmp(q, r) < 0; }
-
-  static bool eval(mpq_srcptr q, unsigned long int l)
-  { return mpq_cmp_ui(q, l, 1) < 0; }
-  static bool eval(unsigned long int l, mpq_srcptr q)
-  { return mpq_cmp_ui(q, l, 1) > 0; }
-  static bool eval(mpq_srcptr q, signed long int l)
-  { return mpq_cmp_si(q, l, 1) < 0; }
-  static bool eval(signed long int l, mpq_srcptr q)
-  { return mpq_cmp_si(q, l, 1) > 0; }
-  static bool eval(mpq_srcptr q, double d)
-  {
-    bool b;
-    mpq_t temp;
-    mpq_init(temp);
-    mpq_set_d(temp, d);
-    b = (mpq_cmp(q, temp) < 0);
-    mpq_clear(temp);
-    return b;
-  }
-  static bool eval(double d, mpq_srcptr q)
-  {
-    bool b;
-    mpq_t temp;
-    mpq_init(temp);
-    mpq_set_d(temp, d);
-    b = (mpq_cmp(temp, q) < 0);
-    mpq_clear(temp);
-    return b;
-  }
-
-  static bool eval(mpf_srcptr f, mpf_srcptr g) { return mpf_cmp(f, g) < 0; }
-
-  static bool eval(mpf_srcptr f, unsigned long int l)
-  { return mpf_cmp_ui(f, l) < 0; }
-  static bool eval(unsigned long int l, mpf_srcptr f)
-  { return mpf_cmp_ui(f, l) > 0; }
-  static bool eval(mpf_srcptr f, signed long int l)
-  { return mpf_cmp_si(f, l) < 0; }
+  { return mpf_cmp_si(f, l) == 0; }
    static bool eval(signed long int l, mpf_srcptr f)
-  { return mpf_cmp_si(f, l) > 0; }
+  { return mpf_cmp_si(f, l) == 0; }
    static bool eval(mpf_srcptr f, double d)
-  { return mpf_cmp_d(f, d) < 0; }
+  { return mpf_cmp_d(f, d) == 0; }
    static bool eval(double d, mpf_srcptr f)
-  { return mpf_cmp_d(f, d) > 0; }
+  { return mpf_cmp_d(f, d) == 0; }
  };
  
-struct __gmp_binary_less_equal
+struct __gmp_binary_less
  {
-  static bool eval(mpz_srcptr z, mpz_srcptr w) { return mpz_cmp(z, w) <= 0; }
+  static bool eval(mpz_srcptr z, mpz_srcptr w) { return mpz_cmp(z, w) < 0; }
  
    static bool eval(mpz_srcptr z, unsigned long int l)
-  { return mpz_cmp_ui(z, l) <= 0; }
+  { return mpz_cmp_ui(z, l) < 0; }
    static bool eval(unsigned long int l, mpz_srcptr z)
-  { return mpz_cmp_ui(z, l) >= 0; }
+  { return mpz_cmp_ui(z, l) > 0; }
    static bool eval(mpz_srcptr z, signed long int l)
-  { return mpz_cmp_si(z, l) <= 0; }
+  { return mpz_cmp_si(z, l) < 0; }
    static bool eval(signed long int l, mpz_srcptr z)
-  { return mpz_cmp_si(z, l) >= 0; }
+  { return mpz_cmp_si(z, l) > 0; }
    static bool eval(mpz_srcptr z, double d)
-  { return mpz_cmp_d(z, d) <= 0; }
+  { return mpz_cmp_d(z, d) < 0; }
    static bool eval(double d, mpz_srcptr z)
-  { return mpz_cmp_d(z, d) >= 0; }
+  { return mpz_cmp_d(z, d) > 0; }
  
-  static bool eval(mpq_srcptr q, mpq_srcptr r) { return mpq_cmp(q, r) <= 0; }
+  static bool eval(mpq_srcptr q, mpq_srcptr r) { return mpq_cmp(q, r) < 0; }
  
    static bool eval(mpq_srcptr q, unsigned long int l)
-  { return mpq_cmp_ui(q, l, 1) <= 0; }
+  { return mpq_cmp_ui(q, l, 1) < 0; }
    static bool eval(unsigned long int l, mpq_srcptr q)
-  { return mpq_cmp_ui(q, l, 1) >= 0; }
+  { return mpq_cmp_ui(q, l, 1) > 0; }
    static bool eval(mpq_srcptr q, signed long int l)
-  { return mpq_cmp_si(q, l, 1) <= 0; }
+  { return mpq_cmp_si(q, l, 1) < 0; }
    static bool eval(signed long int l, mpq_srcptr q)
-  { return mpq_cmp_si(q, l, 1) >= 0; }
+  { return mpq_cmp_si(q, l, 1) > 0; }
    static bool eval(mpq_srcptr q, double d)
    {
      bool b;
      mpq_t temp;
      mpq_init(temp);
      mpq_set_d(temp, d);
-    b = (mpq_cmp(q, temp) <= 0);
+    b = (mpq_cmp(q, temp) < 0);
      mpq_clear(temp);
      return b;
    }
@@ -991,25 +981,25 @@ struct __gmp_binary_less_equal
      mpq_t temp;
      mpq_init(temp);
      mpq_set_d(temp, d);
-    b = (mpq_cmp(temp, q) <= 0);
+    b = (mpq_cmp(temp, q) < 0);
      mpq_clear(temp);
      return b;
    }
  
-  static bool eval(mpf_srcptr f, mpf_srcptr g) { return mpf_cmp(f, g) <= 0; }
+  static bool eval(mpf_srcptr f, mpf_srcptr g) { return mpf_cmp(f, g) < 0; }
  
    static bool eval(mpf_srcptr f, unsigned long int l)
-  { return mpf_cmp_ui(f, l) <= 0; }
+  { return mpf_cmp_ui(f, l) < 0; }
    static bool eval(unsigned long int l, mpf_srcptr f)
-  { return mpf_cmp_ui(f, l) >= 0; }
+  { return mpf_cmp_ui(f, l) > 0; }
    static bool eval(mpf_srcptr f, signed long int l)
-  { return mpf_cmp_si(f, l) <= 0; }
+  { return mpf_cmp_si(f, l) < 0; }
    static bool eval(signed long int l, mpf_srcptr f)
-  { return mpf_cmp_si(f, l) >= 0; }
+  { return mpf_cmp_si(f, l) > 0; }
    static bool eval(mpf_srcptr f, double d)
-  { return mpf_cmp_d(f, d) <= 0; }
+  { return mpf_cmp_d(f, d) < 0; }
    static bool eval(double d, mpf_srcptr f)
-  { return mpf_cmp_d(f, d) >= 0; }
+  { return mpf_cmp_d(f, d) > 0; }
  };
  
  struct __gmp_binary_greater
@@ -1076,70 +1066,6 @@ struct __gmp_binary_greater
    { return mpf_cmp_d(f, d) < 0; }
  };
  
-struct __gmp_binary_greater_equal
-{
-  static bool eval(mpz_srcptr z, mpz_srcptr w) { return mpz_cmp(z, w) >= 0; }
-
-  static bool eval(mpz_srcptr z, unsigned long int l)
-  { return mpz_cmp_ui(z, l) >= 0; }
-  static bool eval(unsigned long int l, mpz_srcptr z)
-  { return mpz_cmp_ui(z, l) <= 0; }
-  static bool eval(mpz_srcptr z, signed long int l)
-  { return mpz_cmp_si(z, l) >= 0; }
-  static bool eval(signed long int l, mpz_srcptr z)
-  { return mpz_cmp_si(z, l) <= 0; }
-  static bool eval(mpz_srcptr z, double d)
-  { return mpz_cmp_d(z, d) >= 0; }
-  static bool eval(double d, mpz_srcptr z)
-  { return mpz_cmp_d(z, d) <= 0; }
-
-  static bool eval(mpq_srcptr q, mpq_srcptr r) { return mpq_cmp(q, r) >= 0; }
-
-  static bool eval(mpq_srcptr q, unsigned long int l)
-  { return mpq_cmp_ui(q, l, 1) >= 0; }
-  static bool eval(unsigned long int l, mpq_srcptr q)
-  { return mpq_cmp_ui(q, l, 1) <= 0; }
-  static bool eval(mpq_srcptr q, signed long int l)
-  { return mpq_cmp_si(q, l, 1) >= 0; }
-  static bool eval(signed long int l, mpq_srcptr q)
-  { return mpq_cmp_si(q, l, 1) <= 0; }
-  static bool eval(mpq_srcptr q, double d)
-  {
-    bool b;
-    mpq_t temp;
-    mpq_init(temp);
-    mpq_set_d(temp, d);
-    b = (mpq_cmp(q, temp) >= 0);
-    mpq_clear(temp);
-    return b;
-  }
-  static bool eval(double d, mpq_srcptr q)
-  {
-    bool b;
-    mpq_t temp;
-    mpq_init(temp);
-    mpq_set_d(temp, d);
-    b = (mpq_cmp(temp, q) >= 0);
-    mpq_clear(temp);
-    return b;
-  }
-
-  static bool eval(mpf_srcptr f, mpf_srcptr g) { return mpf_cmp(f, g) >= 0; }
-
-  static bool eval(mpf_srcptr f, unsigned long int l)
-  { return mpf_cmp_ui(f, l) >= 0; }
-  static bool eval(unsigned long int l, mpf_srcptr f)
-  { return mpf_cmp_ui(f, l) <= 0; }
-  static bool eval(mpf_srcptr f, signed long int l)
-  { return mpf_cmp_si(f, l) >= 0; }
-  static bool eval(signed long int l, mpf_srcptr f)
-  { return mpf_cmp_si(f, l) <= 0; }
-  static bool eval(mpf_srcptr f, double d)
-  { return mpf_cmp_d(f, d) >= 0; }
-  static bool eval(double d, mpf_srcptr f)
-  { return mpf_cmp_d(f, d) <= 0; }
-};
-
  struct __gmp_unary_increment
  {
    static void eval(mpz_ptr z) { mpz_add_ui(z, z, 1); }
@@ -1209,16 +1135,7 @@ struct __gmp_hypot_function
      mpf_clear(temp);
    }
    static void eval(mpf_ptr f, unsigned long int l, mpf_srcptr g)
-  {
-    mpf_t temp;
-    mpf_init2(temp, mpf_get_prec(f));
-    mpf_mul(temp, g, g);
-    mpf_set_ui(f, l);
-    mpf_mul(f, f, f);
-    mpf_add(f, f, temp);
-    mpf_sqrt(f, f);
-    mpf_clear(temp);
-  }
+  { eval(f, g, l); }
    static void eval(mpf_ptr f, mpf_srcptr g, signed long int l)
    {
      mpf_t temp;
@@ -1231,16 +1148,7 @@ struct __gmp_hypot_function
      mpf_clear(temp);
    }
    static void eval(mpf_ptr f, signed long int l, mpf_srcptr g)
-  {
-    mpf_t temp;
-    mpf_init2(temp, mpf_get_prec(f));
-    mpf_mul(temp, g, g);
-    mpf_set_si(f, l);
-    mpf_mul(f, f, f);
-    mpf_add(f, f, temp);
-    mpf_sqrt(f, f);
-    mpf_clear(temp);
-  }
+  { eval(f, g, l); }
    static void eval(mpf_ptr f, mpf_srcptr g, double d)
    {
      mpf_t temp;
@@ -1253,16 +1161,7 @@ struct __gmp_hypot_function
      mpf_clear(temp);
    }
    static void eval(mpf_ptr f, double d, mpf_srcptr g)
-  {
-    mpf_t temp;
-    mpf_init2(temp, mpf_get_prec(f));
-    mpf_mul(temp, g, g);
-    mpf_set_d(f, d);
-    mpf_mul(f, f, f);
-    mpf_add(f, f, temp);
-    mpf_sqrt(f, f);
-    mpf_clear(temp);
-  }
+  { eval(f, g, d); }
  };
  
  struct __gmp_sgn_function
@@ -1338,7 +1237,7 @@ struct __gmp_cmp_function
  
  struct __gmp_rand_function
  {
-  static void eval(mpz_ptr z, gmp_randstate_t s, unsigned long int l)
+  static void eval(mpz_ptr z, gmp_randstate_t s, mp_bitcnt_t l)
    { mpz_urandomb(z, s, l); }
    static void eval(mpz_ptr z, gmp_randstate_t s, mpz_srcptr w)
    { mpz_urandomm(z, s, w); }
@@ -1396,6 +1295,7 @@ struct __gmp_resolve_expr<mpz_t>
  {
    typedef mpz_t value_type;
    typedef mpz_ptr ptr_type;
+  typedef mpz_srcptr srcptr_type;
  };
  
  template <>
@@ -1403,6 +1303,7 @@ struct __gmp_resolve_expr<mpq_t>
  {
    typedef mpq_t value_type;
    typedef mpq_ptr ptr_type;
+  typedef mpq_srcptr srcptr_type;
  };
  
  template <>
@@ -1410,6 +1311,7 @@ struct __gmp_resolve_expr<mpf_t>
  {
    typedef mpf_t value_type;
    typedef mpf_ptr ptr_type;
+  typedef mpf_srcptr srcptr_type;
  };
  
  template <>
@@ -1448,20 +1350,49 @@ struct __gmp_resolve_expr<mpf_t, mpq_t>
    typedef mpf_t value_type;
  };
  
+#if __GMPXX_USE_CXX11
+namespace std {
+  template <class T, class U, class V, class W>
+  struct common_type <__gmp_expr<T, U>, __gmp_expr<V, W> >
+  {
+  private:
+    typedef typename __gmp_resolve_expr<T, V>::value_type X;
+  public:
+    typedef __gmp_expr<X, X> type;
+  };
  
-
-template <class T, class U, class V>
-struct __gmp_resolve_temp
-{
-  typedef __gmp_expr<T, T> temp_type;
-};
-
-template <class T>
-struct __gmp_resolve_temp<T, T, T>
-{
-  typedef const __gmp_expr<T, T> & temp_type;
-};
-
+  template <class T, class U>
+  struct common_type <__gmp_expr<T, U>, __gmp_expr<T, U> >
+  {
+    typedef __gmp_expr<T, U> type;
+  };
+
+#define __GMPXX_DECLARE_COMMON_TYPE(typ)       \
+  template <class T, class U>                  \
+  struct common_type <__gmp_expr<T, U>, typ >  \
+  {                                            \
+    typedef __gmp_expr<T, T> type;             \
+  };                                           \
+                                               \
+  template <class T, class U>                  \
+  struct common_type <typ, __gmp_expr<T, U> >  \
+  {                                            \
+    typedef __gmp_expr<T, T> type;             \
+  }
+
+  __GMPXX_DECLARE_COMMON_TYPE(signed char);
+  __GMPXX_DECLARE_COMMON_TYPE(unsigned char);
+  __GMPXX_DECLARE_COMMON_TYPE(signed int);
+  __GMPXX_DECLARE_COMMON_TYPE(unsigned int);
+  __GMPXX_DECLARE_COMMON_TYPE(signed short int);
+  __GMPXX_DECLARE_COMMON_TYPE(unsigned short int);
+  __GMPXX_DECLARE_COMMON_TYPE(signed long int);
+  __GMPXX_DECLARE_COMMON_TYPE(unsigned long int);
+  __GMPXX_DECLARE_COMMON_TYPE(float);
+  __GMPXX_DECLARE_COMMON_TYPE(double);
+#undef __GMPXX_DECLARE_COMMON_TYPE
+}
+#endif
  
  // classes for evaluating unary and binary expressions
  template <class T, class Op>
@@ -1486,14 +1417,6 @@ private:
  };
  
  
-// functions for evaluating expressions
-template <class T, class U>
-void __gmp_set_expr(mpz_ptr, const __gmp_expr<T, U> &);
-template <class T, class U>
-void __gmp_set_expr(mpq_ptr, const __gmp_expr<T, U> &);
-template <class T, class U>
-void __gmp_set_expr(mpf_ptr, const __gmp_expr<T, U> &);
-
  
  /**************** Macros for in-class declarations ****************/
  /* This is just repetitive code that is easier to maintain if it's written
@@ -1514,14 +1437,14 @@ void __gmp_set_expr(mpf_ptr, const __gmp_expr<T, U> &);
    __gmp_expr & fun(unsigned long int);        \
    __gmp_expr & fun(float);                    \
    __gmp_expr & fun(double);                   \
-  __gmp_expr & fun(long double);
+  /* __gmp_expr & fun(long double); */
  
  #define __GMP_DECLARE_COMPOUND_OPERATOR(fun) \
  __GMPP_DECLARE_COMPOUND_OPERATOR(fun)        \
  __GMPN_DECLARE_COMPOUND_OPERATOR(fun)
  
  #define __GMP_DECLARE_COMPOUND_OPERATOR_UI(fun) \
-  __gmp_expr & fun(unsigned long int);
+  __gmp_expr & fun(mp_bitcnt_t);
  
  #define __GMP_DECLARE_INCREMENT_OPERATOR(fun) \
    inline __gmp_expr & fun();                  \
@@ -1543,8 +1466,15 @@ public:
    __gmp_expr() { mpz_init(mp); }
  
    __gmp_expr(const __gmp_expr &z) { mpz_init_set(mp, z.mp); }
+#if __GMPXX_USE_CXX11
+  __gmp_expr(__gmp_expr &&z)
+  { *mp = *z.mp; mpz_init(z.mp); }
+#endif
+  template <class T>
+  __gmp_expr(const __gmp_expr<mpz_t, T> &expr)
+  { mpz_init(mp); __gmp_set_expr(mp, expr); }
    template <class T, class U>
-  __gmp_expr(const __gmp_expr<T, U> &expr)
+  explicit __gmp_expr(const __gmp_expr<T, U> &expr)
    { mpz_init(mp); __gmp_set_expr(mp, expr); }
  
    __gmp_expr(signed char c) { mpz_init_set_si(mp, c); }
@@ -1563,15 +1493,7 @@ public:
    __gmp_expr(double d) { mpz_init_set_d(mp, d); }
    // __gmp_expr(long double ld) { mpz_init_set_d(mp, ld); }
  
-  explicit __gmp_expr(const char *s)
-  {
-    if (mpz_init_set_str (mp, s, 0) != 0)
-      {
-        mpz_clear (mp);
-        throw std::invalid_argument ("mpz_set_str");
-      }
-  }
-  __gmp_expr(const char *s, int base)
+  explicit __gmp_expr(const char *s, int base = 0)
    {
      if (mpz_init_set_str (mp, s, base) != 0)
        {
@@ -1579,15 +1501,7 @@ public:
          throw std::invalid_argument ("mpz_set_str");
        }
    }
-  explicit __gmp_expr(const std::string &s)
-  {
-    if (mpz_init_set_str (mp, s.c_str(), 0) != 0)
-      {
-        mpz_clear (mp);
-        throw std::invalid_argument ("mpz_set_str");
-      }
-  }
-  __gmp_expr(const std::string &s, int base)
+  explicit __gmp_expr(const std::string &s, int base = 0)
    {
      if (mpz_init_set_str(mp, s.c_str(), base) != 0)
        {
@@ -1600,9 +1514,15 @@ public:
  
    ~__gmp_expr() { mpz_clear(mp); }
  
+  void swap(__gmp_expr& z) __GMPXX_NOEXCEPT { std::swap(*mp, *z.mp); }
+
    // assignment operators
    __gmp_expr & operator=(const __gmp_expr &z)
    { mpz_set(mp, z.mp); return *this; }
+#if __GMPXX_USE_CXX11
+  __gmp_expr & operator=(__gmp_expr &&z) noexcept
+  { swap(z); return *this; }
+#endif
    template <class T, class U>
    __gmp_expr<value_type, value_type> & operator=(const __gmp_expr<T, U> &expr)
    { __gmp_set_expr(mp, expr); return *this; }
@@ -1674,6 +1594,10 @@ public:
    // bool fits_double_p() const { return mpz_fits_double_p(mp); }
    // bool fits_ldouble_p() const { return mpz_fits_ldouble_p(mp); }
  
+#if __GMPXX_USE_CXX11
+  explicit operator bool() const { return mp->_mp_size != 0; }
+#endif
+
    // member operators
    __GMP_DECLARE_COMPOUND_OPERATOR(operator+=)
    __GMP_DECLARE_COMPOUND_OPERATOR(operator-=)
@@ -1710,9 +1634,23 @@ public:
    // constructors and destructor
    __gmp_expr() { mpq_init(mp); }
  
-  __gmp_expr(const __gmp_expr &q) { mpq_init(mp); mpq_set(mp, q.mp); }
+  __gmp_expr(const __gmp_expr &q)
+  {
+    mpz_init_set(mpq_numref(mp), mpq_numref(q.mp));
+    mpz_init_set(mpq_denref(mp), mpq_denref(q.mp));
+  }
+#if __GMPXX_USE_CXX11
+  __gmp_expr(__gmp_expr &&q)
+  { *mp = *q.mp; mpq_init(q.mp); }
+#endif
+  template <class T>
+  __gmp_expr(const __gmp_expr<mpz_t, T> &expr)
+  { mpq_init(mp); __gmp_set_expr(mp, expr); }
+  template <class T>
+  __gmp_expr(const __gmp_expr<mpq_t, T> &expr)
+  { mpq_init(mp); __gmp_set_expr(mp, expr); }
    template <class T, class U>
-  __gmp_expr(const __gmp_expr<T, U> &expr)
+  explicit __gmp_expr(const __gmp_expr<T, U> &expr)
    { mpq_init(mp); __gmp_set_expr(mp, expr); }
  
    __gmp_expr(signed char c) { mpq_init(mp); mpq_set_si(mp, c, 1); }
@@ -1731,34 +1669,23 @@ public:
    __gmp_expr(double d) { mpq_init(mp); mpq_set_d(mp, d); }
    // __gmp_expr(long double ld) { mpq_init(mp); mpq_set_ld(mp, ld); }
  
-  explicit __gmp_expr(const char *s)
-  {
-    mpq_init (mp);
-    if (mpq_set_str (mp, s, 0) != 0)
-      {
-        mpq_clear (mp);
-        throw std::invalid_argument ("mpq_set_str");
-      }
-  }
-  __gmp_expr(const char *s, int base)
+  explicit __gmp_expr(const char *s, int base = 0)
    {
      mpq_init (mp);
-    if (mpq_set_str(mp, s, base) != 0)
+    // If s is the literal 0, we meant to call another constructor.
+    // If s just happens to evaluate to 0, we would crash, so whatever.
+    if (s == 0)
        {
-        mpq_clear (mp);
-        throw std::invalid_argument ("mpq_set_str");
+       // Don't turn mpq_class(0,0) into 0
+       mpz_set_si(mpq_denref(mp), base);
        }
-  }
-  explicit __gmp_expr(const std::string &s)
-  {
-    mpq_init (mp);
-    if (mpq_set_str (mp, s.c_str(), 0) != 0)
+    else if (mpq_set_str(mp, s, base) != 0)
        {
          mpq_clear (mp);
          throw std::invalid_argument ("mpq_set_str");
        }
    }
-  __gmp_expr(const std::string &s, int base)
+  explicit __gmp_expr(const std::string &s, int base = 0)
    {
      mpq_init(mp);
      if (mpq_set_str (mp, s.c_str(), base) != 0)
@@ -1767,20 +1694,31 @@ public:
          throw std::invalid_argument ("mpq_set_str");
        }
    }
-  explicit __gmp_expr(mpq_srcptr q) { mpq_init(mp); mpq_set(mp, q); }
+  explicit __gmp_expr(mpq_srcptr q)
+  {
+    mpz_init_set(mpq_numref(mp), mpq_numref(q));
+    mpz_init_set(mpq_denref(mp), mpq_denref(q));
+  }
  
    __gmp_expr(const mpz_class &num, const mpz_class &den)
    {
-    mpq_init(mp);
-    mpz_set(mpq_numref(mp), num.get_mpz_t());
-    mpz_set(mpq_denref(mp), den.get_mpz_t());
+    mpz_init_set(mpq_numref(mp), num.get_mpz_t());
+    mpz_init_set(mpq_denref(mp), den.get_mpz_t());
    }
  
    ~__gmp_expr() { mpq_clear(mp); }
  
+  void swap(__gmp_expr& q) __GMPXX_NOEXCEPT { std::swap(*mp, *q.mp); }
+
    // assignment operators
    __gmp_expr & operator=(const __gmp_expr &q)
    { mpq_set(mp, q.mp); return *this; }
+#if __GMPXX_USE_CXX11
+  __gmp_expr & operator=(__gmp_expr &&q) noexcept
+  { swap(q); return *this; }
+  __gmp_expr & operator=(mpz_class &&z) noexcept
+  { get_num() = std::move(z); get_den() = 1u; return *this; }
+#endif
    template <class T, class U>
    __gmp_expr<value_type, value_type> & operator=(const __gmp_expr<T, U> &expr)
    { __gmp_set_expr(mp, expr); return *this; }
@@ -1859,6 +1797,10 @@ public:
  
    double get_d() const { return mpq_get_d(mp); }
  
+#if __GMPXX_USE_CXX11
+  explicit operator bool() const { return mpq_numref(mp)->_mp_size != 0; }
+#endif
+
    // compound assignments
    __GMP_DECLARE_COMPOUND_OPERATOR(operator+=)
    __GMP_DECLARE_COMPOUND_OPERATOR(operator-=)
@@ -1894,6 +1836,10 @@ public:
  
    __gmp_expr(const __gmp_expr &f)
    { mpf_init2(mp, f.get_prec()); mpf_set(mp, f.mp); }
+#if __GMPXX_USE_CXX11
+  __gmp_expr(__gmp_expr &&f)
+  { *mp = *f.mp; mpf_init2(f.mp, get_prec()); }
+#endif
    __gmp_expr(const __gmp_expr &f, mp_bitcnt_t prec)
    { mpf_init2(mp, prec); mpf_set(mp, f.mp); }
    template <class T, class U>
@@ -1983,9 +1929,15 @@ public:
  
    ~__gmp_expr() { mpf_clear(mp); }
  
+  void swap(__gmp_expr& f) __GMPXX_NOEXCEPT { std::swap(*mp, *f.mp); }
+
    // assignment operators
    __gmp_expr & operator=(const __gmp_expr &f)
    { mpf_set(mp, f.mp); return *this; }
+#if __GMPXX_USE_CXX11
+  __gmp_expr & operator=(__gmp_expr &&f) noexcept
+  { swap(f); return *this; }
+#endif
    template <class T, class U>
    __gmp_expr<value_type, value_type> & operator=(const __gmp_expr<T, U> &expr)
    { __gmp_set_expr(mp, expr); return *this; }
@@ -2057,6 +2009,10 @@ public:
    // bool fits_double_p() const { return mpf_fits_double_p(mp); }
    // bool fits_ldouble_p() const { return mpf_fits_ldouble_p(mp); }
  
+#if __GMPXX_USE_CXX11
+  explicit operator bool() const { return mp->_mp_size != 0; }
+#endif
+
    // compound assignments
    __GMP_DECLARE_COMPOUND_OPERATOR(operator+=)
    __GMP_DECLARE_COMPOUND_OPERATOR(operator-=)
@@ -2074,43 +2030,58 @@ typedef __gmp_expr<mpf_t, mpf_t> mpf_class;
  
  
  
-/**************** I/O operators ****************/
+/**************** User-defined literals ****************/
  
-// these should (and will) be provided separately
+#if __GMPXX_USE_CXX11
+inline mpz_class operator"" _mpz(const char* s)
+{
+  return mpz_class(s);
+}
  
-template <class T>
-inline std::ostream & operator<<
-(std::ostream &o, const __gmp_expr<T, T> &expr)
+inline mpq_class operator"" _mpq(const char* s)
+{
+  mpq_class q;
+  q.get_num() = s;
+  return q;
+}
+
+inline mpf_class operator"" _mpf(const char* s)
  {
-  return o << expr.__get_mp();
+  return mpf_class(s);
  }
+#endif
+
+/**************** I/O operators ****************/
+
+// these should (and will) be provided separately
  
  template <class T, class U>
  inline std::ostream & operator<<
  (std::ostream &o, const __gmp_expr<T, U> &expr)
  {
-  __gmp_expr<T, T> temp(expr);
+  __gmp_expr<T, T> const& temp(expr);
    return o << temp.__get_mp();
  }
  
-
  template <class T>
  inline std::istream & operator>>(std::istream &i, __gmp_expr<T, T> &expr)
  {
    return i >> expr.__get_mp();
  }
  
+/*
+// you might want to uncomment this
  inline std::istream & operator>>(std::istream &i, mpq_class &q)
  {
    i >> q.get_mpq_t();
-  // q.canonicalize(); // you might want to uncomment this
+  q.canonicalize();
    return i;
  }
+*/
  
  
  /**************** Functions for type conversion ****************/
  
-template <>
  inline void __gmp_set_expr(mpz_ptr z, const mpz_class &w)
  {
    mpz_set(z, w.get_mpz_t());
@@ -2122,33 +2093,20 @@ inline void __gmp_set_expr(mpz_ptr z, const __gmp_expr<mpz_t, T> &expr)
    expr.eval(z);
  }
  
-template <>
-inline void __gmp_set_expr(mpz_ptr z, const mpq_class &q)
-{
-  mpz_set_q(z, q.get_mpq_t());
-}
-
  template <class T>
  inline void __gmp_set_expr(mpz_ptr z, const __gmp_expr<mpq_t, T> &expr)
  {
-  mpq_class temp(expr);
+  mpq_class const& temp(expr);
    mpz_set_q(z, temp.get_mpq_t());
  }
  
-template <class T>
-inline void __gmp_set_expr(mpz_ptr z, const mpf_class &f)
-{
-  mpz_set_f(z, f.get_mpf_t());
-}
-
  template <class T>
  inline void __gmp_set_expr(mpz_ptr z, const __gmp_expr<mpf_t, T> &expr)
  {
-  mpf_class temp(expr);
+  mpf_class const& temp(expr);
    mpz_set_f(z, temp.get_mpf_t());
  }
  
-template <>
  inline void __gmp_set_expr(mpq_ptr q, const mpz_class &z)
  {
    mpq_set_z(q, z.get_mpz_t());
@@ -2157,11 +2115,10 @@ inline void __gmp_set_expr(mpq_ptr q, const mpz_class &z)
  template <class T>
  inline void __gmp_set_expr(mpq_ptr q, const __gmp_expr<mpz_t, T> &expr)
  {
-  mpz_class temp(expr);
-  mpq_set_z(q, temp.get_mpz_t());
+  __gmp_set_expr(mpq_numref(q), expr);
+  mpz_set_ui(mpq_denref(q), 1);
  }
  
-template <>
  inline void __gmp_set_expr(mpq_ptr q, const mpq_class &r)
  {
    mpq_set(q, r.get_mpq_t());
@@ -2173,46 +2130,27 @@ inline void __gmp_set_expr(mpq_ptr q, const __gmp_expr<mpq_t, T> &expr)
    expr.eval(q);
  }
  
-template <class T>
-inline void __gmp_set_expr(mpq_ptr q, const mpf_class &f)
-{
-  mpq_set_f(q, f.get_mpf_t());
-}
-
  template <class T>
  inline void __gmp_set_expr(mpq_ptr q, const __gmp_expr<mpf_t, T> &expr)
  {
-  mpf_class temp(expr);
+  mpf_class const& temp(expr);
    mpq_set_f(q, temp.get_mpf_t());
  }
  
-template <class T>
-inline void __gmp_set_expr(mpf_ptr f, const mpz_class &z)
-{
-  mpf_set_z(f, z.get_mpz_t());
-}
-
  template <class T>
  inline void __gmp_set_expr(mpf_ptr f, const __gmp_expr<mpz_t, T> &expr)
  {
-  mpz_class temp(expr);
+  mpz_class const& temp(expr);
    mpf_set_z(f, temp.get_mpz_t());
  }
  
-template <class T>
-inline void __gmp_set_expr(mpf_ptr f, const mpq_class &q)
-{
-  mpf_set_q(f, q.get_mpq_t());
-}
-
  template <class T>
  inline void __gmp_set_expr(mpf_ptr f, const __gmp_expr<mpq_t, T> &expr)
  {
-  mpq_class temp(expr);
+  mpq_class const& temp(expr);
    mpf_set_q(f, temp.get_mpq_t());
  }
  
-template <>
  inline void __gmp_set_expr(mpf_ptr f, const mpf_class &g)
  {
    mpf_set(f, g.get_mpf_t());
@@ -2221,10 +2159,33 @@ inline void __gmp_set_expr(mpf_ptr f, const mpf_class &g)
  template <class T>
  inline void __gmp_set_expr(mpf_ptr f, const __gmp_expr<mpf_t, T> &expr)
  {
-  expr.eval(f, mpf_get_prec(f));
+  expr.eval(f);
  }
  
  
+/* Temporary objects */
+
+template <class T>
+class __gmp_temp
+{
+  __gmp_expr<T, T> val;
+  public:
+  template<class U, class V>
+  __gmp_temp(U const& u, V) : val (u) {}
+  typename __gmp_resolve_expr<T>::srcptr_type
+  __get_mp() const { return val.__get_mp(); }
+};
+
+template <>
+class __gmp_temp <mpf_t>
+{
+  mpf_class val;
+  public:
+  template<class U>
+  __gmp_temp(U const& u, mpf_ptr res) : val (u, mpf_get_prec(res)) {}
+  mpf_srcptr __get_mp() const { return val.__get_mp(); }
+};
+
  /**************** Specializations of __gmp_expr ****************/
  /* The eval() method of __gmp_expr<T, U> evaluates the corresponding
     expression and assigns the result to its argument, which is either an
@@ -2252,12 +2213,11 @@ private:
  
    __gmp_unary_expr<val_type, Op> expr;
  public:
-  __gmp_expr(const val_type &val) : expr(val) { }
-  void eval(typename __gmp_resolve_expr<T>::ptr_type p,
-           unsigned long int = 0) const
+  explicit __gmp_expr(const val_type &val) : expr(val) { }
+  void eval(typename __gmp_resolve_expr<T>::ptr_type p) const
    { Op::eval(p, expr.val.__get_mp()); }
    const val_type & get_val() const { return expr.val; }
-  unsigned long int get_prec() const { return expr.val.get_prec(); }
+  mp_bitcnt_t get_prec() const { return expr.val.get_prec(); }
  };
  
  
@@ -2271,14 +2231,11 @@ private:
  
    __gmp_unary_expr<val_type, Op> expr;
  public:
-  __gmp_expr(const val_type &val) : expr(val) { }
+  explicit __gmp_expr(const val_type &val) : expr(val) { }
    void eval(typename __gmp_resolve_expr<T>::ptr_type p) const
-  { __gmp_expr<T, T> temp(expr.val); Op::eval(p, temp.__get_mp()); }
-  void eval(typename __gmp_resolve_expr<T>::ptr_type p,
-           mp_bitcnt_t prec) const
-  { __gmp_expr<T, T> temp(expr.val, prec); Op::eval(p, temp.__get_mp()); }
+  { expr.val.eval(p); Op::eval(p, p); }
    const val_type & get_val() const { return expr.val; }
-  unsigned long int get_prec() const { return expr.val.get_prec(); }
+  mp_bitcnt_t get_prec() const { return expr.val.get_prec(); }
  };
  
  
@@ -2306,12 +2263,11 @@ private:
  public:
    __gmp_expr(const val1_type &val1, const val2_type &val2)
      : expr(val1, val2) { }
-  void eval(typename __gmp_resolve_expr<T>::ptr_type p,
-           unsigned long int = 0) const
+  void eval(typename __gmp_resolve_expr<T>::ptr_type p) const
    { Op::eval(p, expr.val1.__get_mp(), expr.val2.__get_mp()); }
    const val1_type & get_val1() const { return expr.val1; }
    const val2_type & get_val2() const { return expr.val2; }
-  unsigned long int get_prec() const
+  mp_bitcnt_t get_prec() const
    {
      mp_bitcnt_t prec1 = expr.val1.get_prec(),
        prec2 = expr.val2.get_prec();
@@ -2333,12 +2289,11 @@ private:
  public:
    __gmp_expr(const val1_type &val1, const val2_type &val2)
      : expr(val1, val2) { }
-  void eval(typename __gmp_resolve_expr<T>::ptr_type p,
-           unsigned long int = 0) const
+  void eval(typename __gmp_resolve_expr<T>::ptr_type p) const
    { Op::eval(p, expr.val1.__get_mp(), expr.val2); }
    const val1_type & get_val1() const { return expr.val1; }
    const val2_type & get_val2() const { return expr.val2; }
-  unsigned long int get_prec() const { return expr.val1.get_prec(); }
+  mp_bitcnt_t get_prec() const { return expr.val1.get_prec(); }
  };
  
  template <class T, class U, class Op>
@@ -2352,12 +2307,11 @@ private:
  public:
    __gmp_expr(const val1_type &val1, const val2_type &val2)
      : expr(val1, val2) { }
-  void eval(typename __gmp_resolve_expr<T>::ptr_type p,
-           unsigned long int = 0) const
+  void eval(typename __gmp_resolve_expr<T>::ptr_type p) const
    { Op::eval(p, expr.val1, expr.val2.__get_mp()); }
    const val1_type & get_val1() const { return expr.val1; }
    const val2_type & get_val2() const { return expr.val2; }
-  unsigned long int get_prec() const { return expr.val2.get_prec(); }
+  mp_bitcnt_t get_prec() const { return expr.val2.get_prec(); }
  };
  
  
@@ -2377,18 +2331,20 @@ public:
      : expr(val1, val2) { }
    void eval(typename __gmp_resolve_expr<T>::ptr_type p) const
    {
-    __gmp_expr<T, T> temp(expr.val2);
-    Op::eval(p, expr.val1.__get_mp(), temp.__get_mp());
-  }
-  void eval(typename __gmp_resolve_expr<T>::ptr_type p,
-           mp_bitcnt_t prec) const
-  {
-    __gmp_expr<T, T> temp(expr.val2, prec);
-    Op::eval(p, expr.val1.__get_mp(), temp.__get_mp());
+    if(p != expr.val1.__get_mp())
+    {
+      __gmp_set_expr(p, expr.val2);
+      Op::eval(p, expr.val1.__get_mp(), p);
+    }
+    else
+    {
+      __gmp_temp<T> temp(expr.val2, p);
+      Op::eval(p, expr.val1.__get_mp(), temp.__get_mp());
+    }
    }
    const val1_type & get_val1() const { return expr.val1; }
    const val2_type & get_val2() const { return expr.val2; }
-  unsigned long int get_prec() const
+  mp_bitcnt_t get_prec() const
    {
      mp_bitcnt_t prec1 = expr.val1.get_prec(),
        prec2 = expr.val2.get_prec();
@@ -2410,18 +2366,20 @@ public:
      : expr(val1, val2) { }
    void eval(typename __gmp_resolve_expr<T>::ptr_type p) const
    {
-    __gmp_expr<T, T> temp(expr.val1);
-    Op::eval(p, temp.__get_mp(), expr.val2.__get_mp());
-  }
-  void eval(typename __gmp_resolve_expr<T>::ptr_type p,
-           mp_bitcnt_t prec) const
-  {
-    __gmp_expr<T, T> temp(expr.val1, prec);
-    Op::eval(p, temp.__get_mp(), expr.val2.__get_mp());
+    if(p != expr.val2.__get_mp())
+    {
+      __gmp_set_expr(p, expr.val1);
+      Op::eval(p, p, expr.val2.__get_mp());
+    }
+    else
+    {
+      __gmp_temp<T> temp(expr.val1, p);
+      Op::eval(p, temp.__get_mp(), expr.val2.__get_mp());
+    }
    }
    const val1_type & get_val1() const { return expr.val1; }
    const val2_type & get_val2() const { return expr.val2; }
-  unsigned long int get_prec() const
+  mp_bitcnt_t get_prec() const
    {
      mp_bitcnt_t prec1 = expr.val1.get_prec(),
        prec2 = expr.val2.get_prec();
@@ -2443,18 +2401,20 @@ public:
      : expr(val1, val2) { }
    void eval(typename __gmp_resolve_expr<T>::ptr_type p) const
    {
-    __gmp_expr<T, T> temp(expr.val2);
-    Op::eval(p, expr.val1.__get_mp(), temp.__get_mp());
-  }
-  void eval(typename __gmp_resolve_expr<T>::ptr_type p,
-           mp_bitcnt_t prec) const
-  {
-    __gmp_expr<T, T> temp(expr.val2, prec);
-    Op::eval(p, expr.val1.__get_mp(), temp.__get_mp());
+    if(p != expr.val1.__get_mp())
+    {
+      __gmp_set_expr(p, expr.val2);
+      Op::eval(p, expr.val1.__get_mp(), p);
+    }
+    else
+    {
+      __gmp_temp<T> temp(expr.val2, p);
+      Op::eval(p, expr.val1.__get_mp(), temp.__get_mp());
+    }
    }
    const val1_type & get_val1() const { return expr.val1; }
    const val2_type & get_val2() const { return expr.val2; }
-  unsigned long int get_prec() const
+  mp_bitcnt_t get_prec() const
    {
      mp_bitcnt_t prec1 = expr.val1.get_prec(),
        prec2 = expr.val2.get_prec();
@@ -2476,18 +2436,20 @@ public:
      : expr(val1, val2) { }
    void eval(typename __gmp_resolve_expr<T>::ptr_type p) const
    {
-    __gmp_expr<T, T> temp(expr.val1);
-    Op::eval(p, temp.__get_mp(), expr.val2.__get_mp());
-  }
-  void eval(typename __gmp_resolve_expr<T>::ptr_type p,
-           mp_bitcnt_t prec) const
-  {
-    __gmp_expr<T, T> temp(expr.val1, prec);
-    Op::eval(p, temp.__get_mp(), expr.val2.__get_mp());
+    if(p != expr.val2.__get_mp())
+    {
+      __gmp_set_expr(p, expr.val1);
+      Op::eval(p, p, expr.val2.__get_mp());
+    }
+    else
+    {
+      __gmp_temp<T> temp(expr.val1, p);
+      Op::eval(p, temp.__get_mp(), expr.val2.__get_mp());
+    }
    }
    const val1_type & get_val1() const { return expr.val1; }
    const val2_type & get_val2() const { return expr.val2; }
-  unsigned long int get_prec() const
+  mp_bitcnt_t get_prec() const
    {
      mp_bitcnt_t prec1 = expr.val1.get_prec(),
        prec2 = expr.val2.get_prec();
@@ -2511,18 +2473,12 @@ public:
      : expr(val1, val2) { }
    void eval(typename __gmp_resolve_expr<T>::ptr_type p) const
    {
-    __gmp_expr<T, T> temp(expr.val1);
-    Op::eval(p, temp.__get_mp(), expr.val2);
-  }
-  void eval(typename __gmp_resolve_expr<T>::ptr_type p,
-           mp_bitcnt_t prec) const
-  {
-    __gmp_expr<T, T> temp(expr.val1, prec);
-    Op::eval(p, temp.__get_mp(), expr.val2);
+    expr.val1.eval(p);
+    Op::eval(p, p, expr.val2);
    }
    const val1_type & get_val1() const { return expr.val1; }
    const val2_type & get_val2() const { return expr.val2; }
-  unsigned long int get_prec() const { return expr.val1.get_prec(); }
+  mp_bitcnt_t get_prec() const { return expr.val1.get_prec(); }
  };
  
  template <class T, class U, class V, class Op>
@@ -2538,18 +2494,12 @@ public:
      : expr(val1, val2) { }
    void eval(typename __gmp_resolve_expr<T>::ptr_type p) const
    {
-    __gmp_expr<T, T> temp(expr.val2);
-    Op::eval(p, expr.val1, temp.__get_mp());
-  }
-  void eval(typename __gmp_resolve_expr<T>::ptr_type p,
-           mp_bitcnt_t prec) const
-  {
-    __gmp_expr<T, T> temp(expr.val2, prec);
-    Op::eval(p, expr.val1, temp.__get_mp());
+    expr.val2.eval(p);
+    Op::eval(p, expr.val1, p);
    }
    const val1_type & get_val1() const { return expr.val1; }
    const val2_type & get_val2() const { return expr.val2; }
-  unsigned long int get_prec() const { return expr.val2.get_prec(); }
+  mp_bitcnt_t get_prec() const { return expr.val2.get_prec(); }
  };
  
  
@@ -2569,18 +2519,13 @@ public:
      : expr(val1, val2) { }
    void eval(typename __gmp_resolve_expr<T>::ptr_type p) const
    {
-    __gmp_expr<T, T> temp1(expr.val1), temp2(expr.val2);
-    Op::eval(p, temp1.__get_mp(), temp2.__get_mp());
-  }
-  void eval(typename __gmp_resolve_expr<T>::ptr_type p,
-           mp_bitcnt_t prec) const
-  {
-    __gmp_expr<T, T> temp1(expr.val1, prec), temp2(expr.val2, prec);
-    Op::eval(p, temp1.__get_mp(), temp2.__get_mp());
+    __gmp_temp<T> temp2(expr.val2, p);
+    expr.val1.eval(p);
+    Op::eval(p, p, temp2.__get_mp());
    }
    const val1_type & get_val1() const { return expr.val1; }
    const val2_type & get_val2() const { return expr.val2; }
-  unsigned long int get_prec() const
+  mp_bitcnt_t get_prec() const
    {
      mp_bitcnt_t prec1 = expr.val1.get_prec(),
        prec2 = expr.val2.get_prec();
@@ -2602,18 +2547,13 @@ public:
      : expr(val1, val2) { }
    void eval(typename __gmp_resolve_expr<T>::ptr_type p) const
    {
-    __gmp_expr<T, T> temp1(expr.val1), temp2(expr.val2);
-    Op::eval(p, temp1.__get_mp(), temp2.__get_mp());
-  }
-  void eval(typename __gmp_resolve_expr<T>::ptr_type p,
-           mp_bitcnt_t prec) const
-  {
-    __gmp_expr<T, T> temp1(expr.val1, prec), temp2(expr.val2, prec);
-    Op::eval(p, temp1.__get_mp(), temp2.__get_mp());
+    __gmp_temp<T> temp1(expr.val1, p);
+    expr.val2.eval(p);
+    Op::eval(p, temp1.__get_mp(), p);
    }
    const val1_type & get_val1() const { return expr.val1; }
    const val2_type & get_val2() const { return expr.val2; }
-  unsigned long int get_prec() const
+  mp_bitcnt_t get_prec() const
    {
      mp_bitcnt_t prec1 = expr.val1.get_prec(),
        prec2 = expr.val2.get_prec();
@@ -2635,18 +2575,13 @@ public:
      : expr(val1, val2) { }
    void eval(typename __gmp_resolve_expr<T>::ptr_type p) const
    {
-    __gmp_expr<T, T> temp1(expr.val1), temp2(expr.val2);
-    Op::eval(p, temp1.__get_mp(), temp2.__get_mp());
-  }
-  void eval(typename __gmp_resolve_expr<T>::ptr_type p,
-           mp_bitcnt_t prec) const
-  {
-    __gmp_expr<T, T> temp1(expr.val1, prec), temp2(expr.val2, prec);
-    Op::eval(p, temp1.__get_mp(), temp2.__get_mp());
+    __gmp_temp<T> temp2(expr.val2, p);
+    expr.val1.eval(p);
+    Op::eval(p, p, temp2.__get_mp());
    }
    const val1_type & get_val1() const { return expr.val1; }
    const val2_type & get_val2() const { return expr.val2; }
-  unsigned long int get_prec() const
+  mp_bitcnt_t get_prec() const
    {
      mp_bitcnt_t prec1 = expr.val1.get_prec(),
        prec2 = expr.val2.get_prec();
@@ -2679,7 +2614,7 @@ public:                                                                     \
    { eval_fun::eval(q, expr.val1.get_mpz_t(), expr.val2.get_mpq_t()); }      \
    const val1_type & get_val1() const { return expr.val1; }                  \
    const val2_type & get_val2() const { return expr.val2; }                  \
-  unsigned long int get_prec() const { return mpf_get_default_prec(); }     \
+  mp_bitcnt_t get_prec() const { return mpf_get_default_prec(); }           \
  };                                                                          \
                                                                              \
  template <>                                                                 \
@@ -2697,7 +2632,7 @@ public:                                                                     \
    { eval_fun::eval(q, expr.val1.get_mpq_t(), expr.val2.get_mpz_t()); }      \
    const val1_type & get_val1() const { return expr.val1; }                  \
    const val2_type & get_val2() const { return expr.val2; }                  \
-  unsigned long int get_prec() const { return mpf_get_default_prec(); }     \
+  mp_bitcnt_t get_prec() const { return mpf_get_default_prec(); }           \
  };                                                                          \
                                                                              \
  template <class T>                                                          \
@@ -2719,7 +2654,7 @@ public:                                                                     \
    }                                                                         \
    const val1_type & get_val1() const { return expr.val1; }                  \
    const val2_type & get_val2() const { return expr.val2; }                  \
-  unsigned long int get_prec() const { return mpf_get_default_prec(); }     \
+  mp_bitcnt_t get_prec() const { return mpf_get_default_prec(); }           \
  };                                                                          \
                                                                              \
  template <class T>                                                          \
@@ -2741,7 +2676,7 @@ public:                                                                     \
    }                                                                         \
    const val1_type & get_val1() const { return expr.val1; }                  \
    const val2_type & get_val2() const { return expr.val2; }                  \
-  unsigned long int get_prec() const { return mpf_get_default_prec(); }     \
+  mp_bitcnt_t get_prec() const { return mpf_get_default_prec(); }           \
  };                                                                          \
                                                                              \
  template <class T>                                                          \
@@ -2763,7 +2698,7 @@ public:                                                                     \
    }                                                                         \
    const val1_type & get_val1() const { return expr.val1; }                  \
    const val2_type & get_val2() const { return expr.val2; }                  \
-  unsigned long int get_prec() const { return mpf_get_default_prec(); }     \
+  mp_bitcnt_t get_prec() const { return mpf_get_default_prec(); }           \
  };                                                                          \
                                                                              \
  template <class T>                                                          \
@@ -2785,7 +2720,7 @@ public:                                                                     \
    }                                                                         \
    const val1_type & get_val1() const { return expr.val1; }                  \
    const val2_type & get_val2() const { return expr.val2; }                  \
-  unsigned long int get_prec() const { return mpf_get_default_prec(); }     \
+  mp_bitcnt_t get_prec() const { return mpf_get_default_prec(); }           \
  };                                                                          \
                                                                              \
  template <class T, class U>                                                 \
@@ -2803,12 +2738,12 @@ public:                                                                     \
    void eval(mpq_ptr q) const                                                \
    {                                                                         \
      mpz_class temp1(expr.val1);                                             \
-    mpq_class temp2(expr.val2);                                             \
-    eval_fun::eval(q, temp1.get_mpz_t(), temp2.get_mpq_t());                \
+    expr.val2.eval(q);                                                      \
+    eval_fun::eval(q, temp1.get_mpz_t(), q);                                \
    }                                                                         \
    const val1_type & get_val1() const { return expr.val1; }                  \
    const val2_type & get_val2() const { return expr.val2; }                  \
-  unsigned long int get_prec() const { return mpf_get_default_prec(); }     \
+  mp_bitcnt_t get_prec() const { return mpf_get_default_prec(); }           \
  };                                                                          \
                                                                              \
  template <class T, class U>                                                 \
@@ -2825,13 +2760,13 @@ public:                                                                     \
      : expr(val1, val2) { }                                                  \
    void eval(mpq_ptr q) const                                                \
    {                                                                         \
-    mpq_class temp1(expr.val1);                                             \
      mpz_class temp2(expr.val2);                                             \
-    eval_fun::eval(q, temp1.get_mpq_t(), temp2.get_mpz_t());                \
+    expr.val1.eval(q);                                             \
+    eval_fun::eval(q, q, temp2.get_mpz_t());                \
    }                                                                         \
    const val1_type & get_val1() const { return expr.val1; }                  \
    const val2_type & get_val2() const { return expr.val2; }                  \
-  unsigned long int get_prec() const { return mpf_get_default_prec(); }     \
+  mp_bitcnt_t get_prec() const { return mpf_get_default_prec(); }           \
  };
  
  
@@ -2873,7 +2808,7 @@ fun(const __gmp_expr<T, U> &expr)                                            \
  template <class T, class U>                                   \
  inline type fun(const __gmp_expr<T, U> &expr)                 \
  {                                                             \
-  typename __gmp_resolve_temp<T, T, U>::temp_type temp(expr); \
+  __gmp_expr<T, T> const& temp(expr); \
    return eval_fun::eval(temp.__get_mp());                     \
  }
  
@@ -2935,7 +2870,7 @@ __GMPNS_DEFINE_BINARY_FUNCTION(fun, eval_fun, signed long int)    \
  __GMPNU_DEFINE_BINARY_FUNCTION(fun, eval_fun, unsigned long int)  \
  __GMPND_DEFINE_BINARY_FUNCTION(fun, eval_fun, float)              \
  __GMPND_DEFINE_BINARY_FUNCTION(fun, eval_fun, double)             \
-__GMPNLD_DEFINE_BINARY_FUNCTION(fun, eval_fun, long double)
+/* __GMPNLD_DEFINE_BINARY_FUNCTION(fun, eval_fun, long double) */
  
  #define __GMP_DEFINE_BINARY_FUNCTION(fun, eval_fun) \
  __GMPP_DEFINE_BINARY_FUNCTION(fun, eval_fun)        \
@@ -2946,11 +2881,11 @@ __GMPN_DEFINE_BINARY_FUNCTION(fun, eval_fun)
                                                                         \
  template <class T, class U>                                            \
  inline __gmp_expr                                                      \
-<T, __gmp_binary_expr<__gmp_expr<T, U>, unsigned long int, eval_fun> > \
-fun(const __gmp_expr<T, U> &expr, unsigned long int l)                 \
+<T, __gmp_binary_expr<__gmp_expr<T, U>, mp_bitcnt_t, eval_fun> > \
+fun(const __gmp_expr<T, U> &expr, mp_bitcnt_t l)                 \
  {                                                                      \
    return __gmp_expr<T, __gmp_binary_expr                               \
-    <__gmp_expr<T, U>, unsigned long int, eval_fun> >(expr, l);        \
+    <__gmp_expr<T, U>, mp_bitcnt_t, eval_fun> >(expr, l);        \
  }
  
  
@@ -2961,8 +2896,8 @@ inline type fun(const __gmp_expr<T, U> &expr1,                          \
                 const __gmp_expr<V, W> &expr2)                          \
  {                                                                       \
    typedef typename __gmp_resolve_expr<T, V>::value_type eval_type;      \
-  typename __gmp_resolve_temp<eval_type, T, U>::temp_type temp1(expr1); \
-  typename __gmp_resolve_temp<eval_type, V, W>::temp_type temp2(expr2); \
+  __gmp_expr<eval_type, eval_type> const& temp1(expr1); \
+  __gmp_expr<eval_type, eval_type> const& temp2(expr2); \
    return eval_fun::eval(temp1.__get_mp(), temp2.__get_mp());            \
  }
  
@@ -2972,14 +2907,14 @@ inline type fun(const __gmp_expr<T, U> &expr1,                          \
  template <class T, class U>                                        \
  inline type fun(const __gmp_expr<T, U> &expr, type2 t)             \
  {                                                                  \
-  typename __gmp_resolve_temp<T, T, U>::temp_type temp(expr);      \
+  __gmp_expr<T, T> const& temp(expr);      \
    return eval_fun::eval(temp.__get_mp(), static_cast<bigtype>(t)); \
  }                                                                  \
                                                                     \
  template <class T, class U>                                        \
  inline type fun(type2 t, const __gmp_expr<T, U> &expr)             \
  {                                                                  \
-  typename __gmp_resolve_temp<T, T, U>::temp_type temp(expr);      \
+  __gmp_expr<T, T> const& temp(expr);      \
    return eval_fun::eval(static_cast<bigtype>(t), temp.__get_mp()); \
  }
  
@@ -3008,7 +2943,7 @@ __GMPNS_DEFINE_BINARY_TYPE_FUNCTION(type, fun, eval_fun, signed long int)    \
  __GMPNU_DEFINE_BINARY_TYPE_FUNCTION(type, fun, eval_fun, unsigned long int)  \
  __GMPND_DEFINE_BINARY_TYPE_FUNCTION(type, fun, eval_fun, float)              \
  __GMPND_DEFINE_BINARY_TYPE_FUNCTION(type, fun, eval_fun, double)             \
-__GMPNLD_DEFINE_BINARY_TYPE_FUNCTION(type, fun, eval_fun, long double)
+/* __GMPNLD_DEFINE_BINARY_TYPE_FUNCTION(type, fun, eval_fun, long double) */
  
  #define __GMP_DEFINE_BINARY_TYPE_FUNCTION(type, fun, eval_fun) \
  __GMPP_DEFINE_BINARY_TYPE_FUNCTION(type, fun, eval_fun)        \
@@ -3081,10 +3016,10 @@ __GMP_DEFINE_COMPOUND_OPERATOR(mpf, fun, eval_fun)
  
  #define __GMP_DEFINE_COMPOUND_OPERATOR_UI(type, fun, eval_fun)  \
                                                                  \
-inline type##_class & type##_class::fun(unsigned long int l)    \
+inline type##_class & type##_class::fun(mp_bitcnt_t l)    \
  {                                                               \
    __gmp_set_expr(mp, __gmp_expr<type##_t, __gmp_binary_expr     \
-    <type##_class, unsigned long int, eval_fun> >(*this, l));   \
+    <type##_class, mp_bitcnt_t, eval_fun> >(*this, l));   \
    return *this;                                                 \
  }
  
@@ -3146,12 +3081,11 @@ __GMP_DEFINE_BINARY_FUNCTION_UI(operator<<, __gmp_binary_lshift)
  __GMP_DEFINE_BINARY_FUNCTION_UI(operator>>, __gmp_binary_rshift)
  
  __GMP_DEFINE_BINARY_TYPE_FUNCTION(bool, operator==, __gmp_binary_equal)
-__GMP_DEFINE_BINARY_TYPE_FUNCTION(bool, operator!=, __gmp_binary_not_equal)
+__GMP_DEFINE_BINARY_TYPE_FUNCTION(bool, operator!=, ! __gmp_binary_equal)
  __GMP_DEFINE_BINARY_TYPE_FUNCTION(bool, operator<, __gmp_binary_less)
-__GMP_DEFINE_BINARY_TYPE_FUNCTION(bool, operator<=, __gmp_binary_less_equal)
+__GMP_DEFINE_BINARY_TYPE_FUNCTION(bool, operator<=, ! __gmp_binary_greater)
  __GMP_DEFINE_BINARY_TYPE_FUNCTION(bool, operator>, __gmp_binary_greater)
-__GMP_DEFINE_BINARY_TYPE_FUNCTION(bool, operator>=, \
-                                  __gmp_binary_greater_equal)
+__GMP_DEFINE_BINARY_TYPE_FUNCTION(bool, operator>=, ! __gmp_binary_less)
  
  __GMP_DEFINE_UNARY_FUNCTION(abs, __gmp_abs_function)
  __GMP_DEFINE_UNARY_FUNCTION(trunc, __gmp_trunc_function)
@@ -3163,6 +3097,10 @@ __GMP_DEFINE_BINARY_FUNCTION(hypot, __gmp_hypot_function)
  __GMP_DEFINE_UNARY_TYPE_FUNCTION(int, sgn, __gmp_sgn_function)
  __GMP_DEFINE_BINARY_TYPE_FUNCTION(int, cmp, __gmp_cmp_function)
  
+template <class T>
+void swap(__gmp_expr<T, T>& x, __gmp_expr<T, T>& y) __GMPXX_NOEXCEPT
+{ x.swap(y); }
+
  // member operators for mpz_class
  
  __GMPZ_DEFINE_COMPOUND_OPERATOR(operator+=, __gmp_binary_plus)
@@ -3219,11 +3157,11 @@ class __gmp_expr<mpz_t, __gmp_urandomb_value>
  {
  private:
    __gmp_randstate_struct *state;
-  unsigned long int bits;
+  mp_bitcnt_t bits;
  public:
-  __gmp_expr(gmp_randstate_t s, unsigned long int l) : state(s), bits(l) { }
+  __gmp_expr(gmp_randstate_t s, mp_bitcnt_t l) : state(s), bits(l) { }
    void eval(mpz_ptr z) const { __gmp_rand_function::eval(z, state, bits); }
-  unsigned long int get_prec() const { return mpf_get_default_prec(); }
+  mp_bitcnt_t get_prec() const { return mpf_get_default_prec(); }
  };
  
  template <>
@@ -3236,7 +3174,7 @@ public:
    __gmp_expr(gmp_randstate_t s, const mpz_class &z) : state(s), range(z) { }
    void eval(mpz_ptr z) const
    { __gmp_rand_function::eval(z, state, range.get_mpz_t()); }
-  unsigned long int get_prec() const { return mpf_get_default_prec(); }
+  mp_bitcnt_t get_prec() const { return mpf_get_default_prec(); }
  };
  
  template <>
@@ -3244,12 +3182,15 @@ class __gmp_expr<mpf_t, __gmp_urandomb_value>
  {
  private:
    __gmp_randstate_struct *state;
-  unsigned long int bits;
+  mp_bitcnt_t bits;
  public:
-  __gmp_expr(gmp_randstate_t s, unsigned long int l) : state(s), bits(l) { }
-  void eval(mpf_ptr f, mp_bitcnt_t prec) const
-  { __gmp_rand_function::eval(f, state, (bits>0) ? get_prec() : prec); }
-  unsigned long int get_prec() const
+  __gmp_expr(gmp_randstate_t s, mp_bitcnt_t l) : state(s), bits(l) { }
+  void eval(mpf_ptr f) const
+  {
+    __gmp_rand_function::eval(f, state,
+       (bits>0) ? bits : mpf_get_prec(f));
+  }
+  mp_bitcnt_t get_prec() const
    {
      if (bits == 0)
        return mpf_get_default_prec();
@@ -3260,8 +3201,8 @@ public:
  
  extern "C" {
    typedef void __gmp_randinit_default_t (gmp_randstate_t);
-  typedef void __gmp_randinit_lc_2exp_t (gmp_randstate_t, mpz_srcptr, unsigned long int, unsigned long int);
-  typedef int __gmp_randinit_lc_2exp_size_t (gmp_randstate_t, unsigned long int);
+  typedef void __gmp_randinit_lc_2exp_t (gmp_randstate_t, mpz_srcptr, unsigned long int, mp_bitcnt_t);
+  typedef int __gmp_randinit_lc_2exp_size_t (gmp_randstate_t, mp_bitcnt_t);
  }
  
  class gmp_randclass
@@ -3290,12 +3231,12 @@ public:
  
    // gmp_randinit_lc_2exp
    gmp_randclass(__gmp_randinit_lc_2exp_t* f,
-               mpz_class z, unsigned long int l1, unsigned long int l2)
+               mpz_class z, unsigned long int l1, mp_bitcnt_t l2)
    { f(state, z.get_mpz_t(), l1, l2); }
  
    // gmp_randinit_lc_2exp_size
    gmp_randclass(__gmp_randinit_lc_2exp_size_t* f,
-               unsigned long int size)
+               mp_bitcnt_t size)
    {
      if (f (state, size) == 0)
        throw std::length_error ("gmp_randinit_lc_2exp_size");
@@ -3309,10 +3250,11 @@ public:
    void seed(const mpz_class &z) { gmp_randseed(state, z.get_mpz_t()); }
  
    // get random number
-  __gmp_expr<mpz_t, __gmp_urandomb_value> get_z_bits(unsigned long int l)
+  __gmp_expr<mpz_t, __gmp_urandomb_value> get_z_bits(mp_bitcnt_t l)
    { return __gmp_expr<mpz_t, __gmp_urandomb_value>(state, l); }
    __gmp_expr<mpz_t, __gmp_urandomb_value> get_z_bits(const mpz_class &z)
    { return get_z_bits(z.get_ui()); }
+  // FIXME: z.get_bitcnt_t() ?
  
    __gmp_expr<mpz_t, __gmp_urandomm_value> get_z_range(const mpz_class &z)
    { return __gmp_expr<mpz_t, __gmp_urandomm_value>(state, z); }
@@ -3322,6 +3264,122 @@ public:
  };
  
  
+/**************** Specialize std::numeric_limits ****************/
+
+namespace std {
+  template <> class numeric_limits<mpz_class>
+  {
+  public:
+    static const bool is_specialized = true;
+    static mpz_class min() { return mpz_class(); }
+    static mpz_class max() { return mpz_class(); }
+    static mpz_class lowest() { return mpz_class(); }
+    static const int digits = 0;
+    static const int digits10 = 0;
+    static const int max_digits10 = 0;
+    static const bool is_signed = true;
+    static const bool is_integer = true;
+    static const bool is_exact = true;
+    static const int radix = 2;
+    static mpz_class epsilon() { return mpz_class(); }
+    static mpz_class round_error() { return mpz_class(); }
+    static const int min_exponent = 0;
+    static const int min_exponent10 = 0;
+    static const int max_exponent = 0;
+    static const int max_exponent10 = 0;
+    static const bool has_infinity = false;
+    static const bool has_quiet_NaN = false;
+    static const bool has_signaling_NaN = false;
+    static const float_denorm_style has_denorm = denorm_absent;
+    static const bool has_denorm_loss = false;
+    static mpz_class infinity() { return mpz_class(); }
+    static mpz_class quiet_NaN() { return mpz_class(); }
+    static mpz_class signaling_NaN() { return mpz_class(); }
+    static mpz_class denorm_min() { return mpz_class(); }
+    static const bool is_iec559 = false;
+    static const bool is_bounded = false;
+    static const bool is_modulo = false;
+    static const bool traps = false;
+    static const bool tinyness_before = false;
+    static const float_round_style round_style = round_toward_zero;
+  };
+
+  template <> class numeric_limits<mpq_class>
+  {
+  public:
+    static const bool is_specialized = true;
+    static mpq_class min() { return mpq_class(); }
+    static mpq_class max() { return mpq_class(); }
+    static mpq_class lowest() { return mpq_class(); }
+    static const int digits = 0;
+    static const int digits10 = 0;
+    static const int max_digits10 = 0;
+    static const bool is_signed = true;
+    static const bool is_integer = false;
+    static const bool is_exact = true;
+    static const int radix = 2;
+    static mpq_class epsilon() { return mpq_class(); }
+    static mpq_class round_error() { return mpq_class(); }
+    static const int min_exponent = 0;
+    static const int min_exponent10 = 0;
+    static const int max_exponent = 0;
+    static const int max_exponent10 = 0;
+    static const bool has_infinity = false;
+    static const bool has_quiet_NaN = false;
+    static const bool has_signaling_NaN = false;
+    static const float_denorm_style has_denorm = denorm_absent;
+    static const bool has_denorm_loss = false;
+    static mpq_class infinity() { return mpq_class(); }
+    static mpq_class quiet_NaN() { return mpq_class(); }
+    static mpq_class signaling_NaN() { return mpq_class(); }
+    static mpq_class denorm_min() { return mpq_class(); }
+    static const bool is_iec559 = false;
+    static const bool is_bounded = false;
+    static const bool is_modulo = false;
+    static const bool traps = false;
+    static const bool tinyness_before = false;
+    static const float_round_style round_style = round_toward_zero;
+  };
+
+  template <> class numeric_limits<mpf_class>
+  {
+  public:
+    static const bool is_specialized = true;
+    static mpf_class min() { return mpf_class(); }
+    static mpf_class max() { return mpf_class(); }
+    static mpf_class lowest() { return mpf_class(); }
+    static const int digits = 0;
+    static const int digits10 = 0;
+    static const int max_digits10 = 0;
+    static const bool is_signed = true;
+    static const bool is_integer = false;
+    static const bool is_exact = false;
+    static const int radix = 2;
+    static mpf_class epsilon() { return mpf_class(); }
+    static mpf_class round_error() { return mpf_class(); }
+    static const int min_exponent = 0;
+    static const int min_exponent10 = 0;
+    static const int max_exponent = 0;
+    static const int max_exponent10 = 0;
+    static const bool has_infinity = false;
+    static const bool has_quiet_NaN = false;
+    static const bool has_signaling_NaN = false;
+    static const float_denorm_style has_denorm = denorm_absent;
+    static const bool has_denorm_loss = false;
+    static mpf_class infinity() { return mpf_class(); }
+    static mpf_class quiet_NaN() { return mpf_class(); }
+    static mpf_class signaling_NaN() { return mpf_class(); }
+    static mpf_class denorm_min() { return mpf_class(); }
+    static const bool is_iec559 = false;
+    static const bool is_bounded = false;
+    static const bool is_modulo = false;
+    static const bool traps = false;
+    static const bool tinyness_before = false;
+    static const float_round_style round_style = round_indeterminate;
+  };
+}
+
+
  /**************** #undef all private macros ****************/
  
  #undef __GMPP_DECLARE_COMPOUND_OPERATOR
@@ -3356,12 +3414,6 @@ public:
  #undef __GMP_DEFINE_BINARY_TYPE_FUNCTION
  
  #undef __GMPZ_DEFINE_COMPOUND_OPERATOR
-#undef __GMPZN_DEFINE_COMPOUND_OPERATOR
-#undef __GMPZNN_DEFINE_COMPOUND_OPERATOR
-#undef __GMPZNS_DEFINE_COMPOUND_OPERATOR
-#undef __GMPZNU_DEFINE_COMPOUND_OPERATOR
-#undef __GMPZND_DEFINE_COMPOUND_OPERATOR
-#undef __GMPZNLD_DEFINE_COMPOUND_OPERATOR
  
  #undef __GMPP_DEFINE_COMPOUND_OPERATOR
  #undef __GMPNN_DEFINE_COMPOUND_OPERATOR
@@ -3385,4 +3437,6 @@ public:
  #undef __GMPQ_DEFINE_INCREMENT_OPERATOR
  #undef __GMPF_DEFINE_INCREMENT_OPERATOR
  
+#undef __GMPXX_CONSTANT
+
  #endif /* __GMP_PLUSPLUS__ */
diff --git a/libmp.sym b/libmp.sym

deleted file mode 100644 (file)

index f36d5c0..0000000
--- a/libmp.sym
+++ /dev/null
@@ -1,18 +0,0 @@
-itom
-xtom
-move
-madd
-msub
-mult
-mdiv
-sdiv
-msqrt
-pow
-rpow
-gcd
-mcmp
-min
-mout
-mtox
-mfree
-__gmp_set_memory_functions
diff --git a/longlong.h b/longlong.h

index 8cac79da9c2f164d2d604f65849b39466254c265..211d80d6b387dfab2db782fc0ea2c81117175c30 100644 (file)
--- a/longlong.h
+++ b/longlong.h
@@ -1,7 +1,7 @@
  /* longlong.h -- definitions for mixed size 32/64 bit arithmetic.
  
  Copyright 1991, 1992, 1993, 1994, 1996, 1997, 1999, 2000, 2001, 2002, 2003,
-2004, 2005, 2007, 2008, 2009, 2011 Free Software Foundation, Inc.
+2004, 2005, 2007, 2008, 2009, 2011, 2012 Free Software Foundation, Inc.
  
  This file is free software; you can redistribute it and/or modify it under the
  terms of the GNU Lesser General Public License as published by the Free
@@ -51,14 +51,6 @@ along with this file.  If not, see http://www.gnu.org/licenses/.  */
  #define __MPN(x) __##x
  #endif
  
-#ifndef _PROTO
-#if (__STDC__-0) || defined (__cplusplus)
-#define _PROTO(x) x
-#else
-#define _PROTO(x) ()
-#endif
-#endif
-
  /* Define auxiliary asm macros.
  
     1) umul_ppmm(high_prod, low_prod, multiplier, multiplicand) multiplies two
@@ -258,21 +250,30 @@ along with this file.  If not, see http://www.gnu.org/licenses/.  */
  
  #if ! defined (count_leading_zeros) && ! defined (LONGLONG_STANDALONE)
  #if HAVE_ATTRIBUTE_CONST
-long __MPN(count_leading_zeros) _PROTO ((UDItype)) __attribute__ ((const));
+long __MPN(count_leading_zeros) (UDItype) __attribute__ ((const));
  #else
-long __MPN(count_leading_zeros) _PROTO ((UDItype));
+long __MPN(count_leading_zeros) (UDItype);
  #endif
  #define count_leading_zeros(count, x) \
    ((count) = __MPN(count_leading_zeros) (x))
  #endif /* clz using mpn */
  #endif /* __alpha */
  
+#if defined (__AVR) && W_TYPE_SIZE == 8
+#define umul_ppmm(ph, pl, m0, m1) /* (ph,pl) = high/low parts of m0 * m1 */ \
+  do {                                                                 \
+    unsigned short __p = (unsigned short) (m0) * (m1); /* widen m0 before multiplying */ \
+    (ph) = __p >> 8;          /* bits of the product above the low 8 */ \
+    (pl) = __p;               /* assumes pl is an 8-bit word, so only the low bits are kept */ \
+  } while (0)
+#endif /* AVR */
+
  #if defined (_CRAY) && W_TYPE_SIZE == 64
  #include <intrinsics.h>
  #define UDIV_PREINV_ALWAYS  1
  #define UDIV_NEEDS_NORMALIZATION 1
  #define UDIV_TIME 220
-long __MPN(count_leading_zeros) _PROTO ((UDItype));
+long __MPN(count_leading_zeros) (UDItype);
  #define count_leading_zeros(count, x) \
    ((count) = _leadz ((UWtype) (x)))
  #if defined (_CRAYIEEE)                /* I.e., Cray T90/ieee, T3D, and T3E */
@@ -423,7 +424,7 @@ long __MPN(count_leading_zeros) _PROTO ((UDItype));
              "rIJ" ((USItype) (bl)))
  #endif
  
-#if defined (__arm__) && W_TYPE_SIZE == 32
+#if defined (__arm__) && !defined (__thumb__) && W_TYPE_SIZE == 32
  #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
    __asm__ ("adds\t%1, %4, %5\n\tadc\t%0, %2, %3"                       \
            : "=r" (sh), "=&r" (sl)                                      \
@@ -509,18 +510,47 @@ long __MPN(count_leading_zeros) _PROTO ((UDItype));
      (q) = __MPN(udiv_qrnnd) (&__r, (n1), (n0), (d));                   \
      (r) = __r;                                                         \
    } while (0)
-extern UWtype __MPN(udiv_qrnnd) _PROTO ((UWtype *, UWtype, UWtype, UWtype));
+extern UWtype __MPN(udiv_qrnnd) (UWtype *, UWtype, UWtype, UWtype);
  #define UDIV_TIME 200
  #endif /* LONGLONG_STANDALONE */
  #endif
-#if defined (__ARM_ARCH_5__)
-/* This actually requires arm 5 */
+/* This is a bizarre test, but GCC doesn't define a useful common symbol. */
+#if defined (__ARM_ARCH_5__)  || defined (__ARM_ARCH_5T__) || \
+    defined (__ARM_ARCH_5E__) || defined (__ARM_ARCH_5TE__)|| \
+    defined (__ARM_ARCH_6__)  || defined (__ARM_ARCH_6J__) || \
+    defined (__ARM_ARCH_6K__) || defined (__ARM_ARCH_6Z__) || \
+    defined (__ARM_ARCH_6ZK__)|| defined (__ARM_ARCH_6T2__)|| \
+    defined (__ARM_ARCH_6M__) || defined (__ARM_ARCH_7__)  || \
+    defined (__ARM_ARCH_7A__) || defined (__ARM_ARCH_7R__) || \
+    defined (__ARM_ARCH_7M__) || defined (__ARM_ARCH_7EM__)
  #define count_leading_zeros(count, x) \
    __asm__ ("clz\t%0, %1" : "=r" (count) : "r" (x))
  #define COUNT_LEADING_ZEROS_0 32
  #endif
  #endif /* __arm__ */
  
+#if defined (__aarch64__) && W_TYPE_SIZE == 64
+/* FIXME: Extend the immediate range for the low word by using both
+   ADDS and SUBS, since they set carry in the same way.  */
+#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
+  __asm__ ("adds\t%1, %x4, %5\n\tadc\t%0, %x2, %x3"                    \
+          : "=r" (sh), "=&r" (sl)                                      \
+          : "rZ" (ah), "rZ" (bh), "%r" (al), "rI" (bl) __CLOBBER_CC)
+#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
+  __asm__ ("subs\t%1, %x4, %5\n\tsbc\t%0, %x2, %x3"                    \
+          : "=r,r" (sh), "=&r,&r" (sl)                                 \
+          : "rZ,rZ" (ah), "rZ,rZ" (bh), "r,Z" (al), "rI,r" (bl) __CLOBBER_CC)
+#define umul_ppmm(ph, pl, m0, m1) \
+  do {                                                                 \
+    UDItype __m0 = (m0), __m1 = (m1);                                  \
+    __asm__ ("umulh\t%0, %1, %2" : "=r" (ph) : "r" (m0), "r" (m1));    \
+    (pl) = __m0 * __m1;                                                        \
+  } while (0)
+#define count_leading_zeros(count, x) \
+  __asm__ ("clz\t%0, %1" : "=r" (count) : "r" (x))
+#define COUNT_LEADING_ZEROS_0 64
+#endif /* __aarch64__ */
+
  #if defined (__clipper__) && W_TYPE_SIZE == 32
  #define umul_ppmm(w1, w0, u, v) \
    ({union {UDItype __ll;                                               \
@@ -1284,37 +1314,37 @@ extern UWtype __MPN(udiv_qrnnd) _PROTO ((UWtype *, UWtype, UWtype, UWtype));
  #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
    do {                                                                 \
      if (__builtin_constant_p (bh) && (bh) == 0)                                \
-      __asm__ ("{a%I4|add%I4c} %1,%3,%4\n\t{aze|addze} %0,%2"          \
+      __asm__ ("add%I4c %1,%3,%4\n\taddze %0,%2"               \
              : "=r" (sh), "=&r" (sl) : "r" (ah), "%r" (al), "rI" (bl));\
      else if (__builtin_constant_p (bh) && (bh) == ~(USItype) 0)                \
-      __asm__ ("{a%I4|add%I4c} %1,%3,%4\n\t{ame|addme} %0,%2"          \
+      __asm__ ("add%I4c %1,%3,%4\n\taddme %0,%2"               \
              : "=r" (sh), "=&r" (sl) : "r" (ah), "%r" (al), "rI" (bl));\
      else                                                               \
-      __asm__ ("{a%I5|add%I5c} %1,%4,%5\n\t{ae|adde} %0,%2,%3"         \
+      __asm__ ("add%I5c %1,%4,%5\n\tadde %0,%2,%3"             \
              : "=r" (sh), "=&r" (sl)                                    \
              : "r" (ah), "r" (bh), "%r" (al), "rI" (bl));               \
    } while (0)
  #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
    do {                                                                 \
      if (__builtin_constant_p (ah) && (ah) == 0)                                \
-      __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{sfze|subfze} %0,%2"      \
+      __asm__ ("subf%I3c %1,%4,%3\n\tsubfze %0,%2"     \
                : "=r" (sh), "=&r" (sl) : "r" (bh), "rI" (al), "r" (bl));\
      else if (__builtin_constant_p (ah) && (ah) == ~(USItype) 0)                \
-      __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{sfme|subfme} %0,%2"      \
+      __asm__ ("subf%I3c %1,%4,%3\n\tsubfme %0,%2"     \
                : "=r" (sh), "=&r" (sl) : "r" (bh), "rI" (al), "r" (bl));\
      else if (__builtin_constant_p (bh) && (bh) == 0)                   \
-      __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{ame|addme} %0,%2"                \
+      __asm__ ("subf%I3c %1,%4,%3\n\taddme %0,%2"              \
                : "=r" (sh), "=&r" (sl) : "r" (ah), "rI" (al), "r" (bl));\
      else if (__builtin_constant_p (bh) && (bh) == ~(USItype) 0)                \
-      __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{aze|addze} %0,%2"                \
+      __asm__ ("subf%I3c %1,%4,%3\n\taddze %0,%2"              \
                : "=r" (sh), "=&r" (sl) : "r" (ah), "rI" (al), "r" (bl));\
      else                                                               \
-      __asm__ ("{sf%I4|subf%I4c} %1,%5,%4\n\t{sfe|subfe} %0,%3,%2"     \
+      __asm__ ("subf%I4c %1,%5,%4\n\tsubfe %0,%3,%2"   \
                : "=r" (sh), "=&r" (sl)                                  \
                : "r" (ah), "r" (bh), "rI" (al), "r" (bl));              \
    } while (0)
  #define count_leading_zeros(count, x) \
-  __asm__ ("{cntlz|cntlzw} %0,%1" : "=r" (count) : "r" (x))
+  __asm__ ("cntlzw %0,%1" : "=r" (count) : "r" (x))
  #define COUNT_LEADING_ZEROS_0 32
  #if HAVE_HOST_CPU_FAMILY_powerpc
  #if __GMP_GNUC_PREREQ (4,4)
@@ -1362,13 +1392,13 @@ extern UWtype __MPN(udiv_qrnnd) _PROTO ((UWtype *, UWtype, UWtype, UWtype));
  #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
    do {                                                                 \
      if (__builtin_constant_p (bh) && (bh) == 0)                                \
-      __asm__ ("{a%I4|add%I4c} %1,%3,%4\n\t{aze|addze} %0,%2"          \
+      __asm__ ("add%I4c %1,%3,%4\n\taddze %0,%2"               \
              : "=r" (sh), "=&r" (sl) : "r" (ah), "%r" (al), "rI" (bl));\
      else if (__builtin_constant_p (bh) && (bh) == ~(UDItype) 0)                \
-      __asm__ ("{a%I4|add%I4c} %1,%3,%4\n\t{ame|addme} %0,%2"          \
+      __asm__ ("add%I4c %1,%3,%4\n\taddme %0,%2"               \
              : "=r" (sh), "=&r" (sl) : "r" (ah), "%r" (al), "rI" (bl));\
      else                                                               \
-      __asm__ ("{a%I5|add%I5c} %1,%4,%5\n\t{ae|adde} %0,%2,%3"         \
+      __asm__ ("add%I5c %1,%4,%5\n\tadde %0,%2,%3"             \
              : "=r" (sh), "=&r" (sl)                                    \
              : "r" (ah), "r" (bh), "%r" (al), "rI" (bl));               \
    } while (0)
@@ -1378,36 +1408,36 @@ extern UWtype __MPN(udiv_qrnnd) _PROTO ((UWtype *, UWtype, UWtype, UWtype));
    do {                                                                       \
      if (__builtin_constant_p (bl) && bl > -0x8000 && bl <= 0x8000) {         \
         if (__builtin_constant_p (ah) && (ah) == 0)                           \
-         __asm__ ("{ai|addic} %1,%3,%4\n\t{sfze|subfze} %0,%2"               \
+         __asm__ ("addic %1,%3,%4\n\tsubfze %0,%2"                   \
                    : "=r" (sh), "=&r" (sl) : "r" (bh), "rI" (al), "*rI" (-bl)); \
         else if (__builtin_constant_p (ah) && (ah) == ~(UDItype) 0)           \
-         __asm__ ("{ai|addic} %1,%3,%4\n\t{sfme|subfme} %0,%2"               \
+         __asm__ ("addic %1,%3,%4\n\tsubfme %0,%2"                   \
                    : "=r" (sh), "=&r" (sl) : "r" (bh), "rI" (al), "*rI" (-bl)); \
         else if (__builtin_constant_p (bh) && (bh) == 0)                      \
-         __asm__ ("{ai|addic} %1,%3,%4\n\t{ame|addme} %0,%2"                 \
+         __asm__ ("addic %1,%3,%4\n\taddme %0,%2"                    \
                    : "=r" (sh), "=&r" (sl) : "r" (ah), "rI" (al), "*rI" (-bl)); \
         else if (__builtin_constant_p (bh) && (bh) == ~(UDItype) 0)           \
-         __asm__ ("{ai|addic} %1,%3,%4\n\t{aze|addze} %0,%2"                 \
+         __asm__ ("addic %1,%3,%4\n\taddze %0,%2"                    \
                    : "=r" (sh), "=&r" (sl) : "r" (ah), "rI" (al), "*rI" (-bl)); \
         else                                                                  \
-         __asm__ ("{ai|addic} %1,%4,%5\n\t{sfe|subfe} %0,%3,%2"              \
+         __asm__ ("addic %1,%4,%5\n\tsubfe %0,%3,%2"         \
                    : "=r" (sh), "=&r" (sl)                                    \
                    : "r" (ah), "r" (bh), "rI" (al), "*rI" (-bl));             \
        } else {                                                               \
         if (__builtin_constant_p (ah) && (ah) == 0)                           \
-         __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{sfze|subfze} %0,%2"         \
+         __asm__ ("subf%I3c %1,%4,%3\n\tsubfze %0,%2"        \
                    : "=r" (sh), "=&r" (sl) : "r" (bh), "rI" (al), "r" (bl));  \
         else if (__builtin_constant_p (ah) && (ah) == ~(UDItype) 0)           \
-         __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{sfme|subfme} %0,%2"         \
+         __asm__ ("subf%I3c %1,%4,%3\n\tsubfme %0,%2"        \
                    : "=r" (sh), "=&r" (sl) : "r" (bh), "rI" (al), "r" (bl));  \
         else if (__builtin_constant_p (bh) && (bh) == 0)                      \
-         __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{ame|addme} %0,%2"           \
+         __asm__ ("subf%I3c %1,%4,%3\n\taddme %0,%2"         \
                    : "=r" (sh), "=&r" (sl) : "r" (ah), "rI" (al), "r" (bl));  \
         else if (__builtin_constant_p (bh) && (bh) == ~(UDItype) 0)           \
-         __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{aze|addze} %0,%2"           \
+         __asm__ ("subf%I3c %1,%4,%3\n\taddze %0,%2"         \
                    : "=r" (sh), "=&r" (sl) : "r" (ah), "rI" (al), "r" (bl));  \
         else                                                                  \
-         __asm__ ("{sf%I4|subf%I4c} %1,%5,%4\n\t{sfe|subfe} %0,%3,%2"        \
+         __asm__ ("subf%I4c %1,%5,%4\n\tsubfe %0,%3,%2"              \
                    : "=r" (sh), "=&r" (sl)                                    \
                    : "r" (ah), "r" (bh), "rI" (al), "r" (bl));                \
        }                                                                              \
@@ -1518,7 +1548,7 @@ extern UWtype __MPN(udiv_qrnnd) _PROTO ((UWtype *, UWtype, UWtype, UWtype));
    } while (0)
  #endif /* RT/ROMP */
  
-#if defined (__sh2__) && W_TYPE_SIZE == 32
+#if (defined (__SH2__) || defined (__SH3__) || defined (__SH4__)) && W_TYPE_SIZE == 32
  #define umul_ppmm(w1, w0, u, v) \
    __asm__ ("dmulu.l %2,%3\n\tsts macl,%1\n\tsts mach,%0"               \
            : "=r" (w1), "=r" (w0) : "r" (u), "r" (v) : "macl", "mach")
@@ -1700,7 +1730,7 @@ extern UWtype __MPN(udiv_qrnnd) _PROTO ((UWtype *, UWtype, UWtype, UWtype));
      (q) = __MPN(udiv_qrnnd) (&__r, (n1), (n0), (d));                   \
      (r) = __r;                                                         \
    } while (0)
-extern UWtype __MPN(udiv_qrnnd) _PROTO ((UWtype *, UWtype, UWtype, UWtype));
+extern UWtype __MPN(udiv_qrnnd) (UWtype *, UWtype, UWtype, UWtype);
  #ifndef UDIV_TIME
  #define UDIV_TIME 140
  #endif
@@ -1729,7 +1759,7 @@ extern UWtype __MPN(udiv_qrnnd) _PROTO ((UWtype *, UWtype, UWtype, UWtype));
            __CLOBBER_CC)
  #endif
  
-#if defined (__vax__) && W_TYPE_SIZE == 32
+#if (defined (__vax) || defined (__vax__)) && W_TYPE_SIZE == 32
  #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
    __asm__ ("addl2 %5,%1\n\tadwc %3,%0"                                 \
            : "=g" (sh), "=&g" (sl)                                      \
@@ -1769,7 +1799,7 @@ extern UWtype __MPN(udiv_qrnnd) _PROTO ((UWtype *, UWtype, UWtype, UWtype));
              : "g" ((USItype) (x)));                                    \
    } while (0)
  #endif
-#endif /* __vax__ */
+#endif /* vax */
  
  #if defined (__z8000__) && W_TYPE_SIZE == 16
  #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
@@ -1802,6 +1832,7 @@ extern UWtype __MPN(udiv_qrnnd) _PROTO ((UWtype *, UWtype, UWtype, UWtype));
  #endif /* NO_ASM */
  
  
+/* FIXME: "sidi" here is highly doubtful, should sometimes be "diti".  */
  #if !defined (umul_ppmm) && defined (__umulsidi3)
  #define umul_ppmm(ph, pl, m0, m1) \
    {                                                                    \
@@ -1825,7 +1856,7 @@ extern UWtype __MPN(udiv_qrnnd) _PROTO ((UWtype *, UWtype, UWtype, UWtype));
     hppa. */
  
  #define mpn_umul_ppmm  __MPN(umul_ppmm)
-extern UWtype mpn_umul_ppmm _PROTO ((UWtype *, UWtype, UWtype));
+extern UWtype mpn_umul_ppmm (UWtype *, UWtype, UWtype);
  
  #if ! defined (umul_ppmm) && HAVE_NATIVE_mpn_umul_ppmm  \
    && ! defined (LONGLONG_STANDALONE)
@@ -1838,7 +1869,7 @@ extern UWtype mpn_umul_ppmm _PROTO ((UWtype *, UWtype, UWtype));
  #endif
  
  #define mpn_umul_ppmm_r  __MPN(umul_ppmm_r)
-extern UWtype mpn_umul_ppmm_r _PROTO ((UWtype, UWtype, UWtype *));
+extern UWtype mpn_umul_ppmm_r (UWtype, UWtype, UWtype *);
  
  #if ! defined (umul_ppmm) && HAVE_NATIVE_mpn_umul_ppmm_r       \
    && ! defined (LONGLONG_STANDALONE)
@@ -1851,7 +1882,7 @@ extern UWtype mpn_umul_ppmm_r _PROTO ((UWtype, UWtype, UWtype *));
  #endif
  
  #define mpn_udiv_qrnnd  __MPN(udiv_qrnnd)
-extern UWtype mpn_udiv_qrnnd _PROTO ((UWtype *, UWtype, UWtype, UWtype));
+extern UWtype mpn_udiv_qrnnd (UWtype *, UWtype, UWtype, UWtype);
  
  #if ! defined (udiv_qrnnd) && HAVE_NATIVE_mpn_udiv_qrnnd       \
    && ! defined (LONGLONG_STANDALONE)
@@ -1865,7 +1896,7 @@ extern UWtype mpn_udiv_qrnnd _PROTO ((UWtype *, UWtype, UWtype, UWtype));
  #endif
  
  #define mpn_udiv_qrnnd_r  __MPN(udiv_qrnnd_r)
-extern UWtype mpn_udiv_qrnnd_r _PROTO ((UWtype, UWtype, UWtype, UWtype *));
+extern UWtype mpn_udiv_qrnnd_r (UWtype, UWtype, UWtype, UWtype *);
  
  #if ! defined (udiv_qrnnd) && HAVE_NATIVE_mpn_udiv_qrnnd_r     \
    && ! defined (LONGLONG_STANDALONE)
@@ -2049,6 +2080,7 @@ __GMP_DECLSPEC UWtype __MPN(udiv_w_sdiv) (UWtype *, UWtype, UWtype, UWtype);
  /* This version gives a well-defined value for zero. */
  #define COUNT_LEADING_ZEROS_0 (W_TYPE_SIZE - 1)
  #define COUNT_LEADING_ZEROS_NEED_CLZ_TAB
+#define COUNT_LEADING_ZEROS_SLOW
  #endif
  
  /* clz_tab needed by mpn/x86/pentium/mod_1.asm in a fat binary */
@@ -2057,13 +2089,13 @@ __GMP_DECLSPEC UWtype __MPN(udiv_w_sdiv) (UWtype *, UWtype, UWtype, UWtype);
  #endif
  
  #ifdef COUNT_LEADING_ZEROS_NEED_CLZ_TAB
-extern const unsigned char __GMP_DECLSPEC __clz_tab[128];
+extern const unsigned char __GMP_DECLSPEC __clz_tab[129];
  #endif
  
  #if !defined (count_trailing_zeros)
-/* Define count_trailing_zeros using count_leading_zeros.  The latter might be
-   defined in asm, but if it is not, the C version above is good enough.  */
-#define count_trailing_zeros(count, x) \
+#if !defined (COUNT_LEADING_ZEROS_SLOW)
+/* Define count_trailing_zeros using an asm count_leading_zeros.  */
+#define count_trailing_zeros(count, x)                                 \
    do {                                                                 \
      UWtype __ctz_x = (x);                                              \
      UWtype __ctz_c;                                                    \
@@ -2071,6 +2103,30 @@ extern const unsigned char __GMP_DECLSPEC __clz_tab[128];
      count_leading_zeros (__ctz_c, __ctz_x & -__ctz_x);                 \
      (count) = W_TYPE_SIZE - 1 - __ctz_c;                               \
    } while (0)
+#else
+/* Define count_trailing_zeros in plain C, assuming small counts are common.
+   We use clz_tab without ado, since the C count_leading_zeros above will have
+   pulled it in.  */
+#define count_trailing_zeros(count, x)                                 \
+  do {                                                                 \
+    UWtype __ctz_x = (x);                                              \
+    int __ctz_c;                                                       \
+                                                                       \
+    if (LIKELY ((__ctz_x & 0xff) != 0))                                        \
+      (count) = __clz_tab[__ctz_x & -__ctz_x] - 2;                     \
+    else                                                               \
+      {                                                                        \
+       for (__ctz_c = 8 - 2; __ctz_c < W_TYPE_SIZE - 2; __ctz_c += 8)  \
+         {                                                             \
+           __ctz_x >>= 8;                                              \
+           if (LIKELY ((__ctz_x & 0xff) != 0))                         \
+             break;                                                    \
+         }                                                             \
+                                                                       \
+       (count) = __ctz_c + __clz_tab[__ctz_x & -__ctz_x];              \
+      }                                                                        \
+  } while (0)
+#endif
  #endif
  
  #ifndef UDIV_NEEDS_NORMALIZATION
diff --git a/memory.c b/memory.c

index 42a9834dae13e8ca097ba2fa029f31673af79464..b9e858d74771a807f1b2881168e465d476a9305b 100644 (file)
--- a/memory.c
+++ b/memory.c
@@ -1,6 +1,7 @@
  /* Memory allocation routines.
  
-Copyright 1991, 1993, 1994, 2000, 2001, 2002 Free Software Foundation, Inc.
+Copyright 1991, 1993, 1994, 2000, 2001, 2002, 2012 Free Software Foundation,
+Inc.
  
  This file is part of the GNU MP Library.
  
@@ -24,10 +25,9 @@ along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
  #include "gmp-impl.h"
  
  
-void * (*__gmp_allocate_func) __GMP_PROTO ((size_t)) = __gmp_default_allocate;
-void * (*__gmp_reallocate_func) __GMP_PROTO ((void *, size_t, size_t))
-     = __gmp_default_reallocate;
-void   (*__gmp_free_func) __GMP_PROTO ((void *, size_t)) = __gmp_default_free;
+void * (*__gmp_allocate_func) (size_t) = __gmp_default_allocate;
+void * (*__gmp_reallocate_func) (void *, size_t, size_t) = __gmp_default_reallocate;
+void   (*__gmp_free_func) (void *, size_t) = __gmp_default_free;
  
  
  /* Default allocation functions.  In case of failure to allocate/reallocate
diff --git a/mini-gmp/README b/mini-gmp/README

new file mode 100644 (file)

index 0000000..a767d7c
--- /dev/null
+++ b/mini-gmp/README
@@ -0,0 +1,66 @@
+Copyright 2011, 2012, 2013 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+
+This is "mini-gmp", a small implementation of a subset of GMP's mpn
+and mpz interfaces.
+
+It is intended for applications which need arithmetic on numbers
+larger than a machine word, but which don't need to handle very large
+numbers very efficiently. Those applications can include a copy of
+mini-gmp to get a GMP-compatible interface with small footprint. One
+can also arrange for optional linking with the real GMP library, using
+mini-gmp as a fallback when for some reason GMP is not available, or
+not desired as a dependency.
+
+The supported GMP subset is declared in mini-gmp.h. The implemented
+functions are fully compatible with the corresponding GMP functions,
+as specified in the GMP manual, with a few exceptions:
+
+  mpz_set_str, mpz_init_set_str, mpz_get_str, mpz_out_str and
+  mpz_sizeinbase support only |base| <= 36;
+  mpz_export and mpz_import support only NAILS = 0.
+
+  The REALLOC_FUNC and FREE_FUNC registered with
+  mp_set_memory_functions do not get the correct size of the
+  allocated block in the corresponding argument. mini-gmp always
+  passes zero for these rarely used arguments.
+
+The implementation is a single file, mini-gmp.c.
+
+The performance target for mini-gmp is to be at most 10 times slower
+than the real GMP library, for numbers of size up to a few hundred
+bits. No asymptotically fast algorithms are included in mini-gmp, so
+it will be many orders of magnitude slower than GMP for very large
+numbers.
+
+You should never "install" mini-gmp. Applications can either just
+#include mini-gmp.c (but then, beware that it defines several macros
+and functions outside of the advertised interface), or compile
+mini-gmp.c as a separate compilation unit, and use the declarations in
+mini-gmp.h.
+
+The tests subdirectory contains a testsuite. To use it, you need GMP
+and GNU make. Just run make check in the tests directory. If the
+hard-coded compiler settings are not right, you have to either edit the
+Makefile or pass overriding values on the make command line (e.g.,
+make CC=cc check). Testing is not (yet) as thorough as for the real
+GMP.
+
+The current version was put together by Niels Möller
+<nisse@lysator.liu.se>, with a fair amount of copy-and-paste from the
+GMP sources.
diff --git a/mini-gmp/mini-gmp.c b/mini-gmp/mini-gmp.c

new file mode 100644 (file)

index 0000000..f3b43fb
--- /dev/null
+++ b/mini-gmp/mini-gmp.c
@@ -0,0 +1,4130 @@
+/* mini-gmp, a minimalistic implementation of a GNU GMP subset.
+
+   Contributed to the GNU project by Niels Möller
+
+Copyright 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1999, 2000, 2001,
+2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013
+Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+/* NOTE: All functions in this file which are not declared in
+   mini-gmp.h are internal, and are not intended to be compatible
+   with either GMP or future versions of mini-gmp. */
+
+/* Much of the material copied from GMP files, including: gmp-impl.h,
+   longlong.h, mpn/generic/add_n.c, mpn/generic/addmul_1.c,
+   mpn/generic/lshift.c, mpn/generic/mul_1.c,
+   mpn/generic/mul_basecase.c, mpn/generic/rshift.c,
+   mpn/generic/sbpi1_div_qr.c, mpn/generic/sub_n.c,
+   mpn/generic/submul_1.c. */
+
+#include <assert.h>
+#include <ctype.h>
+#include <limits.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "mini-gmp.h"
+
+\f
+/* Macros */
+#define GMP_LIMB_BITS (sizeof(mp_limb_t) * CHAR_BIT)
+
+#define GMP_LIMB_MAX (~ (mp_limb_t) 0)
+#define GMP_LIMB_HIGHBIT ((mp_limb_t) 1 << (GMP_LIMB_BITS - 1))
+
+#define GMP_HLIMB_BIT ((mp_limb_t) 1 << (GMP_LIMB_BITS / 2))
+#define GMP_LLIMB_MASK (GMP_HLIMB_BIT - 1)
+
+#define GMP_ULONG_BITS (sizeof(unsigned long) * CHAR_BIT)
+#define GMP_ULONG_HIGHBIT ((unsigned long) 1 << (GMP_ULONG_BITS - 1))
+
+#define GMP_ABS(x) ((x) >= 0 ? (x) : -(x))
+#define GMP_NEG_CAST(T,x) (-((T)((x) + 1) - 1))
+
+#define GMP_MIN(a, b) ((a) < (b) ? (a) : (b))
+#define GMP_MAX(a, b) ((a) > (b) ? (a) : (b))
+
+#define gmp_assert_nocarry(x) do { \
+    mp_limb_t __cy = x;                   \
+    assert (__cy == 0);                   \
+  } while (0)
+
+#define gmp_clz(count, x) do {                                         \
+    mp_limb_t __clz_x = (x);                                           \
+    unsigned __clz_c;                                                  \
+    for (__clz_c = 0;                                                  \
+        (__clz_x & ((mp_limb_t) 0xff << (GMP_LIMB_BITS - 8))) == 0;    \
+        __clz_c += 8)                                                  \
+      __clz_x <<= 8;                                                   \
+    for (; (__clz_x & GMP_LIMB_HIGHBIT) == 0; __clz_c++)               \
+      __clz_x <<= 1;                                                   \
+    (count) = __clz_c;                                                 \
+  } while (0)
+
+#define gmp_ctz(count, x) do {                                         \
+    mp_limb_t __ctz_x = (x);                                           \
+    unsigned __ctz_c = 0;                                              \
+    gmp_clz (__ctz_c, __ctz_x & - __ctz_x);                            \
+    (count) = GMP_LIMB_BITS - 1 - __ctz_c;                             \
+  } while (0)
+
+#define gmp_add_ssaaaa(sh, sl, ah, al, bh, bl) \
+  do {                                                                 \
+    mp_limb_t __x;                                                     \
+    __x = (al) + (bl);                                                 \
+    (sh) = (ah) + (bh) + (__x < (al));                                 \
+    (sl) = __x;                                                                \
+  } while (0)
+
+#define gmp_sub_ddmmss(sh, sl, ah, al, bh, bl) \
+  do {                                                                 \
+    mp_limb_t __x;                                                     \
+    __x = (al) - (bl);                                                 \
+    (sh) = (ah) - (bh) - ((al) < (bl));                                        \
+    (sl) = __x;                                                                \
+  } while (0)
+
+#define gmp_umul_ppmm(w1, w0, u, v)                                    \
+  do {                                                                 \
+    mp_limb_t __x0, __x1, __x2, __x3;                                  \
+    unsigned __ul, __vl, __uh, __vh;                                   \
+    mp_limb_t __u = (u), __v = (v);                                    \
+                                                                       \
+    __ul = __u & GMP_LLIMB_MASK;                                       \
+    __uh = __u >> (GMP_LIMB_BITS / 2);                                 \
+    __vl = __v & GMP_LLIMB_MASK;                                       \
+    __vh = __v >> (GMP_LIMB_BITS / 2);                                 \
+                                                                       \
+    __x0 = (mp_limb_t) __ul * __vl;                                    \
+    __x1 = (mp_limb_t) __ul * __vh;                                    \
+    __x2 = (mp_limb_t) __uh * __vl;                                    \
+    __x3 = (mp_limb_t) __uh * __vh;                                    \
+                                                                       \
+    __x1 += __x0 >> (GMP_LIMB_BITS / 2);/* this can't give carry */    \
+    __x1 += __x2;              /* but this indeed can */               \
+    if (__x1 < __x2)           /* did we get it? */                    \
+      __x3 += GMP_HLIMB_BIT;   /* yes, add it in the proper pos. */    \
+                                                                       \
+    (w1) = __x3 + (__x1 >> (GMP_LIMB_BITS / 2));                       \
+    (w0) = (__x1 << (GMP_LIMB_BITS / 2)) + (__x0 & GMP_LLIMB_MASK);    \
+  } while (0)
+
+#define gmp_udiv_qrnnd_preinv(q, r, nh, nl, d, di)                     \
+  do {                                                                 \
+    mp_limb_t _qh, _ql, _r, _mask;                                     \
+    gmp_umul_ppmm (_qh, _ql, (nh), (di));                              \
+    gmp_add_ssaaaa (_qh, _ql, _qh, _ql, (nh) + 1, (nl));               \
+    _r = (nl) - _qh * (d);                                             \
+    _mask = -(mp_limb_t) (_r > _ql); /* both > and >= are OK */                \
+    _qh += _mask;                                                      \
+    _r += _mask & (d);                                                 \
+    if (_r >= (d))                                                     \
+      {                                                                        \
+       _r -= (d);                                                      \
+       _qh++;                                                          \
+      }                                                                        \
+                                                                       \
+    (r) = _r;                                                          \
+    (q) = _qh;                                                         \
+  } while (0)
+
+#define gmp_udiv_qr_3by2(q, r1, r0, n2, n1, n0, d1, d0, dinv)          \
+  do {                                                                 \
+    mp_limb_t _q0, _t1, _t0, _mask;                                    \
+    gmp_umul_ppmm ((q), _q0, (n2), (dinv));                            \
+    gmp_add_ssaaaa ((q), _q0, (q), _q0, (n2), (n1));                   \
+                                                                       \
+    /* Compute the two most significant limbs of n - q'd */            \
+    (r1) = (n1) - (d1) * (q);                                          \
+    gmp_sub_ddmmss ((r1), (r0), (r1), (n0), (d1), (d0));               \
+    gmp_umul_ppmm (_t1, _t0, (d0), (q));                               \
+    gmp_sub_ddmmss ((r1), (r0), (r1), (r0), _t1, _t0);                 \
+    (q)++;                                                             \
+                                                                       \
+    /* Conditionally adjust q and the remainders */                    \
+    _mask = - (mp_limb_t) ((r1) >= _q0);                               \
+    (q) += _mask;                                                      \
+    gmp_add_ssaaaa ((r1), (r0), (r1), (r0), _mask & (d1), _mask & (d0)); \
+    if ((r1) >= (d1))                                                  \
+      {                                                                        \
+       if ((r1) > (d1) || (r0) >= (d0))                                \
+         {                                                             \
+           (q)++;                                                      \
+           gmp_sub_ddmmss ((r1), (r0), (r1), (r0), (d1), (d0));        \
+         }                                                             \
+      }                                                                        \
+  } while (0)
+
+/* Swap macros. */
+#define MP_LIMB_T_SWAP(x, y)                                           \
+  do {                                                                 \
+    mp_limb_t __mp_limb_t_swap__tmp = (x);                             \
+    (x) = (y);                                                         \
+    (y) = __mp_limb_t_swap__tmp;                                       \
+  } while (0)
+#define MP_SIZE_T_SWAP(x, y)                                           \
+  do {                                                                 \
+    mp_size_t __mp_size_t_swap__tmp = (x);                             \
+    (x) = (y);                                                         \
+    (y) = __mp_size_t_swap__tmp;                                       \
+  } while (0)
+#define MP_BITCNT_T_SWAP(x,y)                  \
+  do {                                         \
+    mp_bitcnt_t __mp_bitcnt_t_swap__tmp = (x); \
+    (x) = (y);                                 \
+    (y) = __mp_bitcnt_t_swap__tmp;             \
+  } while (0)
+#define MP_PTR_SWAP(x, y)                                              \
+  do {                                                                 \
+    mp_ptr __mp_ptr_swap__tmp = (x);                                   \
+    (x) = (y);                                                         \
+    (y) = __mp_ptr_swap__tmp;                                          \
+  } while (0)
+#define MP_SRCPTR_SWAP(x, y)                                           \
+  do {                                                                 \
+    mp_srcptr __mp_srcptr_swap__tmp = (x);                             \
+    (x) = (y);                                                         \
+    (y) = __mp_srcptr_swap__tmp;                                       \
+  } while (0)
+
+#define MPN_PTR_SWAP(xp,xs, yp,ys)                                     \
+  do {                                                                 \
+    MP_PTR_SWAP (xp, yp);                                              \
+    MP_SIZE_T_SWAP (xs, ys);                                           \
+  } while(0)
+#define MPN_SRCPTR_SWAP(xp,xs, yp,ys)                                  \
+  do {                                                                 \
+    MP_SRCPTR_SWAP (xp, yp);                                           \
+    MP_SIZE_T_SWAP (xs, ys);                                           \
+  } while(0)
+
+#define MPZ_PTR_SWAP(x, y)                                             \
+  do {                                                                 \
+    mpz_ptr __mpz_ptr_swap__tmp = (x);                                 \
+    (x) = (y);                                                         \
+    (y) = __mpz_ptr_swap__tmp;                                         \
+  } while (0)
+#define MPZ_SRCPTR_SWAP(x, y)                                          \
+  do {                                                                 \
+    mpz_srcptr __mpz_srcptr_swap__tmp = (x);                   \
+    (x) = (y);                                                         \
+    (y) = __mpz_srcptr_swap__tmp;                                      \
+  } while (0)
+
+\f
+/* Memory allocation and other helper functions. */
+static void
+gmp_die (const char *msg)
+{
+  fprintf (stderr, "%s\n", msg);
+  abort();
+}
+
+static void *
+gmp_default_alloc (size_t size)
+{
+  void *p;
+
+  assert (size > 0);
+
+  p = malloc (size);
+  if (!p)
+    gmp_die("gmp_default_alloc: Virtual memory exhausted.");
+
+  return p;
+}
+
+static void *
+gmp_default_realloc (void *old, size_t old_size, size_t new_size)
+{
+  mp_ptr p;  /* old_size is not used here; only new_size goes to realloc. */
+
+  p = realloc (old, new_size);
+
+  if (!p)  /* Allocation failed: print the message and abort via gmp_die. */
+    gmp_die("gmp_default_realloc: Virtual memory exhausted.");
+
+  return p;
+}
+
+static void
+gmp_default_free (void *p, size_t size)
+{
+  free (p);
+}
+
+static void * (*gmp_allocate_func) (size_t) = gmp_default_alloc;
+static void * (*gmp_reallocate_func) (void *, size_t, size_t) = gmp_default_realloc;
+static void (*gmp_free_func) (void *, size_t) = gmp_default_free;
+
+void
+mp_get_memory_functions (void *(**alloc_func) (size_t),
+                        void *(**realloc_func) (void *, size_t, size_t),
+                        void (**free_func) (void *, size_t))
+{
+  if (alloc_func)
+    *alloc_func = gmp_allocate_func;
+
+  if (realloc_func)
+    *realloc_func = gmp_reallocate_func;
+
+  if (free_func)
+    *free_func = gmp_free_func;
+}
+
+void
+mp_set_memory_functions (void *(*alloc_func) (size_t),
+                        void *(*realloc_func) (void *, size_t, size_t),
+                        void (*free_func) (void *, size_t))
+{
+  if (!alloc_func)
+    alloc_func = gmp_default_alloc;
+  if (!realloc_func)
+    realloc_func = gmp_default_realloc;
+  if (!free_func)
+    free_func = gmp_default_free;
+
+  gmp_allocate_func = alloc_func;
+  gmp_reallocate_func = realloc_func;
+  gmp_free_func = free_func;
+}
+
+#define gmp_xalloc(size) ((*gmp_allocate_func)((size)))
+#define gmp_free(p) ((*gmp_free_func) ((p), 0))
+
+static mp_ptr
+gmp_xalloc_limbs (mp_size_t size)
+{
+  return gmp_xalloc (size * sizeof (mp_limb_t));
+}
+
+static mp_ptr
+gmp_xrealloc_limbs (mp_ptr old, mp_size_t size)
+{
+  assert (size > 0);
+  return (*gmp_reallocate_func) (old, 0, size * sizeof (mp_limb_t));
+}
+
+\f
+/* MPN interface */
+
+void
+mpn_copyi (mp_ptr d, mp_srcptr s, mp_size_t n)
+{
+  mp_size_t i;
+  for (i = 0; i < n; i++)
+    d[i] = s[i];
+}
+
+void
+mpn_copyd (mp_ptr d, mp_srcptr s, mp_size_t n)
+{
+  while (n-- > 0)
+    d[n] = s[n];
+}
+
+int
+mpn_cmp (mp_srcptr ap, mp_srcptr bp, mp_size_t n)
+{
+  for (; n > 0; n--)
+    {
+      if (ap[n-1] < bp[n-1])
+       return -1;
+      else if (ap[n-1] > bp[n-1])
+       return 1;
+    }
+  return 0;
+}
+
+static int
+mpn_cmp4 (mp_srcptr ap, mp_size_t an, mp_srcptr bp, mp_size_t bn)
+{
+  if (an > bn)
+    return 1;
+  else if (an < bn)
+    return -1;
+  else
+    return mpn_cmp (ap, bp, an);
+}
+
+static mp_size_t
+mpn_normalized_size (mp_srcptr xp, mp_size_t n)
+{
+  for (; n > 0 && xp[n-1] == 0; n--)
+    ;
+  return n;
+}
+
+#define mpn_zero_p(xp, n) (mpn_normalized_size ((xp), (n)) == 0)
+
+mp_limb_t
+mpn_add_1 (mp_ptr rp, mp_srcptr ap, mp_size_t n, mp_limb_t b)
+{
+  mp_size_t i;
+
+  assert (n > 0);
+
+  for (i = 0; i < n; i++)
+    {
+      mp_limb_t r = ap[i] + b;
+      /* Carry out */
+      b = (r < b);
+      rp[i] = r;
+    }
+  return b;
+}
+
+mp_limb_t
+mpn_add_n (mp_ptr rp, mp_srcptr ap, mp_srcptr bp, mp_size_t n)
+{
+  mp_size_t i;
+  mp_limb_t cy;
+
+  for (i = 0, cy = 0; i < n; i++)
+    {
+      mp_limb_t a, b, r;
+      a = ap[i]; b = bp[i];
+      r = a + cy;
+      cy = (r < cy);
+      r += b;
+      cy += (r < b);
+      rp[i] = r;
+    }
+  return cy;
+}
+
+mp_limb_t
+mpn_add (mp_ptr rp, mp_srcptr ap, mp_size_t an, mp_srcptr bp, mp_size_t bn)
+{
+  mp_limb_t cy;
+
+  assert (an >= bn);
+
+  cy = mpn_add_n (rp, ap, bp, bn);
+  if (an > bn)
+    cy = mpn_add_1 (rp + bn, ap + bn, an - bn, cy);
+  return cy;
+}
+
+mp_limb_t
+mpn_sub_1 (mp_ptr rp, mp_srcptr ap, mp_size_t n, mp_limb_t b)
+{
+  mp_size_t i;
+
+  assert (n > 0);
+
+  for (i = 0; i < n; i++)
+    {
+      mp_limb_t a = ap[i];
+      /* Borrow out of this limb; it is what gets subtracted next.  */
+      mp_limb_t cy = a < b;
+      rp[i] = a - b;
+      b = cy;
+    }
+  return b;
+}
+
+mp_limb_t
+mpn_sub_n (mp_ptr rp, mp_srcptr ap, mp_srcptr bp, mp_size_t n)
+{
+  mp_size_t i;
+  mp_limb_t cy;
+
+  for (i = 0, cy = 0; i < n; i++)
+    {
+      mp_limb_t a, b;
+      a = ap[i]; b = bp[i];
+      b += cy;
+      cy = (b < cy);
+      cy += (a < b);
+      rp[i] = a - b;
+    }
+  return cy;
+}
+
+mp_limb_t
+mpn_sub (mp_ptr rp, mp_srcptr ap, mp_size_t an, mp_srcptr bp, mp_size_t bn)
+{
+  mp_limb_t cy;
+
+  assert (an >= bn);
+
+  cy = mpn_sub_n (rp, ap, bp, bn);
+  if (an > bn)
+    cy = mpn_sub_1 (rp + bn, ap + bn, an - bn, cy);
+  return cy;
+}
+
+mp_limb_t
+mpn_mul_1 (mp_ptr rp, mp_srcptr up, mp_size_t n, mp_limb_t vl)
+{
+  mp_limb_t ul, cl, hpl, lpl;
+
+  assert (n >= 1);
+
+  cl = 0;
+  do
+    {
+      ul = *up++;
+      gmp_umul_ppmm (hpl, lpl, ul, vl);
+
+      lpl += cl;
+      cl = (lpl < cl) + hpl;
+
+      *rp++ = lpl;
+    }
+  while (--n != 0);
+
+  return cl;
+}
+
+mp_limb_t
+mpn_addmul_1 (mp_ptr rp, mp_srcptr up, mp_size_t n, mp_limb_t vl)
+{
+  mp_limb_t ul, cl, hpl, lpl, rl;
+
+  assert (n >= 1);
+
+  cl = 0;
+  do
+    {
+      ul = *up++;
+      gmp_umul_ppmm (hpl, lpl, ul, vl);
+
+      lpl += cl;
+      cl = (lpl < cl) + hpl;
+
+      rl = *rp;
+      lpl = rl + lpl;
+      cl += lpl < rl;
+      *rp++ = lpl;
+    }
+  while (--n != 0);
+
+  return cl;
+}
+
+mp_limb_t
+mpn_submul_1 (mp_ptr rp, mp_srcptr up, mp_size_t n, mp_limb_t vl)
+{
+  mp_limb_t ul, cl, hpl, lpl, rl;
+
+  assert (n >= 1);
+
+  cl = 0;
+  do
+    {
+      ul = *up++;
+      gmp_umul_ppmm (hpl, lpl, ul, vl);
+
+      lpl += cl;
+      cl = (lpl < cl) + hpl;
+
+      rl = *rp;
+      lpl = rl - lpl;
+      cl += lpl > rl;
+      *rp++ = lpl;
+    }
+  while (--n != 0);
+
+  return cl;
+}
+
+mp_limb_t
+mpn_mul (mp_ptr rp, mp_srcptr up, mp_size_t un, mp_srcptr vp, mp_size_t vn)
+{
+  assert (un >= vn);
+  assert (vn >= 1);
+
+  /* We first multiply by the low order limb. This result can be
+     stored, not added, to rp. We also avoid a loop for zeroing this
+     way. */
+
+  rp[un] = mpn_mul_1 (rp, up, un, vp[0]);
+  rp += 1, vp += 1, vn -= 1;
+
+  /* Now accumulate the product of up[] and the next higher limb from
+     vp[]. */
+
+  while (vn >= 1)
+    {
+      rp[un] = mpn_addmul_1 (rp, up, un, vp[0]);
+      rp += 1, vp += 1, vn -= 1;
+    }
+  return rp[un - 1];
+}
+
+void
+mpn_mul_n (mp_ptr rp, mp_srcptr ap, mp_srcptr bp, mp_size_t n)
+{
+  mpn_mul (rp, ap, n, bp, n);
+}
+
+void
+mpn_sqr (mp_ptr rp, mp_srcptr ap, mp_size_t n)
+{
+  mpn_mul (rp, ap, n, ap, n);
+}
+
+mp_limb_t
+mpn_lshift (mp_ptr rp, mp_srcptr up, mp_size_t n, unsigned int cnt)
+{
+  mp_limb_t high_limb, low_limb;
+  unsigned int tnc;
+  mp_size_t i;
+  mp_limb_t retval;
+
+  assert (n >= 1);
+  assert (cnt >= 1);
+  assert (cnt < GMP_LIMB_BITS);
+
+  up += n;
+  rp += n;
+
+  tnc = GMP_LIMB_BITS - cnt;
+  low_limb = *--up;
+  retval = low_limb >> tnc;
+  high_limb = (low_limb << cnt);
+
+  for (i = n - 1; i != 0; i--)
+    {
+      low_limb = *--up;
+      *--rp = high_limb | (low_limb >> tnc);
+      high_limb = (low_limb << cnt);
+    }
+  *--rp = high_limb;
+
+  return retval;
+}
+
+mp_limb_t
+mpn_rshift (mp_ptr rp, mp_srcptr up, mp_size_t n, unsigned int cnt)
+{
+  mp_limb_t high_limb, low_limb;
+  unsigned int tnc;
+  mp_size_t i;
+  mp_limb_t retval;
+
+  assert (n >= 1);
+  assert (cnt >= 1);
+  assert (cnt < GMP_LIMB_BITS);
+
+  tnc = GMP_LIMB_BITS - cnt;
+  high_limb = *up++;
+  retval = (high_limb << tnc);
+  low_limb = high_limb >> cnt;
+
+  for (i = n - 1; i != 0; i--)
+    {
+      high_limb = *up++;
+      *rp++ = low_limb | (high_limb << tnc);
+      low_limb = high_limb >> cnt;
+    }
+  *rp = low_limb;
+
+  return retval;
+}
+
+\f
+/* MPN division interface. */
+/* Compute the inverse m used by the 3/2 division step for the two-limb
+   divisor <u1, u0>.  u1 must be normalized, i.e. have its most
+   significant bit set (asserted below).  When u0 is zero the value
+   returned is just the 2/1 inverse of u1. */
+mp_limb_t
+mpn_invert_3by2 (mp_limb_t u1, mp_limb_t u0)
+{
+  mp_limb_t r, p, m;
+  unsigned ul, uh;
+  unsigned ql, qh;
+
+  /* First, do a 2/1 inverse. */
+  /* The inverse m is defined as floor( (B^2 - 1 - u1)/u1 ), so that 0 <
+   * B^2 - (B + m) u1 <= u1 */
+  assert (u1 >= GMP_LIMB_HIGHBIT);
+
+  /* Split u1 into its low (ul) and high (uh) half limbs. */
+  ul = u1 & GMP_LLIMB_MASK;
+  uh = u1 >> (GMP_LIMB_BITS / 2);
+
+  /* Candidate for the high half of the inverse, with its remainder
+     extended by a half limb of one bits. */
+  qh = ~u1 / uh;
+  r = ((~u1 - (mp_limb_t) qh * uh) << (GMP_LIMB_BITS / 2)) | GMP_LLIMB_MASK;
+
+  p = (mp_limb_t) qh * ul;
+  /* Adjustment steps taken from udiv_qrnnd_c */
+  if (r < p)
+    {
+      qh--;
+      r += u1;
+      if (r >= u1) /* i.e. we didn't get carry when adding to r */
+       if (r < p)
+         {
+           qh--;
+           r += u1;
+         }
+    }
+  r -= p;
+
+  /* Do a 3/2 division (with half limb size) */
+  p = (r >> (GMP_LIMB_BITS / 2)) * qh + r;
+  ql = (p >> (GMP_LIMB_BITS / 2)) + 1;
+
+  /* By the 3/2 method, we don't need the high half limb. */
+  r = (r << (GMP_LIMB_BITS / 2)) + GMP_LLIMB_MASK - ql * u1;
+
+  if (r >= (p << (GMP_LIMB_BITS / 2)))
+    {
+      ql--;
+      r += u1;
+    }
+  /* Assemble the full-limb inverse from its two halves, then apply one
+     final correction step. */
+  m = ((mp_limb_t) qh << (GMP_LIMB_BITS / 2)) + ql;
+  if (r >= u1)
+    {
+      m++;
+      r -= u1;
+    }
+
+  /* Refine m to take the low divisor limb u0 into account; skipped
+     entirely when u0 is zero. */
+  if (u0 > 0)
+    {
+      mp_limb_t th, tl;
+      r = ~r;
+      r += u0;
+      if (r < u0)
+       {
+         m--;
+         if (r >= u1)
+           {
+             m--;
+             r -= u1;
+           }
+         r -= u1;
+       }
+      gmp_umul_ppmm (th, tl, u0, m);
+      r += th;
+      if (r < th)
+       {
+         m--;
+         if (r > u1 || (r == u1 && tl > u0))
+           m--;
+       }
+    }
+
+  return m;
+}
+
+/* Precomputed data for dividing by a fixed divisor.  Filled in by the
+   mpn_div_qr_*invert functions and read by the *_preinv division
+   routines. */
+struct gmp_div_inverse
+{
+  /* Normalization shift count. */
+  unsigned shift;
+  /* Normalized divisor (d0 unused for mpn_div_qr_1) */
+  mp_limb_t d1, d0;
+  /* Inverse, for 2/1 or 3/2. */
+  mp_limb_t di;
+};
+
+/* Prepare *inv for division by the single limb d (d > 0): store the
+   normalization shift obtained from gmp_clz, the divisor shifted left
+   by that amount, and the inverse of the shifted divisor.  The d0
+   field is not written. */
+static void
+mpn_div_qr_1_invert (struct gmp_div_inverse *inv, mp_limb_t d)
+{
+  unsigned lz;
+
+  assert (d > 0);
+
+  gmp_clz (lz, d);
+  inv->d1 = d << lz;
+  inv->shift = lz;
+  inv->di = mpn_invert_limb (inv->d1);
+}
+
+/* Prepare *inv for division by the two-limb divisor <d1, d0>
+   (d1 > 0): the divisor is shifted left by the amount gmp_clz reports
+   for d1, and the 3/2 inverse of the shifted divisor is stored. */
+static void
+mpn_div_qr_2_invert (struct gmp_div_inverse *inv,
+                    mp_limb_t d1, mp_limb_t d0)
+{
+  unsigned lz;
+  mp_limb_t hi = d1;
+  mp_limb_t lo = d0;
+
+  assert (d1 > 0);
+  gmp_clz (lz, d1);
+
+  /* A zero shift must be skipped: shifting by GMP_LIMB_BITS would be
+     undefined. */
+  if (lz != 0)
+    {
+      hi = (d1 << lz) | (d0 >> (GMP_LIMB_BITS - lz));
+      lo = d0 << lz;
+    }
+
+  inv->shift = lz;
+  inv->d1 = hi;
+  inv->d0 = lo;
+  inv->di = mpn_invert_3by2 (hi, lo);
+}
+
+/* Prepare *inv for division by {dp, dn} (dn > 0).  One- and two-limb
+   divisors are handed to the dedicated helpers; for longer divisors
+   the two most significant limbs are normalized (pulling bits in from
+   the third limb) and their 3/2 inverse is stored. */
+static void
+mpn_div_qr_invert (struct gmp_div_inverse *inv,
+                  mp_srcptr dp, mp_size_t dn)
+{
+  unsigned lz;
+  mp_limb_t hi, lo;
+
+  assert (dn > 0);
+
+  if (dn == 1)
+    {
+      mpn_div_qr_1_invert (inv, dp[0]);
+      return;
+    }
+  if (dn == 2)
+    {
+      mpn_div_qr_2_invert (inv, dp[1], dp[0]);
+      return;
+    }
+
+  hi = dp[dn-1];
+  lo = dp[dn-2];
+  assert (hi > 0);
+
+  gmp_clz (lz, hi);
+  if (lz != 0)
+    {
+      mp_limb_t below = dp[dn-3];
+
+      hi = (hi << lz) | (lo >> (GMP_LIMB_BITS - lz));
+      lo = (lo << lz) | (below >> (GMP_LIMB_BITS - lz));
+    }
+
+  inv->shift = lz;
+  inv->d1 = hi;
+  inv->d0 = lo;
+  inv->di = mpn_invert_3by2 (hi, lo);
+}
+
+/* Divide {np, nn} by the single-limb divisor described by *inv and
+   return the remainder.  Quotient limbs are stored at qp unless qp is
+   NULL.  When the divisor needed normalization, the numerator is first
+   shifted into a temporary copy, so np itself is only read.
+
+   Not matching current public gmp interface, rather corresponding to
+   the sbpi1_div_* functions. */
+static mp_limb_t
+mpn_div_qr_1_preinv (mp_ptr qp, mp_srcptr np, mp_size_t nn,
+                    const struct gmp_div_inverse *inv)
+{
+  const unsigned lz = inv->shift;
+  const mp_limb_t dnorm = inv->d1;
+  const mp_limb_t dinv = inv->di;
+  mp_ptr scratch = NULL;
+  mp_limb_t rem = 0;
+  mp_size_t k;
+
+  if (lz > 0)
+    {
+      scratch = gmp_xalloc_limbs (nn);
+      rem = mpn_lshift (scratch, np, nn, lz);
+      np = scratch;
+    }
+
+  /* One quotient limb per numerator limb, most significant first. */
+  for (k = nn - 1; k >= 0; k--)
+    {
+      mp_limb_t qlimb;
+
+      gmp_udiv_qrnnd_preinv (qlimb, rem, rem, np[k], dnorm, dinv);
+      if (qp)
+        qp[k] = qlimb;
+    }
+
+  if (lz > 0)
+    gmp_free (scratch);
+
+  /* Undo the normalization shift on the remainder. */
+  return rem >> lz;
+}
+
+/* Divide {np, nn} by the limb d (d > 0), returning the remainder and
+   storing the quotient at qp unless qp is NULL.  Powers of two greater
+   than one are handled with a mask and a right shift; everything else
+   goes through the precomputed-inverse routine. */
+static mp_limb_t
+mpn_div_qr_1 (mp_ptr qp, mp_srcptr np, mp_size_t nn, mp_limb_t d)
+{
+  struct gmp_div_inverse inv;
+
+  assert (d > 0);
+
+  if (d > 1 && (d & (d-1)) == 0)
+    {
+      /* Take the remainder before shifting: the shift below may
+         overwrite np[0] if qp and np refer to the same limbs. */
+      mp_limb_t low_bits = np[0] & (d-1);
+      unsigned log2d;
+
+      gmp_ctz (log2d, d);
+      if (qp)
+        mpn_rshift (qp, np, nn, log2d);
+
+      return low_bits;
+    }
+
+  mpn_div_qr_1_invert (&inv, d);
+  return mpn_div_qr_1_preinv (qp, np, nn, &inv);
+}
+
+/* Divide {np, nn} (nn >= 2) by the two-limb divisor described by *inv.
+   The two remainder limbs are stored at rp[0] and rp[1]; quotient
+   limbs are stored at qp[0 .. nn-2] unless qp is NULL.  When the
+   divisor needed normalization, the numerator is shifted into a
+   temporary copy first. */
+static void
+mpn_div_qr_2_preinv (mp_ptr qp, mp_ptr rp, mp_srcptr np, mp_size_t nn,
+                    const struct gmp_div_inverse *inv)
+{
+  unsigned lz;
+  mp_limb_t dhi, dlo, dinv;
+  mp_limb_t rhi, rlo;
+  mp_ptr scratch = NULL;
+  mp_size_t k;
+
+  assert (nn >= 2);
+
+  lz = inv->shift;
+  dhi = inv->d1;
+  dlo = inv->d0;
+  dinv = inv->di;
+
+  rhi = 0;
+  if (lz > 0)
+    {
+      scratch = gmp_xalloc_limbs (nn);
+      rhi = mpn_lshift (scratch, np, nn, lz);
+      np = scratch;
+    }
+
+  rlo = np[nn - 1];
+
+  k = nn - 2;
+  while (k >= 0)
+    {
+      mp_limb_t nlimb, qlimb;
+
+      nlimb = np[k];
+      gmp_udiv_qr_3by2 (qlimb, rhi, rlo, rhi, rlo, nlimb, dhi, dlo, dinv);
+
+      if (qp)
+        qp[k] = qlimb;
+      k--;
+    }
+
+  if (lz > 0)
+    {
+      /* Undo the normalization shift on the two-limb remainder. */
+      assert ((rlo << (GMP_LIMB_BITS - lz)) == 0);
+      rlo = (rlo >> lz) | (rhi << (GMP_LIMB_BITS - lz));
+      rhi >>= lz;
+
+      gmp_free (scratch);
+    }
+
+  rp[1] = rhi;
+  rp[0] = rlo;
+}
+
+/* Disabled convenience wrapper: would compute the two-limb inverse for
+   <d1, d0> and then call mpn_div_qr_2_preinv.  Excluded from the build
+   by the #if 0 below. */
+#if 0
+static void
+mpn_div_qr_2 (mp_ptr qp, mp_ptr rp, mp_srcptr np, mp_size_t nn,
+             mp_limb_t d1, mp_limb_t d0)
+{
+  struct gmp_div_inverse inv;
+  assert (nn >= 2);
+
+  mpn_div_qr_2_invert (&inv, d1, d0);
+  mpn_div_qr_2_preinv (qp, rp, np, nn, &inv);
+}
+#endif
+
+/* Schoolbook division of the numerator <n1, np[nn-1], ..., np[0]> by
+   the normalized divisor {dp, dn} (dn > 2, high bit of dp[dn-1] set),
+   using the precomputed 3/2 inverse dinv of the two top divisor limbs.
+   np is updated in place; on return the final value of n1 has been
+   stored at np[dn-1].  One quotient limb is produced per iteration and
+   stored at qp[nn-dn] down to qp[0] unless qp is NULL. */
+static void
+mpn_div_qr_pi1 (mp_ptr qp,
+               mp_ptr np, mp_size_t nn, mp_limb_t n1,
+               mp_srcptr dp, mp_size_t dn,
+               mp_limb_t dinv)
+{
+  mp_size_t i;
+
+  mp_limb_t d1, d0;
+  mp_limb_t cy, cy1;
+  mp_limb_t q;
+
+  assert (dn > 2);
+  assert (nn >= dn);
+
+  d1 = dp[dn - 1];
+  d0 = dp[dn - 2];
+
+  assert ((d1 & GMP_LIMB_HIGHBIT) != 0);
+  /* Iteration variable is the index of the q limb.
+   *
+   * We divide <n1, np[dn-1+i], np[dn-2+i], np[dn-3+i],..., np[i]>
+   * by            <d1,          d0,        dp[dn-3],  ..., dp[0] >
+   */
+
+  for (i = nn - dn; i >= 0; i--)
+    {
+      mp_limb_t n0 = np[dn-1+i];
+
+      /* Top two numerator limbs equal the top two divisor limbs: use
+         the largest possible quotient limb instead of the 3/2 step. */
+      if (n1 == d1 && n0 == d0)
+       {
+         q = GMP_LIMB_MAX;
+         mpn_submul_1 (np+i, dp, dn, q);
+         n1 = np[dn-1+i];      /* update n1, last loop's value will now be invalid */
+       }
+      else
+       {
+         /* Estimate q from the top three numerator limbs; n1, n0
+            become the partial remainder of that 3/2 division. */
+         gmp_udiv_qr_3by2 (q, n1, n0, n1, n0, np[dn-2+i], d1, d0, dinv);
+
+         /* Subtract q times the remaining dn-2 divisor limbs and
+            propagate the borrow through n0 and n1. */
+         cy = mpn_submul_1 (np + i, dp, dn-2, q);
+
+         cy1 = n0 < cy;
+         n0 = n0 - cy;
+         cy = n1 < cy1;
+         n1 = n1 - cy1;
+         np[dn-2+i] = n0;
+
+         /* A borrow out of n1 means q was one too large: add the
+            divisor back and decrement q. */
+         if (cy != 0)
+           {
+             n1 += d1 + mpn_add_n (np + i, np + i, dp, dn - 1);
+             q--;
+           }
+       }
+
+      if (qp)
+       qp[i] = q;
+    }
+
+  np[dn - 1] = n1;
+}
+
+/* Divide {np, nn} by {dp, dn} (nn >= dn > 0) using the precomputed
+   *inv.  The remainder replaces the low dn limbs of np and the
+   quotient is stored at qp unless qp is NULL.  For dn > 2, dp must
+   already be the normalized divisor matching inv (asserted). */
+static void
+mpn_div_qr_preinv (mp_ptr qp, mp_ptr np, mp_size_t nn,
+                  mp_srcptr dp, mp_size_t dn,
+                  const struct gmp_div_inverse *inv)
+{
+  unsigned lz;
+  mp_limb_t top;
+
+  assert (dn > 0);
+  assert (nn >= dn);
+
+  if (dn == 1)
+    {
+      np[0] = mpn_div_qr_1_preinv (qp, np, nn, inv);
+      return;
+    }
+  if (dn == 2)
+    {
+      mpn_div_qr_2_preinv (qp, np, np, nn, inv);
+      return;
+    }
+
+  assert (inv->d1 == dp[dn-1]);
+  assert (inv->d0 == dp[dn-2]);
+  assert ((inv->d1 & GMP_LIMB_HIGHBIT) != 0);
+
+  /* Normalize the numerator in place; the bits shifted out of the top
+     become the extra high limb handed to mpn_div_qr_pi1. */
+  lz = inv->shift;
+  top = 0;
+  if (lz > 0)
+    top = mpn_lshift (np, np, nn, lz);
+
+  mpn_div_qr_pi1 (qp, np, nn, top, dp, dn, inv->di);
+
+  /* Shift the remainder back down. */
+  if (lz > 0)
+    gmp_assert_nocarry (mpn_rshift (np, np, dn, lz));
+}
+
+/* Divide {np, nn} by {dp, dn} (nn >= dn > 0).  The remainder replaces
+   the low dn limbs of np and the quotient is stored at qp unless qp is
+   NULL.  For divisors longer than two limbs that need normalization, a
+   shifted temporary copy of the divisor is used. */
+static void
+mpn_div_qr (mp_ptr qp, mp_ptr np, mp_size_t nn, mp_srcptr dp, mp_size_t dn)
+{
+  struct gmp_div_inverse inv;
+  mp_ptr shifted = NULL;
+  mp_srcptr divisor = dp;
+
+  assert (dn > 0);
+  assert (nn >= dn);
+
+  mpn_div_qr_invert (&inv, dp, dn);
+
+  if (dn > 2 && inv.shift > 0)
+    {
+      shifted = gmp_xalloc_limbs (dn);
+      gmp_assert_nocarry (mpn_lshift (shifted, dp, dn, inv.shift));
+      divisor = shifted;
+    }
+
+  mpn_div_qr_preinv (qp, np, nn, divisor, dn, &inv);
+
+  if (shifted)
+    gmp_free (shifted);
+}
+
+\f
+/* MPN base conversion. */
+/* If b is one of the powers of two 2, 4, ..., 256, return its base-2
+   logarithm (1 .. 8); otherwise return 0. */
+static unsigned
+mpn_base_power_of_two_p (unsigned b)
+{
+  unsigned k;
+
+  for (k = 1; k <= 8; k++)
+    if (b == (1U << k))
+      return k;
+
+  return 0;
+}
+
+/* Per-base constants for conversion to and from bases that are not
+   powers of two; filled in by mpn_get_base_info. */
+struct mpn_base_info
+{
+  /* bb is the largest power of the base which fits in one limb, and
+     exp is the corresponding exponent. */
+  unsigned exp;
+  mp_limb_t bb;
+};
+
+/* Fill in *info for base b: find the largest power of b that still
+   fits in a limb, together with its exponent. */
+static void
+mpn_get_base_info (struct mpn_base_info *info, mp_limb_t b)
+{
+  /* Any power not exceeding limit can be multiplied by b once more
+     without overflowing a limb. */
+  mp_limb_t limit = GMP_LIMB_MAX / b;
+  mp_limb_t power = b;
+  unsigned count = 1;
+
+  while (power <= limit)
+    {
+      power *= b;
+      count++;
+    }
+
+  info->exp = count;
+  info->bb = power;
+}
+
+/* Return GMP_LIMB_BITS minus the count gmp_clz reports for the
+   non-zero limb u, i.e. the number of bits needed to represent u. */
+static mp_bitcnt_t
+mpn_limb_size_in_base_2 (mp_limb_t u)
+{
+  unsigned lz;
+
+  assert (u > 0);
+
+  gmp_clz (lz, u);
+  return GMP_LIMB_BITS - lz;
+}
+
+/* Convert {up, un} to digits in a power-of-two base with BITS bits per
+   digit.  Digits are stored most significant first in sp (as raw digit
+   values, not characters); the return value is the number of digits
+   written. */
+static size_t
+mpn_get_str_bits (unsigned char *sp, unsigned bits, mp_srcptr up, mp_size_t un)
+{
+  unsigned char mask;
+  size_t sn, j;
+  mp_size_t i;
+  int shift;
+
+  /* Digit count: bit length of the operand divided by bits, rounded
+     up. */
+  sn = ((un - 1) * GMP_LIMB_BITS + mpn_limb_size_in_base_2 (up[un-1])
+       + bits - 1) / bits;
+
+  mask = (1U << bits) - 1;
+
+  /* Walk the limbs from the least significant bit upwards while
+     filling sp from its last position backwards. */
+  for (i = 0, j = sn, shift = 0; j-- > 0;)
+    {
+      unsigned char digit = up[i] >> shift;
+
+      shift += bits;
+
+      /* The digit reaches the end of the current limb: advance to the
+        next limb (if any) and merge in the bits of the digit that
+        come from it. */
+      if (shift >= GMP_LIMB_BITS && ++i < un)
+       {
+         shift -= GMP_LIMB_BITS;
+         digit |= up[i] << (bits - shift);
+       }
+      sp[j] = digit & mask;
+    }
+  return sn;
+}
+
+/* Convert the single limb w to digits in the base described by *binv.
+   Digits are generated from the least significant end, so sp receives
+   them in reverse order; the caller reverses at the end.  Returns the
+   number of digits stored (zero when w is zero). */
+static size_t
+mpn_limb_get_str (unsigned char *sp, mp_limb_t w,
+                 const struct gmp_div_inverse *binv)
+{
+  const unsigned lz = binv->shift;
+  mp_size_t count = 0;
+
+  while (w > 0)
+    {
+      mp_limb_t hi, lo, digit;
+
+      /* Pre-shift w by the normalization count, as a two-limb value. */
+      hi = w >> (GMP_LIMB_BITS - lz);
+      lo = w << lz;
+
+      gmp_udiv_qrnnd_preinv (w, digit, hi, lo, binv->d1, binv->di);
+      assert ( (digit << (GMP_LIMB_BITS - lz)) == 0);
+
+      sp[count++] = digit >> lz;
+    }
+  return count;
+}
+
+/* Convert {up, un} to digits in a base that is not a power of two.
+   The operand is repeatedly divided in place by info->bb (so up is
+   clobbered); each remainder yields info->exp digits, zero padded.
+   Digits are produced least significant first and reversed at the end
+   so that sp holds them most significant first.  Returns the digit
+   count. */
+static size_t
+mpn_get_str_other (unsigned char *sp,
+                  int base, const struct mpn_base_info *info,
+                  mp_ptr up, mp_size_t un)
+{
+  struct gmp_div_inverse binv;
+  size_t len = 0;
+  size_t lo, hi;
+
+  mpn_div_qr_1_invert (&binv, base);
+
+  if (un > 1)
+    {
+      struct gmp_div_inverse bbinv;
+      mpn_div_qr_1_invert (&bbinv, info->bb);
+
+      do
+        {
+          mp_limb_t chunk = mpn_div_qr_1_preinv (up, up, un, &bbinv);
+          size_t got;
+
+          un -= (up[un-1] == 0);
+          got = mpn_limb_get_str (sp + len, chunk, &binv);
+          len += got;
+
+          /* Pad the group to exactly info->exp digits. */
+          for (; got < info->exp; got++)
+            sp[len++] = 0;
+        }
+      while (un > 1);
+    }
+  len += mpn_limb_get_str (sp + len, up[0], &binv);
+
+  /* Reverse order */
+  if (len > 1)
+    for (lo = 0, hi = len - 1; lo < hi; lo++, hi--)
+      {
+        unsigned char t = sp[lo];
+        sp[lo] = sp[hi];
+        sp[hi] = t;
+      }
+
+  return len;
+}
+
+/* Convert the normalized operand {up, un} (un > 0, top limb non-zero)
+   to digits in the given base, most significant first, and return the
+   number of digits stored in sp.  Bases that are not powers of two go
+   through mpn_get_str_other, which overwrites up. */
+size_t
+mpn_get_str (unsigned char *sp, int base, mp_ptr up, mp_size_t un)
+{
+  struct mpn_base_info info;
+  unsigned bits;
+
+  assert (un > 0);
+  assert (up[un-1] > 0);
+
+  bits = mpn_base_power_of_two_p (base);
+  if (bits != 0)
+    return mpn_get_str_bits (sp, bits, up, un);
+
+  mpn_get_base_info (&info, base);
+  return mpn_get_str_other (sp, base, &info, up, un);
+}
+
+/* Convert sn digits (most significant first, BITS bits each) to limbs
+   at rp and return the normalized limb count. */
+static mp_size_t
+mpn_set_str_bits (mp_ptr rp, const unsigned char *sp, size_t sn,
+                 unsigned bits)
+{
+  mp_size_t limbs = 0;
+  unsigned pos = 0;            /* bit offset within the current limb */
+  size_t k = sn;
+
+  /* Consume digits starting with the least significant one. */
+  while (k > 0)
+    {
+      mp_limb_t digit = sp[--k];
+
+      if (pos == 0)
+        {
+          /* The digit opens a new limb. */
+          rp[limbs++] = digit;
+          pos = bits;
+          continue;
+        }
+
+      rp[limbs - 1] |= digit << pos;
+      pos += bits;
+      if (pos < GMP_LIMB_BITS)
+        continue;
+
+      /* The limb is full; any bits of the digit that did not fit
+         start the next limb. */
+      pos -= GMP_LIMB_BITS;
+      if (pos > 0)
+        rp[limbs++] = digit >> (bits - pos);
+    }
+
+  return mpn_normalized_size (rp, limbs);
+}
+
+/* Convert sn digits (most significant first, sn > 0) in base b, which
+   is not a power of two, to limbs at rp and return the normalized limb
+   count (0 when every digit is zero).  Digits are consumed in groups
+   of info->exp, each group folded in as rp = rp * info->bb + group; the
+   first group is shorter when sn is not a multiple of info->exp. */
+static mp_size_t
+mpn_set_str_other (mp_ptr rp, const unsigned char *sp, size_t sn,
+                  mp_limb_t b, const struct mpn_base_info *info)
+{
+  mp_size_t rn;
+  mp_limb_t w;
+  unsigned first;
+  unsigned k;
+  size_t j;
+
+  assert (sn > 0);
+
+  first = 1 + (sn - 1) % info->exp;
+
+  j = 0;
+  w = sp[j++];
+  for (k = 1; k < first; k++)
+    w = w * b + sp[j++];
+
+  rp[0] = w;
+
+  /* Keep at least one limb live even when the leading group is zero,
+     so that mpn_mul_1 and mpn_add_1 are never called with a zero limb
+     count; the result is normalized once at the end instead. */
+  for (rn = 1; j < sn;)
+    {
+      mp_limb_t cy;
+
+      w = sp[j++];
+      for (k = 1; k < info->exp; k++)
+       w = w * b + sp[j++];
+
+      cy = mpn_mul_1 (rp, rp, rn, info->bb);
+      cy += mpn_add_1 (rp, rp, rn, w);
+      if (cy > 0)
+       rp[rn++] = cy;
+    }
+  assert (j == sn);
+
+  /* Limbs above the first are only appended when non-zero, so the only
+     possible leading zero limb is rp[0] of an all-zero result. */
+  rn -= (rp[rn-1] == 0);
+
+  return rn;
+}
+
+/* Convert sn digits (most significant first) in the given base to
+   limbs at rp and return the limb count; an empty digit string gives
+   0 and leaves rp untouched. */
+mp_size_t
+mpn_set_str (mp_ptr rp, const unsigned char *sp, size_t sn, int base)
+{
+  struct mpn_base_info info;
+  unsigned bits;
+
+  if (sn == 0)
+    return 0;
+
+  bits = mpn_base_power_of_two_p (base);
+  if (bits != 0)
+    return mpn_set_str_bits (rp, sp, sn, bits);
+
+  mpn_get_base_info (&info, base);
+  return mpn_set_str_other (rp, sp, sn, base, &info);
+}
+
+\f
+/* MPZ interface */
+/* Initialize r to zero with storage for a single limb. */
+void
+mpz_init (mpz_t r)
+{
+  r->_mp_d = gmp_xalloc_limbs (1);
+  r->_mp_alloc = 1;
+  r->_mp_size = 0;
+}
+
+/* Initialize r to zero with room for at least BITS bits (and never
+   less than one limb).  The utility of this function is a bit limited,
+   since many functions assign the result variable using mpz_swap. */
+void
+mpz_init2 (mpz_t r, mp_bitcnt_t bits)
+{
+  mp_size_t limbs;
+
+  /* Round down, except if 0: limbs = ceil (bits / GMP_LIMB_BITS),
+     with a minimum of one. */
+  if (bits != 0)
+    bits--;
+  limbs = 1 + bits / GMP_LIMB_BITS;
+
+  r->_mp_d = gmp_xalloc_limbs (limbs);
+  r->_mp_alloc = limbs;
+  r->_mp_size = 0;
+}
+
+/* Release the limb storage of r via gmp_free.  The other fields of r
+   are left as they were. */
+void
+mpz_clear (mpz_t r)
+{
+  gmp_free (r->_mp_d);
+}
+
+/* Resize the limb storage of r to size limbs (at least one) and return
+   the new limb pointer.  If the current value no longer fits in the
+   new allocation, r is set to zero. */
+static void *
+mpz_realloc (mpz_t r, mp_size_t size)
+{
+  if (size < 1)
+    size = 1;
+
+  r->_mp_d = gmp_xrealloc_limbs (r->_mp_d, size);
+  r->_mp_alloc = size;
+
+  if (GMP_ABS (r->_mp_size) > size)
+    r->_mp_size = 0;
+
+  return r->_mp_d;
+}
+
+/* Make sure the mpz_t Z has room for at least N limbs, calling
+   mpz_realloc only when the current allocation is too small.
+   Evaluates to the (possibly new) limb pointer of Z.  Both arguments
+   are evaluated more than once. */
+#define MPZ_REALLOC(z,n) ((n) > (z)->_mp_alloc                 \
+                         ? mpz_realloc(z,n)                    \
+                         : (z)->_mp_d)
+\f
+/* MPZ assignment and basic conversions. */
+/* Set r to the signed value x.  Non-negative values are delegated to
+   mpz_set_ui; negative ones store the magnitude in the first limb with
+   a size of -1. */
+void
+mpz_set_si (mpz_t r, signed long int x)
+{
+  if (x < 0)
+    {
+      r->_mp_d[0] = GMP_NEG_CAST (unsigned long int, x);
+      r->_mp_size = -1;
+      return;
+    }
+
+  mpz_set_ui (r, x);
+}
+
+/* Set r to the unsigned value x: zero is represented by size 0, any
+   other value by a single limb. */
+void
+mpz_set_ui (mpz_t r, unsigned long int x)
+{
+  if (x == 0)
+    {
+      r->_mp_size = 0;
+      return;
+    }
+
+  r->_mp_d[0] = x;
+  r->_mp_size = 1;
+}
+
+/* Copy the value of x into r, growing r as needed.  Assigning a
+   variable to itself is allowed and does nothing. */
+void
+mpz_set (mpz_t r, const mpz_t x)
+{
+  mp_size_t limbs;
+  mp_ptr dst;
+
+  if (r == x)
+    return;
+
+  limbs = GMP_ABS (x->_mp_size);
+  dst = MPZ_REALLOC (r, limbs);
+
+  mpn_copyi (dst, x->_mp_d, limbs);
+  r->_mp_size = x->_mp_size;
+}
+
+/* Initialize r with mpz_init and then assign it the signed value x. */
+void
+mpz_init_set_si (mpz_t r, signed long int x)
+{
+  mpz_init (r);
+  mpz_set_si (r, x);
+}
+
+/* Initialize r with mpz_init and then assign it the unsigned value x. */
+void
+mpz_init_set_ui (mpz_t r, unsigned long int x)
+{
+  mpz_init (r);
+  mpz_set_ui (r, x);
+}
+
+/* Initialize r with mpz_init and then copy the value of x into it. */
+void
+mpz_init_set (mpz_t r, const mpz_t x)
+{
+  mpz_init (r);
+  mpz_set (r, x);
+}
+
+/* Return non-zero iff u is zero, a one-limb positive value below
+   GMP_LIMB_HIGHBIT, or a one-limb negative value whose magnitude does
+   not exceed GMP_LIMB_HIGHBIT. */
+int
+mpz_fits_slong_p (const mpz_t u)
+{
+  switch (u->_mp_size)
+    {
+    case 0:
+      return 1;
+    case 1:
+      return u->_mp_d[0] < GMP_LIMB_HIGHBIT;
+    case -1:
+      return u->_mp_d[0] <= GMP_LIMB_HIGHBIT;
+    default:
+      return 0;
+    }
+}
+
+/* Return non-zero iff u is zero or a positive single-limb value. */
+int
+mpz_fits_ulong_p (const mpz_t u)
+{
+  mp_size_t limbs = u->_mp_size;
+
+  return limbs >= 0 && limbs <= 1;
+}
+
+/* Return the value of u as a long.  Only the least significant limb is
+   consulted; when u does not fit in a long the result keeps the sign
+   of u together with the low bits of its magnitude (the top limb bit
+   is masked off). */
+long int
+mpz_get_si (const mpz_t u)
+{
+  mp_size_t us = u->_mp_size;
+
+  if (us > 0)
+    return (long) (u->_mp_d[0] & ~GMP_LIMB_HIGHBIT);
+  else if (us < 0)
+    /* Negate in signed arithmetic on a masked, non-negative value.
+       Converting an unsigned value above LONG_MAX to long would be
+       implementation-defined; this form also yields LONG_MIN
+       correctly. */
+    return -1 - (long) ((u->_mp_d[0] - 1) & ~GMP_LIMB_HIGHBIT);
+  else
+    return 0;
+}
+
+/* Return the least significant limb of |u|, or 0 when u is zero. */
+unsigned long int
+mpz_get_ui (const mpz_t u)
+{
+  if (u->_mp_size == 0)
+    return 0;
+
+  return u->_mp_d[0];
+}
+
+/* Return the number of limbs used by the magnitude of u. */
+size_t
+mpz_size (const mpz_t u)
+{
+  mp_size_t limbs = u->_mp_size;
+
+  return limbs >= 0 ? limbs : -limbs;
+}
+
+/* Return limb number n of |u| (limb 0 is the least significant), or 0
+   when n lies outside the limbs in use. */
+mp_limb_t
+mpz_getlimbn (const mpz_t u, mp_size_t n)
+{
+  if (n < 0 || n >= GMP_ABS (u->_mp_size))
+    return 0;
+
+  return u->_mp_d[n];
+}
+
+\f
+/* Conversions and comparison to double. */
+/* Set r to the integer part of x, truncating toward zero.  NaN,
+   infinities, zero and any x with |x| < 1 all give r = 0. */
+void
+mpz_set_d (mpz_t r, double x)
+{
+  int sign;
+  mp_ptr rp;
+  mp_size_t rn, i;
+  double B;
+  double Bi;
+  mp_limb_t f;
+
+  /* x != x is true when x is a NaN, and x == x * 0.5 is true when x is
+     zero or infinity. */
+  if (x == 0.0 || x != x || x == x * 0.5)
+    {
+      r->_mp_size = 0;
+      return;
+    }
+
+  /* Work on the magnitude and remember the sign. */
+  if (x < 0.0)
+    {
+      x = - x;
+      sign = 1;
+    }
+  else
+    sign = 0;
+
+  if (x < 1.0)
+    {
+      r->_mp_size = 0;
+      return;
+    }
+  /* B is the limb base as a double; scale x down below B while
+     counting how many limbs the result needs. */
+  B = 2.0 * (double) GMP_LIMB_HIGHBIT;
+  Bi = 1.0 / B;
+  for (rn = 1; x >= B; rn++)
+    x *= Bi;
+
+  rp = MPZ_REALLOC (r, rn);
+
+  /* Peel off one limb at a time, most significant first: take the
+     integer part, keep the fraction, scale the fraction up by B. */
+  f = (mp_limb_t) x;
+  x -= f;
+  assert (x < 1.0);
+  rp[rn-1] = f;
+  for (i = rn-1; i-- > 0; )
+    {
+      x = B * x;
+      f = (mp_limb_t) x;
+      x -= f;
+      assert (x < 1.0);
+      rp[i] = f;
+    }
+
+  r->_mp_size = sign ? - rn : rn;
+}
+
+/* Initialize r with mpz_init and then assign it from the double x via
+   mpz_set_d. */
+void
+mpz_init_set_d (mpz_t r, double x)
+{
+  mpz_init (r);
+  mpz_set_d (r, x);
+}
+
+/* Convert u to a double by accumulating its limbs, most significant
+   first, in double arithmetic; the sign of u is applied at the end. */
+double
+mpz_get_d (const mpz_t u)
+{
+  const double B = 2.0 * (double) GMP_LIMB_HIGHBIT;
+  mp_size_t k = GMP_ABS (u->_mp_size);
+  double acc;
+
+  if (k == 0)
+    return 0.0;
+
+  acc = u->_mp_d[k - 1];
+  for (k--; k > 0; k--)
+    acc = B*acc + u->_mp_d[k - 1];
+
+  return (u->_mp_size < 0) ? -acc : acc;
+}
+
+/* Compare |x| with |d|.  Returns 1 if |x| is larger, -1 if |d| is
+   larger and 0 if they are equal. */
+int
+mpz_cmpabs_d (const mpz_t x, double d)
+{
+  mp_size_t xn;
+  double B, Bi;
+  mp_size_t i;
+
+  xn = x->_mp_size;
+  d = GMP_ABS (d);
+
+  if (xn != 0)
+    {
+      xn = GMP_ABS (xn);
+
+      /* B is the limb base as a double. */
+      B = 2.0 * (double) GMP_LIMB_HIGHBIT;
+      Bi = 1.0 / B;
+
+      /* Scale d so it can be compared with the top limb. */
+      for (i = 1; i < xn; i++)
+       d *= Bi;
+
+      /* Even after scaling d does not fit in one limb, so it is
+        larger than |x|. */
+      if (d >= B)
+       return -1;
+
+      /* Compare floor(d) to top limb, subtract and cancel when equal. */
+      for (i = xn; i-- > 0;)
+       {
+         mp_limb_t f, xl;
+
+         f = (mp_limb_t) d;
+         xl = x->_mp_d[i];
+         if (xl > f)
+           return 1;
+         else if (xl < f)
+           return -1;
+         d = B * (d - f);
+       }
+    }
+  /* All limbs matched (or x is zero): any part of d that is left over
+     makes |d| the larger one. */
+  return - (d > 0.0);
+}
+
+/* Compare x with the double d, taking signs into account.  When the
+   signs alone do not decide, the magnitudes are compared with
+   mpz_cmpabs_d (and the outcome is negated for negative operands). */
+int
+mpz_cmp_d (const mpz_t x, double d)
+{
+  if (x->_mp_size < 0)
+    return (d >= 0.0) ? -1 : -mpz_cmpabs_d (x, d);
+
+  return (d < 0.0) ? 1 : mpz_cmpabs_d (x, d);
+}
+
+\f
+/* MPZ comparisons and the like. */
+/* Return 1, 0 or -1 according to whether u is positive, zero or
+   negative. */
+int
+mpz_sgn (const mpz_t u)
+{
+  mp_size_t limbs = u->_mp_size;
+
+  return (limbs > 0) - (limbs < 0);
+}
+
+/* Compare u with the signed long v; returns 1, 0 or -1. */
+int
+mpz_cmp_si (const mpz_t u, long v)
+{
+  mp_size_t usize = u->_mp_size;
+  mp_limb_t vmag, ul;
+
+  /* A negative u with more than one limb is below any long. */
+  if (usize < -1)
+    return -1;
+  if (v >= 0)
+    return mpz_cmp_ui (u, v);
+  if (usize >= 0)
+    return 1;
+
+  /* Both are negative and u has exactly one limb: the one with the
+     larger magnitude is the smaller number. */
+  vmag = (mp_limb_t)GMP_NEG_CAST (unsigned long int, v);
+  ul = u->_mp_d[0];
+
+  if (vmag < ul)
+    return -1;
+  return vmag > ul;
+}
+
+/* Compare u with the unsigned long v; returns 1, 0 or -1. */
+int
+mpz_cmp_ui (const mpz_t u, unsigned long v)
+{
+  mp_size_t usize = u->_mp_size;
+  mp_limb_t ul;
+
+  if (usize > 1)
+    return 1;
+  if (usize < 0)
+    return -1;
+
+  /* u is zero or a single non-negative limb. */
+  ul = (usize > 0) ? u->_mp_d[0] : 0;
+
+  return (ul > v) - (ul < v);
+}
+
+/* Compare a with b.  Differing signed sizes decide the result
+   directly; otherwise the limbs are compared with mpn_cmp, with the
+   outcome negated when both operands are negative. */
+int
+mpz_cmp (const mpz_t a, const mpz_t b)
+{
+  mp_size_t asize = a->_mp_size;
+  mp_size_t bsize = b->_mp_size;
+
+  if (asize != bsize)
+    return (asize < bsize) ? -1 : 1;
+
+  if (asize == 0)
+    return 0;
+
+  if (asize > 0)
+    return mpn_cmp (a->_mp_d, b->_mp_d, asize);
+
+  return -mpn_cmp (a->_mp_d, b->_mp_d, -asize);
+}
+
+/* Compare |u| with the unsigned long v; returns 1, 0 or -1. */
+int
+mpz_cmpabs_ui (const mpz_t u, unsigned long v)
+{
+  mp_size_t un = GMP_ABS (u->_mp_size);
+  mp_limb_t ul;
+
+  if (un > 1)
+    return 1;
+
+  ul = (un == 1) ? u->_mp_d[0] : 0;
+
+  return (ul > v) - (ul < v);
+}
+
+/* Compare |u| with |v| by handing both magnitudes, with their limb
+   counts, to mpn_cmp4. */
+int
+mpz_cmpabs (const mpz_t u, const mpz_t v)
+{
+  return mpn_cmp4 (u->_mp_d, GMP_ABS (u->_mp_size),
+                  v->_mp_d, GMP_ABS (v->_mp_size));
+}
+
+/* Set r to |u|: copy u into r unless they are the same variable, then
+   make the size field non-negative. */
+void
+mpz_abs (mpz_t r, const mpz_t u)
+{
+  if (r != u)
+    mpz_set (r, u);
+
+  r->_mp_size = GMP_ABS (r->_mp_size);
+}
+
+/* Set r to -u: copy u into r unless they are the same variable, then
+   flip the sign of the size field. */
+void
+mpz_neg (mpz_t r, const mpz_t u)
+{
+  if (r != u)
+    mpz_set (r, u);
+
+  r->_mp_size = -r->_mp_size;
+}
+
+/* Exchange the values of u and v by swapping their size, allocation
+   and limb-pointer fields; no limbs are copied. */
+void
+mpz_swap (mpz_t u, mpz_t v)
+{
+  MP_SIZE_T_SWAP (u->_mp_size, v->_mp_size);
+  MP_SIZE_T_SWAP (u->_mp_alloc, v->_mp_alloc);
+  MP_PTR_SWAP (u->_mp_d, v->_mp_d);
+}
+
+\f
+/* MPZ addition and subtraction */
+
+/* Compute |a| + b into the limbs of r.  Returns the limb count of the
+   result but does not store it in r; the caller sets the size and
+   sign. */
+static mp_size_t
+mpz_abs_add_ui (mpz_t r, const mpz_t a, unsigned long b)
+{
+  mp_size_t an = GMP_ABS (a->_mp_size);
+  mp_ptr rp;
+  mp_limb_t carry;
+
+  if (an == 0)
+    {
+      r->_mp_d[0] = b;
+      return b > 0;
+    }
+
+  /* One extra limb for a possible carry out. */
+  rp = MPZ_REALLOC (r, an + 1);
+
+  carry = mpn_add_1 (rp, a->_mp_d, an, b);
+  rp[an] = carry;
+
+  return an + (carry > 0);
+}
+
+/* Compute |a| - b into the limbs of r.  Returns the limb count of the
+   result, or -1 when b is larger than |a| (the limbs then hold
+   b - |a|).  The size is not stored in r; the caller does that. */
+static mp_size_t
+mpz_abs_sub_ui (mpz_t r, const mpz_t a, unsigned long b)
+{
+  mp_size_t an = GMP_ABS (a->_mp_size);
+  mp_ptr rp = MPZ_REALLOC (r, an);
+
+  if (an == 0)
+    {
+      rp[0] = b;
+      return -(b > 0);
+    }
+
+  if (an == 1 && a->_mp_d[0] < b)
+    {
+      rp[0] = b - a->_mp_d[0];
+      return -1;
+    }
+
+  /* |a| >= b here, so the subtraction cannot borrow out. */
+  gmp_assert_nocarry (mpn_sub_1 (rp, a->_mp_d, an, b));
+  return mpn_normalized_size (rp, an);
+}
+
+/* Set r = a + b.  For non-negative a the magnitudes add; for negative
+   a the result is -(|a| - b). */
+void
+mpz_add_ui (mpz_t r, const mpz_t a, unsigned long b)
+{
+  mp_size_t rn;
+
+  if (a->_mp_size < 0)
+    rn = -mpz_abs_sub_ui (r, a, b);
+  else
+    rn = mpz_abs_add_ui (r, a, b);
+
+  r->_mp_size = rn;
+}
+
+/* Set r = a - b.  For negative a the result is -(|a| + b); otherwise
+   it is |a| - b. */
+void
+mpz_sub_ui (mpz_t r, const mpz_t a, unsigned long b)
+{
+  mp_size_t rn;
+
+  if (a->_mp_size >= 0)
+    rn = mpz_abs_sub_ui (r, a, b);
+  else
+    rn = -mpz_abs_add_ui (r, a, b);
+
+  r->_mp_size = rn;
+}
+
+/* Set r = a - b.  For negative b the result is a + |b|; otherwise it
+   is -(|b| - a). */
+void
+mpz_ui_sub (mpz_t r, unsigned long a, const mpz_t b)
+{
+  mp_size_t rn;
+
+  if (b->_mp_size >= 0)
+    rn = -mpz_abs_sub_ui (r, b, a);
+  else
+    rn = mpz_abs_add_ui (r, b, a);
+
+  r->_mp_size = rn;
+}
+
+/* Compute |a| + |b| into the limbs of r and return the limb count of
+   the sum.  The size field of r is not written. */
+static mp_size_t
+mpz_abs_add (mpz_t r, const mpz_t a, const mpz_t b)
+{
+  mp_size_t an = GMP_ABS (a->_mp_size);
+  mp_size_t bn = GMP_ABS (b->_mp_size);
+  mp_size_t rn = GMP_MAX (an, bn);
+  mp_ptr rp;
+  mp_limb_t carry;
+
+  /* Reallocate first: r may be a or b, so the limb pointers are only
+     read afterwards. */
+  rp = MPZ_REALLOC (r, rn + 1);
+
+  /* mpn_add is given the longer operand first. */
+  carry = (an >= bn)
+    ? mpn_add (rp, a->_mp_d, an, b->_mp_d, bn)
+    : mpn_add (rp, b->_mp_d, bn, a->_mp_d, an);
+
+  rp[rn] = carry;
+
+  return rn + (carry > 0);
+}
+
+/* Compute |a| - |b| into the limbs of r.  Returns the limb count of
+   the difference, negated when |a| < |b| (the limbs then hold
+   |b| - |a|), and 0 when the magnitudes are equal.  The size field of
+   r is not written. */
+static mp_size_t
+mpz_abs_sub (mpz_t r, const mpz_t a, const mpz_t b)
+{
+  mp_size_t an = GMP_ABS (a->_mp_size);
+  mp_size_t bn = GMP_ABS (b->_mp_size);
+  mp_ptr rp;
+  int cmp;
+
+  cmp = mpn_cmp4 (a->_mp_d, an, b->_mp_d, bn);
+  if (cmp == 0)
+    return 0;
+
+  if (cmp > 0)
+    {
+      rp = MPZ_REALLOC (r, an);
+      gmp_assert_nocarry (mpn_sub (rp, a->_mp_d, an, b->_mp_d, bn));
+      return mpn_normalized_size (rp, an);
+    }
+
+  /* |a| < |b|: subtract the other way round and flag it with a
+     negative count. */
+  rp = MPZ_REALLOC (r, bn);
+  gmp_assert_nocarry (mpn_sub (rp, b->_mp_d, bn, a->_mp_d, an));
+  return -mpn_normalized_size (rp, bn);
+}
+
+/* Set r = a + b.  Operands whose size fields have the same sign have
+   their magnitudes added, otherwise subtracted; the signed limb count
+   is then given the sign of a. */
+void
+mpz_add (mpz_t r, const mpz_t a, const mpz_t b)
+{
+  int same_sign = (a->_mp_size ^ b->_mp_size) >= 0;
+  mp_size_t rn;
+
+  rn = same_sign ? mpz_abs_add (r, a, b) : mpz_abs_sub (r, a, b);
+
+  r->_mp_size = a->_mp_size >= 0 ? rn : - rn;
+}
+
+/* Set r = a - b.  Operands whose size fields have the same sign have
+   their magnitudes subtracted, otherwise added; the signed limb count
+   is then given the sign of a. */
+void
+mpz_sub (mpz_t r, const mpz_t a, const mpz_t b)
+{
+  int same_sign = (a->_mp_size ^ b->_mp_size) >= 0;
+  mp_size_t rn;
+
+  rn = same_sign ? mpz_abs_sub (r, a, b) : mpz_abs_add (r, a, b);
+
+  r->_mp_size = a->_mp_size >= 0 ? rn : - rn;
+}
+
+\f
+/* MPZ multiplication */
+/* Set r = u * v for a signed long v: multiply by the magnitude of v
+   with mpz_mul_ui and negate the product when v is negative. */
+void
+mpz_mul_si (mpz_t r, const mpz_t u, long int v)
+{
+  if (v >= 0)
+    {
+      mpz_mul_ui (r, u, (unsigned long int) v);
+      return;
+    }
+
+  mpz_mul_ui (r, u, GMP_NEG_CAST (unsigned long int, v));
+  mpz_neg (r, r);
+}
+
+/* Set r = u * v for an unsigned long v.  The product is built in a
+   temporary and swapped into r, so r may be the same variable as u. */
+void
+mpz_mul_ui (mpz_t r, const mpz_t u, unsigned long int v)
+{
+  mp_size_t un = GMP_ABS (u->_mp_size);
+  mp_size_t tn;
+  mp_limb_t carry;
+  mpz_t t;
+
+  if (un == 0 || v == 0)
+    {
+      r->_mp_size = 0;
+      return;
+    }
+
+  /* Room for un limbs plus a carry limb. */
+  mpz_init2 (t, (un + 1) * GMP_LIMB_BITS);
+
+  carry = mpn_mul_1 (t->_mp_d, u->_mp_d, un, v);
+  t->_mp_d[un] = carry;
+
+  tn = un + (carry > 0);
+  t->_mp_size = (u->_mp_size < 0) ? - tn : tn;
+
+  mpz_swap (r, t);
+  mpz_clear (t);
+}
+
+/* Set r = u * v.  The product is built in a temporary and swapped
+   into r, so r may be the same variable as u or v. */
+void
+mpz_mul (mpz_t r, const mpz_t u, const mpz_t v)
+{
+  mp_size_t un = GMP_ABS (u->_mp_size);
+  mp_size_t vn = GMP_ABS (v->_mp_size);
+  mp_size_t rn;
+  int negative;
+  mpz_t t;
+
+  if (un == 0 || vn == 0)
+    {
+      r->_mp_size = 0;
+      return;
+    }
+
+  negative = (u->_mp_size ^ v->_mp_size) < 0;
+
+  mpz_init2 (t, (un + vn) * GMP_LIMB_BITS);
+
+  /* mpn_mul is given the longer operand first. */
+  if (un < vn)
+    mpn_mul (t->_mp_d, v->_mp_d, vn, u->_mp_d, un);
+  else
+    mpn_mul (t->_mp_d, u->_mp_d, un, v->_mp_d, vn);
+
+  /* The product has un + vn limbs, of which the top one may be zero. */
+  rn = un + vn;
+  if (t->_mp_d[rn-1] == 0)
+    rn--;
+
+  t->_mp_size = negative ? - rn : rn;
+  mpz_swap (r, t);
+  mpz_clear (t);
+}
+
+/* Set r = u * 2^bits.  The shift is split into whole limbs and a
+   remaining bit count; the sign of u is preserved. */
+void
+mpz_mul_2exp (mpz_t r, const mpz_t u, mp_bitcnt_t bits)
+{
+  mp_size_t un = GMP_ABS (u->_mp_size);
+  mp_size_t rn, limbs, k;
+  unsigned shift;
+  mp_ptr rp;
+
+  if (un == 0)
+    {
+      r->_mp_size = 0;
+      return;
+    }
+
+  limbs = bits / GMP_LIMB_BITS;
+  shift = bits % GMP_LIMB_BITS;
+
+  rn = un + limbs + (shift > 0);
+  rp = MPZ_REALLOC (r, rn);
+
+  if (shift == 0)
+    mpn_copyd (rp + limbs, u->_mp_d, un);
+  else
+    {
+      mp_limb_t carry = mpn_lshift (rp + limbs, u->_mp_d, un, shift);
+
+      rp[rn-1] = carry;
+      rn -= (carry == 0);
+    }
+
+  /* Clear the low limbs only after the data has been moved up, since
+     r and u may share storage. */
+  for (k = 0; k < limbs; k++)
+    rp[k] = 0;
+
+  r->_mp_size = (u->_mp_size < 0) ? - rn : rn;
+}
+
+\f
+/* MPZ division */
+/* Rounding direction of the quotient in the division helpers below:
+   floor, ceiling, or truncation toward zero. */
+enum mpz_div_round_mode { GMP_DIV_FLOOR, GMP_DIV_CEIL, GMP_DIV_TRUNC };
+
+/* Divide n by d, rounding the quotient according to mode, and store
+   the quotient in q and the remainder in r.  Either q or r may be
+   NULL, in which case that result is not stored.  Division by zero is
+   reported through gmp_die.  Returns 1 iff remainder is non-zero. */
+static int
+mpz_div_qr (mpz_t q, mpz_t r,
+           const mpz_t n, const mpz_t d, enum mpz_div_round_mode mode)
+{
+  mp_size_t ns, ds, nn, dn, qs;
+  ns = n->_mp_size;
+  ds = d->_mp_size;
+
+  if (ds == 0)
+    gmp_die("mpz_div_qr: Divide by zero.");
+
+  if (ns == 0)
+    {
+      if (q)
+       q->_mp_size = 0;
+      if (r)
+       r->_mp_size = 0;
+      return 0;
+    }
+
+  nn = GMP_ABS (ns);
+  dn = GMP_ABS (ds);
+
+  /* Only the sign of qs is used: negative iff n and d have opposite
+     signs. */
+  qs = ds ^ ns;
+
+  /* n has fewer limbs than d, so |n| < |d| and the truncated quotient
+     is zero; only the rounding mode can move it to +1 or -1. */
+  if (nn < dn)
+    {
+      if (mode == GMP_DIV_CEIL && qs >= 0)
+       {
+         /* q = 1, r = n - d */
+         if (r)
+           mpz_sub (r, n, d);
+         if (q)
+           mpz_set_ui (q, 1);
+       }
+      else if (mode == GMP_DIV_FLOOR && qs < 0)
+       {
+         /* q = -1, r = n + d */
+         if (r)
+           mpz_add (r, n, d);
+         if (q)
+           mpz_set_si (q, -1);
+       }
+      else
+       {
+         /* q = 0, r = n */
+         if (r)
+           mpz_set (r, n);
+         if (q)
+           q->_mp_size = 0;
+       }
+      return 1;
+    }
+  else
+    {
+      mp_ptr np, qp;
+      mp_size_t qn, rn;
+      mpz_t tq, tr;
+
+      /* Work on a copy of n: mpn_div_qr overwrites its numerator and
+        leaves the remainder in its low dn limbs. */
+      mpz_init (tr);
+      mpz_set (tr, n);
+      np = tr->_mp_d;
+
+      qn = nn - dn + 1;
+
+      /* The quotient goes to a temporary too, so that q may be the
+        same variable as n or d. */
+      if (q)
+       {
+         mpz_init2 (tq, qn * GMP_LIMB_BITS);
+         qp = tq->_mp_d;
+       }
+      else
+       qp = NULL;
+
+      mpn_div_qr (qp, np, nn, d->_mp_d, dn);
+
+      if (qp)
+       {
+         qn -= (qp[qn-1] == 0);
+
+         tq->_mp_size = qs < 0 ? -qn : qn;
+       }
+      /* The truncated remainder takes the sign of n. */
+      rn = mpn_normalized_size (np, dn);
+      tr->_mp_size = ns < 0 ? - rn : rn;
+
+      /* Turn the truncated result into a floor or ceiling result when
+        the remainder is non-zero and the modes differ for this sign
+        combination. */
+      if (mode == GMP_DIV_FLOOR && qs < 0 && rn != 0)
+       {
+         if (q)
+           mpz_sub_ui (tq, tq, 1);
+         if (r)
+           mpz_add (tr, tr, d);
+       }
+      else if (mode == GMP_DIV_CEIL && qs >= 0 && rn != 0)
+       {
+         if (q)
+           mpz_add_ui (tq, tq, 1);
+         if (r)
+           mpz_sub (tr, tr, d);
+       }
+
+      if (q)
+       {
+         mpz_swap (tq, q);
+         mpz_clear (tq);
+       }
+      if (r)
+       mpz_swap (tr, r);
+
+      mpz_clear (tr);
+
+      return rn != 0;
+    }
+}
+
+/* Quotient q and remainder r of n / d, quotient rounded toward
+   +infinity (GMP_DIV_CEIL). */
+void
+mpz_cdiv_qr (mpz_t q, mpz_t r, const mpz_t n, const mpz_t d)
+{
+  mpz_div_qr (q, r, n, d, GMP_DIV_CEIL);
+}
+
+/* Quotient q and remainder r of n / d, quotient rounded toward
+   -infinity (GMP_DIV_FLOOR). */
+void
+mpz_fdiv_qr (mpz_t q, mpz_t r, const mpz_t n, const mpz_t d)
+{
+  mpz_div_qr (q, r, n, d, GMP_DIV_FLOOR);
+}
+
+/* Quotient q and remainder r of n / d, quotient truncated toward zero
+   (GMP_DIV_TRUNC). */
+void
+mpz_tdiv_qr (mpz_t q, mpz_t r, const mpz_t n, const mpz_t d)
+{
+  mpz_div_qr (q, r, n, d, GMP_DIV_TRUNC);
+}
+
+/* Quotient only of n / d, rounded toward +infinity; the remainder is
+   not stored (NULL is passed for it). */
+void
+mpz_cdiv_q (mpz_t q, const mpz_t n, const mpz_t d)
+{
+  mpz_div_qr (q, NULL, n, d, GMP_DIV_CEIL);
+}
+
+/* Quotient only of n / d, rounded toward -infinity; the remainder is
+   not stored (NULL is passed for it). */
+void
+mpz_fdiv_q (mpz_t q, const mpz_t n, const mpz_t d)
+{
+  mpz_div_qr (q, NULL, n, d, GMP_DIV_FLOOR);
+}
+
+/* Quotient only of n / d, truncated toward zero; the remainder is not
+   stored (NULL is passed for it). */
+void
+mpz_tdiv_q (mpz_t q, const mpz_t n, const mpz_t d)
+{
+  mpz_div_qr (q, NULL, n, d, GMP_DIV_TRUNC);
+}
+
+/* Remainder only of n / d for a quotient rounded toward +infinity;
+   the quotient is not stored (NULL is passed for it). */
+void
+mpz_cdiv_r (mpz_t r, const mpz_t n, const mpz_t d)
+{
+  mpz_div_qr (NULL, r, n, d, GMP_DIV_CEIL);
+}
+
+/* Remainder only of n / d for a quotient rounded toward -infinity;
+   the quotient is not stored (NULL is passed for it). */
+void
+mpz_fdiv_r (mpz_t r, const mpz_t n, const mpz_t d)
+{
+  mpz_div_qr (NULL, r, n, d, GMP_DIV_FLOOR);
+}
+
+/* Remainder only of n / d for a quotient truncated toward zero; the
+   quotient is not stored (NULL is passed for it). */
+void
+mpz_tdiv_r (mpz_t r, const mpz_t n, const mpz_t d)
+{
+  mpz_div_qr (NULL, r, n, d, GMP_DIV_TRUNC);
+}
+
+/* Set r to n mod d.  Floor rounding is used for a non-negative d and
+   ceiling rounding for a negative d, which makes the remainder
+   non-negative in both cases. */
+void
+mpz_mod (mpz_t r, const mpz_t n, const mpz_t d)
+{
+  if (d->_mp_size >= 0)
+    mpz_div_qr (NULL, r, n, d, GMP_DIV_FLOOR);
+  else
+    mpz_div_qr (NULL, r, n, d, GMP_DIV_CEIL);
+}
+
+/* Set q to u / 2^bit_index, rounded according to mode.  The magnitude
+   is shifted right (truncating), one is added to it when the mode
+   rounds away from zero for the sign of u and non-zero bits were
+   discarded, and finally the sign of u is applied. */
+static void
+mpz_div_q_2exp (mpz_t q, const mpz_t u, mp_bitcnt_t bit_index,
+               enum mpz_div_round_mode mode)
+{
+  mp_size_t un, qn;
+  mp_size_t limb_cnt;
+  mp_ptr qp;
+  mp_limb_t adjust;
+
+  un = u->_mp_size;
+  if (un == 0)
+    {
+      q->_mp_size = 0;
+      return;
+    }
+  /* Split the shift into whole limbs and a bit count within a limb;
+     qn is the limb count left after dropping the whole limbs. */
+  limb_cnt = bit_index / GMP_LIMB_BITS;
+  qn = GMP_ABS (un) - limb_cnt;
+  bit_index %= GMP_LIMB_BITS;
+
+  if (mode == ((un > 0) ? GMP_DIV_CEIL : GMP_DIV_FLOOR)) /* un != 0 here. */
+    /* Note: Below, the final indexing at limb_cnt is valid because at
+       that point we have qn > 0. */
+    adjust = (qn <= 0
+             || !mpn_zero_p (u->_mp_d, limb_cnt)
+             || (u->_mp_d[limb_cnt]
+                 & (((mp_limb_t) 1 << bit_index) - 1)));
+  else
+    adjust = 0;
+
+  /* Everything is shifted out: the truncated quotient is zero. */
+  if (qn <= 0)
+    qn = 0;
+
+  else
+    {
+      qp = MPZ_REALLOC (q, qn);
+
+      if (bit_index != 0)
+       {
+         mpn_rshift (qp, u->_mp_d + limb_cnt, qn, bit_index);
+         qn -= qp[qn - 1] == 0;
+       }
+      else
+       {
+         mpn_copyi (qp, u->_mp_d + limb_cnt, qn);
+       }
+    }
+
+  q->_mp_size = qn;
+
+  /* Apply the rounding adjustment to the magnitude, then the sign. */
+  mpz_add_ui (q, q, adjust);
+  if (un < 0)
+    mpz_neg (q, q);
+}
+
+/* Set r to the remainder of u divided by 2^bit_index, for a quotient
+   rounded according to mode.  The low bit_index bits of |u| are kept;
+   when the mode rounds the quotient away from zero for the sign of u
+   and those bits are non-zero, they are replaced by
+   2^bit_index - (low bits) and the sign is flipped. */
+static void
+mpz_div_r_2exp (mpz_t r, const mpz_t u, mp_bitcnt_t bit_index,
+               enum mpz_div_round_mode mode)
+{
+  mp_size_t us, un, rn;
+  mp_ptr rp;
+  mp_limb_t mask;
+
+  us = u->_mp_size;
+  if (us == 0 || bit_index == 0)
+    {
+      r->_mp_size = 0;
+      return;
+    }
+  /* Number of limbs needed to hold bit_index bits. */
+  rn = (bit_index + GMP_LIMB_BITS - 1) / GMP_LIMB_BITS;
+  assert (rn > 0);
+
+  rp = MPZ_REALLOC (r, rn);
+  un = GMP_ABS (us);
+
+  /* Mask selecting the bits of the top result limb that lie below
+     bit_index. */
+  mask = GMP_LIMB_MAX >> (rn * GMP_LIMB_BITS - bit_index);
+
+  if (rn > un)
+    {
+      /* Quotient (with truncation) is zero, and remainder is
+        non-zero */
+      if (mode == ((us > 0) ? GMP_DIV_CEIL : GMP_DIV_FLOOR)) /* us != 0 here. */
+       {
+         /* Have to negate and sign extend. */
+         mp_size_t i;
+         mp_limb_t cy;
+
+         /* Two's complement negation of the un limbs of u. */
+         for (cy = 1, i = 0; i < un; i++)
+           {
+             mp_limb_t s = ~u->_mp_d[i] + cy;
+             cy = s < cy;
+             rp[i] = s;
+           }
+         assert (cy == 0);
+         /* Fill the limbs above u with ones, up to the masked top. */
+         for (; i < rn - 1; i++)
+           rp[i] = GMP_LIMB_MAX;
+
+         rp[rn-1] = mask;
+         us = -us;
+       }
+      else
+       {
+         /* Just copy */
+         if (r != u)
+           mpn_copyi (rp, u->_mp_d, un);
+
+         rn = un;
+       }
+    }
+  else
+    {
+      /* Keep the low rn limbs of u, masking the top one. */
+      if (r != u)
+       mpn_copyi (rp, u->_mp_d, rn - 1);
+
+      rp[rn-1] = u->_mp_d[rn-1] & mask;
+
+      if (mode == ((us > 0) ? GMP_DIV_CEIL : GMP_DIV_FLOOR)) /* us != 0 here. */
+       {
+         /* If r != 0, compute 2^{bit_count} - r. */
+         mp_size_t i;
+
+         /* Skip the low zero limbs; negating leaves them zero. */
+         for (i = 0; i < rn && rp[i] == 0; i++)
+           ;
+         if (i < rn)
+           {
+             /* r > 0, need to flip sign. */
+             rp[i] = ~rp[i] + 1;
+             for (i++; i < rn; i++)
+               rp[i] = ~rp[i];
+
+             rp[rn-1] &= mask;
+
+             /* us is not used for anything else, so we can modify it
+                here to indicate flipped sign. */
+             us = -us;
+           }
+       }
+    }
+  rn = mpn_normalized_size (rp, rn);
+  r->_mp_size = us < 0 ? -rn : rn;
+}
+
+/* Quotient of u / 2^cnt, ceiling rounding (GMP_DIV_CEIL). */
+void
+mpz_cdiv_q_2exp (mpz_t r, const mpz_t u, mp_bitcnt_t cnt)
+{
+  mpz_div_q_2exp (r, u, cnt, GMP_DIV_CEIL);
+}
+
+/* Quotient of u / 2^cnt, floor rounding (GMP_DIV_FLOOR). */
+void
+mpz_fdiv_q_2exp (mpz_t r, const mpz_t u, mp_bitcnt_t cnt)
+{
+  mpz_div_q_2exp (r, u, cnt, GMP_DIV_FLOOR);
+}
+
+/* Quotient of u / 2^cnt, truncating (GMP_DIV_TRUNC). */
+void
+mpz_tdiv_q_2exp (mpz_t r, const mpz_t u, mp_bitcnt_t cnt)
+{
+  mpz_div_q_2exp (r, u, cnt, GMP_DIV_TRUNC);
+}
+
+/* Remainder of u / 2^cnt matching a ceiling quotient. */
+void
+mpz_cdiv_r_2exp (mpz_t r, const mpz_t u, mp_bitcnt_t cnt)
+{
+  mpz_div_r_2exp (r, u, cnt, GMP_DIV_CEIL);
+}
+
+/* Remainder of u / 2^cnt matching a floor quotient. */
+void
+mpz_fdiv_r_2exp (mpz_t r, const mpz_t u, mp_bitcnt_t cnt)
+{
+  mpz_div_r_2exp (r, u, cnt, GMP_DIV_FLOOR);
+}
+
+/* Remainder of u / 2^cnt matching a truncated quotient. */
+void
+mpz_tdiv_r_2exp (mpz_t r, const mpz_t u, mp_bitcnt_t cnt)
+{
+  mpz_div_r_2exp (r, u, cnt, GMP_DIV_TRUNC);
+}
+
+/* Set q to n / d using truncating division. The call is wrapped in
+   gmp_assert_nocarry, so mpz_div_qr is expected to return zero here
+   (presumably meaning "remainder is zero" -- confirm against
+   mpz_div_qr). */
+void
+mpz_divexact (mpz_t q, const mpz_t n, const mpz_t d)
+{
+  gmp_assert_nocarry (mpz_div_qr (q, NULL, n, d, GMP_DIV_TRUNC));
+}
+
+/* Return non-zero iff mpz_div_qr returns zero for n divided by d.
+   Neither quotient nor remainder is stored (both outputs are NULL). */
+int
+mpz_divisible_p (const mpz_t n, const mpz_t d)
+{
+  return mpz_div_qr (NULL, NULL, n, d, GMP_DIV_TRUNC) == 0;
+}
+
+/* Divide n by the single word d, rounding the quotient according to
+   mode. The quotient is stored in q and the remainder in r; either
+   may be NULL, in which case that result is not stored. Returns the
+   absolute value of the remainder. */
+static unsigned long
+mpz_div_qr_ui (mpz_t q, mpz_t r,
+              const mpz_t n, unsigned long d, enum mpz_div_round_mode mode)
+{
+  mp_size_t ns, qn;
+  mp_ptr qp;
+  mp_limb_t rl;
+  mp_size_t rs;
+
+  ns = n->_mp_size;
+  if (ns == 0)
+    {
+      if (q)
+       q->_mp_size = 0;
+      if (r)
+       r->_mp_size = 0;
+      return 0;
+    }
+
+  qn = GMP_ABS (ns);
+  if (q)
+    qp = MPZ_REALLOC (q, qn);
+  else
+    qp = NULL;
+
+  /* Truncating division of |n|; rl is the remainder limb. */
+  rl = mpn_div_qr_1 (qp, n->_mp_d, qn, d);
+  assert (rl < d);
+
+  /* Remainder size (0 or 1), given the sign of n. */
+  rs = rl > 0;
+  rs = (ns < 0) ? -rs : rs;
+
+  /* Floor with n < 0, or ceiling with n >= 0, moves the quotient one
+     step away from zero: |q| grows by one and the remainder becomes
+     d - rl with the opposite sign. */
+  if (rl > 0 && ( (mode == GMP_DIV_FLOOR && ns < 0)
+                 || (mode == GMP_DIV_CEIL && ns >= 0)))
+    {
+      if (q)
+       gmp_assert_nocarry (mpn_add_1 (qp, qp, qn, 1));
+      rl = d - rl;
+      rs = -rs;
+    }
+
+  if (r)
+    {
+      /* NOTE(review): written without MPZ_REALLOC; assumes r already
+        has at least one limb allocated -- confirm against mpz_init. */
+      r->_mp_d[0] = rl;
+      r->_mp_size = rs;
+    }
+  if (q)
+    {
+      /* The quotient has qn or qn - 1 significant limbs. */
+      qn -= (qp[qn-1] == 0);
+      assert (qn == 0 || qp[qn-1] > 0);
+
+      q->_mp_size = (ns < 0) ? - qn : qn;
+    }
+
+  return rl;
+}
+
+/* Ceiling division of n by d: quotient to q, remainder to r. Returns
+   the value returned by mpz_div_qr_ui. */
+unsigned long
+mpz_cdiv_qr_ui (mpz_t q, mpz_t r, const mpz_t n, unsigned long d)
+{
+  return mpz_div_qr_ui (q, r, n, d, GMP_DIV_CEIL);
+}
+
+/* Floor division of n by d: quotient to q, remainder to r. */
+unsigned long
+mpz_fdiv_qr_ui (mpz_t q, mpz_t r, const mpz_t n, unsigned long d)
+{
+  return mpz_div_qr_ui (q, r, n, d, GMP_DIV_FLOOR);
+}
+
+/* Truncating division of n by d: quotient to q, remainder to r. */
+unsigned long
+mpz_tdiv_qr_ui (mpz_t q, mpz_t r, const mpz_t n, unsigned long d)
+{
+  return mpz_div_qr_ui (q, r, n, d, GMP_DIV_TRUNC);
+}
+
+/* Ceiling division of n by d, storing only the quotient. */
+unsigned long
+mpz_cdiv_q_ui (mpz_t q, const mpz_t n, unsigned long d)
+{
+  return mpz_div_qr_ui (q, NULL, n, d, GMP_DIV_CEIL);
+}
+
+/* Floor division of n by d, storing only the quotient. */
+unsigned long
+mpz_fdiv_q_ui (mpz_t q, const mpz_t n, unsigned long d)
+{
+  return mpz_div_qr_ui (q, NULL, n, d, GMP_DIV_FLOOR);
+}
+
+/* Truncating division of n by d, storing only the quotient. */
+unsigned long
+mpz_tdiv_q_ui (mpz_t q, const mpz_t n, unsigned long d)
+{
+  return mpz_div_qr_ui (q, NULL, n, d, GMP_DIV_TRUNC);
+}
+
+/* Ceiling division of n by d, storing only the remainder in r. */
+unsigned long
+mpz_cdiv_r_ui (mpz_t r, const mpz_t n, unsigned long d)
+{
+  return mpz_div_qr_ui (NULL, r, n, d, GMP_DIV_CEIL);
+}
+/* Floor division of n by d, storing only the remainder in r. */
+unsigned long
+mpz_fdiv_r_ui (mpz_t r, const mpz_t n, unsigned long d)
+{
+  return mpz_div_qr_ui (NULL, r, n, d, GMP_DIV_FLOOR);
+}
+/* Truncating division of n by d, storing only the remainder in r. */
+unsigned long
+mpz_tdiv_r_ui (mpz_t r, const mpz_t n, unsigned long d)
+{
+  return mpz_div_qr_ui (NULL, r, n, d, GMP_DIV_TRUNC);
+}
+
+/* Ceiling division of n by d; nothing is stored, only the value
+   returned by mpz_div_qr_ui is passed on. */
+unsigned long
+mpz_cdiv_ui (const mpz_t n, unsigned long d)
+{
+  return mpz_div_qr_ui (NULL, NULL, n, d, GMP_DIV_CEIL);
+}
+
+/* Floor division of n by d; nothing is stored. */
+unsigned long
+mpz_fdiv_ui (const mpz_t n, unsigned long d)
+{
+  return mpz_div_qr_ui (NULL, NULL, n, d, GMP_DIV_FLOOR);
+}
+
+/* Truncating division of n by d; nothing is stored. */
+unsigned long
+mpz_tdiv_ui (const mpz_t n, unsigned long d)
+{
+  return mpz_div_qr_ui (NULL, NULL, n, d, GMP_DIV_TRUNC);
+}
+
+/* Same call as mpz_fdiv_r_ui: floor remainder of n by d, stored in r. */
+unsigned long
+mpz_mod_ui (mpz_t r, const mpz_t n, unsigned long d)
+{
+  return mpz_div_qr_ui (NULL, r, n, d, GMP_DIV_FLOOR);
+}
+
+/* Set q to n / d using truncating division, and assert (through
+   gmp_assert_nocarry) that the remainder returned by mpz_div_qr_ui is
+   zero. */
+void
+mpz_divexact_ui (mpz_t q, const mpz_t n, unsigned long d)
+{
+  gmp_assert_nocarry (mpz_div_qr_ui (q, NULL, n, d, GMP_DIV_TRUNC));
+}
+
+/* Return non-zero iff the remainder of n divided by d is zero. */
+int
+mpz_divisible_ui_p (const mpz_t n, unsigned long d)
+{
+  return mpz_div_qr_ui (NULL, NULL, n, d, GMP_DIV_TRUNC) == 0;
+}
+
+\f
+/* GCD */
+/* Binary gcd of two single limbs, at least one of them non-zero. */
+static mp_limb_t
+mpn_gcd_11 (mp_limb_t u, mp_limb_t v)
+{
+  unsigned common_twos;
+
+  assert ( (u | v) > 0);
+
+  if (u == 0)
+    return v;
+  if (v == 0)
+    return u;
+
+  /* Factor out the power of two shared by both operands. */
+  gmp_ctz (common_twos, u | v);
+  u >>= common_twos;
+  v >>= common_twos;
+
+  /* At least one operand is odd now; arrange for it to be u, then
+     make v odd as well. */
+  if ( (u & 1) == 0)
+    MP_LIMB_T_SWAP (u, v);
+
+  for (; (v & 1) == 0; v >>= 1)
+    ;
+
+  /* Both odd: subtract the smaller from the larger and strip the
+     factors of two from the (even) difference until they agree. */
+  for (;;)
+    {
+      if (u == v)
+       break;
+
+      if (u < v)
+       MP_LIMB_T_SWAP (u, v);
+
+      u -= v;
+      do
+       u >>= 1;
+      while ( (u & 1) == 0);
+    }
+  return u << common_twos;
+}
+
+/* Compute gcd (u, v) for a single-word v. The result is stored in g
+   when g is non-NULL, and the (possibly updated) v is returned; for
+   v == 0 the gcd is |u| and the return value is 0. */
+unsigned long
+mpz_gcd_ui (mpz_t g, const mpz_t u, unsigned long v)
+{
+  mp_size_t limbs;
+
+  if (v == 0)
+    {
+      if (g)
+       mpz_abs (g, u);
+      return v;
+    }
+
+  limbs = GMP_ABS (u->_mp_size);
+  if (limbs != 0)
+    {
+      /* gcd (u, v) = gcd (u mod v, v). */
+      mp_limb_t rem = mpn_div_qr_1 (NULL, u->_mp_d, limbs, v);
+      v = mpn_gcd_11 (rem, v);
+    }
+
+  if (g)
+    mpz_set_ui (g, v);
+
+  return v;
+}
+
+/* Set r to |u| with all trailing zero bits shifted out, and return
+   the number of bits removed. u must be non-zero. The stored size is
+   always non-negative, so the sign of u is dropped. */
+static mp_bitcnt_t
+mpz_make_odd (mpz_t r, const mpz_t u)
+{
+  mp_size_t un, rn, i;
+  mp_ptr rp;
+  unsigned shift;
+
+  un = GMP_ABS (u->_mp_size);
+  assert (un > 0);
+
+  /* Index of the lowest non-zero limb. */
+  for (i = 0; u->_mp_d[i] == 0; i++)
+    ;
+
+  /* Trailing zero bits within that limb. */
+  gmp_ctz (shift, u->_mp_d[i]);
+
+  rn = un - i;
+  rp = MPZ_REALLOC (r, rn);
+  if (shift > 0)
+    {
+      mpn_rshift (rp, u->_mp_d + i, rn, shift);
+      /* The shift may have emptied the top limb. */
+      rn -= (rp[rn-1] == 0);
+    }
+  else
+    mpn_copyi (rp, u->_mp_d + i, rn);
+
+  r->_mp_size = rn;
+  return i * GMP_LIMB_BITS + shift;
+}
+
+/* Set g to the greatest common divisor of u and v. If one input is
+   zero, g is the absolute value of the other. */
+void
+mpz_gcd (mpz_t g, const mpz_t u, const mpz_t v)
+{
+  mpz_t tu, tv;
+  mp_bitcnt_t uz, vz, gz;
+
+  if (u->_mp_size == 0)
+    {
+      mpz_abs (g, v);
+      return;
+    }
+  if (v->_mp_size == 0)
+    {
+      mpz_abs (g, u);
+      return;
+    }
+
+  mpz_init (tu);
+  mpz_init (tv);
+
+  /* Work on the odd parts; gz is the power of two common to both
+     inputs and is multiplied back in at the end. */
+  uz = mpz_make_odd (tu, u);
+  vz = mpz_make_odd (tv, v);
+  gz = GMP_MIN (uz, vz);
+
+  /* Make tu the operand with at least as many limbs. */
+  if (tu->_mp_size < tv->_mp_size)
+    mpz_swap (tu, tv);
+
+  /* One initial division step. */
+  mpz_tdiv_r (tu, tu, tv);
+  if (tu->_mp_size == 0)
+    {
+      mpz_swap (g, tv);
+    }
+  else
+    for (;;)
+      {
+       int c;
+
+       /* Binary gcd step: keep both operands odd and subtract the
+          smaller from the larger. */
+       mpz_make_odd (tu, tu);
+       c = mpz_cmp (tu, tv);
+       if (c == 0)
+         {
+           mpz_swap (g, tu);
+           break;
+         }
+       if (c < 0)
+         mpz_swap (tu, tv);
+
+       /* Once the smaller operand fits in one limb, finish with a
+          single-limb remainder and mpn_gcd_11. */
+       if (tv->_mp_size == 1)
+         {
+           mp_limb_t vl = tv->_mp_d[0];
+           mp_limb_t ul = mpz_tdiv_ui (tu, vl);
+           mpz_set_ui (g, mpn_gcd_11 (ul, vl));
+           break;
+         }
+       mpz_sub (tu, tu, tv);
+      }
+  mpz_clear (tu);
+  mpz_clear (tv);
+  mpz_mul_2exp (g, g, gz);
+}
+
+/* Extended gcd: set g to gcd (u, v) and compute cofactors such that
+   g = s u + t v. Either of s and t may be NULL, in which case that
+   cofactor is not stored. When one input is zero, g is the absolute
+   value of the other input and the cofactors are 0 and the sign of
+   the non-zero input. */
+void
+mpz_gcdext (mpz_t g, mpz_t s, mpz_t t, const mpz_t u, const mpz_t v)
+{
+  mpz_t tu, tv, s0, s1, t0, t1;
+  mp_bitcnt_t uz, vz, gz;
+  mp_bitcnt_t power;
+
+  if (u->_mp_size == 0)
+    {
+      /* g = 0 u + sgn(v) v */
+      signed long sign = mpz_sgn (v);
+      mpz_abs (g, v);
+      if (s)
+       mpz_set_ui (s, 0);
+      if (t)
+       mpz_set_si (t, sign);
+      return;
+    }
+
+  if (v->_mp_size == 0)
+    {
+      /* g = sgn(u) u + 0 v */
+      signed long sign = mpz_sgn (u);
+      mpz_abs (g, u);
+      if (s)
+       mpz_set_si (s, sign);
+      if (t)
+       mpz_set_ui (t, 0);
+      return;
+    }
+
+  mpz_init (tu);
+  mpz_init (tv);
+  mpz_init (s0);
+  mpz_init (s1);
+  mpz_init (t0);
+  mpz_init (t1);
+
+  /* tu, tv: odd parts of |u|, |v|. gz: power of two common to both. */
+  uz = mpz_make_odd (tu, u);
+  vz = mpz_make_odd (tv, v);
+  gz = GMP_MIN (uz, vz);
+
+  /* From here on uz and vz count only the excess factors of two. */
+  uz -= gz;
+  vz -= gz;
+
+  /* Cofactors corresponding to odd gcd. gz handled later. */
+  if (tu->_mp_size < tv->_mp_size)
+    {
+      /* Swap the roles of the two inputs, including the output
+        pointers, so that tu is the operand with more limbs. */
+      mpz_swap (tu, tv);
+      MPZ_SRCPTR_SWAP (u, v);
+      MPZ_PTR_SWAP (s, t);
+      MP_BITCNT_T_SWAP (uz, vz);
+    }
+
+  /* Maintain
+   *
+   * u = t0 tu + t1 tv
+   * v = s0 tu + s1 tv
+   *
+   * where u and v denote the inputs with common factors of two
+   * eliminated, and det (s0, t0; s1, t1) = 2^p. Then
+   *
+   * 2^p tu =  s1 u - t1 v
+   * 2^p tv = -s0 u + t0 v
+   */
+
+  /* After initial division, tu = q tv + tu', we have
+   *
+   * u = 2^uz (tu' + q tv)
+   * v = 2^vz tv
+   *
+   * or
+   *
+   * t0 = 2^uz, t1 = 2^uz q
+   * s0 = 0,    s1 = 2^vz
+   */
+
+  mpz_setbit (t0, uz);
+  mpz_tdiv_qr (t1, tu, tu, tv);
+  mpz_mul_2exp (t1, t1, uz);
+
+  mpz_setbit (s1, vz);
+  /* power tracks p, the exponent of the determinant above. */
+  power = uz + vz;
+
+  if (tu->_mp_size > 0)
+    {
+      mp_bitcnt_t shift;
+      shift = mpz_make_odd (tu, tu);
+      mpz_mul_2exp (t0, t0, shift);
+      mpz_mul_2exp (s0, s0, shift);
+      power += shift;
+
+      for (;;)
+       {
+         int c;
+         c = mpz_cmp (tu, tv);
+         if (c == 0)
+           break;
+
+         if (c < 0)
+           {
+             /* tv = tv' + tu
+              *
+              * u = t0 tu + t1 (tv' + tu) = (t0 + t1) tu + t1 tv'
+              * v = s0 tu + s1 (tv' + tu) = (s0 + s1) tu + s1 tv' */
+
+             mpz_sub (tv, tv, tu);
+             mpz_add (t0, t0, t1);
+             mpz_add (s0, s0, s1);
+
+             shift = mpz_make_odd (tv, tv);
+             mpz_mul_2exp (t1, t1, shift);
+             mpz_mul_2exp (s1, s1, shift);
+           }
+         else
+           {
+             /* Mirror image of the case above, with the roles of tu
+                and tv exchanged. */
+             mpz_sub (tu, tu, tv);
+             mpz_add (t1, t0, t1);
+             mpz_add (s1, s0, s1);
+
+             shift = mpz_make_odd (tu, tu);
+             mpz_mul_2exp (t0, t0, shift);
+             mpz_mul_2exp (s0, s0, shift);
+           }
+         power += shift;
+       }
+    }
+
+  /* Now tv = odd part of gcd, and -s0 and t0 are corresponding
+     cofactors. */
+
+  mpz_mul_2exp (tv, tv, gz);
+  mpz_neg (s0, s0);
+
+  /* 2^p g = s0 u + t0 v. Eliminate one factor of two at a time. To
+     adjust cofactors, we need u / g and v / g */
+
+  mpz_divexact (s1, v, tv);
+  mpz_abs (s1, s1);
+  mpz_divexact (t1, u, tv);
+  mpz_abs (t1, t1);
+
+  while (power-- > 0)
+    {
+      /* s0 u + t0 v = (s0 - v/g) u - (t0 + u/g) v */
+      if (mpz_odd_p (s0) || mpz_odd_p (t0))
+       {
+         mpz_sub (s0, s0, s1);
+         mpz_add (t0, t0, t1);
+       }
+      mpz_divexact_ui (s0, s0, 2);
+      mpz_divexact_ui (t0, t0, 2);
+    }
+
+  /* Arrange so that |s| < |u| / 2g */
+  mpz_add (s1, s0, s1);
+  if (mpz_cmpabs (s0, s1) > 0)
+    {
+      mpz_swap (s0, s1);
+      mpz_sub (t0, t0, t1);
+    }
+  /* The cofactors were computed from absolute values; apply the
+     signs of the inputs. */
+  if (u->_mp_size < 0)
+    mpz_neg (s0, s0);
+  if (v->_mp_size < 0)
+    mpz_neg (t0, t0);
+
+  mpz_swap (g, tv);
+  if (s)
+    mpz_swap (s, s0);
+  if (t)
+    mpz_swap (t, t0);
+
+  mpz_clear (tu);
+  mpz_clear (tv);
+  mpz_clear (s0);
+  mpz_clear (s1);
+  mpz_clear (t0);
+  mpz_clear (t1);
+}
+
+/* Set r to the least common multiple of u and v, always
+   non-negative. The result is zero if either input is zero. */
+void
+mpz_lcm (mpz_t r, const mpz_t u, const mpz_t v)
+{
+  mpz_t cofactor;
+
+  if (u->_mp_size != 0 && v->_mp_size != 0)
+    {
+      mpz_init (cofactor);
+
+      /* lcm = |(u / gcd (u, v)) * v| */
+      mpz_gcd (cofactor, u, v);
+      mpz_divexact (cofactor, u, cofactor);
+      mpz_mul (r, cofactor, v);
+
+      mpz_clear (cofactor);
+      mpz_abs (r, r);
+    }
+  else
+    r->_mp_size = 0;
+}
+
+/* Set r to the least common multiple of u and the single word v,
+   always non-negative. The result is zero if either input is zero. */
+void
+mpz_lcm_ui (mpz_t r, const mpz_t u, unsigned long v)
+{
+  unsigned long common;
+
+  if (u->_mp_size != 0 && v != 0)
+    {
+      /* lcm = |u * (v / gcd (u, v))| */
+      common = mpz_gcd_ui (NULL, u, v);
+      mpz_mul_ui (r, u, v / common);
+      mpz_abs (r, r);
+    }
+  else
+    r->_mp_size = 0;
+}
+
+/* Compute the inverse of u modulo m. Returns non-zero and stores the
+   inverse in r when gcd (u, m) == 1; otherwise returns zero and
+   leaves r untouched. Zero is also returned, without further work,
+   when u is zero or |m| <= 1. */
+int
+mpz_invert (mpz_t r, const mpz_t u, const mpz_t m)
+{
+  mpz_t gcd, cofactor;
+  int ok;
+
+  if (u->_mp_size == 0 || mpz_cmpabs_ui (m, 1) <= 0)
+    return 0;
+
+  mpz_init (gcd);
+  mpz_init (cofactor);
+
+  mpz_gcdext (gcd, cofactor, NULL, u, m);
+  ok = (mpz_cmp_ui (gcd, 1) == 0);
+
+  if (ok)
+    {
+      /* A negative cofactor gets |m| added to it. */
+      if (cofactor->_mp_size < 0 && m->_mp_size >= 0)
+       mpz_add (cofactor, cofactor, m);
+      else if (cofactor->_mp_size < 0)
+       mpz_sub (cofactor, cofactor, m);
+
+      mpz_swap (r, cofactor);
+    }
+
+  mpz_clear (gcd);
+  mpz_clear (cofactor);
+  return ok;
+}
+
+\f
+/* Higher level operations (sqrt, pow and root) */
+
+/* Set r to b^e, by left-to-right binary exponentiation over all bits
+   of an unsigned long. The result is built in a temporary, so r may
+   be the same variable as b. */
+void
+mpz_pow_ui (mpz_t r, const mpz_t b, unsigned long e)
+{
+  mpz_t acc;
+  unsigned long mask = GMP_ULONG_HIGHBIT;
+
+  mpz_init_set_ui (acc, 1);
+
+  do
+    {
+      /* Square for every bit position, multiply for every set bit. */
+      mpz_mul (acc, acc, acc);
+      if ((e & mask) != 0)
+       mpz_mul (acc, acc, b);
+      mask >>= 1;
+    }
+  while (mask > 0);
+
+  mpz_swap (r, acc);
+  mpz_clear (acc);
+}
+
+/* Set r to blimb^e. The base is converted to a temporary mpz_t and
+   the work is done by mpz_pow_ui. */
+void
+mpz_ui_pow_ui (mpz_t r, unsigned long blimb, unsigned long e)
+{
+  mpz_t b;
+  mpz_init_set_ui (b, blimb);
+  mpz_pow_ui (r, b, e);
+  mpz_clear (b);
+}
+
+/* Set r to b^e mod m.
+
+   Terminates through gmp_die if m is zero, or if e is negative and b
+   has no inverse modulo m. For e == 0 the result is 1 reduced modulo
+   m, i.e., 0 when |m| == 1 and 1 otherwise. For a negative exponent
+   the inverse of b modulo m is used as the base.
+
+   The result is built in a temporary, so r may be the same variable
+   as any of the inputs. */
+void
+mpz_powm (mpz_t r, const mpz_t b, const mpz_t e, const mpz_t m)
+{
+  mpz_t tr;
+  mpz_t base;
+  mp_size_t en, mn;
+  mp_srcptr mp;
+  struct gmp_div_inverse minv;
+  unsigned shift;
+  mp_ptr tp = NULL;
+
+  en = GMP_ABS (e->_mp_size);
+  mn = GMP_ABS (m->_mp_size);
+  if (mn == 0)
+    gmp_die ("mpz_powm: Zero modulo.");
+
+  if (en == 0)
+    {
+      /* b^0 = 1, which must still be reduced: 1 mod m is 0 when
+        |m| == 1, in agreement with the general path below. */
+      mpz_set_ui (r, mpz_cmpabs_ui (m, 1) > 0);
+      return;
+    }
+
+  mp = m->_mp_d;
+  mpn_div_qr_invert (&minv, mp, mn);
+  shift = minv.shift;
+
+  if (shift > 0)
+    {
+      /* To avoid shifts, we do all our reductions, except the final
+        one, using a *normalized* m. */
+      minv.shift = 0;
+
+      tp = gmp_xalloc_limbs (mn);
+      gmp_assert_nocarry (mpn_lshift (tp, mp, mn, shift));
+      mp = tp;
+    }
+
+  mpz_init (base);
+
+  if (e->_mp_size < 0)
+    {
+      if (!mpz_invert (base, b, m))
+       gmp_die ("mpz_powm: Negative exponent and non-invertible base.");
+    }
+  else
+    {
+      mp_size_t bn;
+      mpz_abs (base, b);
+
+      bn = base->_mp_size;
+      if (bn >= mn)
+       {
+         /* Reduce |b| modulo the (normalized) modulus in place. */
+         mpn_div_qr_preinv (NULL, base->_mp_d, base->_mp_size, mp, mn, &minv);
+         bn = mn;
+       }
+
+      /* We have reduced the absolute value. Now take care of the
+        sign. Note that we get zero represented non-canonically as
+        m. */
+      if (b->_mp_size < 0)
+       {
+         mp_ptr bp = MPZ_REALLOC (base, mn);
+         gmp_assert_nocarry (mpn_sub (bp, mp, mn, bp, bn));
+         bn = mn;
+       }
+      base->_mp_size = mpn_normalized_size (base->_mp_d, bn);
+    }
+  mpz_init_set_ui (tr, 1);
+
+  /* Left-to-right binary exponentiation over the limbs of |e|, most
+     significant limb first. */
+  while (en-- > 0)
+    {
+      mp_limb_t w = e->_mp_d[en];
+      mp_limb_t bit;
+
+      for (bit = GMP_LIMB_HIGHBIT; bit > 0; bit >>= 1)
+       {
+         mpz_mul (tr, tr, tr);
+         if (w & bit)
+           mpz_mul (tr, tr, base);
+         /* Keep the intermediate value at no more than mn limbs. */
+         if (tr->_mp_size > mn)
+           {
+             mpn_div_qr_preinv (NULL, tr->_mp_d, tr->_mp_size, mp, mn, &minv);
+             tr->_mp_size = mpn_normalized_size (tr->_mp_d, mn);
+           }
+       }
+    }
+
+  /* Final reduction */
+  if (tr->_mp_size >= mn)
+    {
+      /* Restore the real shift so that this reduction is taken modulo
+        m itself rather than the normalized copy. */
+      minv.shift = shift;
+      mpn_div_qr_preinv (NULL, tr->_mp_d, tr->_mp_size, mp, mn, &minv);
+      tr->_mp_size = mpn_normalized_size (tr->_mp_d, mn);
+    }
+  if (tp)
+    gmp_free (tp);
+
+  mpz_swap (r, tr);
+  mpz_clear (tr);
+  mpz_clear (base);
+}
+
+/* Set r to b^elimb mod m. The exponent is converted to a temporary
+   mpz_t and the work is done by mpz_powm. */
+void
+mpz_powm_ui (mpz_t r, const mpz_t b, unsigned long elimb, const mpz_t m)
+{
+  mpz_t e;
+  mpz_init_set_ui (e, elimb);
+  mpz_powm (r, b, e, m);
+  mpz_clear (e);
+}
+
+/* x=trunc(y^(1/z)), r=y-x^z
+
+   r may be NULL, in which case the remainder is not computed.
+   Terminates through gmp_die if z is zero, or if y is negative and z
+   is even. For |y| <= 1 the root is y itself and the remainder is
+   zero. */
+void
+mpz_rootrem (mpz_t x, mpz_t r, const mpz_t y, unsigned long z)
+{
+  int sgn;
+  mpz_t t, u;
+
+  sgn = y->_mp_size < 0;
+  if (sgn && (z & 1) == 0)
+    gmp_die ("mpz_rootrem: Negative argument, with even root.");
+  if (z == 0)
+    gmp_die ("mpz_rootrem: Zeroth root.");
+
+  if (mpz_cmpabs_ui (y, 1) <= 0) {
+    mpz_set (x, y);
+    if (r)
+      r->_mp_size = 0;
+    return;
+  }
+
+  mpz_init (t);
+  mpz_init (u);
+  /* Starting value: a power of two with more than bits(y)/z bits,
+     hence larger in magnitude than the root. */
+  mpz_setbit (t, mpz_sizeinbase (y, 2) / z + 1);
+
+  /* Iterate x' = (y/x^(z-1) + (z-1) x) / z for as long as the
+     magnitude keeps decreasing. On exit u holds the last value
+     before the step that did not decrease. */
+  if (z == 2) /* simplify sqrt loop: z-1 == 1 */
+    do {
+      mpz_swap (u, t);                 /* u = x */
+      mpz_tdiv_q (t, y, u);            /* t = y/x */
+      mpz_add (t, t, u);               /* t = y/x + x */
+      mpz_tdiv_q_2exp (t, t, 1);       /* x'= (y/x + x)/2 */
+    } while (mpz_cmpabs (t, u) < 0);   /* |x'| < |x| */
+  else /* z != 2 */ {
+    mpz_t v;
+
+    mpz_init (v);
+    /* For negative y (z is odd here) start from a negative value. */
+    if (sgn)
+      mpz_neg (t, t);
+
+    do {
+      mpz_swap (u, t);                 /* u = x */
+      mpz_pow_ui (t, u, z - 1);                /* t = x^(z-1) */
+      mpz_tdiv_q (t, y, t);            /* t = y/x^(z-1) */
+      mpz_mul_ui (v, u, z - 1);                /* v = x*(z-1) */
+      mpz_add (t, t, v);               /* t = y/x^(z-1) + x*(z-1) */
+      mpz_tdiv_q_ui (t, t, z);         /* x'=(y/x^(z-1) + x*(z-1))/z */
+    } while (mpz_cmpabs (t, u) < 0);   /* |x'| < |x| */
+
+    mpz_clear (v);
+  }
+
+  /* The remainder is computed before x is overwritten by the swap
+     below. */
+  if (r) {
+    mpz_pow_ui (t, u, z);
+    mpz_sub (r, y, t);
+  }
+  mpz_swap (x, u);
+  mpz_clear (u);
+  mpz_clear (t);
+}
+
+/* Set x to the truncated z-th root of y, as computed by mpz_rootrem.
+   Returns non-zero iff the root is exact (zero remainder). */
+int
+mpz_root (mpz_t x, const mpz_t y, unsigned long z)
+{
+  mpz_t rem;
+  int exact;
+
+  mpz_init (rem);
+  mpz_rootrem (x, rem, y, z);
+  exact = (rem->_mp_size == 0);
+  mpz_clear (rem);
+
+  return exact;
+}
+
+/* Compute s = floor(sqrt(u)) and r = u - s^2. Allows r == NULL.
+   Delegates to mpz_rootrem with z = 2. */
+void
+mpz_sqrtrem (mpz_t s, mpz_t r, const mpz_t u)
+{
+  mpz_rootrem (s, r, u, 2);
+}
+
+/* Compute s = floor(sqrt(u)); the remainder is not computed. */
+void
+mpz_sqrt (mpz_t s, const mpz_t u)
+{
+  mpz_rootrem (s, NULL, u, 2);
+}
+
+\f
+/* Combinatorics */
+
+/* Set x to n!, computed as n * (n-1) * ... * 2. The result is 1 for
+   n < 2. */
+void
+mpz_fac_ui (mpz_t x, unsigned long n)
+{
+  unsigned long factor;
+
+  mpz_set_ui (x, (n < 2) ? 1 : n);
+
+  for (factor = n; factor > 2; )
+    mpz_mul_ui (x, x, --factor);
+}
+
+/* Set r to the binomial coefficient "n over k". The result is zero
+   when k > n.
+
+   Computed as n (n-1) ... (n-k'+1) / k'!, with k' = min (k, n - k),
+   so the cost depends on the smaller of k and n - k rather than on
+   n. */
+void
+mpz_bin_uiui (mpz_t r, unsigned long n, unsigned long k)
+{
+  mpz_t t;
+
+  if (k > n) {
+    r->_mp_size = 0;
+    return;
+  }
+
+  /* Symmetry: bin (n, k) == bin (n, n - k); use the smaller index. */
+  if (k > n - k)
+    k = n - k;
+
+  mpz_init (t);
+  mpz_fac_ui (t, k);
+
+  /* Product of k consecutive integers ending at n. Such a product is
+     always divisible by k!, so the division below is exact. */
+  mpz_set_ui (r, 1);
+  for (; k > 0; k--)
+    mpz_mul_ui (r, r, n--);
+
+  mpz_divexact (r, r, t);
+  mpz_clear (t);
+}
+
+\f
+/* Logical operations and bit manipulation. */
+
+/* Numbers are treated as if represented in two's complement (and
+   infinitely sign extended). For negative values we get the two's
+   complement from -x = ~x + 1, where ~ is bitwise complement.
+   Negation transforms
+
+     xxxx10...0
+
+   into
+
+     yyyy10...0
+
+   where yyyy is the bitwise complement of xxxx. So least significant
+   bits, up to and including the first one bit, are unchanged, and
+   the more significant bits are all complemented.
+
+   To change a bit from zero to one in a negative number, subtract the
+   corresponding power of two from the absolute value. This can never
+   underflow. To change a bit from one to zero, add the corresponding
+   power of two, and this might overflow. E.g., if x = -001111, the
+   two's complement is 110001. Clearing the least significant bit, we
+   get two's complement 110000, and -010000. */
+
+/* Return bit number bit_index (0 or 1) of d, with negative values
+   read as two's complement. Bits beyond the most significant limb
+   equal the sign: 1 for negative d, 0 otherwise. */
+int
+mpz_tstbit (const mpz_t d, mp_bitcnt_t bit_index)
+{
+  mp_size_t limb_index;
+  unsigned shift;
+  mp_size_t ds;
+  mp_size_t dn;
+  mp_limb_t w;
+  int bit;
+
+  ds = d->_mp_size;
+  dn = GMP_ABS (ds);
+  limb_index = bit_index / GMP_LIMB_BITS;
+  if (limb_index >= dn)
+    return ds < 0;
+
+  shift = bit_index % GMP_LIMB_BITS;
+  w = d->_mp_d[limb_index];
+  /* The bit as stored, i.e., the bit of |d|. */
+  bit = (w >> shift) & 1;
+
+  if (ds < 0)
+    {
+      /* d < 0. Check if any of the bits below is set: If so, our bit
+        must be complemented. */
+      /* Lower bits of the same limb; the shift > 0 test avoids a
+        shift by the full limb width. */
+      if (shift > 0 && (w << (GMP_LIMB_BITS - shift)) > 0)
+       return bit ^ 1;
+      /* Any lower limb. */
+      while (limb_index-- > 0)
+       if (d->_mp_d[limb_index] > 0)
+         return bit ^ 1;
+    }
+  return bit;
+}
+
+/* Add 2^bit_index to the absolute value of d, keeping the sign of d
+   (a zero d becomes positive). Grows d when needed. */
+static void
+mpz_abs_add_bit (mpz_t d, mp_bitcnt_t bit_index)
+{
+  mp_size_t dn, limb_index;
+  mp_limb_t bit;
+  mp_ptr dp;
+
+  dn = GMP_ABS (d->_mp_size);
+
+  limb_index = bit_index / GMP_LIMB_BITS;
+  bit = (mp_limb_t) 1 << (bit_index % GMP_LIMB_BITS);
+
+  if (limb_index >= dn)
+    {
+      mp_size_t i;
+      /* The bit should be set outside of the end of the number.
+        We have to increase the size of the number. */
+      dp = MPZ_REALLOC (d, limb_index + 1);
+
+      dp[limb_index] = bit;
+      /* Zero the limbs between the old top and the new bit. */
+      for (i = dn; i < limb_index; i++)
+       dp[i] = 0;
+      dn = limb_index + 1;
+    }
+  else
+    {
+      mp_limb_t cy;
+
+      dp = d->_mp_d;
+
+      /* Add within the existing limbs; a carry out of the top limb
+        needs one more limb. */
+      cy = mpn_add_1 (dp + limb_index, dp + limb_index, dn - limb_index, bit);
+      if (cy > 0)
+       {
+         dp = MPZ_REALLOC (d, dn + 1);
+         dp[dn++] = cy;
+       }
+    }
+
+  d->_mp_size = (d->_mp_size < 0) ? - dn : dn;
+}
+
+/* Subtract 2^bit_index from the absolute value of d, keeping the
+   sign of d. The caller must ensure that |d| >= 2^bit_index; this is
+   checked by the assertions. */
+static void
+mpz_abs_sub_bit (mpz_t d, mp_bitcnt_t bit_index)
+{
+  mp_size_t dn, limb_index;
+  mp_ptr dp;
+  mp_limb_t bit;
+
+  dn = GMP_ABS (d->_mp_size);
+  dp = d->_mp_d;
+
+  limb_index = bit_index / GMP_LIMB_BITS;
+  bit = (mp_limb_t) 1 << (bit_index % GMP_LIMB_BITS);
+
+  assert (limb_index < dn);
+
+  gmp_assert_nocarry (mpn_sub_1 (dp + limb_index, dp + limb_index,
+                                dn - limb_index, bit));
+  /* The subtraction can clear several high limbs at once (e.g.,
+     removing the only set bit of 2^k leaves zero), so strip all of
+     them, not just the top one. */
+  dn = mpn_normalized_size (dp, dn);
+  d->_mp_size = (d->_mp_size < 0) ? - dn : dn;
+}
+
+/* Set bit number bit_index of d (two's complement view). Nothing is
+   done when the bit is already set. */
+void
+mpz_setbit (mpz_t d, mp_bitcnt_t bit_index)
+{
+  if (mpz_tstbit (d, bit_index))
+    return;
+
+  /* For negative d, setting a bit reduces the absolute value. */
+  if (d->_mp_size < 0)
+    mpz_abs_sub_bit (d, bit_index);
+  else
+    mpz_abs_add_bit (d, bit_index);
+}
+
+/* Clear bit number bit_index of d (two's complement view). Nothing
+   is done when the bit is already clear. */
+void
+mpz_clrbit (mpz_t d, mp_bitcnt_t bit_index)
+{
+  if (!mpz_tstbit (d, bit_index))
+    return;
+
+  /* For negative d, clearing a bit increases the absolute value. */
+  if (d->_mp_size < 0)
+    mpz_abs_add_bit (d, bit_index);
+  else
+    mpz_abs_sub_bit (d, bit_index);
+}
+
+/* Complement bit number bit_index of d (two's complement view). */
+void
+mpz_combit (mpz_t d, mp_bitcnt_t bit_index)
+{
+  int bit_is_set = mpz_tstbit (d, bit_index);
+  int is_negative = d->_mp_size < 0;
+
+  /* The absolute value shrinks when a set bit of a non-negative
+     number, or a clear bit of a negative number, is flipped. */
+  if (bit_is_set != is_negative)
+    mpz_abs_sub_bit (d, bit_index);
+  else
+    mpz_abs_add_bit (d, bit_index);
+}
+
+/* Set r to the bitwise complement of u, computed as -u - 1. */
+void
+mpz_com (mpz_t r, const mpz_t u)
+{
+  mpz_neg (r, u);
+  mpz_sub_ui (r, r, 1);
+}
+
+/* Set r to the bitwise AND of u and v, with negative values treated
+   as two's complement. The result is zero if either input is zero.
+
+   For each of u, v and the result, the ?x variable is an all-ones
+   mask when that number is negative (zero otherwise), and the ?c
+   variable is the running carry of the "+ 1" in -x = ~x + 1. */
+void
+mpz_and (mpz_t r, const mpz_t u, const mpz_t v)
+{
+  mp_size_t un, vn, rn, i;
+  mp_ptr up, vp, rp;
+
+  mp_limb_t ux, vx, rx;
+  mp_limb_t uc, vc, rc;
+  mp_limb_t ul, vl, rl;
+
+  un = GMP_ABS (u->_mp_size);
+  vn = GMP_ABS (v->_mp_size);
+  /* Arrange for u to be the operand with at least as many limbs. */
+  if (un < vn)
+    {
+      MPZ_SRCPTR_SWAP (u, v);
+      MP_SIZE_T_SWAP (un, vn);
+    }
+  if (vn == 0)
+    {
+      r->_mp_size = 0;
+      return;
+    }
+
+  uc = u->_mp_size < 0;
+  vc = v->_mp_size < 0;
+  /* The result is negative only if both inputs are. */
+  rc = uc & vc;
+
+  ux = -uc;
+  vx = -vc;
+  rx = -rc;
+
+  /* If the smaller input is positive, higher limbs don't matter. */
+  rn = vx ? un : vn;
+
+  /* One extra limb when negative, for a possible final carry. */
+  rp = MPZ_REALLOC (r, rn + rc);
+
+  up = u->_mp_d;
+  vp = v->_mp_d;
+
+  for (i = 0; i < vn; i++)
+    {
+      /* Two's complement limbs of the inputs. */
+      ul = (up[i] ^ ux) + uc;
+      uc = ul < uc;
+
+      vl = (vp[i] ^ vx) + vc;
+      vc = vl < vc;
+
+      /* Combine, then convert back to sign and magnitude. */
+      rl = ( (ul & vl) ^ rx) + rc;
+      rc = rl < rc;
+      rp[i] = rl;
+    }
+  assert (vc == 0);
+
+  /* Above the top limb of v, v contributes only its sign extension
+     vx. */
+  for (; i < rn; i++)
+    {
+      ul = (up[i] ^ ux) + uc;
+      uc = ul < uc;
+
+      rl = ( (ul & vx) ^ rx) + rc;
+      rc = rl < rc;
+      rp[i] = rl;
+    }
+  if (rc)
+    rp[rn++] = rc;
+  else
+    rn = mpn_normalized_size (rp, rn);
+
+  r->_mp_size = rx ? -rn : rn;
+}
+
+/* Set r to the bitwise inclusive OR of u and v, with negative values
+   treated as two's complement. If one input is zero, r is a copy of
+   the other.
+
+   For each of u, v and the result, the ?x variable is an all-ones
+   mask when that number is negative (zero otherwise), and the ?c
+   variable is the running carry of the "+ 1" in -x = ~x + 1. */
+void
+mpz_ior (mpz_t r, const mpz_t u, const mpz_t v)
+{
+  mp_size_t un, vn, rn, i;
+  mp_ptr up, vp, rp;
+
+  mp_limb_t ux, vx, rx;
+  mp_limb_t uc, vc, rc;
+  mp_limb_t ul, vl, rl;
+
+  un = GMP_ABS (u->_mp_size);
+  vn = GMP_ABS (v->_mp_size);
+  /* Arrange for u to be the operand with at least as many limbs. */
+  if (un < vn)
+    {
+      MPZ_SRCPTR_SWAP (u, v);
+      MP_SIZE_T_SWAP (un, vn);
+    }
+  if (vn == 0)
+    {
+      mpz_set (r, u);
+      return;
+    }
+
+  uc = u->_mp_size < 0;
+  vc = v->_mp_size < 0;
+  /* The result is negative if either input is. */
+  rc = uc | vc;
+
+  ux = -uc;
+  vx = -vc;
+  rx = -rc;
+
+  /* If the smaller input is negative, by sign extension higher limbs
+     don't matter. */
+  rn = vx ? vn : un;
+
+  /* One extra limb when negative, for a possible final carry. */
+  rp = MPZ_REALLOC (r, rn + rc);
+
+  up = u->_mp_d;
+  vp = v->_mp_d;
+
+  for (i = 0; i < vn; i++)
+    {
+      /* Two's complement limbs of the inputs. */
+      ul = (up[i] ^ ux) + uc;
+      uc = ul < uc;
+
+      vl = (vp[i] ^ vx) + vc;
+      vc = vl < vc;
+
+      /* Combine, then convert back to sign and magnitude. */
+      rl = ( (ul | vl) ^ rx) + rc;
+      rc = rl < rc;
+      rp[i] = rl;
+    }
+  assert (vc == 0);
+
+  /* Above the top limb of v, v contributes only its sign extension
+     vx. */
+  for (; i < rn; i++)
+    {
+      ul = (up[i] ^ ux) + uc;
+      uc = ul < uc;
+
+      rl = ( (ul | vx) ^ rx) + rc;
+      rc = rl < rc;
+      rp[i] = rl;
+    }
+  if (rc)
+    rp[rn++] = rc;
+  else
+    rn = mpn_normalized_size (rp, rn);
+
+  r->_mp_size = rx ? -rn : rn;
+}
+
+/* Set r to the bitwise exclusive OR of u and v, with negative values
+   treated as two's complement. If one input is zero, r is a copy of
+   the other.
+
+   For each of u, v and the result, the ?x variable is an all-ones
+   mask when that number is negative (zero otherwise), and the ?c
+   variable is the running carry of the "+ 1" in -x = ~x + 1. */
+void
+mpz_xor (mpz_t r, const mpz_t u, const mpz_t v)
+{
+  mp_size_t un, vn, i;
+  mp_ptr up, vp, rp;
+
+  mp_limb_t ux, vx, rx;
+  mp_limb_t uc, vc, rc;
+  mp_limb_t ul, vl, rl;
+
+  un = GMP_ABS (u->_mp_size);
+  vn = GMP_ABS (v->_mp_size);
+  /* Arrange for u to be the operand with at least as many limbs. */
+  if (un < vn)
+    {
+      MPZ_SRCPTR_SWAP (u, v);
+      MP_SIZE_T_SWAP (un, vn);
+    }
+  if (vn == 0)
+    {
+      mpz_set (r, u);
+      return;
+    }
+
+  uc = u->_mp_size < 0;
+  vc = v->_mp_size < 0;
+  /* The result is negative if exactly one input is. */
+  rc = uc ^ vc;
+
+  ux = -uc;
+  vx = -vc;
+  rx = -rc;
+
+  /* One extra limb when negative, for a possible final carry. */
+  rp = MPZ_REALLOC (r, un + rc);
+
+  up = u->_mp_d;
+  vp = v->_mp_d;
+
+  for (i = 0; i < vn; i++)
+    {
+      /* Two's complement limbs of the inputs. */
+      ul = (up[i] ^ ux) + uc;
+      uc = ul < uc;
+
+      vl = (vp[i] ^ vx) + vc;
+      vc = vl < vc;
+
+      /* Combine, then convert back to sign and magnitude. */
+      rl = (ul ^ vl ^ rx) + rc;
+      rc = rl < rc;
+      rp[i] = rl;
+    }
+  assert (vc == 0);
+
+  for (; i < un; i++)
+    {
+      ul = (up[i] ^ ux) + uc;
+      uc = ul < uc;
+
+      /* Here v contributes only its sign extension vx, and since
+        rx == ux ^ vx, ul ^ vx ^ rx reduces to ul ^ ux. */
+      rl = (ul ^ ux) + rc;
+      rc = rl < rc;
+      rp[i] = rl;
+    }
+  if (rc)
+    rp[un++] = rc;
+  else
+    un = mpn_normalized_size (rp, un);
+
+  r->_mp_size = rx ? -un : un;
+}
+
+/* Return the number of one bits in the limb x. */
+static unsigned
+gmp_popcount_limb (mp_limb_t x)
+{
+  unsigned total = 0;
+
+  /* Do 16 bits at a time, to avoid limb-sized constants. */
+  while (x > 0)
+    {
+      /* Sum adjacent fields: pairs, then nibbles, then bytes. */
+      unsigned pairs = (x & 0x5555) + ((x >> 1) & 0x5555);
+      unsigned nibbles = (pairs & 0x3333) + ((pairs >> 2) & 0x3333);
+      unsigned bytes = (nibbles & 0x0f0f) + ((nibbles >> 4) & 0x0f0f);
+
+      total += (bytes & 0x00ff) + (bytes >> 8);
+      x >>= 16;
+    }
+  return total;
+}
+
+/* Return the number of one bits in u. For negative u the largest
+   mp_bitcnt_t value is returned. */
+mp_bitcnt_t
+mpz_popcount (const mpz_t u)
+{
+  mp_size_t limbs = u->_mp_size;
+  mp_bitcnt_t total = 0;
+
+  if (limbs < 0)
+    return ~(mp_bitcnt_t) 0;
+
+  while (limbs-- > 0)
+    total += gmp_popcount_limb (u->_mp_d[limbs]);
+
+  return total;
+}
+
+/* Return the number of bit positions in which u and v differ, with
+   negative values treated as two's complement. When the size fields
+   have different signs (un ^ vn negative), the largest mp_bitcnt_t
+   value is returned instead. */
+mp_bitcnt_t
+mpz_hamdist (const mpz_t u, const mpz_t v)
+{
+  mp_size_t un, vn, i;
+  mp_limb_t uc, vc, ul, vl, comp;
+  mp_srcptr up, vp;
+  mp_bitcnt_t c;
+
+  un = u->_mp_size;
+  vn = v->_mp_size;
+
+  if ( (un ^ vn) < 0)
+    return ~(mp_bitcnt_t) 0;
+
+  if (un < 0)
+    {
+      /* Both negative: convert limbs to two's complement on the fly,
+        with comp as the complement mask and uc, vc as the carries of
+        the "+ 1". */
+      assert (vn < 0);
+      un = -un;
+      vn = -vn;
+      uc = vc = 1;
+      comp = - (mp_limb_t) 1;
+    }
+  else
+    uc = vc = comp = 0;
+
+  up = u->_mp_d;
+  vp = v->_mp_d;
+
+  /* Arrange for u to be the operand with at least as many limbs. */
+  if (un < vn)
+    MPN_SRCPTR_SWAP (up, un, vp, vn);
+
+  for (i = 0, c = 0; i < vn; i++)
+    {
+      ul = (up[i] ^ comp) + uc;
+      uc = ul < uc;
+
+      vl = (vp[i] ^ comp) + vc;
+      vc = vl < vc;
+
+      c += gmp_popcount_limb (ul ^ vl);
+    }
+  assert (vc == 0);
+
+  /* Above the top limb of v, compare against v's sign extension
+     (comp). */
+  for (; i < un; i++)
+    {
+      ul = (up[i] ^ comp) + uc;
+      uc = ul < uc;
+
+      c += gmp_popcount_limb (ul ^ comp);
+    }
+
+  return c;
+}
+
+/* Return the index of the first 1 bit of u at or above starting_bit,
+   with negative values treated as two's complement. If there is no
+   such bit (possible only for u >= 0), the largest mp_bitcnt_t value
+   is returned. */
+mp_bitcnt_t
+mpz_scan1 (const mpz_t u, mp_bitcnt_t starting_bit)
+{
+  mp_ptr up;
+  mp_size_t us, un, i;
+  mp_limb_t limb, ux, uc;
+  unsigned cnt;
+
+  up = u->_mp_d;
+  us = u->_mp_size;
+  un = GMP_ABS (us);
+  i = starting_bit / GMP_LIMB_BITS;
+
+  /* Past the end there's no 1 bits for u>=0, or an immediate 1 bit
+     for u<0. Notice this test picks up any u==0 too. */
+  if (i >= un)
+    return (us >= 0 ? ~(mp_bitcnt_t) 0 : starting_bit);
+
+  if (us < 0)
+    {
+      /* Two's complement view: ux complements each limb, and the
+        "+ 1" carries into limb i exactly when all lower limbs are
+        zero. */
+      ux = GMP_LIMB_MAX;
+      uc = mpn_zero_p (up, i);
+    }
+  else
+    ux = uc = 0;
+
+  limb = (ux ^ up[i]) + uc;
+  uc = limb < uc;
+
+  /* Mask to 0 all bits before starting_bit, thus ignoring them. */
+  limb &= (GMP_LIMB_MAX << (starting_bit % GMP_LIMB_BITS));
+
+  while (limb == 0)
+    {
+      i++;
+      if (i == un)
+       {
+         assert (uc == 0);
+         /* For the u > 0 case, this can happen only for the first
+            masked limb. For the u < 0 case, it happens when the
+            highest limbs of the absolute value are all ones. */
+         return (us >= 0 ? ~(mp_bitcnt_t) 0 : un * GMP_LIMB_BITS);
+       }
+      limb = (ux ^ up[i]) + uc;
+      uc = limb < uc;
+    }
+  /* Position of the lowest set bit within the limb found. */
+  gmp_ctz (cnt, limb);
+  return (mp_bitcnt_t) i * GMP_LIMB_BITS + cnt;
+}
+
+/* Return the index of the first 0 bit of u at or above starting_bit,
+   with negative values treated as two's complement. If there is no
+   such bit (possible only for u < 0), the largest mp_bitcnt_t value
+   is returned. */
+mp_bitcnt_t
+mpz_scan0 (const mpz_t u, mp_bitcnt_t starting_bit)
+{
+  mp_ptr up;
+  mp_size_t us, un, i;
+  mp_limb_t limb, ux, uc;
+  unsigned cnt;
+
+  up = u->_mp_d;
+  us = u->_mp_size;
+  un = GMP_ABS (us);
+  i = starting_bit / GMP_LIMB_BITS;
+
+  /* When past end, there's an immediate 0 bit for u>=0, or no 0 bits for
+     u<0.  Notice this test picks up all cases of u==0 too. */
+  if (i >= un)
+    return (us >= 0 ? starting_bit : ~(mp_bitcnt_t) 0);
+
+  if (us < 0)
+    {
+      /* Two's complement view: ux complements each limb, and the
+        "+ 1" carries into limb i exactly when all lower limbs are
+        zero. */
+      ux = GMP_LIMB_MAX;
+      uc = mpn_zero_p (up, i);
+    }
+  else
+    ux = uc = 0;
+
+  limb = (ux ^ up[i]) + uc;
+  uc = limb < uc;
+
+  /* Mask to 1 all bits before starting_bit, thus ignoring them. */
+  limb |= ((mp_limb_t) 1 << (starting_bit % GMP_LIMB_BITS)) - 1;
+
+  while (limb == GMP_LIMB_MAX)
+    {
+      i++;
+      if (i == un)
+       {
+         assert (uc == 0);
+         /* Ran off the top: for u >= 0 the first bit above the
+            number is a 0 bit; for u < 0 the sign extension has
+            none. */
+         return (us >= 0 ? un * GMP_LIMB_BITS : ~(mp_bitcnt_t) 0);
+       }
+      limb = (ux ^ up[i]) + uc;
+      uc = limb < uc;
+    }
+  /* Position of the lowest clear bit within the limb found. */
+  gmp_ctz (cnt, ~limb);
+  return (mp_bitcnt_t) i * GMP_LIMB_BITS + cnt;
+}
+
+\f
+/* MPZ base conversion. */
+
+/* Return the number of digits needed to write |u| in the given base
+   (2 to 36, checked by assertions). The sign is not counted. Zero
+   yields 1. */
+size_t
+mpz_sizeinbase (const mpz_t u, int base)
+{
+  mp_size_t un;
+  mp_srcptr up;
+  mp_ptr tp;
+  mp_bitcnt_t bits;
+  struct gmp_div_inverse bi;
+  size_t ndigits;
+
+  assert (base >= 2);
+  assert (base <= 36);
+
+  un = GMP_ABS (u->_mp_size);
+  if (un == 0)
+    return 1;
+
+  up = u->_mp_d;
+
+  /* Bit length of |u|. */
+  bits = (un - 1) * GMP_LIMB_BITS + mpn_limb_size_in_base_2 (up[un-1]);
+  /* For a power-of-two base the digit count follows directly from
+     the bit length, rounded up. */
+  switch (base)
+    {
+    case 2:
+      return bits;
+    case 4:
+      return (bits + 1) / 2;
+    case 8:
+      return (bits + 2) / 3;
+    case 16:
+      return (bits + 3) / 4;
+    case 32:
+      return (bits + 4) / 5;
+      /* FIXME: Do something more clever for the common case of base
+        10. */
+    }
+
+  /* Other bases: divide a scratch copy of |u| by the base until
+     nothing is left, counting one digit per division. */
+  tp = gmp_xalloc_limbs (un);
+  mpn_copyi (tp, up, un);
+  mpn_div_qr_1_invert (&bi, base);
+
+  for (ndigits = 0; un > 0; ndigits++)
+    {
+      mpn_div_qr_1_preinv (tp, tp, un, &bi);
+      un -= (tp[un-1] == 0);
+    }
+  gmp_free (tp);
+  return ndigits;
+}
+
+/* Convert u to a NUL-terminated string of digits in the given base,
+   with a leading '-' for negative values.
+
+   A non-negative base selects lower-case digits, a negative base
+   selects upper-case digits and is negated. After that, a base of 0
+   or 1 is replaced by 10, and NULL is returned for a base above 36.
+
+   If sp is NULL, a buffer of mpz_sizeinbase (u, base) + 2 bytes is
+   allocated with gmp_xalloc; otherwise the string is written to sp.
+   Returns the buffer used. */
+char *
+mpz_get_str (char *sp, int base, const mpz_t u)
+{
+  unsigned bits;
+  const char *digits;
+  mp_size_t un;
+  size_t i, sn;
+
+  if (base >= 0)
+    {
+      digits = "0123456789abcdefghijklmnopqrstuvwxyz";
+    }
+  else
+    {
+      base = -base;
+      digits = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ";
+    }
+  if (base <= 1)
+    base = 10;
+  if (base > 36)
+    return NULL;
+
+  /* Room for a possible sign; the terminating NUL is added in the
+     allocation below. */
+  sn = 1 + mpz_sizeinbase (u, base);
+  if (!sp)
+    sp = gmp_xalloc (1 + sn);
+
+  un = GMP_ABS (u->_mp_size);
+
+  if (un == 0)
+    {
+      sp[0] = '0';
+      sp[1] = '\0';
+      return sp;
+    }
+
+  i = 0;
+
+  if (u->_mp_size < 0)
+    sp[i++] = '-';
+
+  bits = mpn_base_power_of_two_p (base);
+
+  if (bits)
+    /* Not modified in this case. */
+    sn = i + mpn_get_str_bits ((unsigned char *) sp + i, bits, u->_mp_d, un);
+  else
+    {
+      struct mpn_base_info info;
+      mp_ptr tp;
+
+      /* The conversion is given a scratch copy of the limbs. */
+      mpn_get_base_info (&info, base);
+      tp = gmp_xalloc_limbs (un);
+      mpn_copyi (tp, u->_mp_d, un);
+
+      sn = i + mpn_get_str_other ((unsigned char *) sp + i, base, &info, tp, un);
+      gmp_free (tp);
+    }
+
+  /* The conversion left numeric digit values in sp; map them to
+     characters. */
+  for (; i < sn; i++)
+    sp[i] = digits[(unsigned char) sp[i]];
+
+  sp[sn] = '\0';
+  return sp;
+}
+
+/* Set r from the NUL-terminated string sp, interpreted in the given
+   base (2 to 36, or 0 for auto-detection).
+
+   Leading white space is skipped, and an optional '-' may follow.
+   With base 0, a "0x"/"0X" prefix selects base 16, "0b"/"0B" selects
+   base 2, any other leading '0' selects base 8, and everything else
+   is base 10. White space between digits is ignored, and letters are
+   accepted as digits in either case.
+
+   Returns 0 on success. Returns -1, with r set to zero, if the string
+   contains a character that is not a digit in the base, or contains
+   no digits at all. */
+int
+mpz_set_str (mpz_t r, const char *sp, int base)
+{
+  unsigned bits;
+  mp_size_t rn, alloc;
+  mp_ptr rp;
+  size_t sn;
+  size_t dn;
+  int sign;
+  unsigned char *dp;
+
+  assert (base == 0 || (base >= 2 && base <= 36));
+
+  while (isspace( (unsigned char) *sp))
+    sp++;
+
+  if (*sp == '-')
+    {
+      sign = 1;
+      sp++;
+    }
+  else
+    sign = 0;
+
+  if (base == 0)
+    {
+      if (sp[0] == '0')
+       {
+         if (sp[1] == 'x' || sp[1] == 'X')
+           {
+             base = 16;
+             sp += 2;
+           }
+         else if (sp[1] == 'b' || sp[1] == 'B')
+           {
+             base = 2;
+             sp += 2;
+           }
+         else
+           /* The '0' is left in place and parsed as an ordinary
+              digit, so that a plain "0" still has a digit. */
+           base = 8;
+       }
+      else
+       base = 10;
+    }
+
+  sn = strlen (sp);
+  dp = gmp_xalloc (sn + (sn == 0));
+
+  /* Translate the characters to digit values, dropping white space. */
+  for (dn = 0; *sp; sp++)
+    {
+      unsigned digit;
+
+      if (isspace ((unsigned char) *sp))
+       continue;
+      if (*sp >= '0' && *sp <= '9')
+       digit = *sp - '0';
+      else if (*sp >= 'a' && *sp <= 'z')
+       digit = *sp - 'a' + 10;
+      else if (*sp >= 'A' && *sp <= 'Z')
+       digit = *sp - 'A' + 10;
+      else
+       digit = base; /* fail */
+
+      if (digit >= (unsigned) base)
+       {
+         gmp_free (dp);
+         r->_mp_size = 0;
+         return -1;
+       }
+
+      dp[dn++] = digit;
+    }
+
+  /* A string without any digit is not a valid number, and the
+     conversion routines below must not be handed an empty digit
+     array. */
+  if (dn == 0)
+    {
+      gmp_free (dp);
+      r->_mp_size = 0;
+      return -1;
+    }
+
+  bits = mpn_base_power_of_two_p (base);
+
+  if (bits > 0)
+    {
+      alloc = (sn * bits + GMP_LIMB_BITS - 1) / GMP_LIMB_BITS;
+      rp = MPZ_REALLOC (r, alloc);
+      rn = mpn_set_str_bits (rp, dp, dn, bits);
+    }
+  else
+    {
+      struct mpn_base_info info;
+      mpn_get_base_info (&info, base);
+      alloc = (sn + info.exp - 1) / info.exp;
+      rp = MPZ_REALLOC (r, alloc);
+      rn = mpn_set_str_other (rp, dp, dn, base, &info);
+    }
+  assert (rn <= alloc);
+  gmp_free (dp);
+
+  r->_mp_size = sign ? - rn : rn;
+
+  return 0;
+}
+
+/* Initialize R, then assign it from the string SP in the given BASE.
+   Returns whatever mpz_set_str returns; R is initialized in either
+   case.  */
+int
+mpz_init_set_str (mpz_t r, const char *sp, int base)
+{
+  int status;
+
+  mpz_init (r);
+  status = mpz_set_str (r, sp, base);
+
+  return status;
+}
+
+/* Write X to STREAM as a string of digits in the given BASE (same
+   conventions as mpz_get_str).  Returns the number of bytes written, or
+   0 if the base is not supported by mpz_get_str.  */
+size_t
+mpz_out_str (FILE *stream, int base, const mpz_t x)
+{
+  char *str;
+  size_t len;
+
+  str = mpz_get_str (NULL, base, x);
+  /* mpz_get_str returns NULL for an unsupported base; do not pass that
+     on to strlen. */
+  if (!str)
+    return 0;
+
+  len = strlen (str);
+  len = fwrite (str, 1, len, stream);
+  gmp_free (str);
+  return len;
+}
+
+\f
+/* Report the byte order of the host: -1 for little endian, 1 for big
+   endian.  Decided by looking at the first byte of an int holding 1.  */
+static int
+gmp_detect_endian (void)
+{
+  static const int probe = 1;
+  const unsigned char *first_byte = (const unsigned char *) &probe;
+
+  /* The low-order byte comes first on a little endian host. */
+  return (*first_byte == 1) ? -1 : 1;
+}
+
+/* Import and export. Does not support nails. */
+
+/* Set R to the non-negative number stored at SRC as COUNT words of SIZE
+   bytes each.
+
+   ORDER is 1 if the most significant word comes first and -1 if the
+   least significant word comes first.  ENDIAN gives the byte order
+   inside each word: 1 for big endian, -1 for little endian, 0 for the
+   byte order of the host.  NAILS must be 0.  An empty input (COUNT or
+   SIZE equal to 0) gives the value zero.  */
+void
+mpz_import (mpz_t r, size_t count, int order, size_t size, int endian,
+            size_t nails, const void *src)
+{
+  const unsigned char *p;
+  ptrdiff_t word_step;
+  mp_ptr rp;
+  mp_size_t rn;
+
+  /* The current (partial) limb. */
+  mp_limb_t limb;
+  /* The number of bytes already copied to this limb (starting from
+     the low end). */
+  size_t bytes;
+  /* The index where the limb should be stored, when completed. */
+  mp_size_t i;
+
+  if (nails != 0)
+    gmp_die ("mpz_import: Nails not supported.");
+
+  assert (order == 1 || order == -1);
+  assert (endian >= -1 && endian <= 1);
+
+  /* Nothing to read.  Return before any pointer arithmetic on SRC:
+     with count == 0 the expression size * (count - 1) below would wrap
+     around. */
+  if (count == 0 || size == 0)
+    {
+      r->_mp_size = 0;
+      return;
+    }
+
+  if (endian == 0)
+    endian = gmp_detect_endian ();
+
+  p = (const unsigned char *) src;
+
+  /* The inner loop leaves p one word away from where it started, in the
+     direction given by endian.  When that is not the direction in which
+     the words are traversed, step over two words to get to the next. */
+  word_step = (order != endian) ? 2 * size : 0;
+
+  /* Process bytes from the least significant end, so point p at the
+     least significant word. */
+  if (order == 1)
+    {
+      p += size * (count - 1);
+      word_step = - word_step;
+    }
+
+  /* And at the least significant byte of that word. */
+  if (endian == 1)
+    p += (size - 1);
+
+  rn = (size * count + sizeof(mp_limb_t) - 1) / sizeof(mp_limb_t);
+  rp = MPZ_REALLOC (r, rn);
+
+  for (limb = 0, bytes = 0, i = 0; count > 0; count--, p += word_step)
+    {
+      size_t j;
+      for (j = 0; j < size; j++, p -= (ptrdiff_t) endian)
+        {
+          limb |= (mp_limb_t) *p << (bytes++ * CHAR_BIT);
+          if (bytes == sizeof(mp_limb_t))
+            {
+              rp[i++] = limb;
+              bytes = 0;
+              limb = 0;
+            }
+        }
+    }
+  /* Store a final, partially filled limb. */
+  if (bytes > 0)
+    rp[i++] = limb;
+  assert (i == rn);
+
+  r->_mp_size = mpn_normalized_size (rp, i);
+}
+
+/* Store the absolute value of U at R as an array of words of SIZE bytes
+   each, and return R.
+
+   ORDER is 1 to put the most significant word first and -1 to put the
+   least significant word first.  ENDIAN gives the byte order inside
+   each word: 1 for big endian, -1 for little endian, 0 for the byte
+   order of the host.  NAILS must be 0.
+
+   If R is NULL the array is allocated with gmp_xalloc.  If COUNTP is
+   not NULL the number of words produced is stored there.  For U equal
+   to zero nothing is written, the count is 0 and R is returned as
+   passed in (possibly NULL).  */
+void *
+mpz_export (void *r, size_t *countp, int order, size_t size, int endian,
+            size_t nails, const mpz_t u)
+{
+  unsigned char *p;
+  ptrdiff_t word_step;
+  size_t count, k;
+  mp_size_t un;
+
+  /* The current (partial) limb. */
+  mp_limb_t limb;
+  /* The number of bytes left to do in this limb. */
+  size_t bytes;
+  /* The index where the limb was read. */
+  mp_size_t i;
+
+  if (nails != 0)
+    gmp_die ("mpz_export: Nails not supported.");
+
+  assert (order == 1 || order == -1);
+  assert (endian >= -1 && endian <= 1);
+  assert (size > 0 || u->_mp_size == 0);
+
+  un = GMP_ABS (u->_mp_size);
+  if (un == 0)
+    {
+      if (countp)
+        *countp = 0;
+      return r;
+    }
+
+  /* Count bytes in top limb. */
+  for (limb = u->_mp_d[un-1], k = 0; limb > 0; k++, limb >>= CHAR_BIT)
+    ;
+
+  assert (k > 0);
+
+  /* Total number of significant bytes, rounded up to whole words. */
+  count = (k + (un-1) * sizeof (mp_limb_t) + size - 1) / size;
+
+  if (!r)
+    r = gmp_xalloc (count * size);
+
+  if (endian == 0)
+    endian = gmp_detect_endian ();
+
+  p = (unsigned char *) r;
+
+  /* The inner loop leaves p one word away from where it started, in the
+     direction given by endian.  When that is not the direction in which
+     the words are traversed, step over two words to get to the next. */
+  word_step = (order != endian) ? 2 * size : 0;
+
+  /* Process bytes from the least significant end, so point p at the
+     least significant word. */
+  if (order == 1)
+    {
+      p += size * (count - 1);
+      word_step = - word_step;
+    }
+
+  /* And at the least significant byte of that word. */
+  if (endian == 1)
+    p += (size - 1);
+
+  for (bytes = 0, i = 0, k = 0; k < count; k++, p += word_step)
+    {
+      size_t j;
+      for (j = 0; j < size; j++, p -= (ptrdiff_t) endian)
+        {
+          if (bytes == 0)
+            {
+              /* Fetch the next limb.  Once the limbs are used up, limb
+                 is 0 and the rest of the last word is zero padded. */
+              if (i < un)
+                limb = u->_mp_d[i++];
+              bytes = sizeof (mp_limb_t);
+            }
+          *p = limb;
+          limb >>= CHAR_BIT;
+          bytes--;
+        }
+    }
+  assert (i == un);
+  assert (k == count);
+
+  if (countp)
+    *countp = count;
+
+  return r;
+}
diff --git a/mini-gmp/mini-gmp.h b/mini-gmp/mini-gmp.h

new file mode 100644 (file)

index 0000000..8c94ca2
--- /dev/null
+++ b/mini-gmp/mini-gmp.h
@@ -0,0 +1,256 @@
+/* mini-gmp, a minimalistic implementation of a GNU GMP subset.
+
+Copyright 2011, 2012, 2013 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+/* About mini-gmp: This is a minimal implementation of a subset of the
+   GMP interface. It is intended for inclusion into applications which
+   have modest bignum needs, as a fallback when the real GMP library
+   is not installed.
+
+   This file defines the public interface. */
+
+#ifndef __MINI_GMP_H__
+#define __MINI_GMP_H__
+
+/* For size_t */
+#include <stddef.h>
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+void mp_set_memory_functions (void *(*) (size_t),
+                             void *(*) (void *, size_t, size_t),
+                             void (*) (void *, size_t));
+
+void mp_get_memory_functions (void *(**) (size_t),
+                             void *(**) (void *, size_t, size_t),
+                             void (**) (void *, size_t));
+
+/* A limb: the word-sized unit that multi-limb numbers are stored in
+   (see the _mp_d field below).  */
+typedef unsigned long mp_limb_t;
+/* Signed type used for the limb-count arguments of the mpn functions.  */
+typedef long mp_size_t;
+/* Unsigned type used for bit counts and bit indices.  */
+typedef unsigned long mp_bitcnt_t;
+
+/* Pointers to writable and to read-only limb arrays.  */
+typedef mp_limb_t *mp_ptr;
+typedef const mp_limb_t *mp_srcptr;
+
+/* Representation of an integer: a sign-and-size field plus a pointer
+   to an array of limbs.  */
+typedef struct
+{
+  int _mp_alloc;               /* Number of *limbs* allocated and pointed
+                                  to by the _mp_d field.  */
+  int _mp_size;                        /* abs(_mp_size) is the number of limbs the
+                                  last field points to.  If _mp_size is
+                                  negative this is a negative number.  */
+  mp_limb_t *_mp_d;            /* Pointer to the limbs.  */
+} __mpz_struct;
+
+/* An array of one struct: declaring an mpz_t variable reserves storage
+   for the struct, and passing it to a function passes a pointer.  */
+typedef __mpz_struct mpz_t[1];
+
+/* Explicit pointer forms of the above.  */
+typedef __mpz_struct *mpz_ptr;
+typedef const __mpz_struct *mpz_srcptr;
+
+void mpn_copyi (mp_ptr, mp_srcptr, mp_size_t);
+void mpn_copyd (mp_ptr, mp_srcptr, mp_size_t);
+
+int mpn_cmp (mp_srcptr, mp_srcptr, mp_size_t);
+
+mp_limb_t mpn_add_1 (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t);
+mp_limb_t mpn_add_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);
+mp_limb_t mpn_add (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t);
+
+mp_limb_t mpn_sub_1 (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t);
+mp_limb_t mpn_sub_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);
+mp_limb_t mpn_sub (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t);
+
+mp_limb_t mpn_mul_1 (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t);
+mp_limb_t mpn_addmul_1 (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t);
+mp_limb_t mpn_submul_1 (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t);
+
+mp_limb_t mpn_mul (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t);
+void mpn_mul_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);
+void mpn_sqr (mp_ptr, mp_srcptr, mp_size_t);
+
+mp_limb_t mpn_lshift (mp_ptr, mp_srcptr, mp_size_t, unsigned int);
+mp_limb_t mpn_rshift (mp_ptr, mp_srcptr, mp_size_t, unsigned int);
+
+mp_limb_t mpn_invert_3by2 (mp_limb_t, mp_limb_t);
+#define mpn_invert_limb(x) mpn_invert_3by2 ((x), 0)
+
+size_t mpn_get_str (unsigned char *, int, mp_ptr, mp_size_t);
+mp_size_t mpn_set_str (mp_ptr, const unsigned char *, size_t, int);
+
+void mpz_init (mpz_t);
+void mpz_init2 (mpz_t, mp_bitcnt_t);
+void mpz_clear (mpz_t);
+
+/* Parity tests.  mpz_odd_p (z) is non-zero when z is non-zero and the
+   low bit of its least significant limb is set; mpz_even_p is its
+   negation.  These are macros that expand z more than once, so z must
+   not have side effects.  Because the operator is & rather than &&,
+   _mp_d[0] is read even when _mp_size is 0.  */
+#define mpz_odd_p(z)   (((z)->_mp_size != 0) & (int) (z)->_mp_d[0])
+#define mpz_even_p(z)  (! mpz_odd_p (z))
+
+int mpz_sgn (const mpz_t);
+int mpz_cmp_si (const mpz_t, long);
+int mpz_cmp_ui (const mpz_t, unsigned long);
+int mpz_cmp (const mpz_t, const mpz_t);
+int mpz_cmpabs_ui (const mpz_t, unsigned long);
+int mpz_cmpabs (const mpz_t, const mpz_t);
+int mpz_cmp_d (const mpz_t, double);
+int mpz_cmpabs_d (const mpz_t, double);
+
+void mpz_abs (mpz_t, const mpz_t);
+void mpz_neg (mpz_t, const mpz_t);
+void mpz_swap (mpz_t, mpz_t);
+
+void mpz_add_ui (mpz_t, const mpz_t, unsigned long);
+void mpz_add (mpz_t, const mpz_t, const mpz_t);
+void mpz_sub_ui (mpz_t, const mpz_t, unsigned long);
+void mpz_ui_sub (mpz_t, unsigned long, const mpz_t);
+void mpz_sub (mpz_t, const mpz_t, const mpz_t);
+
+void mpz_mul_si (mpz_t, const mpz_t, long int);
+void mpz_mul_ui (mpz_t, const mpz_t, unsigned long int);
+void mpz_mul (mpz_t, const mpz_t, const mpz_t);
+void mpz_mul_2exp (mpz_t, const mpz_t, mp_bitcnt_t);
+
+void mpz_cdiv_qr (mpz_t, mpz_t, const mpz_t, const mpz_t);
+void mpz_fdiv_qr (mpz_t, mpz_t, const mpz_t, const mpz_t);
+void mpz_tdiv_qr (mpz_t, mpz_t, const mpz_t, const mpz_t);
+void mpz_cdiv_q (mpz_t, const mpz_t, const mpz_t);
+void mpz_fdiv_q (mpz_t, const mpz_t, const mpz_t);
+void mpz_tdiv_q (mpz_t, const mpz_t, const mpz_t);
+void mpz_cdiv_r (mpz_t, const mpz_t, const mpz_t);
+void mpz_fdiv_r (mpz_t, const mpz_t, const mpz_t);
+void mpz_tdiv_r (mpz_t, const mpz_t, const mpz_t);
+
+void mpz_cdiv_q_2exp (mpz_t, const mpz_t, mp_bitcnt_t);
+void mpz_fdiv_q_2exp (mpz_t, const mpz_t, mp_bitcnt_t);
+void mpz_tdiv_q_2exp (mpz_t, const mpz_t, mp_bitcnt_t);
+void mpz_cdiv_r_2exp (mpz_t, const mpz_t, mp_bitcnt_t);
+void mpz_fdiv_r_2exp (mpz_t, const mpz_t, mp_bitcnt_t);
+void mpz_tdiv_r_2exp (mpz_t, const mpz_t, mp_bitcnt_t);
+
+void mpz_mod (mpz_t, const mpz_t, const mpz_t);
+
+void mpz_divexact (mpz_t, const mpz_t, const mpz_t);
+
+int mpz_divisible_p (const mpz_t, const mpz_t);
+
+unsigned long mpz_cdiv_qr_ui (mpz_t, mpz_t, const mpz_t, unsigned long);
+unsigned long mpz_fdiv_qr_ui (mpz_t, mpz_t, const mpz_t, unsigned long);
+unsigned long mpz_tdiv_qr_ui (mpz_t, mpz_t, const mpz_t, unsigned long);
+unsigned long mpz_cdiv_q_ui (mpz_t, const mpz_t, unsigned long);
+unsigned long mpz_fdiv_q_ui (mpz_t, const mpz_t, unsigned long);
+unsigned long mpz_tdiv_q_ui (mpz_t, const mpz_t, unsigned long);
+unsigned long mpz_cdiv_r_ui (mpz_t, const mpz_t, unsigned long);
+unsigned long mpz_fdiv_r_ui (mpz_t, const mpz_t, unsigned long);
+unsigned long mpz_tdiv_r_ui (mpz_t, const mpz_t, unsigned long);
+unsigned long mpz_cdiv_ui (const mpz_t, unsigned long);
+unsigned long mpz_fdiv_ui (const mpz_t, unsigned long);
+unsigned long mpz_tdiv_ui (const mpz_t, unsigned long);
+
+unsigned long mpz_mod_ui (mpz_t, const mpz_t, unsigned long);
+
+void mpz_divexact_ui (mpz_t, const mpz_t, unsigned long);
+
+int mpz_divisible_ui_p (const mpz_t, unsigned long);
+
+unsigned long mpz_gcd_ui (mpz_t, const mpz_t, unsigned long);
+void mpz_gcd (mpz_t, const mpz_t, const mpz_t);
+void mpz_gcdext (mpz_t, mpz_t, mpz_t, const mpz_t, const mpz_t);
+void mpz_lcm_ui (mpz_t, const mpz_t, unsigned long);
+void mpz_lcm (mpz_t, const mpz_t, const mpz_t);
+int mpz_invert (mpz_t, const mpz_t, const mpz_t);
+
+void mpz_sqrtrem (mpz_t, mpz_t, const mpz_t);
+void mpz_sqrt (mpz_t, const mpz_t);
+
+void mpz_pow_ui (mpz_t, const mpz_t, unsigned long);
+void mpz_ui_pow_ui (mpz_t, unsigned long, unsigned long);
+void mpz_powm (mpz_t, const mpz_t, const mpz_t, const mpz_t);
+void mpz_powm_ui (mpz_t, const mpz_t, unsigned long, const mpz_t);
+
+void mpz_rootrem (mpz_t, mpz_t, const mpz_t, unsigned long);
+int mpz_root (mpz_t, const mpz_t, unsigned long);
+
+void mpz_fac_ui (mpz_t, unsigned long);
+void mpz_bin_uiui (mpz_t, unsigned long, unsigned long);
+
+int mpz_tstbit (const mpz_t, mp_bitcnt_t);
+void mpz_setbit (mpz_t, mp_bitcnt_t);
+void mpz_clrbit (mpz_t, mp_bitcnt_t);
+void mpz_combit (mpz_t, mp_bitcnt_t);
+
+void mpz_com (mpz_t, const mpz_t);
+void mpz_and (mpz_t, const mpz_t, const mpz_t);
+void mpz_ior (mpz_t, const mpz_t, const mpz_t);
+void mpz_xor (mpz_t, const mpz_t, const mpz_t);
+
+mp_bitcnt_t mpz_popcount (const mpz_t);
+mp_bitcnt_t mpz_hamdist (const mpz_t, const mpz_t);
+mp_bitcnt_t mpz_scan0 (const mpz_t, mp_bitcnt_t);
+mp_bitcnt_t mpz_scan1 (const mpz_t, mp_bitcnt_t);
+
+int mpz_fits_slong_p (const mpz_t);
+int mpz_fits_ulong_p (const mpz_t);
+long int mpz_get_si (const mpz_t);
+unsigned long int mpz_get_ui (const mpz_t);
+double mpz_get_d (const mpz_t);
+size_t mpz_size (const mpz_t);
+mp_limb_t mpz_getlimbn (const mpz_t, mp_size_t);
+
+void mpz_set_si (mpz_t, signed long int);
+void mpz_set_ui (mpz_t, unsigned long int);
+void mpz_set (mpz_t, const mpz_t);
+void mpz_set_d (mpz_t, double);
+
+void mpz_init_set_si (mpz_t, signed long int);
+void mpz_init_set_ui (mpz_t, unsigned long int);
+void mpz_init_set (mpz_t, const mpz_t);
+void mpz_init_set_d (mpz_t, double);
+
+size_t mpz_sizeinbase (const mpz_t, int);
+char *mpz_get_str (char *, int, const mpz_t);
+int mpz_set_str (mpz_t, const char *, int);
+int mpz_init_set_str (mpz_t, const char *, int);
+
+/* This long list taken from gmp.h. */
+/* For reference, "defined(EOF)" cannot be used here.  In g++ 2.95.4,
+   <iostream> defines EOF but not FILE.  */
+#if defined (FILE)                                              \
+  || defined (H_STDIO)                                          \
+  || defined (_H_STDIO)               /* AIX */                 \
+  || defined (_STDIO_H)               /* glibc, Sun, SCO */     \
+  || defined (_STDIO_H_)              /* BSD, OSF */            \
+  || defined (__STDIO_H)              /* Borland */             \
+  || defined (__STDIO_H__)            /* IRIX */                \
+  || defined (_STDIO_INCLUDED)        /* HPUX */                \
+  || defined (__dj_include_stdio_h_)  /* DJGPP */               \
+  || defined (_FILE_DEFINED)          /* Microsoft */           \
+  || defined (__STDIO__)              /* Apple MPW MrC */       \
+  || defined (_MSL_STDIO_H)           /* Metrowerks */          \
+  || defined (_STDIO_H_INCLUDED)      /* QNX4 */               \
+  || defined (_ISO_STDIO_ISO_H)       /* Sun C++ */            \
+  || defined (__STDIO_LOADED)         /* VMS */
+size_t mpz_out_str (FILE *, int, const mpz_t);
+#endif
+
+void mpz_import (mpz_t, size_t, int, size_t, int, size_t, const void *);
+void *mpz_export (void *, size_t *, int, size_t, int, size_t, const mpz_t);
+
+#if defined (__cplusplus)
+}
+#endif
+#endif /* __MINI_GMP_H__ */
diff --git a/mini-gmp/tests/Makefile b/mini-gmp/tests/Makefile

new file mode 100644 (file)

index 0000000..2476f28
--- /dev/null
+++ b/mini-gmp/tests/Makefile
@@ -0,0 +1,60 @@
+# Note: Requires GNU make
+
+# Copyright 2011, 2012 Free Software Foundation, Inc.
+#
+# This file is part of the GNU MP Library test suite.
+#
+# The GNU MP Library test suite is free software; you can redistribute it
+# and/or modify it under the terms of the GNU General Public License as
+# published by the Free Software Foundation; either version 3 of the License,
+# or (at your option) any later version.
+#
+# The GNU MP Library test suite is distributed in the hope that it will be
+# useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+# Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along with
+# the GNU MP Library test suite.  If not, see http://www.gnu.org/licenses/.
+
+# Location of this directory and of the mini-gmp sources (mini-gmp.h,
+# mini-gmp.c).
+srcdir=.
+MINI_GMP_DIR=..
+
+# Compiler settings.  Override EXTRA_CFLAGS to change optimization and
+# warning flags without losing the -I for the mini-gmp header.
+CC = gcc
+EXTRA_CFLAGS = -O -Wall -g
+CFLAGS = $(EXTRA_CFLAGS) -I$(MINI_GMP_DIR)
+LDFLAGS =
+
+# The test programs link against the real GMP library (-lgmp);
+# hex-random.c includes gmp.h and uses it to produce reference values.
+LIBS = -lgmp -lm -lmcheck
+
+# Test programs built and run by the "check" target.
+CHECK_PROGRAMS = t-add t-sub t-mul t-invert t-div t-div_2exp \
+       t-double t-cmp_d t-gcd t-lcm t-import t-comb t-signed \
+       t-sqrt t-root t-powm t-logops t-bitops t-scan t-str \
+       t-reuse
+
+# Helper objects linked into every test program.
+MISC_OBJS = hex-random.o mini-random.o testutils.o
+
+# Default target: intentionally builds nothing; use "make check".
+all:
+
+clean:
+       rm -f *.o $(CHECK_PROGRAMS)
+
+# Rules without a recipe: presumably meant to cancel make's built-in
+# rules that build a program directly from a .c file, so that programs
+# are always linked through the "%: %.o" rule below.
+%: %.c
+.c:
+
+# Keep object files
+.PRECIOUS: %.o
+
+# Every object is rebuilt when one of the shared headers changes.
+%.o: %.c $(MINI_GMP_DIR)/mini-gmp.h hex-random.h mini-random.h
+       $(CC) $(CFLAGS) -c $< -o $@
+
+# NOTE(review): presumably testutils.c includes mini-gmp.c directly,
+# which would explain this extra dependency -- confirm.
+testutils.o: $(MINI_GMP_DIR)/mini-gmp.c
+
+# Link a test program from its own object plus the helper objects.
+%: %.o $(MISC_OBJS)
+       $(CC) $(LDFLAGS) $^ $(LIBS) -o $@
+
+# Missing tests include:
+#   mpz_cmp_d, mpz_popcount, mpz_hamdist, mpz_ui_pow_ui
+# NOTE(review): t-cmp_d is listed in CHECK_PROGRAMS above, so mpz_cmp_d
+# may no longer belong in this list -- confirm.
+
+# Build all test programs, then hand them to the run-tests script.
+check: $(CHECK_PROGRAMS)
+       $(srcdir)/run-tests $(CHECK_PROGRAMS)
diff --git a/mini-gmp/tests/hex-random.c b/mini-gmp/tests/hex-random.c

new file mode 100644 (file)

index 0000000..53b4460
--- /dev/null
+++ b/mini-gmp/tests/hex-random.c
@@ -0,0 +1,434 @@
+/*
+
+Copyright 2011, Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see http://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include <time.h>
+#include <unistd.h>
+
+#include "gmp.h"
+
+#include "hex-random.h"
+
+static gmp_randstate_t state;
+
+/* Set up the file-level random state.
+
+   The seed defaults to 4711.  If the environment variable
+   GMP_CHECK_RANDOMIZE is set to a non-empty string, it is parsed with
+   strtoul: a non-zero value is used as the seed, while zero requests a
+   seed derived from the current time and process id.  In both of these
+   cases the seed is printed.  */
+void
+hex_random_init (void)
+{
+  const char *setting = getenv("GMP_CHECK_RANDOMIZE");
+  unsigned long seed = 4711;
+
+  if (setting != NULL && setting[0] != '\0')
+    {
+      seed = strtoul (setting, NULL, 0);
+      if (seed == 0)
+        {
+          seed = time(NULL) + getpid();
+          printf ("Seed GMP_CHECK_RANDOMIZE=%lu (include this in bug reports)\n", seed);
+        }
+      else
+        printf ("Re-seeding with GMP_CHECK_RANDOMIZE=%lu\n", seed);
+    }
+
+  gmp_randinit_default (state);
+  gmp_randseed_ui (state, seed);
+}
+
+/* Draw a number with mpz_urandomb (x, state, bits) and return it as a
+   hex string produced by gmp_asprintf.  The caller owns the string.  */
+char *
+hex_urandomb (unsigned long bits)
+{
+  mpz_t value;
+  char *hex;
+
+  mpz_init (value);
+  mpz_urandomb (value, state, bits);
+
+  gmp_asprintf (&hex, "%Zx", value);
+
+  mpz_clear (value);
+  return hex;
+}
+
+/* Draw a number with mpz_rrandomb (x, state, bits) and return it as a
+   hex string produced by gmp_asprintf.  The caller owns the string.  */
+char *
+hex_rrandomb (unsigned long bits)
+{
+  mpz_t value;
+  char *hex;
+
+  mpz_init (value);
+  mpz_rrandomb (value, state, bits);
+
+  gmp_asprintf (&hex, "%Zx", value);
+
+  mpz_clear (value);
+  return hex;
+}
+
+/* Draw a number with mpz_rrandomb, export it with mpz_export (dst,
+   countp, order, size, endian, 0, x), and return the same number as a
+   hex string produced by gmp_asprintf.  The caller owns the string.  */
+char *
+hex_rrandomb_export (void *dst, size_t *countp,
+                     int order, size_t size, int endian, unsigned long bits)
+{
+  mpz_t value;
+  char *hex;
+
+  mpz_init (value);
+  mpz_rrandomb (value, state, bits);
+
+  /* Hex form first, then the exported form of the same value. */
+  gmp_asprintf (&hex, "%Zx", value);
+  mpz_export (dst, countp, order, size, endian, 0, value);
+
+  mpz_clear (value);
+  return hex;
+}
+
+/* Generate a random operand and the reference result of a one-operand
+   operation.  Only OP_SQR is supported; any other OP aborts.
+
+   The operand has a random bit size below MAXBITS and a random sign.
+   *AP and *RP receive the operand and the result as hex strings
+   produced by gmp_asprintf.  */
+void
+hex_random_op2 (enum hex_random_op op,  unsigned long maxbits,
+                char **ap, char **rp)
+{
+  mpz_t operand, result;
+  unsigned long nbits;
+
+  mpz_init (operand);
+  mpz_init (result);
+
+  nbits = gmp_urandomb_ui (state, 32) % maxbits;
+  mpz_rrandomb (operand, state, nbits);
+
+  /* One random bit decides the sign. */
+  if (gmp_urandomb_ui (state, 1) & 1)
+    mpz_neg (operand, operand);
+
+  if (op != OP_SQR)
+    abort ();
+  mpz_mul (result, operand, operand);
+
+  gmp_asprintf (ap, "%Zx", operand);
+  gmp_asprintf (rp, "%Zx", result);
+
+  mpz_clear (operand);
+  mpz_clear (result);
+}
+
+/* Generate two random operands and the reference result of a
+   two-operand operation (add, sub, mul, gcd, lcm, and, ior, xor).  Any
+   other OP aborts.
+
+   Both operands get a random bit size below MAXBITS and a random sign.
+   For OP_GCD and OP_LCM a third random bit decides whether both
+   operands are multiplied by a common random factor first.  *AP, *BP
+   and *RP receive the (possibly scaled) operands and the result as hex
+   strings produced by gmp_asprintf.  */
+void
+hex_random_op3 (enum hex_random_op op,  unsigned long maxbits,
+                char **ap, char **bp, char **rp)
+{
+  mpz_t a, b, r;
+  unsigned long asize, bsize;
+  unsigned flags;
+
+  mpz_init (a);
+  mpz_init (b);
+  mpz_init (r);
+
+  asize = gmp_urandomb_ui (state, 32) % maxbits;
+  bsize = gmp_urandomb_ui (state, 32) % maxbits;
+
+  mpz_rrandomb (a, state, asize);
+  mpz_rrandomb (b, state, bsize);
+
+  /* Bit 0: negate a.  Bit 1: negate b.  Bit 2: force a common factor
+     (gcd and lcm only). */
+  flags = gmp_urandomb_ui (state, 3);
+  if (flags & 1)
+    mpz_neg (a, a);
+  if (flags & 2)
+    mpz_neg (b, b);
+
+  switch (op)
+    {
+    case OP_ADD:
+      mpz_add (r, a, b);
+      break;
+    case OP_SUB:
+      mpz_sub (r, a, b);
+      break;
+    case OP_MUL:
+      mpz_mul (r, a, b);
+      break;
+    case OP_GCD:
+    case OP_LCM:
+      if (flags & 4)
+        {
+          /* Produce a large gcd */
+          unsigned long gsize = gmp_urandomb_ui (state, 32) % maxbits;
+          mpz_rrandomb (r, state, gsize);
+          mpz_mul (a, a, r);
+          mpz_mul (b, b, r);
+        }
+      if (op == OP_GCD)
+        mpz_gcd (r, a, b);
+      else
+        mpz_lcm (r, a, b);
+      break;
+    case OP_AND:
+      mpz_and (r, a, b);
+      break;
+    case OP_IOR:
+      mpz_ior (r, a, b);
+      break;
+    case OP_XOR:
+      mpz_xor (r, a, b);
+      break;
+    default:
+      abort ();
+    }
+
+  gmp_asprintf (ap, "%Zx", a);
+  gmp_asprintf (bp, "%Zx", b);
+  gmp_asprintf (rp, "%Zx", r);
+
+  mpz_clear (a);
+  mpz_clear (b);
+  mpz_clear (r);
+}
+
+/* Generate random operands and reference results for an operation
+   with four values.
+
+   OP_POWM: a is the base, b the exponent and c the modulus;
+   d = mpz_powm (a, b, c).
+   OP_CDIV, OP_FDIV, OP_TDIV: a is the dividend and b the divisor; c is
+   the quotient and d the remainder of the corresponding mpz_?div_qr.
+   Any other OP aborts.
+
+   *AP, *BP, *CP and *DP receive a, b, c and d as hex strings produced
+   by gmp_asprintf.  */
+void
+hex_random_op4 (enum hex_random_op op, unsigned long maxbits,
+               char **ap, char **bp, char **cp, char **dp)
+{
+  mpz_t a, b, c, d;
+  unsigned long abits, bbits;
+  unsigned signs;
+
+  mpz_init (a);
+  mpz_init (b);
+  mpz_init (c);
+  mpz_init (d);
+
+  if (op == OP_POWM)
+    {
+      unsigned long cbits;
+      /* The exponent size is at least 1 bit and the modulus size at
+         least 2 bits. */
+      abits = gmp_urandomb_ui (state, 32) % maxbits;
+      bbits = 1 + gmp_urandomb_ui (state, 32) % maxbits;
+      cbits = 2 + gmp_urandomb_ui (state, 32) % maxbits;
+
+      mpz_rrandomb (a, state, abits);
+      mpz_rrandomb (b, state, bbits);
+      mpz_rrandomb (c, state, cbits);
+
+      /* Bit 0: negate a.  Bit 1: negate b.  Bit 2: negate c. */
+      signs = gmp_urandomb_ui (state, 3);
+      if (signs & 1)
+       mpz_neg (a, a);
+      if (signs & 2)
+       {
+         mpz_t g;
+
+         /* If we negate the exponent, must make sure that gcd(a, c) = 1 */
+         if (mpz_sgn (a) == 0)
+           mpz_set_ui (a, 1);
+         else
+           {
+             mpz_init (g);
+
+             /* Divide out common factors until a is coprime to c. */
+             for (;;)
+               {
+                 mpz_gcd (g, a, c);
+                 if (mpz_cmp_ui (g, 1) == 0)
+                   break;
+                 mpz_divexact (a, a, g);
+               }
+             mpz_clear (g);
+           }
+         mpz_neg (b, b);
+       }
+      if (signs & 4)
+       mpz_neg (c, c);
+
+      mpz_powm (d, a, b, c);
+    }
+  else
+    {
+      unsigned long qbits;
+      /* The divisor size is at least 1 bit.  The dividend size is
+         bbits + qbits - 30, or 0 when that would be negative. */
+      bbits = 1 + gmp_urandomb_ui (state, 32) % maxbits;
+      qbits = gmp_urandomb_ui (state, 32) % maxbits;
+      abits = bbits + qbits;
+      if (abits > 30)
+       abits -= 30;
+      else
+       abits = 0;
+
+      mpz_rrandomb (a, state, abits);
+      mpz_rrandomb (b, state, bbits);
+
+      /* Bit 0: negate a.  Bit 1: negate b. */
+      signs = gmp_urandomb_ui (state, 2);
+      if (signs & 1)
+       mpz_neg (a, a);
+      if (signs & 2)
+       mpz_neg (b, b);
+
+      switch (op)
+       {
+       default:
+         abort ();
+       case OP_CDIV:
+         mpz_cdiv_qr (c, d, a, b);
+         break;
+       case OP_FDIV:
+         mpz_fdiv_qr (c, d, a, b);
+         break;
+       case OP_TDIV:
+         mpz_tdiv_qr (c, d, a, b);
+         break;
+       }
+    }
+  gmp_asprintf (ap, "%Zx", a);
+  gmp_asprintf (bp, "%Zx", b);
+  gmp_asprintf (cp, "%Zx", c);
+  gmp_asprintf (dp, "%Zx", d);
+
+  mpz_clear (a);
+  mpz_clear (b);
+  mpz_clear (c);
+  mpz_clear (d);
+}
+
+/* Generate a random operand, a random bit count, and the reference
+   result of a bit operation (setbit, clrbit, combit) or of a division
+   by a power of two (the ?div_[qr]_2exp family).  Any other OP aborts.
+
+   The operand has a random bit size below MAXBITS and a random sign;
+   the bit count is below MAXBITS + 100.  *AP and *RP receive the
+   operand and the result as hex strings produced by gmp_asprintf, and
+   *B receives the bit count.  */
+void
+hex_random_bit_op (enum hex_random_op op, unsigned long maxbits,
+                   char **ap, unsigned long *b, char **rp)
+{
+  mpz_t a, r;
+  unsigned long asize, bitno;
+
+  mpz_init (a);
+  mpz_init (r);
+
+  asize = gmp_urandomb_ui (state, 32) % maxbits;
+  bitno = gmp_urandomb_ui (state, 32) % (maxbits + 100);
+
+  mpz_rrandomb (a, state, asize);
+
+  /* One random bit decides the sign. */
+  if (gmp_urandomb_ui (state, 1) & 1)
+    mpz_neg (a, a);
+
+  switch (op)
+    {
+    case OP_SETBIT:
+    case OP_CLRBIT:
+    case OP_COMBIT:
+      /* These work in place, so start from a copy of the operand. */
+      mpz_set (r, a);
+      if (op == OP_SETBIT)
+        mpz_setbit (r, bitno);
+      else if (op == OP_CLRBIT)
+        mpz_clrbit (r, bitno);
+      else
+        mpz_combit (r, bitno);
+      break;
+
+    case OP_CDIV_Q_2:
+      mpz_cdiv_q_2exp (r, a, bitno);
+      break;
+    case OP_CDIV_R_2:
+      mpz_cdiv_r_2exp (r, a, bitno);
+      break;
+    case OP_FDIV_Q_2:
+      mpz_fdiv_q_2exp (r, a, bitno);
+      break;
+    case OP_FDIV_R_2:
+      mpz_fdiv_r_2exp (r, a, bitno);
+      break;
+    case OP_TDIV_Q_2:
+      mpz_tdiv_q_2exp (r, a, bitno);
+      break;
+    case OP_TDIV_R_2:
+      mpz_tdiv_r_2exp (r, a, bitno);
+      break;
+
+    default:
+      abort ();
+    }
+
+  gmp_asprintf (ap, "%Zx", a);
+  *b = bitno;
+  gmp_asprintf (rp, "%Zx", r);
+
+  mpz_clear (a);
+  mpz_clear (r);
+}
+
+/* Generate a random operand, a random starting bit, and the reference
+   result of mpz_scan0 (OP_SCAN0) or mpz_scan1 (OP_SCAN1).  Any other
+   OP aborts.
+
+   The operand has a random bit size below MAXBITS and a random sign;
+   the starting bit is below MAXBITS + 100.  *AP receives the operand
+   as a hex string produced by gmp_asprintf, *B the starting bit and *R
+   the scan result.  */
+void
+hex_random_scan_op (enum hex_random_op op, unsigned long maxbits,
+                    char **ap, unsigned long *b, unsigned long *r)
+{
+  mpz_t a;
+  unsigned long asize, start;
+
+  mpz_init (a);
+
+  asize = gmp_urandomb_ui (state, 32) % maxbits;
+  start = gmp_urandomb_ui (state, 32) % (maxbits + 100);
+
+  mpz_rrandomb (a, state, asize);
+
+  /* One random bit decides the sign. */
+  if (gmp_urandomb_ui (state, 1) & 1)
+    mpz_neg (a, a);
+
+  if (op == OP_SCAN0)
+    *r = mpz_scan0 (a, start);
+  else if (op == OP_SCAN1)
+    *r = mpz_scan1 (a, start);
+  else
+    abort ();
+
+  gmp_asprintf (ap, "%Zx", a);
+  *b = start;
+
+  mpz_clear (a);
+}
+
+/* Generate a random number with a random bit size below MAXBITS and a
+   random sign, and return it twice: *AP is its base-16 string and *RP
+   its string in BASE, both produced by mpz_get_str (NULL, ...).  */
+void
+hex_random_str_op (unsigned long maxbits,
+                   int base, char **ap, char **rp)
+{
+  mpz_t value;
+  unsigned long nbits;
+  unsigned flags;
+
+  mpz_init (value);
+
+  nbits = gmp_urandomb_ui (state, 32) % maxbits;
+  mpz_rrandomb (value, state, nbits);
+
+  /* Two random bits are drawn; only the low one is used, as the sign. */
+  flags = gmp_urandomb_ui (state, 2);
+  if (flags & 1)
+    mpz_neg (value, value);
+
+  *ap = mpz_get_str (NULL, 16, value);
+  *rp = mpz_get_str (NULL, base, value);
+
+  mpz_clear (value);
+}
diff --git a/mini-gmp/tests/hex-random.h b/mini-gmp/tests/hex-random.h

new file mode 100644 (file)

index 0000000..996bc4b
--- /dev/null
+++ b/mini-gmp/tests/hex-random.h
@@ -0,0 +1,50 @@
+/*
+
+Copyright 2011, Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see http://www.gnu.org/licenses/.  */
+
+#ifndef HEX_RANDOM_H
+#define HEX_RANDOM_H
+
+/* For size_t */
+#include <stddef.h>
+
+/* Operations for which the hex_random_* functions can produce random
+   operands together with a reference result.  */
+enum hex_random_op
+  {
+    OP_ADD, OP_SUB, OP_MUL, OP_SQR,
+    OP_CDIV, OP_FDIV, OP_TDIV,
+    OP_CDIV_Q_2, OP_CDIV_R_2,
+    OP_FDIV_Q_2, OP_FDIV_R_2,
+    OP_TDIV_Q_2,  OP_TDIV_R_2,
+    OP_GCD, OP_LCM, OP_POWM, OP_AND, OP_IOR, OP_XOR,
+    OP_SETBIT, OP_CLRBIT, OP_COMBIT,
+    OP_SCAN0, OP_SCAN1
+  };
+
+/* Seed the generator; call before any of the functions below.  */
+void hex_random_init (void);
+
+/* Random numbers, returned as newly allocated hex strings.  */
+char *hex_urandomb (unsigned long bits);
+char *hex_rrandomb (unsigned long bits);
+/* As hex_rrandomb, and also exports the number to dst (mpz_export).  */
+char *hex_rrandomb_export (void *dst, size_t *countp,
+                          int order, size_t size, int endian,
+                          unsigned long bits);
+
+/* Random operands and reference results, as newly allocated strings.  */
+void hex_random_op2 (enum hex_random_op op,  unsigned long maxbits,
+                    char **ap, char **rp);
+void hex_random_op3 (enum hex_random_op op,  unsigned long maxbits,
+                    char **ap, char **bp, char **rp);
+/* For the division operations *cp is the quotient and *dp the
+   remainder; for OP_POWM *cp is the modulus and *dp the result.  */
+void hex_random_op4 (enum hex_random_op op,  unsigned long maxbits,
+                    char **ap, char **bp, char **cp, char **dp);
+void hex_random_bit_op (enum hex_random_op op, unsigned long maxbits,
+                       char **ap, unsigned long *b, char **rp);
+void hex_random_scan_op (enum hex_random_op op, unsigned long maxbits,
+                       char **ap, unsigned long *b, unsigned long *r);
+void hex_random_str_op (unsigned long maxbits,
+                       int base, char **ap, char **rp);
+
+#endif /* HEX_RANDOM_H */
diff --git a/mini-gmp/tests/mini-random.c b/mini-gmp/tests/mini-random.c

new file mode 100644 (file)

index 0000000..87a9cea
--- /dev/null
+++ b/mini-gmp/tests/mini-random.c
@@ -0,0 +1,142 @@
+/*
+
+Copyright 2011, 2013, Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see http://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "mini-random.h"
+
+/* Parse the hexadecimal string S into R.  A parse failure is reported on
+   stderr and the process is aborted, so callers never see an error.  */
+static void
+set_str (mpz_t r, const char *s)
+{
+  if (mpz_set_str (r, s, 16) == 0)
+    return;
+
+  fprintf (stderr, "mpz_set_str failed on input %s\n", s);
+  abort ();
+}
+
+/* Set R from the hex string returned by hex_urandomb (BITS).  The string
+   is released before returning.  */
+void
+mini_urandomb (mpz_t r, unsigned long bits)
+{
+  char *hex = hex_urandomb (bits);
+
+  set_str (r, hex);
+  free (hex);
+}
+
+/* Set R from the hex string returned by hex_rrandomb (BITS).  The string
+   is released before returning.  */
+void
+mini_rrandomb (mpz_t r, unsigned long bits)
+{
+  char *hex = hex_rrandomb (bits);
+
+  set_str (r, hex);
+  free (hex);
+}
+
+/* Forward DST, COUNTP, ORDER, SIZE, ENDIAN and BITS unchanged to
+   hex_rrandomb_export, then set R from the hex string it returns.  The
+   string is released before returning.  */
+void
+mini_rrandomb_export (mpz_t r, void *dst, size_t *countp,
+                      int order, size_t size, int endian, unsigned long bits)
+{
+  char *hex = hex_rrandomb_export (dst, countp, order, size, endian, bits);
+
+  set_str (r, hex);
+  free (hex);
+}
+
+/* Obtain the two hex strings hex_random_op2 produces for OP and store
+   their values in A and R.  Both strings are freed afterwards.  */
+void
+mini_random_op2 (enum hex_random_op op, unsigned long maxbits,
+                 mpz_t a, mpz_t r)
+{
+  char *hex[2];
+  int k;
+
+  hex_random_op2 (op, maxbits, &hex[0], &hex[1]);
+
+  set_str (a, hex[0]);
+  set_str (r, hex[1]);
+
+  for (k = 0; k < 2; k++)
+    free (hex[k]);
+}
+
+/* Obtain the three hex strings hex_random_op3 produces for OP and store
+   their values in A, B and R.  All strings are freed afterwards.  */
+void
+mini_random_op3 (enum hex_random_op op, unsigned long maxbits,
+                 mpz_t a, mpz_t b, mpz_t r)
+{
+  char *hex[3];
+  int k;
+
+  hex_random_op3 (op, maxbits, &hex[0], &hex[1], &hex[2]);
+
+  set_str (a, hex[0]);
+  set_str (b, hex[1]);
+  set_str (r, hex[2]);
+
+  for (k = 0; k < 3; k++)
+    free (hex[k]);
+}
+
+/* Obtain the four hex strings hex_random_op4 produces for OP and store
+   their values in A, B, C and D, in that order.  All strings are freed
+   afterwards.  */
+void
+mini_random_op4 (enum hex_random_op op, unsigned long maxbits,
+                 mpz_t a, mpz_t b, mpz_t c, mpz_t d)
+{
+  char *hex[4];
+  int k;
+
+  hex_random_op4 (op, maxbits, &hex[0], &hex[1], &hex[2], &hex[3]);
+
+  set_str (a, hex[0]);
+  set_str (b, hex[1]);
+  set_str (c, hex[2]);
+  set_str (d, hex[3]);
+
+  for (k = 0; k < 4; k++)
+    free (hex[k]);
+}
+
+/* Obtain operand and reference strings from hex_random_bit_op and store
+   their values in A and R.  The bit index is written straight through B
+   by the generator.  Both strings are freed afterwards.  */
+void
+mini_random_bit_op (enum hex_random_op op, unsigned long maxbits,
+                    mpz_t a, mp_bitcnt_t *b, mpz_t r)
+{
+  char *hex[2];
+  int k;
+
+  hex_random_bit_op (op, maxbits, &hex[0], b, &hex[1]);
+
+  set_str (a, hex[0]);
+  set_str (r, hex[1]);
+
+  for (k = 0; k < 2; k++)
+    free (hex[k]);
+}
+
+/* Obtain the operand string from hex_random_scan_op and store its value
+   in A.  B and R are filled in directly by the generator.  The string is
+   freed afterwards.  */
+void
+mini_random_scan_op (enum hex_random_op op, unsigned long maxbits,
+                     mpz_t a, mp_bitcnt_t *b, mp_bitcnt_t *r)
+{
+  char *hex;
+
+  hex_random_scan_op (op, maxbits, &hex, b, r);
+
+  set_str (a, hex);
+  free (hex);
+}
diff --git a/mini-gmp/tests/mini-random.h b/mini-gmp/tests/mini-random.h

new file mode 100644 (file)

index 0000000..def83b0
--- /dev/null
+++ b/mini-gmp/tests/mini-random.h
@@ -0,0 +1,33 @@
+/*
+
+Copyright 2011, Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see http://www.gnu.org/licenses/.  */
+
+/* mpz_t front ends for the string based generators in hex-random.h.  The
+   include guard matters because hex-random.h defines an enum, which must
+   not be seen twice in one translation unit.  */
+#ifndef MINI_RANDOM_H
+#define MINI_RANDOM_H
+
+#include "mini-gmp.h"
+#include "hex-random.h"
+
+/* Each function asks the matching hex_* generator for hexadecimal
+   strings, stores their values in the mpz_t arguments and frees the
+   strings.  A string that fails to parse aborts the program.  */
+void mini_urandomb (mpz_t r, unsigned long bits);
+void mini_rrandomb (mpz_t r, unsigned long bits);
+void mini_rrandomb_export (mpz_t r, void *dst, size_t *countp,
+                           int order, size_t size, int endian,
+                           unsigned long bits);
+
+void mini_random_op2 (enum hex_random_op op, unsigned long maxbits,
+                      mpz_t a, mpz_t r);
+void mini_random_op3 (enum hex_random_op op, unsigned long maxbits,
+                      mpz_t a, mpz_t b, mpz_t r);
+void mini_random_op4 (enum hex_random_op op, unsigned long maxbits,
+                      mpz_t a, mpz_t b, mpz_t c, mpz_t d);
+void mini_random_scan_op (enum hex_random_op op, unsigned long maxbits,
+                          mpz_t a, mp_bitcnt_t *b, mp_bitcnt_t *r);
+void mini_random_bit_op (enum hex_random_op op, unsigned long maxbits,
+                         mpz_t a, mp_bitcnt_t *b, mpz_t r);
+
+#endif /* MINI_RANDOM_H */
diff --git a/mini-gmp/tests/run-tests b/mini-gmp/tests/run-tests

new file mode 100755 (executable)

index 0000000..3d5655c
--- /dev/null
+++ b/mini-gmp/tests/run-tests
@@ -0,0 +1,123 @@
+#! /bin/sh
+
+# Copyright (C) 2000, 2001, 2002, 2004, 2005, 2011, 2012  Niels Möller
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+failed=0
+all=0
+
+debug='no'
+testflags=''
+
+if [ -z "$srcdir" ] ; then
+  srcdir=`pwd`
+fi
+
+export srcdir
+
+# When used in make rules, we sometimes get the filenames VPATH
+# expanded, but usually not.
+find_program () {
+    case "$1" in
+       */*)
+         echo "$1"
+         ;;
+       *)
+         if [ -x "$1" ] ; then
+             echo "./$1"
+         else
+             echo "$srcdir/$1"
+         fi
+         ;;
+    esac
+}
+
+env_program () {
+  if [ -x "$1" ] ; then
+    if "$1"; then : ; else
+      echo FAIL: $1
+      exit 1
+    fi
+  fi
+}
+
+test_program () {
+  testname=`basename "$1" .exe`
+  testname=`basename "$testname" -test`
+  if [ -z "$EMULATOR" ] || head -1 "$1" | grep '^#!' > /dev/null; then
+    "$1" $testflags
+  else
+    $EMULATOR "$1" $testflags
+  fi
+  case "$?" in
+      0)
+       echo PASS: $testname
+       all=`expr $all + 1`
+       ;;
+      77)
+       echo SKIP: $testname
+      ;;
+      *)
+       echo FAIL: $testname
+       failed=`expr $failed + 1`
+       all=`expr $all + 1`
+       ;;
+  esac
+}
+
+# Optional environment setup hook; it runs before the options are parsed.
+# The path goes through a variable so that a directory name containing
+# whitespace or glob characters is not split or expanded.
+setup_prog=`find_program setup-env`
+env_program "$setup_prog"
+
+# Leading options: --debug skips the teardown hook, -v is forwarded to
+# every test program.  The first non-option argument ends the parsing.
+while test $# != 0
+do
+  case "$1" in
+  --debug)
+    debug=yes
+    ;;
+  -v)
+    testflags='-v'
+    ;;
+  -*)
+    echo >&2 'Unknown option `'"$1'"
+    exit 1
+    ;;
+  *)
+    break
+    ;;
+  esac
+  shift
+done
+
+# Without arguments run every *-test in the current directory, otherwise
+# run exactly the programs named on the command line.
+if [ $# -eq 0 ] ; then
+  for f in *-test; do test_program "./$f"; done
+else
+  for f in "$@" ; do
+    prog=`find_program "$f"`
+    test_program "$prog"
+  done
+fi
+
+if [ $failed -eq 0 ] ; then
+  banner="All $all tests passed"
+else
+  banner="$failed of $all tests failed"
+fi
+# A rule of '=' characters exactly as wide as the banner.
+dashes=`echo "$banner" | sed 's/./=/g'`
+echo "$dashes"
+echo "$banner"
+echo "$dashes"
+
+if [ "x$debug" = xno ] ; then
+  teardown_prog=`find_program teardown-env`
+  env_program "$teardown_prog"
+fi
+
+# The exit status of the script reports whether every test passed.
+[ "$failed" -eq 0 ]
diff --git a/mini-gmp/tests/t-add.c b/mini-gmp/tests/t-add.c

new file mode 100644 (file)

index 0000000..0d3a0da
--- /dev/null
+++ b/mini-gmp/tests/t-add.c
@@ -0,0 +1,65 @@
+/*
+
+Copyright 2012, Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see http://www.gnu.org/licenses/.  */
+
+#include <stdlib.h>
+#include <stdio.h>
+
+#include "testutils.h"
+
+#define MAXBITS 400
+#define COUNT 10000
+
+/* Print "LABEL: value" to stderr with X written in base 16.  The string
+   obtained from mpz_get_str is handed to testfree afterwards.  */
+static void
+dump (const char *label, const mpz_t x)
+{
+  char *hex;
+
+  hex = mpz_get_str (NULL, 16, x);
+  fprintf (stderr, "%s: %s\n", label, hex);
+  testfree (hex);
+}
+
+/* Check mpz_add against COUNT random reference sums.  The first mismatch
+   dumps operands, result and reference to stderr and aborts.  */
+void
+testmain (int argc, char **argv)
+{
+  mpz_t a, b, res, ref;
+  unsigned round;
+
+  mpz_init (a);
+  mpz_init (b);
+  mpz_init (res);
+  mpz_init (ref);
+
+  for (round = 0; round < COUNT; round++)
+    {
+      mini_random_op3 (OP_ADD, MAXBITS, a, b, ref);
+      mpz_add (res, a, b);
+      if (mpz_cmp (res, ref) == 0)
+        continue;
+
+      fprintf (stderr, "mpz_add failed:\n");
+      dump ("a", a);
+      dump ("b", b);
+      dump ("r", res);
+      dump ("ref", ref);
+      abort ();
+    }
+
+  mpz_clear (a);
+  mpz_clear (b);
+  mpz_clear (res);
+  mpz_clear (ref);
+}
diff --git a/mini-gmp/tests/t-bitops.c b/mini-gmp/tests/t-bitops.c

new file mode 100644 (file)

index 0000000..7d8bdc2
--- /dev/null
+++ b/mini-gmp/tests/t-bitops.c
@@ -0,0 +1,111 @@
+/*
+
+Copyright 2012, Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see http://www.gnu.org/licenses/.  */
+
+#include <limits.h>
+#include <stdlib.h>
+#include <stdio.h>
+
+#include "testutils.h"
+
+#define MAXBITS 400
+#define COUNT 10000
+
+/* Print "LABEL: value" to stderr with X written in base 16.  The string
+   obtained from mpz_get_str is handed to testfree afterwards.  */
+static void
+dump (const char *label, const mpz_t x)
+{
+  char *hex;
+
+  hex = mpz_get_str (NULL, 16, x);
+  fprintf (stderr, "%s: %s\n", label, hex);
+  testfree (hex);
+}
+
+/* Check mpz_setbit, mpz_clrbit and mpz_combit against random reference
+   results, and cross-check each one with mpz_tstbit.  The first mismatch
+   dumps the values involved to stderr and aborts.  */
+void
+testmain (int argc, char **argv)
+{
+  unsigned i;
+  mpz_t a, res, ref;
+  mp_bitcnt_t b;
+
+  mpz_init (a);
+  mpz_init (res);
+  mpz_init (ref);
+
+  for (i = 0; i < COUNT; i++)
+    {
+      /* Set bit b of a copy of a.  */
+      mini_random_bit_op (OP_SETBIT, MAXBITS, a, &b, ref);
+      mpz_set (res, a);
+      mpz_setbit (res, b);
+      if (mpz_cmp (res, ref))
+        {
+          fprintf (stderr, "mpz_setbit failed:\n");
+          dump ("a", a);
+          fprintf (stderr, "b: %lu\n", b);
+          dump ("r", res);
+          dump ("ref", ref);
+          abort ();
+        }
+      if (!mpz_tstbit (res, b))
+        {
+          fprintf (stderr, "mpz_tstbit failed (after mpz_setbit):\n");
+          /* Dump the value that was tested, not the input a.  */
+          dump ("res", res);
+          fprintf (stderr, "b: %lu\n", b);
+          abort ();
+        }
+
+      /* Clear bit b of a copy of a.  */
+      mini_random_bit_op (OP_CLRBIT, MAXBITS, a, &b, ref);
+      mpz_set (res, a);
+      mpz_clrbit (res, b);
+      if (mpz_cmp (res, ref))
+        {
+          fprintf (stderr, "mpz_clrbit failed:\n");
+          dump ("a", a);
+          fprintf (stderr, "b: %lu\n", b);
+          dump ("r", res);
+          dump ("ref", ref);
+          abort ();
+        }
+      if (mpz_tstbit (res, b))
+        {
+          fprintf (stderr, "mpz_tstbit failed (after mpz_clrbit):\n");
+          dump ("res", res);
+          fprintf (stderr, "b: %lu\n", b);
+          abort ();
+        }
+
+      /* Complement bit b of a copy of a.  */
+      mini_random_bit_op (OP_COMBIT, MAXBITS, a, &b, ref);
+      mpz_set (res, a);
+      mpz_combit (res, b);
+      if (mpz_cmp (res, ref))
+        {
+          fprintf (stderr, "mpz_combit failed:\n");
+          dump ("a", a);
+          fprintf (stderr, "b: %lu\n", b);
+          dump ("r", res);
+          dump ("ref", ref);
+          abort ();
+        }
+      /* After the complement the bit must differ between res and a.  */
+      if (mpz_tstbit (res, b) == mpz_tstbit (a, b))
+        {
+          fprintf (stderr, "mpz_tstbit failed (after mpz_combit):\n");
+          dump ("res", res);
+          fprintf (stderr, "b: %lu\n", b);
+          abort ();
+        }
+    }
+  mpz_clear (a);
+  mpz_clear (res);
+  mpz_clear (ref);
+}
diff --git a/mini-gmp/tests/t-cmp_d.c b/mini-gmp/tests/t-cmp_d.c

new file mode 100644 (file)

index 0000000..c08e3a5
--- /dev/null
+++ b/mini-gmp/tests/t-cmp_d.c
@@ -0,0 +1,295 @@
+/* Test mpz_cmp_d and mpz_cmpabs_d.
+
+Copyright 2001, 2002, 2003, 2005, 2013 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see http://www.gnu.org/licenses/.  */
+
+#include <math.h>
+
+#include "testutils.h"
+
+/* FIXME: Not sure if the tests here are exhaustive.  Ought to try to get
+   each possible exit from mpz_cmp_d (and mpz_cmpabs_d) exercised.  */
+
+
+#define SGN(n)  ((n) > 0 ? 1 : (n) < 0 ? -1 : 0)
+
+
+/* Shared tail of the two failure reports in check_one: print X in decimal
+   and in hex, Y with %g after each, then the raw bytes of Y, and abort.  */
+static void
+report_operands_and_abort (mpz_srcptr x, double y)
+{
+  const unsigned char *raw = (const unsigned char *) &y;
+  unsigned k;
+
+  printf ("  x=");
+  mpz_out_str (stdout, 10, x);
+  printf    ("\n  y %g\n", y);
+  printf ("  x=0x");
+  mpz_out_str (stdout, -16, x);
+  printf    ("\n  y %g\n", y);
+  printf    ("  y");
+  for (k = 0; k < sizeof (y); k++)
+    printf (" %02X", (unsigned) raw[k]);
+  printf ("\n");
+  abort ();
+}
+
+/* Require that the sign of mpz_cmp_d (X, Y) equals CMP and the sign of
+   mpz_cmpabs_d (X, Y) equals CMPABS.  NAME identifies the caller in the
+   mpz_cmp_d failure report.  Any mismatch prints a report and aborts.  */
+void
+check_one (const char *name, mpz_srcptr x, double y, int cmp, int cmpabs)
+{
+  int   result;
+
+  result = mpz_cmp_d (x, y);
+  if (SGN(result) != cmp)
+    {
+      printf    ("mpz_cmp_d wrong (from %s)\n", name);
+      printf    ("  got  %d\n", result);
+      printf    ("  want %d\n", cmp);
+      report_operands_and_abort (x, y);
+    }
+
+  result = mpz_cmpabs_d (x, y);
+  if (SGN(result) != cmpabs)
+    {
+      printf    ("mpz_cmpabs_d wrong\n");
+      printf    ("  got  %d\n", result);
+      printf    ("  want %d\n", cmpabs);
+      report_operands_and_abort (x, y);
+    }
+}
+
+/* Set Z from STR in the given BASE.  If mpz_set_str reports failure, the
+   string and base are printed to stderr and the program aborts.  */
+static void
+mpz_set_str_or_abort (mpz_ptr z, const char *str, int base)
+{
+  if (mpz_set_str (z, str, base) == 0)
+    return;
+
+  fprintf (stderr, "ERROR: mpz_set_str failed\n");
+  fprintf (stderr, "   str  = \"%s\"\n", str);
+  fprintf (stderr, "   base = %d\n", base);
+  abort();
+}
+
+/* Run check_one over a fixed table of (integer, double) pairs with the
+   expected signs of mpz_cmp_d and mpz_cmpabs_d.  */
+void
+check_data (void)
+{
+  static const struct cmp_case {
+    const char  *x;
+    double      y;
+    int         cmp, cmpabs;
+
+  } data[] = {
+
+    {  "0",  0.0,  0,  0 },
+
+    {  "1",  0.0,  1,  1 },
+    { "-1",  0.0, -1,  1 },
+
+    {  "1",  0.5,  1,  1 },
+    { "-1", -0.5, -1,  1 },
+
+    {  "0",  1.0, -1, -1 },
+    {  "0", -1.0,  1, -1 },
+
+    {  "0x1000000000000000000000000000000000000000000000000", 1.0,  1, 1 },
+    { "-0x1000000000000000000000000000000000000000000000000", 1.0, -1, 1 },
+
+    {  "0",  1e100, -1, -1 },
+    {  "0", -1e100,  1, -1 },
+
+    {  "2",  1.5,   1,  1 },
+    {  "2", -1.5,   1,  1 },
+    { "-2",  1.5,  -1,  1 },
+    { "-2", -1.5,  -1,  1 },
+  };
+
+  const struct cmp_case *entry;
+  mpz_t  x;
+  int    i;
+
+  mpz_init (x);
+
+  for (i = 0; i < numberof (data); i++)
+    {
+      entry = &data[i];
+      /* Base 0: the table mixes plain decimal and 0x-prefixed strings.  */
+      mpz_set_str_or_abort (x, entry->x, 0);
+      check_one ("check_data", x, entry->y, entry->cmp, entry->cmpabs);
+    }
+
+  mpz_clear (x);
+}
+
+
+/* Equality of integers with up to 53 bits */
+/* Compare all-ones integers 1, 3, 7, ... with their own double value,
+   for both signs of each operand.  The loop ends at the first width that
+   no longer survives the round trip through a double.  */
+void
+check_onebits (void)
+{
+  mpz_t   ones, back;
+  double  d;
+  int     width;
+
+  mpz_init_set_ui (ones, 0L);
+  mpz_init (back);
+
+  for (width = 0; width < 512; width++)
+    {
+      /* ones = 2*ones + 1: append one more set bit.  */
+      mpz_mul_2exp (ones, ones, 1);
+      mpz_add_ui (ones, ones, 1L);
+
+      d = mpz_get_d (ones);
+      mpz_set_d (back, d);
+
+      /* stop if any truncation is occurring */
+      if (mpz_cmp (ones, back) != 0)
+        break;
+
+      check_one ("check_onebits", ones, d, 0, 0);
+      check_one ("check_onebits", ones, -d, 1, 0);
+      mpz_neg (ones, ones);
+      check_one ("check_onebits", ones, d, -1, 0);
+      check_one ("check_onebits", ones, -d, 0, 0);
+      mpz_neg (ones, ones);
+    }
+
+  mpz_clear (ones);
+  mpz_clear (back);
+}
+
+
+/* With the mpz differing by 1, in a limb position possibly below the double */
+/* For each power of two 2^i below the LIM exponent, compare 2^i, 2^i - 1
+   and 2^i + 1 (and their negations) with the doubles +-2^i.  */
+void
+check_low_z_one (void)
+{
+  mpz_t          z;
+  double         pow2;
+  unsigned long  shift;
+
+  mpz_init (z);
+
+  /* FIXME: It'd be better to base this on the float format. */
+#if defined (__vax) || defined (__vax__)
+#define LIM 127                        /* vax fp numbers have limited range */
+#else
+#define LIM 512
+#endif
+
+  for (shift = 1; shift < LIM; shift++)
+    {
+      /* z = 2^shift, pow2 = the same value as a double.  */
+      mpz_set_ui (z, 1L);
+      mpz_mul_2exp (z, z, shift);
+      pow2 = mpz_get_d (z);
+
+      check_one ("check_low_z_one", z, pow2,   0, 0);
+      check_one ("check_low_z_one", z, -pow2,  1, 0);
+      mpz_neg (z, z);
+      check_one ("check_low_z_one", z, pow2,  -1, 0);
+      check_one ("check_low_z_one", z, -pow2,  0, 0);
+      mpz_neg (z, z);
+
+      /* z = 2^shift - 1: just below the double in magnitude.  */
+      mpz_sub_ui (z, z, 1);
+
+      check_one ("check_low_z_one", z, pow2,  -1, -1);
+      check_one ("check_low_z_one", z, -pow2,  1, -1);
+      mpz_neg (z, z);
+      check_one ("check_low_z_one", z, pow2,  -1, -1);
+      check_one ("check_low_z_one", z, -pow2,  1, -1);
+      mpz_neg (z, z);
+
+      /* z = 2^shift + 1: just above the double in magnitude.  */
+      mpz_add_ui (z, z, 2);
+
+      check_one ("check_low_z_one", z, pow2,   1, 1);
+      check_one ("check_low_z_one", z, -pow2,  1, 1);
+      mpz_neg (z, z);
+      check_one ("check_low_z_one", z, pow2,  -1, 1);
+      check_one ("check_low_z_one", z, -pow2, -1, 1);
+      mpz_neg (z, z);
+    }
+
+  mpz_clear (z);
+}
+
+/* Comparing 1 and 1+2^-n.  "y" is volatile to make gcc store and fetch it,
+   which forces it to a 64-bit double, whereas on x86 it would otherwise
+   remain on the float stack as an 80-bit long double.  */
+/* Compare the integers 1 and -1 with 1 + 2^-n and its negation, for
+   n = 1, 2, ... until adding 2^-n to 1.0 no longer changes it (at most
+   128 steps).  */
+void
+check_one_2exp (void)
+{
+  double           eps = 1.0;
+  mpz_t            z;
+  volatile double  above_one;
+  int              step;
+
+  mpz_init (z);
+
+  for (step = 0; step < 128; step++)
+    {
+      eps /= 2.0;
+      above_one = 1.0 + eps;
+      if (above_one == 1.0)
+        break;
+
+      mpz_set_ui (z, 1L);
+      check_one ("check_one_2exp", z,  above_one, -1, -1);
+      check_one ("check_one_2exp", z, -above_one,  1, -1);
+
+      mpz_set_si (z, -1L);
+      check_one ("check_one_2exp", z,  above_one, -1, -1);
+      check_one ("check_one_2exp", z, -above_one,  1, -1);
+    }
+
+  mpz_clear (z);
+}
+
+/* Compare several integers with +-HUGE_VAL.  Nothing is tested when
+   HUGE_VAL does not behave like an infinity, i.e. when doubling it
+   changes its value.  */
+void
+check_infinity (void)
+{
+  mpz_t   z;
+  double  inf = HUGE_VAL;
+
+  if (inf != 2*inf)
+    return;
+
+  mpz_init (z);
+
+  /* 0 against +inf and -inf */
+  mpz_set_ui (z, 0L);
+  check_one ("check_infinity", z,  inf, -1, -1);
+  check_one ("check_infinity", z, -inf,  1, -1);
+
+  /* 123 against +inf and -inf */
+  mpz_set_ui (z, 123L);
+  check_one ("check_infinity", z,  inf, -1, -1);
+  check_one ("check_infinity", z, -inf,  1, -1);
+
+  /* -123 against +inf and -inf */
+  mpz_set_si (z, -123L);
+  check_one ("check_infinity", z,  inf, -1, -1);
+  check_one ("check_infinity", z, -inf,  1, -1);
+
+  /* 2^5000 against +inf and -inf */
+  mpz_set_ui (z, 1L);
+  mpz_mul_2exp (z, z, 5000L);
+  check_one ("check_infinity", z,  inf, -1, -1);
+  check_one ("check_infinity", z, -inf,  1, -1);
+
+  /* -2^5000 against +inf and -inf */
+  mpz_neg (z, z);
+  check_one ("check_infinity", z,  inf, -1, -1);
+  check_one ("check_infinity", z, -inf,  1, -1);
+
+  mpz_clear (z);
+}
+
+/* Entry point of the mpz_cmp_d / mpz_cmpabs_d test: run every check
+   group in turn.  The command line arguments are not used.  */
+void
+testmain (int argc, char *argv[])
+{
+  static void (* const checks[]) (void) =
+    {
+      check_data,
+      check_onebits,
+      check_low_z_one,
+      check_one_2exp,
+      check_infinity
+    };
+  unsigned k;
+
+  for (k = 0; k < sizeof (checks) / sizeof (checks[0]); k++)
+    checks[k] ();
+}
diff --git a/mini-gmp/tests/t-comb.c b/mini-gmp/tests/t-comb.c

new file mode 100644 (file)

index 0000000..2fe097d
--- /dev/null
+++ b/mini-gmp/tests/t-comb.c
@@ -0,0 +1,164 @@
+/* Exercise mpz_fac_ui and mpz_bin_uiui.
+
+Copyright 2000, 2001, 2002, 2012, 2013 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see http://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "testutils.h"
+
+/* Usage: t-comb [x|num]
+
+   With no arguments testing goes up to the initial value of "limit" below.
+   With a number argument tests are carried that far, or with a literal "x"
+   tests are continued without limit (this being meant only for development
+   purposes).  */
+
+/* Compute bin(N,K) with mpz_bin_uiui and require it to equal WANT.  A
+   mismatch prints both values to stdout and aborts.  */
+void
+try_mpz_bin_uiui (mpz_srcptr want, unsigned long n, unsigned long k)
+{
+  mpz_t  result;
+
+  mpz_init (result);
+  mpz_bin_uiui (result, n, k);
+
+  if (mpz_cmp (result, want) == 0)
+    {
+      mpz_clear (result);
+      return;
+    }
+
+  printf ("mpz_bin_uiui wrong\n");
+  printf ("  n=%lu\n", n);
+  printf ("  k=%lu\n", k);
+  printf ("  got="); mpz_out_str (stdout, 10, result); printf ("\n");
+  printf ("  want="); mpz_out_str (stdout, 10, want); printf ("\n");
+  abort();
+}
+
+/* Test all bin(n,k) cases, with 0 <= k <= n + 1 <= count.  */
+/* For every n below COUNT, check mpz_bin_uiui (n, k) for k = 0 .. n + 1.
+   The expected value starts at bin(n,0) = 1 and is advanced with
+   bin(n,k+1) = bin(n,k) * (n-k) / (k+1), which yields 0 for k = n + 1.  */
+void
+bin_smallexaustive (unsigned int count)
+{
+  mpz_t          expected;
+  unsigned long  n, k;
+
+  mpz_init (expected);
+
+  for (n = 0; n < count; n++)
+    {
+      mpz_set_ui (expected, 1);
+      k = 0;
+      while (k <= n)
+        {
+          try_mpz_bin_uiui (expected, n, k);
+          mpz_mul_ui (expected, expected, n - k);
+          mpz_fdiv_q_ui (expected, expected, k + 1);
+          k++;
+        }
+      /* Here k == n + 1.  */
+      try_mpz_bin_uiui (expected, n, k);
+    }
+
+  mpz_clear (expected);
+}
+
+/* Test all fac(n) cases, with 0 <= n < limit.  */
+/* Check mpz_fac_ui (n) for every n below LIMIT against a running product
+   kept alongside.  A mismatch prints both values and aborts.  */
+void
+fac_smallexaustive (unsigned int limit)
+{
+  mpz_t          expected, got;
+  unsigned long  n;
+
+  mpz_init_set_si (expected, 1);  /* 0! = 1 */
+  mpz_init (got);
+
+  n = 0;
+  while (n < limit)
+    {
+      mpz_fac_ui (got, n);
+
+      if (mpz_cmp (expected, got) != 0)
+        {
+          printf ("mpz_fac_ui(%lu) wrong\n", n);
+          printf ("  got  "); mpz_out_str (stdout, 10, got); printf("\n");
+          printf ("  want "); mpz_out_str (stdout, 10, expected); printf("\n");
+          abort ();
+        }
+
+      /* (n+1)! = n! * (n+1) */
+      n++;
+      mpz_mul_ui (expected, expected, n);
+    }
+
+  mpz_clear (expected);
+  mpz_clear (got);
+}
+
+/* Store (N-1)! in F and require (N-1)! mod N == N-1, as Wilson's theorem
+   gives for a prime N.  Aborts with a report otherwise.  F keeps the
+   factorial for the caller.  */
+void checkWilson (mpz_t f, unsigned long n)
+{
+  unsigned long residue;
+
+  mpz_fac_ui (f, n - 1);
+  residue = mpz_fdiv_ui (f, n);
+  if (residue == n - 1)
+    return;
+
+  printf ("mpz_fac_ui(%lu) wrong\n", n - 1);
+  printf ("  Wilson's theorem not verified: got %lu, expected %lu.\n",residue ,n - 1);
+  abort ();
+}
+
+/* Cross-check mpz_fac_ui and mpz_bin_uiui.  The arguments must satisfy
+   P1 - 1 == (P2 - 1) + (P3 - 1).  Each factorial is verified with
+   checkWilson, then (P1-1)! / ((P2-1)! (P3-1)!) is compared with
+   mpz_bin_uiui (P1 - 1, P2 - 1).  Any failure prints a report and
+   aborts.  */
+void
+checkprimes (unsigned long p1, unsigned long p2, unsigned long p3)
+{
+  mpz_t          quot, tmp;
+
+  if (p1 - 1 != p2 - 1 + p3 - 1)
+    {
+      printf ("checkprimes(%lu, %lu, %lu) wrong\n", p1, p2, p3);
+      printf (" %lu - 1 != %lu - 1 + %lu - 1 \n", p1, p2, p3);
+      abort ();
+    }
+
+  mpz_init (quot);
+  mpz_init (tmp);
+
+  checkWilson (quot, p1);             /* quot = (p1-1)! */
+  checkWilson (tmp, p2);              /* tmp = (p2-1)! */
+  mpz_divexact (quot, quot, tmp);
+  checkWilson (tmp, p3);              /* tmp = (p3-1)! */
+  mpz_divexact (quot, quot, tmp);     /* quot = (p1-1)!/((p2-1)!(p3-1)!) */
+
+  mpz_bin_uiui (tmp, p1 - 1, p2 - 1);
+  if (mpz_cmp (tmp, quot) != 0)
+    {
+      printf ("checkprimes(%lu, %lu, %lu) wrong\n", p1, p2, p3);
+      printf ("  got  "); mpz_out_str (stdout, 10, quot); printf("\n");
+      printf ("  want "); mpz_out_str (stdout, 10, tmp); printf("\n");
+      abort ();
+    }
+
+  mpz_clear (quot);
+  mpz_clear (tmp);
+
+}
+
+/* Entry point of the factorial / binomial test.
+
+   argv[1], when present, selects how far the exhaustive checks go: a
+   word starting with 'x' asks for (practically) no limit, a decimal
+   number gives the limit.  Any other argument leaves the default of 128
+   in place, so an unrelated flag (run-tests can pass -v to the test
+   programs) no longer turns the checks into no-ops the way atoi's 0
+   result did.  */
+void
+testmain (int argc, char *argv[])
+{
+  unsigned long  limit = 128;
+
+  if (argc > 1 && argv[1][0] == 'x')
+    limit = ~ limit;
+  else if (argc > 1)
+    {
+      char           *end;
+      unsigned long  parsed = strtoul (argv[1], &end, 10);
+
+      /* Accept the value only if the whole argument was a number.  */
+      if (end != argv[1] && *end == '\0')
+        limit = parsed;
+    }
+
+  checkprimes(1009, 733, 277);
+  fac_smallexaustive (limit);
+  bin_smallexaustive (limit);
+}
diff --git a/mini-gmp/tests/t-div.c b/mini-gmp/tests/t-div.c

new file mode 100644 (file)

index 0000000..1eece29
--- /dev/null
+++ b/mini-gmp/tests/t-div.c
@@ -0,0 +1,262 @@
+/*
+
+Copyright 2012, 2013 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see http://www.gnu.org/licenses/.  */
+
+#include <assert.h>
+#include <stdlib.h>
+#include <stdio.h>
+
+#include "testutils.h"
+
+#define MAXBITS 400
+#define COUNT 10000
+
+/* Print "LABEL: value" to stderr with X written in base 16.  The string
+   obtained from mpz_get_str is handed to testfree afterwards.  */
+static void
+dump (const char *label, const mpz_t x)
+{
+  char *hex;
+
+  hex = mpz_get_str (NULL, 16, x);
+  fprintf (stderr, "%s: %s\n", label, hex);
+  testfree (hex);
+}
+
+/* Function types for the division entry points under test; testmain
+   builds one table per type, indexed by rounding mode (c, f, t).  */
+/* mpz_{c,f,t}div_qr */
+typedef void div_qr_func (mpz_t, mpz_t, const mpz_t, const mpz_t);
+/* mpz_{c,f,t}div_qr_ui */
+typedef unsigned long div_qr_ui_func (mpz_t, mpz_t, const mpz_t, unsigned long);
+/* mpz_{c,f,t}div_q and mpz_{c,f,t}div_r */
+typedef void div_func (mpz_t, const mpz_t, const mpz_t);
+/* mpz_{c,f,t}div_q_ui and mpz_{c,f,t}div_r_ui */
+typedef unsigned long div_x_ui_func (mpz_t, const mpz_t, unsigned long);
+/* mpz_{c,f,t}div_ui */
+typedef unsigned long div_ui_func (const mpz_t, unsigned long);
+
+/* Check the mpz division functions against random reference quotients
+   and remainders for the three rounding modes (ceiling, floor,
+   truncate).  Each round also exercises mpz_divisible_p, mpz_mod and,
+   when the divisor fits an unsigned long, the *_ui variants.  The first
+   mismatch dumps the values involved to stderr and aborts.  */
+void
+testmain (int argc, char **argv)
+{
+  unsigned i;
+  mpz_t a, b, q, r, rq, rr;
+  int div_p;
+
+  mpz_init (a);
+  mpz_init (b);
+  mpz_init (r);
+  mpz_init (q);
+  mpz_init (rr);
+  mpz_init (rq);
+
+  for (i = 0; i < COUNT; i++)
+    {
+      unsigned j;
+      for (j = 0; j < 3; j++)
+        {
+          /* All tables are indexed by j: 0 = cdiv, 1 = fdiv, 2 = tdiv.  */
+          static const enum hex_random_op ops[3] = { OP_CDIV, OP_FDIV, OP_TDIV };
+          static const char name[3] = { 'c', 'f', 't'};
+          static div_qr_func * const div_qr [3] =
+            {
+              mpz_cdiv_qr, mpz_fdiv_qr, mpz_tdiv_qr
+            };
+          static div_qr_ui_func * const div_qr_ui[3] =
+            {
+              mpz_cdiv_qr_ui, mpz_fdiv_qr_ui, mpz_tdiv_qr_ui
+            };
+          static div_func * const div_q [3] =
+            {
+              mpz_cdiv_q, mpz_fdiv_q, mpz_tdiv_q
+            };
+          static div_x_ui_func * const div_q_ui[3] =
+            {
+              mpz_cdiv_q_ui, mpz_fdiv_q_ui, mpz_tdiv_q_ui
+            };
+          static div_func * const div_r [3] =
+            {
+              mpz_cdiv_r, mpz_fdiv_r, mpz_tdiv_r
+            };
+          static div_x_ui_func * const div_r_ui[3] =
+            {
+              mpz_cdiv_r_ui, mpz_fdiv_r_ui, mpz_tdiv_r_ui
+            };
+          static div_ui_func * const div_ui[3] =
+            {
+              mpz_cdiv_ui, mpz_fdiv_ui, mpz_tdiv_ui
+            };
+
+          /* rq and rr are the reference quotient and remainder.  */
+          mini_random_op4 (ops[j], MAXBITS, a, b, rq, rr);
+          div_qr[j] (q, r, a, b);
+          if (mpz_cmp (r, rr) || mpz_cmp (q, rq))
+            {
+              fprintf (stderr, "mpz_%cdiv_qr failed:\n", name[j]);
+              dump ("a", a);
+              dump ("b", b);
+              dump ("r   ", r);
+              dump ("rref", rr);
+              dump ("q   ", q);
+              dump ("qref", rq);
+              abort ();
+            }
+          /* Preload the output with an unrelated value so a function
+             that leaves it untouched is detected.  */
+          mpz_set_si (q, -5);
+          div_q[j] (q, a, b);
+          if (mpz_cmp (q, rq))
+            {
+              fprintf (stderr, "mpz_%cdiv_q failed:\n", name[j]);
+              dump ("a", a);
+              dump ("b", b);
+              dump ("q   ", q);
+              dump ("qref", rq);
+              abort ();
+            }
+          mpz_set_ui (r, ~5);
+          div_r[j] (r, a, b);
+          if (mpz_cmp (r, rr))
+            {
+              fprintf (stderr, "mpz_%cdiv_r failed:\n", name[j]);
+              dump ("a", a);
+              dump ("b", b);
+              dump ("r   ", r);
+              dump ("rref", rr);
+              abort ();
+            }
+
+          if (j == 0)           /* do this once, not for all roundings */
+            {
+              /* r still holds the remainder computed just above.  */
+              div_p = mpz_divisible_p (a, b);
+              if ((mpz_sgn (r) == 0) ^ (div_p != 0))
+                {
+                  fprintf (stderr, "mpz_divisible_p failed:\n");
+                  dump ("a", a);
+                  dump ("b", b);
+                  dump ("r   ", r);
+                  abort ();
+                }
+            }
+
+          if (j == 0 && mpz_sgn (b) < 0)  /* ceil, negative divisor */
+            {
+              mpz_mod (r, a, b);
+              if (mpz_cmp (r, rr))
+                {
+                  fprintf (stderr, "mpz_mod failed:\n");
+                  dump ("a", a);
+                  dump ("b", b);
+                  dump ("r   ", r);
+                  dump ("rref", rr);
+                  abort ();
+                }
+            }
+
+          if (j == 1 && mpz_sgn (b) > 0) /* floor, positive divisor */
+            {
+              mpz_mod (r, a, b);
+              if (mpz_cmp (r, rr))
+                {
+                  fprintf (stderr, "mpz_mod failed:\n");
+                  dump ("a", a);
+                  dump ("b", b);
+                  dump ("r   ", r);
+                  dump ("rref", rr);
+                  abort ();
+                }
+            }
+
+          if (mpz_fits_ulong_p (b))
+            {
+              /* Return value of the *_ui functions; unsigned long to
+                 match their declared type and the %lx conversions.  */
+              unsigned long rl;
+
+              rl = div_qr_ui[j] (q, r, a, mpz_get_ui (b));
+              if (rl != mpz_get_ui (rr)
+                  || mpz_cmp (r, rr) || mpz_cmp (q, rq))
+                {
+                  fprintf (stderr, "mpz_%cdiv_qr_ui failed:\n", name[j]);
+                  dump ("a", a);
+                  dump ("b", b);
+                  fprintf(stderr, "rl   = %lx\n", rl);
+                  dump ("r   ", r);
+                  dump ("rref", rr);
+                  dump ("q   ", q);
+                  dump ("qref", rq);
+                  abort ();
+                }
+
+              mpz_set_si (q, 3);
+              rl = div_q_ui[j] (q, a, mpz_get_ui (b));
+              if (rl != mpz_get_ui (rr) || mpz_cmp (q, rq))
+                {
+                  fprintf (stderr, "mpz_%cdiv_q_ui failed:\n", name[j]);
+                  dump ("a", a);
+                  dump ("b", b);
+                  fprintf(stderr, "rl   = %lx\n", rl);
+                  dump ("rref", rr);
+                  dump ("q   ", q);
+                  dump ("qref", rq);
+                  abort ();
+                }
+
+              mpz_set_ui (r, 7);
+              rl = div_r_ui[j] (r, a, mpz_get_ui (b));
+              if (rl != mpz_get_ui (rr) || mpz_cmp (r, rr))
+                {
+                  fprintf (stderr, "mpz_%cdiv_r_ui failed:\n", name[j]);
+                  dump ("a", a);
+                  dump ("b", b);
+                  fprintf(stderr, "rl   = %lx\n", rl);
+                  dump ("r   ", r);
+                  dump ("rref", rr);
+                  abort ();
+                }
+
+              rl = div_ui[j] (a, mpz_get_ui (b));
+              if (rl != mpz_get_ui (rr))
+                {
+                  fprintf (stderr, "mpz_%cdiv_ui failed:\n", name[j]);
+                  dump ("a", a);
+                  dump ("b", b);
+                  fprintf(stderr, "rl   = %lx\n", rl);
+                  dump ("rref", rr);
+                  abort ();
+                }
+
+              if (j == 0)       /* do this once, not for all roundings */
+                {
+                  /* r holds the remainder from div_r_ui above.  */
+                  div_p = mpz_divisible_ui_p (a, mpz_get_ui (b));
+                  if ((mpz_sgn (r) == 0) ^ (div_p != 0))
+                    {
+                      fprintf (stderr, "mpz_divisible_ui_p failed:\n");
+                      dump ("a", a);
+                      dump ("b", b);
+                      dump ("r   ", r);
+                      abort ();
+                    }
+                }
+
+              if (j == 1)       /* floor */
+                {
+                  mpz_mod_ui (r, a, mpz_get_ui (b));
+                  if (mpz_cmp (r, rr))
+                    {
+                      fprintf (stderr, "mpz_mod_ui failed:\n");
+                      dump ("a", a);
+                      dump ("b", b);
+                      dump ("r   ", r);
+                      dump ("rref", rr);
+                      abort ();
+                    }
+                }
+            }
+        }
+    }
+  mpz_clear (a);
+  mpz_clear (b);
+  mpz_clear (r);
+  mpz_clear (q);
+  mpz_clear (rr);
+  mpz_clear (rq);
+}
diff --git a/mini-gmp/tests/t-div_2exp.c b/mini-gmp/tests/t-div_2exp.c

new file mode 100644 (file)

index 0000000..c407f7f
--- /dev/null
+++ b/mini-gmp/tests/t-div_2exp.c
@@ -0,0 +1,90 @@
+/*
+
+Copyright 2012, Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see http://www.gnu.org/licenses/.  */
+
+#include <assert.h>
+#include <stdlib.h>
+#include <stdio.h>
+
+#include "testutils.h"
+
+#define MAXBITS 400
+#define COUNT 10000
+
+static void
+dump (const char *label, const mpz_t x)
+{ /* Write "LABEL: X" to stderr, with X rendered in base 16. */
+  char *hex = mpz_get_str (NULL, 16, x);
+  fprintf (stderr, "%s: %s\n", label, hex);
+  testfree (hex); /* the string was handed out by mpz_get_str */
+}
+
+typedef void div_func (mpz_t, const mpz_t, mp_bitcnt_t);
+/* Check the six mpz_*div_*_2exp functions against mini_random_bit_op. */
+void
+testmain (int argc, char **argv)
+{
+  unsigned round;
+  mpz_t a, res, ref;
+  mp_bitcnt_t shift;
+
+  mpz_init (a);
+  mpz_init (res);
+  mpz_init (ref);
+
+  for (round = 0; round < COUNT; round++)
+    {
+      unsigned k;
+      for (k = 0; k < 6; k++)
+       { /* the three tables are parallel: entry k describes one function */
+         static const enum hex_random_op ops[6] =
+           {
+             OP_CDIV_Q_2, OP_CDIV_R_2,
+             OP_FDIV_Q_2, OP_FDIV_R_2,
+             OP_TDIV_Q_2, OP_TDIV_R_2
+           };
+         static const char *label[6] =
+           {
+             "cdiv_q", "cdiv_r",
+             "fdiv_q", "fdiv_r",
+             "tdiv_q", "tdiv_r"
+           };
+         static div_func * const func[6] =
+           {
+             mpz_cdiv_q_2exp, mpz_cdiv_r_2exp,
+             mpz_fdiv_q_2exp, mpz_fdiv_r_2exp,
+             mpz_tdiv_q_2exp, mpz_tdiv_r_2exp
+           };
+
+         mini_random_bit_op (ops[k], MAXBITS, a, &shift, ref);
+         func[k] (res, a, shift);
+         if (mpz_cmp (ref, res) != 0)
+           {
+             fprintf (stderr, "mpz_%s_2exp failed:\n", label[k]);
+             dump ("a", a);
+             fprintf (stderr, "b: %lu\n", shift);
+             dump ("r", res);
+             dump ("ref", ref);
+             abort ();
+           }
+       }
+    }
+  mpz_clear (a);
+  mpz_clear (res);
+  mpz_clear (ref);
+}
diff --git a/mini-gmp/tests/t-double.c b/mini-gmp/tests/t-double.c

new file mode 100644 (file)

index 0000000..e62345e
--- /dev/null
+++ b/mini-gmp/tests/t-double.c
@@ -0,0 +1,146 @@
+/*
+
+Copyright 2012, 2013 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see http://www.gnu.org/licenses/.  */
+
+#include <limits.h>
+#include <math.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+
+#include "testutils.h"
+
+#define GMP_LIMB_BITS (sizeof(mp_limb_t) * CHAR_BIT)
+
+#define COUNT 10000
+
+static void
+dump (const char *label, const mpz_t x)
+{ /* Write "LABEL: X" to stderr, with X rendered in base 16. */
+  char *hex = mpz_get_str (NULL, 16, x);
+  fprintf (stderr, "%s: %s\n", label, hex);
+  testfree (hex); /* the string was handed out by mpz_get_str */
+}
+
+static const struct /* fixed cases used by the first loop of testmain */
+{
+  double d;        /* input handed to mpz_init_set_d */
+  const char *s;   /* expected base-16 string from mpz_get_str */
+} values[] = {
+  { 0.0, "0" },
+  { 0.3, "0" },    /* the fractional part is expected to be dropped ... */
+  { -0.3, "0" },   /* ... toward zero, also for negative inputs */
+  { M_PI, "3" },
+  { M_PI*1e15, "b29430a256d21" },
+  { -M_PI*1e15, "-b29430a256d21" },
+  /* 17 * 2^{200} =
+     27317946752402834684213355569799764242877450894307478200123392 */
+  {0.2731794675240283468421335556979976424288e62,
+    "1100000000000000000000000000000000000000000000000000" },
+  { 0.0, NULL }    /* terminator: the loop stops at the first NULL s */
+};
+
+void
+testmain (int argc, char **argv)
+{ /* Test double <-> mpz_t conversion: mpz_init_set_d, mpz_set_d, mpz_get_d, mpz_cmp_d. */
+  unsigned i;
+  mpz_t x;
+  /* Part 1: table-driven check of mpz_init_set_d against expected hex strings. */
+  for (i = 0; values[i].s; i++)
+    {
+      char *s;
+      mpz_init_set_d (x, values[i].d);
+      s = mpz_get_str (NULL, 16, x);
+      if (strcmp (s, values[i].s) != 0)
+       {
+         fprintf (stderr, "mpz_set_d failed:\n"
+                  "d = %.20g\n"
+                  "s = %s\n"
+                  "r = %s\n",
+                  values[i].d, s, values[i].s); /* s: computed, r: expected */
+         abort ();
+       }
+      testfree (s);
+      mpz_clear (x);
+    }
+  /* Part 2: random doubles d = m * 2^e, checked for d >= 0 and then for -d. */
+  mpz_init (x);
+
+  for (i = 0; i < COUNT; i++)
+    {
+      /* Use volatile, to avoid extended precision in floating point
+        registers, e.g., on m68k and 80387. */
+      volatile double d, f;
+      unsigned long m;
+      int e;
+
+      mini_rrandomb (x, GMP_LIMB_BITS);
+      m = mpz_get_ui (x);
+      mini_urandomb (x, 8);
+      e = mpz_get_ui (x) - 100; /* presumably 8 random bits, so e in [-100, 155] -- confirm in testutils */
+
+      d = ldexp ((double) m, e);
+      mpz_set_d (x, d);
+      f = mpz_get_d (x);
+      if (f != floor (d)) /* d >= 0 here, so the round trip must give floor (d) */
+       {
+         fprintf (stderr, "mpz_set_d/mpz_get_d failed:\n");
+         goto dumperror;
+       }
+      if ((f == d) ? (mpz_cmp_d (x, d) != 0) : (mpz_cmp_d (x, d) >= 0)) /* want x == d if d is integral, else x < d */
+       {
+         fprintf (stderr, "mpz_cmp_d (x, d) failed:\n");
+         goto dumperror;
+       }
+      f = d + 1.0;
+      if (f > d && ! (mpz_cmp_d (x, f) < 0)) /* skipped when d + 1.0 does not exceed d */
+       {
+         fprintf (stderr, "mpz_cmp_d (x, f) failed:\n");
+         goto dumperror;
+       }
+      /* Repeat with the sign flipped; floor becomes ceil and the inequalities are mirrored. */
+      d = - d;
+
+      mpz_set_d (x, d);
+      f = mpz_get_d (x);
+      if (f != ceil (d))
+       {
+         fprintf (stderr, "mpz_set_d/mpz_get_d failed:\n");
+       dumperror: /* shared failure report, also reached by goto from the other checks */
+         dump ("x", x);
+         fprintf (stderr, "m = %lx, e = %i\n", m, e);
+         fprintf (stderr, "d = %.15g\n", d);
+         fprintf (stderr, "f = %.15g\n", f);
+         fprintf (stderr, "f - d = %.5g\n", f - d);
+         abort ();
+       }
+      if ((f == d) ? (mpz_cmp_d (x, d) != 0) : (mpz_cmp_d (x, d) <= 0)) /* want x == d if d is integral, else x > d */
+       {
+         fprintf (stderr, "mpz_cmp_d (x, d) failed:\n");
+         goto dumperror;
+       }
+      f = d - 1.0;
+      if (f < d && ! (mpz_cmp_d (x, f) > 0)) /* skipped when d - 1.0 is not below d */
+       {
+         fprintf (stderr, "mpz_cmp_d (x, f) failed:\n");
+         goto dumperror;
+       }
+    }
+
+  mpz_clear (x);
+}
diff --git a/mini-gmp/tests/t-gcd.c b/mini-gmp/tests/t-gcd.c

new file mode 100644 (file)

index 0000000..d70514d
--- /dev/null
+++ b/mini-gmp/tests/t-gcd.c
@@ -0,0 +1,184 @@
+/*
+
+Copyright 2012, Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see http://www.gnu.org/licenses/.  */
+
+#include <limits.h>
+#include <stdlib.h>
+#include <stdio.h>
+
+#include "testutils.h"
+
+#define MAXBITS 400
+#define COUNT 10000
+
+static void
+dump (const char *label, const mpz_t x)
+{ /* Write "LABEL: X" to stderr, with X rendered in base 16. */
+  char *hex = mpz_get_str (NULL, 16, x);
+  fprintf (stderr, "%s: %s\n", label, hex);
+  testfree (hex); /* the string was handed out by mpz_get_str */
+}
+
+/* Return 1 if (g, s, t) is an acceptable mpz_gcdext result for (a, b), else 0. */
+static int
+gcdext_valid_p (const mpz_t a, const mpz_t b,
+               const mpz_t g, const mpz_t s, const mpz_t t)
+{
+  mpz_t ta, tb, r;
+
+  /* It's not clear that gcd(0,0) is well defined, but we allow it and
+     require that gcd(0,0) = 0. */
+  if (mpz_sgn (g) < 0)
+    return 0;
+
+  if (mpz_sgn (a) == 0)
+    {
+      /* Must have g == abs (b). Any value for s is in some sense "correct",
+        but it makes sense to require that s == 0. */
+      return mpz_cmpabs (g, b) == 0 && mpz_sgn (s) == 0;
+    }
+  else if (mpz_sgn (b) == 0)
+    {
+      /* Must have g == abs (a), s == sign (a) */
+      return mpz_cmpabs (g, a) == 0 && mpz_cmp_si (s, mpz_sgn (a)) == 0;
+    }
+
+  if (mpz_sgn (g) <= 0)
+    return 0;
+  /* Temporaries are live from here on, so failures must leave through fail. */
+  mpz_init (ta);
+  mpz_init (tb);
+  mpz_init (r);
+  /* Check the identity g = s a + t b. */
+  mpz_mul (ta, s, a);
+  mpz_mul (tb, t, b);
+  mpz_add (ta, ta, tb);
+
+  if (mpz_cmp (ta, g) != 0)
+    {
+    fail:
+      mpz_clear (ta);
+      mpz_clear (tb);
+      mpz_clear (r);
+      return 0;
+    }
+  mpz_tdiv_qr (ta, r, a, g); /* ta = a/g; g must divide a exactly */
+  if (mpz_sgn (r) != 0)
+    goto fail;
+
+  mpz_tdiv_qr (tb, r, b, g); /* tb = b/g; g must divide b exactly */
+  if (mpz_sgn (r) != 0)
+    goto fail;
+
+  /* Require that 2 |s| <= |b/g|, or |s| <= 1. */
+  if (mpz_cmpabs_ui (s, 1) > 0)
+    {
+      mpz_mul_2exp (r, s, 1);
+      if (mpz_cmpabs (r, tb) > 0)
+       goto fail;
+    }
+
+  /* Require that 2 |t| <= |a/g|, or |t| <= 1. */
+  if (mpz_cmpabs_ui (t, 1) > 0)
+    {
+      mpz_mul_2exp (r, t, 1);
+      if (mpz_cmpabs (r, ta) > 0)
+       goto fail; /* not a bare return: ta, tb and r must be cleared */
+    }
+
+  mpz_clear (ta);
+  mpz_clear (tb);
+  mpz_clear (r);
+
+  return 1;
+}
+
+void
+testmain (int argc, char **argv)
+{ /* Test mpz_gcd and mpz_gcdext on random operands. */
+  unsigned i;
+  mpz_t a, b, g, s, t;
+
+  mpz_init (a);
+  mpz_init (b);
+  mpz_init (g);
+  mpz_init (s);
+  mpz_init (t);
+  /* Part 1: mpz_gcd against the value s delivered by mini_random_op3 (OP_GCD). */
+  for (i = 0; i < COUNT; i++)
+    {
+      mini_random_op3 (OP_GCD, MAXBITS, a, b, s);
+      mpz_gcd (g, a, b);
+      if (mpz_cmp (g, s))
+       {
+         fprintf (stderr, "mpz_gcd failed:\n");
+         dump ("a", a);
+         dump ("b", b);
+         dump ("r", g);
+         dump ("ref", s);
+         abort ();
+       }
+    }
+  /* Part 2: mpz_gcdext, validated by gcdext_valid_p and cross-checked with mpz_gcd. */
+  for (i = 0; i < COUNT; i++)
+    {
+      unsigned flags;
+      mini_urandomb (a, 32);
+      flags = mpz_get_ui (a); /* random bits that steer the input shaping below */
+      mini_rrandomb (a, MAXBITS);
+      mini_rrandomb (b, MAXBITS);
+      /* For two of the 37 residues, make one operand a multiple of the other. */
+      if (flags % 37 == 0)
+       mpz_mul (a, a, b);
+      if (flags % 37 == 1)
+       mpz_mul (b, a, b);
+      /* The two low bits of flags choose the signs of a and b. */
+      if (flags & 1)
+       mpz_neg (a, a);
+      if (flags & 2)
+       mpz_neg (b, b);
+
+      mpz_gcdext (g, s, t, a, b);
+      if (!gcdext_valid_p (a, b, g, s, t))
+       {
+         fprintf (stderr, "mpz_gcdext failed:\n");
+         dump ("a", a);
+         dump ("b", b);
+         dump ("g", g);
+         dump ("s", s);
+         dump ("t", t);
+         abort ();
+       }
+      /* s is reused for a plain mpz_gcd, which must agree with g from mpz_gcdext. */
+      mpz_gcd (s, a, b);
+      if (mpz_cmp (g, s))
+       {
+         fprintf (stderr, "mpz_gcd failed:\n");
+         dump ("a", a);
+         dump ("b", b);
+         dump ("r", g);
+         dump ("ref", s);
+         abort ();
+       }
+    }
+  mpz_clear (a);
+  mpz_clear (b);
+  mpz_clear (g);
+  mpz_clear (s);
+  mpz_clear (t);
+}
diff --git a/mini-gmp/tests/t-import.c b/mini-gmp/tests/t-import.c

new file mode 100644 (file)

index 0000000..baefe67
--- /dev/null
+++ b/mini-gmp/tests/t-import.c
@@ -0,0 +1,107 @@
+/*
+
+Copyright 2013 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see http://www.gnu.org/licenses/.  */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+
+#include "testutils.h"
+
+#define MAX_WORDS 20
+#define MAX_WORD_SIZE 10
+
+static void
+dump (const char *label, const mpz_t x)
+{ /* Write "LABEL: X" to stderr, with X rendered in base 16. */
+  char *hex = mpz_get_str (NULL, 16, x);
+  fprintf (stderr, "%s: %s\n", label, hex);
+  testfree (hex); /* the string was handed out by mpz_get_str */
+}
+
+static void
+dump_bytes (const char *label, const unsigned char *s, size_t n)
+{ /* Hex-dump the n bytes at s to stderr after "LABEL:", 16 bytes per line. */
+  size_t pos = 0;
+  fprintf (stderr, "%s:", label);
+  while (pos < n)
+    {
+      if (pos > 0 && pos % 16 == 0)
+       fprintf (stderr, "\n");
+      fprintf (stderr, " %02x", s[pos++]);
+    }
+  fprintf (stderr, "\n");
+}
+
+/* Tests both mpz_import and mpz_export. */
+void
+testmain (int argc, char **argv)
+{
+  unsigned char input[MAX_WORDS * MAX_WORD_SIZE];
+  unsigned char output[MAX_WORDS * MAX_WORD_SIZE + 2]; /* one guard byte on each side */
+  size_t count, in_count, out_count, size;
+  int endian, order;
+
+  mpz_t a, res;
+
+  mpz_init (a);
+  mpz_init (res);
+  /* Sweep all word sizes, word counts, endianness values and word orders. */
+  for (size = 0; size <= MAX_WORD_SIZE; size++)
+    for (count = 0; count <= MAX_WORDS; count++)
+      for (endian = -1; endian <= 1; endian++)
+       for (order = -1; order <= 1; order += 2)
+         {
+           mini_rrandomb_export (a, input, &in_count,
+                                 order, size, endian, size*count * 8);
+           mpz_import (res, in_count, order, size, endian, 0, input);
+           if (mpz_cmp (a, res))
+             { /* out_count is not valid yet (mpz_export runs below), so report size */
+               fprintf (stderr, "mpz_import failed:\n"
+                        "in_count %lu, size %lu, endian = %d, order = %d\n",
+                        (unsigned long) in_count, (unsigned long) size, endian, order);
+               dump ("a", a);
+               dump ("res", res);
+               abort ();
+             }
+           output[0] = 17; /* guard values just outside the export area */
+           output[1+in_count*size] = 17;
+
+           mpz_export (output+1, &out_count, order, size, endian, 0, a);
+           if (out_count != in_count
+               || memcmp (output+1, input, in_count * size)
+               || output[0] != 17
+               || output[1+in_count*size] != 17)
+             {
+               fprintf (stderr, "mpz_export failed:\n"
+                        "in_count %lu, out_count %lu, endian = %d, order = %d\n",
+                        (unsigned long) in_count, (unsigned long) out_count, endian, order);
+               dump_bytes ("input", input, in_count * size);
+               dump_bytes ("output", output+1, out_count * size);
+               if (output[0] != 17)
+                 fprintf (stderr, "Overwrite at -1, value %02x\n", output[0]);
+               if (output[1+in_count*size] != 17)
+                 fprintf (stderr, "Overwrite at %lu, value %02x\n",
+                          (unsigned long) (in_count*size), output[1+in_count*size]);
+
+               abort ();
+             }
+         }
+  mpz_clear (a);
+  mpz_clear (res);
+}
diff --git a/mini-gmp/tests/t-invert.c b/mini-gmp/tests/t-invert.c

new file mode 100644 (file)

index 0000000..c6bc5c3
--- /dev/null
+++ b/mini-gmp/tests/t-invert.c
@@ -0,0 +1,106 @@
+/*
+
+Copyright 2012, Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see http://www.gnu.org/licenses/.  */
+
+#include <limits.h>
+#include <stdlib.h>
+#include <stdio.h>
+
+#include "testutils.h"
+
+#define GMP_LIMB_BITS (sizeof(mp_limb_t) * CHAR_BIT)
+
+#define COUNT 10000
+
+static void
+dump (const char *label, const mpz_t x)
+{ /* Write "LABEL: X" to stderr, with X rendered in base 16. */
+  char *hex = mpz_get_str (NULL, 16, x);
+  fprintf (stderr, "%s: %s\n", label, hex);
+  testfree (hex); /* the string was handed out by mpz_get_str */
+}
+
+void
+testmain (int argc, char **argv)
+{ /* Test mpn_invert_limb and mpn_invert_3by2; B below denotes 2^GMP_LIMB_BITS. */
+  unsigned i;
+  mpz_t u, m, p, t;
+
+  mpz_init (u);
+  mpz_init (m);
+  mpz_init (p);
+  mpz_init (t);
+  /* Part 1: mpn_invert_limb on a one-limb u whose top bit is set. */
+  for (i = 0; i < COUNT; i++)
+    {
+      mini_urandomb (u, GMP_LIMB_BITS);
+      mpz_setbit (u, GMP_LIMB_BITS -1);
+      /* m = B + mpn_invert_limb (u) */
+      mpz_set_ui (m, mpn_invert_limb (u->_mp_d[0]));
+      mpz_setbit (m, GMP_LIMB_BITS);
+
+      mpz_mul (p, m, u);
+      /* t = B^2 - m u */
+      mpz_set_ui (t, 0);
+      mpz_setbit (t, 2* GMP_LIMB_BITS);
+      mpz_sub (t, t, p);
+
+      /* Should have 0 < B^2 - m u <= u */
+      if (mpz_sgn (t) <= 0 || mpz_cmp (t, u) > 0)
+       {
+         fprintf (stderr, "mpn_invert_limb failed:\n");
+         dump ("u", u);
+         dump ("m", m);
+         dump ("p", p);
+         dump ("t", t);
+         abort ();
+       }
+    }
+  /* Part 2: mpn_invert_3by2 on a two-limb u whose top bit is set. */
+  for (i = 0; i < COUNT; i++)
+    {
+      mini_urandomb (u, 2*GMP_LIMB_BITS);
+      mpz_setbit (u, 2*GMP_LIMB_BITS -1);
+      /* m = B + mpn_invert_3by2 (high limb of u, low limb of u) */
+      mpz_set_ui (m, mpn_invert_3by2 (u->_mp_d[1], u[0]._mp_d[0]));
+
+      mpz_setbit (m, GMP_LIMB_BITS);
+
+      mpz_mul (p, m, u);
+      /* t = B^3 - m u */
+      mpz_set_ui (t, 0);
+      mpz_setbit (t, 3 * GMP_LIMB_BITS);
+      mpz_sub (t, t, p);
+
+      /* Should have 0 < B^3 - m u <= u */
+      if (mpz_sgn (t) <= 0 || mpz_cmp (t, u) > 0)
+       {
+         fprintf (stderr, "mpn_invert_3by2 failed:\n");
+         dump ("u", u);
+         dump ("m", m);
+         dump ("p", p);
+         dump ("t", t);
+         abort ();
+       }
+    }
+
+  mpz_clear (u);
+  mpz_clear (m);
+  mpz_clear (p);
+  mpz_clear (t);
+}
diff --git a/mini-gmp/tests/t-lcm.c b/mini-gmp/tests/t-lcm.c

new file mode 100644 (file)

index 0000000..dfb0916
--- /dev/null
+++ b/mini-gmp/tests/t-lcm.c
@@ -0,0 +1,81 @@
+/*
+
+Copyright 2012, Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see http://www.gnu.org/licenses/.  */
+
+#include <limits.h>
+#include <stdlib.h>
+#include <stdio.h>
+
+#include "testutils.h"
+
+#define MAXBITS 400
+#define COUNT 10000
+
+static void
+dump (const char *label, const mpz_t x)
+{ /* Write "LABEL: X" to stderr, with X rendered in base 16. */
+  char *hex = mpz_get_str (NULL, 16, x);
+  fprintf (stderr, "%s: %s\n", label, hex);
+  testfree (hex); /* the string was handed out by mpz_get_str */
+}
+
+void
+testmain (int argc, char **argv)
+{ /* Check mpz_lcm and mpz_lcm_ui against mini_random_op3 (OP_LCM, ...). */
+  unsigned iter;
+  mpz_t a, b, got, want;
+
+  mpz_init (a);
+  mpz_init (b);
+  mpz_init (got);
+  mpz_init (want);
+
+  for (iter = 0; iter < COUNT; iter++)
+    {
+      mini_random_op3 (OP_LCM, MAXBITS, a, b, want);
+      mpz_lcm (got, a, b);
+      if (mpz_cmp (got, want) != 0)
+       {
+         fprintf (stderr, "mpz_lcm failed:\n");
+         dump ("a", a);
+         dump ("b", b);
+         dump ("r", got);
+         dump ("ref", want);
+         abort ();
+       }
+      /* mpz_lcm_ui is only exercised when b fits in an unsigned long. */
+      if (!mpz_fits_ulong_p (b))
+       continue;
+      mpz_set_si (got, 0);
+      mpz_lcm_ui (got, a, mpz_get_ui (b));
+      if (mpz_cmp (got, want) != 0)
+       {
+         fprintf (stderr, "mpz_lcm_ui failed:\n");
+         dump ("a", a);
+         dump ("b", b);
+         dump ("r", got);
+         dump ("ref", want);
+         abort ();
+       }
+    }
+
+  mpz_clear (a);
+  mpz_clear (b);
+  mpz_clear (got);
+  mpz_clear (want);
+}
diff --git a/mini-gmp/tests/t-logops.c b/mini-gmp/tests/t-logops.c

new file mode 100644 (file)

index 0000000..1b93ff2
--- /dev/null
+++ b/mini-gmp/tests/t-logops.c
@@ -0,0 +1,120 @@
+/*
+
+Copyright 2012, 2013 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see http://www.gnu.org/licenses/.  */
+
+#include <limits.h>
+#include <stdlib.h>
+#include <stdio.h>
+
+#include "testutils.h"
+
+#define MAXBITS 400
+#define COUNT 10000
+
+static void
+dump (const char *label, const mpz_t x)
+{ /* Write "LABEL: X" to stderr, with X rendered in base 16. */
+  char *hex = mpz_get_str (NULL, 16, x);
+  fprintf (stderr, "%s: %s\n", label, hex);
+  testfree (hex); /* the string was handed out by mpz_get_str */
+}
+
+void
+testlogops (int count)
+{ /* Run count rounds checking mpz_and, mpz_ior, mpz_xor, mpz_popcount and mpz_hamdist. */
+  unsigned i;
+  mpz_t a, b, res, ref;
+  mp_bitcnt_t c;
+
+  mpz_init (a);
+  mpz_init (b);
+  mpz_init (res);
+  mpz_init (ref);
+
+  for (i = 0; i < count; i++)
+    {
+      mini_random_op3 (OP_AND, MAXBITS, a, b, ref);
+      mpz_and (res, a, b);
+      if (mpz_cmp (res, ref))
+       {
+         fprintf (stderr, "mpz_and failed:\n");
+         dump ("a", a);
+         dump ("b", b);
+         dump ("r", res);
+         dump ("ref", ref);
+         abort ();
+       }
+
+      mini_random_op3 (OP_IOR, MAXBITS, a, b, ref);
+      mpz_ior (res, a, b);
+      if (mpz_cmp (res, ref))
+       {
+         fprintf (stderr, "mpz_ior failed:\n");
+         dump ("a", a);
+         dump ("b", b);
+         dump ("r", res);
+         dump ("ref", ref);
+         abort ();
+       }
+
+      mini_random_op3 (OP_XOR, MAXBITS, a, b, ref);
+      mpz_xor (res, a, b);
+      if (mpz_cmp (res, ref))
+       {
+         fprintf (stderr, "mpz_xor failed:\n");
+         dump ("a", a);
+         dump ("b", b);
+         dump ("r", res);
+         dump ("ref", ref);
+         abort ();
+       }
+      /* Rework res (= a ^ b) and pick c so that mpz_popcount (res) + c is the value required of mpz_hamdist (a, b). */
+      if (i % 8) {
+       c = 0; /* a left shift leaves the number of set bits unchanged */
+       mpz_mul_2exp (res, res, i % 8);
+      } else if (mpz_sgn (res) >= 0) {
+       c = mpz_odd_p (res) != 0; /* c accounts for the low bit shifted out below */
+       mpz_tdiv_q_2exp (res, res, 1);
+      } else {
+       c = (~ (mp_bitcnt_t) 0) - 3; /* negative a ^ b: the sum below must come to ~(mp_bitcnt_t) 0 */
+       mpz_set_ui (res, 11 << ((i >> 3)%4)); /* set 3 bits */
+      }
+
+      if (mpz_popcount (res) + c != mpz_hamdist (a, b))
+       {
+         fprintf (stderr, "mpz_popcount(r) + %lu and mpz_hamdist(a,b) differ:\n", c);
+         dump ("a", a);
+         dump ("b", b);
+         dump ("r", res);
+         fprintf (stderr, "mpz_popcount(r) = %lu:\n", mpz_popcount (res));
+         fprintf (stderr, "mpz_hamdist(a,b) = %lu:\n", mpz_hamdist (a, b));
+         abort ();
+       }
+    }
+  mpz_clear (a);
+  mpz_clear (b);
+  mpz_clear (res);
+  mpz_clear (ref);
+}
+
+void
+testmain (int argc, char **argv)
+{ /* Split the COUNT rounds between a testhalves-driven run and a direct run. */
+  testhalves (COUNT*2/3, testlogops); /* testutils helper, not visible here; presumably calls testlogops in a special mode -- confirm */
+  testlogops (COUNT/3);               /* direct call for a third of COUNT */
+}
diff --git a/mini-gmp/tests/t-mul.c b/mini-gmp/tests/t-mul.c

new file mode 100644 (file)

index 0000000..a36b73a
--- /dev/null
+++ b/mini-gmp/tests/t-mul.c
@@ -0,0 +1,120 @@
+/*
+
+Copyright 2012, Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see http://www.gnu.org/licenses/.  */
+
+#include <limits.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+
+#include "testutils.h"
+
+#define MAXBITS 400
+#define COUNT 10000
+
+#define GMP_LIMB_BITS (sizeof(mp_limb_t) * CHAR_BIT)
+#define MAXLIMBS ((MAXBITS + GMP_LIMB_BITS - 1) / GMP_LIMB_BITS)
+
+static void
+dump (const char *label, const mpz_t x)
+{ /* Write "LABEL: X" to stderr, with X rendered in base 16. */
+  char *hex = mpz_get_str (NULL, 16, x);
+  fprintf (stderr, "%s: %s\n", label, hex);
+  testfree (hex); /* the string was handed out by mpz_get_str */
+}
+
+void
+testmain (int argc, char **argv)
+{ /* Check mpz_mul, mpn_mul_n, mpz_mul_si and mpn_sqr against mini_random_op3/op2 results. */
+  unsigned i;
+  mpz_t a, b, res, res_ui, ref;
+  mp_limb_t t[2*MAXLIMBS]; /* raw limb buffer for the mpn-level products */
+  mp_size_t an, rn;
+
+  mpz_init (a);
+  mpz_init (b);
+  mpz_init (res);
+  mpz_init (res_ui);
+  mpz_init (ref);
+
+  for (i = 0; i < COUNT; i++)
+    {
+      mini_random_op3 (OP_MUL, MAXBITS, a, b, ref);
+      mpz_mul (res, a, b);
+      if (mpz_cmp (res, ref))
+       {
+         fprintf (stderr, "mpz_mul failed:\n");
+         dump ("a", a);
+         dump ("b", b);
+         dump ("r", res);
+         dump ("ref", ref);
+         abort ();
+       }
+      if (mpz_size (a) == mpz_size (b)) /* mpn_mul_n needs operands of equal limb count */
+       {
+         memset (t, 0, sizeof(t));
+         an = mpz_size (a);
+         if (an > 0)
+           {
+             mpn_mul_n (t, a->_mp_d, b->_mp_d, an);
+             rn = 2*an - (t[2*an-1] == 0); /* normalize from t itself, not from res, so t's top limb is checked */
+             if (rn != mpz_size (ref) || mpn_cmp (t, ref->_mp_d, rn))
+               {
+                 fprintf (stderr, "mpn_mul_n failed:\n");
+                 dump ("a", a);
+                 dump ("b", b);
+                 dump ("ref", ref);
+                 abort ();
+               }
+           }
+       }
+      if (mpz_fits_slong_p (b)) {
+       mpz_mul_si (res_ui, a, mpz_get_si (b));
+       if (mpz_cmp (res_ui, ref))
+         {
+           fprintf (stderr, "mpz_mul_si failed:\n");
+           dump ("a", a);
+           dump ("b", b);
+           dump ("r", res_ui);
+           dump ("ref", ref);
+           abort ();
+         }
+      }
+      mini_random_op2 (OP_SQR, MAXBITS, a, ref); /* new a and ref for the squaring check */
+      an = mpz_size (a);
+      if (an > 0)
+       {
+         memset (t, 0, sizeof(t));
+         mpn_sqr (t, a->_mp_d, an);
+
+         rn = 2*an - (t[2*an-1] == 0);
+         if (rn != mpz_size (ref) || mpn_cmp (t, ref->_mp_d, rn))
+           {
+             fprintf (stderr, "mpn (squaring) failed:\n");
+             dump ("a", a);
+             dump ("ref", ref);
+             abort ();
+           }
+       }
+    }
+  mpz_clear (a);
+  mpz_clear (b);
+  mpz_clear (res);
+  mpz_clear (res_ui);
+  mpz_clear (ref);
+}
diff --git a/mini-gmp/tests/t-powm.c b/mini-gmp/tests/t-powm.c

new file mode 100644 (file)

index 0000000..9068d57
--- /dev/null
+++ b/mini-gmp/tests/t-powm.c
@@ -0,0 +1,69 @@
+/*
+
+Copyright 2012, Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see http://www.gnu.org/licenses/.  */
+
+#include <limits.h>
+#include <stdlib.h>
+#include <stdio.h>
+
+#include "testutils.h"
+
+#define MAXBITS 400
+#define COUNT 1000
+
+static void
+dump (const char *label, const mpz_t x)
+{ /* Write "LABEL: X" to stderr, with X rendered in base 16. */
+  char *hex = mpz_get_str (NULL, 16, x);
+  fprintf (stderr, "%s: %s\n", label, hex);
+  testfree (hex); /* the string was handed out by mpz_get_str */
+}
+
+void
+testmain (int argc, char **argv)
+{ /* Compare mpz_powm with the result supplied by mini_random_op4 (OP_POWM). */
+  unsigned remaining = COUNT;
+  mpz_t b, e, m, got, want;
+
+  mpz_init (b);
+  mpz_init (e);
+  mpz_init (m);
+  mpz_init (got);
+  mpz_init (want);
+
+  while (remaining-- > 0)
+    {
+      mini_random_op4 (OP_POWM, MAXBITS, b, e, m, want);
+      mpz_powm (got, b, e, m);
+      if (mpz_cmp (got, want) != 0)
+       {
+         fprintf (stderr, "mpz_powm failed:\n");
+         dump ("b", b);
+         dump ("e", e);
+         dump ("m", m);
+         dump ("r", got);
+         dump ("ref", want);
+         abort ();
+       }
+    }
+  mpz_clear (b);
+  mpz_clear (e);
+  mpz_clear (m);
+  mpz_clear (got);
+  mpz_clear (want);
+}
diff --git a/mini-gmp/tests/t-reuse.c b/mini-gmp/tests/t-reuse.c

new file mode 100644 (file)

index 0000000..1bab241
--- /dev/null
+++ b/mini-gmp/tests/t-reuse.c
@@ -0,0 +1,663 @@
+/* Test that routines allow reusing a source variable as destination.
+
+Copyright 1996, 1999, 2000, 2001, 2002, 2009, 2012 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see http://www.gnu.org/licenses/.  */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+
+#include "testutils.h"
+
+#define COUNT 100
+
+/* Helpers defined after testmain.  */
+void dump (const char *name, mpz_t in1, mpz_t in2, mpz_t in3);
+void mpz_check_format (const mpz_t x);
+
+/* Pointer types for the function families collected in the tables below.
+   The type names spell out the parameter list: each "d" is an mpz_t
+   written by the function, "s" a const mpz_t input and "i" an unsigned
+   long input.  The *_div_func types that take an unsigned long also
+   return one.  */
+typedef void (*dss_func) (mpz_t, const mpz_t, const mpz_t);
+typedef void (*dsi_func) (mpz_t, const mpz_t, unsigned long int);
+typedef unsigned long int (*dsi_div_func) (mpz_t, const mpz_t,
+                                           unsigned long int);
+typedef unsigned long int (*ddsi_div_func) (mpz_t, mpz_t, const mpz_t,
+                                            unsigned long int);
+typedef void (*ddss_div_func) (mpz_t, mpz_t, const mpz_t, const mpz_t);
+typedef void (*ds_func) (mpz_t, const mpz_t);
+
+
+/* mpz_invert with the dss_func signature: the return value is consumed
+   here, and R is set to zero whenever mpz_invert returns 0.  */
+void
+mpz_xinvert (mpz_t r, const mpz_t a, const mpz_t b)
+{
+  if (!mpz_invert (r, a, b))
+    mpz_set_ui (r, 0);
+}
+
+/* Functions taking (mpz_t, const mpz_t, const mpz_t).  The three tables
+   below are indexed in parallel: function, printable name, and a flag
+   that is nonzero when testmain must skip the entry for in2 == 0.  */
+dss_func dss_funcs[] =
+{
+  mpz_add,
+  mpz_sub,
+  mpz_mul,
+  mpz_cdiv_q,
+  mpz_cdiv_r,
+  mpz_fdiv_q,
+  mpz_fdiv_r,
+  mpz_tdiv_q,
+  mpz_tdiv_r,
+  mpz_xinvert,
+  mpz_gcd,
+  mpz_lcm,
+  mpz_and,
+  mpz_ior,
+  mpz_xor
+};
+const char *dss_func_names[] =
+{
+  "mpz_add",
+  "mpz_sub",
+  "mpz_mul",
+  "mpz_cdiv_q",
+  "mpz_cdiv_r",
+  "mpz_fdiv_q",
+  "mpz_fdiv_r",
+  "mpz_tdiv_q",
+  "mpz_tdiv_r",
+  "mpz_xinvert",
+  "mpz_gcd",
+  "mpz_lcm",
+  "mpz_and",
+  "mpz_ior",
+  "mpz_xor"
+};
+char dss_func_division[] =
+{
+  0, 0, 0,             /* add, sub, mul */
+  1, 1, 1, 1, 1, 1,    /* cdiv_q .. tdiv_r */
+  1,                   /* xinvert */
+  0, 0, 0, 0, 0        /* gcd, lcm, and, ior, xor */
+};
+
+/* Functions taking (mpz_t, const mpz_t, unsigned long), with their names
+   in a parallel table.
+
+   Don't change the order without changing testmain(): while walking this
+   table it reduces the shared operand in2i when it reaches
+   "mpz_fdiv_q_2exp", "mpz_mul_2exp" and "mpz_pow_ui", and each reduction
+   stays in effect for all later entries.  */
+dsi_func dsi_funcs[] =
+{
+  mpz_add_ui,
+  mpz_mul_ui,
+  mpz_sub_ui,
+  mpz_fdiv_q_2exp,
+  mpz_fdiv_r_2exp,
+  mpz_cdiv_q_2exp,
+  mpz_cdiv_r_2exp,
+  mpz_tdiv_q_2exp,
+  mpz_tdiv_r_2exp,
+  mpz_mul_2exp,
+  mpz_pow_ui
+};
+const char *dsi_func_names[] =
+{
+  "mpz_add_ui",
+  "mpz_mul_ui",
+  "mpz_sub_ui",
+  "mpz_fdiv_q_2exp",
+  "mpz_fdiv_r_2exp",
+  "mpz_cdiv_q_2exp",
+  "mpz_cdiv_r_2exp",
+  "mpz_tdiv_q_2exp",
+  "mpz_tdiv_r_2exp",
+  "mpz_mul_2exp",
+  "mpz_pow_ui"
+};
+
+/* Division by an unsigned long with one mpz_t result; the functions
+   also return an unsigned long.  Names are kept in a parallel table.  */
+dsi_div_func dsi_div_funcs[] =
+{
+  mpz_cdiv_q_ui,
+  mpz_cdiv_r_ui,
+  mpz_fdiv_q_ui,
+  mpz_fdiv_r_ui,
+  mpz_tdiv_q_ui,
+  mpz_tdiv_r_ui
+};
+const char *dsi_div_func_names[] =
+{
+  "mpz_cdiv_q_ui",
+  "mpz_cdiv_r_ui",
+  "mpz_fdiv_q_ui",
+  "mpz_fdiv_r_ui",
+  "mpz_tdiv_q_ui",
+  "mpz_tdiv_r_ui"
+};
+
+/* Division by an unsigned long with two mpz_t results; the functions
+   also return an unsigned long.  Names are kept in a parallel table.  */
+ddsi_div_func ddsi_div_funcs[] =
+{
+  mpz_cdiv_qr_ui, mpz_fdiv_qr_ui, mpz_tdiv_qr_ui
+};
+const char *ddsi_div_func_names[] =
+{
+  "mpz_cdiv_qr_ui", "mpz_fdiv_qr_ui", "mpz_tdiv_qr_ui"
+};
+
+/* Division of two mpz_t operands with two mpz_t results.  Names are kept
+   in a parallel table.  */
+ddss_div_func ddss_div_funcs[] =
+{
+  mpz_cdiv_qr, mpz_fdiv_qr, mpz_tdiv_qr
+};
+const char *ddss_div_func_names[] =
+{
+  "mpz_cdiv_qr", "mpz_fdiv_qr", "mpz_tdiv_qr"
+};
+
+/* Functions taking (mpz_t, const mpz_t), with their names in a parallel
+   table.  testmain skips "mpz_sqrt" when in1 is negative.  */
+ds_func ds_funcs[] =
+{
+  mpz_abs,
+  mpz_com,
+  mpz_neg,
+  mpz_sqrt
+};
+const char *ds_func_names[] =
+{
+  "mpz_abs",
+  "mpz_com",
+  "mpz_neg",
+  "mpz_sqrt"
+};
+
+
+/* Report a failure of entry INDX in the CLASS function table: print its
+   name and the operands via dump, zero the table entry, and bump the
+   `failures' counter of the enclosing scope.  */
+#define FAIL(class,indx,op1,op2,op3)                                   \
+  do                                                                   \
+    {                                                                  \
+      dump (class##_func_names[indx], op1, op2, op3);                  \
+      class##_funcs[indx] = 0;                                         \
+      ++failures;                                                      \
+    }                                                                  \
+  while (0)
+/* Report a failure of the function FNAME, which is not kept in a table:
+   print its stringified name and the operands via dump, and bump the
+   `failures' counter of the enclosing scope.  */
+#define FAIL2(fname,op1,op2,op3)                                       \
+  do                                                                   \
+    {                                                                  \
+      dump (#fname, op1, op2, op3);                                    \
+      ++failures;                                                      \
+    }                                                                  \
+  while (0)
+
+/* For COUNT random operand triples (in1, in2, in3), call every function
+   under test once with destinations distinct from the sources to get a
+   reference result, then again with a destination aliasing each source in
+   turn, and check that the results agree.  Failures are counted; if any
+   occurred, a summary is printed and the process exits with status 1.
+
+   A table entry that fails is zeroed by FAIL, so it is skipped for the
+   rest of the run and must not be called again after the FAIL.  */
+void
+testmain (int argc, char **argv)
+{
+  int i;
+  int pass, reps = COUNT;
+  mpz_t in1, in2, in3;
+  unsigned long int in2i;
+  mp_size_t size;
+  mpz_t res1, res2, res3;
+  mpz_t ref1, ref2, ref3;
+  mpz_t t;
+  unsigned long int r1, r2;
+  long failures = 0;
+  mpz_t bs;
+  unsigned long bsi, size_range;
+
+  mpz_init (bs);
+
+  mpz_init (in1);
+  mpz_init (in2);
+  mpz_init (in3);
+  mpz_init (ref1);
+  mpz_init (ref2);
+  mpz_init (ref3);
+  mpz_init (res1);
+  mpz_init (res2);
+  mpz_init (res3);
+  mpz_init (t);
+
+  for (pass = 1; pass <= reps; pass++)
+    {
+      mini_urandomb (bs, 32);
+      size_range = mpz_get_ui (bs) % 12 + 2;
+
+      mini_urandomb (bs, size_range);
+      size = mpz_get_ui (bs);
+      mini_rrandomb (in1, size);
+
+      mini_urandomb (bs, size_range);
+      size = mpz_get_ui (bs);
+      mini_rrandomb (in2, size);
+
+      mini_urandomb (bs, size_range);
+      size = mpz_get_ui (bs);
+      mini_rrandomb (in3, size);
+
+      /* Three random bits select which operands are negated.  */
+      mini_urandomb (bs, 3);
+      bsi = mpz_get_ui (bs);
+      if ((bsi & 1) != 0)
+       mpz_neg (in1, in1);
+      if ((bsi & 2) != 0)
+       mpz_neg (in2, in2);
+      if ((bsi & 4) != 0)
+       mpz_neg (in3, in3);
+
+      for (i = 0; i < sizeof (dss_funcs) / sizeof (dss_func); i++)
+       {
+         if (dss_funcs[i] == 0)
+           continue;
+         if (dss_func_division[i] && mpz_sgn (in2) == 0)
+           continue;
+
+         (dss_funcs[i]) (ref1, in1, in2);
+         mpz_check_format (ref1);
+
+         mpz_set (res1, in1);
+         (dss_funcs[i]) (res1, res1, in2);
+         mpz_check_format (res1);
+         if (mpz_cmp (ref1, res1) != 0)
+           {
+             /* FAIL zeroed dss_funcs[i]; don't call it again.  */
+             FAIL (dss, i, in1, in2, NULL);
+             continue;
+           }
+
+         mpz_set (res1, in2);
+         (dss_funcs[i]) (res1, in1, res1);
+         mpz_check_format (res1);
+         if (mpz_cmp (ref1, res1) != 0)
+           FAIL (dss, i, in1, in2, NULL);
+       }
+
+      for (i = 0; i < sizeof (ddss_div_funcs) / sizeof (ddss_div_func); i++)
+       {
+         if (ddss_div_funcs[i] == 0)
+           continue;
+         if (mpz_sgn (in2) == 0)
+           continue;
+
+         (ddss_div_funcs[i]) (ref1, ref2, in1, in2);
+         mpz_check_format (ref1);
+         mpz_check_format (ref2);
+
+         mpz_set (res1, in1);
+         (ddss_div_funcs[i]) (res1, res2, res1, in2);
+         mpz_check_format (res1);
+         mpz_check_format (res2);
+         if (mpz_cmp (ref1, res1) != 0 || mpz_cmp (ref2, res2) != 0)
+           {
+             /* FAIL zeroed ddss_div_funcs[i]; don't call it again.  */
+             FAIL (ddss_div, i, in1, in2, NULL);
+             continue;
+           }
+
+         mpz_set (res2, in1);
+         (ddss_div_funcs[i]) (res1, res2, res2, in2);
+         mpz_check_format (res1);
+         mpz_check_format (res2);
+         if (mpz_cmp (ref1, res1) != 0 || mpz_cmp (ref2, res2) != 0)
+           {
+             FAIL (ddss_div, i, in1, in2, NULL);
+             continue;
+           }
+
+         mpz_set (res1, in2);
+         (ddss_div_funcs[i]) (res1, res2, in1, res1);
+         mpz_check_format (res1);
+         mpz_check_format (res2);
+         if (mpz_cmp (ref1, res1) != 0 || mpz_cmp (ref2, res2) != 0)
+           {
+             FAIL (ddss_div, i, in1, in2, NULL);
+             continue;
+           }
+
+         mpz_set (res2, in2);
+         (ddss_div_funcs[i]) (res1, res2, in1, res2);
+         mpz_check_format (res1);
+         mpz_check_format (res2);
+         if (mpz_cmp (ref1, res1) != 0 || mpz_cmp (ref2, res2) != 0)
+           FAIL (ddss_div, i, in1, in2, NULL);
+       }
+
+      for (i = 0; i < sizeof (ds_funcs) / sizeof (ds_func); i++)
+       {
+         if (ds_funcs[i] == 0)
+           continue;
+         if (strcmp (ds_func_names[i], "mpz_sqrt") == 0
+             && mpz_sgn (in1) < 0)
+           continue;
+
+         (ds_funcs[i]) (ref1, in1);
+         mpz_check_format (ref1);
+
+         mpz_set (res1, in1);
+         (ds_funcs[i]) (res1, res1);
+         mpz_check_format (res1);
+         if (mpz_cmp (ref1, res1) != 0)
+           FAIL (ds, i, in1, in2, NULL);
+       }
+
+      in2i = mpz_get_ui (in2);
+
+      for (i = 0; i < sizeof (dsi_funcs) / sizeof (dsi_func); i++)
+       {
+         if (dsi_funcs[i] == 0)
+           continue;
+         /* Each reduction of in2i below stays in effect for all later
+            entries of dsi_funcs and for the tests after this loop.  */
+         if (strcmp (dsi_func_names[i], "mpz_fdiv_q_2exp") == 0)
+           /* Limit exponent to something reasonable for the division
+              functions.  Without this, we'd  normally shift things off
+              the end and just generate the trivial values 1, 0, -1.  */
+           in2i %= 0x1000;
+         if (strcmp (dsi_func_names[i], "mpz_mul_2exp") == 0)
+           /* Limit exponent more for mpz_mul_2exp to save time.  */
+           in2i %= 0x100;
+         if (strcmp (dsi_func_names[i], "mpz_pow_ui") == 0)
+           /* Limit exponent yet more for mpz_pow_ui to save time.  */
+           in2i %= 0x10;
+
+         (dsi_funcs[i]) (ref1, in1, in2i);
+         mpz_check_format (ref1);
+
+         mpz_set (res1, in1);
+         (dsi_funcs[i]) (res1, res1, in2i);
+         mpz_check_format (res1);
+         if (mpz_cmp (ref1, res1) != 0)
+           FAIL (dsi, i, in1, in2, NULL);
+       }
+
+      if (in2i != 0)     /* Don't divide by 0.  */
+       {
+         /* The element count is the array size divided by the size of
+            one element (dsi_div_func), not by the array size itself.  */
+         for (i = 0; i < sizeof (dsi_div_funcs) / sizeof (dsi_div_func); i++)
+           {
+             if (dsi_div_funcs[i] == 0)
+               continue;
+
+             r1 = (dsi_div_funcs[i]) (ref1, in1, in2i);
+             mpz_check_format (ref1);
+
+             mpz_set (res1, in1);
+             r2 = (dsi_div_funcs[i]) (res1, res1, in2i);
+             mpz_check_format (res1);
+             if (mpz_cmp (ref1, res1) != 0 || r1 != r2)
+               FAIL (dsi_div, i, in1, in2, NULL);
+           }
+
+         for (i = 0; i < sizeof (ddsi_div_funcs) / sizeof (ddsi_div_func); i++)
+           {
+             if (ddsi_div_funcs[i] == 0)
+               continue;
+
+             r1 = (ddsi_div_funcs[i]) (ref1, ref2, in1, in2i);
+             mpz_check_format (ref1);
+
+             mpz_set (res1, in1);
+             r2 = (ddsi_div_funcs[i]) (res1, res2, res1, in2i);
+             mpz_check_format (res1);
+             if (mpz_cmp (ref1, res1) != 0 || mpz_cmp (ref2, res2) != 0 || r1 != r2)
+               {
+                 /* FAIL zeroed ddsi_div_funcs[i]; don't call it again.  */
+                 FAIL (ddsi_div, i, in1, in2, NULL);
+                 continue;
+               }
+
+             mpz_set (res2, in1);
+             /* Capture the return value so that r1 != r2 below checks
+                this call rather than the previous one.  */
+             r2 = (ddsi_div_funcs[i]) (res1, res2, res2, in2i);
+             mpz_check_format (res1);
+             if (mpz_cmp (ref1, res1) != 0 || mpz_cmp (ref2, res2) != 0 || r1 != r2)
+               FAIL (ddsi_div, i, in1, in2, NULL);
+           }
+       }
+
+      if (mpz_sgn (in1) >= 0)
+       {
+         mpz_sqrtrem (ref1, ref2, in1);
+         mpz_check_format (ref1);
+         mpz_check_format (ref2);
+
+         mpz_set (res1, in1);
+         mpz_sqrtrem (res1, res2, res1);
+         mpz_check_format (res1);
+         mpz_check_format (res2);
+         if (mpz_cmp (ref1, res1) != 0 || mpz_cmp (ref2, res2) != 0)
+           FAIL2 (mpz_sqrtrem, in1, NULL, NULL);
+
+         mpz_set (res2, in1);
+         mpz_sqrtrem (res1, res2, res2);
+         mpz_check_format (res1);
+         mpz_check_format (res2);
+         if (mpz_cmp (ref1, res1) != 0 || mpz_cmp (ref2, res2) != 0)
+           FAIL2 (mpz_sqrtrem, in1, NULL, NULL);
+       }
+
+      if (mpz_sgn (in1) >= 0)
+       {
+         mpz_root (ref1, in1, in2i % 0x1000 + 1);
+         mpz_check_format (ref1);
+
+         mpz_set (res1, in1);
+         mpz_root (res1, res1, in2i % 0x1000 + 1);
+         mpz_check_format (res1);
+         if (mpz_cmp (ref1, res1) != 0)
+           FAIL2 (mpz_root, in1, in2, NULL);
+       }
+
+      if (mpz_sgn (in1) >= 0)
+       {
+         mpz_rootrem (ref1, ref2, in1, in2i % 0x1000 + 1);
+         mpz_check_format (ref1);
+         mpz_check_format (ref2);
+
+         mpz_set (res1, in1);
+         mpz_rootrem (res1, res2, res1, in2i % 0x1000 + 1);
+         mpz_check_format (res1);
+         mpz_check_format (res2);
+         if (mpz_cmp (ref1, res1) != 0 || mpz_cmp (ref2, res2) != 0)
+           FAIL2 (mpz_rootrem, in1, in2, NULL);
+
+         mpz_set (res2, in1);
+         mpz_rootrem (res1, res2, res2, in2i % 0x1000 + 1);
+         mpz_check_format (res1);
+         mpz_check_format (res2);
+         if (mpz_cmp (ref1, res1) != 0 || mpz_cmp (ref2, res2) != 0)
+           FAIL2 (mpz_rootrem, in1, in2, NULL);
+       }
+
+      if (pass < reps / 2)     /* run fewer tests since gcdext takes lots of time */
+       {
+         mpz_gcdext (ref1, ref2, ref3, in1, in2);
+         mpz_check_format (ref1);
+         mpz_check_format (ref2);
+         mpz_check_format (ref3);
+
+         mpz_set (res1, in1);
+         mpz_gcdext (res1, res2, res3, res1, in2);
+         mpz_check_format (res1);
+         mpz_check_format (res2);
+         mpz_check_format (res3);
+         if (mpz_cmp (ref1, res1) != 0 || mpz_cmp (ref2, res2) != 0
+             || mpz_cmp (ref3, res3) != 0)
+           FAIL2 (mpz_gcdext, in1, in2, NULL);
+
+         mpz_set (res2, in1);
+         mpz_gcdext (res1, res2, res3, res2, in2);
+         mpz_check_format (res1);
+         mpz_check_format (res2);
+         mpz_check_format (res3);
+         if (mpz_cmp (ref1, res1) != 0 || mpz_cmp (ref2, res2) != 0
+             || mpz_cmp (ref3, res3) != 0)
+           FAIL2 (mpz_gcdext, in1, in2, NULL);
+
+         mpz_set (res3, in1);
+         mpz_gcdext (res1, res2, res3, res3, in2);
+         mpz_check_format (res1);
+         mpz_check_format (res2);
+         mpz_check_format (res3);
+         if (mpz_cmp (ref1, res1) != 0 || mpz_cmp (ref2, res2) != 0
+             || mpz_cmp (ref3, res3) != 0)
+           FAIL2 (mpz_gcdext, in1, in2, NULL);
+
+         mpz_set (res1, in2);
+         mpz_gcdext (res1, res2, res3, in1, res1);
+         mpz_check_format (res1);
+         mpz_check_format (res2);
+         mpz_check_format (res3);
+         if (mpz_cmp (ref1, res1) != 0 || mpz_cmp (ref2, res2) != 0
+             || mpz_cmp (ref3, res3) != 0)
+           FAIL2 (mpz_gcdext, in1, in2, NULL);
+
+         mpz_set (res2, in2);
+         mpz_gcdext (res1, res2, res3, in1, res2);
+         mpz_check_format (res1);
+         mpz_check_format (res2);
+         mpz_check_format (res3);
+         if (mpz_cmp (ref1, res1) != 0 || mpz_cmp (ref2, res2) != 0
+             || mpz_cmp (ref3, res3) != 0)
+           FAIL2 (mpz_gcdext, in1, in2, NULL);
+
+         mpz_set (res3, in2);
+         mpz_gcdext (res1, res2, res3, in1, res3);
+         mpz_check_format (res1);
+         mpz_check_format (res2);
+         mpz_check_format (res3);
+         if (mpz_cmp (ref1, res1) != 0 || mpz_cmp (ref2, res2) != 0
+             || mpz_cmp (ref3, res3) != 0)
+           FAIL2 (mpz_gcdext, in1, in2, NULL);
+
+         /* The calls below pass NULL as the third result, so res3 still
+            holds the value left by the previous call.  */
+         mpz_set (res1, in1);
+         mpz_gcdext (res1, res2, NULL, res1, in2);
+         mpz_check_format (res1);
+         mpz_check_format (res2);
+         if (mpz_cmp (ref1, res1) != 0 || mpz_cmp (ref2, res2) != 0
+             || mpz_cmp (ref3, res3) != 0)
+           FAIL2 (mpz_gcdext, in1, in2, NULL);
+
+         mpz_set (res2, in1);
+         mpz_gcdext (res1, res2, NULL, res2, in2);
+         mpz_check_format (res1);
+         mpz_check_format (res2);
+         if (mpz_cmp (ref1, res1) != 0 || mpz_cmp (ref2, res2) != 0
+             || mpz_cmp (ref3, res3) != 0)
+           FAIL2 (mpz_gcdext, in1, in2, NULL);
+
+         mpz_set (res1, in2);
+         mpz_gcdext (res1, res2, NULL, in1, res1);
+         mpz_check_format (res1);
+         mpz_check_format (res2);
+         if (mpz_cmp (ref1, res1) != 0 || mpz_cmp (ref2, res2) != 0
+             || mpz_cmp (ref3, res3) != 0)
+           FAIL2 (mpz_gcdext, in1, in2, NULL);
+
+         mpz_set (res2, in2);
+         mpz_gcdext (res1, res2, NULL, in1, res2);
+         mpz_check_format (res1);
+         mpz_check_format (res2);
+         if (mpz_cmp (ref1, res1) != 0 || mpz_cmp (ref2, res2) != 0
+             || mpz_cmp (ref3, res3) != 0)
+           FAIL2 (mpz_gcdext, in1, in2, NULL);
+       }
+
+      /* Don't run mpz_powm for huge exponents or when undefined.  */
+      if (mpz_sizeinbase (in2, 2) < 250 && mpz_sgn (in3) != 0
+         && (mpz_sgn (in2) >= 0 || mpz_invert (t, in1, in3)))
+       {
+         mpz_powm (ref1, in1, in2, in3);
+         mpz_check_format (ref1);
+
+         mpz_set (res1, in1);
+         mpz_powm (res1, res1, in2, in3);
+         mpz_check_format (res1);
+         if (mpz_cmp (ref1, res1) != 0)
+           FAIL2 (mpz_powm, in1, in2, in3);
+
+         mpz_set (res1, in2);
+         mpz_powm (res1, in1, res1, in3);
+         mpz_check_format (res1);
+         if (mpz_cmp (ref1, res1) != 0)
+           FAIL2 (mpz_powm, in1, in2, in3);
+
+         mpz_set (res1, in3);
+         mpz_powm (res1, in1, in2, res1);
+         mpz_check_format (res1);
+         if (mpz_cmp (ref1, res1) != 0)
+           FAIL2 (mpz_powm, in1, in2, in3);
+       }
+
+      /* Don't run mpz_powm_ui when undefined.  */
+      if (mpz_sgn (in3) != 0)
+       {
+         mpz_powm_ui (ref1, in1, in2i, in3);
+         mpz_check_format (ref1);
+
+         mpz_set (res1, in1);
+         mpz_powm_ui (res1, res1, in2i, in3);
+         mpz_check_format (res1);
+         if (mpz_cmp (ref1, res1) != 0)
+           FAIL2 (mpz_powm_ui, in1, in2, in3);
+
+         mpz_set (res1, in3);
+         mpz_powm_ui (res1, in1, in2i, res1);
+         mpz_check_format (res1);
+         if (mpz_cmp (ref1, res1) != 0)
+           FAIL2 (mpz_powm_ui, in1, in2, in3);
+       }
+
+      {
+       r1 = mpz_gcd_ui (ref1, in1, in2i);
+       mpz_check_format (ref1);
+
+       mpz_set (res1, in1);
+       r2 = mpz_gcd_ui (res1, res1, in2i);
+       mpz_check_format (res1);
+       /* Both the stored result and the return value must agree.  */
+       if (mpz_cmp (ref1, res1) != 0 || r1 != r2)
+         FAIL2 (mpz_gcd_ui, in1, in2, NULL);
+      }
+#if 0
+      if (mpz_cmp_ui (in2, 1L) > 0 && mpz_sgn (in1) != 0)
+       {
+         /* Test mpz_remove */
+         mpz_remove (ref1, in1, in2);
+         mpz_check_format (ref1);
+
+         mpz_set (res1, in1);
+         mpz_remove (res1, res1, in2);
+         mpz_check_format (res1);
+         if (mpz_cmp (ref1, res1) != 0)
+           FAIL2 (mpz_remove, in1, in2, NULL);
+
+         mpz_set (res1, in2);
+         mpz_remove (res1, in1, res1);
+         mpz_check_format (res1);
+         if (mpz_cmp (ref1, res1) != 0)
+           FAIL2 (mpz_remove, in1, in2, NULL);
+       }
+#endif
+      if (mpz_sgn (in2) != 0)
+       {
+         /* Test mpz_divexact */
+         mpz_mul (t, in1, in2);
+         mpz_divexact (ref1, t, in2);
+         mpz_check_format (ref1);
+
+         mpz_set (res1, t);
+         mpz_divexact (res1, res1, in2);
+         mpz_check_format (res1);
+         if (mpz_cmp (ref1, res1) != 0)
+           FAIL2 (mpz_divexact, t, in2, NULL);
+
+         mpz_set (res1, in2);
+         mpz_divexact (res1, t, res1);
+         mpz_check_format (res1);
+         if (mpz_cmp (ref1, res1) != 0)
+           FAIL2 (mpz_divexact, t, in2, NULL);
+       }
+
+#if 0
+      if (mpz_sgn (in2) > 0)
+       {
+         /* Test mpz_divexact_gcd, same as mpz_divexact */
+         mpz_mul (t, in1, in2);
+         mpz_divexact_gcd (ref1, t, in2);
+         mpz_check_format (ref1);
+
+         mpz_set (res1, t);
+         mpz_divexact_gcd (res1, res1, in2);
+         mpz_check_format (res1);
+         if (mpz_cmp (ref1, res1) != 0)
+           FAIL2 (mpz_divexact_gcd, t, in2, NULL);
+
+         mpz_set (res1, in2);
+         mpz_divexact_gcd (res1, t, res1);
+         mpz_check_format (res1);
+         if (mpz_cmp (ref1, res1) != 0)
+           FAIL2 (mpz_divexact_gcd, t, in2, NULL);
+       }
+#endif
+    }
+
+  if (failures != 0)
+    {
+      fprintf (stderr, "mpz/reuse: %ld error%s\n", failures,
+               failures == 1 ? "" : "s");
+      exit (1);
+    }
+
+  mpz_clear (bs);
+  mpz_clear (in1);
+  mpz_clear (in2);
+  mpz_clear (in3);
+  mpz_clear (ref1);
+  mpz_clear (ref2);
+  mpz_clear (ref3);
+  mpz_clear (res1);
+  mpz_clear (res2);
+  mpz_clear (res3);
+  mpz_clear (t);
+}
+
+/* Print "failure in NAME (IN1 [IN2 [IN3]])" on stdout.  The operands are
+   written by mpz_out_str with base argument -16; IN2 and IN3 are omitted
+   when they are NULL.  */
+void
+dump (const char *name, mpz_t in1, mpz_t in2, mpz_t in3)
+{
+  printf ("failure in %s (", name);
+
+  mpz_out_str (stdout, -16, in1);
+
+  if (in2)
+    {
+      printf (" ");
+      mpz_out_str (stdout, -16, in2);
+    }
+
+  if (in3)
+    {
+      printf (" ");
+      mpz_out_str (stdout, -16, in3);
+    }
+
+  printf (")\n");
+}
+
+/* Abort with a message on stderr if X is malformed: the absolute value of
+   its _mp_size field exceeds _mp_alloc, or its most significant used limb
+   is zero.  */
+void
+mpz_check_format (const mpz_t x)
+{
+  const char *problem = NULL;
+  mp_size_t used = x->_mp_size;
+
+  if (used < 0)
+    used = - used;
+
+  if (used > x->_mp_alloc)
+    problem = "mpz_t size exceeds allocation!\n";
+  else if (used > 0 && x->_mp_d[used-1] == 0)
+    problem = "Unnormalized mpz_t!\n";
+
+  if (problem != NULL)
+    {
+      fputs (problem, stderr);
+      abort ();
+    }
+}
diff --git a/mini-gmp/tests/t-root.c b/mini-gmp/tests/t-root.c

new file mode 100644 (file)

index 0000000..45305e0
--- /dev/null
+++ b/mini-gmp/tests/t-root.c
@@ -0,0 +1,103 @@
+/*
+
+Copyright 2012, 2013 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see http://www.gnu.org/licenses/.  */
+
+#include <limits.h>
+#include <stdlib.h>
+#include <stdio.h>
+
+#include "testutils.h"
+
+#define MAXBITS 400
+#define COUNT 10000
+
+/* Write LABEL and the hexadecimal representation of X to stderr.  */
+static void
+dump (const char *label, const mpz_t x)
+{
+  char *digits = mpz_get_str (NULL, 16, x);
+
+  fprintf (stderr, "%s: %s\n", label, digits);
+  testfree (digits);
+}
+
+/* Called when s is supposed to be the z-th root of u truncated towards
+   zero, and r = u - s^z.
+
+   Returns 1 when all of the following hold, 0 otherwise:
+     - u - s^z equals r,
+     - u - s^z is zero or has the same sign as u (a zero remainder is
+       what an exact z-th power must produce),
+     - stepping s one unit away from zero and raising it to the z-th
+       power gives a value strictly larger than u in absolute value.  */
+static int
+rootrem_valid_p (const mpz_t u, const mpz_t s, const mpz_t r, unsigned long z)
+{
+  mpz_t t;
+
+  mpz_init (t);
+  if (mpz_fits_ulong_p (s))
+    mpz_ui_pow_ui (t, mpz_get_ui (s), z);
+  else
+    mpz_pow_ui (t, s, z);
+  mpz_sub (t, u, t);
+  /* Only a nonzero remainder has to carry the sign of u.  */
+  if ((mpz_sgn (t) != 0 && mpz_sgn (t) != mpz_sgn (u))
+      || mpz_cmp (t, r) != 0)
+    {
+      mpz_clear (t);
+      return 0;
+    }
+  if (mpz_sgn (s) > 0)
+    mpz_add_ui (t, s, 1);
+  else
+    mpz_sub_ui (t, s, 1);
+  mpz_pow_ui (t, t, z);
+  if (mpz_cmpabs (t, u) <= 0)
+    {
+      mpz_clear (t);
+      return 0;
+    }
+
+  mpz_clear (t);
+  return 1;
+}
+
+/* Run COUNT rounds of mpz_rootrem on a random operand u and a random
+   exponent e >= 2, validating each result with rootrem_valid_p.  u is
+   negated only when e is odd and bit 10 of the random word is set.  The
+   first invalid result is dumped and the program aborts.  */
+void
+testmain (int argc, char **argv)
+{
+  mpz_t u, s, r, bs;
+  unsigned long e;
+  unsigned round;
+
+  mpz_init (bs);
+  mpz_init (u);
+  mpz_init (s);
+  mpz_init (r);
+
+  for (round = 0; round < COUNT; round++)
+    {
+      mini_rrandomb (u, MAXBITS);
+      mini_rrandomb (bs, 12);
+
+      /* The exponent is derived from the low limb of bs and the bit
+         size of u.  */
+      e = mpz_getlimbn (bs, 0) % mpz_sizeinbase (u, 2) + 2;
+      if ((e & 1) && (mpz_getlimbn (bs, 0) & (1L<<10)))
+       mpz_neg (u, u);
+
+      mpz_rootrem (s, r, u, e);
+      if (rootrem_valid_p (u, s, r, e))
+       continue;
+
+      fprintf (stderr, "mpz_rootrem(%lu-th) failed:\n", e);
+      dump ("u", u);
+      dump ("root", s);
+      dump ("rem", r);
+      abort ();
+    }
+
+  mpz_clear (bs);
+  mpz_clear (u);
+  mpz_clear (s);
+  mpz_clear (r);
+}
diff --git a/mini-gmp/tests/t-scan.c b/mini-gmp/tests/t-scan.c

new file mode 100644 (file)

index 0000000..d7e27ec
--- /dev/null
+++ b/mini-gmp/tests/t-scan.c
@@ -0,0 +1,72 @@
+/*
+
+Copyright 2012, Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see http://www.gnu.org/licenses/.  */
+
+#include <limits.h>
+#include <stdlib.h>
+#include <stdio.h>
+
+#include "testutils.h"
+
+#define MAXBITS 400
+#define COUNT 10000
+
+/* Write LABEL and the hexadecimal representation of X to stderr.  */
+static void
+dump (const char *label, const mpz_t x)
+{
+  char *text;
+
+  text = mpz_get_str (NULL, 16, x);
+  fprintf (stderr, "%s: %s\n", label, text);
+  testfree (text);
+}
+
+/* Run COUNT rounds, each checking mpz_scan0 and then mpz_scan1 against
+   the value that mini_random_scan_op stores in ref for the operand a and
+   starting bit b it generates.  The first mismatch is reported on stderr
+   and the program aborts.  */
+void
+testmain (int argc, char **argv)
+{
+  mp_bitcnt_t b, res, ref;
+  mpz_t a;
+  unsigned round;
+
+  mpz_init (a);
+
+  round = 0;
+  while (round < COUNT)
+    {
+      /* Scan for a 0 bit.  */
+      mini_random_scan_op (OP_SCAN0, MAXBITS, a, &b, &ref);
+      res = mpz_scan0 (a, b);
+      if (res != ref)
+       {
+         fprintf (stderr, "mpz_scan0 failed:\n");
+         dump ("a", a);
+         fprintf (stderr, "b: %lu\n", b);
+         fprintf (stderr, "r: %lu\n", res);
+         fprintf (stderr, "ref: %lu\n", ref);
+         abort ();
+       }
+
+      /* Scan for a 1 bit.  */
+      mini_random_scan_op (OP_SCAN1, MAXBITS, a, &b, &ref);
+      res = mpz_scan1 (a, b);
+      if (res != ref)
+       {
+         fprintf (stderr, "mpz_scan1 failed:\n");
+         dump ("a", a);
+         fprintf (stderr, "b: %lu\n", b);
+         fprintf (stderr, "r: %lu\n", res);
+         fprintf (stderr, "ref: %lu\n", ref);
+         abort ();
+       }
+
+      round++;
+    }
+
+  mpz_clear (a);
+}
diff --git a/mini-gmp/tests/t-signed.c b/mini-gmp/tests/t-signed.c

new file mode 100644 (file)

index 0000000..3fe54a2
--- /dev/null
+++ b/mini-gmp/tests/t-signed.c
@@ -0,0 +1,142 @@
+/* Exercise some mpz_..._si functions.
+
+Copyright 2013 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see http://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "testutils.h"
+
+/* Cross-check the _si functions on the pair (sz, si) against a second
+   pair (oz, oi); c is the expected result of comparing the first value
+   with the second.
+
+   Aborts if mpz_cmp_si (sz, oi) is not c.  Returns 1, without further
+   checks, when comparing the longs si and oi does not give c.  Otherwise
+   verifies mpz_cmp_si, mpz_fits_slong_p, mpz_get_si and mpz_init_set_si
+   against si, aborts if any of them disagrees, and returns 0.  */
+int
+check_si (mpz_t sz, mpz_t oz, long si, long oi, int c)
+{
+  mpz_t t;
+  int fail = 0;
+  int long_cmp;
+
+  if (mpz_cmp_si (sz, oi) != c)
+    {
+      printf ("mpz_cmp_si (sz, %ld) != %i.\n", oi, c);
+      printf (" sz="); mpz_out_str (stdout, 10, sz); printf ("\n");
+      abort ();
+    }
+
+  if (si < oi)
+    long_cmp = -1;
+  else
+    long_cmp = si > oi;
+  if (long_cmp != c)
+    return 1;
+
+  mpz_init_set_si (t, si);
+
+  if (mpz_cmp_si (sz, si) != 0)
+    {
+      printf ("mpz_cmp_si (sz, %ld) != 0.\n", si);
+      fail = 1;
+    }
+  if (mpz_cmp_si (oz, si) != -c)
+    {
+      printf ("mpz_cmp_si (oz, %ld) != %i.\n", si, -c);
+      fail = 1;
+    }
+  if (! mpz_fits_slong_p (sz))
+    {
+      printf ("mpz_fits_slong_p (sz) != 1.\n");
+      fail = 1;
+    }
+  if (mpz_get_si (sz) != si)
+    {
+      printf ("mpz_get_si (sz) != %ld.\n", si);
+      fail = 1;
+    }
+  if (mpz_cmp (t, sz) != 0)
+    {
+      printf ("mpz_init_set_si (%ld) failed.\n", si);
+      printf (" got="); mpz_out_str (stdout, 10, t); printf ("\n");
+      fail = 1;
+    }
+
+  mpz_clear (t);
+
+  if (!fail)
+    return 0;
+
+  printf (" sz="); mpz_out_str (stdout, 10, sz); printf ("\n");
+  printf (" oz="); mpz_out_str (stdout, 10, oz); printf ("\n");
+  printf (" si=%ld\n", si);
+  abort ();
+}
+
+/* Exercise the _si functions with values of sign c (c is -1 or 1) of the
+   forms c * 2^k, c * (2^k + 1) and c * (2^k - 1) for growing k, keeping a
+   long (si, oi) and an mpz_t (sz, oz) copy of each value in step.  The
+   loop ends when check_si returns 1, i.e. when the long arithmetic no
+   longer agrees with the mpz_t values.  Afterwards oz must not fit a
+   signed long and must compare to -c with result c, also after being
+   doubled once more.  */
+void
+try_op_si (int c)
+{
+  long  si, oi;
+  mpz_t sz, oz;
+
+  si = c;
+  mpz_init_set_si (sz, si);
+
+  oi = si;
+  mpz_init_set (oz, sz);
+
+  do {
+    /* c * 2^k.  The loop relies on the long value eventually going out
+       of range, so compute it in unsigned arithmetic and convert back:
+       overflowing a signed long directly would be undefined behaviour,
+       whereas the conversion is only implementation-defined.  */
+    si = (long) ((unsigned long) si * 2);
+    mpz_mul_2exp (sz, sz, 1);
+
+    if (check_si (sz, oz, si, oi, c))
+      {
+       mpz_set (oz, sz);
+       break;
+      }
+
+    /* c * (2^k + 1), again without signed overflow.  */
+    oi = (long) ((unsigned long) si + (unsigned long) c);
+    if (c == -1)
+      mpz_sub_ui (oz, sz, 1);
+    else
+      mpz_add_ui (oz, sz, 1);
+
+    if (check_si (oz, sz, oi, si, c))
+      break;
+
+    oi = (si - c) * 2 + c; /* c * (2^K - 1) */
+    mpz_mul_si (oz, sz, 2*c);
+    if (c == -1)
+      mpz_ui_sub (oz, 1, oz); /* oz = sz * 2 + 1 */
+    else
+      mpz_sub_ui (oz, oz, 1); /* oz = sz * 2 - 1 */
+  } while (check_si (oz, sz, oi, si, c) == 0);
+
+  mpz_clear (sz);
+
+  if (mpz_fits_slong_p (oz))
+    {
+      printf ("Should not fit a signed long any more.\n");
+      printf (" oz="); mpz_out_str (stdout, 10, oz); printf ("\n");
+      abort ();
+    }
+
+  /* The diagnostics print -c, the operand actually compared against.  */
+  if (mpz_cmp_si (oz, -c) != c)
+      {
+       printf ("mpz_cmp_si (oz, %i) != %i.\n", -c, c);
+       printf (" oz="); mpz_out_str (stdout, 10, oz); printf ("\n");
+       abort ();
+      }
+
+  mpz_mul_2exp (oz, oz, 1);
+  if (mpz_cmp_si (oz, -c) != c)
+      {
+       printf ("mpz_cmp_si (oz, %i) != %i.\n", -c, c);
+       printf (" oz="); mpz_out_str (stdout, 10, oz); printf ("\n");
+       abort ();
+      }
+
+  mpz_clear (oz);
+}
+
+/* Run try_op_si for the negative and then the positive sign.  */
+void
+testmain (int argc, char *argv[])
+{
+  static const int signs[] = { -1, 1 };
+  size_t k;
+
+  for (k = 0; k < sizeof (signs) / sizeof (signs[0]); k++)
+    try_op_si (signs[k]);
+}
diff --git a/mini-gmp/tests/t-sqrt.c b/mini-gmp/tests/t-sqrt.c

new file mode 100644 (file)

index 0000000..980ae35
--- /dev/null
+++ b/mini-gmp/tests/t-sqrt.c
@@ -0,0 +1,90 @@
+/*
+
+Copyright 2012, Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see http://www.gnu.org/licenses/.  */
+
+#include <limits.h>
+#include <stdlib.h>
+#include <stdio.h>
+
+#include "testutils.h"
+
+#define MAXBITS 400
+#define COUNT 10000
+
+/* Write LABEL and the hexadecimal representation of X to stderr.  */
+static void
+dump (const char *label, const mpz_t x)
+{
+  char *hex = mpz_get_str (NULL, 16, x);
+
+  fprintf (stderr, "%s: %s\n", label, hex);
+  testfree (hex);
+}
+
+/* Called when s is supposed to be floor(sqrt(u)), and r = u - s^2.
+   Returns 1 if u - s^2 is non-negative and equal to r, and (s + 1)^2 is
+   strictly greater than u; returns 0 otherwise.  */
+static int
+sqrtrem_valid_p (const mpz_t u, const mpz_t s, const mpz_t r)
+{
+  mpz_t t;
+  int ok;
+
+  mpz_init (t);
+
+  /* t = u - s^2 */
+  mpz_mul (t, s, s);
+  mpz_sub (t, u, t);
+  ok = mpz_sgn (t) >= 0 && mpz_cmp (t, r) == 0;
+
+  if (ok)
+    {
+      /* t = (s + 1)^2 */
+      mpz_add_ui (t, s, 1);
+      mpz_mul (t, t, t);
+      ok = mpz_cmp (t, u) > 0;
+    }
+
+  mpz_clear (t);
+  return ok;
+}
+
+/* Run COUNT rounds of mpz_sqrtrem on operands produced by
+   mini_rrandomb (u, MAXBITS), validating each result with
+   sqrtrem_valid_p.  The first invalid result is dumped and the program
+   aborts.  */
+void
+testmain (int argc, char **argv)
+{
+  mpz_t u, s, r;
+  unsigned round;
+
+  mpz_init (u);
+  mpz_init (s);
+  mpz_init (r);
+
+  for (round = 0; round < COUNT; round++)
+    {
+      mini_rrandomb (u, MAXBITS);
+      mpz_sqrtrem (s, r, u);
+
+      if (sqrtrem_valid_p (u, s, r))
+       continue;
+
+      fprintf (stderr, "mpz_sqrtrem failed:\n");
+      dump ("u", u);
+      dump ("sqrt", s);
+      dump ("rem", r);
+      abort ();
+    }
+
+  mpz_clear (u);
+  mpz_clear (s);
+  mpz_clear (r);
+}
diff --git a/mini-gmp/tests/t-str.c b/mini-gmp/tests/t-str.c

new file mode 100644 (file)

index 0000000..00f82ab
--- /dev/null
+++ b/mini-gmp/tests/t-str.c
@@ -0,0 +1,315 @@
+/*
+
+Copyright 2012, 2013 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see http://www.gnu.org/licenses/.  */
+
+#include <assert.h>
+#include <limits.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+
+#include "testutils.h"
+
+#define MAXBITS 400
+#define COUNT 2000
+
+#define GMP_LIMB_BITS (sizeof(mp_limb_t) * CHAR_BIT)
+#define MAXLIMBS ((MAXBITS + GMP_LIMB_BITS - 1) / GMP_LIMB_BITS)
+
+static void  /* Write "LABEL: X" to stderr, with X converted using base 16. */
+dump (const char *label, const mpz_t x)
+{
+  char *buf = mpz_get_str (NULL, 16, x);  /* string returned by mpz_get_str; released below */
+  fprintf (stderr, "%s: %s\n", label, buf);
+  testfree (buf);  /* testfree releases through mini-gmp's free function */
+}
+
+static void  /* Check mpz_init_set_str with base 0 against a table of hand-written inputs. */
+test_small (void)
+{
+  struct {
+    const char *input;    /* string parsed with base 0 */
+    const char *decimal;  /* the same value in base 10, or NULL if input must be rejected */
+  } data[] = {
+    { "183407", "183407" },
+    { " 763959", "763959" },
+    { "9 81999", "981999" },
+    { "10\t7398", "107398" },
+    { "-9585 44", "-00958544" },
+    { "-0", "0000" },
+    { " -000  ", "0" },
+    { "0704436", "231710" },
+    { " 02503517", "689999" },
+    { "0 1312143", "365667" },
+    { "-03 274062", "-882738" },
+    { "012\t242", "005282" },
+    { "0b11010111110010001111", "883855" },
+    { " 0b11001010010100001", "103585" },
+    { "-0b101010110011101111", "-175343" },
+    { "0b 1111111011011100110", "521958" },
+    { "0b1 1111110111001000011", "1044035" },
+    { " 0x53dfc", "343548" },
+    { "0xfA019", "1024025" },
+    { "0x 642d1", "410321" },
+    { "0x5 8067", "360551" },
+    { "-0xd6Be6", "-879590" },
+    { "\t0B1110000100000000011", "460803" },
+    { "0B\t1111110010010100101", "517285" },
+    { "0B1\t010111101101110100", "359284" },
+    { "-0B101\t1001101111111001", "-367609" },
+    { "0B10001001010111110000", "562672" },
+    { "0Xe4B7e", "936830" },
+    { "0X1E4bf", "124095" },
+    { "-0Xfdb90", "-1039248" },
+    { "0X7fc47", "523335" },
+    { "0X8167c", "530044" },
+    /* Some invalid inputs */
+    { "0ab", NULL },
+    { "10x0", NULL },
+    { "0xxab", NULL },
+    { "ab", NULL },
+    { "0%#", NULL },
+    { "$foo", NULL },
+    { NULL, NULL }  /* end marker: stops the loop below */
+  };
+  unsigned i;
+  mpz_t a, b;
+  mpz_init (b);
+
+  for (i = 0; data[i].input; i++)
+    {
+      int res = mpz_init_set_str (a, data[i].input, 0);  /* a is cleared at the end of every pass */
+      if (data[i].decimal)
+       {
+         if (res != 0)
+           {
+             fprintf (stderr, "mpz_init_set_str returned %d, input: %s\n",
+                      res, data[i].input);
+             abort ();
+           }
+         if (mpz_set_str (b, data[i].decimal, 10) != 0)
+           {
+             fprintf (stderr, "mpz_set_str returned -1, decimal input: %s\n",
+                      data[i].decimal);  /* report the string that was actually parsed here */
+             abort ();
+           }
+         if (mpz_cmp (a, b) != 0)
+           {
+             fprintf (stderr, "mpz_init_set_str failed for input: %s\n",
+                      data[i].input);
+
+             dump ("got", a);
+             dump ("ref", b);
+             abort ();
+           }
+       }
+      else if (res != -1)  /* invalid input must be reported with -1 */
+       {
+         fprintf (stderr, "mpz_init_set_str returned %d, invalid input: %s\n",
+                  res, data[i].input);
+         abort ();
+       }
+      mpz_clear (a);
+    }
+
+  mpz_clear (b);
+}
+
+void  /* Run test_small, then round-trip COUNT random values through the string functions in bases 0 and 2..36. */
+testmain (int argc, char **argv)
+{
+  unsigned i;
+  char *ap, *rp;  /* strings produced by hex_random_str_op; ap is parsed as base 16, rp is the reference */
+  char *bp;       /* buffer returned by mpz_get_str; released with testfree */
+  size_t bn, rn, arn;
+
+  mpz_t a, b;
+
+  FILE *tmp;
+
+  test_small ();
+
+  mpz_init (a);
+  mpz_init (b);
+
+  tmp = tmpfile ();
+  if (!tmp)
+    fprintf (stderr,
+            "Failed to create temporary file. Skipping mpz_out_str tests.\n");
+
+  for (i = 0; i < COUNT; i++)
+    {
+      int base;
+      for (base = 0; base <= 36; base += 1 + (base == 0))  /* visits 0, 2, 3, ..., 36 */
+       {
+         hex_random_str_op (MAXBITS, i&1 ? base: -base, &ap, &rp);
+         if (mpz_set_str (a, ap, 16) != 0)
+           {
+             fprintf (stderr, "mpz_set_str failed on input %s\n", ap);
+             abort ();
+           }
+
+         rn = strlen (rp);
+         arn = rn - (rp[0] == '-');  /* reference length without the sign */
+
+         bn = mpz_sizeinbase (a, base ? base : 10);
+         if (bn < arn || bn > (arn + 1))  /* accepted: exact, or one digit too many */
+           {
+             fprintf (stderr, "mpz_sizeinbase failed:\n");
+             dump ("a", a);
+             fprintf (stderr, "r = %s\n", rp);
+             fprintf (stderr, "  base %d, correct size %u, got %u\n",
+                      base, (unsigned) arn, (unsigned)bn);
+             abort ();
+           }
+         bp = mpz_get_str (NULL, i&1 ? base: -base, a);
+         if (strcmp (bp, rp))
+           {
+             fprintf (stderr, "mpz_get_str failed:\n");
+             dump ("a", a);
+             fprintf (stderr, "b = %s\n", bp);
+             fprintf (stderr, "  base = %d\n", base);
+             fprintf (stderr, "r = %s\n", rp);
+             abort ();
+           }
+
+         /* Just a few tests with file i/o. */
+         if (tmp && i < 20)
+           {
+             size_t tn;
+             rewind (tmp);
+             tn = mpz_out_str (tmp, i&1 ? base: -base, a);
+             if (tn != rn)
+               {
+                 fprintf (stderr, "mpz_out_str, bad return value:\n");
+                 dump ("a", a);
+                 fprintf (stderr, "r = %s\n", rp);
+                 fprintf (stderr, "  base %d, correct size %u, got %u\n",
+                          base, (unsigned) rn, (unsigned)tn);
+                 abort ();
+               }
+             rewind (tmp);
+             memset (bp, 0, rn);  /* bp held exactly rn characters (strcmp above matched) */
+             tn = fread (bp, 1, rn, tmp);
+             if (tn != rn)
+               {
+                 fprintf (stderr,
+                          "fread failed, expected %lu bytes, got only %lu.\n",
+                          (unsigned long) rn, (unsigned long) tn);
+                 abort ();
+               }
+
+             if (memcmp (bp, rp, rn) != 0)
+               {
+                 fprintf (stderr, "mpz_out_str failed:\n");
+                 dump ("a", a);
+                 fprintf (stderr, "b = %s\n", bp);
+                 fprintf (stderr, "  base = %d\n", base);
+                 fprintf (stderr, "r = %s\n", rp);
+                 abort ();
+               }
+           }
+
+         if (mpz_set_str (b, rp, base) != 0 || mpz_cmp (a, b))  /* a parse failure is a failure too */
+           {
+             fprintf (stderr, "mpz_set_str failed:\n");
+             fprintf (stderr, "r = %s\n", rp);
+             fprintf (stderr, "  base = %d\n", base);
+             fprintf (stderr, "r = %s\n", ap);
+             fprintf (stderr, "  base = 16\n");
+             dump ("b", b);
+             dump ("r", a);
+             abort ();
+           }
+
+         /* Test mpn interface */
+         if (base && mpz_sgn (a))
+           {
+             size_t j;  /* digit index; a separate name so the outer counter i is not shadowed */
+             const char *absr;
+             mp_limb_t t[MAXLIMBS];
+             mp_size_t tn = mpz_size (a);
+
+             assert (tn <= MAXLIMBS);
+             mpn_copyi (t, a->_mp_d, tn);  /* the mpn calls below operate on this copy of a's limbs */
+
+             bn = mpn_get_str (bp, base, t, tn);
+             if (bn != arn)
+               {
+                 fprintf (stderr, "mpn_get_str failed:\n");
+                 fprintf (stderr, "returned length: %lu (bad)\n", (unsigned long) bn);
+                 fprintf (stderr, "expected: %lu\n", (unsigned long) arn);
+                 fprintf (stderr, "  base = %d\n", base);
+                 fprintf (stderr, "r = %s\n", ap);
+                 fprintf (stderr, "  base = 16\n");
+                 dump ("b", b);
+                 dump ("r", a);
+                 abort ();
+               }
+             absr = rp + (rp[0] == '-');  /* reference digits without the sign */
+
+             for (j = 0; j < bn; j++)
+               {
+                 unsigned char digit = absr[j];
+                 unsigned value;
+                 if (digit >= '0' && digit <= '9')
+                   value = digit - '0';
+                 else if (digit >= 'a' && digit <= 'z')
+                   value = digit - 'a' + 10;
+                 else if (digit >= 'A' && digit <= 'Z')
+                   value = digit - 'A' + 10;
+                 else
+                   {
+                     fprintf (stderr, "Internal error in test.\n");
+                     abort();
+                   }
+                 if ((unsigned char) bp[j] != value)  /* bp holds digit values here, not characters */
+                   {
+                     fprintf (stderr, "mpn_get_str failed:\n");
+                     fprintf (stderr, "digit %lu: %d (bad)\n", (unsigned long) j, bp[j]);
+                     fprintf (stderr, "expected: %u\n", value);
+                     fprintf (stderr, "  base = %d\n", base);
+                     fprintf (stderr, "r = %s\n", ap);
+                     fprintf (stderr, "  base = 16\n");
+                     dump ("b", b);
+                     dump ("r", a);
+                     abort ();
+                   }
+               }
+             tn = mpn_set_str (t, bp, bn, base);
+             if (tn != mpz_size (a) || mpn_cmp (t, a->_mp_d, tn))
+               {
+                 fprintf (stderr, "mpn_set_str failed:\n");
+                 fprintf (stderr, "r = %s\n", rp);
+                 fprintf (stderr, "  base = %d\n", base);
+                 fprintf (stderr, "r = %s\n", ap);
+                 fprintf (stderr, "  base = 16\n");
+                 dump ("r", a);
+                 abort ();
+               }
+           }
+         free (ap);
+         free (rp);  /* previously leaked; NOTE(review): assumed to come from the same allocator as ap */
+         testfree (bp);
+       }
+    }
+  if (tmp)
+    fclose (tmp);  /* release the temporary file opened above */
+  mpz_clear (a);
+  mpz_clear (b);
+}
diff --git a/mini-gmp/tests/t-sub.c b/mini-gmp/tests/t-sub.c

new file mode 100644 (file)

index 0000000..2a1e113
--- /dev/null
+++ b/mini-gmp/tests/t-sub.c
@@ -0,0 +1,79 @@
+/*
+
+Copyright 2012, 2013 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see http://www.gnu.org/licenses/.  */
+
+#include <stdlib.h>
+#include <stdio.h>
+
+#include "testutils.h"
+
+#define MAXBITS 400
+#define COUNT 10000
+
+static void  /* Write "LABEL: X" to stderr, with X converted using base 16. */
+dump (const char *label, const mpz_t x)
+{
+  char *buf = mpz_get_str (NULL, 16, x);  /* string returned by mpz_get_str; released below */
+  fprintf (stderr, "%s: %s\n", label, buf);
+  testfree (buf);  /* not free (): the test allocator hands out pointers offset into its malloc blocks */
+}
+
+void  /* Compare mpz_sub and mpz_ui_sub with reference results for COUNT operand pairs. */
+testmain (int argc, char **argv)
+{
+  unsigned i;
+  mpz_t a, b, res, res_ui, ref;
+
+  mpz_init (a);
+  mpz_init (b);
+  mpz_init (res);
+  mpz_init (res_ui);
+  mpz_init (ref);
+
+  for (i = 0; i < COUNT; i++)
+    {
+      mini_random_op3 (OP_SUB, MAXBITS, a, b, ref);  /* supplies a, b and the reference value ref used below */
+      mpz_sub (res, a, b);
+      if (mpz_cmp (res, ref))
+       {
+         fprintf (stderr, "mpz_sub failed:\n");
+         dump ("a", a);
+         dump ("b", b);
+         dump ("r", res);
+         dump ("ref", ref);
+         abort ();
+       }
+      if (mpz_fits_ulong_p (a)) {  /* mpz_ui_sub is only exercised when a fits an unsigned long */
+       mpz_ui_sub (res_ui, mpz_get_ui (a), b);
+       if (mpz_cmp (res_ui, ref))
+         {
+           fprintf (stderr, "mpz_ui_sub failed:\n");
+           dump ("a", a);
+           dump ("b", b);
+           dump ("r", res_ui);
+           dump ("ref", ref);
+           abort ();
+         }
+      }
+    }
+  mpz_clear (a);
+  mpz_clear (b);
+  mpz_clear (res);
+  mpz_clear (res_ui);
+  mpz_clear (ref);
+}
diff --git a/mini-gmp/tests/testutils.c b/mini-gmp/tests/testutils.c

new file mode 100644 (file)

index 0000000..7e0f09d
--- /dev/null
+++ b/mini-gmp/tests/testutils.c
@@ -0,0 +1,153 @@
+/*
+
+Copyright 2013, Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "testutils.h"
+
+/* Include it here, so that we could tweak, e.g., how MPZ_REALLOC
+   works. */
+#include "../mini-gmp.c"
+
+static size_t total_alloc = 0;  /* sum of payload sizes: raised in block_init, lowered in block_check */
+
+/* Custom memory allocation to track memory usage, and add a small red
+   zone.
+
+   About alignment: In general, getting a block from malloc, and
+   incrementing it by sizeof(size_t), like we do here, might give a
+   pointer which is not properly aligned for all types. But the
+   largest type we allocate space for is unsigned long (mp_limb_t),
+   which shouldn't have stricter alignment requirements than
+   size_t. */
+
+static const unsigned char block_end[8] =  /* red-zone pattern copied right after each block's payload */
+  { 0x7c, 0x37, 0xd6, 0x12, 0xa8, 0x6c, 0x01, 0xd1 };
+
+static void *  /* Fill in the size header and red zone of a raw block; return the payload pointer. */
+block_init (size_t *block, size_t size)
+{
+  char *p;
+  *block++ = size;  /* header word: the payload size */
+
+  p = (char *) block;  /* payload starts right after the header */
+  memcpy (p + size, block_end, sizeof(block_end));  /* red zone directly after the payload */
+
+  total_alloc += size;
+  return p;
+}
+
+/* Check small redzone, return pointer to malloced block.  Aborts on a damaged red zone; subtracts the size from total_alloc. */
+static size_t *
+block_check  (char *p)
+{
+  size_t *block = (size_t *) p - 1;  /* step back over the size header written by block_init */
+  size_t size = block[0];
+
+  if (memcmp (p + size, block_end, sizeof(block_end)) != 0)
+    {
+      fprintf (stderr, "red zone overwritten.\n");
+      abort ();
+    }
+  total_alloc -= size;
+  return block;
+}
+
+static void *  /* Allocation function installed by main: malloc with size header and red zone; aborts on failure. */
+tu_alloc (size_t size)
+{
+  size_t *block = malloc (sizeof(size_t) + size + sizeof(block_end));  /* header + payload + red zone */
+  if (!block)
+    {
+      fprintf (stderr, "Virtual memory exhausted.\n");
+      abort ();
+    }
+
+  return block_init (block, size);
+}
+
+static void *  /* Reallocation function installed by main; old_size is unused, the stored header is used instead. */
+tu_realloc (void *p, size_t old_size, size_t new_size)
+{
+  size_t *block = block_check (p);  /* verifies the red zone and un-counts the old size */
+  block = realloc (block, sizeof(size_t) + new_size + sizeof(block_end));
+  if (!block)
+    {
+      fprintf (stderr, "Virtual memory exhausted.\n");
+      abort ();
+    }
+
+  return block_init (block, new_size);  /* rewrites header and red zone, counts the new size */
+}
+
+static void  /* Free function installed by main; old_size is unused. */
+tu_free (void *p, size_t old_size)
+{
+  free (block_check (p));  /* verify the red zone, then release the underlying malloc block */
+}
+
+/* Free memory allocated via mini-gmp allocation function. */
+void
+testfree (void *p)
+{
+  void (*freefunc) (void *, size_t);
+  mp_get_memory_functions (NULL, NULL, &freefunc);  /* only the free function is requested */
+
+  freefunc (p, 0);  /* the size argument is passed as 0 */
+}
+
+int  /* Common entry point of the test programs: set up, call testmain, then check for leaks. */
+main (int argc, char **argv)
+{
+  hex_random_init ();
+
+  mp_set_memory_functions (tu_alloc, tu_realloc, tu_free);  /* install the tracking allocator of this file */
+
+  /* Currently, t-comb seems to be the only program accepting any
+     arguments. It might make sense to parse common arguments here. */
+  testmain (argc, argv);
+
+  if (total_alloc != 0)  /* something allocated through tu_alloc/tu_realloc was never freed */
+    {
+      fprintf (stderr, "Memory leaked: %lu bytes.\n",
+              (unsigned long) total_alloc);
+      abort ();
+    }
+  return 0;
+}
+
+void  /* Call tested_fun (count / 2) twice, changing the memory functions between the two calls. */
+testhalves (int count, void (*tested_fun) (int))
+{
+  void (*freefunc) (void *, size_t);
+  void *(*reallocfunc) (void *, size_t, size_t);
+  void *(*allocfunc) (size_t);
+  size_t initial_alloc;
+
+  mp_get_memory_functions (&allocfunc, &reallocfunc, &freefunc);  /* saved so they can be restored at the end */
+  initial_alloc = total_alloc;
+  (*tested_fun) (count / 2);  /* first half: runs with the functions that were installed on entry */
+  if (initial_alloc != total_alloc)
+    {
+      fprintf (stderr, "First half, memory leaked: %lu bytes.\n",
+              (unsigned long) (total_alloc - initial_alloc));  /* cast the difference, so the argument matches %lu */
+      abort ();
+    }
+  mp_set_memory_functions (NULL, NULL, NULL);  /* NOTE(review): presumably selects mini-gmp's defaults -- confirm */
+  (*tested_fun) (count / 2);  /* second half: not covered by the total_alloc check above */
+  mp_set_memory_functions (allocfunc, reallocfunc, freefunc);  /* restore the saved functions */
+}
diff --git a/mini-gmp/tests/testutils.h b/mini-gmp/tests/testutils.h

new file mode 100644 (file)

index 0000000..4b22b94
--- /dev/null
+++ b/mini-gmp/tests/testutils.h
@@ -0,0 +1,31 @@
+/*
+
+Copyright 2013, Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see http://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "mini-random.h"
+
+#define numberof(x)  (sizeof (x) / sizeof ((x)[0]))  /* element count; x must be an array, not a pointer */
+
+void testmain (int argc, char **argv);  /* defined by each test program; called from main in testutils.c */
+
+void testhalves (int count, void (*tested_fun) (int));  /* calls tested_fun (count / 2) twice; see testutils.c */
+
+void testfree (void *p);  /* frees p through mini-gmp's current free function */
diff --git a/mp-h.in b/mp-h.in

deleted file mode 100644 (file)

index 35bc34b..0000000
--- a/mp-h.in
+++ /dev/null
@@ -1,164 +0,0 @@
-/* mp-h.in -- Definitions for the GNU multiple precision library  -*-mode:c-*-
-   BSD mp compatible functions.
-
-Copyright 1991, 1993, 1994, 1995, 1996, 2000, 2001, 2002, 2004 Free Software
-Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-License for more details.
-
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
-
-#ifndef __MP_H__
-
-
-/* The following (everything under ifndef __GNU_MP__) must be identical in
-   gmp.h and mp.h to allow both to be included in an application or during
-   the library build.  Use the t-gmp-mp-h.pl script to check.  */
-#ifndef __GNU_MP__
-#define __GNU_MP__ 5
-
-#define __need_size_t  /* tell gcc stddef.h we only want size_t */
-#if defined (__cplusplus)
-#include <cstddef>     /* for size_t */
-#else
-#include <stddef.h>    /* for size_t */
-#endif
-#undef __need_size_t
-
-/* The following instantiated by configure, for internal use only */
-#if ! defined (__GMP_WITHIN_CONFIGURE)
-@DEFN_LONG_LONG_LIMB@
-#define __GMP_LIBGMP_DLL  @LIBGMP_DLL@
-#endif
-
-#if  defined (__STDC__)                                 \
-  || defined (__cplusplus)                              \
-  || defined (_AIX)                                     \
-  || defined (__DECC)                                   \
-  || (defined (__mips) && defined (_SYSTYPE_SVR4))      \
-  || defined (_MSC_VER)                                 \
-  || defined (_WIN32)
-#define __GMP_HAVE_CONST        1
-#define __GMP_HAVE_PROTOTYPES   1
-#define __GMP_HAVE_TOKEN_PASTE  1
-#else
-#define __GMP_HAVE_CONST        0
-#define __GMP_HAVE_PROTOTYPES   0
-#define __GMP_HAVE_TOKEN_PASTE  0
-#endif
-
-
-#if __GMP_HAVE_CONST
-#define __gmp_const   const
-#define __gmp_signed  signed
-#else
-#define __gmp_const
-#define __gmp_signed
-#endif
-
-#if defined (__GNUC__)
-#define __GMP_DECLSPEC_EXPORT  __declspec(__dllexport__)
-#define __GMP_DECLSPEC_IMPORT  __declspec(__dllimport__)
-#endif
-#if defined (_MSC_VER) || defined (__BORLANDC__)
-#define __GMP_DECLSPEC_EXPORT  __declspec(dllexport)
-#define __GMP_DECLSPEC_IMPORT  __declspec(dllimport)
-#endif
-#ifdef __WATCOMC__
-#define __GMP_DECLSPEC_EXPORT  __export
-#define __GMP_DECLSPEC_IMPORT  __import
-#endif
-#ifdef __IBMC__
-#define __GMP_DECLSPEC_EXPORT  _Export
-#define __GMP_DECLSPEC_IMPORT  _Import
-#endif
-
-#if __GMP_LIBGMP_DLL
-#if __GMP_WITHIN_GMP
-#define __GMP_DECLSPEC  __GMP_DECLSPEC_EXPORT
-#else
-#define __GMP_DECLSPEC  __GMP_DECLSPEC_IMPORT
-#endif
-#else
-#define __GMP_DECLSPEC
-#endif
-
-#ifdef __GMP_SHORT_LIMB
-typedef unsigned int           mp_limb_t;
-typedef int                    mp_limb_signed_t;
-#else
-#ifdef _LONG_LONG_LIMB
-typedef unsigned long long int mp_limb_t;
-typedef long long int          mp_limb_signed_t;
-#else
-typedef unsigned long int      mp_limb_t;
-typedef long int               mp_limb_signed_t;
-#endif
-#endif
-typedef unsigned long int      mp_bitcnt_t;
-
-typedef struct
-{
-  int _mp_alloc;               /* Number of *limbs* allocated and pointed
-                                  to by the _mp_d field.  */
-  int _mp_size;                        /* abs(_mp_size) is the number of limbs the
-                                  last field points to.  If _mp_size is
-                                  negative this is a negative number.  */
-  mp_limb_t *_mp_d;            /* Pointer to the limbs.  */
-} __mpz_struct;
-
-#endif /* __GNU_MP__ */
-
-/* User-visible types.  */
-typedef __mpz_struct MINT;
-
-
-#if __GMP_HAVE_PROTOTYPES
-#define __GMP_PROTO(x) x
-#else
-#define __GMP_PROTO(x) ()
-#endif
-
-#if defined (__cplusplus)
-extern "C" {
-#endif
-
-#define mp_set_memory_functions __gmp_set_memory_functions
-__GMP_DECLSPEC void mp_set_memory_functions __GMP_PROTO ((void *(*) (size_t),
-                                      void *(*) (void *, size_t, size_t),
-                                      void (*) (void *, size_t)));
-__GMP_DECLSPEC MINT *itom __GMP_PROTO ((signed short int));
-__GMP_DECLSPEC MINT *xtom __GMP_PROTO ((const char *));
-__GMP_DECLSPEC void move __GMP_PROTO ((const MINT *, MINT *));
-__GMP_DECLSPEC void madd __GMP_PROTO ((const MINT *, const MINT *, MINT *));
-__GMP_DECLSPEC void msub __GMP_PROTO ((const MINT *, const MINT *, MINT *));
-__GMP_DECLSPEC void mult __GMP_PROTO ((const MINT *, const MINT *, MINT *));
-__GMP_DECLSPEC void mdiv __GMP_PROTO ((const MINT *, const MINT *, MINT *, MINT *));
-__GMP_DECLSPEC void sdiv __GMP_PROTO ((const MINT *, signed short int, MINT *, signed short int *));
-__GMP_DECLSPEC void msqrt __GMP_PROTO ((const MINT *, MINT *, MINT *));
-__GMP_DECLSPEC void pow __GMP_PROTO ((const MINT *, const MINT *, const MINT *, MINT *));
-__GMP_DECLSPEC void rpow __GMP_PROTO ((const MINT *, signed short int, MINT *));
-__GMP_DECLSPEC void gcd __GMP_PROTO ((const MINT *, const MINT *, MINT *));
-__GMP_DECLSPEC int  mcmp __GMP_PROTO ((const MINT *, const MINT *));
-__GMP_DECLSPEC void min __GMP_PROTO ((MINT *));
-__GMP_DECLSPEC void mout __GMP_PROTO ((const MINT *));
-__GMP_DECLSPEC char *mtox __GMP_PROTO ((const MINT *));
-__GMP_DECLSPEC void mfree __GMP_PROTO ((MINT *));
-
-#if defined (__cplusplus)
-}
-#endif
-
-#define __MP_H__
-#endif /* __MP_H__ */
diff --git a/mp_clz_tab.c b/mp_clz_tab.c

index 1e0cee46bfd074be80aca32c2b804b189a53d5a6..ec56b4b26631b5b6311c78e31350c4b99984320e 100644 (file)
--- a/mp_clz_tab.c
+++ b/mp_clz_tab.c
@@ -27,11 +27,12 @@ along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
  
  #ifdef COUNT_LEADING_ZEROS_NEED_CLZ_TAB
  const
-unsigned char __clz_tab[128] =
+unsigned char __clz_tab[129] =
  {
    1,2,3,3,4,4,4,4,5,5,5,5,5,5,5,5,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
    7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
    8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
-  8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8
+  8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
+  9
  };
  #endif
diff --git a/mpbsd/Makefile.am b/mpbsd/Makefile.am

deleted file mode 100644 (file)

index 4272bbf..0000000
--- a/mpbsd/Makefile.am
+++ /dev/null
@@ -1,37 +0,0 @@
-## Process this file with automake to generate Makefile.in
-
-# Copyright 1996, 1999, 2000, 2001, 2002 Free Software Foundation, Inc.
-#
-# This file is part of the GNU MP Library.
-#
-# The GNU MP Library is free software; you can redistribute it and/or modify
-# it under the terms of the GNU Lesser General Public License as published by
-# the Free Software Foundation; either version 3 of the License, or (at your
-# option) any later version.
-#
-# The GNU MP Library is distributed in the hope that it will be useful, but
-# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-# License for more details.
-#
-# You should have received a copy of the GNU Lesser General Public License
-# along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
-
-
-# -I$(top_srcdir)/mpz is for #includes done by mpz .c files.  Perhaps most
-# compilers are smart enough to look in the same directory as the .c file
-# already, but lets make absolutely sure.
-#
-INCLUDES = -DBERKELEY_MP -D__GMP_WITHIN_GMP -D__gmpz_realloc=_mp_realloc \
-       -I$(top_srcdir) -I$(top_srcdir)/mpz
-
-# The mpz sources here all know to look for -DBERKELEY_MP to compile to in
-# mpbsd form.
-#
-libmpbsd_la_SOURCES = itom.c mfree.c min.c mout.c mtox.c rpow.c sdiv.c xtom.c \
-  ../mpz/add.c ../mpz/cmp.c ../mpz/gcd.c ../mpz/mul.c ../mpz/powm.c \
-  ../mpz/realloc.c ../mpz/set.c ../mpz/sqrtrem.c ../mpz/sub.c ../mpz/tdiv_qr.c
-
-if WANT_MPBSD
-noinst_LTLIBRARIES = libmpbsd.la
-endif
diff --git a/mpbsd/Makefile.in b/mpbsd/Makefile.in

deleted file mode 100644 (file)

index b2f7e5c..0000000
--- a/mpbsd/Makefile.in
+++ /dev/null
@@ -1,643 +0,0 @@
-# Makefile.in generated by automake 1.11.1 from Makefile.am.
-# @configure_input@
-
-# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
-# 2003, 2004, 2005, 2006, 2007, 2008, 2009  Free Software Foundation,
-# Inc.
-# This Makefile.in is free software; the Free Software Foundation
-# gives unlimited permission to copy and/or distribute it,
-# with or without modifications, as long as this notice is preserved.
-
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
-# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
-# PARTICULAR PURPOSE.
-
-@SET_MAKE@
-
-# Copyright 1996, 1999, 2000, 2001, 2002 Free Software Foundation, Inc.
-#
-# This file is part of the GNU MP Library.
-#
-# The GNU MP Library is free software; you can redistribute it and/or modify
-# it under the terms of the GNU Lesser General Public License as published by
-# the Free Software Foundation; either version 3 of the License, or (at your
-# option) any later version.
-#
-# The GNU MP Library is distributed in the hope that it will be useful, but
-# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-# License for more details.
-#
-# You should have received a copy of the GNU Lesser General Public License
-# along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
-
-VPATH = @srcdir@
-pkgdatadir = $(datadir)/@PACKAGE@
-pkgincludedir = $(includedir)/@PACKAGE@
-pkglibdir = $(libdir)/@PACKAGE@
-pkglibexecdir = $(libexecdir)/@PACKAGE@
-am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd
-install_sh_DATA = $(install_sh) -c -m 644
-install_sh_PROGRAM = $(install_sh) -c
-install_sh_SCRIPT = $(install_sh) -c
-INSTALL_HEADER = $(INSTALL_DATA)
-transform = $(program_transform_name)
-NORMAL_INSTALL = :
-PRE_INSTALL = :
-POST_INSTALL = :
-NORMAL_UNINSTALL = :
-PRE_UNINSTALL = :
-POST_UNINSTALL = :
-build_triplet = @build@
-host_triplet = @host@
-ANSI2KNR = $(top_builddir)/ansi2knr
-subdir = mpbsd
-DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in
-ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
-am__aclocal_m4_deps = $(top_srcdir)/acinclude.m4 \
-       $(top_srcdir)/configure.in
-am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
-       $(ACLOCAL_M4)
-mkinstalldirs = $(install_sh) -d
-CONFIG_HEADER = $(top_builddir)/config.h
-CONFIG_CLEAN_FILES =
-CONFIG_CLEAN_VPATH_FILES =
-LTLIBRARIES = $(noinst_LTLIBRARIES)
-libmpbsd_la_LIBADD =
-am_libmpbsd_la_OBJECTS = itom$U.lo mfree$U.lo min$U.lo mout$U.lo \
-       mtox$U.lo rpow$U.lo sdiv$U.lo xtom$U.lo add$U.lo cmp$U.lo \
-       gcd$U.lo mul$U.lo powm$U.lo realloc$U.lo set$U.lo sqrtrem$U.lo \
-       sub$U.lo tdiv_qr$U.lo
-libmpbsd_la_OBJECTS = $(am_libmpbsd_la_OBJECTS)
-@WANT_MPBSD_TRUE@am_libmpbsd_la_rpath =
-DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir)
-depcomp =
-am__depfiles_maybe =
-COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \
-       $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
-LTCOMPILE = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \
-       --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \
-       $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
-CCLD = $(CC)
-LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \
-       --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) $(AM_LDFLAGS) \
-       $(LDFLAGS) -o $@
-SOURCES = $(libmpbsd_la_SOURCES)
-DIST_SOURCES = $(libmpbsd_la_SOURCES)
-ETAGS = etags
-CTAGS = ctags
-DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
-ABI = @ABI@
-ACLOCAL = @ACLOCAL@
-AMTAR = @AMTAR@
-AR = @AR@
-AS = @AS@
-ASMFLAGS = @ASMFLAGS@
-AUTOCONF = @AUTOCONF@
-AUTOHEADER = @AUTOHEADER@
-AUTOMAKE = @AUTOMAKE@
-AWK = @AWK@
-CALLING_CONVENTIONS_OBJS = @CALLING_CONVENTIONS_OBJS@
-CC = @CC@
-CCAS = @CCAS@
-CC_FOR_BUILD = @CC_FOR_BUILD@
-CFLAGS = @CFLAGS@
-CPP = @CPP@
-CPPFLAGS = @CPPFLAGS@
-CPP_FOR_BUILD = @CPP_FOR_BUILD@
-CXX = @CXX@
-CXXCPP = @CXXCPP@
-CXXFLAGS = @CXXFLAGS@
-CYGPATH_W = @CYGPATH_W@
-DEFN_LONG_LONG_LIMB = @DEFN_LONG_LONG_LIMB@
-DEFS = @DEFS@
-DLLTOOL = @DLLTOOL@
-DSYMUTIL = @DSYMUTIL@
-DUMPBIN = @DUMPBIN@
-ECHO_C = @ECHO_C@
-ECHO_N = @ECHO_N@
-ECHO_T = @ECHO_T@
-EGREP = @EGREP@
-EXEEXT = @EXEEXT@
-EXEEXT_FOR_BUILD = @EXEEXT_FOR_BUILD@
-FGREP = @FGREP@
-GMP_LDFLAGS = @GMP_LDFLAGS@
-GMP_LIMB_BITS = @GMP_LIMB_BITS@
-GMP_NAIL_BITS = @GMP_NAIL_BITS@
-GREP = @GREP@
-HAVE_CLOCK_01 = @HAVE_CLOCK_01@
-HAVE_CPUTIME_01 = @HAVE_CPUTIME_01@
-HAVE_GETRUSAGE_01 = @HAVE_GETRUSAGE_01@
-HAVE_GETTIMEOFDAY_01 = @HAVE_GETTIMEOFDAY_01@
-HAVE_HOST_CPU_FAMILY_power = @HAVE_HOST_CPU_FAMILY_power@
-HAVE_HOST_CPU_FAMILY_powerpc = @HAVE_HOST_CPU_FAMILY_powerpc@
-HAVE_SIGACTION_01 = @HAVE_SIGACTION_01@
-HAVE_SIGALTSTACK_01 = @HAVE_SIGALTSTACK_01@
-HAVE_SIGSTACK_01 = @HAVE_SIGSTACK_01@
-HAVE_STACK_T_01 = @HAVE_STACK_T_01@
-HAVE_SYS_RESOURCE_H_01 = @HAVE_SYS_RESOURCE_H_01@
-INSTALL = @INSTALL@
-INSTALL_DATA = @INSTALL_DATA@
-INSTALL_PROGRAM = @INSTALL_PROGRAM@
-INSTALL_SCRIPT = @INSTALL_SCRIPT@
-INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@
-LD = @LD@
-LDFLAGS = @LDFLAGS@
-LEX = @LEX@
-LEXLIB = @LEXLIB@
-LEX_OUTPUT_ROOT = @LEX_OUTPUT_ROOT@
-LIBCURSES = @LIBCURSES@
-LIBGMPXX_LDFLAGS = @LIBGMPXX_LDFLAGS@
-LIBGMP_DLL = @LIBGMP_DLL@
-LIBGMP_LDFLAGS = @LIBGMP_LDFLAGS@
-LIBM = @LIBM@
-LIBM_FOR_BUILD = @LIBM_FOR_BUILD@
-LIBOBJS = @LIBOBJS@
-LIBREADLINE = @LIBREADLINE@
-LIBS = @LIBS@
-LIBTOOL = @LIBTOOL@
-LIPO = @LIPO@
-LN_S = @LN_S@
-LTLIBOBJS = @LTLIBOBJS@
-M4 = @M4@
-MAINT = @MAINT@
-MAKEINFO = @MAKEINFO@
-MANIFEST_TOOL = @MANIFEST_TOOL@
-MKDIR_P = @MKDIR_P@
-NM = @NM@
-NMEDIT = @NMEDIT@
-OBJDUMP = @OBJDUMP@
-OBJEXT = @OBJEXT@
-OTOOL = @OTOOL@
-OTOOL64 = @OTOOL64@
-PACKAGE = @PACKAGE@
-PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@
-PACKAGE_NAME = @PACKAGE_NAME@
-PACKAGE_STRING = @PACKAGE_STRING@
-PACKAGE_TARNAME = @PACKAGE_TARNAME@
-PACKAGE_URL = @PACKAGE_URL@
-PACKAGE_VERSION = @PACKAGE_VERSION@
-PATH_SEPARATOR = @PATH_SEPARATOR@
-RANLIB = @RANLIB@
-SED = @SED@
-SET_MAKE = @SET_MAKE@
-SHELL = @SHELL@
-SPEED_CYCLECOUNTER_OBJ = @SPEED_CYCLECOUNTER_OBJ@
-STRIP = @STRIP@
-TAL_OBJECT = @TAL_OBJECT@
-TUNE_SQR_OBJ = @TUNE_SQR_OBJ@
-U = @U@
-U_FOR_BUILD = @U_FOR_BUILD@
-VERSION = @VERSION@
-WITH_READLINE_01 = @WITH_READLINE_01@
-YACC = @YACC@
-YFLAGS = @YFLAGS@
-abs_builddir = @abs_builddir@
-abs_srcdir = @abs_srcdir@
-abs_top_builddir = @abs_top_builddir@
-abs_top_srcdir = @abs_top_srcdir@
-ac_ct_AR = @ac_ct_AR@
-ac_ct_CC = @ac_ct_CC@
-ac_ct_CXX = @ac_ct_CXX@
-ac_ct_DUMPBIN = @ac_ct_DUMPBIN@
-am__leading_dot = @am__leading_dot@
-am__tar = @am__tar@
-am__untar = @am__untar@
-bindir = @bindir@
-build = @build@
-build_alias = @build_alias@
-build_cpu = @build_cpu@
-build_os = @build_os@
-build_vendor = @build_vendor@
-builddir = @builddir@
-datadir = @datadir@
-datarootdir = @datarootdir@
-docdir = @docdir@
-dvidir = @dvidir@
-exec_prefix = @exec_prefix@
-gmp_srclinks = @gmp_srclinks@
-host = @host@
-host_alias = @host_alias@
-host_cpu = @host_cpu@
-host_os = @host_os@
-host_vendor = @host_vendor@
-htmldir = @htmldir@
-includedir = @includedir@
-infodir = @infodir@
-install_sh = @install_sh@
-libdir = @libdir@
-libexecdir = @libexecdir@
-localedir = @localedir@
-localstatedir = @localstatedir@
-mandir = @mandir@
-mkdir_p = @mkdir_p@
-mpn_objects = @mpn_objects@
-mpn_objs_in_libgmp = @mpn_objs_in_libgmp@
-mpn_objs_in_libmp = @mpn_objs_in_libmp@
-oldincludedir = @oldincludedir@
-pdfdir = @pdfdir@
-prefix = @prefix@
-program_transform_name = @program_transform_name@
-psdir = @psdir@
-sbindir = @sbindir@
-sharedstatedir = @sharedstatedir@
-srcdir = @srcdir@
-sysconfdir = @sysconfdir@
-target_alias = @target_alias@
-top_build_prefix = @top_build_prefix@
-top_builddir = @top_builddir@
-top_srcdir = @top_srcdir@
-
-# -I$(top_srcdir)/mpz is for #includes done by mpz .c files.  Perhaps most
-# compilers are smart enough to look in the same directory as the .c file
-# already, but lets make absolutely sure.
-#
-INCLUDES = -DBERKELEY_MP -D__GMP_WITHIN_GMP -D__gmpz_realloc=_mp_realloc \
-       -I$(top_srcdir) -I$(top_srcdir)/mpz
-
-
-# The mpz sources here all know to look for -DBERKELEY_MP to compile to in
-# mpbsd form.
-#
-libmpbsd_la_SOURCES = itom.c mfree.c min.c mout.c mtox.c rpow.c sdiv.c xtom.c \
-  ../mpz/add.c ../mpz/cmp.c ../mpz/gcd.c ../mpz/mul.c ../mpz/powm.c \
-  ../mpz/realloc.c ../mpz/set.c ../mpz/sqrtrem.c ../mpz/sub.c ../mpz/tdiv_qr.c
-
-@WANT_MPBSD_TRUE@noinst_LTLIBRARIES = libmpbsd.la
-all: all-am
-
-.SUFFIXES:
-.SUFFIXES: .c .lo .o .obj
-$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am  $(am__configure_deps)
-       @for dep in $?; do \
-         case '$(am__configure_deps)' in \
-           *$$dep*) \
-             ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \
-               && { if test -f $@; then exit 0; else break; fi; }; \
-             exit 1;; \
-         esac; \
-       done; \
-       echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnu --ignore-deps mpbsd/Makefile'; \
-       $(am__cd) $(top_srcdir) && \
-         $(AUTOMAKE) --gnu --ignore-deps mpbsd/Makefile
-.PRECIOUS: Makefile
-Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
-       @case '$?' in \
-         *config.status*) \
-           cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \
-         *) \
-           echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \
-           cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \
-       esac;
-
-$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)
-       cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
-
-$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps)
-       cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
-$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps)
-       cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
-$(am__aclocal_m4_deps):
-
-clean-noinstLTLIBRARIES:
-       -test -z "$(noinst_LTLIBRARIES)" || rm -f $(noinst_LTLIBRARIES)
-       @list='$(noinst_LTLIBRARIES)'; for p in $$list; do \
-         dir="`echo $$p | sed -e 's|/[^/]*$$||'`"; \
-         test "$$dir" != "$$p" || dir=.; \
-         echo "rm -f \"$${dir}/so_locations\""; \
-         rm -f "$${dir}/so_locations"; \
-       done
-libmpbsd.la: $(libmpbsd_la_OBJECTS) $(libmpbsd_la_DEPENDENCIES) 
-       $(LINK) $(am_libmpbsd_la_rpath) $(libmpbsd_la_OBJECTS) $(libmpbsd_la_LIBADD) $(LIBS)
-
-mostlyclean-compile:
-       -rm -f *.$(OBJEXT)
-
-distclean-compile:
-       -rm -f *.tab.c
-$(top_builddir)/ansi2knr:
-       $(am__cd) $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) ./ansi2knr
-
-mostlyclean-kr:
-       -test "$U" = "" || rm -f *_.c
-
-.c.o:
-       $(COMPILE) -c $<
-
-.c.obj:
-       $(COMPILE) -c `$(CYGPATH_W) '$<'`
-
-.c.lo:
-       $(LTCOMPILE) -c -o $@ $<
-
-add_.lo: add_.c
-       $(LIBTOOL)  --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o add_.lo `test -f 'add_.c' || echo '$(srcdir)/'`add_.c
-
-add.lo: ../mpz/add.c
-       $(LIBTOOL)  --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o add.lo `test -f '../mpz/add.c' || echo '$(srcdir)/'`../mpz/add.c
-
-cmp_.lo: cmp_.c
-       $(LIBTOOL)  --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o cmp_.lo `test -f 'cmp_.c' || echo '$(srcdir)/'`cmp_.c
-
-cmp.lo: ../mpz/cmp.c
-       $(LIBTOOL)  --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o cmp.lo `test -f '../mpz/cmp.c' || echo '$(srcdir)/'`../mpz/cmp.c
-
-gcd_.lo: gcd_.c
-       $(LIBTOOL)  --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o gcd_.lo `test -f 'gcd_.c' || echo '$(srcdir)/'`gcd_.c
-
-gcd.lo: ../mpz/gcd.c
-       $(LIBTOOL)  --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o gcd.lo `test -f '../mpz/gcd.c' || echo '$(srcdir)/'`../mpz/gcd.c
-
-mul_.lo: mul_.c
-       $(LIBTOOL)  --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o mul_.lo `test -f 'mul_.c' || echo '$(srcdir)/'`mul_.c
-
-mul.lo: ../mpz/mul.c
-       $(LIBTOOL)  --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o mul.lo `test -f '../mpz/mul.c' || echo '$(srcdir)/'`../mpz/mul.c
-
-powm_.lo: powm_.c
-       $(LIBTOOL)  --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o powm_.lo `test -f 'powm_.c' || echo '$(srcdir)/'`powm_.c
-
-powm.lo: ../mpz/powm.c
-       $(LIBTOOL)  --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o powm.lo `test -f '../mpz/powm.c' || echo '$(srcdir)/'`../mpz/powm.c
-
-realloc_.lo: realloc_.c
-       $(LIBTOOL)  --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o realloc_.lo `test -f 'realloc_.c' || echo '$(srcdir)/'`realloc_.c
-
-realloc.lo: ../mpz/realloc.c
-       $(LIBTOOL)  --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o realloc.lo `test -f '../mpz/realloc.c' || echo '$(srcdir)/'`../mpz/realloc.c
-
-set_.lo: set_.c
-       $(LIBTOOL)  --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o set_.lo `test -f 'set_.c' || echo '$(srcdir)/'`set_.c
-
-set.lo: ../mpz/set.c
-       $(LIBTOOL)  --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o set.lo `test -f '../mpz/set.c' || echo '$(srcdir)/'`../mpz/set.c
-
-sqrtrem_.lo: sqrtrem_.c
-       $(LIBTOOL)  --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o sqrtrem_.lo `test -f 'sqrtrem_.c' || echo '$(srcdir)/'`sqrtrem_.c
-
-sqrtrem.lo: ../mpz/sqrtrem.c
-       $(LIBTOOL)  --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o sqrtrem.lo `test -f '../mpz/sqrtrem.c' || echo '$(srcdir)/'`../mpz/sqrtrem.c
-
-sub_.lo: sub_.c
-       $(LIBTOOL)  --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o sub_.lo `test -f 'sub_.c' || echo '$(srcdir)/'`sub_.c
-
-sub.lo: ../mpz/sub.c
-       $(LIBTOOL)  --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o sub.lo `test -f '../mpz/sub.c' || echo '$(srcdir)/'`../mpz/sub.c
-
-tdiv_qr_.lo: tdiv_qr_.c
-       $(LIBTOOL)  --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o tdiv_qr_.lo `test -f 'tdiv_qr_.c' || echo '$(srcdir)/'`tdiv_qr_.c
-
-tdiv_qr.lo: ../mpz/tdiv_qr.c
-       $(LIBTOOL)  --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o tdiv_qr.lo `test -f '../mpz/tdiv_qr.c' || echo '$(srcdir)/'`../mpz/tdiv_qr.c
-add_.c: ../mpz/add.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/../mpz/add.c; then echo $(srcdir)/../mpz/add.c; else echo ../mpz/add.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-cmp_.c: ../mpz/cmp.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/../mpz/cmp.c; then echo $(srcdir)/../mpz/cmp.c; else echo ../mpz/cmp.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-gcd_.c: ../mpz/gcd.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/../mpz/gcd.c; then echo $(srcdir)/../mpz/gcd.c; else echo ../mpz/gcd.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-itom_.c: itom.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/itom.c; then echo $(srcdir)/itom.c; else echo itom.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-mfree_.c: mfree.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/mfree.c; then echo $(srcdir)/mfree.c; else echo mfree.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-min_.c: min.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/min.c; then echo $(srcdir)/min.c; else echo min.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-mout_.c: mout.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/mout.c; then echo $(srcdir)/mout.c; else echo mout.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-mtox_.c: mtox.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/mtox.c; then echo $(srcdir)/mtox.c; else echo mtox.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-mul_.c: ../mpz/mul.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/../mpz/mul.c; then echo $(srcdir)/../mpz/mul.c; else echo ../mpz/mul.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-powm_.c: ../mpz/powm.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/../mpz/powm.c; then echo $(srcdir)/../mpz/powm.c; else echo ../mpz/powm.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-realloc_.c: ../mpz/realloc.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/../mpz/realloc.c; then echo $(srcdir)/../mpz/realloc.c; else echo ../mpz/realloc.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-rpow_.c: rpow.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/rpow.c; then echo $(srcdir)/rpow.c; else echo rpow.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-sdiv_.c: sdiv.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/sdiv.c; then echo $(srcdir)/sdiv.c; else echo sdiv.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-set_.c: ../mpz/set.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/../mpz/set.c; then echo $(srcdir)/../mpz/set.c; else echo ../mpz/set.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-sqrtrem_.c: ../mpz/sqrtrem.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/../mpz/sqrtrem.c; then echo $(srcdir)/../mpz/sqrtrem.c; else echo ../mpz/sqrtrem.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-sub_.c: ../mpz/sub.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/../mpz/sub.c; then echo $(srcdir)/../mpz/sub.c; else echo ../mpz/sub.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-tdiv_qr_.c: ../mpz/tdiv_qr.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/../mpz/tdiv_qr.c; then echo $(srcdir)/../mpz/tdiv_qr.c; else echo ../mpz/tdiv_qr.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-xtom_.c: xtom.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/xtom.c; then echo $(srcdir)/xtom.c; else echo xtom.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-add_.$(OBJEXT) add_.lo cmp_.$(OBJEXT) cmp_.lo gcd_.$(OBJEXT) gcd_.lo \
-itom_.$(OBJEXT) itom_.lo mfree_.$(OBJEXT) mfree_.lo min_.$(OBJEXT) \
-min_.lo mout_.$(OBJEXT) mout_.lo mtox_.$(OBJEXT) mtox_.lo \
-mul_.$(OBJEXT) mul_.lo powm_.$(OBJEXT) powm_.lo realloc_.$(OBJEXT) \
-realloc_.lo rpow_.$(OBJEXT) rpow_.lo sdiv_.$(OBJEXT) sdiv_.lo \
-set_.$(OBJEXT) set_.lo sqrtrem_.$(OBJEXT) sqrtrem_.lo sub_.$(OBJEXT) \
-sub_.lo tdiv_qr_.$(OBJEXT) tdiv_qr_.lo xtom_.$(OBJEXT) xtom_.lo : \
-$(ANSI2KNR)
-
-mostlyclean-libtool:
-       -rm -f *.lo
-
-clean-libtool:
-       -rm -rf .libs _libs
-
-ID: $(HEADERS) $(SOURCES) $(LISP) $(TAGS_FILES)
-       list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
-       unique=`for i in $$list; do \
-           if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
-         done | \
-         $(AWK) '{ files[$$0] = 1; nonempty = 1; } \
-             END { if (nonempty) { for (i in files) print i; }; }'`; \
-       mkid -fID $$unique
-tags: TAGS
-
-TAGS:  $(HEADERS) $(SOURCES)  $(TAGS_DEPENDENCIES) \
-               $(TAGS_FILES) $(LISP)
-       set x; \
-       here=`pwd`; \
-       list='$(SOURCES) $(HEADERS)  $(LISP) $(TAGS_FILES)'; \
-       unique=`for i in $$list; do \
-           if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
-         done | \
-         $(AWK) '{ files[$$0] = 1; nonempty = 1; } \
-             END { if (nonempty) { for (i in files) print i; }; }'`; \
-       shift; \
-       if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \
-         test -n "$$unique" || unique=$$empty_fix; \
-         if test $$# -gt 0; then \
-           $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
-             "$$@" $$unique; \
-         else \
-           $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
-             $$unique; \
-         fi; \
-       fi
-ctags: CTAGS
-CTAGS:  $(HEADERS) $(SOURCES)  $(TAGS_DEPENDENCIES) \
-               $(TAGS_FILES) $(LISP)
-       list='$(SOURCES) $(HEADERS)  $(LISP) $(TAGS_FILES)'; \
-       unique=`for i in $$list; do \
-           if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
-         done | \
-         $(AWK) '{ files[$$0] = 1; nonempty = 1; } \
-             END { if (nonempty) { for (i in files) print i; }; }'`; \
-       test -z "$(CTAGS_ARGS)$$unique" \
-         || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \
-            $$unique
-
-GTAGS:
-       here=`$(am__cd) $(top_builddir) && pwd` \
-         && $(am__cd) $(top_srcdir) \
-         && gtags -i $(GTAGS_ARGS) "$$here"
-
-distclean-tags:
-       -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags
-
-distdir: $(DISTFILES)
-       @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
-       topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
-       list='$(DISTFILES)'; \
-         dist_files=`for file in $$list; do echo $$file; done | \
-         sed -e "s|^$$srcdirstrip/||;t" \
-             -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \
-       case $$dist_files in \
-         */*) $(MKDIR_P) `echo "$$dist_files" | \
-                          sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \
-                          sort -u` ;; \
-       esac; \
-       for file in $$dist_files; do \
-         if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \
-         if test -d $$d/$$file; then \
-           dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \
-           if test -d "$(distdir)/$$file"; then \
-             find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
-           fi; \
-           if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \
-             cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \
-             find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
-           fi; \
-           cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \
-         else \
-           test -f "$(distdir)/$$file" \
-           || cp -p $$d/$$file "$(distdir)/$$file" \
-           || exit 1; \
-         fi; \
-       done
-check-am: all-am
-check: check-am
-all-am: Makefile $(LTLIBRARIES)
-installdirs:
-install: install-am
-install-exec: install-exec-am
-install-data: install-data-am
-uninstall: uninstall-am
-
-install-am: all-am
-       @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am
-
-installcheck: installcheck-am
-install-strip:
-       $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
-         install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
-         `test -z '$(STRIP)' || \
-           echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
-mostlyclean-generic:
-
-clean-generic:
-
-distclean-generic:
-       -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES)
-       -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES)
-
-maintainer-clean-generic:
-       @echo "This command is intended for maintainers to use"
-       @echo "it deletes files that may require special tools to rebuild."
-clean: clean-am
-
-clean-am: clean-generic clean-libtool clean-noinstLTLIBRARIES \
-       mostlyclean-am
-
-distclean: distclean-am
-       -rm -f Makefile
-distclean-am: clean-am distclean-compile distclean-generic \
-       distclean-tags
-
-dvi: dvi-am
-
-dvi-am:
-
-html: html-am
-
-html-am:
-
-info: info-am
-
-info-am:
-
-install-data-am:
-
-install-dvi: install-dvi-am
-
-install-dvi-am:
-
-install-exec-am:
-
-install-html: install-html-am
-
-install-html-am:
-
-install-info: install-info-am
-
-install-info-am:
-
-install-man:
-
-install-pdf: install-pdf-am
-
-install-pdf-am:
-
-install-ps: install-ps-am
-
-install-ps-am:
-
-installcheck-am:
-
-maintainer-clean: maintainer-clean-am
-       -rm -f Makefile
-maintainer-clean-am: distclean-am maintainer-clean-generic
-
-mostlyclean: mostlyclean-am
-
-mostlyclean-am: mostlyclean-compile mostlyclean-generic mostlyclean-kr \
-       mostlyclean-libtool
-
-pdf: pdf-am
-
-pdf-am:
-
-ps: ps-am
-
-ps-am:
-
-uninstall-am:
-
-.MAKE: $(top_builddir)/ansi2knr install-am install-strip
-
-.PHONY: CTAGS GTAGS all all-am check check-am clean clean-generic \
-       clean-libtool clean-noinstLTLIBRARIES ctags distclean \
-       distclean-compile distclean-generic distclean-libtool \
-       distclean-tags distdir dvi dvi-am html html-am info info-am \
-       install install-am install-data install-data-am install-dvi \
-       install-dvi-am install-exec install-exec-am install-html \
-       install-html-am install-info install-info-am install-man \
-       install-pdf install-pdf-am install-ps install-ps-am \
-       install-strip installcheck installcheck-am installdirs \
-       maintainer-clean maintainer-clean-generic mostlyclean \
-       mostlyclean-compile mostlyclean-generic mostlyclean-kr \
-       mostlyclean-libtool pdf pdf-am ps ps-am tags uninstall \
-       uninstall-am
-
-
-# Tell versions [3.59,3.63) of GNU make to not export all variables.
-# Otherwise a system limit (for SysV at least) may be exceeded.
-.NOEXPORT:
diff --git a/mpbsd/itom.c b/mpbsd/itom.c

deleted file mode 100644 (file)

index 6f0e31c..0000000
--- a/mpbsd/itom.c
+++ /dev/null
@@ -1,47 +0,0 @@
-/* itom -- BSD compatible allocate and initiate a MINT.
-
-Copyright 1991, 1994, 1995, 2000, 2001 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-License for more details.
-
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
-
-#include "mp.h"
-#include "gmp.h"
-#include "gmp-impl.h"
-
-MINT *
-itom (signed short int n)
-{
-  MINT *x;
-  mp_ptr xp;
-
-  x = (MINT *) (*__gmp_allocate_func) (sizeof (MINT));
-  x->_mp_alloc = 1;
-  x->_mp_d = xp = (mp_ptr) (*__gmp_allocate_func) (BYTES_PER_MP_LIMB);
-  if (n > 0)
-    {
-      x->_mp_size = 1;
-      xp[0] = n;
-    }
-  else if (n < 0)
-    {
-      x->_mp_size = -1;
-      xp[0] = (unsigned short) -n;
-    }
-  else
-    x->_mp_size = 0;
-
-  return x;
-}
diff --git a/mpbsd/mfree.c b/mpbsd/mfree.c

deleted file mode 100644 (file)

index 84e93f8..0000000
--- a/mpbsd/mfree.c
+++ /dev/null
@@ -1,29 +0,0 @@
-/* mfree -- BSD compatible mfree.
-
-Copyright 1991, 1994, 1995, 2000, 2001 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-License for more details.
-
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
-
-#include "mp.h"
-#include "gmp.h"
-#include "gmp-impl.h"
-
-void
-mfree (MINT *m)
-{
-  (*__gmp_free_func) (m->_mp_d, m->_mp_alloc * BYTES_PER_MP_LIMB);
-  (*__gmp_free_func) (m, sizeof (MINT));
-}
diff --git a/mpbsd/min.c b/mpbsd/min.c

deleted file mode 100644 (file)

index 113cc17..0000000
--- a/mpbsd/min.c
+++ /dev/null
@@ -1,89 +0,0 @@
-/* min(MINT) -- Do decimal input from standard input and store result in
-   MINT.
-
-Copyright 1991, 1994, 1996, 2000, 2001 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-License for more details.
-
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
-
-#include <stdio.h>
-#include <ctype.h>
-#include "mp.h"
-#include "gmp.h"
-#include "gmp-impl.h"
-
-extern const unsigned char __gmp_digit_value_tab[];
-#define digit_value_tab __gmp_digit_value_tab
-
-void
-min (MINT *dest)
-{
-  char *str;
-  size_t alloc_size, str_size;
-  int c;
-  int negative;
-  mp_size_t dest_size;
-  const unsigned char *digit_value;
-
-  digit_value = digit_value_tab;
-
-  alloc_size = 100;
-  str = (char *) (*__gmp_allocate_func) (alloc_size);
-  str_size = 0;
-
-  /* Skip whitespace.  */
-  do
-    c = getc (stdin);
-  while (isspace (c));
-
-  negative = 0;
-  if (c == '-')
-    {
-      negative = 1;
-      c = getc (stdin);
-    }
-
-  if (c == EOF || digit_value[c] >= 10)
-    return;                    /* error if no digits */
-
-  do
-    {
-      int dig;
-      dig = digit_value[c];
-      if (dig >= 10)
-       break;
-      if (str_size >= alloc_size)
-       {
-         size_t old_alloc_size = alloc_size;
-         alloc_size = alloc_size * 3 / 2;
-         str = (char *) (*__gmp_reallocate_func) (str, old_alloc_size, alloc_size);
-       }
-      str[str_size++] = dig;
-      c = getc (stdin);
-    }
-  while (c != EOF);
-
-  ungetc (c, stdin);
-
-  dest_size = str_size / mp_bases[10].chars_per_limb + 1;
-  if (dest->_mp_alloc < dest_size)
-    _mp_realloc (dest, dest_size);
-
-  dest_size = mpn_set_str (dest->_mp_d, (unsigned char *) str, str_size, 10);
-  dest->_mp_size = negative ? -dest_size : dest_size;
-
-  (*__gmp_free_func) (str, alloc_size);
-  return;
-}
diff --git a/mpbsd/mout.c b/mpbsd/mout.c

deleted file mode 100644 (file)

index 545539c..0000000
--- a/mpbsd/mout.c
+++ /dev/null
@@ -1,92 +0,0 @@
-/* mout(MINT) -- Do decimal output of MINT to standard output.
-
-Copyright 1991, 1994, 1996, 2000, 2001, 2002, 2005 Free Software Foundation,
-Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-License for more details.
-
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
-
-#include <stdio.h>
-#include <string.h>
-#include "mp.h"
-#include "gmp.h"
-#include "gmp-impl.h"
-#include "longlong.h"
-
-void
-mout (const MINT *x)
-{
-  mp_ptr xp;
-  mp_srcptr x_ptr;
-  mp_size_t x_size;
-  unsigned char *str;
-  size_t str_size;
-  int i;
-  TMP_DECL;
-
-  x_size = x->_mp_size;
-  if (x_size == 0)
-    {
-      fputc ('0', stdout);
-      fputc ('\n', stdout);
-      return;
-    }
-  if (x_size < 0)
-    {
-      fputc ('-', stdout);
-      x_size = -x_size;
-    }
-
-  TMP_MARK;
-  x_ptr = x->_mp_d;
-  MPN_SIZEINBASE (str_size, x_ptr, x_size, 10);
-  str_size += 2;
-  str = (unsigned char *) TMP_ALLOC (str_size);
-
-  /* mpn_get_str clobbers its argument */
-  xp = TMP_ALLOC_LIMBS (x_size);
-  MPN_COPY (xp, x_ptr, x_size);
-
-  str_size = mpn_get_str (str, 10, xp, x_size);
-
-  /* mpn_get_str might make a leading zero, skip it.  */
-  str_size -= (*str == 0);
-  str += (*str == 0);
-  ASSERT (*str != 0);
-
-  /* Translate to printable chars.  */
-  for (i = 0; i < str_size; i++)
-    str[i] = "0123456789"[str[i]];
-  str[str_size] = 0;
-
-  str_size = strlen ((char *) str);
-  if (str_size % 10 != 0)
-    {
-      fwrite (str, 1, str_size % 10, stdout);
-      str += str_size % 10;
-      str_size -= str_size % 10;
-      if (str_size != 0)
-       fputc (' ', stdout);
-    }
-  for (i = 0; i < str_size; i += 10)
-    {
-      fwrite (str, 1, 10, stdout);
-      str += 10;
-      if (i + 10 < str_size)
-       fputc (' ', stdout);
-    }
-  fputc ('\n', stdout);
-  TMP_FREE;
-}
diff --git a/mpbsd/mtox.c b/mpbsd/mtox.c

deleted file mode 100644 (file)

index 7babfdd..0000000
--- a/mpbsd/mtox.c
+++ /dev/null
@@ -1,61 +0,0 @@
-/* mtox -- Convert OPERAND to hexadecimal and return a malloc'ed string
-   with the result of the conversion.
-
-Copyright 1991, 1994, 2000, 2001, 2002 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-License for more details.
-
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
-
-#include <string.h>
-#include "mp.h"
-#include "gmp.h"
-#include "gmp-impl.h"
-#include "longlong.h"
-
-char *
-mtox (const MINT *x)
-{
-  mp_size_t xsize = x->_mp_size;
-  mp_ptr    xp;
-  mp_size_t xsign;
-  unsigned char *str, *s;
-  size_t str_size, alloc_size, i;
-
-  xsign = xsize;
-  if (xsize < 0)
-    xsize = -xsize;
-
-  /* digits, plus '\0', plus possible '-', for an exact size */
-  xp = x->_mp_d;
-  MPN_SIZEINBASE_16 (alloc_size, xp, xsize);
-  alloc_size += 1 + (xsign < 0);
-
-  str = (unsigned char *) (*__gmp_allocate_func) (alloc_size);
-  s = str;
-
-  if (xsign < 0)
-    *s++ = '-';
-
-  str_size = mpn_get_str (s, 16, xp, xsize);
-  ASSERT (str_size <= alloc_size - (xsign < 0));
-  ASSERT (str_size == 1 || *s != 0);
-
-  for (i = 0; i < str_size; i++)
-    s[i] = "0123456789abcdef"[s[i]];
-  s[str_size] = 0;
-
-  ASSERT (strlen (str) + 1 == alloc_size);
-  return (char *) str;
-}
diff --git a/mpbsd/rpow.c b/mpbsd/rpow.c

deleted file mode 100644 (file)

index 827aacb..0000000
--- a/mpbsd/rpow.c
+++ /dev/null
@@ -1,32 +0,0 @@
-/* rpow -- MINT raised to short. */
-
-/*
-Copyright 2001 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-License for more details.
-
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
-
-#include "mp.h"
-#include "gmp.h"
-#include "gmp-impl.h"
-
-void
-rpow (const MINT *b, short e, MINT *r)
-{
-  if (e >= 0)
-    mpz_n_pow_ui (r, PTR(b), (mp_size_t) SIZ(b), (unsigned long) e);
-  else
-    SIZ(r) = 0;
-}
diff --git a/mpbsd/sdiv.c b/mpbsd/sdiv.c

deleted file mode 100644 (file)

index 802fe52..0000000
--- a/mpbsd/sdiv.c
+++ /dev/null
@@ -1,67 +0,0 @@
-/* sdiv -- Divide a MINT by a short integer.  Produce a MINT quotient
-   and a short remainder.
-
-Copyright 1991, 1994, 1995, 2000, 2001 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-License for more details.
-
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
-
-#include "mp.h"
-#include "gmp.h"
-#include "gmp-impl.h"
-#include "longlong.h"
-
-void
-sdiv (const MINT *dividend, signed short int divisor_short, MINT *quot, short *rem_ptr)
-{
-  mp_size_t sign_dividend;
-  signed long int sign_divisor;
-  mp_size_t dividend_size, quot_size;
-  mp_ptr dividend_ptr, quot_ptr;
-  mp_limb_t divisor_limb;
-  mp_limb_t remainder_limb;
-
-  sign_dividend = dividend->_mp_size;
-  dividend_size = ABS (dividend->_mp_size);
-
-  if (dividend_size == 0)
-    {
-      quot->_mp_size = 0;
-      *rem_ptr = 0;
-      return;
-    }
-
-  sign_divisor = divisor_short;
-  divisor_limb = (unsigned short) ABS (divisor_short);
-
-  /* No need for temporary allocation and copying even if QUOT == DIVIDEND
-     as the divisor is just one limb, and thus no intermediate remainders
-     need to be stored.  */
-
-  if (quot->_mp_alloc < dividend_size)
-    _mp_realloc (quot, dividend_size);
-
-  quot_ptr = quot->_mp_d;
-  dividend_ptr = dividend->_mp_d;
-
-  remainder_limb = mpn_divmod_1 (quot_ptr,
-                                dividend_ptr, dividend_size, divisor_limb);
-
-  *rem_ptr = sign_dividend >= 0 ? remainder_limb : -remainder_limb;
-  /* The quotient is DIVIDEND_SIZE limbs, but the most significant
-     might be zero.  Set QUOT_SIZE properly. */
-  quot_size = dividend_size - (quot_ptr[dividend_size - 1] == 0);
-  quot->_mp_size = (sign_divisor ^ sign_dividend) >= 0 ? quot_size : -quot_size;
-}
diff --git a/mpbsd/xtom.c b/mpbsd/xtom.c

deleted file mode 100644 (file)

index 75fa88a..0000000
--- a/mpbsd/xtom.c
+++ /dev/null
@@ -1,88 +0,0 @@
-/* xtom -- convert a hexadecimal string to a MINT, and return a pointer to
-   the MINT.
-
-Copyright 1991, 1994, 1995, 1996, 2000, 2001, 2002, 2005 Free Software
-Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-License for more details.
-
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
-
-#include <string.h>
-#include <ctype.h>
-#include "mp.h"
-#include "gmp.h"
-#include "gmp-impl.h"
-
-extern const unsigned char __gmp_digit_value_tab[];
-#define digit_value __gmp_digit_value_tab
-
-MINT *
-xtom (const char *str)
-{
-  size_t str_size;
-  char *s, *begs;
-  size_t i;
-  mp_size_t xsize;
-  int c;
-  int negative;
-  MINT *x = (MINT *) (*__gmp_allocate_func) (sizeof (MINT));
-  TMP_DECL;
-
-  /* Skip whitespace.  */
-  do
-    c = (unsigned char) *str++;
-  while (isspace (c));
-
-  negative = 0;
-  if (c == '-')
-    {
-      negative = 1;
-      c = (unsigned char) *str++;
-    }
-
-  if (digit_value[c] >= 16)
-    return 0;                  /* error if no digits */
-
-  TMP_MARK;
-  str_size = strlen (str - 1);
-  s = begs = (char *) TMP_ALLOC (str_size + 1);
-
-  for (i = 0; i < str_size; i++)
-    {
-      if (!isspace (c))
-       {
-         int dig = digit_value[c];
-         if (dig >= 16)
-           {
-             TMP_FREE;
-             return 0;
-           }
-         *s++ = dig;
-       }
-      c = (unsigned char) *str++;
-    }
-
-  str_size = s - begs;
-
-  xsize = str_size / mp_bases[16].chars_per_limb + 1;
-  x->_mp_alloc = xsize;
-  x->_mp_d = (mp_ptr) (*__gmp_allocate_func) (xsize * BYTES_PER_MP_LIMB);
-
-  xsize = mpn_set_str (x->_mp_d, (unsigned char *) begs, str_size, 16);
-  x->_mp_size = negative ? -xsize : xsize;
-
-  TMP_FREE;
-  return x;
-}
diff --git a/mpf/Makefile.in b/mpf/Makefile.in

index 96a2077eb04fa3d49a38d61c2a0182fec66d319c..3cc2a49668cfa11de9836cdca4a1226e45a8362c 100644 (file)
--- a/mpf/Makefile.in
+++ b/mpf/Makefile.in
@@ -1,9 +1,9 @@
-# Makefile.in generated by automake 1.11.1 from Makefile.am.
+# Makefile.in generated by automake 1.11.6 from Makefile.am.
  # @configure_input@
  
  # Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
-# 2003, 2004, 2005, 2006, 2007, 2008, 2009  Free Software Foundation,
-# Inc.
+# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software
+# Foundation, Inc.
  # This Makefile.in is free software; the Free Software Foundation
  # gives unlimited permission to copy and/or distribute it,
  # with or without modifications, as long as this notice is preserved.
@@ -34,6 +34,23 @@
  # along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
  
  VPATH = @srcdir@
+am__make_dryrun = \
+  { \
+    am__dry=no; \
+    case $$MAKEFLAGS in \
+      *\\[\ \  ]*) \
+        echo 'am--echo: ; @echo "AM"  OK' | $(MAKE) -f - 2>/dev/null \
+          | grep '^AM OK$$' >/dev/null || am__dry=yes;; \
+      *) \
+        for am__flg in $$MAKEFLAGS; do \
+          case $$am__flg in \
+            *=*|--*) ;; \
+            *n*) am__dry=yes; break;; \
+          esac; \
+        done;; \
+    esac; \
+    test $$am__dry = yes; \
+  }
  pkgdatadir = $(datadir)/@PACKAGE@
  pkgincludedir = $(includedir)/@PACKAGE@
  pkglibdir = $(libdir)/@PACKAGE@
@@ -52,12 +69,11 @@ PRE_UNINSTALL = :
  POST_UNINSTALL = :
  build_triplet = @build@
  host_triplet = @host@
-ANSI2KNR = $(top_builddir)/ansi2knr
  subdir = mpf
  DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in
  ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
  am__aclocal_m4_deps = $(top_srcdir)/acinclude.m4 \
-       $(top_srcdir)/configure.in
+       $(top_srcdir)/configure.ac
  am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
         $(ACLOCAL_M4)
  mkinstalldirs = $(install_sh) -d
@@ -66,21 +82,18 @@ CONFIG_CLEAN_FILES =
  CONFIG_CLEAN_VPATH_FILES =
  LTLIBRARIES = $(noinst_LTLIBRARIES)
  libmpf_la_LIBADD =
-am_libmpf_la_OBJECTS = init$U.lo init2$U.lo inits$U.lo set$U.lo \
-       set_ui$U.lo set_si$U.lo set_str$U.lo set_d$U.lo set_z$U.lo \
-       set_q$U.lo iset$U.lo iset_ui$U.lo iset_si$U.lo iset_str$U.lo \
-       iset_d$U.lo clear$U.lo clears$U.lo get_str$U.lo dump$U.lo \
-       size$U.lo eq$U.lo reldiff$U.lo sqrt$U.lo random2$U.lo \
-       inp_str$U.lo out_str$U.lo add$U.lo add_ui$U.lo sub$U.lo \
-       sub_ui$U.lo ui_sub$U.lo mul$U.lo mul_ui$U.lo div$U.lo \
-       div_ui$U.lo cmp$U.lo cmp_d$U.lo cmp_si$U.lo cmp_ui$U.lo \
-       mul_2exp$U.lo div_2exp$U.lo abs$U.lo neg$U.lo get_d$U.lo \
-       get_d_2exp$U.lo set_dfl_prec$U.lo set_prc$U.lo \
-       set_prc_raw$U.lo get_dfl_prec$U.lo get_prc$U.lo ui_div$U.lo \
-       sqrt_ui$U.lo pow_ui$U.lo urandomb$U.lo swap$U.lo get_si$U.lo \
-       get_ui$U.lo int_p$U.lo ceilfloor$U.lo trunc$U.lo \
-       fits_sint$U.lo fits_slong$U.lo fits_sshort$U.lo fits_uint$U.lo \
-       fits_ulong$U.lo fits_ushort$U.lo
+am_libmpf_la_OBJECTS = init.lo init2.lo inits.lo set.lo set_ui.lo \
+       set_si.lo set_str.lo set_d.lo set_z.lo set_q.lo iset.lo \
+       iset_ui.lo iset_si.lo iset_str.lo iset_d.lo clear.lo clears.lo \
+       get_str.lo dump.lo size.lo eq.lo reldiff.lo sqrt.lo random2.lo \
+       inp_str.lo out_str.lo add.lo add_ui.lo sub.lo sub_ui.lo \
+       ui_sub.lo mul.lo mul_ui.lo div.lo div_ui.lo cmp.lo cmp_d.lo \
+       cmp_si.lo cmp_ui.lo mul_2exp.lo div_2exp.lo abs.lo neg.lo \
+       get_d.lo get_d_2exp.lo set_dfl_prec.lo set_prc.lo \
+       set_prc_raw.lo get_dfl_prec.lo get_prc.lo ui_div.lo sqrt_ui.lo \
+       pow_ui.lo urandomb.lo swap.lo get_si.lo get_ui.lo int_p.lo \
+       ceilfloor.lo trunc.lo fits_sint.lo fits_slong.lo \
+       fits_sshort.lo fits_uint.lo fits_ulong.lo fits_ushort.lo
  libmpf_la_OBJECTS = $(am_libmpf_la_OBJECTS)
  DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir)
  depcomp =
@@ -96,6 +109,11 @@ LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \
         $(LDFLAGS) -o $@
  SOURCES = $(libmpf_la_SOURCES)
  DIST_SOURCES = $(libmpf_la_SOURCES)
+am__can_run_installinfo = \
+  case $$AM_UPDATE_INFO_DIR in \
+    n|no|NO) false;; \
+    *) (install-info --version) >/dev/null 2>&1;; \
+  esac
  ETAGS = etags
  CTAGS = ctags
  DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
@@ -197,8 +215,8 @@ SHELL = @SHELL@
  SPEED_CYCLECOUNTER_OBJ = @SPEED_CYCLECOUNTER_OBJ@
  STRIP = @STRIP@
  TAL_OBJECT = @TAL_OBJECT@
+TUNE_LIBS = @TUNE_LIBS@
  TUNE_SQR_OBJ = @TUNE_SQR_OBJ@
-U = @U@
  U_FOR_BUILD = @U_FOR_BUILD@
  VERSION = @VERSION@
  WITH_READLINE_01 = @WITH_READLINE_01@
@@ -245,7 +263,6 @@ mandir = @mandir@
  mkdir_p = @mkdir_p@
  mpn_objects = @mpn_objects@
  mpn_objs_in_libgmp = @mpn_objs_in_libgmp@
-mpn_objs_in_libmp = @mpn_objs_in_libmp@
  oldincludedir = @oldincludedir@
  pdfdir = @pdfdir@
  prefix = @prefix@
@@ -318,7 +335,7 @@ clean-noinstLTLIBRARIES:
           echo "rm -f \"$${dir}/so_locations\""; \
           rm -f "$${dir}/so_locations"; \
         done
-libmpf.la: $(libmpf_la_OBJECTS) $(libmpf_la_DEPENDENCIES) 
+libmpf.la: $(libmpf_la_OBJECTS) $(libmpf_la_DEPENDENCIES) $(EXTRA_libmpf_la_DEPENDENCIES) 
         $(LINK)  $(libmpf_la_OBJECTS) $(libmpf_la_LIBADD) $(LIBS)
  
  mostlyclean-compile:
@@ -326,11 +343,6 @@ mostlyclean-compile:
  
  distclean-compile:
         -rm -f *.tab.c
-$(top_builddir)/ansi2knr:
-       $(am__cd) $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) ./ansi2knr
-
-mostlyclean-kr:
-       -test "$U" = "" || rm -f *_.c
  
  .c.o:
         $(COMPILE) -c $<
@@ -340,170 +352,6 @@ mostlyclean-kr:
  
  .c.lo:
         $(LTCOMPILE) -c -o $@ $<
-abs_.c: abs.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/abs.c; then echo $(srcdir)/abs.c; else echo abs.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-add_.c: add.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/add.c; then echo $(srcdir)/add.c; else echo add.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-add_ui_.c: add_ui.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/add_ui.c; then echo $(srcdir)/add_ui.c; else echo add_ui.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-ceilfloor_.c: ceilfloor.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/ceilfloor.c; then echo $(srcdir)/ceilfloor.c; else echo ceilfloor.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-clear_.c: clear.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/clear.c; then echo $(srcdir)/clear.c; else echo clear.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-clears_.c: clears.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/clears.c; then echo $(srcdir)/clears.c; else echo clears.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-cmp_.c: cmp.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/cmp.c; then echo $(srcdir)/cmp.c; else echo cmp.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-cmp_d_.c: cmp_d.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/cmp_d.c; then echo $(srcdir)/cmp_d.c; else echo cmp_d.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-cmp_si_.c: cmp_si.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/cmp_si.c; then echo $(srcdir)/cmp_si.c; else echo cmp_si.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-cmp_ui_.c: cmp_ui.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/cmp_ui.c; then echo $(srcdir)/cmp_ui.c; else echo cmp_ui.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-div_.c: div.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/div.c; then echo $(srcdir)/div.c; else echo div.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-div_2exp_.c: div_2exp.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/div_2exp.c; then echo $(srcdir)/div_2exp.c; else echo div_2exp.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-div_ui_.c: div_ui.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/div_ui.c; then echo $(srcdir)/div_ui.c; else echo div_ui.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-dump_.c: dump.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/dump.c; then echo $(srcdir)/dump.c; else echo dump.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-eq_.c: eq.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/eq.c; then echo $(srcdir)/eq.c; else echo eq.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-fits_sint_.c: fits_sint.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/fits_sint.c; then echo $(srcdir)/fits_sint.c; else echo fits_sint.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-fits_slong_.c: fits_slong.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/fits_slong.c; then echo $(srcdir)/fits_slong.c; else echo fits_slong.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-fits_sshort_.c: fits_sshort.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/fits_sshort.c; then echo $(srcdir)/fits_sshort.c; else echo fits_sshort.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-fits_uint_.c: fits_uint.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/fits_uint.c; then echo $(srcdir)/fits_uint.c; else echo fits_uint.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-fits_ulong_.c: fits_ulong.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/fits_ulong.c; then echo $(srcdir)/fits_ulong.c; else echo fits_ulong.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-fits_ushort_.c: fits_ushort.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/fits_ushort.c; then echo $(srcdir)/fits_ushort.c; else echo fits_ushort.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-get_d_.c: get_d.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/get_d.c; then echo $(srcdir)/get_d.c; else echo get_d.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-get_d_2exp_.c: get_d_2exp.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/get_d_2exp.c; then echo $(srcdir)/get_d_2exp.c; else echo get_d_2exp.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-get_dfl_prec_.c: get_dfl_prec.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/get_dfl_prec.c; then echo $(srcdir)/get_dfl_prec.c; else echo get_dfl_prec.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-get_prc_.c: get_prc.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/get_prc.c; then echo $(srcdir)/get_prc.c; else echo get_prc.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-get_si_.c: get_si.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/get_si.c; then echo $(srcdir)/get_si.c; else echo get_si.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-get_str_.c: get_str.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/get_str.c; then echo $(srcdir)/get_str.c; else echo get_str.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-get_ui_.c: get_ui.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/get_ui.c; then echo $(srcdir)/get_ui.c; else echo get_ui.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-init_.c: init.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/init.c; then echo $(srcdir)/init.c; else echo init.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-init2_.c: init2.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/init2.c; then echo $(srcdir)/init2.c; else echo init2.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-inits_.c: inits.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/inits.c; then echo $(srcdir)/inits.c; else echo inits.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-inp_str_.c: inp_str.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/inp_str.c; then echo $(srcdir)/inp_str.c; else echo inp_str.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-int_p_.c: int_p.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/int_p.c; then echo $(srcdir)/int_p.c; else echo int_p.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-iset_.c: iset.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/iset.c; then echo $(srcdir)/iset.c; else echo iset.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-iset_d_.c: iset_d.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/iset_d.c; then echo $(srcdir)/iset_d.c; else echo iset_d.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-iset_si_.c: iset_si.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/iset_si.c; then echo $(srcdir)/iset_si.c; else echo iset_si.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-iset_str_.c: iset_str.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/iset_str.c; then echo $(srcdir)/iset_str.c; else echo iset_str.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-iset_ui_.c: iset_ui.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/iset_ui.c; then echo $(srcdir)/iset_ui.c; else echo iset_ui.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-mul_.c: mul.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/mul.c; then echo $(srcdir)/mul.c; else echo mul.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-mul_2exp_.c: mul_2exp.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/mul_2exp.c; then echo $(srcdir)/mul_2exp.c; else echo mul_2exp.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-mul_ui_.c: mul_ui.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/mul_ui.c; then echo $(srcdir)/mul_ui.c; else echo mul_ui.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-neg_.c: neg.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/neg.c; then echo $(srcdir)/neg.c; else echo neg.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-out_str_.c: out_str.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/out_str.c; then echo $(srcdir)/out_str.c; else echo out_str.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-pow_ui_.c: pow_ui.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/pow_ui.c; then echo $(srcdir)/pow_ui.c; else echo pow_ui.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-random2_.c: random2.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/random2.c; then echo $(srcdir)/random2.c; else echo random2.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-reldiff_.c: reldiff.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/reldiff.c; then echo $(srcdir)/reldiff.c; else echo reldiff.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-set_.c: set.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/set.c; then echo $(srcdir)/set.c; else echo set.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-set_d_.c: set_d.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/set_d.c; then echo $(srcdir)/set_d.c; else echo set_d.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-set_dfl_prec_.c: set_dfl_prec.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/set_dfl_prec.c; then echo $(srcdir)/set_dfl_prec.c; else echo set_dfl_prec.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-set_prc_.c: set_prc.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/set_prc.c; then echo $(srcdir)/set_prc.c; else echo set_prc.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-set_prc_raw_.c: set_prc_raw.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/set_prc_raw.c; then echo $(srcdir)/set_prc_raw.c; else echo set_prc_raw.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-set_q_.c: set_q.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/set_q.c; then echo $(srcdir)/set_q.c; else echo set_q.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-set_si_.c: set_si.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/set_si.c; then echo $(srcdir)/set_si.c; else echo set_si.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-set_str_.c: set_str.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/set_str.c; then echo $(srcdir)/set_str.c; else echo set_str.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-set_ui_.c: set_ui.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/set_ui.c; then echo $(srcdir)/set_ui.c; else echo set_ui.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-set_z_.c: set_z.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/set_z.c; then echo $(srcdir)/set_z.c; else echo set_z.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-size_.c: size.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/size.c; then echo $(srcdir)/size.c; else echo size.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-sqrt_.c: sqrt.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/sqrt.c; then echo $(srcdir)/sqrt.c; else echo sqrt.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-sqrt_ui_.c: sqrt_ui.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/sqrt_ui.c; then echo $(srcdir)/sqrt_ui.c; else echo sqrt_ui.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-sub_.c: sub.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/sub.c; then echo $(srcdir)/sub.c; else echo sub.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-sub_ui_.c: sub_ui.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/sub_ui.c; then echo $(srcdir)/sub_ui.c; else echo sub_ui.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-swap_.c: swap.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/swap.c; then echo $(srcdir)/swap.c; else echo swap.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-trunc_.c: trunc.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/trunc.c; then echo $(srcdir)/trunc.c; else echo trunc.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-ui_div_.c: ui_div.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/ui_div.c; then echo $(srcdir)/ui_div.c; else echo ui_div.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-ui_sub_.c: ui_sub.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/ui_sub.c; then echo $(srcdir)/ui_sub.c; else echo ui_sub.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-urandomb_.c: urandomb.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/urandomb.c; then echo $(srcdir)/urandomb.c; else echo urandomb.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-abs_.$(OBJEXT) abs_.lo add_.$(OBJEXT) add_.lo add_ui_.$(OBJEXT) \
-add_ui_.lo ceilfloor_.$(OBJEXT) ceilfloor_.lo clear_.$(OBJEXT) \
-clear_.lo clears_.$(OBJEXT) clears_.lo cmp_.$(OBJEXT) cmp_.lo \
-cmp_d_.$(OBJEXT) cmp_d_.lo cmp_si_.$(OBJEXT) cmp_si_.lo \
-cmp_ui_.$(OBJEXT) cmp_ui_.lo div_.$(OBJEXT) div_.lo \
-div_2exp_.$(OBJEXT) div_2exp_.lo div_ui_.$(OBJEXT) div_ui_.lo \
-dump_.$(OBJEXT) dump_.lo eq_.$(OBJEXT) eq_.lo fits_sint_.$(OBJEXT) \
-fits_sint_.lo fits_slong_.$(OBJEXT) fits_slong_.lo \
-fits_sshort_.$(OBJEXT) fits_sshort_.lo fits_uint_.$(OBJEXT) \
-fits_uint_.lo fits_ulong_.$(OBJEXT) fits_ulong_.lo \
-fits_ushort_.$(OBJEXT) fits_ushort_.lo get_d_.$(OBJEXT) get_d_.lo \
-get_d_2exp_.$(OBJEXT) get_d_2exp_.lo get_dfl_prec_.$(OBJEXT) \
-get_dfl_prec_.lo get_prc_.$(OBJEXT) get_prc_.lo get_si_.$(OBJEXT) \
-get_si_.lo get_str_.$(OBJEXT) get_str_.lo get_ui_.$(OBJEXT) get_ui_.lo \
-init_.$(OBJEXT) init_.lo init2_.$(OBJEXT) init2_.lo inits_.$(OBJEXT) \
-inits_.lo inp_str_.$(OBJEXT) inp_str_.lo int_p_.$(OBJEXT) int_p_.lo \
-iset_.$(OBJEXT) iset_.lo iset_d_.$(OBJEXT) iset_d_.lo \
-iset_si_.$(OBJEXT) iset_si_.lo iset_str_.$(OBJEXT) iset_str_.lo \
-iset_ui_.$(OBJEXT) iset_ui_.lo mul_.$(OBJEXT) mul_.lo \
-mul_2exp_.$(OBJEXT) mul_2exp_.lo mul_ui_.$(OBJEXT) mul_ui_.lo \
-neg_.$(OBJEXT) neg_.lo out_str_.$(OBJEXT) out_str_.lo \
-pow_ui_.$(OBJEXT) pow_ui_.lo random2_.$(OBJEXT) random2_.lo \
-reldiff_.$(OBJEXT) reldiff_.lo set_.$(OBJEXT) set_.lo set_d_.$(OBJEXT) \
-set_d_.lo set_dfl_prec_.$(OBJEXT) set_dfl_prec_.lo set_prc_.$(OBJEXT) \
-set_prc_.lo set_prc_raw_.$(OBJEXT) set_prc_raw_.lo set_q_.$(OBJEXT) \
-set_q_.lo set_si_.$(OBJEXT) set_si_.lo set_str_.$(OBJEXT) set_str_.lo \
-set_ui_.$(OBJEXT) set_ui_.lo set_z_.$(OBJEXT) set_z_.lo \
-size_.$(OBJEXT) size_.lo sqrt_.$(OBJEXT) sqrt_.lo sqrt_ui_.$(OBJEXT) \
-sqrt_ui_.lo sub_.$(OBJEXT) sub_.lo sub_ui_.$(OBJEXT) sub_ui_.lo \
-swap_.$(OBJEXT) swap_.lo trunc_.$(OBJEXT) trunc_.lo ui_div_.$(OBJEXT) \
-ui_div_.lo ui_sub_.$(OBJEXT) ui_sub_.lo urandomb_.$(OBJEXT) \
-urandomb_.lo : $(ANSI2KNR)
  
  mostlyclean-libtool:
         -rm -f *.lo
@@ -607,10 +455,15 @@ install-am: all-am
  
  installcheck: installcheck-am
  install-strip:
-       $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
-         install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
-         `test -z '$(STRIP)' || \
-           echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+       if test -z '$(STRIP)'; then \
+         $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+           install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+             install; \
+       else \
+         $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+           install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+           "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
+       fi
  mostlyclean-generic:
  
  clean-generic:
@@ -678,7 +531,7 @@ maintainer-clean-am: distclean-am maintainer-clean-generic
  
  mostlyclean: mostlyclean-am
  
-mostlyclean-am: mostlyclean-compile mostlyclean-generic mostlyclean-kr \
+mostlyclean-am: mostlyclean-compile mostlyclean-generic \
         mostlyclean-libtool
  
  pdf: pdf-am
@@ -691,7 +544,7 @@ ps-am:
  
  uninstall-am:
  
-.MAKE: $(top_builddir)/ansi2knr install-am install-strip
+.MAKE: install-am install-strip
  
  .PHONY: CTAGS GTAGS all all-am check check-am clean clean-generic \
         clean-libtool clean-noinstLTLIBRARIES ctags distclean \
@@ -703,9 +556,8 @@ uninstall-am:
         install-pdf install-pdf-am install-ps install-ps-am \
         install-strip installcheck installcheck-am installdirs \
         maintainer-clean maintainer-clean-generic mostlyclean \
-       mostlyclean-compile mostlyclean-generic mostlyclean-kr \
-       mostlyclean-libtool pdf pdf-am ps ps-am tags uninstall \
-       uninstall-am
+       mostlyclean-compile mostlyclean-generic mostlyclean-libtool \
+       pdf pdf-am ps ps-am tags uninstall uninstall-am
  
  
  # Tell versions [3.59,3.63) of GNU make to not export all variables.
diff --git a/mpf/ceilfloor.c b/mpf/ceilfloor.c

index a0c5d77deac45fb40e03d33905378cecc9c58b11..3a34c1b44c66bdf37921de6ae35920000ecef33c 100644 (file)
--- a/mpf/ceilfloor.c
+++ b/mpf/ceilfloor.c
@@ -1,6 +1,6 @@
  /* mpf_ceil, mpf_floor -- round an mpf to an integer.
  
-Copyright 2001, 2004 Free Software Foundation, Inc.
+Copyright 2001, 2004, 2012 Free Software Foundation, Inc.
  
  This file is part of the GNU MP Library.
  
@@ -26,7 +26,7 @@ along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
     Notice the use of prec+1 ensures mpf_ceil and mpf_floor are equivalent to
     mpf_set if u is already an integer.  */
  
-static void __gmpf_ceil_or_floor __GMP_PROTO ((REGPARM_2_1 (mpf_ptr, mpf_srcptr, int))) REGPARM_ATTR (1);
+static void __gmpf_ceil_or_floor (REGPARM_2_1 (mpf_ptr, mpf_srcptr, int)) REGPARM_ATTR (1);
  #define mpf_ceil_or_floor(r,u,dir)  __gmpf_ceil_or_floor (REGPARM_2_1 (r, u, dir))
  
  REGPARM_ATTR (1) static void
diff --git a/mpf/cmp_si.c b/mpf/cmp_si.c

index 9b364a38734bf07af06e06752484c06986e8119b..e1af23c01a0d523a18ed025a32c0d78b5ceba669 100644 (file)
--- a/mpf/cmp_si.c
+++ b/mpf/cmp_si.c
@@ -1,6 +1,6 @@
  /* mpf_cmp_si -- Compare a float with a signed integer.
  
-Copyright 1993, 1994, 1995, 1999, 2000, 2001, 2002, 2004 Free Software
+Copyright 1993, 1994, 1995, 1999, 2000, 2001, 2002, 2004, 2012 Free Software
  Foundation, Inc.
  
  This file is part of the GNU MP Library.
diff --git a/mpf/div.c b/mpf/div.c

index 8f3abc6110baf7c6d5820ffcacb1097e73ed1a31..ea3297ccc131e362524669b1fd3baedd879dc4ad 100644 (file)
--- a/mpf/div.c
+++ b/mpf/div.c
@@ -1,7 +1,7 @@
  /* mpf_div -- Divide two floats.
  
-Copyright 1993, 1994, 1996, 2000, 2001, 2002, 2004, 2005, 2010 Free Software
-Foundation, Inc.
+Copyright 1993, 1994, 1996, 2000, 2001, 2002, 2004, 2005, 2010, 2012 Free
+Software Foundation, Inc.
  
  This file is part of the GNU MP Library.
  
@@ -37,7 +37,7 @@ along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
     to save one limb in the division.
  
     If r==u but the size is enough bigger than prec that there won't be an
-   overlap between quotient and dividend in mpn_tdiv_qr, then we can avoid
+   overlap between quotient and dividend in mpn_div_q, then we can avoid
     copying up,usize.  This would only arise from a prec reduced with
     mpf_set_prec_raw and will be pretty unusual, but might be worthwhile if
     it could be worked into the copy_u decision cleanly.  */
@@ -55,12 +55,8 @@ mpf_div (mpf_ptr r, mpf_srcptr u, mpf_srcptr v)
  
    usize = SIZ(u);
    vsize = SIZ(v);
-  sign_quotient = usize ^ vsize;
-  usize = ABS (usize);
-  vsize = ABS (vsize);
-  prec = PREC(r);
  
-  if (vsize == 0)
+  if (UNLIKELY (vsize == 0))
      DIVIDE_BY_ZERO;
  
    if (usize == 0)
@@ -70,6 +66,11 @@ mpf_div (mpf_ptr r, mpf_srcptr u, mpf_srcptr v)
        return;
      }
  
+  sign_quotient = usize ^ vsize;
+  usize = ABS (usize);
+  vsize = ABS (vsize);
+  prec = PREC(r);
+
    TMP_MARK;
    rexp = EXP(u) - EXP(v) + 1;
  
diff --git a/mpf/div_ui.c b/mpf/div_ui.c

index 2f4de1511e33588883cd7c5b43ce5b2524bcab14..5ccc00ec3381fd9d243dad907e043a0a8bb13e6e 100644 (file)
--- a/mpf/div_ui.c
+++ b/mpf/div_ui.c
@@ -1,6 +1,6 @@
  /* mpf_div_ui -- Divide a float with an unsigned integer.
  
-Copyright 1993, 1994, 1996, 2000, 2001, 2002, 2004, 2005 Free Software
+Copyright 1993, 1994, 1996, 2000, 2001, 2002, 2004, 2005, 2012 Free Software
  Foundation, Inc.
  
  This file is part of the GNU MP Library.
@@ -50,14 +50,11 @@ mpf_div_ui (mpf_ptr r, mpf_srcptr u, unsigned long int v)
      }
  #endif
  
-  usize = u->_mp_size;
-  sign_quotient = usize;
-  usize = ABS (usize);
-  prec = r->_mp_prec;
-
-  if (v == 0)
+  if (UNLIKELY (v == 0))
      DIVIDE_BY_ZERO;
  
+  usize = u->_mp_size;
+
    if (usize == 0)
      {
        r->_mp_size = 0;
@@ -65,6 +62,10 @@ mpf_div_ui (mpf_ptr r, mpf_srcptr u, unsigned long int v)
        return;
      }
  
+  sign_quotient = usize;
+  usize = ABS (usize);
+  prec = r->_mp_prec;
+
    TMP_MARK;
  
    rp = r->_mp_d;
diff --git a/mpf/eq.c b/mpf/eq.c

index cdbbcb96da4cacaffc8c6475115461edcf402f71..9efa21220c5065694fd860fd35e1ef9134363164 100644 (file)
--- a/mpf/eq.c
+++ b/mpf/eq.c
@@ -1,7 +1,7 @@
  /* mpf_eq -- Compare two floats up to a specified bit #.
  
-Copyright 1993, 1995, 1996, 2001, 2002, 2008, 2009 Free Software Foundation,
-Inc.
+Copyright 1993, 1995, 1996, 2001, 2002, 2008, 2009, 2012 Free Software
+Foundation, Inc.
  
  This file is part of the GNU MP Library.
  
diff --git a/mpf/get_str.c b/mpf/get_str.c

index 447bfdbb97370eff4fd0c1274865a01cd31361d9..d0cba5936f5b0ead3bfe4ea2059c3e48d07314f3 100644 (file)
--- a/mpf/get_str.c
+++ b/mpf/get_str.c
@@ -4,8 +4,8 @@
     example, the number 3.1416 would be returned as "31416" in DIGIT_PTR and
     1 in EXP.
  
-Copyright 1993, 1994, 1995, 1996, 1997, 2000, 2001, 2002, 2003, 2005, 2006 Free
-Software Foundation, Inc.
+Copyright 1993, 1994, 1995, 1996, 1997, 2000, 2001, 2002, 2003, 2005, 2006, 2011
+Free Software Foundation, Inc.
  
  This file is part of the GNU MP Library.
  
@@ -131,7 +131,7 @@ mpf_get_str (char *dbuf, mp_exp_t *exp, int base, size_t n_digits, mpf_srcptr u)
    if (base >= 0)
      {
        num_to_text = "0123456789abcdefghijklmnopqrstuvwxyz";
-      if (base == 0)
+      if (base <= 1)
         base = 10;
        else if (base > 36)
         {
@@ -143,6 +143,10 @@ mpf_get_str (char *dbuf, mp_exp_t *exp, int base, size_t n_digits, mpf_srcptr u)
    else
      {
        base = -base;
+      if (base <= 1)
+       base = 10;
+      else if (base > 36)
+       return NULL;
        num_to_text = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ";
      }
  
@@ -174,8 +178,7 @@ mpf_get_str (char *dbuf, mp_exp_t *exp, int base, size_t n_digits, mpf_srcptr u)
       conversion.)  */
    tstr = (unsigned char *) TMP_ALLOC (n_digits + 2 * GMP_LIMB_BITS + 3);
  
-  n_limbs_needed = 2 + (mp_size_t)
-    (n_digits / (GMP_NUMB_BITS * mp_bases[base].chars_per_bit_exactly));
+  LIMBS_PER_DIGIT_IN_BASE (n_limbs_needed, n_digits, base);
  
    if (ue <= n_limbs_needed)
      {
@@ -184,7 +187,7 @@ mpf_get_str (char *dbuf, mp_exp_t *exp, int base, size_t n_digits, mpf_srcptr u)
        unsigned long e;
  
        n_more_limbs_needed = n_limbs_needed - ue;
-      e = (unsigned long) n_more_limbs_needed * (GMP_NUMB_BITS * mp_bases[base].chars_per_bit_exactly);
+      DIGITS_IN_BASE_PER_LIMB (e, n_more_limbs_needed, base);
  
        if (un > n_limbs_needed)
         {
@@ -221,7 +224,7 @@ mpf_get_str (char *dbuf, mp_exp_t *exp, int base, size_t n_digits, mpf_srcptr u)
        mp_ptr dummyp, xp;
  
        n_less_limbs_needed = ue - n_limbs_needed;
-      e = (unsigned long) n_less_limbs_needed * (GMP_NUMB_BITS * mp_bases[base].chars_per_bit_exactly);
+      DIGITS_IN_BASE_PER_LIMB (e, n_less_limbs_needed, base);
  
        if (un > n_limbs_needed)
         {
diff --git a/mpf/iset_si.c b/mpf/iset_si.c

index a689d0d05a5ab726b8ea89420bb7aa310e8ae2f3..7eaf08cff44ea9e4a5b2f3f9a2d524b9fb2b9714 100644 (file)
--- a/mpf/iset_si.c
+++ b/mpf/iset_si.c
@@ -1,7 +1,7 @@
  /* mpf_init_set_si() -- Initialize a float and assign it from a signed int.
  
-Copyright 1993, 1994, 1995, 2000, 2001, 2003, 2004 Free Software Foundation,
-Inc.
+Copyright 1993, 1994, 1995, 2000, 2001, 2003, 2004, 2012 Free Software
+Foundation, Inc.
  
  This file is part of the GNU MP Library.
  
diff --git a/mpf/out_str.c b/mpf/out_str.c

index afccdbb031a302f354538112e00f409ca6033d95..3e34a5341cae772b7cd5ed1d8930daec070b8bf8 100644 (file)
--- a/mpf/out_str.c
+++ b/mpf/out_str.c
@@ -2,7 +2,7 @@
     the float OP to STREAM in base BASE.  Return the number of characters
     written, or 0 if an error occurred.
  
-Copyright 1996, 1997, 2001, 2002, 2005 Free Software Foundation, Inc.
+Copyright 1996, 1997, 2001, 2002, 2005, 2011 Free Software Foundation, Inc.
  
  This file is part of the GNU MP Library.
  
@@ -36,6 +36,7 @@ along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
  
  #include "gmp.h"
  #include "gmp-impl.h"
+#include "longlong.h"
  
  
  size_t
diff --git a/mpf/pow_ui.c b/mpf/pow_ui.c

index 5d029147eb4db9735f5b1e8796eca42d86a8ca86..589bbee2f97c1c30e4cefa5f8dbd29330c11470c 100644 (file)
--- a/mpf/pow_ui.c
+++ b/mpf/pow_ui.c
@@ -1,6 +1,6 @@
  /* mpf_pow_ui -- Compute b^e.
  
-Copyright 1998, 1999, 2001 Free Software Foundation, Inc.
+Copyright 1998, 1999, 2001, 2012 Free Software Foundation, Inc.
  
  This file is part of the GNU MP Library.
  
@@ -24,18 +24,18 @@ void
  mpf_pow_ui (mpf_ptr r, mpf_srcptr b, unsigned long int e)
  {
    mpf_t b2;
-  unsigned long int e2;
  
    mpf_init2 (b2, mpf_get_prec (r));
    mpf_set (b2, b);
-  mpf_set_ui (r, 1);
  
    if ((e & 1) != 0)
-    mpf_set (r, b2);
-  for (e2 = e >> 1; e2 != 0; e2 >>= 1)
+    mpf_set (r, b);
+  else
+    mpf_set_ui (r, 1);
+  while (e >>= 1)
      {
        mpf_mul (b2, b2, b2);
-      if ((e2 & 1) != 0)
+      if ((e & 1) != 0)
         mpf_mul (r, r, b2);
      }
  
diff --git a/mpf/reldiff.c b/mpf/reldiff.c

index f9e40b66e856633acb34858cb2470da0a6231ced..6a941b893f0c2a5b73f3340428d7a49ccac7ea81 100644 (file)
--- a/mpf/reldiff.c
+++ b/mpf/reldiff.c
@@ -22,8 +22,8 @@ along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
  
  
  /* The precision we use for d = x-y is based on what mpf_div will want from
-   the dividend.  It calls mpn_tdiv_qr to produce a quotient of rprec+1
-   limbs.  So rprec+1 == dsize - xsize + 1, hence dprec = rprec+xsize.  */
+   the dividend.  It calls mpn_div_q to produce a quotient of rprec+1 limbs.
+   So rprec+1 == dsize - xsize + 1, hence dprec = rprec+xsize.  */
  
  void
  mpf_reldiff (mpf_t rdiff, mpf_srcptr x, mpf_srcptr y)
diff --git a/mpf/set_si.c b/mpf/set_si.c

index aa7b4ee84a707e0693c3306d09b219daa73526f5..b9dfae3ef2d68db8f525e6300f7455612cf09fc2 100644 (file)
--- a/mpf/set_si.c
+++ b/mpf/set_si.c
@@ -1,7 +1,7 @@
  /* mpf_set_si() -- Assign a float from a signed int.
  
-Copyright 1993, 1994, 1995, 2000, 2001, 2002, 2004 Free Software Foundation,
-Inc.
+Copyright 1993, 1994, 1995, 2000, 2001, 2002, 2004, 2012 Free Software
+Foundation, Inc.
  
  This file is part of the GNU MP Library.
  
diff --git a/mpf/set_str.c b/mpf/set_str.c

index 01a175fa622aa57d7c81e458701da2db23594659..644b201d8e9eff50dcaf57b23dfbbeb62283591d 100644 (file)
--- a/mpf/set_str.c
+++ b/mpf/set_str.c
@@ -3,7 +3,7 @@
     of STRING is used to figure out the base.
  
  Copyright 1993, 1994, 1995, 1996, 1997, 2000, 2001, 2002, 2003, 2005, 2007,
-2008 Free Software Foundation, Inc.
+2008, 2011 Free Software Foundation, Inc.
  
  This file is part of the GNU MP Library.
  
@@ -24,7 +24,7 @@ along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
    This still needs work, as suggested by some FIXME comments.
    1. Don't depend on superfluous mantissa digits.
    2. Allocate temp space more cleverly.
-  3. Use mpn_tdiv_qr instead of mpn_lshift+mpn_divrem.
+  3. Use mpn_div_q instead of mpn_lshift+mpn_divrem.
  */
  
  #define _GNU_SOURCE    /* for DECIMAL_POINT in langinfo.h */
@@ -47,7 +47,7 @@ along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
  #include "gmp-impl.h"
  #include "longlong.h"
  
-extern const unsigned char __gmp_digit_value_tab[];
+
  #define digit_value_tab __gmp_digit_value_tab
  
  /* Compute base^exp and return the most significant prec limbs in rp[].
@@ -164,8 +164,8 @@ mpf_set_str (mpf_ptr x, const char *str, int base)
      {
        /* not a digit, must be a decimal point */
        for (i = 0; i < pointlen; i++)
-        if (str[i] != point[i])
-          return -1;
+       if (str[i] != point[i])
+         return -1;
        if (digit_value[(unsigned char) str[pointlen]] >= (base == 0 ? 10 : base))
         return -1;
      }
@@ -196,10 +196,10 @@ mpf_set_str (mpf_ptr x, const char *str, int base)
         {
           int dig;
  
-          for (j = 0; j < pointlen; j++)
-            if (str[j] != point[j])
-              goto not_point;
-          if (1)
+         for (j = 0; j < pointlen; j++)
+           if (str[j] != point[j])
+             goto not_point;
+         if (1)
             {
               if (dotpos != 0)
                 {
@@ -213,7 +213,7 @@ mpf_set_str (mpf_ptr x, const char *str, int base)
             }
           else
             {
-            not_point:
+           not_point:
               dig = digit_value[c];
               if (dig >= base)
                 {
@@ -244,14 +244,12 @@ mpf_set_str (mpf_ptr x, const char *str, int base)
      /* This breaks things like 0.000...0001.  To safely ignore superfluous
         digits, we need to skip over leading zeros.  */
      /* Just consider the relevant leading digits of the mantissa.  */
-    n_chars_needed = 2 + (size_t)
-      (((size_t) prec * GMP_NUMB_BITS) * mp_bases[base].chars_per_bit_exactly);
+    LIMBS_PER_DIGIT_IN_BASE (n_chars_needed, prec, base);
      if (str_size > n_chars_needed)
        str_size = n_chars_needed;
  #endif
  
-    ma = 2 + (mp_size_t)
-      (str_size / (GMP_NUMB_BITS * mp_bases[base].chars_per_bit_exactly));
+    LIMBS_PER_DIGIT_IN_BASE (ma, str_size, base);
      mp = TMP_ALLOC_LIMBS (ma);
      mn = mpn_set_str (mp, (unsigned char *) begs, str_size, base);
  
@@ -323,8 +321,10 @@ mpf_set_str (mpf_ptr x, const char *str, int base)
      if (divflag)
        {
  #if 0
-       /* FIXME: Should use mpn_tdiv here.  */
-       mpn_tdiv_qr (qp, mp, 0L, mp, mn, rp, rn);
+       /* FIXME: Should use mpn_div_q here.  */
+       ...
+       mpn_div_q (tp, mp, mn, rp, rn, scratch);
+       ...
  #else
         mp_ptr qp;
         mp_limb_t qlimb;
diff --git a/mpf/sqrt.c b/mpf/sqrt.c

index dce9aff46f8060bde2a4dbc0d68444c5ff8048c6..bdd79358b77bc74d3303b58e6cc5746723af2b26 100644 (file)
--- a/mpf/sqrt.c
+++ b/mpf/sqrt.c
@@ -1,7 +1,7 @@
  /* mpf_sqrt -- Compute the square root of a float.
  
-Copyright 1993, 1994, 1996, 2000, 2001, 2004, 2005 Free Software Foundation,
-Inc.
+Copyright 1993, 1994, 1996, 2000, 2001, 2004, 2005, 2012 Free Software
+Foundation, Inc.
  
  This file is part of the GNU MP Library.
  
@@ -59,7 +59,7 @@ mpf_sqrt (mpf_ptr r, mpf_srcptr u)
    TMP_DECL;
  
    usize = u->_mp_size;
-  if (usize <= 0)
+  if (UNLIKELY (usize <= 0))
      {
        if (usize < 0)
          SQRT_OF_NEGATIVE;
diff --git a/mpf/ui_div.c b/mpf/ui_div.c

index 39a0bd9198f690e1a4c2c9516af27d0eadaa267d..3ca717e7512b051a812c7beb99a8af5b7f1b114e 100644 (file)
--- a/mpf/ui_div.c
+++ b/mpf/ui_div.c
@@ -1,7 +1,7 @@
  /* mpf_ui_div -- Divide an unsigned integer with a float.
  
-Copyright 1993, 1994, 1995, 1996, 2000, 2001, 2002, 2004, 2005 Free Software
-Foundation, Inc.
+Copyright 1993, 1994, 1995, 1996, 2000, 2001, 2002, 2004, 2005, 2012 Free
+Software Foundation, Inc.
  
  This file is part of the GNU MP Library.
  
@@ -38,8 +38,6 @@ mpf_ui_div (mpf_ptr r, unsigned long int u, mpf_srcptr v)
  
    vsize = v->_mp_size;
    sign_quotient = vsize;
-  vsize = ABS (vsize);
-  prec = r->_mp_prec;
  
    if (UNLIKELY (vsize == 0))
      DIVIDE_BY_ZERO;
@@ -51,6 +49,9 @@ mpf_ui_div (mpf_ptr r, unsigned long int u, mpf_srcptr v)
        return;
      }
  
+  vsize = ABS (vsize);
+  prec = r->_mp_prec;
+
    TMP_MARK;
    rexp = 1 - v->_mp_exp + 1;
  
diff --git a/mpn/Makefile.am b/mpn/Makefile.am

index 15705f72029f9d4d87263766eb2cadcf0d89be1a..6b81e4c98ec931c27a62a9d0119ac32dc10fe7a5 100644 (file)
--- a/mpn/Makefile.am
+++ b/mpn/Makefile.am
@@ -24,49 +24,6 @@ INCLUDES = -D__GMP_WITHIN_GMP -I$(top_srcdir) \
  
  OFILES = @mpn_objects@
  
-
-# All possible mpn normal and optional function files are listed here, to
-# get automake to generate ansi2knr rules for each.  Such rules will be
-# ignored for any that are instead implemented with a .asm (or whatever) for
-# a particular target.
-#
-nodist_EXTRA_libmpn_la_SOURCES =                                           \
-  add.c add_1.c add_n.c                                                            \
-  addmul_1.c addmul_2.c addmul_3.c addmul_4.c addmul_5.c addmul_6.c        \
-  addmul_7.c addmul_8.c                                                            \
-  and_n.c andn_n.c                                                         \
-  cmp.c com.c copyd.c copyi.c                                              \
-  dive_1.c diveby3.c divis.c divrem.c divrem_1.c divrem_2.c                \
-  sbpi1_bdiv_qr.c sbpi1_bdiv_q.c                                           \
-  sbpi1_div_qr.c sbpi1_div_q.c sbpi1_divappr_q.c                           \
-  dcpi1_bdiv_qr.c dcpi1_bdiv_q.c                                           \
-  dcpi1_div_qr.c dcpi1_div_q.c dcpi1_divappr_q.c                           \
-  dump.c fib2_ui.c gcd.c                                                   \
-  gcd_1.c gcdext.c get_d.c get_str.c                                       \
-  hamdist.c hgcd2.c hgcd.c invert_limb.c                                   \
-  ior_n.c iorn_n.c jacbase.c lshift.c                                      \
-  matrix22_mul.c mod_1.c mod_34lsub1.c mode1o.c                                    \
-  mod_1_1.c mod_1_2.c mod_1_3.c mod_1_4.c                                  \
-  mul.c mul_1.c mul_2.c mul_3.c mul_4.c mul_fft.c mul_n.c mul_basecase.c    \
-  nussbaumer_mul.c                                                         \
-  toom22_mul.c toom32_mul.c toom42_mul.c toom52_mul.c toom62_mul.c         \
-  toom33_mul.c toom43_mul.c toom53_mul.c toom63_mul.c                      \
-  toom44_mul.c                                                             \
-  toom6h_mul.c toom6_sqr.c toom8h_mul.c toom8_sqr.c                        \
-  toom_couple_handling.c                                                   \
-  toom2_sqr.c toom3_sqr.c toom4_sqr.c                                      \
-  toom_eval_dgr3_pm1.c toom_eval_dgr3_pm2.c                                \
-  toom_eval_pm1.c toom_eval_pm1.c toom_eval_pm2exp.c toom_eval_pm2rexp.c    \
-  toom_interpolate_5pts.c toom_interpolate_6pts.c toom_interpolate_7pts.c   \
-  toom_interpolate_8pts.c toom_interpolate_12pts.c toom_interpolate_16pts.c \
-  invertappr.c invert.c binvert.c mulmod_bnm1.c sqrmod_bnm1.c              \
-  mullo_n.c mullo_basecase.c nand_n.c neg.c nior_n.c perfsqr.c     \
-  popcount.c pre_divrem_1.c pre_mod_1.c pow_1.c random.c random2.c rshift.c \
-  rootrem.c scan0.c scan1.c set_str.c                      \
-  sqr_basecase.c sqr_diagonal.c                                                    \
-  sqrtrem.c sub.c sub_1.c sub_n.c submul_1.c                               \
-  tdiv_qr.c udiv_qrnnd.c udiv_w_sdiv.c xor_n.c xnor_n.c
-
  noinst_LTLIBRARIES = libmpn.la
  nodist_libmpn_la_SOURCES = fib_table.c mp_bases.c
  libmpn_la_LIBADD = $(OFILES)
@@ -74,7 +31,7 @@ libmpn_la_DEPENDENCIES = $(OFILES)
  
  TARG_DIST = a29k alpha arm clipper cray generic i960 ia64 lisp m68k m88k \
    minithres mips32 mips64 ns32k pa32 pa64 power powerpc32 powerpc64 pyr \
-  s390_32 s390_64 sh sparc32 sparc64 vax x86 x86_64 z8000 z8000x
+  s390_32 s390_64 sh sparc32 sparc64 thumb vax x86 x86_64 z8000
  
  EXTRA_DIST = asm-defs.m4 cpp-ccas m4-ccas $(TARG_DIST)
  
@@ -89,7 +46,4 @@ mp_bases.c:
  perfsqr.h:
         cd ..; $(MAKE) $(AM_MAKEFLAGS) mpn/perfsqr.h
  
-tune-gcd-p: gcd.c
-       $(COMPILE) -g -O1 -I $(top_srcdir)/tune -DTUNE_GCD_P=1 gcd.c -o tune-gcd-p -L ../.libs -L../tune/.libs -lspeed -lgmp -lm
-
  include Makeasm.am
diff --git a/mpn/Makefile.in b/mpn/Makefile.in

index 184c80db9ae3a33af9560d0a38fdf4310a419369..fe4fa2226e470c220e7bcc14b69ccfff74384f2d 100644 (file)
--- a/mpn/Makefile.in
+++ b/mpn/Makefile.in
@@ -1,9 +1,9 @@
-# Makefile.in generated by automake 1.11.1 from Makefile.am.
+# Makefile.in generated by automake 1.11.6 from Makefile.am.
  # @configure_input@
  
  # Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
-# 2003, 2004, 2005, 2006, 2007, 2008, 2009  Free Software Foundation,
-# Inc.
+# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software
+# Foundation, Inc.
  # This Makefile.in is free software; the Free Software Foundation
  # gives unlimited permission to copy and/or distribute it,
  # with or without modifications, as long as this notice is preserved.
@@ -52,6 +52,23 @@
  # along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
  
  VPATH = @srcdir@
+am__make_dryrun = \
+  { \
+    am__dry=no; \
+    case $$MAKEFLAGS in \
+      *\\[\ \  ]*) \
+        echo 'am--echo: ; @echo "AM"  OK' | $(MAKE) -f - 2>/dev/null \
+          | grep '^AM OK$$' >/dev/null || am__dry=yes;; \
+      *) \
+        for am__flg in $$MAKEFLAGS; do \
+          case $$am__flg in \
+            *=*|--*) ;; \
+            *n*) am__dry=yes; break;; \
+          esac; \
+        done;; \
+    esac; \
+    test $$am__dry = yes; \
+  }
  pkgdatadir = $(datadir)/@PACKAGE@
  pkgincludedir = $(includedir)/@PACKAGE@
  pkglibdir = $(libdir)/@PACKAGE@
@@ -70,13 +87,12 @@ PRE_UNINSTALL = :
  POST_UNINSTALL = :
  build_triplet = @build@
  host_triplet = @host@
-ANSI2KNR = $(top_builddir)/ansi2knr
  DIST_COMMON = README $(srcdir)/Makeasm.am $(srcdir)/Makefile.am \
         $(srcdir)/Makefile.in
  subdir = mpn
  ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
  am__aclocal_m4_deps = $(top_srcdir)/acinclude.m4 \
-       $(top_srcdir)/configure.in
+       $(top_srcdir)/configure.ac
  am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
         $(ACLOCAL_M4)
  mkinstalldirs = $(install_sh) -d
@@ -85,7 +101,7 @@ CONFIG_CLEAN_FILES =
  CONFIG_CLEAN_VPATH_FILES =
  LTLIBRARIES = $(noinst_LTLIBRARIES)
  am__DEPENDENCIES_1 =
-nodist_libmpn_la_OBJECTS = fib_table$U.lo mp_bases$U.lo
+nodist_libmpn_la_OBJECTS = fib_table.lo mp_bases.lo
  libmpn_la_OBJECTS = $(nodist_libmpn_la_OBJECTS)
  DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir)
  depcomp =
@@ -99,9 +115,13 @@ CCLD = $(CC)
  LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \
         --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) $(AM_LDFLAGS) \
         $(LDFLAGS) -o $@
-SOURCES = $(nodist_libmpn_la_SOURCES) \
-       $(nodist_EXTRA_libmpn_la_SOURCES)
+SOURCES = $(nodist_libmpn_la_SOURCES)
  DIST_SOURCES =
+am__can_run_installinfo = \
+  case $$AM_UPDATE_INFO_DIR in \
+    n|no|NO) false;; \
+    *) (install-info --version) >/dev/null 2>&1;; \
+  esac
  ETAGS = etags
  CTAGS = ctags
  DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
@@ -203,8 +223,8 @@ SHELL = @SHELL@
  SPEED_CYCLECOUNTER_OBJ = @SPEED_CYCLECOUNTER_OBJ@
  STRIP = @STRIP@
  TAL_OBJECT = @TAL_OBJECT@
+TUNE_LIBS = @TUNE_LIBS@
  TUNE_SQR_OBJ = @TUNE_SQR_OBJ@
-U = @U@
  U_FOR_BUILD = @U_FOR_BUILD@
  VERSION = @VERSION@
  WITH_READLINE_01 = @WITH_READLINE_01@
@@ -251,7 +271,6 @@ mandir = @mandir@
  mkdir_p = @mkdir_p@
  mpn_objects = @mpn_objects@
  mpn_objs_in_libgmp = @mpn_objs_in_libgmp@
-mpn_objs_in_libmp = @mpn_objs_in_libmp@
  oldincludedir = @oldincludedir@
  pdfdir = @pdfdir@
  prefix = @prefix@
@@ -269,56 +288,13 @@ INCLUDES = -D__GMP_WITHIN_GMP -I$(top_srcdir) \
    -DOPERATION_`echo $* | sed 's/_$$//'`
  
  OFILES = @mpn_objects@
-
-# All possible mpn normal and optional function files are listed here, to
-# get automake to generate ansi2knr rules for each.  Such rules will be
-# ignored for any that are instead implemented with a .asm (or whatever) for
-# a particular target.
-#
-nodist_EXTRA_libmpn_la_SOURCES = \
-  add.c add_1.c add_n.c                                                            \
-  addmul_1.c addmul_2.c addmul_3.c addmul_4.c addmul_5.c addmul_6.c        \
-  addmul_7.c addmul_8.c                                                            \
-  and_n.c andn_n.c                                                         \
-  cmp.c com.c copyd.c copyi.c                                              \
-  dive_1.c diveby3.c divis.c divrem.c divrem_1.c divrem_2.c                \
-  sbpi1_bdiv_qr.c sbpi1_bdiv_q.c                                           \
-  sbpi1_div_qr.c sbpi1_div_q.c sbpi1_divappr_q.c                           \
-  dcpi1_bdiv_qr.c dcpi1_bdiv_q.c                                           \
-  dcpi1_div_qr.c dcpi1_div_q.c dcpi1_divappr_q.c                           \
-  dump.c fib2_ui.c gcd.c                                                   \
-  gcd_1.c gcdext.c get_d.c get_str.c                                       \
-  hamdist.c hgcd2.c hgcd.c invert_limb.c                                   \
-  ior_n.c iorn_n.c jacbase.c lshift.c                                      \
-  matrix22_mul.c mod_1.c mod_34lsub1.c mode1o.c                                    \
-  mod_1_1.c mod_1_2.c mod_1_3.c mod_1_4.c                                  \
-  mul.c mul_1.c mul_2.c mul_3.c mul_4.c mul_fft.c mul_n.c mul_basecase.c    \
-  nussbaumer_mul.c                                                         \
-  toom22_mul.c toom32_mul.c toom42_mul.c toom52_mul.c toom62_mul.c         \
-  toom33_mul.c toom43_mul.c toom53_mul.c toom63_mul.c                      \
-  toom44_mul.c                                                             \
-  toom6h_mul.c toom6_sqr.c toom8h_mul.c toom8_sqr.c                        \
-  toom_couple_handling.c                                                   \
-  toom2_sqr.c toom3_sqr.c toom4_sqr.c                                      \
-  toom_eval_dgr3_pm1.c toom_eval_dgr3_pm2.c                                \
-  toom_eval_pm1.c toom_eval_pm1.c toom_eval_pm2exp.c toom_eval_pm2rexp.c    \
-  toom_interpolate_5pts.c toom_interpolate_6pts.c toom_interpolate_7pts.c   \
-  toom_interpolate_8pts.c toom_interpolate_12pts.c toom_interpolate_16pts.c \
-  invertappr.c invert.c binvert.c mulmod_bnm1.c sqrmod_bnm1.c              \
-  mullo_n.c mullo_basecase.c nand_n.c neg.c nior_n.c perfsqr.c     \
-  popcount.c pre_divrem_1.c pre_mod_1.c pow_1.c random.c random2.c rshift.c \
-  rootrem.c scan0.c scan1.c set_str.c                      \
-  sqr_basecase.c sqr_diagonal.c                                                    \
-  sqrtrem.c sub.c sub_1.c sub_n.c submul_1.c                               \
-  tdiv_qr.c udiv_qrnnd.c udiv_w_sdiv.c xor_n.c xnor_n.c
-
  noinst_LTLIBRARIES = libmpn.la
  nodist_libmpn_la_SOURCES = fib_table.c mp_bases.c
  libmpn_la_LIBADD = $(OFILES)
  libmpn_la_DEPENDENCIES = $(OFILES)
  TARG_DIST = a29k alpha arm clipper cray generic i960 ia64 lisp m68k m88k \
    minithres mips32 mips64 ns32k pa32 pa64 power powerpc32 powerpc64 pyr \
-  s390_32 s390_64 sh sparc32 sparc64 vax x86 x86_64 z8000 z8000x
+  s390_32 s390_64 sh sparc32 sparc64 thumb vax x86 x86_64 z8000
  
  EXTRA_DIST = asm-defs.m4 cpp-ccas m4-ccas $(TARG_DIST)
  
@@ -375,6 +351,7 @@ Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
             echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \
             cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \
         esac;
+$(srcdir)/Makeasm.am:
  
  $(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)
         cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
@@ -393,7 +370,7 @@ clean-noinstLTLIBRARIES:
           echo "rm -f \"$${dir}/so_locations\""; \
           rm -f "$${dir}/so_locations"; \
         done
-libmpn.la: $(libmpn_la_OBJECTS) $(libmpn_la_DEPENDENCIES) 
+libmpn.la: $(libmpn_la_OBJECTS) $(libmpn_la_DEPENDENCIES) $(EXTRA_libmpn_la_DEPENDENCIES) 
         $(LINK)  $(libmpn_la_OBJECTS) $(libmpn_la_LIBADD) $(LIBS)
  
  mostlyclean-compile:
@@ -401,11 +378,6 @@ mostlyclean-compile:
  
  distclean-compile:
         -rm -f *.tab.c
-$(top_builddir)/ansi2knr:
-       $(am__cd) $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) ./ansi2knr
-
-mostlyclean-kr:
-       -test "$U" = "" || rm -f *_.c
  
  .c.o:
         $(COMPILE) -c $<
@@ -415,340 +387,6 @@ mostlyclean-kr:
  
  .c.lo:
         $(LTCOMPILE) -c -o $@ $<
-add_.c: add.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/add.c; then echo $(srcdir)/add.c; else echo add.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-add_1_.c: add_1.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/add_1.c; then echo $(srcdir)/add_1.c; else echo add_1.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-add_n_.c: add_n.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/add_n.c; then echo $(srcdir)/add_n.c; else echo add_n.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-addmul_1_.c: addmul_1.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/addmul_1.c; then echo $(srcdir)/addmul_1.c; else echo addmul_1.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-addmul_2_.c: addmul_2.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/addmul_2.c; then echo $(srcdir)/addmul_2.c; else echo addmul_2.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-addmul_3_.c: addmul_3.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/addmul_3.c; then echo $(srcdir)/addmul_3.c; else echo addmul_3.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-addmul_4_.c: addmul_4.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/addmul_4.c; then echo $(srcdir)/addmul_4.c; else echo addmul_4.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-addmul_5_.c: addmul_5.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/addmul_5.c; then echo $(srcdir)/addmul_5.c; else echo addmul_5.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-addmul_6_.c: addmul_6.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/addmul_6.c; then echo $(srcdir)/addmul_6.c; else echo addmul_6.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-addmul_7_.c: addmul_7.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/addmul_7.c; then echo $(srcdir)/addmul_7.c; else echo addmul_7.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-addmul_8_.c: addmul_8.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/addmul_8.c; then echo $(srcdir)/addmul_8.c; else echo addmul_8.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-and_n_.c: and_n.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/and_n.c; then echo $(srcdir)/and_n.c; else echo and_n.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-andn_n_.c: andn_n.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/andn_n.c; then echo $(srcdir)/andn_n.c; else echo andn_n.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-binvert_.c: binvert.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/binvert.c; then echo $(srcdir)/binvert.c; else echo binvert.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-cmp_.c: cmp.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/cmp.c; then echo $(srcdir)/cmp.c; else echo cmp.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-com_.c: com.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/com.c; then echo $(srcdir)/com.c; else echo com.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-copyd_.c: copyd.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/copyd.c; then echo $(srcdir)/copyd.c; else echo copyd.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-copyi_.c: copyi.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/copyi.c; then echo $(srcdir)/copyi.c; else echo copyi.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-dcpi1_bdiv_q_.c: dcpi1_bdiv_q.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/dcpi1_bdiv_q.c; then echo $(srcdir)/dcpi1_bdiv_q.c; else echo dcpi1_bdiv_q.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-dcpi1_bdiv_qr_.c: dcpi1_bdiv_qr.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/dcpi1_bdiv_qr.c; then echo $(srcdir)/dcpi1_bdiv_qr.c; else echo dcpi1_bdiv_qr.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-dcpi1_div_q_.c: dcpi1_div_q.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/dcpi1_div_q.c; then echo $(srcdir)/dcpi1_div_q.c; else echo dcpi1_div_q.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-dcpi1_div_qr_.c: dcpi1_div_qr.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/dcpi1_div_qr.c; then echo $(srcdir)/dcpi1_div_qr.c; else echo dcpi1_div_qr.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-dcpi1_divappr_q_.c: dcpi1_divappr_q.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/dcpi1_divappr_q.c; then echo $(srcdir)/dcpi1_divappr_q.c; else echo dcpi1_divappr_q.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-dive_1_.c: dive_1.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/dive_1.c; then echo $(srcdir)/dive_1.c; else echo dive_1.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-diveby3_.c: diveby3.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/diveby3.c; then echo $(srcdir)/diveby3.c; else echo diveby3.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-divis_.c: divis.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/divis.c; then echo $(srcdir)/divis.c; else echo divis.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-divrem_.c: divrem.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/divrem.c; then echo $(srcdir)/divrem.c; else echo divrem.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-divrem_1_.c: divrem_1.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/divrem_1.c; then echo $(srcdir)/divrem_1.c; else echo divrem_1.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-divrem_2_.c: divrem_2.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/divrem_2.c; then echo $(srcdir)/divrem_2.c; else echo divrem_2.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-dump_.c: dump.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/dump.c; then echo $(srcdir)/dump.c; else echo dump.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-fib2_ui_.c: fib2_ui.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/fib2_ui.c; then echo $(srcdir)/fib2_ui.c; else echo fib2_ui.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-fib_table_.c: fib_table.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/fib_table.c; then echo $(srcdir)/fib_table.c; else echo fib_table.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-gcd_.c: gcd.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/gcd.c; then echo $(srcdir)/gcd.c; else echo gcd.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-gcd_1_.c: gcd_1.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/gcd_1.c; then echo $(srcdir)/gcd_1.c; else echo gcd_1.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-gcdext_.c: gcdext.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/gcdext.c; then echo $(srcdir)/gcdext.c; else echo gcdext.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-get_d_.c: get_d.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/get_d.c; then echo $(srcdir)/get_d.c; else echo get_d.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-get_str_.c: get_str.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/get_str.c; then echo $(srcdir)/get_str.c; else echo get_str.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-hamdist_.c: hamdist.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/hamdist.c; then echo $(srcdir)/hamdist.c; else echo hamdist.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-hgcd_.c: hgcd.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/hgcd.c; then echo $(srcdir)/hgcd.c; else echo hgcd.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-hgcd2_.c: hgcd2.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/hgcd2.c; then echo $(srcdir)/hgcd2.c; else echo hgcd2.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-invert_.c: invert.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/invert.c; then echo $(srcdir)/invert.c; else echo invert.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-invert_limb_.c: invert_limb.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/invert_limb.c; then echo $(srcdir)/invert_limb.c; else echo invert_limb.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-invertappr_.c: invertappr.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/invertappr.c; then echo $(srcdir)/invertappr.c; else echo invertappr.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-ior_n_.c: ior_n.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/ior_n.c; then echo $(srcdir)/ior_n.c; else echo ior_n.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-iorn_n_.c: iorn_n.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/iorn_n.c; then echo $(srcdir)/iorn_n.c; else echo iorn_n.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-jacbase_.c: jacbase.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/jacbase.c; then echo $(srcdir)/jacbase.c; else echo jacbase.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-lshift_.c: lshift.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/lshift.c; then echo $(srcdir)/lshift.c; else echo lshift.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-matrix22_mul_.c: matrix22_mul.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/matrix22_mul.c; then echo $(srcdir)/matrix22_mul.c; else echo matrix22_mul.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-mod_1_.c: mod_1.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/mod_1.c; then echo $(srcdir)/mod_1.c; else echo mod_1.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-mod_1_1_.c: mod_1_1.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/mod_1_1.c; then echo $(srcdir)/mod_1_1.c; else echo mod_1_1.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-mod_1_2_.c: mod_1_2.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/mod_1_2.c; then echo $(srcdir)/mod_1_2.c; else echo mod_1_2.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-mod_1_3_.c: mod_1_3.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/mod_1_3.c; then echo $(srcdir)/mod_1_3.c; else echo mod_1_3.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-mod_1_4_.c: mod_1_4.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/mod_1_4.c; then echo $(srcdir)/mod_1_4.c; else echo mod_1_4.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-mod_34lsub1_.c: mod_34lsub1.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/mod_34lsub1.c; then echo $(srcdir)/mod_34lsub1.c; else echo mod_34lsub1.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-mode1o_.c: mode1o.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/mode1o.c; then echo $(srcdir)/mode1o.c; else echo mode1o.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-mp_bases_.c: mp_bases.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/mp_bases.c; then echo $(srcdir)/mp_bases.c; else echo mp_bases.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-mul_.c: mul.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/mul.c; then echo $(srcdir)/mul.c; else echo mul.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-mul_1_.c: mul_1.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/mul_1.c; then echo $(srcdir)/mul_1.c; else echo mul_1.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-mul_2_.c: mul_2.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/mul_2.c; then echo $(srcdir)/mul_2.c; else echo mul_2.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-mul_3_.c: mul_3.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/mul_3.c; then echo $(srcdir)/mul_3.c; else echo mul_3.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-mul_4_.c: mul_4.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/mul_4.c; then echo $(srcdir)/mul_4.c; else echo mul_4.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-mul_basecase_.c: mul_basecase.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/mul_basecase.c; then echo $(srcdir)/mul_basecase.c; else echo mul_basecase.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-mul_fft_.c: mul_fft.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/mul_fft.c; then echo $(srcdir)/mul_fft.c; else echo mul_fft.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-mul_n_.c: mul_n.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/mul_n.c; then echo $(srcdir)/mul_n.c; else echo mul_n.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-mullo_basecase_.c: mullo_basecase.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/mullo_basecase.c; then echo $(srcdir)/mullo_basecase.c; else echo mullo_basecase.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-mullo_n_.c: mullo_n.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/mullo_n.c; then echo $(srcdir)/mullo_n.c; else echo mullo_n.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-mulmod_bnm1_.c: mulmod_bnm1.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/mulmod_bnm1.c; then echo $(srcdir)/mulmod_bnm1.c; else echo mulmod_bnm1.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-nand_n_.c: nand_n.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/nand_n.c; then echo $(srcdir)/nand_n.c; else echo nand_n.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-neg_.c: neg.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/neg.c; then echo $(srcdir)/neg.c; else echo neg.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-nior_n_.c: nior_n.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/nior_n.c; then echo $(srcdir)/nior_n.c; else echo nior_n.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-nussbaumer_mul_.c: nussbaumer_mul.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/nussbaumer_mul.c; then echo $(srcdir)/nussbaumer_mul.c; else echo nussbaumer_mul.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-perfsqr_.c: perfsqr.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/perfsqr.c; then echo $(srcdir)/perfsqr.c; else echo perfsqr.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-popcount_.c: popcount.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/popcount.c; then echo $(srcdir)/popcount.c; else echo popcount.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-pow_1_.c: pow_1.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/pow_1.c; then echo $(srcdir)/pow_1.c; else echo pow_1.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-pre_divrem_1_.c: pre_divrem_1.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/pre_divrem_1.c; then echo $(srcdir)/pre_divrem_1.c; else echo pre_divrem_1.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-pre_mod_1_.c: pre_mod_1.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/pre_mod_1.c; then echo $(srcdir)/pre_mod_1.c; else echo pre_mod_1.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-random_.c: random.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/random.c; then echo $(srcdir)/random.c; else echo random.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-random2_.c: random2.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/random2.c; then echo $(srcdir)/random2.c; else echo random2.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-rootrem_.c: rootrem.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/rootrem.c; then echo $(srcdir)/rootrem.c; else echo rootrem.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-rshift_.c: rshift.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/rshift.c; then echo $(srcdir)/rshift.c; else echo rshift.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-sbpi1_bdiv_q_.c: sbpi1_bdiv_q.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/sbpi1_bdiv_q.c; then echo $(srcdir)/sbpi1_bdiv_q.c; else echo sbpi1_bdiv_q.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-sbpi1_bdiv_qr_.c: sbpi1_bdiv_qr.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/sbpi1_bdiv_qr.c; then echo $(srcdir)/sbpi1_bdiv_qr.c; else echo sbpi1_bdiv_qr.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-sbpi1_div_q_.c: sbpi1_div_q.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/sbpi1_div_q.c; then echo $(srcdir)/sbpi1_div_q.c; else echo sbpi1_div_q.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-sbpi1_div_qr_.c: sbpi1_div_qr.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/sbpi1_div_qr.c; then echo $(srcdir)/sbpi1_div_qr.c; else echo sbpi1_div_qr.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-sbpi1_divappr_q_.c: sbpi1_divappr_q.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/sbpi1_divappr_q.c; then echo $(srcdir)/sbpi1_divappr_q.c; else echo sbpi1_divappr_q.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-scan0_.c: scan0.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/scan0.c; then echo $(srcdir)/scan0.c; else echo scan0.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-scan1_.c: scan1.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/scan1.c; then echo $(srcdir)/scan1.c; else echo scan1.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-set_str_.c: set_str.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/set_str.c; then echo $(srcdir)/set_str.c; else echo set_str.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-sqr_basecase_.c: sqr_basecase.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/sqr_basecase.c; then echo $(srcdir)/sqr_basecase.c; else echo sqr_basecase.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-sqr_diagonal_.c: sqr_diagonal.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/sqr_diagonal.c; then echo $(srcdir)/sqr_diagonal.c; else echo sqr_diagonal.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-sqrmod_bnm1_.c: sqrmod_bnm1.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/sqrmod_bnm1.c; then echo $(srcdir)/sqrmod_bnm1.c; else echo sqrmod_bnm1.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-sqrtrem_.c: sqrtrem.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/sqrtrem.c; then echo $(srcdir)/sqrtrem.c; else echo sqrtrem.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-sub_.c: sub.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/sub.c; then echo $(srcdir)/sub.c; else echo sub.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-sub_1_.c: sub_1.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/sub_1.c; then echo $(srcdir)/sub_1.c; else echo sub_1.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-sub_n_.c: sub_n.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/sub_n.c; then echo $(srcdir)/sub_n.c; else echo sub_n.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-submul_1_.c: submul_1.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/submul_1.c; then echo $(srcdir)/submul_1.c; else echo submul_1.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-tdiv_qr_.c: tdiv_qr.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/tdiv_qr.c; then echo $(srcdir)/tdiv_qr.c; else echo tdiv_qr.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-toom22_mul_.c: toom22_mul.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/toom22_mul.c; then echo $(srcdir)/toom22_mul.c; else echo toom22_mul.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-toom2_sqr_.c: toom2_sqr.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/toom2_sqr.c; then echo $(srcdir)/toom2_sqr.c; else echo toom2_sqr.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-toom32_mul_.c: toom32_mul.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/toom32_mul.c; then echo $(srcdir)/toom32_mul.c; else echo toom32_mul.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-toom33_mul_.c: toom33_mul.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/toom33_mul.c; then echo $(srcdir)/toom33_mul.c; else echo toom33_mul.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-toom3_sqr_.c: toom3_sqr.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/toom3_sqr.c; then echo $(srcdir)/toom3_sqr.c; else echo toom3_sqr.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-toom42_mul_.c: toom42_mul.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/toom42_mul.c; then echo $(srcdir)/toom42_mul.c; else echo toom42_mul.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-toom43_mul_.c: toom43_mul.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/toom43_mul.c; then echo $(srcdir)/toom43_mul.c; else echo toom43_mul.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-toom44_mul_.c: toom44_mul.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/toom44_mul.c; then echo $(srcdir)/toom44_mul.c; else echo toom44_mul.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-toom4_sqr_.c: toom4_sqr.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/toom4_sqr.c; then echo $(srcdir)/toom4_sqr.c; else echo toom4_sqr.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-toom52_mul_.c: toom52_mul.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/toom52_mul.c; then echo $(srcdir)/toom52_mul.c; else echo toom52_mul.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-toom53_mul_.c: toom53_mul.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/toom53_mul.c; then echo $(srcdir)/toom53_mul.c; else echo toom53_mul.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-toom62_mul_.c: toom62_mul.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/toom62_mul.c; then echo $(srcdir)/toom62_mul.c; else echo toom62_mul.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-toom63_mul_.c: toom63_mul.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/toom63_mul.c; then echo $(srcdir)/toom63_mul.c; else echo toom63_mul.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-toom6_sqr_.c: toom6_sqr.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/toom6_sqr.c; then echo $(srcdir)/toom6_sqr.c; else echo toom6_sqr.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-toom6h_mul_.c: toom6h_mul.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/toom6h_mul.c; then echo $(srcdir)/toom6h_mul.c; else echo toom6h_mul.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-toom8_sqr_.c: toom8_sqr.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/toom8_sqr.c; then echo $(srcdir)/toom8_sqr.c; else echo toom8_sqr.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-toom8h_mul_.c: toom8h_mul.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/toom8h_mul.c; then echo $(srcdir)/toom8h_mul.c; else echo toom8h_mul.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-toom_couple_handling_.c: toom_couple_handling.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/toom_couple_handling.c; then echo $(srcdir)/toom_couple_handling.c; else echo toom_couple_handling.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-toom_eval_dgr3_pm1_.c: toom_eval_dgr3_pm1.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/toom_eval_dgr3_pm1.c; then echo $(srcdir)/toom_eval_dgr3_pm1.c; else echo toom_eval_dgr3_pm1.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-toom_eval_dgr3_pm2_.c: toom_eval_dgr3_pm2.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/toom_eval_dgr3_pm2.c; then echo $(srcdir)/toom_eval_dgr3_pm2.c; else echo toom_eval_dgr3_pm2.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-toom_eval_pm1_.c: toom_eval_pm1.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/toom_eval_pm1.c; then echo $(srcdir)/toom_eval_pm1.c; else echo toom_eval_pm1.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-toom_eval_pm2exp_.c: toom_eval_pm2exp.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/toom_eval_pm2exp.c; then echo $(srcdir)/toom_eval_pm2exp.c; else echo toom_eval_pm2exp.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-toom_eval_pm2rexp_.c: toom_eval_pm2rexp.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/toom_eval_pm2rexp.c; then echo $(srcdir)/toom_eval_pm2rexp.c; else echo toom_eval_pm2rexp.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-toom_interpolate_12pts_.c: toom_interpolate_12pts.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/toom_interpolate_12pts.c; then echo $(srcdir)/toom_interpolate_12pts.c; else echo toom_interpolate_12pts.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-toom_interpolate_16pts_.c: toom_interpolate_16pts.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/toom_interpolate_16pts.c; then echo $(srcdir)/toom_interpolate_16pts.c; else echo toom_interpolate_16pts.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-toom_interpolate_5pts_.c: toom_interpolate_5pts.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/toom_interpolate_5pts.c; then echo $(srcdir)/toom_interpolate_5pts.c; else echo toom_interpolate_5pts.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-toom_interpolate_6pts_.c: toom_interpolate_6pts.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/toom_interpolate_6pts.c; then echo $(srcdir)/toom_interpolate_6pts.c; else echo toom_interpolate_6pts.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-toom_interpolate_7pts_.c: toom_interpolate_7pts.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/toom_interpolate_7pts.c; then echo $(srcdir)/toom_interpolate_7pts.c; else echo toom_interpolate_7pts.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-toom_interpolate_8pts_.c: toom_interpolate_8pts.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/toom_interpolate_8pts.c; then echo $(srcdir)/toom_interpolate_8pts.c; else echo toom_interpolate_8pts.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-udiv_qrnnd_.c: udiv_qrnnd.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/udiv_qrnnd.c; then echo $(srcdir)/udiv_qrnnd.c; else echo udiv_qrnnd.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-udiv_w_sdiv_.c: udiv_w_sdiv.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/udiv_w_sdiv.c; then echo $(srcdir)/udiv_w_sdiv.c; else echo udiv_w_sdiv.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-xnor_n_.c: xnor_n.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/xnor_n.c; then echo $(srcdir)/xnor_n.c; else echo xnor_n.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-xor_n_.c: xor_n.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/xor_n.c; then echo $(srcdir)/xor_n.c; else echo xor_n.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-add_.$(OBJEXT) add_.lo add_1_.$(OBJEXT) add_1_.lo add_n_.$(OBJEXT) \
-add_n_.lo addmul_1_.$(OBJEXT) addmul_1_.lo addmul_2_.$(OBJEXT) \
-addmul_2_.lo addmul_3_.$(OBJEXT) addmul_3_.lo addmul_4_.$(OBJEXT) \
-addmul_4_.lo addmul_5_.$(OBJEXT) addmul_5_.lo addmul_6_.$(OBJEXT) \
-addmul_6_.lo addmul_7_.$(OBJEXT) addmul_7_.lo addmul_8_.$(OBJEXT) \
-addmul_8_.lo and_n_.$(OBJEXT) and_n_.lo andn_n_.$(OBJEXT) andn_n_.lo \
-binvert_.$(OBJEXT) binvert_.lo cmp_.$(OBJEXT) cmp_.lo com_.$(OBJEXT) \
-com_.lo copyd_.$(OBJEXT) copyd_.lo copyi_.$(OBJEXT) copyi_.lo \
-dcpi1_bdiv_q_.$(OBJEXT) dcpi1_bdiv_q_.lo dcpi1_bdiv_qr_.$(OBJEXT) \
-dcpi1_bdiv_qr_.lo dcpi1_div_q_.$(OBJEXT) dcpi1_div_q_.lo \
-dcpi1_div_qr_.$(OBJEXT) dcpi1_div_qr_.lo dcpi1_divappr_q_.$(OBJEXT) \
-dcpi1_divappr_q_.lo dive_1_.$(OBJEXT) dive_1_.lo diveby3_.$(OBJEXT) \
-diveby3_.lo divis_.$(OBJEXT) divis_.lo divrem_.$(OBJEXT) divrem_.lo \
-divrem_1_.$(OBJEXT) divrem_1_.lo divrem_2_.$(OBJEXT) divrem_2_.lo \
-dump_.$(OBJEXT) dump_.lo fib2_ui_.$(OBJEXT) fib2_ui_.lo \
-fib_table_.$(OBJEXT) fib_table_.lo gcd_.$(OBJEXT) gcd_.lo \
-gcd_1_.$(OBJEXT) gcd_1_.lo gcdext_.$(OBJEXT) gcdext_.lo \
-get_d_.$(OBJEXT) get_d_.lo get_str_.$(OBJEXT) get_str_.lo \
-hamdist_.$(OBJEXT) hamdist_.lo hgcd_.$(OBJEXT) hgcd_.lo \
-hgcd2_.$(OBJEXT) hgcd2_.lo invert_.$(OBJEXT) invert_.lo \
-invert_limb_.$(OBJEXT) invert_limb_.lo invertappr_.$(OBJEXT) \
-invertappr_.lo ior_n_.$(OBJEXT) ior_n_.lo iorn_n_.$(OBJEXT) iorn_n_.lo \
-jacbase_.$(OBJEXT) jacbase_.lo lshift_.$(OBJEXT) lshift_.lo \
-matrix22_mul_.$(OBJEXT) matrix22_mul_.lo mod_1_.$(OBJEXT) mod_1_.lo \
-mod_1_1_.$(OBJEXT) mod_1_1_.lo mod_1_2_.$(OBJEXT) mod_1_2_.lo \
-mod_1_3_.$(OBJEXT) mod_1_3_.lo mod_1_4_.$(OBJEXT) mod_1_4_.lo \
-mod_34lsub1_.$(OBJEXT) mod_34lsub1_.lo mode1o_.$(OBJEXT) mode1o_.lo \
-mp_bases_.$(OBJEXT) mp_bases_.lo mul_.$(OBJEXT) mul_.lo \
-mul_1_.$(OBJEXT) mul_1_.lo mul_2_.$(OBJEXT) mul_2_.lo mul_3_.$(OBJEXT) \
-mul_3_.lo mul_4_.$(OBJEXT) mul_4_.lo mul_basecase_.$(OBJEXT) \
-mul_basecase_.lo mul_fft_.$(OBJEXT) mul_fft_.lo mul_n_.$(OBJEXT) \
-mul_n_.lo mullo_basecase_.$(OBJEXT) mullo_basecase_.lo \
-mullo_n_.$(OBJEXT) mullo_n_.lo mulmod_bnm1_.$(OBJEXT) mulmod_bnm1_.lo \
-nand_n_.$(OBJEXT) nand_n_.lo neg_.$(OBJEXT) neg_.lo nior_n_.$(OBJEXT) \
-nior_n_.lo nussbaumer_mul_.$(OBJEXT) nussbaumer_mul_.lo \
-perfsqr_.$(OBJEXT) perfsqr_.lo popcount_.$(OBJEXT) popcount_.lo \
-pow_1_.$(OBJEXT) pow_1_.lo pre_divrem_1_.$(OBJEXT) pre_divrem_1_.lo \
-pre_mod_1_.$(OBJEXT) pre_mod_1_.lo random_.$(OBJEXT) random_.lo \
-random2_.$(OBJEXT) random2_.lo rootrem_.$(OBJEXT) rootrem_.lo \
-rshift_.$(OBJEXT) rshift_.lo sbpi1_bdiv_q_.$(OBJEXT) sbpi1_bdiv_q_.lo \
-sbpi1_bdiv_qr_.$(OBJEXT) sbpi1_bdiv_qr_.lo sbpi1_div_q_.$(OBJEXT) \
-sbpi1_div_q_.lo sbpi1_div_qr_.$(OBJEXT) sbpi1_div_qr_.lo \
-sbpi1_divappr_q_.$(OBJEXT) sbpi1_divappr_q_.lo scan0_.$(OBJEXT) \
-scan0_.lo scan1_.$(OBJEXT) scan1_.lo set_str_.$(OBJEXT) set_str_.lo \
-sqr_basecase_.$(OBJEXT) sqr_basecase_.lo sqr_diagonal_.$(OBJEXT) \
-sqr_diagonal_.lo sqrmod_bnm1_.$(OBJEXT) sqrmod_bnm1_.lo \
-sqrtrem_.$(OBJEXT) sqrtrem_.lo sub_.$(OBJEXT) sub_.lo sub_1_.$(OBJEXT) \
-sub_1_.lo sub_n_.$(OBJEXT) sub_n_.lo submul_1_.$(OBJEXT) submul_1_.lo \
-tdiv_qr_.$(OBJEXT) tdiv_qr_.lo toom22_mul_.$(OBJEXT) toom22_mul_.lo \
-toom2_sqr_.$(OBJEXT) toom2_sqr_.lo toom32_mul_.$(OBJEXT) \
-toom32_mul_.lo toom33_mul_.$(OBJEXT) toom33_mul_.lo \
-toom3_sqr_.$(OBJEXT) toom3_sqr_.lo toom42_mul_.$(OBJEXT) \
-toom42_mul_.lo toom43_mul_.$(OBJEXT) toom43_mul_.lo \
-toom44_mul_.$(OBJEXT) toom44_mul_.lo toom4_sqr_.$(OBJEXT) \
-toom4_sqr_.lo toom52_mul_.$(OBJEXT) toom52_mul_.lo \
-toom53_mul_.$(OBJEXT) toom53_mul_.lo toom62_mul_.$(OBJEXT) \
-toom62_mul_.lo toom63_mul_.$(OBJEXT) toom63_mul_.lo \
-toom6_sqr_.$(OBJEXT) toom6_sqr_.lo toom6h_mul_.$(OBJEXT) \
-toom6h_mul_.lo toom8_sqr_.$(OBJEXT) toom8_sqr_.lo \
-toom8h_mul_.$(OBJEXT) toom8h_mul_.lo toom_couple_handling_.$(OBJEXT) \
-toom_couple_handling_.lo toom_eval_dgr3_pm1_.$(OBJEXT) \
-toom_eval_dgr3_pm1_.lo toom_eval_dgr3_pm2_.$(OBJEXT) \
-toom_eval_dgr3_pm2_.lo toom_eval_pm1_.$(OBJEXT) toom_eval_pm1_.lo \
-toom_eval_pm2exp_.$(OBJEXT) toom_eval_pm2exp_.lo \
-toom_eval_pm2rexp_.$(OBJEXT) toom_eval_pm2rexp_.lo \
-toom_interpolate_12pts_.$(OBJEXT) toom_interpolate_12pts_.lo \
-toom_interpolate_16pts_.$(OBJEXT) toom_interpolate_16pts_.lo \
-toom_interpolate_5pts_.$(OBJEXT) toom_interpolate_5pts_.lo \
-toom_interpolate_6pts_.$(OBJEXT) toom_interpolate_6pts_.lo \
-toom_interpolate_7pts_.$(OBJEXT) toom_interpolate_7pts_.lo \
-toom_interpolate_8pts_.$(OBJEXT) toom_interpolate_8pts_.lo \
-udiv_qrnnd_.$(OBJEXT) udiv_qrnnd_.lo udiv_w_sdiv_.$(OBJEXT) \
-udiv_w_sdiv_.lo xnor_n_.$(OBJEXT) xnor_n_.lo xor_n_.$(OBJEXT) \
-xor_n_.lo : $(ANSI2KNR)
  
  mostlyclean-libtool:
         -rm -f *.lo
@@ -852,10 +490,15 @@ install-am: all-am
  
  installcheck: installcheck-am
  install-strip:
-       $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
-         install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
-         `test -z '$(STRIP)' || \
-           echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+       if test -z '$(STRIP)'; then \
+         $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+           install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+             install; \
+       else \
+         $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+           install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+           "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
+       fi
  mostlyclean-generic:
  
  clean-generic:
@@ -923,7 +566,7 @@ maintainer-clean-am: distclean-am maintainer-clean-generic
  
  mostlyclean: mostlyclean-am
  
-mostlyclean-am: mostlyclean-compile mostlyclean-generic mostlyclean-kr \
+mostlyclean-am: mostlyclean-compile mostlyclean-generic \
         mostlyclean-libtool
  
  pdf: pdf-am
@@ -936,7 +579,7 @@ ps-am:
  
  uninstall-am:
  
-.MAKE: $(top_builddir)/ansi2knr install-am install-strip
+.MAKE: install-am install-strip
  
  .PHONY: CTAGS GTAGS all all-am check check-am clean clean-generic \
         clean-libtool clean-noinstLTLIBRARIES ctags distclean \
@@ -948,9 +591,8 @@ uninstall-am:
         install-pdf install-pdf-am install-ps install-ps-am \
         install-strip installcheck installcheck-am installdirs \
         maintainer-clean maintainer-clean-generic mostlyclean \
-       mostlyclean-compile mostlyclean-generic mostlyclean-kr \
-       mostlyclean-libtool pdf pdf-am ps ps-am tags uninstall \
-       uninstall-am
+       mostlyclean-compile mostlyclean-generic mostlyclean-libtool \
+       pdf pdf-am ps ps-am tags uninstall uninstall-am
  
  
  # These are BUILT_SOURCES at the top-level, so normally they're built before
@@ -963,9 +605,6 @@ mp_bases.c:
  perfsqr.h:
         cd ..; $(MAKE) $(AM_MAKEFLAGS) mpn/perfsqr.h
  
-tune-gcd-p: gcd.c
-       $(COMPILE) -g -O1 -I $(top_srcdir)/tune -DTUNE_GCD_P=1 gcd.c -o tune-gcd-p -L ../.libs -L../tune/.libs -lspeed -lgmp -lm
-
  # .s assembler, no preprocessing.
  #
  .s.o:
diff --git a/mpn/alpha/add_n.asm b/mpn/alpha/add_n.asm

index e24c3cb9c8c79bc51156898f89073bd7d3b2786d..819053b8aedfa91bd6b23f00bc7ef1dff0c3c419 100644 (file)
--- a/mpn/alpha/add_n.asm
+++ b/mpn/alpha/add_n.asm
@@ -1,7 +1,7 @@
  dnl  Alpha mpn_add_n -- Add two limb vectors of the same length > 0 and
  dnl  store sum in a third limb vector.
  
-dnl  Copyright 1995, 1999, 2000, 2005 Free Software Foundation, Inc.
+dnl  Copyright 1995, 1999, 2000, 2005, 2011 Free Software Foundation, Inc.
  
  dnl  This file is part of the GNU MP Library.
  
@@ -32,9 +32,13 @@ dnl  s2_ptr  r18
  dnl  size      r19
  
  ASM_START()
+PROLOGUE(mpn_add_nc)
+       bis     r20,r31,r25
+       br      L(com)
+EPILOGUE()
  PROLOGUE(mpn_add_n)
         bis     r31,r31,r25             C clear cy
-       subq    r19,4,r19               C decr loop cnt
+L(com):        subq    r19,4,r19               C decr loop cnt
         blt     r19,$Lend2              C if less than 4 limbs, goto 2nd loop
  C Start software pipeline for 1st loop
         ldq     r0,0(r18)
@@ -42,13 +46,16 @@ C Start software pipeline for 1st loop
         ldq     r1,8(r18)
         ldq     r5,8(r17)
         addq    r17,32,r17              C update s1_ptr
+       addq    r0,r4,r28               C 1st main add
         ldq     r2,16(r18)
-       addq    r0,r4,r20               C 1st main add
+       addq    r25,r28,r20             C 1st carry add
         ldq     r3,24(r18)
-       subq    r19,4,r19               C decr loop cnt
+       cmpult  r28,r4,r8               C compute cy from last add
         ldq     r6,-16(r17)
-       cmpult  r20,r0,r25              C compute cy from last add
+       cmpult  r20,r28,r25             C compute cy from last add
         ldq     r7,-8(r17)
+       bis     r8,r25,r25              C combine cy from the two adds
+       subq    r19,4,r19               C decr loop cnt
         addq    r1,r5,r28               C 2nd main add
         addq    r18,32,r18              C update s2_ptr
         addq    r28,r25,r21             C 2nd carry add
@@ -142,5 +149,5 @@ $Lend0:     addq    r0,r4,r28               C main add
  
  $Lret: bis     r25,r31,r0              C return cy
         ret     r31,(r26),1
-EPILOGUE(mpn_add_n)
+EPILOGUE()
  ASM_END()
diff --git a/mpn/alpha/ev5/gmp-mparam.h b/mpn/alpha/ev5/gmp-mparam.h

index c2e7505e6f2a4e3f87cbbb348a9defa251f0fea0..e4396e5db9092742ba08dd1de2f2a3bd8f984c27 100644 (file)
--- a/mpn/alpha/ev5/gmp-mparam.h
+++ b/mpn/alpha/ev5/gmp-mparam.h
@@ -26,37 +26,42 @@ along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
  
  #define DIVREM_1_NORM_THRESHOLD              0  /* preinv always */
  #define DIVREM_1_UNNORM_THRESHOLD            0  /* always */
+#define MOD_1_1P_METHOD                      2
  #define MOD_1_NORM_THRESHOLD                 0  /* always */
  #define MOD_1_UNNORM_THRESHOLD               0  /* always */
-#define MOD_1N_TO_MOD_1_1_THRESHOLD         32
+#define MOD_1N_TO_MOD_1_1_THRESHOLD          6
  #define MOD_1U_TO_MOD_1_1_THRESHOLD          2
-#define MOD_1_1_TO_MOD_1_2_THRESHOLD         7
-#define MOD_1_2_TO_MOD_1_4_THRESHOLD        12
-#define PREINV_MOD_1_TO_MOD_1_THRESHOLD     73
+#define MOD_1_1_TO_MOD_1_2_THRESHOLD        78
+#define MOD_1_2_TO_MOD_1_4_THRESHOLD         0  /* never mpn_mod_1s_2p */
+#define PREINV_MOD_1_TO_MOD_1_THRESHOLD     15
  #define USE_PREINV_DIVREM_1                  1  /* preinv always */
+#define DIV_QR_2_PI2_THRESHOLD              25
  #define DIVEXACT_1_THRESHOLD                 0  /* always */
-#define BMOD_1_TO_MOD_1_THRESHOLD           87
+#define BMOD_1_TO_MOD_1_THRESHOLD           80
  
-#define MUL_TOOM22_THRESHOLD                16
-#define MUL_TOOM33_THRESHOLD                53
-#define MUL_TOOM44_THRESHOLD               121
-#define MUL_TOOM6H_THRESHOLD               173
+#define MUL_TOOM22_THRESHOLD                14
+#define MUL_TOOM33_THRESHOLD                66
+#define MUL_TOOM44_THRESHOLD               118
+#define MUL_TOOM6H_THRESHOLD               157
  #define MUL_TOOM8H_THRESHOLD               236
  
-#define MUL_TOOM32_TO_TOOM43_THRESHOLD      81
-#define MUL_TOOM32_TO_TOOM53_THRESHOLD      91
+#define MUL_TOOM32_TO_TOOM43_THRESHOLD      73
+#define MUL_TOOM32_TO_TOOM53_THRESHOLD      84
  #define MUL_TOOM42_TO_TOOM53_THRESHOLD      81
  #define MUL_TOOM42_TO_TOOM63_THRESHOLD      56
+#define MUL_TOOM43_TO_TOOM54_THRESHOLD      66
  
  #define SQR_BASECASE_THRESHOLD               5
-#define SQR_TOOM2_THRESHOLD                 28
-#define SQR_TOOM3_THRESHOLD                 78
-#define SQR_TOOM4_THRESHOLD                136
-#define SQR_TOOM6_THRESHOLD                180
+#define SQR_TOOM2_THRESHOLD                 26
+#define SQR_TOOM3_THRESHOLD                 77
+#define SQR_TOOM4_THRESHOLD                130
+#define SQR_TOOM6_THRESHOLD                173
  #define SQR_TOOM8_THRESHOLD                260
  
+#define MULMID_TOOM42_THRESHOLD             20
+
  #define MULMOD_BNM1_THRESHOLD               11
-#define SQRMOD_BNM1_THRESHOLD               17
+#define SQRMOD_BNM1_THRESHOLD               13
  
  #define MUL_FFT_MODF_THRESHOLD             244  /* k = 5 */
  #define MUL_FFT_TABLE3                                      \
@@ -99,7 +104,7 @@ along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
  #define MUL_FFT_TABLE3_SIZE 141
  #define MUL_FFT_THRESHOLD                 3008
  
-#define SQR_FFT_MODF_THRESHOLD             220  /* k = 5 */
+#define SQR_FFT_MODF_THRESHOLD             212  /* k = 5 */
  #define SQR_FFT_TABLE3                                      \
    { {    220, 5}, {     13, 6}, {     15, 7}, {      8, 6}, \
      {     17, 7}, {      9, 6}, {     19, 7}, {     13, 8}, \
@@ -136,37 +141,44 @@ along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
      { 131072,18}, { 262144,19}, { 524288,20}, {1048576,21}, \
      {2097152,22}, {4194304,23}, {8388608,24} }
  #define SQR_FFT_TABLE3_SIZE 135
-#define SQR_FFT_THRESHOLD                 2240
+#define SQR_FFT_THRESHOLD                 1984
  
-#define MULLO_BASECASE_THRESHOLD             0  /* always */
-#define MULLO_DC_THRESHOLD                  55
-#define MULLO_MUL_N_THRESHOLD             5558
+#define MULLO_BASECASE_THRESHOLD             2
+#define MULLO_DC_THRESHOLD                  50
+#define MULLO_MUL_N_THRESHOLD             5397
  
-#define DC_DIV_QR_THRESHOLD                 55
-#define DC_DIVAPPR_Q_THRESHOLD             192
+#define DC_DIV_QR_THRESHOLD                 52
+#define DC_DIVAPPR_Q_THRESHOLD             172
  #define DC_BDIV_QR_THRESHOLD                51
-#define DC_BDIV_Q_THRESHOLD                120
+#define DC_BDIV_Q_THRESHOLD                112
  
-#define INV_MULMOD_BNM1_THRESHOLD           61
-#define INV_NEWTON_THRESHOLD               174
+#define INV_MULMOD_BNM1_THRESHOLD           38
+#define INV_NEWTON_THRESHOLD               179
  #define INV_APPR_THRESHOLD                 180
  
-#define BINV_NEWTON_THRESHOLD              199
-#define REDC_1_TO_REDC_N_THRESHOLD          55
+#define BINV_NEWTON_THRESHOLD              197
+#define REDC_1_TO_REDC_N_THRESHOLD          51
  
-#define MU_DIV_QR_THRESHOLD                979
+#define MU_DIV_QR_THRESHOLD                998
  #define MU_DIVAPPR_Q_THRESHOLD             998
  #define MUPI_DIV_QR_THRESHOLD               90
-#define MU_BDIV_QR_THRESHOLD               792
-#define MU_BDIV_Q_THRESHOLD                942
-
-#define MATRIX22_STRASSEN_THRESHOLD         15
-#define HGCD_THRESHOLD                      94
-#define GCD_DC_THRESHOLD                   306
-#define GCDEXT_DC_THRESHOLD                210
-#define JACOBI_BASE_METHOD                   2
-
-#define GET_STR_DC_THRESHOLD                16
-#define GET_STR_PRECOMPUTE_THRESHOLD        31
-#define SET_STR_DC_THRESHOLD               422
-#define SET_STR_PRECOMPUTE_THRESHOLD      1524
+#define MU_BDIV_QR_THRESHOLD               807
+#define MU_BDIV_Q_THRESHOLD               1078
+
+#define POWM_SEC_TABLE  2,17,188,393
+
+#define MATRIX22_STRASSEN_THRESHOLD         11
+#define HGCD_THRESHOLD                     105
+#define HGCD_APPR_THRESHOLD                105
+#define HGCD_REDUCE_THRESHOLD             1494
+#define GCD_DC_THRESHOLD                   285
+#define GCDEXT_DC_THRESHOLD                206
+#define JACOBI_BASE_METHOD                   3
+
+#define GET_STR_DC_THRESHOLD                14
+#define GET_STR_PRECOMPUTE_THRESHOLD        29
+#define SET_STR_DC_THRESHOLD               426
+#define SET_STR_PRECOMPUTE_THRESHOLD      1535
+
+#define FAC_DSC_THRESHOLD                 1502
+#define FAC_ODD_THRESHOLD                    0  /* always */
diff --git a/mpn/alpha/ev6/gmp-mparam.h b/mpn/alpha/ev6/gmp-mparam.h

index 7541a4ecf731fdc013c47f05dd260232cf05ad28..9932ec1fbd2232422be0238cfcc7b1aa44d8d065 100644 (file)
--- a/mpn/alpha/ev6/gmp-mparam.h
+++ b/mpn/alpha/ev6/gmp-mparam.h
@@ -29,39 +29,44 @@ along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
  
  #define DIVREM_1_NORM_THRESHOLD              0  /* preinv always */
  #define DIVREM_1_UNNORM_THRESHOLD            0  /* always */
+#define MOD_1_1P_METHOD                      2
  #define MOD_1_NORM_THRESHOLD                 0  /* always */
  #define MOD_1_UNNORM_THRESHOLD               0  /* always */
  #define MOD_1N_TO_MOD_1_1_THRESHOLD          3
  #define MOD_1U_TO_MOD_1_1_THRESHOLD          2
-#define MOD_1_1_TO_MOD_1_2_THRESHOLD         8
-#define MOD_1_2_TO_MOD_1_4_THRESHOLD        30
+#define MOD_1_1_TO_MOD_1_2_THRESHOLD        10
+#define MOD_1_2_TO_MOD_1_4_THRESHOLD        17
  #define PREINV_MOD_1_TO_MOD_1_THRESHOLD      7
  #define USE_PREINV_DIVREM_1                  1  /* preinv always */
+#define DIV_QR_2_PI2_THRESHOLD               8
  #define DIVEXACT_1_THRESHOLD                 0  /* always */
-#define BMOD_1_TO_MOD_1_THRESHOLD           18
-
-#define MUL_TOOM22_THRESHOLD                35
-#define MUL_TOOM33_THRESHOLD                74
-#define MUL_TOOM44_THRESHOLD               178
-#define MUL_TOOM6H_THRESHOLD               288
-#define MUL_TOOM8H_THRESHOLD               333
-
-#define MUL_TOOM32_TO_TOOM43_THRESHOLD      75
-#define MUL_TOOM32_TO_TOOM53_THRESHOLD     101
-#define MUL_TOOM42_TO_TOOM53_THRESHOLD     105
-#define MUL_TOOM42_TO_TOOM63_THRESHOLD     105
-
-#define SQR_BASECASE_THRESHOLD               5
-#define SQR_TOOM2_THRESHOLD                 61
-#define SQR_TOOM3_THRESHOLD                107
-#define SQR_TOOM4_THRESHOLD                170
-#define SQR_TOOM6_THRESHOLD                309
-#define SQR_TOOM8_THRESHOLD                360
-
-#define MULMOD_BNM1_THRESHOLD               20
+#define BMOD_1_TO_MOD_1_THRESHOLD           19
+
+#define MUL_TOOM22_THRESHOLD                32
+#define MUL_TOOM33_THRESHOLD               105
+#define MUL_TOOM44_THRESHOLD               166
+#define MUL_TOOM6H_THRESHOLD               232
+#define MUL_TOOM8H_THRESHOLD               357
+
+#define MUL_TOOM32_TO_TOOM43_THRESHOLD      96
+#define MUL_TOOM32_TO_TOOM53_THRESHOLD     110
+#define MUL_TOOM42_TO_TOOM53_THRESHOLD      93
+#define MUL_TOOM42_TO_TOOM63_THRESHOLD     113
+#define MUL_TOOM43_TO_TOOM54_THRESHOLD     133
+
+#define SQR_BASECASE_THRESHOLD               4
+#define SQR_TOOM2_THRESHOLD                 60
+#define SQR_TOOM3_THRESHOLD                102
+#define SQR_TOOM4_THRESHOLD                155
+#define SQR_TOOM6_THRESHOLD                306
+#define SQR_TOOM8_THRESHOLD                333
+
+#define MULMID_TOOM42_THRESHOLD             52
+
+#define MULMOD_BNM1_THRESHOLD               15
  #define SQRMOD_BNM1_THRESHOLD               23
  
-#define MUL_FFT_MODF_THRESHOLD             480  /* k = 5 */
+#define MUL_FFT_MODF_THRESHOLD             412  /* k = 5 */
  #define MUL_FFT_TABLE3                                      \
    { {    480, 5}, {     18, 6}, {     10, 5}, {     21, 6}, \
      {     11, 5}, {     23, 6}, {     12, 5}, {     25, 6}, \
@@ -104,7 +109,7 @@ along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
  #define MUL_FFT_TABLE3_SIZE 151
  #define MUL_FFT_THRESHOLD                 5760
  
-#define SQR_FFT_MODF_THRESHOLD             476  /* k = 5 */
+#define SQR_FFT_MODF_THRESHOLD             412  /* k = 5 */
  #define SQR_FFT_TABLE3                                      \
    { {    476, 5}, {     19, 6}, {     10, 5}, {     23, 6}, \
      {     12, 5}, {     25, 6}, {     27, 7}, {     14, 6}, \
@@ -145,37 +150,44 @@ along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
      {  65536,17}, { 131072,18}, { 262144,19}, { 524288,20}, \
      {1048576,21}, {2097152,22}, {4194304,23}, {8388608,24} }
  #define SQR_FFT_TABLE3_SIZE 152
-#define SQR_FFT_THRESHOLD                 3136
+#define SQR_FFT_THRESHOLD                 4224
  
  #define MULLO_BASECASE_THRESHOLD             0  /* always */
-#define MULLO_DC_THRESHOLD                 101
-#define MULLO_MUL_N_THRESHOLD            15604
+#define MULLO_DC_THRESHOLD                 113
+#define MULLO_MUL_N_THRESHOLD            11278
  
-#define DC_DIV_QR_THRESHOLD                119
+#define DC_DIV_QR_THRESHOLD                112
  #define DC_DIVAPPR_Q_THRESHOLD             390
  #define DC_BDIV_QR_THRESHOLD               110
-#define DC_BDIV_Q_THRESHOLD                318
+#define DC_BDIV_Q_THRESHOLD                286
  
-#define INV_MULMOD_BNM1_THRESHOLD           79
-#define INV_NEWTON_THRESHOLD               387
-#define INV_APPR_THRESHOLD                 381
+#define INV_MULMOD_BNM1_THRESHOLD           62
+#define INV_NEWTON_THRESHOLD               393
+#define INV_APPR_THRESHOLD                 375
  
-#define BINV_NEWTON_THRESHOLD              393
-#define REDC_1_TO_REDC_N_THRESHOLD         110
+#define BINV_NEWTON_THRESHOLD              390
+#define REDC_1_TO_REDC_N_THRESHOLD         124
  
-#define MU_DIV_QR_THRESHOLD               1718
-#define MU_DIVAPPR_Q_THRESHOLD            1895
-#define MUPI_DIV_QR_THRESHOLD              180
-#define MU_BDIV_QR_THRESHOLD              1387
+#define MU_DIV_QR_THRESHOLD               1652
+#define MU_DIVAPPR_Q_THRESHOLD            1685
+#define MUPI_DIV_QR_THRESHOLD              171
+#define MU_BDIV_QR_THRESHOLD              1470
  #define MU_BDIV_Q_THRESHOLD               1652
  
-#define MATRIX22_STRASSEN_THRESHOLD         17
-#define HGCD_THRESHOLD                     282
-#define GCD_DC_THRESHOLD                  1138
-#define GCDEXT_DC_THRESHOLD                773
+#define POWM_SEC_TABLE  2,23,88,387,961,2578
+
+#define MATRIX22_STRASSEN_THRESHOLD         15
+#define HGCD_THRESHOLD                     278
+#define HGCD_APPR_THRESHOLD                357
+#define HGCD_REDUCE_THRESHOLD             2899
+#define GCD_DC_THRESHOLD                  1258
+#define GCDEXT_DC_THRESHOLD                777
  #define JACOBI_BASE_METHOD                   3
  
-#define GET_STR_DC_THRESHOLD                14
-#define GET_STR_PRECOMPUTE_THRESHOLD        19
-#define SET_STR_DC_THRESHOLD              3754
-#define SET_STR_PRECOMPUTE_THRESHOLD      8097
+#define GET_STR_DC_THRESHOLD                15
+#define GET_STR_PRECOMPUTE_THRESHOLD        24
+#define SET_STR_DC_THRESHOLD              3866
+#define SET_STR_PRECOMPUTE_THRESHOLD      7708
+
+#define FAC_DSC_THRESHOLD                 1025
+#define FAC_ODD_THRESHOLD                   24
diff --git a/mpn/alpha/ev6/mod_1_4.asm b/mpn/alpha/ev6/mod_1_4.asm

new file mode 100644 (file)

index 0000000..d833af5
--- /dev/null
+++ b/mpn/alpha/ev6/mod_1_4.asm
@@ -0,0 +1,326 @@
+dnl Alpha mpn_mod_1s_4p
+
+dnl  Contributed to the GNU project by Torbjorn Granlund.
+
+dnl  Copyright 2009, 2010 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C TODO:
+C  * Optimise.  2.75 c/l should be possible.
+C  * Write a proper mpn_mod_1s_4p_cps.  The code below was compiler generated.
+C  * Optimise feed-in code, starting the sw pipeline in switch code.
+C  * Shorten software pipeline.  The mul instructions are scheduled too far
+C    from their users.  Fixing this will allow us to use fewer registers.
+C  * If we cannot reduce register usage, write perhaps small-n basecase.
+C  * Does this work for PIC?
+
+C      cycles/limb
+C EV4:     ?
+C EV5:    23
+C EV6:     3
+
+define(`ap',     `r16')
+define(`n',      `r17')
+define(`pl',     `r24')
+define(`ph',     `r25')
+define(`rl',     `r6')
+define(`rh',     `r7')
+define(`B1modb', `r1')
+define(`B2modb', `r2')
+define(`B3modb', `r3')
+define(`B4modb', `r4')
+define(`B5modb', `r5')
+
+ASM_START()
+PROLOGUE(mpn_mod_1s_4p)
+       lda     r30, -64(r30)
+       stq     r9, 8(r30)
+       ldq     B1modb, 16(r19)
+       stq     r10, 16(r30)
+       ldq     B2modb, 24(r19)
+       stq     r11, 24(r30)
+       ldq     B3modb, 32(r19)
+       stq     r12, 32(r30)
+       ldq     B4modb, 40(r19)
+       stq     r13, 40(r30)
+       ldq     B5modb, 48(r19)
+       s8addq  n, ap, ap               C point ap at vector end
+
+       and     n, 3, r0
+       lda     n, -4(n)
+       beq     r0, L(b0)
+       lda     r6, -2(r0)
+       blt     r6, L(b1)
+       beq     r6, L(b2)
+
+L(b3): ldq     r21, -16(ap)
+       ldq     r22, -8(ap)
+       ldq     r20, -24(ap)
+       mulq    r21, B1modb, r8
+       umulh   r21, B1modb, r12
+       mulq    r22, B2modb, r9
+       umulh   r22, B2modb, r13
+       addq    r8, r20, pl
+       cmpult  pl, r8, r0
+       addq    r0, r12, ph
+       addq    r9, pl, rl
+       cmpult  rl, r9, r0
+       addq    r13, ph, ph
+       addq    r0, ph, rh
+       lda     ap, -56(ap)
+       br      L(com)
+
+L(b0): ldq     r21, -24(ap)
+       ldq     r22, -16(ap)
+       ldq     r23, -8(ap)
+       ldq     r20, -32(ap)
+       mulq    r21, B1modb, r8
+       umulh   r21, B1modb, r12
+       mulq    r22, B2modb, r9
+       umulh   r22, B2modb, r13
+       mulq    r23, B3modb, r10
+       umulh   r23, B3modb, r27
+       addq    r8, r20, pl
+       cmpult  pl, r8, r0
+       addq    r0, r12, ph
+       addq    r9, pl, pl
+       cmpult  pl, r9, r0
+       addq    r13, ph, ph
+       addq    r0, ph, ph
+       addq    r10, pl, rl
+       cmpult  rl, r10, r0
+       addq    r27, ph, ph
+       addq    r0, ph, rh
+       lda     ap, -64(ap)
+       br      L(com)
+
+L(b1): bis     r31, r31, rh
+       ldq     rl, -8(ap)
+       lda     ap, -40(ap)
+       br      L(com)
+
+L(b2): ldq     rh, -8(ap)
+       ldq     rl, -16(ap)
+       lda     ap, -48(ap)
+
+L(com):        ble     n, L(ed3)
+       ldq     r21, 8(ap)
+       ldq     r22, 16(ap)
+       ldq     r23, 24(ap)
+       ldq     r20, 0(ap)
+       lda     n, -4(n)
+       lda     ap, -32(ap)
+       mulq    r21, B1modb, r8
+       umulh   r21, B1modb, r12
+       mulq    r22, B2modb, r9
+       umulh   r22, B2modb, r13
+       mulq    r23, B3modb, r10
+       umulh   r23, B3modb, r27
+       mulq    rl, B4modb, r11
+       umulh   rl, B4modb, r28
+       ble     n, L(ed2)
+
+       ALIGN(16)
+L(top):        ldq     r21, 8(ap)
+       mulq    rh, B5modb, rl
+       addq    r8, r20, pl
+       ldq     r22, 16(ap)
+       cmpult  pl, r8, r0
+       umulh   rh, B5modb, rh
+       ldq     r23, 24(ap)
+       addq    r0, r12, ph
+       addq    r9, pl, pl
+       mulq    r21, B1modb, r8
+       cmpult  pl, r9, r0
+       addq    r13, ph, ph
+       umulh   r21, B1modb, r12
+       lda     ap, -32(ap)
+       addq    r0, ph, ph
+       addq    r10, pl, pl
+       mulq    r22, B2modb, r9
+       cmpult  pl, r10, r0
+       addq    r27, ph, ph
+       addq    r11, pl, pl
+       umulh   r22, B2modb, r13
+       addq    r0, ph, ph
+       cmpult  pl, r11, r0
+       addq    r28, ph, ph
+       mulq    r23, B3modb, r10
+       ldq     r20, 32(ap)
+       addq    pl, rl, rl
+       umulh   r23, B3modb, r27
+       addq    r0, ph, ph
+       cmpult  rl, pl, r0
+       mulq    rl, B4modb, r11
+       addq    ph, rh, rh
+       umulh   rl, B4modb, r28
+       addq    r0, rh, rh
+       lda     n, -4(n)
+       bgt     n, L(top)
+
+L(ed2):        mulq    rh, B5modb, rl
+       addq    r8, r20, pl
+       umulh   rh, B5modb, rh
+       cmpult  pl, r8, r0
+       addq    r0, r12, ph
+       addq    r9, pl, pl
+       cmpult  pl, r9, r0
+       addq    r13, ph, ph
+       addq    r0, ph, ph
+       addq    r10, pl, pl
+       cmpult  pl, r10, r0
+       addq    r27, ph, ph
+       addq    r11, pl, pl
+       addq    r0, ph, ph
+       cmpult  pl, r11, r0
+       addq    r28, ph, ph
+       addq    pl, rl, rl
+       addq    r0, ph, ph
+       cmpult  rl, pl, r0
+       addq    ph, rh, rh
+       addq    r0, rh, rh
+
+L(ed3):        mulq    rh, B1modb, r8
+       umulh   rh, B1modb, rh
+       addq    r8, rl, rl
+       cmpult  rl, r8, r0
+       addq    r0, rh, rh
+
+       ldq     r24, 8(r19)             C cnt
+       sll     rh, r24, rh
+       subq    r31, r24, r25
+       srl     rl, r25, r2
+       sll     rl, r24, rl
+       or      r2, rh, rh
+
+       ldq     r23, 0(r19)             C bi
+       mulq    rh, r23, r8
+       umulh   rh, r23, r9
+       addq    rh, 1, r7
+       addq    r8, rl, r8              C ql
+       cmpult  r8, rl, r0
+       addq    r9, r7, r9
+       addq    r0, r9, r9              C qh
+       mulq    r9, r18, r21            C qh * b
+       subq    rl, r21, rl
+       cmpult  r8, rl, r0              C rl > ql
+       negq    r0, r0
+       and     r0, r18, r0
+       addq    rl, r0, rl
+       cmpule  r18, rl, r0             C rl >= b
+       negq    r0, r0
+       and     r0, r18, r0
+       subq    rl, r0, rl
+
+       srl     rl, r24, r0
+
+       ldq     r9, 8(r30)
+       ldq     r10, 16(r30)
+       ldq     r11, 24(r30)
+       ldq     r12, 32(r30)
+       ldq     r13, 40(r30)
+       lda     r30, 64(r30)
+       ret     r31, (r26), 1
+EPILOGUE()
+
+PROLOGUE(mpn_mod_1s_4p_cps,gp)
+       lda     r30, -32(r30)
+       stq     r26, 0(r30)
+       stq     r9, 8(r30)
+       stq     r10, 16(r30)
+       stq     r11, 24(r30)
+       mov     r16, r11
+       LEA(    r4, __clz_tab)
+       lda     r10, 65(r31)
+       cmpbge  r31, r17, r1
+       srl     r1, 1, r1
+       xor     r1, 127, r1
+       addq    r1, r4, r1
+       ldq_u   r2, 0(r1)
+       extbl   r2, r1, r2
+       s8subq  r2, 7, r2
+       srl     r17, r2, r3
+       subq    r10, r2, r10
+       addq    r3, r4, r3
+       ldq_u   r1, 0(r3)
+       extbl   r1, r3, r1
+       subq    r10, r1, r10
+       sll     r17, r10, r9
+       mov     r9, r16
+       jsr     r26, mpn_invert_limb
+       ldah    r29, 0(r26)
+       subq    r31, r10, r2
+       lda     r1, 1(r31)
+       sll     r1, r10, r1
+       subq    r31, r9, r3
+       srl     r0, r2, r2
+       ldq     r26, 0(r30)
+       bis     r2, r1, r2
+       lda     r29, 0(r29)
+       stq     r0, 0(r11)
+       stq     r10, 8(r11)
+       mulq    r2, r3, r2
+       srl     r2, r10, r3
+       umulh   r2, r0, r1
+       stq     r3, 16(r11)
+       mulq    r2, r0, r3
+       ornot   r31, r1, r1
+       subq    r1, r2, r1
+       mulq    r1, r9, r1
+       addq    r1, r9, r2
+       cmpule  r1, r3, r3
+       cmoveq  r3, r2, r1
+       srl     r1, r10, r3
+       umulh   r1, r0, r2
+       stq     r3, 24(r11)
+       mulq    r1, r0, r3
+       ornot   r31, r2, r2
+       subq    r2, r1, r2
+       mulq    r2, r9, r2
+       addq    r2, r9, r1
+       cmpule  r2, r3, r3
+       cmoveq  r3, r1, r2
+       srl     r2, r10, r1
+       umulh   r2, r0, r3
+       stq     r1, 32(r11)
+       mulq    r2, r0, r1
+       ornot   r31, r3, r3
+       subq    r3, r2, r3
+       mulq    r3, r9, r3
+       addq    r3, r9, r2
+       cmpule  r3, r1, r1
+       cmoveq  r1, r2, r3
+       srl     r3, r10, r2
+       umulh   r3, r0, r1
+       stq     r2, 40(r11)
+       mulq    r3, r0, r0
+       ornot   r31, r1, r1
+       subq    r1, r3, r1
+       mulq    r1, r9, r1
+       addq    r1, r9, r9
+       cmpule  r1, r0, r0
+       cmoveq  r0, r9, r1
+       ldq     r9, 8(r30)
+       srl     r1, r10, r1
+       ldq     r10, 16(r30)
+       stq     r1, 48(r11)
+       ldq     r11, 24(r30)
+       lda     r30, 32(r30)
+       ret     r31, (r26), 1
+EPILOGUE()
diff --git a/mpn/alpha/ev6/slot.pl b/mpn/alpha/ev6/slot.pl

old mode 100644 (file)

new mode 100755 (executable)

index 17967e7..563627d
--- a/mpn/alpha/ev6/slot.pl
+++ b/mpn/alpha/ev6/slot.pl
@@ -1,6 +1,6 @@
  #!/usr/bin/perl -w
  
-# Copyright 2000, 2001, 2003, 2004, 2005 Free Software Foundation, Inc.
+# Copyright 2000, 2001, 2003, 2004, 2005, 2011 Free Software Foundation, Inc.
  #
  # This file is part of the GNU MP Library.
  #
@@ -40,9 +40,12 @@ my %optable =
    (
     'addq'   => 'E',
     'and'    => 'E',
+   'andnot' => 'E',
     'beq'    => 'U',
     'bge'    => 'U',
     'bgt'    => 'U',
+   'bic'    => 'E',
+   'bis'    => 'E',
     'blt'    => 'U',
     'bne'    => 'U',
     'br'     => 'L',
@@ -71,6 +74,7 @@ my %optable =
     'ldt'    => 'L',
     'ret'    => 'L',
     'mov'    => 'E',
+   'mull'   => 'U',
     'mulq'   => 'U',
     'negq'   => 'E',
     'nop'    => 'E',
diff --git a/mpn/alpha/invert_limb.asm b/mpn/alpha/invert_limb.asm

index 99f51a30d582beb5eaf1fbc66eff92637a220c74..3f188ca7e9cf2f5cfb4334964bd3c1c08d396ea8 100644 (file)
--- a/mpn/alpha/invert_limb.asm
+++ b/mpn/alpha/invert_limb.asm
@@ -1,7 +1,7 @@
  dnl  Alpha mpn_invert_limb -- Invert a normalized limb.
  
-dnl  Copyright 1996, 2000, 2001, 2002, 2003, 2007 Free Software Foundation,
-dnl  Inc.
+dnl  Copyright 1996, 2000, 2001, 2002, 2003, 2007, 2011 Free Software
+dnl  Foundation, Inc.
  dnl
  dnl  This file is part of the GNU MP Library.
  dnl
@@ -21,322 +21,93 @@ dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
  include(`../config.m4')
  
  C      cycles/limb
-C EV4:    ~175
-C EV5:    ~111-126
-C EV6:    ~52-76
+C EV4:     ?
+C EV5:   137/140  (with BWX/without BWX)
+C EV6:    71/72   (with BWX/without BWX)
  
-C  This is based on ideas of Peter L. Montgomery.
+C This was compiler generated, with minimal manual edits.  Surely several
+C cycles could be cut with some thought.
  
  ASM_START()
-
-FLOAT64($C36,9223372036854775808.0)            C 2^63
-
  PROLOGUE(mpn_invert_limb,gp)
-       lda     r30,-16(r30)
-       addq    r16,r16,r1
-       bne     r1,$73
-       lda     r0,-1
-       br      r31,$Lend
-$73:
-       srl     r16,1,r1
-       stq     r1,0(r30)
-       ldt     f11,0(r30)
-       cvtqt   f11,f1
-       LEA(r1,$C36)
-       ldt     f10,0(r1)               C f10 = 2^63
-       divt    f10,f1,f10              C f10 = 2^63 / (u / 2)
-       LEA(r2,$invtab-4096)
-       srl     r16,52,r1               C extract high 12 bits
-       addq    r1,r1,r1                C align ...0000bbbbbbbb0
-       addq    r1,r2,r1                C compute array offset
-       ldq_u   r2,0(r1)                C load quadword containing our 16 bits
-bigend(`addq   r1,1,r1')
-       extwl   r2,r1,r2                C extract desired 16 bits
-       sll     r2,48,r0
-       umulh   r16,r0,r1
-       addq    r16,r1,r3
-       stq     r3,0(r30)
-       ldt     f11,0(r30)
-       cvtqt   f11,f1
-       mult    f1,f10,f1
-       cvttqc  f1,f1
-       stt     f1,0(r30)
-       ldq     r4,0(r30)
-       subq    r0,r4,r0
-       umulh   r16,r0,r1
-       mulq    r16,r0,r2
-       addq    r16,r1,r3
-       bge     r3,$Loop2
-$Loop1:        addq    r2,r16,r2
-       cmpult  r2,r16,r1
-       addq    r3,r1,r3
-       addq    r0,1,r0
-       blt     r3,$Loop1
-$Loop2:        cmpult  r2,r16,r1
-       subq    r0,1,r0
-       subq    r3,r1,r3
-       subq    r2,r16,r2
-       bge     r3,$Loop2
-$Lend:
-       lda     r30,16(r30)
-       ret     r31,(r26),1
-EPILOGUE(mpn_invert_limb)
-DATASTART($invtab)
-       .word 0xffff,0xffc0,0xff80,0xff40,0xff00,0xfec0,0xfe81,0xfe41
-       .word 0xfe01,0xfdc2,0xfd83,0xfd43,0xfd04,0xfcc5,0xfc86,0xfc46
-       .word 0xfc07,0xfbc8,0xfb8a,0xfb4b,0xfb0c,0xfacd,0xfa8e,0xfa50
-       .word 0xfa11,0xf9d3,0xf994,0xf956,0xf918,0xf8d9,0xf89b,0xf85d
-       .word 0xf81f,0xf7e1,0xf7a3,0xf765,0xf727,0xf6ea,0xf6ac,0xf66e
-       .word 0xf631,0xf5f3,0xf5b6,0xf578,0xf53b,0xf4fd,0xf4c0,0xf483
-       .word 0xf446,0xf409,0xf3cc,0xf38f,0xf352,0xf315,0xf2d8,0xf29c
-       .word 0xf25f,0xf222,0xf1e6,0xf1a9,0xf16d,0xf130,0xf0f4,0xf0b8
-       .word 0xf07c,0xf03f,0xf003,0xefc7,0xef8b,0xef4f,0xef14,0xeed8
-       .word 0xee9c,0xee60,0xee25,0xede9,0xedae,0xed72,0xed37,0xecfb
-       .word 0xecc0,0xec85,0xec4a,0xec0e,0xebd3,0xeb98,0xeb5d,0xeb22
-       .word 0xeae8,0xeaad,0xea72,0xea37,0xe9fd,0xe9c2,0xe988,0xe94d
-       .word 0xe913,0xe8d8,0xe89e,0xe864,0xe829,0xe7ef,0xe7b5,0xe77b
-       .word 0xe741,0xe707,0xe6cd,0xe694,0xe65a,0xe620,0xe5e6,0xe5ad
-       .word 0xe573,0xe53a,0xe500,0xe4c7,0xe48d,0xe454,0xe41b,0xe3e2
-       .word 0xe3a9,0xe370,0xe336,0xe2fd,0xe2c5,0xe28c,0xe253,0xe21a
-       .word 0xe1e1,0xe1a9,0xe170,0xe138,0xe0ff,0xe0c7,0xe08e,0xe056
-       .word 0xe01e,0xdfe5,0xdfad,0xdf75,0xdf3d,0xdf05,0xdecd,0xde95
-       .word 0xde5d,0xde25,0xdded,0xddb6,0xdd7e,0xdd46,0xdd0f,0xdcd7
-       .word 0xdca0,0xdc68,0xdc31,0xdbf9,0xdbc2,0xdb8b,0xdb54,0xdb1d
-       .word 0xdae6,0xdaae,0xda78,0xda41,0xda0a,0xd9d3,0xd99c,0xd965
-       .word 0xd92f,0xd8f8,0xd8c1,0xd88b,0xd854,0xd81e,0xd7e8,0xd7b1
-       .word 0xd77b,0xd745,0xd70e,0xd6d8,0xd6a2,0xd66c,0xd636,0xd600
-       .word 0xd5ca,0xd594,0xd55f,0xd529,0xd4f3,0xd4bd,0xd488,0xd452
-       .word 0xd41d,0xd3e7,0xd3b2,0xd37c,0xd347,0xd312,0xd2dd,0xd2a7
-       .word 0xd272,0xd23d,0xd208,0xd1d3,0xd19e,0xd169,0xd134,0xd100
-       .word 0xd0cb,0xd096,0xd061,0xd02d,0xcff8,0xcfc4,0xcf8f,0xcf5b
-       .word 0xcf26,0xcef2,0xcebe,0xce89,0xce55,0xce21,0xcded,0xcdb9
-       .word 0xcd85,0xcd51,0xcd1d,0xcce9,0xccb5,0xcc81,0xcc4e,0xcc1a
-       .word 0xcbe6,0xcbb3,0xcb7f,0xcb4c,0xcb18,0xcae5,0xcab1,0xca7e
-       .word 0xca4b,0xca17,0xc9e4,0xc9b1,0xc97e,0xc94b,0xc918,0xc8e5
-       .word 0xc8b2,0xc87f,0xc84c,0xc819,0xc7e7,0xc7b4,0xc781,0xc74f
-       .word 0xc71c,0xc6e9,0xc6b7,0xc684,0xc652,0xc620,0xc5ed,0xc5bb
-       .word 0xc589,0xc557,0xc524,0xc4f2,0xc4c0,0xc48e,0xc45c,0xc42a
-       .word 0xc3f8,0xc3c7,0xc395,0xc363,0xc331,0xc300,0xc2ce,0xc29c
-       .word 0xc26b,0xc239,0xc208,0xc1d6,0xc1a5,0xc174,0xc142,0xc111
-       .word 0xc0e0,0xc0af,0xc07e,0xc04d,0xc01c,0xbfeb,0xbfba,0xbf89
-       .word 0xbf58,0xbf27,0xbef6,0xbec5,0xbe95,0xbe64,0xbe33,0xbe03
-       .word 0xbdd2,0xbda2,0xbd71,0xbd41,0xbd10,0xbce0,0xbcb0,0xbc80
-       .word 0xbc4f,0xbc1f,0xbbef,0xbbbf,0xbb8f,0xbb5f,0xbb2f,0xbaff
-       .word 0xbacf,0xba9f,0xba6f,0xba40,0xba10,0xb9e0,0xb9b1,0xb981
-       .word 0xb951,0xb922,0xb8f2,0xb8c3,0xb894,0xb864,0xb835,0xb806
-       .word 0xb7d6,0xb7a7,0xb778,0xb749,0xb71a,0xb6eb,0xb6bc,0xb68d
-       .word 0xb65e,0xb62f,0xb600,0xb5d1,0xb5a2,0xb574,0xb545,0xb516
-       .word 0xb4e8,0xb4b9,0xb48a,0xb45c,0xb42e,0xb3ff,0xb3d1,0xb3a2
-       .word 0xb374,0xb346,0xb318,0xb2e9,0xb2bb,0xb28d,0xb25f,0xb231
-       .word 0xb203,0xb1d5,0xb1a7,0xb179,0xb14b,0xb11d,0xb0f0,0xb0c2
-       .word 0xb094,0xb067,0xb039,0xb00b,0xafde,0xafb0,0xaf83,0xaf55
-       .word 0xaf28,0xaefb,0xaecd,0xaea0,0xae73,0xae45,0xae18,0xadeb
-       .word 0xadbe,0xad91,0xad64,0xad37,0xad0a,0xacdd,0xacb0,0xac83
-       .word 0xac57,0xac2a,0xabfd,0xabd0,0xaba4,0xab77,0xab4a,0xab1e
-       .word 0xaaf1,0xaac5,0xaa98,0xaa6c,0xaa40,0xaa13,0xa9e7,0xa9bb
-       .word 0xa98e,0xa962,0xa936,0xa90a,0xa8de,0xa8b2,0xa886,0xa85a
-       .word 0xa82e,0xa802,0xa7d6,0xa7aa,0xa77e,0xa753,0xa727,0xa6fb
-       .word 0xa6d0,0xa6a4,0xa678,0xa64d,0xa621,0xa5f6,0xa5ca,0xa59f
-       .word 0xa574,0xa548,0xa51d,0xa4f2,0xa4c6,0xa49b,0xa470,0xa445
-       .word 0xa41a,0xa3ef,0xa3c4,0xa399,0xa36e,0xa343,0xa318,0xa2ed
-       .word 0xa2c2,0xa297,0xa26d,0xa242,0xa217,0xa1ed,0xa1c2,0xa197
-       .word 0xa16d,0xa142,0xa118,0xa0ed,0xa0c3,0xa098,0xa06e,0xa044
-       .word 0xa01a,0x9fef,0x9fc5,0x9f9b,0x9f71,0x9f47,0x9f1c,0x9ef2
-       .word 0x9ec8,0x9e9e,0x9e74,0x9e4b,0x9e21,0x9df7,0x9dcd,0x9da3
-       .word 0x9d79,0x9d50,0x9d26,0x9cfc,0x9cd3,0x9ca9,0x9c80,0x9c56
-       .word 0x9c2d,0x9c03,0x9bda,0x9bb0,0x9b87,0x9b5e,0x9b34,0x9b0b
-       .word 0x9ae2,0x9ab9,0x9a8f,0x9a66,0x9a3d,0x9a14,0x99eb,0x99c2
-       .word 0x9999,0x9970,0x9947,0x991e,0x98f6,0x98cd,0x98a4,0x987b
-       .word 0x9852,0x982a,0x9801,0x97d8,0x97b0,0x9787,0x975f,0x9736
-       .word 0x970e,0x96e5,0x96bd,0x9695,0x966c,0x9644,0x961c,0x95f3
-       .word 0x95cb,0x95a3,0x957b,0x9553,0x952b,0x9503,0x94db,0x94b3
-       .word 0x948b,0x9463,0x943b,0x9413,0x93eb,0x93c3,0x939b,0x9374
-       .word 0x934c,0x9324,0x92fd,0x92d5,0x92ad,0x9286,0x925e,0x9237
-       .word 0x920f,0x91e8,0x91c0,0x9199,0x9172,0x914a,0x9123,0x90fc
-       .word 0x90d4,0x90ad,0x9086,0x905f,0x9038,0x9011,0x8fea,0x8fc3
-       .word 0x8f9c,0x8f75,0x8f4e,0x8f27,0x8f00,0x8ed9,0x8eb2,0x8e8b
-       .word 0x8e65,0x8e3e,0x8e17,0x8df1,0x8dca,0x8da3,0x8d7d,0x8d56
-       .word 0x8d30,0x8d09,0x8ce3,0x8cbc,0x8c96,0x8c6f,0x8c49,0x8c23
-       .word 0x8bfc,0x8bd6,0x8bb0,0x8b8a,0x8b64,0x8b3d,0x8b17,0x8af1
-       .word 0x8acb,0x8aa5,0x8a7f,0x8a59,0x8a33,0x8a0d,0x89e7,0x89c1
-       .word 0x899c,0x8976,0x8950,0x892a,0x8904,0x88df,0x88b9,0x8893
-       .word 0x886e,0x8848,0x8823,0x87fd,0x87d8,0x87b2,0x878d,0x8767
-       .word 0x8742,0x871d,0x86f7,0x86d2,0x86ad,0x8687,0x8662,0x863d
-       .word 0x8618,0x85f3,0x85ce,0x85a9,0x8583,0x855e,0x8539,0x8514
-       .word 0x84f0,0x84cb,0x84a6,0x8481,0x845c,0x8437,0x8412,0x83ee
-       .word 0x83c9,0x83a4,0x8380,0x835b,0x8336,0x8312,0x82ed,0x82c9
-       .word 0x82a4,0x8280,0x825b,0x8237,0x8212,0x81ee,0x81ca,0x81a5
-       .word 0x8181,0x815d,0x8138,0x8114,0x80f0,0x80cc,0x80a8,0x8084
-       .word 0x8060,0x803c,0x8018,0x7ff4,0x7fd0,0x7fac,0x7f88,0x7f64
-       .word 0x7f40,0x7f1c,0x7ef8,0x7ed4,0x7eb1,0x7e8d,0x7e69,0x7e45
-       .word 0x7e22,0x7dfe,0x7ddb,0x7db7,0x7d93,0x7d70,0x7d4c,0x7d29
-       .word 0x7d05,0x7ce2,0x7cbf,0x7c9b,0x7c78,0x7c55,0x7c31,0x7c0e
-       .word 0x7beb,0x7bc7,0x7ba4,0x7b81,0x7b5e,0x7b3b,0x7b18,0x7af5
-       .word 0x7ad2,0x7aaf,0x7a8c,0x7a69,0x7a46,0x7a23,0x7a00,0x79dd
-       .word 0x79ba,0x7997,0x7975,0x7952,0x792f,0x790c,0x78ea,0x78c7
-       .word 0x78a4,0x7882,0x785f,0x783c,0x781a,0x77f7,0x77d5,0x77b2
-       .word 0x7790,0x776e,0x774b,0x7729,0x7706,0x76e4,0x76c2,0x76a0
-       .word 0x767d,0x765b,0x7639,0x7617,0x75f5,0x75d2,0x75b0,0x758e
-       .word 0x756c,0x754a,0x7528,0x7506,0x74e4,0x74c2,0x74a0,0x747e
-       .word 0x745d,0x743b,0x7419,0x73f7,0x73d5,0x73b4,0x7392,0x7370
-       .word 0x734f,0x732d,0x730b,0x72ea,0x72c8,0x72a7,0x7285,0x7264
-       .word 0x7242,0x7221,0x71ff,0x71de,0x71bc,0x719b,0x717a,0x7158
-       .word 0x7137,0x7116,0x70f5,0x70d3,0x70b2,0x7091,0x7070,0x704f
-       .word 0x702e,0x700c,0x6feb,0x6fca,0x6fa9,0x6f88,0x6f67,0x6f46
-       .word 0x6f26,0x6f05,0x6ee4,0x6ec3,0x6ea2,0x6e81,0x6e60,0x6e40
-       .word 0x6e1f,0x6dfe,0x6dde,0x6dbd,0x6d9c,0x6d7c,0x6d5b,0x6d3a
-       .word 0x6d1a,0x6cf9,0x6cd9,0x6cb8,0x6c98,0x6c77,0x6c57,0x6c37
-       .word 0x6c16,0x6bf6,0x6bd6,0x6bb5,0x6b95,0x6b75,0x6b54,0x6b34
-       .word 0x6b14,0x6af4,0x6ad4,0x6ab4,0x6a94,0x6a73,0x6a53,0x6a33
-       .word 0x6a13,0x69f3,0x69d3,0x69b3,0x6993,0x6974,0x6954,0x6934
-       .word 0x6914,0x68f4,0x68d4,0x68b5,0x6895,0x6875,0x6855,0x6836
-       .word 0x6816,0x67f6,0x67d7,0x67b7,0x6798,0x6778,0x6758,0x6739
-       .word 0x6719,0x66fa,0x66db,0x66bb,0x669c,0x667c,0x665d,0x663e
-       .word 0x661e,0x65ff,0x65e0,0x65c0,0x65a1,0x6582,0x6563,0x6544
-       .word 0x6524,0x6505,0x64e6,0x64c7,0x64a8,0x6489,0x646a,0x644b
-       .word 0x642c,0x640d,0x63ee,0x63cf,0x63b0,0x6391,0x6373,0x6354
-       .word 0x6335,0x6316,0x62f7,0x62d9,0x62ba,0x629b,0x627c,0x625e
-       .word 0x623f,0x6221,0x6202,0x61e3,0x61c5,0x61a6,0x6188,0x6169
-       .word 0x614b,0x612c,0x610e,0x60ef,0x60d1,0x60b3,0x6094,0x6076
-       .word 0x6058,0x6039,0x601b,0x5ffd,0x5fdf,0x5fc0,0x5fa2,0x5f84
-       .word 0x5f66,0x5f48,0x5f2a,0x5f0b,0x5eed,0x5ecf,0x5eb1,0x5e93
-       .word 0x5e75,0x5e57,0x5e39,0x5e1b,0x5dfd,0x5de0,0x5dc2,0x5da4
-       .word 0x5d86,0x5d68,0x5d4a,0x5d2d,0x5d0f,0x5cf1,0x5cd3,0x5cb6
-       .word 0x5c98,0x5c7a,0x5c5d,0x5c3f,0x5c21,0x5c04,0x5be6,0x5bc9
-       .word 0x5bab,0x5b8e,0x5b70,0x5b53,0x5b35,0x5b18,0x5afb,0x5add
-       .word 0x5ac0,0x5aa2,0x5a85,0x5a68,0x5a4b,0x5a2d,0x5a10,0x59f3
-       .word 0x59d6,0x59b8,0x599b,0x597e,0x5961,0x5944,0x5927,0x590a
-       .word 0x58ed,0x58d0,0x58b3,0x5896,0x5879,0x585c,0x583f,0x5822
-       .word 0x5805,0x57e8,0x57cb,0x57ae,0x5791,0x5775,0x5758,0x573b
-       .word 0x571e,0x5702,0x56e5,0x56c8,0x56ac,0x568f,0x5672,0x5656
-       .word 0x5639,0x561c,0x5600,0x55e3,0x55c7,0x55aa,0x558e,0x5571
-       .word 0x5555,0x5538,0x551c,0x5500,0x54e3,0x54c7,0x54aa,0x548e
-       .word 0x5472,0x5456,0x5439,0x541d,0x5401,0x53e5,0x53c8,0x53ac
-       .word 0x5390,0x5374,0x5358,0x533c,0x5320,0x5304,0x52e8,0x52cb
-       .word 0x52af,0x5293,0x5277,0x525c,0x5240,0x5224,0x5208,0x51ec
-       .word 0x51d0,0x51b4,0x5198,0x517c,0x5161,0x5145,0x5129,0x510d
-       .word 0x50f2,0x50d6,0x50ba,0x509f,0x5083,0x5067,0x504c,0x5030
-       .word 0x5015,0x4ff9,0x4fdd,0x4fc2,0x4fa6,0x4f8b,0x4f6f,0x4f54
-       .word 0x4f38,0x4f1d,0x4f02,0x4ee6,0x4ecb,0x4eb0,0x4e94,0x4e79
-       .word 0x4e5e,0x4e42,0x4e27,0x4e0c,0x4df0,0x4dd5,0x4dba,0x4d9f
-       .word 0x4d84,0x4d69,0x4d4d,0x4d32,0x4d17,0x4cfc,0x4ce1,0x4cc6
-       .word 0x4cab,0x4c90,0x4c75,0x4c5a,0x4c3f,0x4c24,0x4c09,0x4bee
-       .word 0x4bd3,0x4bb9,0x4b9e,0x4b83,0x4b68,0x4b4d,0x4b32,0x4b18
-       .word 0x4afd,0x4ae2,0x4ac7,0x4aad,0x4a92,0x4a77,0x4a5d,0x4a42
-       .word 0x4a27,0x4a0d,0x49f2,0x49d8,0x49bd,0x49a3,0x4988,0x496e
-       .word 0x4953,0x4939,0x491e,0x4904,0x48e9,0x48cf,0x48b5,0x489a
-       .word 0x4880,0x4865,0x484b,0x4831,0x4817,0x47fc,0x47e2,0x47c8
-       .word 0x47ae,0x4793,0x4779,0x475f,0x4745,0x472b,0x4711,0x46f6
-       .word 0x46dc,0x46c2,0x46a8,0x468e,0x4674,0x465a,0x4640,0x4626
-       .word 0x460c,0x45f2,0x45d8,0x45be,0x45a5,0x458b,0x4571,0x4557
-       .word 0x453d,0x4523,0x4509,0x44f0,0x44d6,0x44bc,0x44a2,0x4489
-       .word 0x446f,0x4455,0x443c,0x4422,0x4408,0x43ef,0x43d5,0x43bc
-       .word 0x43a2,0x4388,0x436f,0x4355,0x433c,0x4322,0x4309,0x42ef
-       .word 0x42d6,0x42bc,0x42a3,0x428a,0x4270,0x4257,0x423d,0x4224
-       .word 0x420b,0x41f2,0x41d8,0x41bf,0x41a6,0x418c,0x4173,0x415a
-       .word 0x4141,0x4128,0x410e,0x40f5,0x40dc,0x40c3,0x40aa,0x4091
-       .word 0x4078,0x405f,0x4046,0x402d,0x4014,0x3ffb,0x3fe2,0x3fc9
-       .word 0x3fb0,0x3f97,0x3f7e,0x3f65,0x3f4c,0x3f33,0x3f1a,0x3f01
-       .word 0x3ee8,0x3ed0,0x3eb7,0x3e9e,0x3e85,0x3e6c,0x3e54,0x3e3b
-       .word 0x3e22,0x3e0a,0x3df1,0x3dd8,0x3dc0,0x3da7,0x3d8e,0x3d76
-       .word 0x3d5d,0x3d45,0x3d2c,0x3d13,0x3cfb,0x3ce2,0x3cca,0x3cb1
-       .word 0x3c99,0x3c80,0x3c68,0x3c50,0x3c37,0x3c1f,0x3c06,0x3bee
-       .word 0x3bd6,0x3bbd,0x3ba5,0x3b8d,0x3b74,0x3b5c,0x3b44,0x3b2b
-       .word 0x3b13,0x3afb,0x3ae3,0x3acb,0x3ab2,0x3a9a,0x3a82,0x3a6a
-       .word 0x3a52,0x3a3a,0x3a22,0x3a09,0x39f1,0x39d9,0x39c1,0x39a9
-       .word 0x3991,0x3979,0x3961,0x3949,0x3931,0x3919,0x3901,0x38ea
-       .word 0x38d2,0x38ba,0x38a2,0x388a,0x3872,0x385a,0x3843,0x382b
-       .word 0x3813,0x37fb,0x37e3,0x37cc,0x37b4,0x379c,0x3785,0x376d
-       .word 0x3755,0x373e,0x3726,0x370e,0x36f7,0x36df,0x36c8,0x36b0
-       .word 0x3698,0x3681,0x3669,0x3652,0x363a,0x3623,0x360b,0x35f4
-       .word 0x35dc,0x35c5,0x35ae,0x3596,0x357f,0x3567,0x3550,0x3539
-       .word 0x3521,0x350a,0x34f3,0x34db,0x34c4,0x34ad,0x3496,0x347e
-       .word 0x3467,0x3450,0x3439,0x3422,0x340a,0x33f3,0x33dc,0x33c5
-       .word 0x33ae,0x3397,0x3380,0x3368,0x3351,0x333a,0x3323,0x330c
-       .word 0x32f5,0x32de,0x32c7,0x32b0,0x3299,0x3282,0x326c,0x3255
-       .word 0x323e,0x3227,0x3210,0x31f9,0x31e2,0x31cb,0x31b5,0x319e
-       .word 0x3187,0x3170,0x3159,0x3143,0x312c,0x3115,0x30fe,0x30e8
-       .word 0x30d1,0x30ba,0x30a4,0x308d,0x3076,0x3060,0x3049,0x3033
-       .word 0x301c,0x3005,0x2fef,0x2fd8,0x2fc2,0x2fab,0x2f95,0x2f7e
-       .word 0x2f68,0x2f51,0x2f3b,0x2f24,0x2f0e,0x2ef8,0x2ee1,0x2ecb
-       .word 0x2eb4,0x2e9e,0x2e88,0x2e71,0x2e5b,0x2e45,0x2e2e,0x2e18
-       .word 0x2e02,0x2dec,0x2dd5,0x2dbf,0x2da9,0x2d93,0x2d7c,0x2d66
-       .word 0x2d50,0x2d3a,0x2d24,0x2d0e,0x2cf8,0x2ce1,0x2ccb,0x2cb5
-       .word 0x2c9f,0x2c89,0x2c73,0x2c5d,0x2c47,0x2c31,0x2c1b,0x2c05
-       .word 0x2bef,0x2bd9,0x2bc3,0x2bad,0x2b97,0x2b81,0x2b6c,0x2b56
-       .word 0x2b40,0x2b2a,0x2b14,0x2afe,0x2ae8,0x2ad3,0x2abd,0x2aa7
-       .word 0x2a91,0x2a7c,0x2a66,0x2a50,0x2a3a,0x2a25,0x2a0f,0x29f9
-       .word 0x29e4,0x29ce,0x29b8,0x29a3,0x298d,0x2977,0x2962,0x294c
-       .word 0x2937,0x2921,0x290c,0x28f6,0x28e0,0x28cb,0x28b5,0x28a0
-       .word 0x288b,0x2875,0x2860,0x284a,0x2835,0x281f,0x280a,0x27f5
-       .word 0x27df,0x27ca,0x27b4,0x279f,0x278a,0x2774,0x275f,0x274a
-       .word 0x2735,0x271f,0x270a,0x26f5,0x26e0,0x26ca,0x26b5,0x26a0
-       .word 0x268b,0x2676,0x2660,0x264b,0x2636,0x2621,0x260c,0x25f7
-       .word 0x25e2,0x25cd,0x25b8,0x25a2,0x258d,0x2578,0x2563,0x254e
-       .word 0x2539,0x2524,0x250f,0x24fa,0x24e5,0x24d1,0x24bc,0x24a7
-       .word 0x2492,0x247d,0x2468,0x2453,0x243e,0x2429,0x2415,0x2400
-       .word 0x23eb,0x23d6,0x23c1,0x23ad,0x2398,0x2383,0x236e,0x235a
-       .word 0x2345,0x2330,0x231c,0x2307,0x22f2,0x22dd,0x22c9,0x22b4
-       .word 0x22a0,0x228b,0x2276,0x2262,0x224d,0x2239,0x2224,0x2210
-       .word 0x21fb,0x21e6,0x21d2,0x21bd,0x21a9,0x2194,0x2180,0x216c
-       .word 0x2157,0x2143,0x212e,0x211a,0x2105,0x20f1,0x20dd,0x20c8
-       .word 0x20b4,0x20a0,0x208b,0x2077,0x2063,0x204e,0x203a,0x2026
-       .word 0x2012,0x1ffd,0x1fe9,0x1fd5,0x1fc1,0x1fac,0x1f98,0x1f84
-       .word 0x1f70,0x1f5c,0x1f47,0x1f33,0x1f1f,0x1f0b,0x1ef7,0x1ee3
-       .word 0x1ecf,0x1ebb,0x1ea7,0x1e93,0x1e7f,0x1e6a,0x1e56,0x1e42
-       .word 0x1e2e,0x1e1a,0x1e06,0x1df3,0x1ddf,0x1dcb,0x1db7,0x1da3
-       .word 0x1d8f,0x1d7b,0x1d67,0x1d53,0x1d3f,0x1d2b,0x1d18,0x1d04
-       .word 0x1cf0,0x1cdc,0x1cc8,0x1cb5,0x1ca1,0x1c8d,0x1c79,0x1c65
-       .word 0x1c52,0x1c3e,0x1c2a,0x1c17,0x1c03,0x1bef,0x1bdb,0x1bc8
-       .word 0x1bb4,0x1ba0,0x1b8d,0x1b79,0x1b66,0x1b52,0x1b3e,0x1b2b
-       .word 0x1b17,0x1b04,0x1af0,0x1add,0x1ac9,0x1ab6,0x1aa2,0x1a8f
-       .word 0x1a7b,0x1a68,0x1a54,0x1a41,0x1a2d,0x1a1a,0x1a06,0x19f3
-       .word 0x19e0,0x19cc,0x19b9,0x19a5,0x1992,0x197f,0x196b,0x1958
-       .word 0x1945,0x1931,0x191e,0x190b,0x18f8,0x18e4,0x18d1,0x18be
-       .word 0x18ab,0x1897,0x1884,0x1871,0x185e,0x184b,0x1837,0x1824
-       .word 0x1811,0x17fe,0x17eb,0x17d8,0x17c4,0x17b1,0x179e,0x178b
-       .word 0x1778,0x1765,0x1752,0x173f,0x172c,0x1719,0x1706,0x16f3
-       .word 0x16e0,0x16cd,0x16ba,0x16a7,0x1694,0x1681,0x166e,0x165b
-       .word 0x1648,0x1635,0x1623,0x1610,0x15fd,0x15ea,0x15d7,0x15c4
-       .word 0x15b1,0x159f,0x158c,0x1579,0x1566,0x1553,0x1541,0x152e
-       .word 0x151b,0x1508,0x14f6,0x14e3,0x14d0,0x14bd,0x14ab,0x1498
-       .word 0x1485,0x1473,0x1460,0x144d,0x143b,0x1428,0x1416,0x1403
-       .word 0x13f0,0x13de,0x13cb,0x13b9,0x13a6,0x1394,0x1381,0x136f
-       .word 0x135c,0x1349,0x1337,0x1325,0x1312,0x1300,0x12ed,0x12db
-       .word 0x12c8,0x12b6,0x12a3,0x1291,0x127f,0x126c,0x125a,0x1247
-       .word 0x1235,0x1223,0x1210,0x11fe,0x11ec,0x11d9,0x11c7,0x11b5
-       .word 0x11a3,0x1190,0x117e,0x116c,0x1159,0x1147,0x1135,0x1123
-       .word 0x1111,0x10fe,0x10ec,0x10da,0x10c8,0x10b6,0x10a4,0x1091
-       .word 0x107f,0x106d,0x105b,0x1049,0x1037,0x1025,0x1013,0x1001
-       .word 0x0fef,0x0fdc,0x0fca,0x0fb8,0x0fa6,0x0f94,0x0f82,0x0f70
-       .word 0x0f5e,0x0f4c,0x0f3a,0x0f28,0x0f17,0x0f05,0x0ef3,0x0ee1
-       .word 0x0ecf,0x0ebd,0x0eab,0x0e99,0x0e87,0x0e75,0x0e64,0x0e52
-       .word 0x0e40,0x0e2e,0x0e1c,0x0e0a,0x0df9,0x0de7,0x0dd5,0x0dc3
-       .word 0x0db2,0x0da0,0x0d8e,0x0d7c,0x0d6b,0x0d59,0x0d47,0x0d35
-       .word 0x0d24,0x0d12,0x0d00,0x0cef,0x0cdd,0x0ccb,0x0cba,0x0ca8
-       .word 0x0c97,0x0c85,0x0c73,0x0c62,0x0c50,0x0c3f,0x0c2d,0x0c1c
-       .word 0x0c0a,0x0bf8,0x0be7,0x0bd5,0x0bc4,0x0bb2,0x0ba1,0x0b8f
-       .word 0x0b7e,0x0b6c,0x0b5b,0x0b4a,0x0b38,0x0b27,0x0b15,0x0b04
-       .word 0x0af2,0x0ae1,0x0ad0,0x0abe,0x0aad,0x0a9c,0x0a8a,0x0a79
-       .word 0x0a68,0x0a56,0x0a45,0x0a34,0x0a22,0x0a11,0x0a00,0x09ee
-       .word 0x09dd,0x09cc,0x09bb,0x09a9,0x0998,0x0987,0x0976,0x0965
-       .word 0x0953,0x0942,0x0931,0x0920,0x090f,0x08fe,0x08ec,0x08db
-       .word 0x08ca,0x08b9,0x08a8,0x0897,0x0886,0x0875,0x0864,0x0853
-       .word 0x0842,0x0831,0x081f,0x080e,0x07fd,0x07ec,0x07db,0x07ca
-       .word 0x07b9,0x07a8,0x0798,0x0787,0x0776,0x0765,0x0754,0x0743
-       .word 0x0732,0x0721,0x0710,0x06ff,0x06ee,0x06dd,0x06cd,0x06bc
-       .word 0x06ab,0x069a,0x0689,0x0678,0x0668,0x0657,0x0646,0x0635
-       .word 0x0624,0x0614,0x0603,0x05f2,0x05e1,0x05d1,0x05c0,0x05af
-       .word 0x059e,0x058e,0x057d,0x056c,0x055c,0x054b,0x053a,0x052a
-       .word 0x0519,0x0508,0x04f8,0x04e7,0x04d6,0x04c6,0x04b5,0x04a5
-       .word 0x0494,0x0484,0x0473,0x0462,0x0452,0x0441,0x0431,0x0420
-       .word 0x0410,0x03ff,0x03ef,0x03de,0x03ce,0x03bd,0x03ad,0x039c
-       .word 0x038c,0x037b,0x036b,0x035b,0x034a,0x033a,0x0329,0x0319
-       .word 0x0309,0x02f8,0x02e8,0x02d7,0x02c7,0x02b7,0x02a6,0x0296
-       .word 0x0286,0x0275,0x0265,0x0255,0x0245,0x0234,0x0224,0x0214
-       .word 0x0204,0x01f3,0x01e3,0x01d3,0x01c3,0x01b2,0x01a2,0x0192
-       .word 0x0182,0x0172,0x0161,0x0151,0x0141,0x0131,0x0121,0x0111
-       .word 0x0101,0x00f0,0x00e0,0x00d0,0x00c0,0x00b0,0x00a0,0x0090
-       .word 0x0080,0x0070,0x0060,0x0050,0x0040,0x0030,0x0020,0x0010
+       LEA(    r2, approx_tab)
+       srl     r16, 54, r1
+       srl     r16, 24, r4
+       and     r16, 1, r5
+       bic     r1, 1, r7
+       lda     r4, 1(r4)
+       srl     r16, 1, r3
+ifdef(`BWX',`
+       addq    r7, r2, r1
+       ldwu    r0, -512(r1)
+',`
+       addq    r1, r2, r1
+       ldq_u   r0, -512(r1)
+       extwl   r0, r7, r0
+')
+       addq    r3, r5, r3
+       mull    r0, r0, r1
+       sll     r0, 11, r0
+       mulq    r1, r4, r1
+       srl     r1, 40, r1
+       subq    r0, r1, r0
+       lda     r0, -1(r0)
+       mulq    r0, r0, r2
+       sll     r0, 60, r1
+       sll     r0, 13, r0
+       mulq    r2, r4, r2
+       subq    r1, r2, r1
+       srl     r1, 47, r1
+       addq    r0, r1, r0
+       mulq    r0, r3, r3
+       srl     r0, 1, r1
+       cmoveq  r5, 0, r1
+       subq    r1, r3, r1
+       umulh   r1, r0, r3
+       sll     r0, 31, r0
+       srl     r3, 1, r1
+       addq    r0, r1, r0
+       mulq    r0, r16, r2
+       umulh   r0, r16, r3
+       addq    r2, r16, r1
+       addq    r3, r16, r16
+       cmpult  r1, r2, r1
+       addq    r16, r1, r3
+       subq    r0, r3, r0
+       ret     r31, (r26), 1
+EPILOGUE()
+DATASTART(approx_tab)
+        .word   0x7fd,0x7f5,0x7ed,0x7e5,0x7dd,0x7d5,0x7ce,0x7c6
+        .word   0x7bf,0x7b7,0x7b0,0x7a8,0x7a1,0x79a,0x792,0x78b
+        .word   0x784,0x77d,0x776,0x76f,0x768,0x761,0x75b,0x754
+        .word   0x74d,0x747,0x740,0x739,0x733,0x72c,0x726,0x720
+        .word   0x719,0x713,0x70d,0x707,0x700,0x6fa,0x6f4,0x6ee
+        .word   0x6e8,0x6e2,0x6dc,0x6d6,0x6d1,0x6cb,0x6c5,0x6bf
+        .word   0x6ba,0x6b4,0x6ae,0x6a9,0x6a3,0x69e,0x698,0x693
+        .word   0x68d,0x688,0x683,0x67d,0x678,0x673,0x66e,0x669
+        .word   0x664,0x65e,0x659,0x654,0x64f,0x64a,0x645,0x640
+        .word   0x63c,0x637,0x632,0x62d,0x628,0x624,0x61f,0x61a
+        .word   0x616,0x611,0x60c,0x608,0x603,0x5ff,0x5fa,0x5f6
+        .word   0x5f1,0x5ed,0x5e9,0x5e4,0x5e0,0x5dc,0x5d7,0x5d3
+        .word   0x5cf,0x5cb,0x5c6,0x5c2,0x5be,0x5ba,0x5b6,0x5b2
+        .word   0x5ae,0x5aa,0x5a6,0x5a2,0x59e,0x59a,0x596,0x592
+        .word   0x58e,0x58a,0x586,0x583,0x57f,0x57b,0x577,0x574
+        .word   0x570,0x56c,0x568,0x565,0x561,0x55e,0x55a,0x556
+        .word   0x553,0x54f,0x54c,0x548,0x545,0x541,0x53e,0x53a
+        .word   0x537,0x534,0x530,0x52d,0x52a,0x526,0x523,0x520
+        .word   0x51c,0x519,0x516,0x513,0x50f,0x50c,0x509,0x506
+        .word   0x503,0x500,0x4fc,0x4f9,0x4f6,0x4f3,0x4f0,0x4ed
+        .word   0x4ea,0x4e7,0x4e4,0x4e1,0x4de,0x4db,0x4d8,0x4d5
+        .word   0x4d2,0x4cf,0x4cc,0x4ca,0x4c7,0x4c4,0x4c1,0x4be
+        .word   0x4bb,0x4b9,0x4b6,0x4b3,0x4b0,0x4ad,0x4ab,0x4a8
+        .word   0x4a5,0x4a3,0x4a0,0x49d,0x49b,0x498,0x495,0x493
+        .word   0x490,0x48d,0x48b,0x488,0x486,0x483,0x481,0x47e
+        .word   0x47c,0x479,0x477,0x474,0x472,0x46f,0x46d,0x46a
+        .word   0x468,0x465,0x463,0x461,0x45e,0x45c,0x459,0x457
+        .word   0x455,0x452,0x450,0x44e,0x44b,0x449,0x447,0x444
+        .word   0x442,0x440,0x43e,0x43b,0x439,0x437,0x435,0x432
+        .word   0x430,0x42e,0x42c,0x42a,0x428,0x425,0x423,0x421
+        .word   0x41f,0x41d,0x41b,0x419,0x417,0x414,0x412,0x410
+        .word   0x40e,0x40c,0x40a,0x408,0x406,0x404,0x402,0x400
  DATAEND()
  ASM_END()
diff --git a/mpn/alpha/sub_n.asm b/mpn/alpha/sub_n.asm

index 9567e52592989e9da529b6672ca3e28725ba2244..690e07cf2c58fe3c7dbbdaa23fef03d812dec6f0 100644 (file)
--- a/mpn/alpha/sub_n.asm
+++ b/mpn/alpha/sub_n.asm
@@ -1,7 +1,7 @@
  dnl  Alpha mpn_sub_n -- Subtract two limb vectors of the same length > 0
  dnl  and store difference in a third limb vector.
  
-dnl  Copyright 1995, 1999, 2000, 2005 Free Software Foundation, Inc.
+dnl  Copyright 1995, 1999, 2000, 2005, 2011 Free Software Foundation, Inc.
  
  dnl  This file is part of the GNU MP Library.
  
@@ -32,9 +32,13 @@ dnl  s2_ptr  r18
  dnl  size      r19
  
  ASM_START()
+PROLOGUE(mpn_sub_nc)                   C subtract with carry-in, shares the body of mpn_sub_n
+       bis     r31,r20,r25             C cy (r25) = r20, presumably the carry-in argument; r31 reads as zero
+       br      L(com)                  C enter mpn_sub_n just past its clear cy instruction
+EPILOGUE()
  PROLOGUE(mpn_sub_n)
         bis     r31,r31,r25             C clear cy
-       subq    r19,4,r19               C decr loop cnt
+L(com):        subq    r19,4,r19               C decr loop cnt
         blt     r19,$Lend2              C if less than 4 limbs, goto 2nd loop
  C Start software pipeline for 1st loop
         ldq     r0,0(r18)
@@ -42,13 +46,16 @@ C Start software pipeline for 1st loop
         ldq     r1,8(r18)
         ldq     r5,8(r17)
         addq    r17,32,r17              C update s1_ptr
+       subq    r4,r0,r28               C 1st main subtract
         ldq     r2,16(r18)
-       subq    r4,r0,r20               C 1st main subtract
+       subq    r28,r25,r20             C 1st carry subtract
         ldq     r3,24(r18)
-       subq    r19,4,r19               C decr loop cnt
+       cmpult  r4,r0,r8                C compute cy from last subtract
         ldq     r6,-16(r17)
-       cmpult  r4,r0,r25               C compute cy from last subtract
+       cmpult  r28,r25,r25             C compute cy from last subtract
         ldq     r7,-8(r17)
+       bis     r8,r25,r25              C combine cy from the two subtracts
+       subq    r19,4,r19               C decr loop cnt
         subq    r5,r1,r28               C 2nd main subtract
         addq    r18,32,r18              C update s2_ptr
         subq    r28,r25,r21             C 2nd carry subtract
@@ -142,5 +149,5 @@ $Lend0:     subq    r4,r0,r28               C main subtract
  
  $Lret: bis     r25,r31,r0              C return cy
         ret     r31,(r26),1
-EPILOGUE(mpn_sub_n)
+EPILOGUE()
  ASM_END()
diff --git a/mpn/arm/README b/mpn/arm/README

index e1ca925a1959e136a8c934fa27a97bb892ef6fa9..ad9545c0d6f4f7975c378c4a2e345d0fb35f04ab 100644 (file)
--- a/mpn/arm/README
+++ b/mpn/arm/README
@@ -1,4 +1,4 @@
-Copyright 2002 Free Software Foundation, Inc.
+Copyright 2002, 2012 Free Software Foundation, Inc.
  
  This file is part of the GNU MP Library.
  
@@ -19,16 +19,6 @@ with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
  
  
  
-This directory contains mpn functions for ARM processors.
-It has been optimized for StrongARM.
-
-TODO
-
-Write mpn_addmul_2.  The speed of mpn_addmul_1 is 9.75 c/l;
-mpn_addmul_2 could run at 8 c/l.  mpn_addmul_N could
-approach 6 c/l, but register shortage will make this hard.
-
-Perhaps nails is the way to go even for an embedded processor like
-this, since the umlal accumulation could be used very effectively in
-that case.  with just 2 nail bits, we should get close to 5 c/l for a
-mpn_addmul_N or mpn_mul_basecase.
+This directory contains mpn functions for ARM processors.  It has been
+optimised for Cortex-A9, but the code in the top-level directory should run
+on all ARM processors at architecture level v4 or later.
diff --git a/mpn/arm/add_n.asm b/mpn/arm/add_n.asm

deleted file mode 100644 (file)

index 0f07917..0000000
--- a/mpn/arm/add_n.asm
+++ /dev/null
@@ -1,69 +0,0 @@
-dnl  ARM mpn_add_n -- Add two limb vectors of the same length > 0 and store sum
-dnl  in a third limb vector.
-dnl  Contributed by Robert Harley.
-
-dnl  Copyright 1997, 2000, 2001 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of the GNU Lesser General Public License as published
-dnl  by the Free Software Foundation; either version 3 of the License, or (at
-dnl  your option) any later version.
-
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-dnl  License for more details.
-
-dnl  You should have received a copy of the GNU Lesser General Public License
-dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C This code runs at 5 cycles/limb.
-
-define(`rp',`r0')
-define(`up',`r1')
-define(`vp',`r2')
-define(`n',`r3')
-
-
-ASM_START()
-PROLOGUE(mpn_add_n)
-       stmfd   sp!, { r8, r9, lr }
-       movs    n, n, lsr #1
-       bcc     L(skip1)
-       ldr     r12, [up], #4
-       ldr     lr, [vp], #4
-       adds    r12, r12, lr
-       str     r12, [rp], #4
-L(skip1):
-       tst     n, #1
-       beq     L(skip2)
-       ldmia   up!, { r8, r9 }
-       ldmia   vp!, { r12, lr }
-       adcs    r8, r8, r12
-       adcs    r9, r9, lr
-       stmia   rp!, { r8, r9 }
-L(skip2):
-       bics    n, n, #1
-       beq     L(return)
-       stmfd   sp!, { r4, r5, r6, r7 }
-L(add_n_loop):
-       ldmia   up!, { r4, r5, r6, r7 }
-       ldmia   vp!, { r8, r9, r12, lr }
-       adcs    r4, r4, r8
-       ldr     r8, [rp, #12]                   C cache allocate
-       adcs    r5, r5, r9
-       adcs    r6, r6, r12
-       adcs    r7, r7, lr
-       stmia   rp!, { r4, r5, r6, r7 }
-       sub     n, n, #2
-       teq     n, #0
-       bne     L(add_n_loop)
-       ldmfd   sp!, { r4, r5, r6, r7 }
-L(return):
-       adc     r0, n, #0
-       ldmfd   sp!, { r8, r9, pc }
-EPILOGUE(mpn_add_n)
diff --git a/mpn/arm/addmul_1.asm b/mpn/arm/addmul_1.asm

deleted file mode 100644 (file)

index de33f2f..0000000
--- a/mpn/arm/addmul_1.asm
+++ /dev/null
@@ -1,107 +0,0 @@
-dnl  ARM mpn_addmul_1 -- Multiply a limb vector with a limb and add the result
-dnl  to a second limb vector.
-
-dnl  Copyright 1998, 2000, 2001, 2003 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of the GNU Lesser General Public License as published
-dnl  by the Free Software Foundation; either version 3 of the License, or (at
-dnl  your option) any later version.
-
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-dnl  License for more details.
-
-dnl  You should have received a copy of the GNU Lesser General Public License
-dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C            cycles/limb
-C StrongARM:  7.75-9.75  (dependent on vl value)
-C XScale:        8-9     (dependent on vl value, estimated)
-
-define(`rp',`r0')
-define(`up',`r1')
-define(`n',`r2')
-define(`vl',`r3')
-define(`rl',`r12')
-define(`ul',`r6')
-define(`r',`lr')
-
-
-ASM_START()
-PROLOGUE(mpn_addmul_1)
-       stmfd   sp!, { r4-r6, lr }
-       mov     r4, #0                  C clear r4
-       adds    r0, r0, #0              C clear cy
-       tst     n, #1
-       beq     L(skip1)
-       ldr     ul, [up], #4
-       ldr     rl, [rp, #0]
-       umull   r5, r4, ul, vl
-       adds    r, rl, r5
-       str     r, [rp], #4
-L(skip1):
-       tst     n, #2
-       beq     L(skip2)
-       ldr     ul, [up], #4
-       ldr     rl, [rp, #0]
-       mov     r5, #0
-       umlal   r4, r5, ul, vl
-       ldr     ul, [up], #4
-       adcs    r, rl, r4
-       ldr     rl, [rp, #4]
-       mov     r4, #0
-       umlal   r5, r4, ul, vl
-       str     r, [rp], #4
-       adcs    r, rl, r5
-       str     r, [rp], #4
-L(skip2):
-       bics    r, n, #3
-       beq     L(return)
-
-       ldr     ul, [up], #4
-       ldr     rl, [rp, #0]
-       mov     r5, #0
-       umlal   r4, r5, ul, vl
-       b       L(in)
-
-L(loop):
-       ldr     ul, [up], #4
-       adcs    r, rl, r5
-       ldr     rl, [rp, #4]
-       mov     r5, #0
-       umlal   r4, r5, ul, vl
-       str     r, [rp], #4
-L(in): ldr     ul, [up], #4
-       adcs    r, rl, r4
-       ldr     rl, [rp, #4]
-       mov     r4, #0
-       umlal   r5, r4, ul, vl
-       str     r, [rp], #4
-       ldr     ul, [up], #4
-       adcs    r, rl, r5
-       ldr     rl, [rp, #4]
-       mov     r5, #0
-       umlal   r4, r5, ul, vl
-       str     r, [rp], #4
-       ldr     ul, [up], #4
-       adcs    r, rl, r4
-       ldr     rl, [rp, #4]
-       mov     r4, #0
-       umlal   r5, r4, ul, vl
-       str     r, [rp], #4
-       sub     n, n, #4
-       bics    r, n, #3
-       bne     L(loop)
-
-       adcs    r, rl, r5
-       str     r, [rp], #4
-L(return):
-       adc     r0, r4, #0
-       ldmfd   sp!, { r4-r6, pc }
-EPILOGUE(mpn_addmul_1)
diff --git a/mpn/arm/aors_n.asm b/mpn/arm/aors_n.asm

new file mode 100644 (file)

index 0000000..46e8541
--- /dev/null
+++ b/mpn/arm/aors_n.asm
@@ -0,0 +1,100 @@
+dnl  ARM mpn_add_n and mpn_sub_n
+
+dnl  Contributed to the GNU project by Robert Harley.
+
+dnl  Copyright 1997, 2000, 2001, 2012 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C           cycles/limb
+C StrongARM     ?
+C XScale        ?
+C Cortex-A8     ?
+C Cortex-A9     2.5    slightly fluctuating
+C Cortex-A15    2.25
+
+define(`rp', `r0')
+define(`up', `r1')
+define(`vp', `r2')
+define(`n',  `r3')
+
+ifdef(`OPERATION_add_n', `
+  define(`ADDSUB',     adds)
+  define(`ADDSUBC',    adcs)
+  define(`CLRCY',      `cmn    r0, #0')
+  define(`SETCY',      `cmp    $1, #1')
+  define(`RETVAL',     `adc    r0, n, #0')
+  define(`func',       mpn_add_n)
+  define(`func_nc',    mpn_add_nc)')
+ifdef(`OPERATION_sub_n', `
+  define(`ADDSUB',     subs)
+  define(`ADDSUBC',    sbcs)
+  define(`CLRCY',      `cmp    r0, r0')
+  define(`SETCY',      `rsbs   $1, $1, #0')
+  define(`RETVAL',     `sbc    r0, r0, r0
+                       and     r0, r0, #1')
+  define(`func',       mpn_sub_n)
+  define(`func_nc',    mpn_sub_nc)')
+
+MULFUNC_PROLOGUE(mpn_add_n mpn_add_nc mpn_sub_n mpn_sub_nc)
+
+ASM_START()
+PROLOGUE(func_nc)                      C carry-in entry point: mpn_add_nc or mpn_sub_nc
+       ldr     r12, [sp, #0]           C first stack word, presumably the carry-in argument
+       stmfd   sp!, { r8, r9, lr }     C same save set as func, whose exit code is shared
+       SETCY(  r12)                    C derive the initial carry flag from r12
+       b       L(ent)                  C join the common code inside func
+EPILOGUE()
+PROLOGUE(func)                         C mpn_add_n or mpn_sub_n, chosen by OPERATION_*
+       stmfd   sp!, { r8, r9, lr }     C lr is used as a scratch register below
+       CLRCY(  r12)                    C carry flag = no carry (add) / no borrow (sub); the macro ignores its argument
+L(ent):        tst     n, #1                   C odd n: do one limb first; tst keeps the carry flag
+       beq     L(skip1)
+       ldr     r12, [up], #4
+       ldr     lr, [vp], #4
+       ADDSUBC r12, r12, lr
+       str     r12, [rp], #4
+L(skip1):
+       tst     n, #2                   C bit 1 of n set: do two limbs
+       beq     L(skip2)
+       ldmia   up!, { r8, r9 }
+       ldmia   vp!, { r12, lr }
+       ADDSUBC r8, r8, r12
+       ADDSUBC r9, r9, lr
+       stmia   rp!, { r8, r9 }
+L(skip2):
+       bics    n, n, #3                C n = limbs left, a multiple of 4
+       beq     L(rtn)
+       stmfd   sp!, { r4, r5, r6, r7 } C more scratch registers for the 4-limb loop
+
+L(top):        ldmia   up!, { r4, r5, r6, r7 }
+       ldmia   vp!, { r8, r9, r12, lr }
+       ADDSUBC r4, r4, r8
+       sub     n, n, #4                C plain sub, so the carry chain is not disturbed
+       ADDSUBC r5, r5, r9
+       ADDSUBC r6, r6, r12
+       ADDSUBC r7, r7, lr
+       stmia   rp!, { r4, r5, r6, r7 }
+       teq     n, #0                   C zero test that leaves the carry flag intact
+       bne     L(top)
+
+       ldmfd   sp!, { r4, r5, r6, r7 }
+
+L(rtn):        RETVAL                          C r0 = carry-out (add) or borrow-out (sub); the add form relies on n = 0 here
+       ldmfd   sp!, { r8, r9, pc }     C restore and return, saved lr is popped into pc
+EPILOGUE()
diff --git a/mpn/arm/aorscnd_n.asm b/mpn/arm/aorscnd_n.asm

new file mode 100644 (file)

index 0000000..cfc2502
--- /dev/null
+++ b/mpn/arm/aorscnd_n.asm
@@ -0,0 +1,121 @@
+dnl  ARM mpn_addcnd_n, mpn_subcnd_n
+
+dnl  Copyright 2012 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C           cycles/limb
+C StrongARM     ?
+C XScale        ?
+C Cortex-A8     ?
+C Cortex-A9     2.5    slightly fluctuating
+C Cortex-A15    ?
+
+define(`rp',   `r0')
+define(`up',   `r1')
+define(`vp',   `r2')
+define(`n',    `r3')
+
+define(`cnd',  `r12')
+
+ifdef(`OPERATION_addcnd_n', `
+       define(`ADDSUB',      adds)
+       define(`ADDSUBC',      adcs)
+       define(`INITCY',      `cmn      r0, #0')
+       define(`RETVAL',      `adc      r0, n, #0')
+       define(func,          mpn_addcnd_n)')
+ifdef(`OPERATION_subcnd_n', `
+       define(`ADDSUB',      subs)
+       define(`ADDSUBC',      sbcs)
+       define(`INITCY',      `cmp      r0, #0')
+       define(`RETVAL',      `adc      r0, n, #0
+                             rsb       r0, r0, #1')
+       define(func,          mpn_subcnd_n)')
+
+MULFUNC_PROLOGUE(mpn_addcnd_n mpn_subcnd_n)
+
+ASM_START()
+PROLOGUE(func)                         C mpn_addcnd_n or mpn_subcnd_n, chosen by OPERATION_*
+       push    {r4-r11}                C 8 registers, 32 bytes
+       ldr     cnd, [sp, #32]          C first stack word above the saved registers, presumably the condition argument
+
+       INITCY                          C really only needed for n = 0 (mod 4)
+
+       teq     cnd, #0                 C could use this for clearing/setting cy
+       mvnne   cnd, #0                 C conditionally set to 0xffffffff
+
+       ands    r4, n, #3               C r4 = n mod 4 picks the lead-in block
+       beq     L(top)
+       cmp     r4, #2
+       bcc     L(b1)                   C n mod 4 = 1
+       beq     L(b2)                   C n mod 4 = 2, otherwise fall into b3
+
+L(b3): ldm     vp!, {r4,r5,r6}
+       ldm     up!, {r8,r9,r10}
+       and     r4, r4, cnd             C cnd is 0 or all ones, so each v limb is zeroed or kept
+       and     r5, r5, cnd
+       and     r6, r6, cnd
+       ADDSUB  r8, r8, r4              C first op of the chain ignores the incoming carry flag
+       ADDSUBC r9, r9, r5
+       ADDSUBC r10, r10, r6
+       stm     rp!, {r8,r9,r10}
+       sub     n, n, #3
+       teq     n, #0                   C sub and teq leave the carry flag for the loop
+       bne     L(top)
+       b       L(end)
+
+L(b2): ldm     vp!, {r4,r5}
+       ldm     up!, {r8,r9}
+       and     r4, r4, cnd
+       and     r5, r5, cnd
+       ADDSUB  r8, r8, r4
+       ADDSUBC r9, r9, r5
+       stm     rp!, {r8,r9}
+       sub     n, n, #2
+       teq     n, #0
+       bne     L(top)
+       b       L(end)
+
+L(b1): ldr     r4, [vp], #4
+       ldr     r8, [up], #4
+       and     r4, r4, cnd
+       ADDSUB  r8, r8, r4
+       str     r8, [rp], #4
+       sub     n, n, #1
+       teq     n, #0
+       beq     L(end)
+
+L(top):        ldm     vp!, {r4,r5,r6,r7}      C main loop, 4 limbs per pass
+       ldm     up!, {r8,r9,r10,r11}
+       and     r4, r4, cnd
+       and     r5, r5, cnd
+       and     r6, r6, cnd
+       and     r7, r7, cnd
+       ADDSUBC r8, r8, r4
+       ADDSUBC r9, r9, r5
+       ADDSUBC r10, r10, r6
+       ADDSUBC r11, r11, r7
+       sub     n, n, #4
+       stm     rp!, {r8,r9,r10,r11}
+       teq     n, #0
+       bne     L(top)
+
+L(end):        RETVAL                          C n is 0 here; r0 = carry-out, or 1 - carry flag for subcnd
+       pop     {r4-r11}
+       bx      r14
+EPILOGUE()
diff --git a/mpn/arm/aorslsh1_n.asm b/mpn/arm/aorslsh1_n.asm

new file mode 100644 (file)

index 0000000..5aaabb3
--- /dev/null
+++ b/mpn/arm/aorslsh1_n.asm
@@ -0,0 +1,155 @@
+dnl  ARM mpn_addlsh1_n and mpn_sublsh1_n
+
+dnl  Contributed to the GNU project by Torbjorn Granlund.
+
+dnl  Copyright 2012 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C            addlsh1_n       sublsh1_n
+C           cycles/limb     cycles/limb
+C StrongARM     ?               ?
+C XScale        ?               ?
+C Cortex-A8     ?               ?
+C Cortex-A9     3.12            3.7
+C Cortex-A15    ?               ?
+
+C TODO
+C  * The addlsh1_n code runs well, but is only barely faster than mpn_addmul_1.
+C    The sublsh1_n code could surely be tweaked; its REVCY slows things down
+C    considerably.  If two insns are really needed, it might help to separate
+C    them for better micro-parallelism.
+
+define(`rp', `r0')
+define(`up', `r1')
+define(`vp', `r2')
+define(`n',  `r3')
+
+ifdef(`OPERATION_addlsh1_n', `
+  define(`ADDSUB',     adds)
+  define(`ADDSUBC',    adcs)
+  define(`SETCY',      `cmp    $1, #1')
+  define(`RETVAL',     `adc    r0, $1, #2')
+  define(`SAVECY',     `sbc    $1, $2, #0')
+  define(`RESTCY',     `cmn    $1, #1')
+  define(`REVCY',      `')
+  define(`INICYR',     `mov    $1, #0')
+  define(`r10r11',     `r11')
+  define(`func',       mpn_addlsh1_n)
+  define(`func_nc',    mpn_addlsh1_nc)')
+ifdef(`OPERATION_sublsh1_n', `
+  define(`ADDSUB',     subs)
+  define(`ADDSUBC',    sbcs)
+  define(`SETCY',      `rsbs   $1, $1, #0')
+  define(`RETVAL',     `adc    r0, $1, #1')
+  define(`SAVECY',     `sbc    $1, $1, $1')
+  define(`RESTCY',     `cmn    $1, #1')
+  define(`REVCY',      `sbc    $1, $1, $1
+                       cmn     $1, #1')
+  define(`INICYR',     `mvn    $1, #0')
+  define(`r10r11',     `r10')
+  define(`func',       mpn_sublsh1_n)
+  define(`func_nc',    mpn_sublsh1_nc)')
+
+MULFUNC_PROLOGUE(mpn_addlsh1_n mpn_sublsh1_n)
+
+ASM_START()
+PROLOGUE(func)                         C mpn_addlsh1_n or mpn_sublsh1_n, chosen by OPERATION_*
+       push    {r4-r10r11, r14}        C r10r11 expands to r11 for addlsh1 and r10 for sublsh1
+
+ifdef(`OPERATION_addlsh1_n', `
+       mvn     r11, #0
+')
+       INICYR( r14)                    C initial contents of carry-save register r14
+       subs    n, n, #3
+       blt     L(le2)                  C carry clear on branch path
+
+       cmn     r0, #0                  C clear carry
+       ldmia   vp!, {r8, r9, r10}
+       b       L(mid)
+
+L(top):        RESTCY( r14)                    C reload the carry flag from the value parked in r14
+       ADDSUBC r4, r4, r8
+       ADDSUBC r5, r5, r9
+       ADDSUBC r6, r6, r10
+       ldmia   vp!, {r8, r9, r10}
+       stmia   rp!, {r4, r5, r6}
+       REVCY(r14)                      C empty for addlsh1; two flag fixup insns for sublsh1
+       adcs    r8, r8, r8              C double three v limbs, current carry flag becomes the low bit
+       adcs    r9, r9, r9
+       adcs    r10, r10, r10
+       ldmia   up!, {r4, r5, r6}
+       SAVECY( r14, r11)               C park the carry-out of the doubling in r14
+       subs    n, n, #3
+       blt     L(exi)
+       RESTCY( r12)                    C second half: same steps with r12 as the carry-save register
+       ADDSUBC r4, r4, r8
+       ADDSUBC r5, r5, r9
+       ADDSUBC r6, r6, r10
+       ldmia   vp!, {r8, r9, r10}
+       stmia   rp!, {r4, r5, r6}
+       REVCY(r12)
+L(mid):        adcs    r8, r8, r8
+       adcs    r9, r9, r9
+       adcs    r10, r10, r10
+       ldmia   up!, {r4, r5, r6}
+       SAVECY( r12, r11)
+       subs    n, n, #3
+       bge     L(top)
+
+       mov     r7, r12                 C swap alternating...
+       mov     r12, r14                C ...carry-save...
+       mov     r14, r7                 C ...registers
+
+L(exi):        RESTCY( r12)
+       ADDSUBC r4, r4, r8
+       ADDSUBC r5, r5, r9
+       ADDSUBC r6, r6, r10
+       stmia   rp!, {r4, r5, r6}
+
+       REVCY(r12)
+L(le2):        tst     n, #1                   C n = {-1,-2,-3} map to [2], [1], [0]
+       beq     L(e1)
+
+L(e02):        tst     n, #2
+       beq     L(rt0)                  C no limbs left
+       ldm     vp, {r8, r9}            C two limbs left
+       adcs    r8, r8, r8
+       adcs    r9, r9, r9
+       ldm     up, {r4, r5}
+       SAVECY( r12, r11)
+       RESTCY( r14)
+       ADDSUBC r4, r4, r8
+       ADDSUBC r5, r5, r9
+       stm     rp, {r4, r5}
+       b       L(rt1)
+
+L(e1): ldr     r8, [vp]                C one limb left
+       adcs    r8, r8, r8
+       ldr     r4, [up]
+       SAVECY( r12, r11)
+       RESTCY( r14)
+       ADDSUBC r4, r4, r8
+       str     r4, [rp]
+
+L(rt1):        mov     r14, r12                C RETVAL below reads the last saved carry from r14
+       REVCY(r12)
+L(rt0):        RETVAL( r14)                    C r0 is built from r14 and the carry flag
+       pop     {r4-r10r11, r14}
+       bx      r14
+EPILOGUE()
diff --git a/mpn/arm/aorsmul_1.asm b/mpn/arm/aorsmul_1.asm

new file mode 100644 (file)

index 0000000..4668585
--- /dev/null
+++ b/mpn/arm/aorsmul_1.asm
@@ -0,0 +1,123 @@
+dnl  ARM mpn_addmul_1 and mpn_submul_1.
+
+dnl  Copyright 1998, 2000, 2001, 2003, 2012 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C           cycles/limb
+C StrongARM     ?
+C XScale        ?
+C Cortex-A8     ?
+C Cortex-A9     5.25
+C Cortex-A15    4
+
+define(`rp', `r0')
+define(`up', `r1')
+define(`n',  `r2')
+define(`vl', `r3')
+define(`rl', `r12')
+define(`ul', `r6')
+define(`r',  `lr')
+
+ifdef(`OPERATION_addmul_1', `
+  define(`ADDSUB',     adds)
+  define(`ADDSUBC',    adcs)
+  define(`CLRRCY',     `mov    $1, #0
+                       adds    r0, r0, #0')
+  define(`RETVAL',     `adc    r0, r4, #0')
+  define(`func',       mpn_addmul_1)')
+ifdef(`OPERATION_submul_1', `
+  define(`ADDSUB',     subs)
+  define(`ADDSUBC',    sbcs)
+  define(`CLRRCY',     `subs   $1, r0, r0')
+  define(`RETVAL',     `sbc    r0, r0, r0
+                       sub     r0, $1, r0')
+  define(`func',       mpn_submul_1)')
+
+MULFUNC_PROLOGUE(mpn_addmul_1 mpn_submul_1)
+
+ASM_START()
+PROLOGUE(func)                         C mpn_addmul_1 or mpn_submul_1, chosen by OPERATION_*
+       stmfd   sp!, { r4-r6, lr }      C lr is reused as the scratch register r
+       CLRRCY( r4)                     C r4 = 0 and carry flag = no carry (add) / no borrow (sub)
+       tst     n, #1                   C odd n: do one limb first
+       beq     L(skip1)
+       ldr     ul, [up], #4
+       ldr     rl, [rp, #0]
+       umull   r5, r4, ul, vl          C r4:r5 = ul * vl, low word in r5
+       ADDSUB  r, rl, r5
+       str     r, [rp], #4
+L(skip1):
+       tst     n, #2                   C bit 1 of n set: do two limbs
+       beq     L(skip2)
+       ldr     ul, [up], #4
+       ldr     rl, [rp, #0]
+       mov     r5, #0
+       umlal   r4, r5, ul, vl          C r5:r4 += ul * vl, r4 held the previous high word
+       ldr     ul, [up], #4
+       ADDSUBC r, rl, r4
+       ldr     rl, [rp, #4]
+       mov     r4, #0
+       umlal   r5, r4, ul, vl          C r4 and r5 swap low/high roles from limb to limb
+       str     r, [rp], #4
+       ADDSUBC r, rl, r5
+       str     r, [rp], #4
+L(skip2):
+       bics    n, n, #3                C n = limbs left, a multiple of 4
+       beq     L(rtn)
+
+       ldr     ul, [up], #4            C prime the pipeline, then enter the loop in the middle
+       ldr     rl, [rp, #0]
+       mov     r5, #0
+       umlal   r4, r5, ul, vl
+       b       L(in)
+
+L(top):        ldr     ul, [up], #4
+       ADDSUBC r, rl, r5
+       ldr     rl, [rp, #4]
+       mov     r5, #0
+       umlal   r4, r5, ul, vl
+       str     r, [rp], #4
+L(in): ldr     ul, [up], #4
+       ADDSUBC r, rl, r4
+       ldr     rl, [rp, #4]
+       mov     r4, #0
+       umlal   r5, r4, ul, vl
+       str     r, [rp], #4
+       ldr     ul, [up], #4
+       ADDSUBC r, rl, r5
+       ldr     rl, [rp, #4]
+       mov     r5, #0
+       umlal   r4, r5, ul, vl
+       str     r, [rp], #4
+       ldr     ul, [up], #4
+       ADDSUBC r, rl, r4
+       ldr     rl, [rp, #4]
+       mov     r4, #0
+       umlal   r5, r4, ul, vl
+       sub     n, n, #4
+       tst     n, n                    C zero test that keeps the carry flag
+       str     r, [rp], #4
+       bne     L(top)
+
+       ADDSUBC r, rl, r5               C last pending limb
+       str     r, [rp]
+
+L(rtn):        RETVAL( r4)                     C r0 = last high word r4 plus the carry (add) or borrow (sub)
+       ldmfd   sp!, { r4-r6, pc }      C restore and return, saved lr is popped into pc
+EPILOGUE()
diff --git a/mpn/arm/arm-defs.m4 b/mpn/arm/arm-defs.m4

index 9d169e822dc11380a45fa04e85f830d6f1639075..95370d54a6213d75ffcc7fac9aa7dd7b95c6285b 100644 (file)
--- a/mpn/arm/arm-defs.m4
+++ b/mpn/arm/arm-defs.m4
@@ -2,7 +2,7 @@ divert(-1)
  
  dnl  m4 macros for ARM assembler.
  
-dnl  Copyright 2001 Free Software Foundation, Inc.
+dnl  Copyright 2001, 2012, 2013 Free Software Foundation, Inc.
  dnl
  dnl  This file is part of the GNU MP Library.
  dnl
@@ -23,7 +23,7 @@ dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
  dnl  Standard commenting is with @, the default m4 # is for constants and we
  dnl  don't want to disable macro expansions in or after them.
  
-changecom(@)
+changecom(@&*$)
  
  
  dnl  APCS register names.
@@ -47,4 +47,34 @@ deflit(sp,r13)
  deflit(lr,r14)
  deflit(pc,r15)
  
+
+define(`lea_list', `')
+define(`lea_num',0)
+
+dnl  LEA(reg,gmp_symbol)
+dnl
+dnl  Load the address of gmp_symbol into a register.  The gmp_symbol must be
+dnl  either local or protected/hidden, since we assume it has a fixed distance
+dnl  from the point of use.
+
+define(`LEA',`dnl
+ldr    $1, L(ptr`'lea_num)
+ifdef(`PIC',dnl
+`dnl
+L(bas`'lea_num):dnl
+       add     $1, $1, pc`'dnl
+       m4append(`lea_list',`
+L(ptr'lea_num`):       .word   GSYM_PREFIX`'$2-L(bas'lea_num`)-8')
+       define(`lea_num', eval(lea_num+1))dnl
+',`dnl
+       m4append(`lea_list',`
+L(ptr'lea_num`):       .word   GSYM_PREFIX`'$2')
+       define(`lea_num', eval(lea_num+1))dnl
+')dnl
+')
+
+define(`EPILOGUE_cpu',
+`lea_list
+       SIZE(`$1',.-`$1')')
+
  divert
diff --git a/mpn/arm/bdiv_dbm1c.asm b/mpn/arm/bdiv_dbm1c.asm

new file mode 100644 (file)

index 0000000..6ce9802
--- /dev/null
+++ b/mpn/arm/bdiv_dbm1c.asm
@@ -0,0 +1,101 @@
+dnl  ARM mpn_bdiv_dbm1c.
+
+dnl  Copyright 2008, 2011, 2012 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C           cycles/limb
+C StrongARM     ?
+C XScale        ?
+C Cortex-A8     ?
+C Cortex-A9     4.25
+C Cortex-A15    2.5
+
+C TODO
+C  * Try using umlal or umaal.
+C  * Try using ldm/stm.
+
+define(`qp',     `r0')
+define(`up',     `r1')
+define(`n',      `r2')
+define(`bd',     `r3')
+define(`cy',     `sp,#0')
+
+ASM_START()
+       TEXT
+       ALIGN(16)
+PROLOGUE(mpn_bdiv_dbm1c)
+       push    {r4, r5, r6, r7, r8}    C 5 registers, 20 bytes
+       ldr     r4, [up], #4            C first u limb
+       ldr     r5, [sp, #20]           C stack word that was at sp,#0 on entry, i.e. the cy slot named above
+       ands    r12, n, #3              C n mod 4 selects the entry point into the 4-way unrolled loop
+       beq     L(fi0)
+       cmp     r12, #2
+       bcc     L(fi1)
+       beq     L(fi2)
+
+L(fi3):        umull   r8, r12, r4, bd         C r12:r8 = limb * bd
+       ldr     r4, [up], #4
+       b       L(lo3)
+
+L(fi0):        umull   r6, r7, r4, bd          C r7:r6 = limb * bd
+       ldr     r4, [up], #4
+       b       L(lo0)
+
+L(fi1):        subs    n, n, #1
+       umull   r8, r12, r4, bd
+       bls     L(wd1)                  C n = 1: only the final step remains
+       ldr     r4, [up], #4
+       b       L(lo1)
+
+L(fi2):        umull   r6, r7, r4, bd
+       ldr     r4, [up], #4
+       b       L(lo2)
+
+L(top):        ldr     r4, [up], #4
+       subs    r5, r5, r6              C running value minus low product word
+       str     r5, [qp], #4            C store as the next q limb
+       sbc     r5, r5, r7              C then subtract high product word and borrow
+L(lo1):        umull   r6, r7, r4, bd
+       ldr     r4, [up], #4
+       subs    r5, r5, r8
+       str     r5, [qp], #4
+       sbc     r5, r5, r12
+L(lo0):        umull   r8, r12, r4, bd
+       ldr     r4, [up], #4
+       subs    r5, r5, r6
+       str     r5, [qp], #4
+       sbc     r5, r5, r7
+L(lo3):        umull   r6, r7, r4, bd
+       ldr     r4, [up], #4
+       subs    r5, r5, r8
+       str     r5, [qp], #4
+       sbc     r5, r5, r12
+L(lo2):        subs    n, n, #4
+       umull   r8, r12, r4, bd         C umull leaves the flags from subs for bhi
+       bhi     L(top)
+
+L(wd2):        subs    r5, r5, r6              C wind down: last two limbs
+       str     r5, [qp], #4
+       sbc     r5, r5, r7
+L(wd1):        subs    r5, r5, r8
+       str     r5, [qp]
+       sbc     r0, r5, r12             C return the final running value in r0
+       pop     {r4, r5, r6, r7, r8}
+       bx      lr
+EPILOGUE()
diff --git a/mpn/arm/com.asm b/mpn/arm/com.asm

new file mode 100644 (file)

index 0000000..437b9f4
--- /dev/null
+++ b/mpn/arm/com.asm
@@ -0,0 +1,63 @@
+dnl  ARM mpn_com.
+
+dnl  Copyright 2003, 2012 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C           cycles/limb
+C StrongARM     ?
+C XScale        ?
+C Cortex-A8     ?
+C Cortex-A9     2.0
+C Cortex-A15    ?
+
+define(`rp', `r0')
+define(`up', `r1')
+define(`n',  `r2')
+
+ASM_START()
+PROLOGUE(mpn_com)                      C store the bitwise complement of the n limbs at up to rp
+       tst     n, #1                   C odd n: do one limb first
+       beq     L(skip1)
+       ldr     r3, [up], #4
+       mvn     r3, r3
+       str     r3, [rp], #4
+L(skip1):
+       tst     n, #2                   C bit 1 of n set: do two limbs
+       beq     L(skip2)
+       ldmia   up!, { r3, r12 }                C load 2 limbs
+       mvn     r3, r3
+       mvn     r12, r12
+       stmia   rp!, { r3, r12 }                C store 2 limbs
+L(skip2):
+       bics    n, n, #3                C n = limbs left, a multiple of 4
+       beq     L(rtn)
+       stmfd   sp!, { r7, r8, r9 }             C save regs on stack; r7 is not used by the loop below
+
+L(top):        ldmia   up!, { r3, r8, r9, r12 }        C load 4 limbs
+       subs    n, n, #4                C flags set here are still valid at bne
+       mvn     r3, r3
+       mvn     r8, r8
+       mvn     r9, r9
+       mvn     r12, r12
+       stmia   rp!, { r3, r8, r9, r12 }        C store 4 limbs
+       bne     L(top)
+
+       ldmfd   sp!, { r7, r8, r9 }             C restore regs from stack
+L(rtn):        bx      lr
+EPILOGUE()
diff --git a/mpn/arm/copyd.asm b/mpn/arm/copyd.asm

index 718b762b914b5771c9cff15640df0e973799a6fb..50e8c4e1bb7b8641027a8d25ac78701396e448b9 100644 (file)
--- a/mpn/arm/copyd.asm
+++ b/mpn/arm/copyd.asm
@@ -1,6 +1,6 @@
  dnl  ARM mpn_copyd.
  
-dnl  Copyright 2003 Free Software Foundation, Inc.
+dnl  Copyright 2003, 2012 Free Software Foundation, Inc.
  
  dnl  This file is part of the GNU MP Library.
  
@@ -19,12 +19,16 @@ dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
  
  include(`../config.m4')
  
-C This runs at 3 cycles/limb in the StrongARM.
-
-define(`rp',`r0')
-define(`up',`r1')
-define(`n',`r2')
+C           cycles/limb
+C StrongARM     ?
+C XScale        ?
+C Cortex-A8     ?
+C Cortex-A9     1.5
+C Cortex-A15    ?
  
+define(`rp', `r0')
+define(`up', `r1')
+define(`n',  `r2')
  
  ASM_START()
  PROLOGUE(mpn_copyd)
@@ -44,15 +48,14 @@ L(skip1):
         stmda   rp!, { r3, r12 }                C store 2 limbs
  L(skip2):
         bics    n, n, #3
-       beq     L(return)
+       beq     L(rtn)
         stmfd   sp!, { r7, r8, r9 }             C save regs on stack
-L(loop):
-       ldmda   up!, { r3, r8, r9, r12 }        C load 4 limbs
-       ldr     r7, [rp, #-12]                  C cache allocate
+
+L(top):        ldmda   up!, { r3, r8, r9, r12 }        C load 4 limbs
         subs    n, n, #4
         stmda   rp!, { r3, r8, r9, r12 }        C store 4 limbs
-       bne     L(loop)
+       bne     L(top)
+
         ldmfd   sp!, { r7, r8, r9 }             C restore regs from stack
-L(return):
-       mov     pc, lr
-EPILOGUE(mpn_copyd)
+L(rtn):        bx      lr
+EPILOGUE()
diff --git a/mpn/arm/copyi.asm b/mpn/arm/copyi.asm

index 5ee93acd4c6446e11a8aa68e3ab5ac1a1eb7588f..ba9824192ba20ba994bf8c8fd1d02f79c829e4e1 100644 (file)
--- a/mpn/arm/copyi.asm
+++ b/mpn/arm/copyi.asm
@@ -1,6 +1,6 @@
  dnl  ARM mpn_copyi.
  
-dnl  Copyright 2003 Free Software Foundation, Inc.
+dnl  Copyright 2003, 2012 Free Software Foundation, Inc.
  
  dnl  This file is part of the GNU MP Library.
  
@@ -19,12 +19,16 @@ dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
  
  include(`../config.m4')
  
-C This runs at 3 cycles/limb in the StrongARM.
-
-define(`rp',`r0')
-define(`up',`r1')
-define(`n',`r2')
+C           cycles/limb
+C StrongARM     ?
+C XScale        ?
+C Cortex-A8     ?
+C Cortex-A9     1.5
+C Cortex-A15    ?
  
+define(`rp', `r0')
+define(`up', `r1')
+define(`n',  `r2')
  
  ASM_START()
  PROLOGUE(mpn_copyi)
@@ -39,15 +43,14 @@ L(skip1):
         stmia   rp!, { r3, r12 }                C store 2 limbs
  L(skip2):
         bics    n, n, #3
-       beq     L(return)
+       beq     L(rtn)
         stmfd   sp!, { r7, r8, r9 }             C save regs on stack
-L(loop):
-       ldmia   up!, { r3, r8, r9, r12 }        C load 4 limbs
-       ldr     r7, [rp, #12]                   C cache allocate
+
+L(top):        ldmia   up!, { r3, r8, r9, r12 }        C load 4 limbs
         subs    n, n, #4
         stmia   rp!, { r3, r8, r9, r12 }        C store 4 limbs
-       bne     L(loop)
+       bne     L(top)
+
         ldmfd   sp!, { r7, r8, r9 }             C restore regs from stack
-L(return):
-       mov     pc, lr
-EPILOGUE(mpn_copyi)
+L(rtn):        bx      lr
+EPILOGUE()
diff --git a/mpn/arm/gmp-mparam.h b/mpn/arm/gmp-mparam.h

index 431aa4a30f6035a641dd3d3cc9f63efb59cf10ac..e6321dde8aa49994e42085c8640a2560e2e78a41 100644 (file)
--- a/mpn/arm/gmp-mparam.h
+++ b/mpn/arm/gmp-mparam.h
@@ -21,122 +21,96 @@ along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
  #define GMP_LIMB_BITS 32
  #define BYTES_PER_MP_LIMB 4
  
-/* 593MHz ARM (gcc50.fsffrance.org) */
+/* 1193MHz ARM (gcc55.fsffrance.org) */
  
  #define DIVREM_1_NORM_THRESHOLD              0  /* preinv always */
  #define DIVREM_1_UNNORM_THRESHOLD            0  /* always */
  #define MOD_1_NORM_THRESHOLD                 0  /* always */
  #define MOD_1_UNNORM_THRESHOLD               0  /* always */
-#define MOD_1N_TO_MOD_1_1_THRESHOLD         17
-#define MOD_1U_TO_MOD_1_1_THRESHOLD          9
-#define MOD_1_1_TO_MOD_1_2_THRESHOLD     MP_SIZE_T_MAX
+#define MOD_1N_TO_MOD_1_1_THRESHOLD         56
+#define MOD_1U_TO_MOD_1_1_THRESHOLD         11
+#define MOD_1_1_TO_MOD_1_2_THRESHOLD         0  /* never mpn_mod_1_1p */
  #define MOD_1_2_TO_MOD_1_4_THRESHOLD     MP_SIZE_T_MAX
-#define PREINV_MOD_1_TO_MOD_1_THRESHOLD     27
+#define PREINV_MOD_1_TO_MOD_1_THRESHOLD     71
  #define USE_PREINV_DIVREM_1                  1  /* preinv always */
  #define DIVREM_2_THRESHOLD                   0  /* preinv always */
  #define DIVEXACT_1_THRESHOLD                 0  /* always */
-#define BMOD_1_TO_MOD_1_THRESHOLD           44
+#define BMOD_1_TO_MOD_1_THRESHOLD           41
  
-#define MUL_TOOM22_THRESHOLD                34
-#define MUL_TOOM33_THRESHOLD               121
-#define MUL_TOOM44_THRESHOLD               191
-#define MUL_TOOM6H_THRESHOLD               366
-#define MUL_TOOM8H_THRESHOLD               547
+#define MUL_TOOM22_THRESHOLD                36
+#define MUL_TOOM33_THRESHOLD               125
+#define MUL_TOOM44_THRESHOLD               193
+#define MUL_TOOM6H_THRESHOLD               303
+#define MUL_TOOM8H_THRESHOLD               418
  
-#define MUL_TOOM32_TO_TOOM43_THRESHOLD     129
-#define MUL_TOOM32_TO_TOOM53_THRESHOLD     191
-#define MUL_TOOM42_TO_TOOM53_THRESHOLD     117
-#define MUL_TOOM42_TO_TOOM63_THRESHOLD     137
+#define MUL_TOOM32_TO_TOOM43_THRESHOLD     125
+#define MUL_TOOM32_TO_TOOM53_THRESHOLD     176
+#define MUL_TOOM42_TO_TOOM53_THRESHOLD     114
+#define MUL_TOOM42_TO_TOOM63_THRESHOLD     129
  
-#define SQR_BASECASE_THRESHOLD              13
+#define SQR_BASECASE_THRESHOLD              12
  #define SQR_TOOM2_THRESHOLD                 78
-#define SQR_TOOM3_THRESHOLD                141
+#define SQR_TOOM3_THRESHOLD                137
  #define SQR_TOOM4_THRESHOLD                212
-#define SQR_TOOM6_THRESHOLD                330
+#define SQR_TOOM6_THRESHOLD                306
  #define SQR_TOOM8_THRESHOLD                422
  
-#define MULMOD_BNM1_THRESHOLD               21
-#define SQRMOD_BNM1_THRESHOLD               25
+#define MULMOD_BNM1_THRESHOLD               20
+#define SQRMOD_BNM1_THRESHOLD               26
  
-#define MUL_FFT_MODF_THRESHOLD             404  /* k = 5 */
+#define MUL_FFT_MODF_THRESHOLD             436  /* k = 5 */
  #define MUL_FFT_TABLE3                                      \
-  { {    404, 5}, {     21, 6}, {     11, 5}, {     25, 6}, \
-    {     13, 5}, {     27, 6}, {     28, 7}, {     15, 6}, \
+  { {    436, 5}, {     27, 6}, {     28, 7}, {     15, 6}, \
      {     32, 7}, {     17, 6}, {     35, 7}, {     19, 6}, \
-    {     39, 7}, {     21, 6}, {     43, 7}, {     29, 8}, \
-    {     15, 7}, {     35, 8}, {     19, 7}, {     41, 8}, \
-    {     23, 7}, {     49, 8}, {     27, 7}, {     55, 9}, \
-    {     15, 8}, {     31, 7}, {     63, 8}, {     43, 9}, \
-    {     23, 8}, {     55, 9}, {     31, 8}, {     71, 9}, \
-    {     39, 8}, {     83, 9}, {     47, 8}, {     99, 9}, \
-    {     55,10}, {     31, 9}, {     63, 8}, {    127, 9}, \
-    {     79,10}, {     47, 9}, {    103,11}, {     31,10}, \
-    {     63, 9}, {    135,10}, {     95, 9}, {    191,10}, \
-    {    111,11}, {     63,10}, {    127, 9}, {    255,10}, \
-    {    143, 9}, {    287,10}, {    159, 9}, {    319,11}, \
-    {     95,10}, {    191, 9}, {    383,10}, {    207,12}, \
-    {     63,11}, {    127,10}, {    287,11}, {    159,10}, \
-    {    351,11}, {    191,10}, {    415,11}, {    223,12}, \
-    {    127,11}, {    255,10}, {    511,11}, {    319,10}, \
-    {    639,11}, {    351,12}, {    191,11}, {    415,13}, \
-    {   8192,14}, {  16384,15}, {  32768,16} }
-#define MUL_FFT_TABLE3_SIZE 79
+    {     39, 7}, {     29, 8}, {     15, 7}, {     35, 8}, \
+    {     19, 7}, {     41, 8}, {     23, 7}, {     49, 8}, \
+    {     27, 9}, {     15, 8}, {     31, 7}, {     63, 8}, \
+    {    256, 9}, {    512,10}, {   1024,11}, {   2048,12}, \
+    {   4096,13}, {   8192,14}, {  16384,15}, {  32768,16} }
+#define MUL_FFT_TABLE3_SIZE 28
  #define MUL_FFT_THRESHOLD                 5760
  
-#define SQR_FFT_MODF_THRESHOLD             400  /* k = 5 */
+#define SQR_FFT_MODF_THRESHOLD             404  /* k = 5 */
  #define SQR_FFT_TABLE3                                      \
-  { {    400, 5}, {     25, 6}, {     13, 5}, {     27, 6}, \
-    {     25, 7}, {     13, 6}, {     28, 7}, {     15, 6}, \
-    {     32, 7}, {     19, 6}, {     39, 7}, {     29, 8}, \
-    {     15, 7}, {     35, 8}, {     19, 7}, {     41, 8}, \
-    {     23, 7}, {     47, 8}, {     27, 7}, {     55, 9}, \
-    {     15, 8}, {     39, 9}, {     23, 8}, {     55,10}, \
-    {     15, 9}, {     31, 8}, {     67, 9}, {     39, 8}, \
-    {     79, 9}, {     47, 8}, {     95, 9}, {     55,10}, \
-    {     31, 9}, {     79,10}, {     47, 9}, {    103,11}, \
-    {     31,10}, {     63, 9}, {    135,10}, {     79, 9}, \
-    {    159, 8}, {    319,10}, {     95, 9}, {    191,10}, \
-    {    111,11}, {     63,10}, {    127, 9}, {    271,10}, \
-    {    143, 9}, {    303,10}, {    159,11}, {     95,10}, \
-    {    191, 9}, {    383,10}, {    207,12}, {     63,11}, \
-    {    127,10}, {    303,11}, {    159,10}, {    367,11}, \
-    {    191,10}, {    415,11}, {    223,10}, {    447,12}, \
-    {    127,11}, {    255,10}, {    511,11}, {    287,10}, \
-    {    607,11}, {    319,10}, {    639,11}, {    351,12}, \
-    {    191,11}, {    447,13}, {   8192,14}, {  16384,15}, \
-    {  32768,16} }
-#define SQR_FFT_TABLE3_SIZE 77
-#define SQR_FFT_THRESHOLD                 3136
+  { {    404, 5}, {     13, 4}, {     27, 5}, {     27, 6}, \
+    {     28, 7}, {     15, 6}, {     32, 7}, {     17, 6}, \
+    {     35, 7}, {     29, 8}, {     15, 7}, {     35, 8}, \
+    {     19, 7}, {     41, 8}, {     23, 7}, {     47, 8}, \
+    {     27, 9}, {     15, 8}, {     39, 9}, {    512,10}, \
+    {   1024,11}, {   2048,12}, {   4096,13}, {   8192,14}, \
+    {  16384,15}, {  32768,16} }
+#define SQR_FFT_TABLE3_SIZE 26
+#define SQR_FFT_THRESHOLD                 3776
  
  #define MULLO_BASECASE_THRESHOLD             0  /* always */
-#define MULLO_DC_THRESHOLD                 120
-#define MULLO_MUL_N_THRESHOLD            11317
-
-#define DC_DIV_QR_THRESHOLD                134
-#define DC_DIVAPPR_Q_THRESHOLD             442
-#define DC_BDIV_QR_THRESHOLD               127
-#define DC_BDIV_Q_THRESHOLD                296
-
-#define INV_MULMOD_BNM1_THRESHOLD           66
-#define INV_NEWTON_THRESHOLD               458
-#define INV_APPR_THRESHOLD                 454
-
-#define BINV_NEWTON_THRESHOLD              494
-#define REDC_1_TO_REDC_N_THRESHOLD         116
-
-#define MU_DIV_QR_THRESHOLD               2914
-#define MU_DIVAPPR_Q_THRESHOLD            3091
-#define MUPI_DIV_QR_THRESHOLD              221
-#define MU_BDIV_QR_THRESHOLD              2259
-#define MU_BDIV_Q_THRESHOLD               2747
-
-#define MATRIX22_STRASSEN_THRESHOLD         17
-#define HGCD_THRESHOLD                     109
-#define GCD_DC_THRESHOLD                   697
-#define GCDEXT_DC_THRESHOLD                535
+#define MULLO_DC_THRESHOLD                 137
+#define MULLO_MUL_N_THRESHOLD            11479
+
+#define DC_DIV_QR_THRESHOLD                150
+#define DC_DIVAPPR_Q_THRESHOLD             494
+#define DC_BDIV_QR_THRESHOLD               148
+#define DC_BDIV_Q_THRESHOLD                345
+
+#define INV_MULMOD_BNM1_THRESHOLD           70
+#define INV_NEWTON_THRESHOLD               474
+#define INV_APPR_THRESHOLD                 478
+
+#define BINV_NEWTON_THRESHOLD              542
+#define REDC_1_TO_REDC_N_THRESHOLD         117
+
+#define MU_DIV_QR_THRESHOLD               2089
+#define MU_DIVAPPR_Q_THRESHOLD            2172
+#define MUPI_DIV_QR_THRESHOLD              225
+#define MU_BDIV_QR_THRESHOLD              1528
+#define MU_BDIV_Q_THRESHOLD               2089
+
+#define MATRIX22_STRASSEN_THRESHOLD         16
+#define HGCD_THRESHOLD                     197
+#define GCD_DC_THRESHOLD                   902
+#define GCDEXT_DC_THRESHOLD                650
  #define JACOBI_BASE_METHOD                   2
  
-#define GET_STR_DC_THRESHOLD                14
-#define GET_STR_PRECOMPUTE_THRESHOLD        29
-#define SET_STR_DC_THRESHOLD               321
-#define SET_STR_PRECOMPUTE_THRESHOLD      1037
+#define GET_STR_DC_THRESHOLD                20
+#define GET_STR_PRECOMPUTE_THRESHOLD        39
+#define SET_STR_DC_THRESHOLD              1045
+#define SET_STR_PRECOMPUTE_THRESHOLD      2147
diff --git a/mpn/arm/invert_limb.asm b/mpn/arm/invert_limb.asm

index bbc9b9a60e69c5232f1964c73af61d59c1708ee9..d717404f9a56e38d1538ed59763ba869dfa2eb25 100644 (file)
--- a/mpn/arm/invert_limb.asm
+++ b/mpn/arm/invert_limb.asm
@@ -1,6 +1,6 @@
  dnl  ARM mpn_invert_limb -- Invert a normalized limb.
  
-dnl  Copyright 2001, 2009, 2011 Free Software Foundation, Inc.
+dnl  Copyright 2001, 2009, 2011, 2012 Free Software Foundation, Inc.
  
  dnl  This file is part of the GNU MP Library.
  
@@ -21,8 +21,7 @@ include(`../config.m4')
  
  ASM_START()
  PROLOGUE(mpn_invert_limb)
-       ldr     r2, L(4)
-L(2):  add     r2, pc, r2
+       LEA(    r2, approx_tab-512)
         mov     r3, r0, lsr #23
         mov     r3, r3, asl #1
         ldrh    r3, [r3, r2]
@@ -43,9 +42,6 @@ L(2): add     r2, pc, r2
         adc     r3, r3, r0
         rsb     r0, r3, r2
         bx      lr
-
-       ALIGN(4)
-L(4):  .word   approx_tab-8-512-L(2)
  EPILOGUE()
  
         .section .rodata
diff --git a/mpn/arm/logops_n.asm b/mpn/arm/logops_n.asm

new file mode 100644 (file)

index 0000000..6326ca9
--- /dev/null
+++ b/mpn/arm/logops_n.asm
@@ -0,0 +1,127 @@
+dnl  ARM mpn_and_n, mpn_andn_n, mpn_nand_n, etc.
+
+dnl  Contributed to the GNU project by Torbjorn Granlund.
+
+dnl  Copyright 1997, 2000, 2001, 2012 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C            cycles/limb             cycles/limb
+C          and andn ior xor         nand iorn nior xnor
+C StrongARM     ?                       ?
+C XScale        ?                       ?
+C Cortex-A8     ?                       ?
+C Cortex-A9    2.5-2.72                2.75-3
+C Cortex-A15    ?                       ?
+
+C TODO
+C  * It seems that 2.25 c/l and 2.75 c/l are possible for A9.
+C  * Debug popping issue, see comment below.
+
+define(`rp', `r0')
+define(`up', `r1')
+define(`vp', `r2')
+define(`n',  `r3')
+
+define(`POSTOP')
+
+ifdef(`OPERATION_and_n',`
+  define(`func',    `mpn_and_n')
+  define(`LOGOP',   `and       $1, $2, $3')')
+ifdef(`OPERATION_andn_n',`
+  define(`func',    `mpn_andn_n')
+  define(`LOGOP',   `bic       $1, $2, $3')')
+ifdef(`OPERATION_nand_n',`
+  define(`func',    `mpn_nand_n')
+  define(`POSTOP',  `mvn       $1, $1')
+  define(`LOGOP',   `and       $1, $2, $3')')
+ifdef(`OPERATION_ior_n',`
+  define(`func',    `mpn_ior_n')
+  define(`LOGOP',   `orr       $1, $2, $3')')
+ifdef(`OPERATION_iorn_n',`
+  define(`func',    `mpn_iorn_n')
+  define(`POSTOP',  `mvn       $1, $1')
+  define(`LOGOP',   `bic       $1, $3, $2')')
+ifdef(`OPERATION_nior_n',`
+  define(`func',    `mpn_nior_n')
+  define(`POSTOP',  `mvn       $1, $1')
+  define(`LOGOP',   `orr       $1, $2, $3')')
+ifdef(`OPERATION_xor_n',`
+  define(`func',    `mpn_xor_n')
+  define(`LOGOP',   `eor       $1, $2, $3')')
+ifdef(`OPERATION_xnor_n',`
+  define(`func',    `mpn_xnor_n')
+  define(`POSTOP',  `mvn       $1, $1')
+  define(`LOGOP',   `eor       $1, $2, $3')')
+
+MULFUNC_PROLOGUE(mpn_and_n mpn_andn_n mpn_nand_n mpn_ior_n mpn_iorn_n mpn_nior_n mpn_xor_n mpn_xnor_n)
+
+ASM_START()
+PROLOGUE(func)
+       push    { r8, r9, r10 }                 C saved on every path, popped at L(rtn)
+       tst     n, #1                           C bit 0 of n set?
+       beq     L(skip1)
+       ldr     r10, [vp], #4                   C load 1 limb from each source
+       ldr     r12, [up], #4
+       LOGOP(  r12, r12, r10)                  C operation selected by the OPERATION_ ifdefs
+       POSTOP( r12)                            C mvn for the negated variants, else empty
+       str     r12, [rp], #4
+L(skip1):
+       tst     n, #2                           C bit 1 of n set?
+       beq     L(skip2)
+       ldmia   vp!, { r10, r12 }               C load 2 limbs from each source
+       ldmia   up!, { r8, r9 }
+       LOGOP(  r8, r8, r10)
+       LOGOP(  r9, r9, r12)
+       POSTOP( r8)
+       POSTOP( r9)
+       stmia   rp!, { r8, r9 }                 C store 2 limbs
+L(skip2):
+       bics    n, n, #3                        C clear low 2 bits, n is now a multiple of 4
+       beq     L(rtn)
+       push    { r4, r5, r6, r7 }              C extra registers for the 4-limb loop
+
+       ldmia   vp!, { r8, r9, r10, r12 }       C preload first 4 vp limbs
+       b       L(mid)                          C enter the loop at its second half
+
+L(top):        ldmia   vp!, { r8, r9, r10, r12 }       C load next 4 vp limbs
+       POSTOP( r4)                             C finish and store the previous 4 results
+       POSTOP( r5)
+       POSTOP( r6)
+       POSTOP( r7)
+       stmia   rp!, { r4, r5, r6, r7 }
+L(mid):        sub     n, n, #4
+       ldmia   up!, { r4, r5, r6, r7 }         C load 4 up limbs
+       teq     n, #0                           C Z flag for the bne below
+       LOGOP(  r4, r4, r8)
+       LOGOP(  r5, r5, r9)
+       LOGOP(  r6, r6, r10)
+       LOGOP(  r7, r7, r12)
+       bne     L(top)
+
+       POSTOP( r4)                             C finish and store the last 4 results
+       POSTOP( r5)
+       POSTOP( r6)
+       POSTOP( r7)
+       stmia   rp!, { r4, r5, r6, r7 }
+
+       pop     { r4, r5, r6, r7 }      C popping r8-r10 here strangely fails
+
+L(rtn):        pop     { r8, r9, r10 }
+       bx      r14
+EPILOGUE()
diff --git a/mpn/arm/lshift.asm b/mpn/arm/lshift.asm

new file mode 100644 (file)

index 0000000..e072d9d
--- /dev/null
+++ b/mpn/arm/lshift.asm
@@ -0,0 +1,76 @@
+dnl  ARM mpn_lshift.
+
+dnl  Contributed to the GNU project by Torbjorn Granlund.
+
+dnl  Copyright 2012 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C           cycles/limb
+C StrongARM     ?
+C XScale        ?
+C Cortex-A8     ?
+C Cortex-A9     3.5
+C Cortex-A15    ?
+
+define(`rp',  `r0')
+define(`up',  `r1')
+define(`n',   `r2')
+define(`cnt', `r3')
+define(`tnc', `r12')
+
+ASM_START()
+PROLOGUE(mpn_lshift)
+       add     up, up, n, lsl #2               C up += 4*n bytes, work from the top limb down
+       push    {r4, r6, r7, r8}
+       ldr     r4, [up, #-4]!                  C r4 = highest source limb, kept for the return value
+       add     rp, rp, n, lsl #2               C rp += 4*n bytes
+       rsb     tnc, cnt, #32                   C tnc = 32 - cnt
+
+       lsl     r7, r4, cnt                     C r7 = upper part of the highest result limb
+       tst     n, #1
+       beq     L(evn)                  C n even
+
+L(odd):        subs    n, n, #2
+       bcc     L(1)                    C n = 1
+       ldr     r8, [up, #-4]!
+       b       L(mid)                          C enter the 2-limb loop at its second half
+
+L(evn):        ldr     r6, [up, #-4]!
+       subs    n, n, #2
+       beq     L(end)                          C n = 2
+
+L(top):        ldr     r8, [up, #-4]!                  C next lower limb
+       orr     r7, r7, r6, lsr tnc             C merge in the high bits of r6
+       str     r7, [rp, #-4]!
+       lsl     r7, r6, cnt                     C start the next result limb
+L(mid):        ldr     r6, [up, #-4]!
+       orr     r7, r7, r8, lsr tnc
+       str     r7, [rp, #-4]!
+       lsl     r7, r8, cnt
+       subs    n, n, #2                        C two limbs per iteration
+       bgt     L(top)
+
+L(end):        orr     r7, r7, r6, lsr tnc
+       str     r7, [rp, #-4]!
+       lsl     r7, r6, cnt
+L(1):  str     r7, [rp, #-4]                   C lowest result limb, low cnt bits are zero
+       lsr     r0, r4, tnc                     C return the bits shifted out of the highest limb
+       pop     {r4, r6, r7, r8}
+       bx      r14
+EPILOGUE()
diff --git a/mpn/arm/lshiftc.asm b/mpn/arm/lshiftc.asm

new file mode 100644 (file)

index 0000000..f82de0c
--- /dev/null
+++ b/mpn/arm/lshiftc.asm
@@ -0,0 +1,83 @@
+dnl  ARM mpn_lshiftc.
+
+dnl  Contributed to the GNU project by Torbjorn Granlund.
+
+dnl  Copyright 2012 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C           cycles/limb
+C StrongARM     ?
+C XScale        ?
+C Cortex-A8     ?
+C Cortex-A9     4.0
+C Cortex-A15    ?
+
+define(`rp',  `r0')
+define(`up',  `r1')
+define(`n',   `r2')
+define(`cnt', `r3')
+define(`tnc', `r12')
+
+ASM_START()
+PROLOGUE(mpn_lshiftc)
+       add     up, up, n, lsl #2               C up += 4*n bytes, work from the top limb down
+       push    {r4, r6, r7, r8}
+       ldr     r4, [up, #-4]!                  C r4 = highest source limb, kept for the return value
+       add     rp, rp, n, lsl #2               C rp += 4*n bytes
+       rsb     tnc, cnt, #32                   C tnc = 32 - cnt
+       mvn     r6, r4                          C complement before shifting
+
+       lsl     r7, r6, cnt                     C r7 = upper part of the highest result limb
+       tst     n, #1
+       beq     L(evn)                  C n even
+
+L(odd):        subs    n, n, #2
+       bcc     L(1)                    C n = 1
+       ldr     r8, [up, #-4]!
+       mvn     r8, r8
+       b       L(mid)                          C enter the 2-limb loop at its second half
+
+L(evn):        ldr     r6, [up, #-4]!
+       mvn     r6, r6
+       subs    n, n, #2
+       beq     L(end)                          C n = 2
+
+L(top):        ldr     r8, [up, #-4]!                  C next lower limb
+       orr     r7, r7, r6, lsr tnc             C merge in the high bits of r6
+       str     r7, [rp, #-4]!
+       mvn     r8, r8
+       lsl     r7, r6, cnt                     C start the next result limb
+L(mid):        ldr     r6, [up, #-4]!
+       orr     r7, r7, r8, lsr tnc
+       str     r7, [rp, #-4]!
+       mvn     r6, r6
+       lsl     r7, r8, cnt
+       subs    n, n, #2                        C two limbs per iteration
+       bgt     L(top)
+
+L(end):        orr     r7, r7, r6, lsr tnc
+       str     r7, [rp, #-4]!
+       lsl     r7, r6, cnt
+L(1):  mvn     r6, #0                          C all ones
+       orr     r7, r7, r6, lsr tnc             C set the low cnt bits of the lowest result limb
+       str     r7, [rp, #-4]
+       lsr     r0, r4, tnc                     C return bits shifted out of the uncomplemented top limb
+       pop     {r4, r6, r7, r8}
+       bx      r14
+EPILOGUE()
diff --git a/mpn/arm/mod_34lsub1.asm b/mpn/arm/mod_34lsub1.asm

new file mode 100644 (file)

index 0000000..4643699
--- /dev/null
+++ b/mpn/arm/mod_34lsub1.asm
@@ -0,0 +1,109 @@
+dnl  ARM mpn_mod_34lsub1 -- remainder modulo 2^24-1.
+
+dnl  Copyright 2012 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C           cycles/limb
+C StrongARM     ?
+C XScale        ?
+C Cortex-A8     ?
+C Cortex-A9     1.33
+C Cortex-A15    ?
+
+define(`ap',   r0)
+define(`n',    r1)
+
+C mp_limb_t mpn_mod_34lsub1 (mp_srcptr ap, mp_size_t n)
+
+C TODO
+C  * Write cleverer summation code.
+C  * Consider loading 6 64-bit aligned registers at a time, to approach 1 c/l.
+
+ASM_START()
+       TEXT
+       ALIGN(32)
+PROLOGUE(mpn_mod_34lsub1)
+       push    { r4, r5, r6, r7 }
+
+       subs    n, n, #3
+       mov     r7, #0                          C r7 collects carries out of the 3-limb sum
+       blt     L(le2)                  C n <= 2
+
+       ldmia   ap!, { r2, r3, r12 }            C r2, r3, r12 = three running sums
+       subs    n, n, #3
+       blt     L(sum)                  C n <= 5
+       adds    r0, r0, #0              C clear carry
+       sub     n, n, #3
+       b       L(mid)
+
+L(top):        adcs    r2, r2, r4                      C carry chains from one group of 3 to the next
+       adcs    r3, r3, r5
+       adcs    r12, r12, r6
+L(mid):        ldmia   ap!, { r4, r5, r6 }             C next 3 limbs
+       tst     n, n                            C sign of n for bpl, carry flag is left intact
+       sub     n, n, #3
+       bpl     L(top)
+
+       add     n, n, #3                        C undo the extra decrement
+
+       adcs    r2, r2, r4                      C add the last full group
+       adcs    r3, r3, r5
+       adcs    r12, r12, r6
+       movcs   r7, #1                  C r7 <= 1
+
+L(sum):        cmn     n, #2                           C pick up the 0, 1 or 2 leftover limbs
+       movlo   r4, #0
+       ldrhs   r4, [ap], #4
+       movls   r5, #0
+       ldrhi   r5, [ap], #4
+
+       adds    r2, r2, r4
+       adcs    r3, r3, r5
+       adcs    r12, r12, #0
+       adc     r7, r7, #0              C r7 <= 2
+
+L(sum2):
+       bic     r0, r2, #0xff000000             C low 24 bits of r2
+       add     r0, r0, r2, lsr #24             C plus high 8 bits of r2
+       add     r0, r0, r7                      C plus saved carries
+
+       lsl     r7, r3, #8
+       bic     r1, r7, #0xff000000             C low 16 bits of r3, times 2^8
+       add     r0, r0, r1
+       add     r0, r0, r3, lsr #16             C plus high 16 bits of r3
+
+       lsl     r7, r12, #16
+       bic     r1, r7, #0xff000000             C low 8 bits of r12, times 2^16
+       add     r0, r0, r1
+       add     r0, r0, r12, lsr #8             C plus high 24 bits of r12
+
+       pop     { r4, r5, r6, r7 }
+       bx      lr
+
+L(le2):        cmn     n, #1                           C n was 2?
+       bne     L(1)
+       ldmia   ap!, { r2, r3 }
+       mov     r12, #0
+       b       L(sum2)
+L(1):  ldr     r2, [ap]                        C single limb
+       bic     r0, r2, #0xff000000             C low 24 bits
+       add     r0, r0, r2, lsr #24             C plus high 8 bits
+       pop     { r4, r5, r6, r7 }
+       bx      lr
+EPILOGUE()
diff --git a/mpn/arm/mode1o.asm b/mpn/arm/mode1o.asm

new file mode 100644 (file)

index 0000000..e85f7f2
--- /dev/null
+++ b/mpn/arm/mode1o.asm
@@ -0,0 +1,72 @@
+dnl  ARM mpn_modexact_1c_odd
+
+dnl  Contributed to the GNU project by Torbjorn Granlund.
+
+dnl  Copyright 2012 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C           cycles/limb
+C StrongARM     ?
+C XScale        ?
+C Cortex-A8     ?
+C Cortex-A9    10
+C Cortex-A15    ?
+
+define(`up', `r0')
+define(`n',  `r1')
+define(`d',  `r2')
+define(`cy', `r3')
+
+       .protected      binvert_limb_table
+ASM_START()
+PROLOGUE(mpn_modexact_1c_odd)
+       stmfd   sp!, {r4, r5}
+
+       LEA(    r4, binvert_limb_table)
+
+       ldr     r5, [up], #4            C up[0]
+
+       and     r12, d, #254                    C bits 1..7 of d
+       ldrb    r4, [r4, r12, lsr #1]           C table byte at index (d >> 1) & 127
+       mul     r12, r4, r4
+       mul     r12, d, r12
+       rsb     r12, r12, r4, asl #1            C r12 = 2*r4 - r4*r4*d
+       mul     r4, r12, r12
+       mul     r4, d, r4
+       rsb     r4, r4, r12, asl #1     C r4 = inverse
+
+       subs    n, n, #1                C set carry as side-effect
+       beq     L(end)                          C n = 1
+
+L(top):        sbcs    cy, r5, cy                      C limb minus cy, borrow from the previous step
+       ldr     r5, [up], #4                    C next limb
+       sub     n, n, #1
+       mul     r12, r4, cy                     C low word of inverse * difference
+       tst     n, n                            C Z flag for bne, carry flag is left intact
+       umull   r12, cy, d, r12                 C cy = high word of d * r12
+       bne     L(top)
+
+L(end):        sbcs    cy, r5, cy
+       mul     r12, r4, cy
+       umull   r12, r0, d, r12                 C r0 = high word of d * r12
+       addcc   r0, r0, #1                      C add 1 if the last sbcs borrowed
+
+       ldmfd   sp!, {r4, r5}
+       bx      r14
+EPILOGUE()
diff --git a/mpn/arm/mul_1.asm b/mpn/arm/mul_1.asm

index e86735188180b6396fa1fd4f25183c6e33bfcf1e..f4b6bf984df224a91fb84b7752519749fe8a493a 100644 (file)
--- a/mpn/arm/mul_1.asm
+++ b/mpn/arm/mul_1.asm
@@ -2,7 +2,7 @@ dnl  ARM mpn_mul_1 -- Multiply a limb vector with a limb and store the result
  dnl  in a second limb vector.
  dnl  Contributed by Robert Harley.
  
-dnl  Copyright 1998, 2000, 2001, 2003 Free Software Foundation, Inc.
+dnl  Copyright 1998, 2000, 2001, 2003, 2012 Free Software Foundation, Inc.
  
  dnl  This file is part of the GNU MP Library.
  
@@ -21,9 +21,12 @@ dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
  
  include(`../config.m4')
  
-C            cycles/limb
-C StrongARM:     6-8  (dependent on vl value)
-C XScale:        ?-?
+C           cycles/limb
+C StrongARM    6-8
+C XScale        ?
+C Cortex-A8     ?
+C Cortex-A9     4.75
+C Cortex-A15    ?
  
  C We should rewrite this along the lines of addmul_1.asm.  That should save a
  C cycle on StrongARM, and several cycles on XScale.
@@ -54,10 +57,10 @@ L(skip1):
         stmia   rp!, { r8, r9 }
  L(skip2):
         bics    n, n, #3
-       beq     L(return)
+       beq     L(rtn)
         stmfd   sp!, { r6, r7 }
-L(loop):
-       mov     r6, r12
+
+L(top):        mov     r6, r12
         ldmia   up!, { r8, r9, r12, lr }
         ldr     r7, [rp, #12]                   C cache allocate
         mov     r7, #0
@@ -70,9 +73,10 @@ L(loop):
         umlal   r9, r12, lr, vl
         subs    n, n, #4
         stmia   rp!, { r6, r7, r8, r9 }
-       bne     L(loop)
+       bne     L(top)
+
         ldmfd   sp!, { r6, r7 }
-L(return):
-       mov     r0, r12
+
+L(rtn):        mov     r0, r12
         ldmfd   sp!, { r8, r9, pc }
-EPILOGUE(mpn_mul_1)
+EPILOGUE()
diff --git a/mpn/arm/rsh1aors_n.asm b/mpn/arm/rsh1aors_n.asm

new file mode 100644 (file)

index 0000000..af952a9
--- /dev/null
+++ b/mpn/arm/rsh1aors_n.asm
@@ -0,0 +1,112 @@
+dnl  ARM mpn_rsh1add_n and mpn_rsh1sub_n.
+
+dnl  Contributed to the GNU project by Torbjorn Granlund.
+
+dnl  Copyright 2012 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C           cycles/limb
+C StrongARM     ?
+C XScale        ?
+C Cortex-A8     ?
+C Cortex-A9    3.64-3.7
+C Cortex-A15    ?
+
+C TODO
+C  * Not optimised.
+
+define(`rp', `r0')
+define(`up', `r1')
+define(`vp', `r2')
+define(`n',  `r3')
+
+ifdef(`OPERATION_rsh1add_n', `
+  define(`ADDSUB',     adds)
+  define(`ADDSUBC',    adcs)
+  define(`RSTCY',      `cmn    $1, $1')
+  define(`func',       mpn_rsh1add_n)
+  define(`func_nc',    mpn_rsh1add_nc)')
+ifdef(`OPERATION_rsh1sub_n', `
+  define(`ADDSUB',     subs)
+  define(`ADDSUBC',    sbcs)
+  define(`RSTCY',
+       `mvn    $2, #0x80000000
+       cmp     $2, $1')
+  define(`func',       mpn_rsh1sub_n)
+  define(`func_nc',    mpn_rsh1sub_nc)')
+
+MULFUNC_PROLOGUE(mpn_rsh1add_n mpn_rsh1sub_n)
+
+ASM_START()
+PROLOGUE(func)
+       push    {r4-r11}
+       ldr     r4, [up], #4
+       ldr     r8, [vp], #4
+       ADDSUB  r4, r4, r8                      C first limb, no carry in
+       rrxs    r12, r7                         C park carry in bit 31 of r12, other bits unused
+       and     r11, r4, #1     C return value
+       subs    n, n, #4
+       blo     L(end)                          C fewer than 3 limbs remain
+
+L(top):        ldmia   up!, {r5,r6,r7}                 C next 3 limbs of each operand
+       ldmia   vp!, {r8,r9,r10}
+       cmn     r12, r12                        C restore carry from bit 31 of r12
+       ADDSUBC r5, r5, r8
+       ADDSUBC r6, r6, r9
+       ADDSUBC r7, r7, r10
+       rrxs    r12, r7                         C park carry again, C = bit 0 of r7
+       rrxs    r6, r6                          C shift right one bit through r6, r5, r4
+       rrxs    r5, r5
+       rrxs    r4, r4
+       subs    n, n, #3
+       stmia   rp!, {r4,r5,r6}                 C store 3 finished limbs
+       mov     r4, r7                          C unshifted top limb carries over to the next round
+       bhs     L(top)
+
+L(end):        cmn     n, #2                           C dispatch on the 0, 1 or 2 limbs still to read
+       bls     L(e2)
+       ldm     up, {r5,r6}                     C 2 limbs left
+       ldm     vp, {r8,r9}
+       cmn     r12, r12                        C restore carry
+       ADDSUBC r5, r5, r8
+       ADDSUBC r6, r6, r9
+       rrxs    r12, r6
+       rrxs    r5, r5
+       rrxs    r4, r4
+       stmia   rp!, {r4,r5}
+       mov     r4, r6
+       b       L(e1)
+
+L(e2): bne     L(e1)                           C none left
+       ldr     r5, [up, #0]                    C 1 limb left
+       ldr     r8, [vp, #0]
+       cmn     r12, r12                        C restore carry
+       ADDSUBC r5, r5, r8
+       rrxs    r12, r5
+       rrxs    r4, r4
+       str     r4, [rp], #4
+       mov     r4, r5
+
+L(e1): RSTCY(  r12, r1)                        C set C from bit 31 of r12, the sub variant clobbers r1
+       rrxs    r4, r4                          C shift that bit into the top result limb
+       str     r4, [rp, #0]
+       mov     r0, r11                         C return the bit shifted out at the bottom
+       pop     {r4-r11}
+       bx      r14
+EPILOGUE()
diff --git a/mpn/arm/rshift.asm b/mpn/arm/rshift.asm

new file mode 100644 (file)

index 0000000..2fe127c
--- /dev/null
+++ b/mpn/arm/rshift.asm
@@ -0,0 +1,74 @@
+dnl  ARM mpn_rshift.
+
+dnl  Contributed to the GNU project by Torbjorn Granlund.
+
+dnl  Copyright 1997, 2000, 2001, 2012 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C           cycles/limb
+C StrongARM     ?
+C XScale        ?
+C Cortex-A8     ?
+C Cortex-A9     3.5
+C Cortex-A15    ?
+
+define(`rp',  `r0')
+define(`up',  `r1')
+define(`n',   `r2')
+define(`cnt', `r3')
+define(`tnc', `r12')
+
+ASM_START()
+PROLOGUE(mpn_rshift)
+       push    {r4, r6, r7, r8}
+       ldr     r4, [up]                C lowest source limb, kept for the return value
+       rsb     tnc, cnt, #32           C tnc = 32 - cnt
+
+       lsr     r7, r4, cnt             C r7 collects the next limb to be stored
+       tst     n, #1
+       beq     L(evn)                  C n even
+
+L(odd):        subs    n, n, #2
+       bcc     L(1)                    C n = 1
+       ldr     r8, [up, #4]!
+       b       L(mid)
+
+L(evn):        ldr     r6, [up, #4]!
+       subs    n, n, #2
+       beq     L(end)                  C n = 2
+
+L(top):        ldr     r8, [up, #4]!
+       orr     r7, r7, r6, lsl tnc     C low bits of r6 fill the top of r7
+       str     r7, [rp], #4
+       lsr     r7, r6, cnt
+L(mid):        ldr     r6, [up, #4]!
+       orr     r7, r7, r8, lsl tnc     C same step with r6 and r8 swapped
+       str     r7, [rp], #4
+       lsr     r7, r8, cnt
+       subs    n, n, #2                C two limbs handled per round
+       bgt     L(top)
+
+L(end):        orr     r7, r7, r6, lsl tnc
+       str     r7, [rp], #4
+       lsr     r7, r6, cnt
+L(1):  str     r7, [rp], #4            C highest result limb, zero-filled from the top
+       lsl     r0, r4, tnc             C return the bits shifted out of the lowest limb
+       pop     {r4, r6, r7, r8}
+       bx      r14
+EPILOGUE()
diff --git a/mpn/arm/sub_n.asm b/mpn/arm/sub_n.asm

deleted file mode 100644 (file)

index 7063be4..0000000
--- a/mpn/arm/sub_n.asm
+++ /dev/null
@@ -1,71 +0,0 @@
-dnl  ARM mpn_sub_n -- Subtract two limb vectors of the same length > 0 and
-dnl  store difference in a third limb vector.
-dnl  Contributed by Robert Harley.
-
-dnl  Copyright 1997, 2000, 2001 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of the GNU Lesser General Public License as published
-dnl  by the Free Software Foundation; either version 3 of the License, or (at
-dnl  your option) any later version.
-
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-dnl  License for more details.
-
-dnl  You should have received a copy of the GNU Lesser General Public License
-dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C This code runs at 5 cycles/limb.
-
-define(`rp',`r0')
-define(`up',`r1')
-define(`vp',`r2')
-define(`n',`r3')
-
-
-ASM_START()
-PROLOGUE(mpn_sub_n)
-       stmfd   sp!, { r8, r9, lr }
-       subs    r12, r12, r12
-       tst     n, #1
-       beq     L(skip1)
-       ldr     r12, [up], #4
-       ldr     lr, [vp], #4
-       subs    r12, r12, lr
-       str     r12, [rp], #4
-L(skip1):
-       tst     n, #2
-       beq     L(skip2)
-       ldmia   up!, { r8, r9 }
-       ldmia   vp!, { r12, lr }
-       sbcs    r8, r8, r12
-       sbcs    r9, r9, lr
-       stmia   rp!, { r8, r9 }
-L(skip2):
-       bics    n, n, #3
-       beq     L(return)
-       stmfd   sp!, { r4, r5, r6, r7 }
-L(sub_n_loop):
-       ldmia   up!, { r4, r5, r6, r7 }
-       ldmia   vp!, { r8, r9, r12, lr }
-       sbcs    r4, r4, r8
-       ldr     r8, [rp, #12]                   C cache allocate
-       sbcs    r5, r5, r9
-       sbcs    r6, r6, r12
-       sbcs    r7, r7, lr
-       stmia   rp!, { r4, r5, r6, r7 }
-       sub     n, n, #4
-       teq     n, #0
-       bne     L(sub_n_loop)
-       ldmfd   sp!, { r4, r5, r6, r7 }
-L(return):
-       sbc     r0, r0, r0
-       and     r0, r0, #1
-       ldmfd   sp!, { r8, r9, pc }
-EPILOGUE(mpn_sub_n)
diff --git a/mpn/arm/submul_1.asm b/mpn/arm/submul_1.asm

deleted file mode 100644 (file)

index c365437..0000000
--- a/mpn/arm/submul_1.asm
+++ /dev/null
@@ -1,107 +0,0 @@
-dnl  ARM mpn_submul_1 -- Multiply a limb vector with a limb and subtract the
-dnl  result from a second limb vector.
-
-dnl  Copyright 1998, 2000, 2001, 2003 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of the GNU Lesser General Public License as published
-dnl  by the Free Software Foundation; either version 3 of the License, or (at
-dnl  your option) any later version.
-
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-dnl  License for more details.
-
-dnl  You should have received a copy of the GNU Lesser General Public License
-dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C            cycles/limb
-C StrongARM:  7.75-9.75  (dependent on vl value)
-C XScale:        8-9     (dependent on vl value, estimated)
-
-define(`rp',`r0')
-define(`up',`r1')
-define(`n',`r2')
-define(`vl',`r3')
-define(`rl',`r12')
-define(`ul',`r6')
-define(`r',`lr')
-
-
-ASM_START()
-PROLOGUE(mpn_submul_1)
-       stmfd   sp!, { r4-r6, lr }
-       subs    r4, r0, r0              C clear r4, set cy
-       tst     n, #1
-       beq     L(skip1)
-       ldr     ul, [up], #4
-       ldr     rl, [rp, #0]
-       umull   r5, r4, ul, vl
-       subs    r, rl, r5
-       str     r, [rp], #4
-L(skip1):
-       tst     n, #2
-       beq     L(skip2)
-       ldr     ul, [up], #4
-       ldr     rl, [rp, #0]
-       mov     r5, #0
-       umlal   r4, r5, ul, vl
-       ldr     ul, [up], #4
-       sbcs    r, rl, r4
-       ldr     rl, [rp, #4]
-       mov     r4, #0
-       umlal   r5, r4, ul, vl
-       str     r, [rp], #4
-       sbcs    r, rl, r5
-       str     r, [rp], #4
-L(skip2):
-       bics    r, n, #3
-       beq     L(return)
-
-       ldr     ul, [up], #4
-       ldr     rl, [rp, #0]
-       mov     r5, #0
-       umlal   r4, r5, ul, vl
-       b       L(in)
-
-L(loop):
-       ldr     ul, [up], #4
-       sbcs    r, rl, r5
-       ldr     rl, [rp, #4]
-       mov     r5, #0
-       umlal   r4, r5, ul, vl
-       str     r, [rp], #4
-L(in): ldr     ul, [up], #4
-       sbcs    r, rl, r4
-       ldr     rl, [rp, #4]
-       mov     r4, #0
-       umlal   r5, r4, ul, vl
-       str     r, [rp], #4
-       ldr     ul, [up], #4
-       sbcs    r, rl, r5
-       ldr     rl, [rp, #4]
-       mov     r5, #0
-       umlal   r4, r5, ul, vl
-       str     r, [rp], #4
-       ldr     ul, [up], #4
-       sbcs    r, rl, r4
-       ldr     rl, [rp, #4]
-       mov     r4, #0
-       umlal   r5, r4, ul, vl
-       str     r, [rp], #4
-       sub     n, n, #4
-       bics    r, n, #3
-       bne     L(loop)
-
-       sbcs    r, rl, r5
-       str     r, [rp], #4
-L(return):
-       sbc     r0, r0, r0
-       sub     r0, r4, r0
-       ldmfd   sp!, { r4-r6, pc }
-EPILOGUE(mpn_submul_1)
diff --git a/mpn/arm/udiv.asm b/mpn/arm/udiv.asm

index 9434a4f2b6a018245352f7ffe6499ce78d6f01f0..5cdf9281565bcfca832d0954cde60903294aea74 100644 (file)
--- a/mpn/arm/udiv.asm
+++ b/mpn/arm/udiv.asm
@@ -1,7 +1,7 @@
  dnl  ARM mpn_udiv_qrnnd -- divide a two limb dividend and a one limb divisor.
  dnl  Return quotient and store remainder through a supplied pointer.
  
-dnl  Copyright 2001 Free Software Foundation, Inc.
+dnl  Copyright 2001, 2012 Free Software Foundation, Inc.
  
  dnl  This file is part of the GNU MP Library.
  
@@ -48,9 +48,9 @@ L(oop):       divstep(n1,n0,d)
         teq     r12, #0
         bne     L(oop)
  
-       str     n1, [ rem_ptr ]         C store remainder
+       str     n1, [rem_ptr]           C store remainder
         adc     r0, n0, n0              C quotient: add last carry from divstep
-       mov     pc, lr
+       bx      lr
  
  L(_large_divisor):
         stmfd   sp!, { r8, lr }
@@ -87,7 +87,7 @@ L(oop2):
         addcs   n0, n0, #1              C adjust quotient
  
  L(_even_divisor):
-       str     n1, [ rem_ptr ]         C store remainder
+       str     n1, [rem_ptr]           C store remainder
         mov     r0, n0                  C quotient
         ldmfd   sp!, { r8, pc }
  EPILOGUE(mpn_udiv_qrnnd)
diff --git a/mpn/arm/v5/gcd_1.asm b/mpn/arm/v5/gcd_1.asm

new file mode 100644 (file)

index 0000000..d0aa966
--- /dev/null
+++ b/mpn/arm/v5/gcd_1.asm
@@ -0,0 +1,109 @@
+dnl  ARM v5 mpn_gcd_1.
+
+dnl  Based on the K7 gcd_1.asm, by Kevin Ryde.  Rehacked for ARM by Torbjorn
+dnl  Granlund.
+
+dnl  Copyright 2000, 2001, 2002, 2005, 2009, 2011, 2012 Free Software
+dnl  Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C           cycles/bit (approx)
+C StrongARM     ?
+C XScale        ?
+C Cortex-A8     ?
+C Cortex-A9     5.9
+C Cortex-A15    ?
+C Numbers measured with: speed -CD -s8-32 -t24 mpn_gcd_1
+
+C TODO
+C  * Optimise inner-loop better.
+
+C Threshold of when to call bmod when U is one limb.  Should be about
+C (time_in_cycles(bmod_1,1) + call_overhead) / (cycles/bit).
+define(`BMOD_THRES_LOG2', 6)
+
+C INPUT PARAMETERS
+define(`up',    `r0')
+define(`n',     `r1')
+define(`v0',    `r2')
+
+ifdef(`BMOD_1_TO_MOD_1_THRESHOLD',,
+  `define(`BMOD_1_TO_MOD_1_THRESHOLD',0xffffffff)')
+
+ASM_START()
+       TEXT
+       ALIGN(16)
+PROLOGUE(mpn_gcd_1)
+       push    {r4, r7, lr}
+       ldr     r3, [up]        C U low limb
+
+       orr     r3, r3, v0      C u0 | v0
+       rsb     r4, r3, #0
+       and     r4, r4, r3      C isolate lowest set bit of u0 | v0
+       clz     r4, r4          C min(ctz(u0),ctz(v0))
+       rsb     r4, r4, #31     C bit index = 31 - clz
+
+       rsb     r12, v0, #0     C same sequence on v0 alone
+       and     r12, r12, v0
+       clz     r12, r12
+       rsb     r12, r12, #31
+       lsr     v0, v0, r12     C drop the trailing zero bits of v0
+
+       mov     r7, v0          C r7 = v0 without trailing zeros
+
+       cmp     n, #1
+       bne     L(nby1)
+
+C Both U and V are single limbs, reduce with bmod if u0 >> v0.
+       ldr     r3, [up]        C reload u0, r3 was overwritten above
+       cmp     v0, r3, lsr #BMOD_THRES_LOG2
+       bhi     L(red1)         C v0 > u0 >> BMOD_THRES_LOG2: skip the call
+
+L(bmod):mov    r3, #0          C carry argument
+       bl      mpn_modexact_1c_odd
+       b       L(red0)
+
+L(nby1):cmp    n, #BMOD_1_TO_MOD_1_THRESHOLD
+       blo     L(bmod)
+
+       bl      mpn_mod_1
+
+L(red0):mov    r3, r0          C r3 = value returned by the call
+L(red1):rsbs   r12, r3, #0     C Z set iff r3 = 0
+       and     r12, r12, r3
+       clz     r12, r12
+       rsb     r12, r12, #31   C trailing zero count of r3
+       bne     L(mid)          C r3 nonzero: enter the loop
+       b       L(end)
+
+       ALIGN(8)
+L(top):        rsb     r12, r12, #31   C trailing zero count of the difference
+       movcc   r3, r1          C if x-y < 0
+       movcc   r7, r0          C use x,y-x
+L(mid):        lsr     r3, r3, r12     C strip trailing zeros
+       mov     r0, r3          C keep a copy for the movcc above
+       sub     r1, r7, r3      C r1 = r7 - r3
+       rsbs    r3, r7, r3      C r3 = r3 - r7, sets the flags used below
+       and     r12, r1, r3     C lowest set bit of the difference
+       clz     r12, r12        C
+       bne     L(top)          C loop until the difference is zero
+
+L(end):        lsl     r0, r7, r4      C shift back by the count computed at entry
+       pop     {r4, r7, pc}
+EPILOGUE()
diff --git a/mpn/arm/v5/mod_1_1.asm b/mpn/arm/v5/mod_1_1.asm

new file mode 100644 (file)

index 0000000..824f13f
--- /dev/null
+++ b/mpn/arm/v5/mod_1_1.asm
@@ -0,0 +1,117 @@
+dnl  ARM mpn_mod_1_1p
+
+dnl  Contributed to the GNU project by Torbjorn Granlund.
+
+dnl  Copyright 2012 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C           cycles/limb
+C StrongARM     ?
+C XScale        ?
+C Cortex-A8     ?
+C Cortex-A9     7
+C Cortex-A15    6
+
+define(`ap', `r0')
+define(`n',  `r1')
+define(`d',  `r2')
+define(`cps',`r3')
+
+ASM_START()
+PROLOGUE(mpn_mod_1_1p)
+       push    {r4-r10}
+       add     r0, r0, r1, asl #2      C r0 = ap + 4*n, just past the top limb
+       ldr     r5, [r0, #-4]!          C most significant limb
+       ldr     r12, [r0, #-4]!         C next limb down
+       subs    r1, r1, #2
+       ble     L(4)                    C n <= 2: skip the loop
+       ldr     r8, [r3, #12]           C cps[3]
+       mov     r4, r12
+       mov     r10, r5
+       umull   r7, r5, r10, r8         C r5:r7 = r10 * cps[3]
+       sub     r1, r1, #1
+       b       L(mid)
+
+L(top):        adds    r12, r6, r7
+       adcs    r10, r4, r5
+       sub     r1, r1, #1
+       mov     r6, #0
+       movcs   r6, r8                  C carry from the adcs: r6 = cps[3], else 0
+       umull   r7, r5, r10, r8         C r5:r7 = r10 * cps[3]
+       adds    r4, r12, r6
+       subcs   r4, r4, r2              C on carry, subtract d
+L(mid):        ldr     r6, [r0, #-4]!          C next lower limb
+       teq     r1, #0
+       bne     L(top)
+
+       adds    r12, r6, r7
+       adcs    r5, r4, r5
+       subcs   r5, r5, r2              C on carry, subtract d
+L(4):  ldr     r1, [r3, #4]            C cps[1], used as shift count below
+       cmp     r1, #0
+       beq     L(7)                    C count 0: no shifting needed
+       ldr     r4, [r3, #8]            C cps[2]
+       umull   r0, r6, r5, r4          C r6:r0 = r5 * cps[2]
+       adds    r12, r0, r12
+       addcs   r6, r6, #1
+       rsb     r0, r1, #32
+       mov     r0, r12, lsr r0
+       orr     r5, r0, r6, asl r1      C r5:r12 = r6:r12 shifted left by the count
+       mov     r12, r12, asl r1
+       b       L(8)
+L(7):  cmp     r5, r2
+       subcs   r5, r5, r2              C if r5 >= d, subtract d
+L(8):  ldr     r0, [r3, #0]            C cps[0]
+       umull   r4, r3, r5, r0          C r3:r4 = r5 * cps[0]
+       add     r5, r5, #1
+       adds    r0, r4, r12
+       adc     r5, r3, r5
+       mul     r5, r2, r5
+       sub     r12, r12, r5
+       cmp     r12, r0
+       addhi   r12, r12, r2            C conditional correction by +d
+       cmp     r2, r12
+       subls   r12, r12, r2            C conditional correction by -d
+       mov     r0, r12, lsr r1         C return r12 shifted right by the count
+       pop     {r4-r10}
+       bx      r14
+EPILOGUE()
+
+PROLOGUE(mpn_mod_1_1p_cps)
+       stmfd   sp!, {r4, r5, r6, r14}
+       mov     r5, r0                  C r5 = output table pointer, r0 is reused
+       clz     r4, r1                  C r4 = leading zero count of the second argument
+       mov     r0, r1, asl r4          C normalised value, passed to mpn_invert_limb
+       rsb     r6, r0, #0              C r6 = minus the normalised value
+       bl      mpn_invert_limb
+       str     r0, [r5, #0]            C table[0] = value returned by the call
+       str     r4, [r5, #4]            C table[1] = shift count
+       cmp     r4, #0
+       beq     L(2)                    C count 0: table[2] is left unwritten
+       rsb     r1, r4, #32
+       mov     r3, #1
+       mov     r3, r3, asl r4
+       orr     r3, r3, r0, lsr r1
+       mul     r3, r6, r3
+       mov     r4, r3, lsr r4
+       str     r4, [r5, #8]            C table[2]
+L(2):  mul     r0, r6, r0
+       str     r0, [r5, #12]           C table[3]
+       ldmfd   sp!, {r4, r5, r6, pc}
+EPILOGUE()
diff --git a/mpn/arm/v5/mod_1_2.asm b/mpn/arm/v5/mod_1_2.asm

new file mode 100644 (file)

index 0000000..a41bf35
--- /dev/null
+++ b/mpn/arm/v5/mod_1_2.asm
@@ -0,0 +1,144 @@
+dnl  ARM mpn_mod_1s_2p
+
+dnl  Contributed to the GNU project by Torbjorn Granlund.
+
+dnl  Copyright 2012 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C           cycles/limb
+C StrongARM     ?
+C XScale        ?
+C Cortex-A8     ?
+C Cortex-A9     4.25
+C Cortex-A15    3
+
+define(`ap', `r0')
+define(`n',  `r1')
+define(`d',  `r2')
+define(`cps',`r3')
+
+ASM_START()
+PROLOGUE(mpn_mod_1s_2p)
+       push    {r4-r10}
+       tst     n, #1                   C flags consumed by the beq below
+       add     r7, r3, #8              C r7 = address of cps[2]
+       ldmia   r7, {r7, r8, r12}       C load B1, B2, B3
+       add     ap, ap, n, lsl #2       C put ap at operand end
+       beq     L(evn)                  C n even
+
+L(odd):        subs    n, n, #1
+       beq     L(1)                    C n = 1
+       ldmdb   ap!, {r4,r6,r9}         C top three limbs, r9 the highest
+       mov     r10, #0
+       umlal   r4, r10, r6, r7
+       umlal   r4, r10, r9, r8
+       b       L(com)
+
+L(evn):        ldmdb   ap!, {r4,r10}           C top two limbs, r10 the highest
+L(com):        subs    n, n, #2
+       ble     L(end)
+       ldmdb   ap!, {r5,r6}
+       b       L(mid)
+
+L(top):        mov     r9, #0
+       umlal   r5, r9, r6, r7          C B1
+       umlal   r5, r9, r4, r8          C B2
+       ldmdb   ap!, {r4,r6}
+       umlal   r5, r9, r10, r12        C B3
+       ble     L(xit)                  C flags still from the subs at L(mid)
+       mov     r10, #0
+       umlal   r4, r10, r6, r7         C B1
+       umlal   r4, r10, r5, r8         C B2
+       ldmdb   ap!, {r5,r6}
+       umlal   r4, r10, r9, r12        C B3
+L(mid):        subs    n, n, #4
+       bge     L(top)
+
+       mov     r9, #0
+       umlal   r5, r9, r6, r7          C B1
+       umlal   r5, r9, r4, r8          C B2
+       umlal   r5, r9, r10, r12        C B3
+       mov     r4, r5
+
+L(end):        movge      r9, r10              C executed iff coming via xit
+       ldr     r6, [r3, #4]            C cps[1] = cnt
+       mov     r5, #0
+       umlal   r4, r5, r9, r7          C r5:r4 = r4 + r9 * B1
+       mov     r7, r5, lsl r6
+L(x):  rsb     r1, r6, #32
+       orr     r8, r7, r4, lsr r1      C r8:r9 = r5:r4 shifted left by cnt
+       mov     r9, r4, lsl r6
+       ldr     r5, [r3, #0]            C cps[0]
+       add     r0, r8, #1
+       umull   r12, r1, r8, r5         C r1:r12 = r8 * cps[0]
+       adds    r4, r12, r9
+       adc     r1, r1, r0
+       mul     r5, r2, r1
+       sub     r9, r9, r5
+       cmp     r9, r4
+       addhi   r9, r9, r2              C conditional correction by +d
+       cmp     r2, r9
+       subls   r9, r9, r2              C conditional correction by -d
+       mov     r0, r9, lsr r6          C return r9 shifted right by cnt
+       pop     {r4-r10}
+       bx      r14
+
+L(xit):        mov     r10, #0
+       umlal   r4, r10, r6, r7         C B1
+       umlal   r4, r10, r5, r8         C B2
+       umlal   r4, r10, r9, r12        C B3
+       b       L(end)
+
+L(1):  ldr     r6, [r3, #4]            C cps[1] = cnt
+       ldr     r4, [ap, #-4]           C ap[0]
+       mov     r7, #0                  C no high word for the single-limb case
+       b       L(x)
+EPILOGUE()
+
+PROLOGUE(mpn_mod_1s_2p_cps)
+       push    {r4-r8, r14}
+       clz     r4, r1                  C cnt = leading zero count of b
+       mov     r5, r1, lsl r4          C b <<= cnt
+       mov     r6, r0                  C r6 = cps
+       mov     r0, r5                  C argument for mpn_invert_limb
+       bl      mpn_invert_limb
+       rsb     r3, r4, #32
+       mov     r3, r0, lsr r3
+       mov     r2, #1
+       orr     r3, r3, r2, lsl r4
+       rsb     r1, r5, #0              C r1 = minus the shifted b
+       mul     r2, r1, r3              C r2 is stored as cps[2] after the shift below
+       umull   r3, r12, r2, r0
+       add     r12, r2, r12
+       mvn     r12, r12
+       mul     r1, r5, r12
+       cmp     r1, r3
+       addhi   r1, r1, r5              C r1 is stored as cps[3] after the shift below
+       umull   r12, r7, r1, r0
+       add     r7, r1, r7
+       mvn     r7, r7
+       mul     r3, r5, r7
+       cmp     r3, r12
+       addhi   r3, r3, r5              C r3 is stored as cps[4] after the shift below
+       mov     r5, r2, lsr r4          C shift the three values right by cnt
+       mov     r7, r1, lsr r4
+       mov     r8, r3, lsr r4
+       stmia   r6, {r0,r4,r5,r7,r8}    C fill cps
+       pop     {r4-r8, pc}
+EPILOGUE()
diff --git a/mpn/arm/v6/addmul_1.asm b/mpn/arm/v6/addmul_1.asm

new file mode 100644 (file)

index 0000000..56bfb34
--- /dev/null
+++ b/mpn/arm/v6/addmul_1.asm
@@ -0,0 +1,99 @@
+dnl  ARM mpn_addmul_1.
+
+dnl  Copyright 2012 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C           cycles/limb
+C StrongARM:    -
+C XScale        -
+C Cortex-A8     ?
+C Cortex-A9     3.25
+C Cortex-A15    4
+
+C TODO
+C  * Micro-optimise feed-in code.
+C  * Optimise for n=1,2 by delaying register saving.
+C  * Try using ldm/stm.
+
+define(`rp',`r0')
+define(`up',`r1')
+define(`n', `r2')
+define(`v0',`r3')
+
+ASM_START()
+PROLOGUE(mpn_addmul_1)
+       stmfd   sp!, { r4, r5, r6, r7 }
+
+       ands    r6, n, #3               C n mod 4 selects the feed-in path
+       mov     r12, #0                 C r12 = carry limb, starts at 0
+       beq     L(fi0)
+       cmp     r6, #2
+       bcc     L(fi1)                  C n mod 4 = 1
+       beq     L(fi2)                  C n mod 4 = 2
+
+L(fi3):        ldr     r4, [up], #4
+       ldr     r6, [rp, #0]
+       ldr     r5, [up], #4
+       b       L(lo3)
+
+L(fi0):        ldr     r5, [up], #4
+       ldr     r7, [rp], #4
+       ldr     r4, [up], #4
+       b       L(lo0)
+
+L(fi1):        ldr     r4, [up], #4
+       ldr     r6, [rp], #8
+       subs    n, n, #1
+       beq     L(1)                    C n = 1: single multiply at the tail
+       ldr     r5, [up], #4
+       b       L(lo1)
+
+L(fi2):        ldr     r5, [up], #4
+       ldr     r7, [rp], #12
+       ldr     r4, [up], #4
+       b       L(lo2)
+
+       ALIGN(16)
+L(top):        ldr     r6, [rp, #-8]
+       ldr     r5, [up], #4
+       str     r7, [rp, #-12]
+L(lo1):        umaal   r6, r12, r4, v0         C r12:r6 = r4 * v0 + r6 + r12
+       ldr     r7, [rp, #-4]
+       ldr     r4, [up], #4
+       str     r6, [rp, #-8]
+L(lo0):        umaal   r7, r12, r5, v0         C r12:r7 = r5 * v0 + r7 + r12
+       ldr     r6, [rp, #0]
+       ldr     r5, [up], #4
+       str     r7, [rp, #-4]
+L(lo3):        umaal   r6, r12, r4, v0
+       ldr     r7, [rp, #4]
+       ldr     r4, [up], #4
+       str     r6, [rp], #16           C rp advances once per four limbs
+L(lo2):        umaal   r7, r12, r5, v0
+       subs    n, n, #4
+       bhi     L(top)
+
+       ldr     r6, [rp, #-8]
+       str     r7, [rp, #-12]
+L(1):  umaal   r6, r12, r4, v0         C last limb
+       str     r6, [rp, #-8]
+       mov     r0, r12                 C return the final carry limb
+       ldmfd   sp!, { r4, r5, r6, r7 }
+       bx      lr
+EPILOGUE()
diff --git a/mpn/arm/v6/addmul_2.asm b/mpn/arm/v6/addmul_2.asm

new file mode 100644 (file)

index 0000000..dec0798
--- /dev/null
+++ b/mpn/arm/v6/addmul_2.asm
@@ -0,0 +1,119 @@
+dnl  ARM mpn_addmul_2.
+
+dnl  Contributed to the GNU project by Torbjorn Granlund.
+
+dnl  Copyright 2012 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C           cycles/limb
+C StrongARM:    -
+C XScale        -
+C Cortex-A8     ?
+C Cortex-A9     2.38
+C Cortex-A15    2.5
+
+C TODO
+C  * Consider using more registers for the r[] loads, allowing better load-use
+C    scheduling for a 6% speedup (on A9).  Free: r10, r11, r14
+
+define(`rp',`r0')
+define(`up',`r1')
+define(`n', `r2')
+define(`vp',`r3')
+
+define(`v0',`r6')
+define(`v1',`r7')
+define(`u0',`r3')
+define(`u1',`r9')
+
+define(`cya',`r8')
+define(`cyb',`r12')
+
+
+ASM_START()
+PROLOGUE(mpn_addmul_2)
+       push    { r4, r5, r6, r7, r8, r9 }
+
+       ldm     vp, { v0, v1 }          C both multiplier limbs; r3 is reused as u0 after this
+       mov     cya, #0                 C two separate carry limbs, both start at 0
+       mov     cyb, #0
+
+       tst     n, #1
+       beq     L(evn)
+L(odd):        ldr     r5, [rp, #0]
+       ldr     u0, [up, #0]
+       ldr     r4, [rp, #4]
+       tst     n, #2
+       beq     L(fi1)
+L(fi3):        sub     up, up, #12             C bias pointers for entry at L(lo3)
+       sub     rp, rp, #16
+       b       L(lo3)
+L(fi1):        sub     n, n, #1
+       sub     up, up, #4              C bias pointers for entry at L(lo1)
+       sub     rp, rp, #8
+       b       L(lo1)
+L(evn):        ldr     r4, [rp, #0]
+       ldr     u1, [up, #0]
+       ldr     r5, [rp, #4]
+       tst     n, #2
+       bne     L(fi2)
+L(fi0):        sub     up, up, #8              C bias pointers for entry at L(lo0)
+       sub     rp, rp, #12
+       b       L(lo0)
+L(fi2):        subs    n, n, #2
+       sub     rp, rp, #4
+       bls     L(end)                  C n = 2: skip the loop
+
+       ALIGN(16)
+L(top):        ldr     u0, [up, #4]
+       umaal   r4, cya, u1, v0         C cya:r4 = u1 * v0 + r4 + cya
+       str     r4, [rp, #4]
+       ldr     r4, [rp, #12]
+       umaal   r5, cyb, u1, v1         C cyb:r5 = u1 * v1 + r5 + cyb
+L(lo1):        ldr     u1, [up, #8]
+       umaal   r5, cya, u0, v0
+       str     r5, [rp, #8]
+       ldr     r5, [rp, #16]
+       umaal   r4, cyb, u0, v1
+L(lo0):        ldr     u0, [up, #12]
+       umaal   r4, cya, u1, v0
+       str     r4, [rp, #12]
+       ldr     r4, [rp, #20]
+       umaal   r5, cyb, u1, v1
+L(lo3):        ldr     u1, [up, #16]!          C writeback: up advances four limbs
+       umaal   r5, cya, u0, v0
+       str     r5, [rp, #16]!          C writeback: rp advances four limbs
+       ldr     r5, [rp, #8]
+       umaal   r4, cyb, u0, v1
+       subs    n, n, #4
+       bhi     L(top)
+
+L(end):        umaal   r4, cya, u1, v0
+       ldr     u0, [up, #4]
+       umaal   r5, cyb, u1, v1
+       str     r4, [rp, #4]
+       umaal   r5, cya, u0, v0
+       umaal   cya, cyb, u0, v1        C cyb:cya = u0 * v1 + cya + cyb
+       str     r5, [rp, #8]
+       str     cya, [rp, #12]
+       mov     r0, cyb                 C return the high carry limb
+
+       pop     { r4, r5, r6, r7, r8, r9 }
+       bx      r14
+EPILOGUE()
diff --git a/mpn/arm/v6/gmp-mparam.h b/mpn/arm/v6/gmp-mparam.h

new file mode 100644 (file)

index 0000000..ae29206
--- /dev/null
+++ b/mpn/arm/v6/gmp-mparam.h
@@ -0,0 +1,146 @@
+/* gmp-mparam.h -- Compiler/machine parameter header file.
+
+Copyright 1991, 1993, 1994, 1999, 2000, 2001, 2002, 2003, 2009, 2010, 2012 Free
+Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#define GMP_LIMB_BITS 32
+#define BYTES_PER_MP_LIMB 4
+
+/* 700MHz ARM11 (raspberry pi) */
+
+#define DIVREM_1_NORM_THRESHOLD              0  /* preinv always */
+#define DIVREM_1_UNNORM_THRESHOLD            0  /* always */
+#define MOD_1_1P_METHOD                      2
+#define MOD_1_NORM_THRESHOLD                 0  /* always */
+#define MOD_1_UNNORM_THRESHOLD               0  /* always */
+#define MOD_1N_TO_MOD_1_1_THRESHOLD          3
+#define MOD_1U_TO_MOD_1_1_THRESHOLD          7
+#define MOD_1_1_TO_MOD_1_2_THRESHOLD         0  /* never mpn_mod_1_1p */
+#define MOD_1_2_TO_MOD_1_4_THRESHOLD     MP_SIZE_T_MAX
+#define PREINV_MOD_1_TO_MOD_1_THRESHOLD     29
+#define USE_PREINV_DIVREM_1                  1  /* preinv always */
+#define DIV_QR_2_PI2_THRESHOLD           MP_SIZE_T_MAX  /* never */
+#define DIVEXACT_1_THRESHOLD                 0  /* always */
+#define BMOD_1_TO_MOD_1_THRESHOLD           33
+
+#define MUL_TOOM22_THRESHOLD                36
+#define MUL_TOOM33_THRESHOLD               117
+#define MUL_TOOM44_THRESHOLD               462
+#define MUL_TOOM6H_THRESHOLD                 0  /* always */
+#define MUL_TOOM8H_THRESHOLD               620
+
+#define MUL_TOOM32_TO_TOOM43_THRESHOLD     130
+#define MUL_TOOM32_TO_TOOM53_THRESHOLD     573
+#define MUL_TOOM42_TO_TOOM53_THRESHOLD     209
+#define MUL_TOOM42_TO_TOOM63_THRESHOLD     209
+#define MUL_TOOM43_TO_TOOM54_THRESHOLD     305
+
+#define SQR_BASECASE_THRESHOLD               0  /* always (native) */
+#define SQR_TOOM2_THRESHOLD                 50
+#define SQR_TOOM3_THRESHOLD                181
+#define SQR_TOOM4_THRESHOLD                686
+#define SQR_TOOM6_THRESHOLD                  0  /* always */
+#define SQR_TOOM8_THRESHOLD                915
+
+#define MULMID_TOOM42_THRESHOLD             72
+
+#define MULMOD_BNM1_THRESHOLD               25
+#define SQRMOD_BNM1_THRESHOLD               30
+
+#define MUL_FFT_MODF_THRESHOLD             476  /* k = 5 */
+#define MUL_FFT_TABLE3                                      \
+  { {    476, 5}, {     21, 6}, {     11, 5}, {     25, 6}, \
+    {     13, 5}, {     27, 6}, {     25, 7}, {     13, 6}, \
+    {     28, 7}, {     15, 6}, {     32, 7}, {     17, 6}, \
+    {     35, 7}, {     19, 6}, {     39, 7}, {     23, 6}, \
+    {     47, 7}, {     29, 8}, {     15, 7}, {     35, 8}, \
+    {     19, 7}, {     43, 8}, {     23, 7}, {     51, 8}, \
+    {     27, 7}, {     55, 8}, {     31, 7}, {     63, 8}, \
+    {     43, 9}, {     23, 8}, {     55, 9}, {     31, 8}, \
+    {     71, 9}, {     39, 8}, {     83, 9}, {     47, 8}, \
+    {     95, 9}, {     55,10}, {     31, 9}, {     79,10}, \
+    {     47, 9}, {    103,11}, {     31,10}, {     63, 9}, \
+    {    135,10}, {     79, 9}, {    159,10}, {     95, 9}, \
+    {    191,10}, {    111,11}, {     63,10}, {    127, 9}, \
+    {    255,10}, {    143, 9}, {    287,10}, {    159,11}, \
+    {     95,10}, {    191, 9}, {    383,12}, {   4096,13}, \
+    {   8192,14}, {  16384,15}, {  32768,16} }
+#define MUL_FFT_TABLE3_SIZE 63
+#define MUL_FFT_THRESHOLD                 4736
+
+#define SQR_FFT_MODF_THRESHOLD             464  /* k = 5 */
+#define SQR_FFT_TABLE3                                      \
+  { {    464, 5}, {     25, 6}, {     13, 5}, {     27, 6}, \
+    {     29, 7}, {     15, 6}, {     33, 7}, {     17, 6}, \
+    {     36, 7}, {     19, 6}, {     39, 7}, {     23, 6}, \
+    {     47, 7}, {     29, 8}, {     15, 7}, {     35, 8}, \
+    {     19, 7}, {     43, 8}, {     23, 7}, {     49, 8}, \
+    {     27, 7}, {     55, 8}, {     31, 7}, {     63, 8}, \
+    {     35, 7}, {     71, 8}, {     43, 9}, {     23, 8}, \
+    {     55, 9}, {     31, 8}, {     71, 9}, {     39, 8}, \
+    {     83, 9}, {     47, 8}, {     95, 9}, {     55,10}, \
+    {     31, 9}, {     79,10}, {     47, 9}, {    103,11}, \
+    {     31,10}, {     63, 9}, {    135,10}, {     79, 9}, \
+    {    159,10}, {     95, 9}, {    191,10}, {    111,11}, \
+    {     63,10}, {    127, 9}, {    255,10}, {    143, 9}, \
+    {    287,10}, {    159,11}, {     95,10}, {    191, 9}, \
+    {    383,12}, {   4096,13}, {   8192,14}, {  16384,15}, \
+    {  32768,16} }
+#define SQR_FFT_TABLE3_SIZE 61
+#define SQR_FFT_THRESHOLD                 3776
+
+#define MULLO_BASECASE_THRESHOLD             0  /* always */
+#define MULLO_DC_THRESHOLD                  67
+#define MULLO_MUL_N_THRESHOLD             8907
+
+#define DC_DIV_QR_THRESHOLD                 40
+#define DC_DIVAPPR_Q_THRESHOLD             156
+#define DC_BDIV_QR_THRESHOLD                71
+#define DC_BDIV_Q_THRESHOLD                208
+
+#define INV_MULMOD_BNM1_THRESHOLD           70
+#define INV_NEWTON_THRESHOLD               151
+#define INV_APPR_THRESHOLD                 150
+
+#define BINV_NEWTON_THRESHOLD              375
+#define REDC_1_TO_REDC_2_THRESHOLD           5
+#define REDC_2_TO_REDC_N_THRESHOLD         134
+
+#define MU_DIV_QR_THRESHOLD               2130
+#define MU_DIVAPPR_Q_THRESHOLD            2130
+#define MUPI_DIV_QR_THRESHOLD               80
+#define MU_BDIV_QR_THRESHOLD              1787
+#define MU_BDIV_Q_THRESHOLD               2130
+
+#define POWM_SEC_TABLE  7,32,460,1705
+
+#define MATRIX22_STRASSEN_THRESHOLD         19
+#define HGCD_THRESHOLD                      85
+#define HGCD_APPR_THRESHOLD                119
+#define HGCD_REDUCE_THRESHOLD             3389
+#define GCD_DC_THRESHOLD                   333
+#define GCDEXT_DC_THRESHOLD                309
+#define JACOBI_BASE_METHOD                   1
+
+#define GET_STR_DC_THRESHOLD                21
+#define GET_STR_PRECOMPUTE_THRESHOLD        41
+#define SET_STR_DC_THRESHOLD               527
+#define SET_STR_PRECOMPUTE_THRESHOLD      1323
+
+#define FAC_DSC_THRESHOLD                  414
+#define FAC_ODD_THRESHOLD                  154
diff --git a/mpn/arm/v6/mul_1.asm b/mpn/arm/v6/mul_1.asm

new file mode 100644 (file)

index 0000000..8d0c696
--- /dev/null
+++ b/mpn/arm/v6/mul_1.asm
@@ -0,0 +1,102 @@
+dnl  ARM mpn_mul_1.
+
+dnl  Copyright 2012 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C           cycles/limb
+C StrongARM:    -
+C XScale        -
+C Cortex-A8     ?
+C Cortex-A9     3.25
+C Cortex-A15    ?
+
+C TODO
+C  * Micro-optimise feed-in code.
+C  * Optimise for n=1,2 by delaying register saving.
+C  * Try using ldm/stm.
+
+define(`rp',`r0')
+define(`up',`r1')
+define(`n', `r2')
+define(`v0',`r3')
+
+ASM_START()
+PROLOGUE(mpn_mul_1)   C rp[0..n-1] = up[0..n-1] * v0, final carry limb returned in r0
+       stmfd   sp!, { r4, r5, r6, r7 }   C save the registers used as temporaries
+
+       ands    r6, n, #3   C r6 = n mod 4 selects the feed-in path
+       mov     r12, #0   C r12 = carry limb passed between umaal steps
+       beq     L(fi0)   C n mod 4 = 0
+       cmp     r6, #2
+       bcc     L(fi1)   C n mod 4 = 1
+       beq     L(fi2)   C n mod 4 = 2
+
+L(fi3):        ldr     r4, [up], #4   C n mod 4 = 3 falls through to here
+       mov     r6, #0
+       ldr     r5, [up], #4
+       b       L(lo3)
+
+L(fi0):        ldr     r5, [up], #4
+       add     rp, rp, #4   C bias rp to match the store offsets at the lo0 entry
+       mov     r7, #0
+       ldr     r4, [up], #4
+       b       L(lo0)
+
+L(fi1):        ldr     r4, [up], #4
+       mov     r6, #0
+       add     rp, rp, #8   C bias rp to match the store offsets at lo1 and at L(1)
+       subs    n, n, #1
+       beq     L(1)   C n was 1: only the final multiply remains
+       ldr     r5, [up], #4
+       b       L(lo1)
+
+L(fi2):        ldr     r5, [up], #4
+       add     rp, rp, #12   C bias rp to match the store offsets after lo2
+       mov     r7, #0
+       ldr     r4, [up], #4
+       b       L(lo2)
+
+       ALIGN(16)
+L(top):        mov     r6, #0   C 4-way unrolled loop; r6 and r7 alternate as product low limbs
+       ldr     r5, [up], #4
+       str     r7, [rp, #-12]
+L(lo1):        umaal   r6, r12, r4, v0   C r12:r6 = r4 * v0 + r6 + r12
+       mov     r7, #0
+       ldr     r4, [up], #4
+       str     r6, [rp, #-8]
+L(lo0):        umaal   r7, r12, r5, v0
+       mov     r6, #0
+       ldr     r5, [up], #4
+       str     r7, [rp, #-4]
+L(lo3):        umaal   r6, r12, r4, v0
+       mov     r7, #0
+       ldr     r4, [up], #4
+       str     r6, [rp], #16   C store, then advance rp by four limbs
+L(lo2):        umaal   r7, r12, r5, v0
+       subs    n, n, #4
+       bhi     L(top)   C loop while more than four limbs remained before the subtract
+
+       mov     r6, #0
+       str     r7, [rp, #-12]
+L(1):  umaal   r6, r12, r4, v0   C last limb product
+       str     r6, [rp, #-8]
+       mov     r0, r12   C return the carry-out limb
+       ldmfd   sp!, { r4, r5, r6, r7 }
+       bx      lr
+EPILOGUE()
diff --git a/mpn/arm/v6/mul_2.asm b/mpn/arm/v6/mul_2.asm

new file mode 100644 (file)

index 0000000..3fdb9c6
--- /dev/null
+++ b/mpn/arm/v6/mul_2.asm
@@ -0,0 +1,119 @@
+dnl  ARM mpn_mul_2.
+
+dnl  Contributed to the GNU project by Torbjorn Granlund.
+
+dnl  Copyright 2012 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C           cycles/limb
+C StrongARM:    -
+C XScale        -
+C Cortex-A8     ?
+C Cortex-A9     2.25
+C Cortex-A15    ?
+
+C TODO
+C  * This is a trivial edit of the addmul_2 code.  Check for simplifications,
+C    and possible speedups to 2.0 c/l.
+
+define(`rp',`r0')
+define(`up',`r1')
+define(`n', `r2')
+define(`vp',`r3')
+
+define(`v0',`r6')
+define(`v1',`r7')
+define(`u0',`r3')
+define(`u1',`r9')
+
+define(`cya',`r8')
+define(`cyb',`r12')
+
+
+ASM_START()
+PROLOGUE(mpn_mul_2)   C multiply up[0..n-1] by the two limbs at vp; rp[0..n] stored, top limb returned in r0
+       push    { r4, r5, r6, r7, r8, r9 }
+
+       ldm     vp, { v0, v1 }   C load both multiplier limbs; r3 (vp) is reused as u0 afterwards
+       mov     cya, #0   C cya = carry of the v0 product chain
+       mov     cyb, #0   C cyb = carry of the v1 product chain
+
+       tst     n, #1
+       beq     L(evn)   C n even
+L(odd):        mov     r5, #0   C n odd
+       ldr     u0, [up, #0]
+       mov     r4, #0
+       tst     n, #2
+       beq     L(fi1)   C n mod 4 = 1
+L(fi3):        sub     up, up, #12   C n mod 4 = 3: bias pointers for entry at lo3
+       sub     rp, rp, #16
+       b       L(lo3)
+L(fi1):        sub     n, n, #1
+       sub     up, up, #4   C bias pointers for entry at lo1
+       sub     rp, rp, #8
+       b       L(lo1)
+L(evn):        mov     r4, #0
+       ldr     u1, [up, #0]
+       mov     r5, #0
+       tst     n, #2
+       bne     L(fi2)   C n mod 4 = 2
+L(fi0):        sub     up, up, #8   C n mod 4 = 0: bias pointers for entry at lo0
+       sub     rp, rp, #12
+       b       L(lo0)
+L(fi2):        subs    n, n, #2
+       sub     rp, rp, #4
+       bls     L(end)   C n was 2: go straight to the wind-down code
+
+       ALIGN(16)
+L(top):        ldr     u0, [up, #4]   C 4-way unrolled loop; r4 and r5 alternate as accumulators
+       umaal   r4, cya, u1, v0   C cya:r4 = u1 * v0 + r4 + cya
+       str     r4, [rp, #4]
+       mov     r4, #0
+       umaal   r5, cyb, u1, v1   C cyb:r5 = u1 * v1 + r5 + cyb
+L(lo1):        ldr     u1, [up, #8]
+       umaal   r5, cya, u0, v0
+       str     r5, [rp, #8]
+       mov     r5, #0
+       umaal   r4, cyb, u0, v1
+L(lo0):        ldr     u0, [up, #12]
+       umaal   r4, cya, u1, v0
+       str     r4, [rp, #12]
+       mov     r4, #0
+       umaal   r5, cyb, u1, v1
+L(lo3):        ldr     u1, [up, #16]!   C load with writeback: up advances four limbs
+       umaal   r5, cya, u0, v0
+       str     r5, [rp, #16]!   C store with writeback: rp advances four limbs
+       mov     r5, #0
+       umaal   r4, cyb, u0, v1
+       subs    n, n, #4
+       bhi     L(top)
+
+L(end):        umaal   r4, cya, u1, v0   C wind-down: the last two source limbs
+       ldr     u0, [up, #4]
+       umaal   r5, cyb, u1, v1
+       str     r4, [rp, #4]
+       umaal   r5, cya, u0, v0
+       umaal   cya, cyb, u0, v1   C fold both carry chains into the top two limbs
+       str     r5, [rp, #8]
+       str     cya, [rp, #12]
+       mov     r0, cyb   C return the most significant limb
+
+       pop     { r4, r5, r6, r7, r8, r9 }
+       bx      r14
+EPILOGUE()
diff --git a/mpn/arm/v6/sqr_basecase.asm b/mpn/arm/v6/sqr_basecase.asm

new file mode 100644 (file)

index 0000000..e1dc478
--- /dev/null
+++ b/mpn/arm/v6/sqr_basecase.asm
@@ -0,0 +1,507 @@
+dnl  ARM v6 mpn_sqr_basecase.
+
+dnl  Contributed to the GNU project by Torbjorn Granlund.
+
+dnl  Copyright 2012 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C Code structure:
+C
+C
+C        m_2(0m4)        m_2(2m4)        m_2(1m4)        m_2(3m4)
+C           |               |               |               |
+C           |               |               |               |
+C           |               |               |               |
+C          \|/             \|/             \|/             \|/
+C              ____________                   ____________
+C             /            \                 /            \
+C            \|/            \               \|/            \
+C         am_2(3m4)       am_2(1m4)       am_2(0m4)       am_2(2m4)
+C            \            /|\                \            /|\
+C             \____________/                  \____________/
+C                       \                        /
+C                        \                      /
+C                         \                    /
+C                       tail(0m2)          tail(1m2)
+C                            \              /
+C                             \            /
+C                            sqr_diag_addlsh1
+
+C TODO
+C  * Further tweak counter and updates in outer loops.  (This could save
+C    perhaps 5n cycles).
+C  * Try to use fewer registers.  Perhaps coalesce r10 branch target and n_saved.
+C    (This could save 2-3 cycles for n > 4.)
+C  * Optimise sqr_diag_addlsh1 loop.  (This could save O(n) cycles.)
+C  * Implement larger final corners (xit/tix).  Also stop loops earlier
+C    suppressing writes of upper-most rp[] values.  (This could save 10-20
+C    cycles for n > 4.)
+C  * Is the branch table really faster than discrete branches?
+
+define(`rp',      r0)
+define(`up',      r1)
+define(`n',       r2)
+
+define(`v0',      r3)
+define(`v1',      r6)
+define(`i',       r8)
+define(`n_saved', r14)
+define(`cya',     r11)
+define(`cyb',     r12)
+define(`u0',      r7)
+define(`u1',      r9)
+
+ASM_START()
+PROLOGUE(mpn_sqr_basecase)   C square the n-limb operand at up, writing the product at rp
+       and     r12, n, #3   C r12 = n mod 4
+       cmp     n, #4
+       addgt   r12, r12, #4   C n > 4 selects the second half of the branch table
+       add     pc, pc, r12, lsl #2   C computed jump; index 0 lands on the branch after the nop
+       nop
+       b       L(4)   C index 0: n = 4
+       b       L(1)   C index 1: n = 1
+       b       L(2)   C index 2: n = 2
+       b       L(3)   C index 3: n = 3
+       b       L(0m4)   C index 4: n > 4, n mod 4 = 0
+       b       L(1m4)   C index 5: n > 4, n mod 4 = 1
+       b       L(2m4)   C index 6: n > 4, n mod 4 = 2
+       b       L(3m4)   C index 7: n > 4, n mod 4 = 3
+
+
+L(1m4):        push    {r4-r10,r11,r14}   C feed-in for the first (store-only) pass
+       mov     n_saved, n   C keep the original n for sqr_diag_addlsh1
+       sub     i, n, #4   C i = inner loop counter
+       sub     n, n, #2   C each pass consumes two limbs of up as v0,v1
+       add     r10, pc, #L(am2_2m4)-.-8   C r10 = wind-down code reached by bx r10 after the loop
+       ldm     up, {v0,v1,u0}
+       sub     up, up, #4   C bias up for entry at ko0
+       mov     cyb, #0
+       mov     r5, #0
+       umull   r4, cya, v1, v0   C first cross product
+       str     r4, [rp], #-12   C store it, then bias rp for entry at ko0
+       mov     r4, #0
+       b       L(ko0)
+
+L(3m4):        push    {r4-r10,r11,r14}   C same feed-in pattern, entering the loop at ko2
+       mov     n_saved, n
+       sub     i, n, #4
+       sub     n, n, #2
+       add     r10, pc, #L(am2_0m4)-.-8
+       ldm     up, {v0,v1,u0}
+       add     up, up, #4
+       mov     cyb, #0
+       mov     r5, #0
+       umull   r4, cya, v1, v0
+       str     r4, [rp], #-4
+       mov     r4, #0
+       b       L(ko2)
+
+L(2m4):        push    {r4-r10,r11,r14}   C same feed-in pattern, entering the loop at ko1
+       mov     n_saved, n
+       sub     i, n, #4
+       sub     n, n, #2
+       add     r10, pc, #L(am2_3m4)-.-8
+       ldm     up, {v0,v1,u1}
+       mov     cyb, #0
+       mov     r4, #0
+       umull   r5, cya, v1, v0
+       str     r5, [rp], #-8
+       mov     r5, #0
+       b       L(ko1)
+
+L(0m4):        push    {r4-r10,r11,r14}   C same feed-in pattern, falling through into top
+       mov     n_saved, n
+       sub     i, n, #4
+       sub     n, n, #2
+       add     r10, pc, #L(am2_1m4)-.-8
+       ldm     up, {v0,v1,u1}
+       mov     cyb, #0
+       mov     r4, #0
+       add     up, up, #8
+       umull   r5, cya, v1, v0
+       str     r5, [rp, #0]
+       mov     r5, #0
+
+L(top):        ldr     u0, [up, #4]   C first pass: products are stored without reading rp
+       umaal   r4, cya, u1, v0   C cya:r4 = u1 * v0 + r4 + cya
+       str     r4, [rp, #4]
+       mov     r4, #0
+       umaal   r5, cyb, u1, v1   C cyb:r5 = u1 * v1 + r5 + cyb
+L(ko2):        ldr     u1, [up, #8]
+       umaal   r5, cya, u0, v0
+       str     r5, [rp, #8]
+       mov     r5, #0
+       umaal   r4, cyb, u0, v1
+L(ko1):        ldr     u0, [up, #12]
+       umaal   r4, cya, u1, v0
+       str     r4, [rp, #12]
+       mov     r4, #0
+       umaal   r5, cyb, u1, v1
+L(ko0):        ldr     u1, [up, #16]!
+       umaal   r5, cya, u0, v0
+       str     r5, [rp, #16]!
+       mov     r5, #0
+       umaal   r4, cyb, u0, v1
+       subs    i, i, #4
+       bhi     L(top)
+       bx      r10   C continue at the wind-down label chosen by the feed-in code
+
+L(evnloop):
+       subs    i, n, #4
+       sub     n, n, #2
+       blt     L(tix)   C flags are from the subs above: n < 4, finish in tix
+       ldm     up, {v0,v1,u0}
+       add     up, up, #4
+       mov     cya, #0
+       mov     cyb, #0
+       ldm     rp, {r4,r5}   C accumulating pass: existing rp limbs are loaded and added to
+       sub     rp, rp, #4
+       umaal   r4, cya, v1, v0
+       str     r4, [rp, #4]
+       ldr     r4, [rp, #12]
+       b       L(lo2)
+L(ua2):        ldr     u0, [up, #4]
+       umaal   r4, cya, u1, v0
+       str     r4, [rp, #4]
+       ldr     r4, [rp, #12]
+       umaal   r5, cyb, u1, v1
+L(lo2):        ldr     u1, [up, #8]
+       umaal   r5, cya, u0, v0
+       str     r5, [rp, #8]
+       ldr     r5, [rp, #16]
+       umaal   r4, cyb, u0, v1
+       ldr     u0, [up, #12]
+       umaal   r4, cya, u1, v0
+       str     r4, [rp, #12]
+       ldr     r4, [rp, #20]
+       umaal   r5, cyb, u1, v1
+       ldr     u1, [up, #16]!
+       umaal   r5, cya, u0, v0
+       str     r5, [rp, #16]!
+       ldr     r5, [rp, #8]
+       umaal   r4, cyb, u0, v1
+       subs    i, i, #4
+       bhi     L(ua2)
+L(am2_0m4):
+       umaal   r4, cya, u1, v0   C wind-down of the pass
+       ldr     u0, [up, #4]
+       umaal   r5, cyb, u1, v1
+       str     r4, [rp, #4]
+       umaal   r5, cya, u0, v0
+       umaal   cya, cyb, u0, v1
+       str     r5, [rp, #8]
+       str     cya, [rp, #12]
+       str     cyb, [rp, #16]
+       sub     up, up, n, lsl #2   C rewind up and rp by n limbs for the next pass
+       sub     rp, rp, n, lsl #2
+       add     up, up, #8
+       sub     i, n, #4
+       sub     n, n, #2
+       ldm     up, {v0,v1,u0}   C next pair of multiplier limbs
+       sub     up, up, #4
+       mov     cya, #0
+       mov     cyb, #0
+       ldr     r4, [rp, #24]
+       ldr     r5, [rp, #28]
+       add     rp, rp, #12
+       umaal   r4, cya, v1, v0
+       str     r4, [rp, #12]
+       ldr     r4, [rp, #20]
+       b       L(lo0)
+L(ua0):        ldr     u0, [up, #4]
+       umaal   r4, cya, u1, v0
+       str     r4, [rp, #4]
+       ldr     r4, [rp, #12]
+       umaal   r5, cyb, u1, v1
+       ldr     u1, [up, #8]
+       umaal   r5, cya, u0, v0
+       str     r5, [rp, #8]
+       ldr     r5, [rp, #16]
+       umaal   r4, cyb, u0, v1
+       ldr     u0, [up, #12]
+       umaal   r4, cya, u1, v0
+       str     r4, [rp, #12]
+       ldr     r4, [rp, #20]
+       umaal   r5, cyb, u1, v1
+L(lo0):        ldr     u1, [up, #16]!
+       umaal   r5, cya, u0, v0
+       str     r5, [rp, #16]!
+       ldr     r5, [rp, #8]
+       umaal   r4, cyb, u0, v1
+       subs    i, i, #4
+       bhi     L(ua0)
+L(am2_2m4):
+       umaal   r4, cya, u1, v0   C wind-down of the pass
+       ldr     u0, [up, #4]
+       umaal   r5, cyb, u1, v1
+       str     r4, [rp, #4]
+       umaal   r5, cya, u0, v0
+       umaal   cya, cyb, u0, v1
+       str     r5, [rp, #8]
+       str     cya, [rp, #12]
+       str     cyb, [rp, #16]
+       sub     up, up, n, lsl #2   C rewind up and rp by n limbs for the next pass
+       sub     rp, rp, n, lsl #2
+       add     up, up, #8
+       add     rp, rp, #24
+       b       L(evnloop)
+
+
+L(oddloop):
+       subs    i, n, #4
+       sub     n, n, #2
+       blt     L(xit)   C flags are from the subs above: n < 4, finish in xit
+       ldm     up, {v0,v1,u1}
+       mov     cya, #0
+       mov     cyb, #0
+       sub     rp, rp, #8
+       ldr     r5, [rp, #8]   C accumulating pass: existing rp limbs are loaded and added to
+       ldr     r4, [rp, #12]
+       umaal   r5, cya, v1, v0
+       str     r5, [rp, #8]
+       ldr     r5, [rp, #16]
+       b       L(lo1)
+L(ua1):        ldr     u0, [up, #4]
+       umaal   r4, cya, u1, v0
+       str     r4, [rp, #4]
+       ldr     r4, [rp, #12]
+       umaal   r5, cyb, u1, v1
+       ldr     u1, [up, #8]
+       umaal   r5, cya, u0, v0
+       str     r5, [rp, #8]
+       ldr     r5, [rp, #16]
+       umaal   r4, cyb, u0, v1
+L(lo1):        ldr     u0, [up, #12]
+       umaal   r4, cya, u1, v0
+       str     r4, [rp, #12]
+       ldr     r4, [rp, #20]
+       umaal   r5, cyb, u1, v1
+       ldr     u1, [up, #16]!
+       umaal   r5, cya, u0, v0
+       str     r5, [rp, #16]!
+       ldr     r5, [rp, #8]
+       umaal   r4, cyb, u0, v1
+       subs    i, i, #4
+       bhi     L(ua1)
+L(am2_3m4):
+       umaal   r4, cya, u1, v0   C wind-down of the pass
+       ldr     u0, [up, #4]
+       umaal   r5, cyb, u1, v1
+       str     r4, [rp, #4]
+       umaal   r5, cya, u0, v0
+       umaal   cya, cyb, u0, v1
+       str     r5, [rp, #8]
+       str     cya, [rp, #12]
+       str     cyb, [rp, #16]
+       sub     up, up, n, lsl #2   C rewind up and rp by n limbs for the next pass
+       sub     rp, rp, n, lsl #2
+       add     up, up, #8
+       add     rp, rp, #24
+       subs    i, n, #4   C these flags are consumed by the bls L(e3) below
+       sub     n, n, #2
+       ldm     up, {v0,v1,u1}
+       mov     cya, #0
+       mov     cyb, #0
+       ldr     r5, [rp, #0]
+       ldr     r4, [rp, #4]
+       add     up, up, #8
+       umaal   r5, cya, v1, v0
+       str     r5, [rp, #0]
+       ldr     r5, [rp, #8]
+       bls     L(e3)   C i <= 0: skip the unrolled loop
+L(ua3):        ldr     u0, [up, #4]
+       umaal   r4, cya, u1, v0
+       str     r4, [rp, #4]
+       ldr     r4, [rp, #12]
+       umaal   r5, cyb, u1, v1
+       ldr     u1, [up, #8]
+       umaal   r5, cya, u0, v0
+       str     r5, [rp, #8]
+       ldr     r5, [rp, #16]
+       umaal   r4, cyb, u0, v1
+       ldr     u0, [up, #12]
+       umaal   r4, cya, u1, v0
+       str     r4, [rp, #12]
+       ldr     r4, [rp, #20]
+       umaal   r5, cyb, u1, v1
+       ldr     u1, [up, #16]!
+       umaal   r5, cya, u0, v0
+       str     r5, [rp, #16]!
+       ldr     r5, [rp, #8]
+       umaal   r4, cyb, u0, v1
+       subs    i, i, #4
+       bhi     L(ua3)
+L(e3):
+L(am2_1m4):
+       umaal   r4, cya, u1, v0   C wind-down of the pass
+       ldr     u0, [up, #4]
+       umaal   r5, cyb, u1, v1
+       str     r4, [rp, #4]
+       umaal   r5, cya, u0, v0
+       umaal   cya, cyb, u0, v1
+       str     r5, [rp, #8]
+       str     cya, [rp, #12]
+       str     cyb, [rp, #16]
+       sub     up, up, n, lsl #2   C rewind up and rp by n limbs for the next pass
+       sub     rp, rp, n, lsl #2
+       add     up, up, #8
+       add     rp, rp, #24
+       b       L(oddloop)
+
+L(xit):        ldm     up!, {v0,u0}   C final corner of the odd chain: one remaining cross product
+       ldr     cya, [rp], #12
+       mov     cyb, #0
+       umaal   cya, cyb, u0, v0
+       b       L(sqr_diag_addlsh1)
+
+L(tix):        ldm     up!, {v0,v1,u0}   C final corner of the even chain: three remaining cross products
+       ldm     rp, {r4,r5}
+       mov     cya, #0
+       mov     cyb, #0
+       umaal   r4, cya, v1, v0
+       umaal   r5, cya, u0, v0
+       stm     rp, {r4,r5}
+       umaal   cya, cyb, u0, v1
+       add     rp, rp, #20
+C      b       L(sqr_diag_addlsh1)
+
+
+define(`w0',  r6)
+define(`w1',  r7)
+define(`w2',  r8)
+define(`rbx', r9)
+
+L(sqr_diag_addlsh1):
+       str     cya, [rp, #-12]   C store the last two limbs left by xit or tix
+       str     cyb, [rp, #-8]
+       sub     n, n_saved, #1   C loop count = original n - 1
+       sub     up, up, n_saved, lsl #2   C step up back by n_saved limbs
+       sub     rp, rp, n_saved, lsl #3   C step rp back by 2 * n_saved limbs
+       ldr     r3, [up], #4
+       umull   w1, r5, r3, r3   C r5:w1 = square of the first limb loaded
+       mov     w2, #0
+C      cmn     r0, #0                  C clear cy (already clear by luck)
+       b       L(lm)
+
+L(tsd):        adds    w0, w0, rbx   C add the square computed in the previous iteration
+       adcs    w1, w1, r4
+       str     w0, [rp, #0]
+L(lm): ldr     w0, [rp, #4]
+       str     w1, [rp, #4]
+       ldr     w1, [rp, #8]!
+       add     rbx, r5, w2   C high half of previous square plus saved carry
+       adcs    w0, w0, w0   C double the two limbs just loaded, with carry in
+       ldr     r3, [up], #4
+       adcs    w1, w1, w1
+       mov     w2, #0
+       adc     w2, w2, w2   C w2 = carry out of the doubling
+       umull   r4, r5, r3, r3   C r5:r4 = square of the next limb of up
+       subs    n, n, #1
+       bne     L(tsd)
+
+       adds    w0, w0, rbx   C final iteration: add the last square and store the top limbs
+       adcs    w1, w1, r4
+       adc     w2, r5, w2
+       stm     rp, {w0,w1,w2}
+
+       pop     {r4-r10,r11,pc}   C restore registers and return (pc takes the pushed r14)
+
+
+C Straight line code for n <= 4
+
+L(1):  ldr     r3, [up, #0]
+       umull   r1, r2, r3, r3   C r2:r1 = up[0]^2
+       stm     rp, {r1,r2}
+       bx      r14
+
+L(2):  push    {r4-r5}
+       ldm     up, {r5,r12}   C r5 = up[0], r12 = up[1]
+       umull   r1, r2, r5, r5   C r2:r1 = up[0]^2
+       umull   r3, r4, r12, r12   C r4:r3 = up[1]^2
+       umull   r5, r12, r5, r12   C r12:r5 = up[0] * up[1]
+       adds    r5, r5, r5   C double the cross product
+       adcs    r12, r12, r12
+       adc     r4, r4, #0   C carry out of the doubling goes into the top limb
+       adds    r2, r2, r5   C add the doubled cross product to the squares
+       adcs    r3, r3, r12
+       adc     r4, r4, #0
+       stm     rp, {r1,r2,r3,r4}
+       pop     {r4-r5}
+       bx      r14
+
+L(3):  push    {r4-r11}
+       ldm     up, {r7,r8,r9}   C r7,r8,r9 = up[0],up[1],up[2]
+       umull   r1, r2, r7, r7   C the three squares go to r2:r1, r4:r3, r6:r5
+       umull   r3, r4, r8, r8
+       umull   r5, r6, r9, r9
+       umull   r10, r11, r7, r8   C cross products accumulate in r10,r11,r12,r7
+       mov     r12, #0
+       umlal   r11, r12, r7, r9
+       mov     r7, #0   C up[0] is no longer needed, r7 becomes the top cross limb
+       umlal   r12, r7, r8, r9
+       adds    r10, r10, r10   C double the cross-product sum
+       adcs    r11, r11, r11
+       adcs    r12, r12, r12
+       adcs    r7, r7, r7
+       adc     r6, r6, #0
+       adds    r2, r2, r10   C add it to the squares
+       adcs    r3, r3, r11
+       adcs    r4, r4, r12
+       adcs    r5, r5, r7
+       adc     r6, r6, #0
+       stm     rp, {r1,r2,r3,r4,r5,r6}
+       pop     {r4-r11}
+       bx      r14
+
+L(4):  push    {r4-r11, r14}
+       ldm     up, {r9,r10,r11,r12}   C r9..r12 = up[0..3]
+       umull   r1, r2, r9, r9   C the four squares
+       umull   r3, r4, r10, r10
+       umull   r5, r6, r11, r11
+       umull   r7, r8, r12, r12
+       stm     rp, {r1,r2,r3,r4,r5,r6,r7}   C park the low 7 square limbs in rp; the top one stays in r8
+       umull   r1, r2, r9, r10   C cross products accumulate in r1..r6
+       mov     r3, #0
+       umlal   r2, r3, r9, r11
+       mov     r4, #0
+       umlal   r3, r4, r9, r12
+       mov     r5, #0
+       umlal   r3, r5, r10, r11
+       umaal   r4, r5, r10, r12
+       mov     r6, #0
+       umlal   r5, r6, r11, r12
+       adds    r1, r1, r1   C double the cross-product sum
+       adcs    r2, r2, r2
+       adcs    r3, r3, r3
+       adcs    r4, r4, r4
+       adcs    r5, r5, r5
+       adcs    r6, r6, r6
+       adc     r7, r8, #0   C r7 = top square limb plus carry out of the doubling
+       add     rp, rp, #4   C limb 0 of the result is already final
+       ldm     rp, {r8,r9,r10,r11,r12,r14}   C reload the parked square limbs 1..6
+       adds    r1, r1, r8
+       adcs    r2, r2, r9
+       adcs    r3, r3, r10
+       adcs    r4, r4, r11
+       adcs    r5, r5, r12
+       adcs    r6, r6, r14
+       adc     r7, r7, #0
+       stm     rp, {r1,r2,r3,r4,r5,r6,r7}   C write result limbs 1..7
+       pop     {r4-r11, pc}
+EPILOGUE()
diff --git a/mpn/arm/v6t2/divrem_1.asm b/mpn/arm/v6t2/divrem_1.asm

new file mode 100644 (file)

index 0000000..5dca904
--- /dev/null
+++ b/mpn/arm/v6t2/divrem_1.asm
@@ -0,0 +1,200 @@
+dnl  ARM v6t2 mpn_divrem_1 and mpn_preinv_divrem_1.
+
+dnl  Contributed to the GNU project by Torbjorn Granlund.
+
+dnl  Copyright 2012 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C              norm    unorm   frac
+C StrongARM     ?
+C XScale        ?
+C Cortex-A8     ?
+C Cortex-A9     13      14      13
+C Cortex-A15    ?
+
+C TODO
+C  * Optimise inner-loops better, they could likely run a cycle or two faster.
+C  * Decrease register usage, streamline non-loop code.
+
+define(`qp_arg',  `r0')
+define(`fn',      `r1')
+define(`up_arg',  `r2')
+define(`n_arg',   `r3')
+define(`d_arg',   `0')
+define(`dinv_arg',`4')
+define(`cnt_arg', `8')
+
+define(`n',       `r9')
+define(`qp',      `r5')
+define(`up',      `r6')
+define(`cnt',     `r7')
+define(`tnc',     `r10')
+define(`dinv',    `r0')
+define(`d',       `r4')
+
+ASM_START()
+PROLOGUE(mpn_preinv_divrem_1)   C entry taking d, dinv and cnt on the stack; joins mpn_divrem_1 at nent or uent
+       stmfd   sp!, {r4, r5, r6, r7, r8, r9, r10, r11, lr}   C 9 registers pushed, hence the 9*4 offsets below
+       ldr     d,    [sp, #9*4+d_arg]
+       ldr     cnt,  [sp, #9*4+cnt_arg]
+       str     r1, [sp, #9*4+d_arg]    C reuse d stack slot for fn
+       sub     n, r3, #1   C n = n_arg - 1
+       add     r3, r1, n   C r3 = fn + n_arg - 1, index of the top quotient limb
+       cmp     d, #0   C sign bit of d decides normalised vs unnormalised; flags survive to the blt
+       add     qp, qp_arg, r3, lsl #2  C put qp at Q[] end
+       add     up, up_arg, n, lsl #2   C put up at U[] end
+       ldr     dinv, [sp, #9*4+dinv_arg]   C dinv shares r0 with qp_arg, so it is loaded after qp is formed
+       blt     L(nent)   C top bit of d set: normalised path
+       b       L(uent)   C otherwise: unnormalised path, d is shifted left by cnt there
+EPILOGUE()
+
+PROLOGUE(mpn_divrem_1)   C divide the n-limb number at up by d, writing n + fn quotient limbs downwards from the end of qp; remainder returned in r0
+       stmfd   sp!, {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+       sub     n, r3, #1   C n = n_arg - 1
+       ldr     d, [sp, #9*4+d_arg]     C d
+       str     r1, [sp, #9*4+d_arg]    C reuse d stack slot for fn
+       add     r3, r1, n   C r3 = fn + n_arg - 1, index of the top quotient limb
+       cmp     d, #0
+       add     qp, qp_arg, r3, lsl #2  C put qp at Q[] end
+       add     up, up_arg, n, lsl #2   C put up at U[] end
+       blt     L(normalised)   C top bit of d already set
+
+L(unnorm):
+       clz     cnt, d   C cnt = number of leading zero bits in d
+       mov     r0, d, lsl cnt          C pass d << cnt
+       bl      mpn_invert_limb   C result in r0 is used as dinv
+L(uent):
+       mov     d, d, lsl cnt           C d <<= cnt
+       cmp     n, #0   C these flags feed both the blt and the beq L(uend) below
+       mov     r1, #0                  C r
+       blt     L(frac)   C n_arg was 0: only fraction limbs to produce
+
+       ldr     r11, [up, #0]   C most significant limb of U
+
+       rsb     tnc, cnt, #32   C tnc = 32 - cnt
+       mov     r1, r11, lsr tnc   C r = bits shifted out of the top limb
+       mov     r11, r11, lsl cnt
+       beq     L(uend)   C n_arg was 1: a single division step remains
+
+       ldr     r3, [up, #-4]!
+       orr     r2, r11, r3, lsr tnc   C r2 = next limb of U << cnt
+       b       L(mid)
+
+L(utop):
+       mls     r1, d, r8, r11   C r = r11 - d * q (candidate remainder)
+       mov     r11, r3, lsl cnt
+       ldr     r3, [up, #-4]!
+       cmp     r1, r2
+       addhi   r1, r1, d   C first adjustment: q was one too large
+       subhi   r8, r8, #1
+       orr     r2, r11, r3, lsr tnc   C assemble the next shifted limb
+       cmp     r1, d
+       bcs     L(ufx)   C second adjustment when r >= d, done out of line
+L(uok):        str     r8, [qp], #-4   C store quotient limb, qp moves downwards
+L(mid):        add     r8, r1, #1
+       mov     r11, r2   C keep the numerator low limb for the mls
+       umlal   r2, r8, r1, dinv   C r8:r2 = (r + 1):r2 + r * dinv, r8 = quotient estimate
+       subs    n, n, #1
+       bne     L(utop)
+
+       mls     r1, d, r8, r11   C same step as in utop, without loading a further limb
+       mov     r11, r3, lsl cnt
+       cmp     r1, r2
+       addhi   r1, r1, d
+       subhi   r8, r8, #1
+       cmp     r1, d
+       rsbcs   r1, d, r1   C r -= d when r >= d
+       addcs   r8, r8, #1
+       str     r8, [qp], #-4
+
+L(uend):add    r8, r1, #1   C final integer step on the remaining low bits in r11
+       mov     r2, r11
+       umlal   r2, r8, r1, dinv
+       mls     r1, d, r8, r11
+       cmp     r1, r2
+       addhi   r1, r1, d
+       subhi   r8, r8, #1
+       cmp     r1, d
+       rsbcs   r1, d, r1
+       addcs   r8, r8, #1
+       str     r8, [qp], #-4
+L(frac):
+       ldr     r2, [sp, #9*4+d_arg]    C fn
+       cmp     r2, #0
+       beq     L(fend)   C no fraction limbs requested
+
+L(ftop):mov    r6, #0   C fraction loop: each step divides r:0 by d
+       add     r3, r1, #1
+       umlal   r6, r3, r1, dinv   C r3 = quotient estimate
+       mov     r8, #0
+       mls     r1, d, r3, r8   C r = 0 - d * q
+       cmp     r1, r6
+       addhi   r1, r1, d
+       subhi   r3, r3, #1
+       subs    r2, r2, #1
+       str     r3, [qp], #-4
+       bne     L(ftop)
+
+L(fend):mov    r11, r1, lsr cnt   C undo the normalisation shift on the remainder
+L(rtn):        mov     r0, r11   C return the remainder
+       ldmfd   sp!, {r4, r5, r6, r7, r8, r9, r10, r11, pc}
+
+L(normalised):
+       mov     r0, d
+       bl      mpn_invert_limb   C result in r0 is used as dinv
+L(nent):
+       cmp     n, #0
+       mov     r11, #0                 C r
+       blt     L(nend)   C n_arg was 0
+
+       ldr     r11, [up, #0]   C most significant limb of U
+       cmp     r11, d
+       movlo   r2, #0                  C hi q limb
+       movhs   r2, #1                  C hi q limb
+       subhs   r11, r11, d   C reduce the top limb below d
+
+       str     r2, [qp], #-4
+       cmp     n, #0
+       beq     L(nend)   C n_arg was 1
+
+L(ntop):ldr    r1, [up, #-4]!   C next lower limb of U
+       add     r12, r11, #1
+       umlal   r1, r12, r11, dinv   C r12 = quotient estimate
+       ldr     r3, [up, #0]   C reload the limb, r1 was overwritten by umlal
+       mls     r11, d, r12, r3   C r = limb - d * q
+       cmp     r11, r1
+       addhi   r11, r11, d
+       subhi   r12, r12, #1
+       cmp     d, r11
+       bls     L(nfx)   C r >= d: adjust out of line
+L(nok):        str     r12, [qp], #-4
+       subs    n, n, #1
+       bne     L(ntop)
+
+L(nend):mov    r1, r11                 C r
+       mov     cnt, #0                 C shift cnt
+       b       L(frac)
+
+L(nfx):        add     r12, r12, #1   C out-of-line fixup for the normalised loop
+       rsb     r11, d, r11
+       b       L(nok)
+L(ufx):        rsb     r1, d, r1   C out-of-line fixup for the unnormalised loop
+       add     r8, r8, #1
+       b       L(uok)
+EPILOGUE()
diff --git a/mpn/arm/v6t2/gcd_1.asm b/mpn/arm/v6t2/gcd_1.asm

new file mode 100644 (file)

index 0000000..0c1e385
--- /dev/null
+++ b/mpn/arm/v6t2/gcd_1.asm
@@ -0,0 +1,103 @@
+dnl  ARM v6t2 mpn_gcd_1.
+
+dnl  Based on the K7 gcd_1.asm, by Kevin Ryde.  Rehacked for ARM by Torbjorn
+dnl  Granlund.
+
+dnl  Copyright 2000, 2001, 2002, 2005, 2009, 2011, 2012 Free Software
+dnl  Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C           cycles/bit (approx)
+C StrongARM     ?
+C XScale        ?
+C Cortex-A8     ?
+C Cortex-A9     5.30
+C Cortex-A15    ?
+C Numbers measured with: speed -CD -s8-32 -t24 mpn_gcd_1
+
+C TODO
+C  * Optimise inner-loop better.
+
+C Threshold of when to call bmod when U is one limb.  Should be about
+C (time_in_cycles(bmod_1,1) + call_overhead) / (cycles/bit).
+define(`BMOD_THRES_LOG2', 7)
+
+C INPUT PARAMETERS
+define(`up',    `r0')
+define(`n',     `r1')
+define(`v0',    `r2')
+
+ifdef(`BMOD_1_TO_MOD_1_THRESHOLD',,
+  `define(`BMOD_1_TO_MOD_1_THRESHOLD',0xffffffff)')
+
+ASM_START()
+       TEXT
+       ALIGN(16)
+PROLOGUE(mpn_gcd_1)   C gcd of the n-limb number at up and the single limb v0, returned in r0
+       push    {r4, r7, lr}
+       ldr     r3, [up]        C U low limb
+
+       orr     r3, r3, v0
+       rbit    r4, r3   C rbit followed by clz counts trailing zeros
+       clz     r4, r4          C min(ctz(u0),ctz(v0))
+
+       rbit    r12, v0
+       clz     r12, r12
+       lsr     v0, v0, r12   C strip trailing zero bits from v0
+
+       mov     r7, v0   C keep the stripped v0 in r7 across the calls below
+
+       cmp     n, #1
+       bne     L(nby1)
+
+C Both U and V are single limbs, reduce with bmod if u0 >> v0.
+       ldr     r3, [up]
+       cmp     v0, r3, lsr #BMOD_THRES_LOG2
+       bhi     L(red1)   C v0 > u0 >> BMOD_THRES_LOG2: skip the reduction, r3 = u0
+
+L(bmod):mov    r3, #0          C carry argument
+       bl      mpn_modexact_1c_odd   C r0,r1,r2 still hold up,n,v0
+       b       L(red0)
+
+L(nby1):cmp    n, #BMOD_1_TO_MOD_1_THRESHOLD
+       blo     L(bmod)   C below the threshold use modexact, else mpn_mod_1
+
+       bl      mpn_mod_1
+
+L(red0):mov    r3, r0   C r3 = reduced U returned by the call
+L(red1):cmp    r3, #0
+       rbit    r12, r3
+       clz     r12, r12   C r12 = ctz of the reduced U
+       bne     L(mid)
+       b       L(end)   C reduced U is zero: the result comes from r7 alone
+
+       ALIGN(8)
+L(top):        movcs   r3, r1          C if x-y < 0
+       movcs   r7, r0          C use x,y-x
+L(mid):        lsr     r3, r3, r12     C
+       mov     r0, r3          C
+       subs    r1, r7, r3      C
+       rsb     r3, r7, r3      C
+       rbit    r12, r1
+       clz     r12, r12        C
+       bne     L(top)          C
+
+L(end):        lsl     r0, r7, r4   C shift the common factor of two back in
+       pop     {r4, r7, pc}
+EPILOGUE()
diff --git a/mpn/arm/v6t2/mode1o.asm b/mpn/arm/v6t2/mode1o.asm

new file mode 100644 (file)

index 0000000..81b9520
--- /dev/null
+++ b/mpn/arm/v6t2/mode1o.asm
@@ -0,0 +1,75 @@
+dnl  ARM mpn_modexact_1c_odd
+
+dnl  Contributed to the GNU project by Torbjorn Granlund.
+
+dnl  Copyright 2012 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C           cycles/limb
+C StrongARM     ?
+C XScale        ?
+C Cortex-A8     ?
+C Cortex-A9     9
+C Cortex-A15    ?
+
+define(`up', `r0')
+define(`n',  `r1')
+define(`d',  `r2')
+define(`cy', `r3')
+
+       .protected      binvert_limb_table
+ASM_START()
+PROLOGUE(mpn_modexact_1c_odd)   C arguments: up, n, d and carry-in cy; the final carry limb is returned in r0
+       stmfd   sp!, {r4, r5, r6, r7}
+
+       LEA(    r4, binvert_limb_table)   C r4 = address of the inverse lookup table
+
+       ldr     r6, [up], #4            C up[0]
+
+       ubfx    r12, d, #1, #7   C table index = bits 1..7 of d
+       ldrb    r4, [r4, r12]   C r4 = table byte, the starting inverse
+       smulbb  r12, r4, r4   C refinement step: inv = 2*inv - inv*inv*d
+       mul     r12, d, r12
+       rsb     r12, r12, r4, asl #1
+       mul     r4, r12, r12   C second refinement step of the same form
+       mul     r4, d, r4
+       rsb     r4, r4, r12, asl #1     C r4 = inverse
+
+       subs    n, n, #1
+       sub     r6, r6, cy   C subtract the carry-in from the first limb
+       mul     r6, r6, r4   C r6 = (up[0] - cy) * inverse
+       beq     L(end)   C flags from the subs above: n was 1
+
+       rsb     r5, r4, #0              C r5 = -inverse
+
+L(top):        ldr     r7, [up], #4   C next limb of U
+       mov     r12, #0
+       umaal   r12, cy, r6, d   C cy = high limb of r6 * d + cy (low limb in r12 is not used)
+       mul     r6, r7, r4
+       mla     r6, cy, r5, r6   C r6 = (limb - cy) * inverse, formed as limb*inv + cy*(-inv)
+       subs    n, n, #1
+       bne     L(top)
+
+L(end):        mov     r12, #0
+       umaal   r12, cy, r6, d   C final carry limb
+       mov     r0, cy   C return it
+
+       ldmfd   sp!, {r4, r5, r6, r7}
+       bx      r14
+EPILOGUE()
diff --git a/mpn/arm/v7a/cora15/gmp-mparam.h b/mpn/arm/v7a/cora15/gmp-mparam.h

new file mode 100644 (file)

index 0000000..bfb5910
--- /dev/null
+++ b/mpn/arm/v7a/cora15/gmp-mparam.h
@@ -0,0 +1,167 @@
+/* gmp-mparam.h -- Compiler/machine parameter header file.
+
+Copyright 1991, 1993, 1994, 1999, 2000, 2001, 2002, 2003, 2009, 2010, 2012 Free
+Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#define GMP_LIMB_BITS 32
+#define BYTES_PER_MP_LIMB 4
+
+/* 1700MHz Cortex-A15 */
+
+#define MOD_1_NORM_THRESHOLD                 0  /* always */
+#define MOD_1_UNNORM_THRESHOLD               0  /* always */
+#define MOD_1N_TO_MOD_1_1_THRESHOLD          3
+#define MOD_1U_TO_MOD_1_1_THRESHOLD          3
+#define MOD_1_1_TO_MOD_1_2_THRESHOLD         9
+#define MOD_1_2_TO_MOD_1_4_THRESHOLD     MP_SIZE_T_MAX
+#define PREINV_MOD_1_TO_MOD_1_THRESHOLD      8
+#define USE_PREINV_DIVREM_1                  1  /* native */
+#define DIV_QR_2_PI2_THRESHOLD           MP_SIZE_T_MAX  /* never */
+#define DIVEXACT_1_THRESHOLD                 0  /* always */
+#define BMOD_1_TO_MOD_1_THRESHOLD           15
+
+#define MUL_TOOM22_THRESHOLD                31
+#define MUL_TOOM33_THRESHOLD               109
+#define MUL_TOOM44_THRESHOLD               288
+#define MUL_TOOM6H_THRESHOLD               632
+#define MUL_TOOM8H_THRESHOLD                 0  /* always */
+
+#define MUL_TOOM32_TO_TOOM43_THRESHOLD     113
+#define MUL_TOOM32_TO_TOOM53_THRESHOLD     199
+#define MUL_TOOM42_TO_TOOM53_THRESHOLD     189
+#define MUL_TOOM42_TO_TOOM63_THRESHOLD     211
+#define MUL_TOOM43_TO_TOOM54_THRESHOLD     287
+
+#define SQR_BASECASE_THRESHOLD               0  /* always (native) */
+#define SQR_TOOM2_THRESHOLD                 51
+#define SQR_TOOM3_THRESHOLD                169
+#define SQR_TOOM4_THRESHOLD                662
+#define SQR_TOOM6_THRESHOLD                951
+#define SQR_TOOM8_THRESHOLD               1005
+
+#define MULMID_TOOM42_THRESHOLD             44
+
+#define MULMOD_BNM1_THRESHOLD               17
+#define SQRMOD_BNM1_THRESHOLD               30
+
+#define MUL_FFT_MODF_THRESHOLD             525  /* k = 5 */
+#define MUL_FFT_TABLE3                                      \
+  { {    505, 5}, {     27, 6}, {     28, 7}, {     15, 6}, \
+    {     33, 7}, {     17, 6}, {     35, 7}, {     19, 6}, \
+    {     41, 7}, {     21, 8}, {     11, 7}, {     23, 6}, \
+    {     47, 7}, {     27, 8}, {     15, 7}, {     35, 8}, \
+    {     19, 7}, {     41, 8}, {     23, 7}, {     51, 8}, \
+    {     27, 9}, {     15, 8}, {     31, 7}, {     63, 8}, \
+    {     43, 9}, {     23, 8}, {     55, 9}, {     31, 8}, \
+    {     71, 9}, {     39, 8}, {     79, 9}, {     47, 8}, \
+    {     99, 9}, {     55,10}, {     31, 9}, {     79,10}, \
+    {     47, 9}, {    103,11}, {     31,10}, {     63, 9}, \
+    {    135,10}, {     79, 9}, {    159,10}, {     95, 9}, \
+    {    191,10}, {    111,11}, {     63,10}, {    127, 9}, \
+    {    255,10}, {    143, 9}, {    287,10}, {    159,11}, \
+    {     95,10}, {    191, 9}, {    383,12}, {     63,11}, \
+    {    127,10}, {    255, 9}, {    511,10}, {    271, 9}, \
+    {    543,10}, {    287,11}, {    159,10}, {    319, 9}, \
+    {    639,10}, {    351,11}, {    191,10}, {    383, 9}, \
+    {    767,10}, {    415,12}, {    127,11}, {    255,10}, \
+    {    543,11}, {    287,10}, {    607,11}, {    319,10}, \
+    {    671,11}, {    351,12}, {    191,11}, {    383,10}, \
+    {    799,11}, {    415,13}, {    127,12}, {    255,11}, \
+    {    543,10}, {   1087,11}, {    607,10}, {   1215,12}, \
+    {    319,11}, {    735,12}, {    383,11}, {    799,10}, \
+    {   1599,11}, {    831,12}, {    447,11}, {    959,13}, \
+    {    255,12}, {    511,11}, {   1087,12}, {    575,11}, \
+    {   1215,12}, {    703,13}, {    383,12}, {    959,14}, \
+    {    255,13}, {    511,12}, {   1215,13}, {   8192,14}, \
+    {  16384,15}, {  32768,16} }
+#define MUL_FFT_TABLE3_SIZE 110
+#define MUL_FFT_THRESHOLD                 5760
+
+#define SQR_FFT_MODF_THRESHOLD             535  /* k = 5 */
+#define SQR_FFT_TABLE3                                      \
+  { {    535, 5}, {     27, 6}, {     16, 5}, {     33, 6}, \
+    {     29, 7}, {     15, 6}, {     33, 7}, {     17, 6}, \
+    {     35, 7}, {     19, 6}, {     41, 7}, {     21, 6}, \
+    {     43, 8}, {     11, 6}, {     45, 7}, {     23, 6}, \
+    {     47, 7}, {     25, 6}, {     51, 7}, {     27, 6}, \
+    {     55, 7}, {     29, 8}, {     15, 7}, {     35, 8}, \
+    {     19, 7}, {     43, 8}, {     23, 7}, {     51, 8}, \
+    {     27, 9}, {     15, 8}, {     31, 7}, {     63, 8}, \
+    {     35, 7}, {     71, 8}, {     43, 9}, {     23, 8}, \
+    {     55, 9}, {     31, 8}, {     71, 9}, {     39, 8}, \
+    {     83, 9}, {     47, 8}, {     95, 9}, {     55,10}, \
+    {     31, 9}, {     79,10}, {     47, 9}, {    103,11}, \
+    {     31,10}, {     63, 9}, {    135,10}, {     79, 9}, \
+    {    159,10}, {     95, 9}, {    191,10}, {    111,11}, \
+    {     63,10}, {    159,11}, {     95,10}, {    191,12}, \
+    {     63,11}, {    127,10}, {    255, 9}, {    511,10}, \
+    {    271, 9}, {    543,11}, {    159,10}, {    319, 9}, \
+    {    639,10}, {    335,11}, {    191,10}, {    383, 9}, \
+    {    767,10}, {    415,12}, {    127,11}, {    255,10}, \
+    {    511,11}, {    287,10}, {    607,11}, {    319,10}, \
+    {    639,12}, {    191,11}, {    383,10}, {    767,11}, \
+    {    415,13}, {    127,12}, {    255,11}, {    543,10}, \
+    {   1087,11}, {    607,10}, {   1215,12}, {    319,11}, \
+    {    735,12}, {    383,11}, {    831,12}, {    447,11}, \
+    {    959,13}, {    255,12}, {    511,11}, {   1087,12}, \
+    {    575,11}, {   1215,12}, {    703,13}, {    383,12}, \
+    {    959,14}, {    255,13}, {    511,12}, {   1215,13}, \
+    {   8192,14}, {  16384,15}, {  32768,16} }
+#define SQR_FFT_TABLE3_SIZE 111
+#define SQR_FFT_THRESHOLD                 4928
+
+#define MULLO_BASECASE_THRESHOLD             0  /* always */
+#define MULLO_DC_THRESHOLD                  27
+#define MULLO_MUL_N_THRESHOLD             8907
+
+#define DC_DIV_QR_THRESHOLD                 31
+#define DC_DIVAPPR_Q_THRESHOLD              45
+#define DC_BDIV_QR_THRESHOLD                29
+#define DC_BDIV_Q_THRESHOLD                 50
+
+#define INV_MULMOD_BNM1_THRESHOLD           66
+#define INV_NEWTON_THRESHOLD               171
+#define INV_APPR_THRESHOLD                  65
+
+#define BINV_NEWTON_THRESHOLD              300
+#define REDC_1_TO_REDC_2_THRESHOLD          12
+#define REDC_2_TO_REDC_N_THRESHOLD          99
+
+#define MU_DIV_QR_THRESHOLD               1895
+#define MU_DIVAPPR_Q_THRESHOLD            1895
+#define MUPI_DIV_QR_THRESHOLD               54
+#define MU_BDIV_QR_THRESHOLD              1470
+#define MU_BDIV_Q_THRESHOLD               1895
+
+#define POWM_SEC_TABLE  6,44,548,1604
+
+#define MATRIX22_STRASSEN_THRESHOLD         22
+#define HGCD_THRESHOLD                      40
+#define HGCD_APPR_THRESHOLD                 50
+#define HGCD_REDUCE_THRESHOLD             3389
+#define GCD_DC_THRESHOLD                   278
+#define GCDEXT_DC_THRESHOLD                180
+#define JACOBI_BASE_METHOD                   4
+
+#define GET_STR_DC_THRESHOLD                18
+#define GET_STR_PRECOMPUTE_THRESHOLD        34
+#define SET_STR_DC_THRESHOLD               198
+#define SET_STR_PRECOMPUTE_THRESHOLD       541
+
+#define FAC_DSC_THRESHOLD                  303
+#define FAC_ODD_THRESHOLD                   28
diff --git a/mpn/asm-defs.m4 b/mpn/asm-defs.m4

index a0382d00f1648abaef61bf308e3b67da7d01fd15..60f6b1a0a015fabb34d679f5c91f7130fc11e8a0 100644 (file)
--- a/mpn/asm-defs.m4
+++ b/mpn/asm-defs.m4
@@ -2,8 +2,8 @@ divert(-1)
  dnl
  dnl  m4 macros for gmp assembly code, shared by all CPUs.
  
-dnl  Copyright 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006 Free Software
-dnl  Foundation, Inc.
+dnl  Copyright 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2011 Free
+dnl  Software Foundation, Inc.
  dnl
  dnl  This file is part of the GNU MP Library.
  dnl
@@ -867,7 +867,7 @@ ifelse(eval($'`#>1 || m4_length('m4_doublequote($`'1)`)!=0),1,($'`@))')')
  dnl  Called: deflit_emptyargcheck(macroname,$#,`$1')
  define(deflit_emptyargcheck,
  `ifelse(eval($2==1 && !m4_dollarhash_1_if_noparen_p && m4_length(`$3')==0),1,
-`m4_error(`dont use a deflit as $1() because it loses the brackets (see deflit in asm-incl.m4 for more information)
+`m4_error(`dont use a deflit as $1() because it loses the brackets (see deflit in asm-defs.m4 for more information)
  ')')')
  
  
@@ -1054,6 +1054,18 @@ dnl  aors_n
  m4_not_for_expansion(`OPERATION_add_n')
  m4_not_for_expansion(`OPERATION_sub_n')
  
+dnl  aors_err1_n
+m4_not_for_expansion(`OPERATION_add_err1_n')
+m4_not_for_expansion(`OPERATION_sub_err1_n')
+
+dnl  aors_err2_n
+m4_not_for_expansion(`OPERATION_add_err2_n')
+m4_not_for_expansion(`OPERATION_sub_err2_n')
+
+dnl  aors_err3_n
+m4_not_for_expansion(`OPERATION_add_err3_n')
+m4_not_for_expansion(`OPERATION_sub_err3_n')
+
  dnl  aorsmul_1
  m4_not_for_expansion(`OPERATION_addmul_1')
  m4_not_for_expansion(`OPERATION_submul_1')
@@ -1302,20 +1314,44 @@ dnl  function that might be implemented in assembler is here.
  
  define(define_mpn,
  m4_assert_numargs(1)
-`define(`mpn_$1',`MPN(`$1')')')
+`deflit(`mpn_$1',`MPN(`$1')')')
  
  define_mpn(add)
  define_mpn(add_1)
+define_mpn(add_err1_n)
+define_mpn(add_err2_n)
+define_mpn(add_err3_n)
  define_mpn(add_n)
  define_mpn(add_nc)
+define_mpn(addcnd_n)
  define_mpn(addlsh1_n)
+define_mpn(addlsh1_nc)
  define_mpn(addlsh2_n)
+define_mpn(addlsh2_nc)
  define_mpn(addlsh_n)
+define_mpn(addlsh_nc)
+define_mpn(addlsh1_n_ip1)
+define_mpn(addlsh1_nc_ip1)
+define_mpn(addlsh2_n_ip1)
+define_mpn(addlsh2_nc_ip1)
+define_mpn(addlsh_n_ip1)
+define_mpn(addlsh_nc_ip1)
+define_mpn(addlsh1_n_ip2)
+define_mpn(addlsh1_nc_ip2)
+define_mpn(addlsh2_n_ip2)
+define_mpn(addlsh2_nc_ip2)
+define_mpn(addlsh_n_ip2)
+define_mpn(addlsh_nc_ip2)
  define_mpn(addmul_1)
  define_mpn(addmul_1c)
  define_mpn(addmul_2)
  define_mpn(addmul_3)
  define_mpn(addmul_4)
+define_mpn(addmul_5)
+define_mpn(addmul_6)
+define_mpn(addmul_7)
+define_mpn(addmul_8)
+define_mpn(addmul_2s)
  define_mpn(add_n_sub_n)
  define_mpn(add_n_sub_nc)
  define_mpn(addaddmul_1msb0)
@@ -1330,6 +1366,11 @@ define_mpn(copyd)
  define_mpn(copyi)
  define_mpn(count_leading_zeros)
  define_mpn(count_trailing_zeros)
+define_mpn(div_qr_2)
+define_mpn(div_qr_2n_pi1)
+define_mpn(div_qr_2u_pi1)
+define_mpn(div_qr_2n_pi2)
+define_mpn(div_qr_2u_pi2)
  define_mpn(divexact_1)
  define_mpn(divexact_by3c)
  define_mpn(divrem)
@@ -1345,6 +1386,7 @@ define_mpn(gcdext)
  define_mpn(get_str)
  define_mpn(hamdist)
  define_mpn(invert_limb)
+define_mpn(invert_limb_table)
  define_mpn(ior_n)
  define_mpn(iorn_n)
  define_mpn(lshift)
@@ -1368,9 +1410,12 @@ define_mpn(mul_1c)
  define_mpn(mul_2)
  define_mpn(mul_3)
  define_mpn(mul_4)
+define_mpn(mul_5)
+define_mpn(mul_6)
  define_mpn(mul_basecase)
  define_mpn(mul_n)
  define_mpn(mullo_basecase)
+define_mpn(mulmid_basecase)
  define_mpn(perfect_square_p)
  define_mpn(popcount)
  define_mpn(preinv_divrem_1)
@@ -1385,8 +1430,11 @@ define_mpn(random2)
  define_mpn(redc_1)
  define_mpn(redc_2)
  define_mpn(rsblsh1_n)
+define_mpn(rsblsh1_nc)
  define_mpn(rsblsh2_n)
+define_mpn(rsblsh2_nc)
  define_mpn(rsblsh_n)
+define_mpn(rsblsh_nc)
  define_mpn(rsh1add_n)
  define_mpn(rsh1add_nc)
  define_mpn(rsh1sub_n)
@@ -1398,16 +1446,32 @@ define_mpn(scan1)
  define_mpn(set_str)
  define_mpn(sqr_basecase)
  define_mpn(sqr_diagonal)
+define_mpn(sqr_diag_addlsh1)
  define_mpn(sub_n)
+define_mpn(subcnd_n)
  define_mpn(sublsh1_n)
+define_mpn(sublsh1_nc)
+define_mpn(sublsh1_n_ip1)
+define_mpn(sublsh1_nc_ip1)
  define_mpn(sublsh2_n)
+define_mpn(sublsh2_nc)
+define_mpn(sublsh2_n_ip1)
+define_mpn(sublsh2_nc_ip1)
+define_mpn(sublsh_n)
+define_mpn(sublsh_nc)
+define_mpn(sublsh_n_ip1)
+define_mpn(sublsh_nc_ip1)
  define_mpn(sqrtrem)
  define_mpn(sub)
  define_mpn(sub_1)
+define_mpn(sub_err1_n)
+define_mpn(sub_err2_n)
+define_mpn(sub_err3_n)
  define_mpn(sub_n)
  define_mpn(sub_nc)
  define_mpn(submul_1)
  define_mpn(submul_1c)
+define_mpn(tabselect)
  define_mpn(umul_ppmm)
  define_mpn(umul_ppmm_r)
  define_mpn(udiv_qrnnd)
@@ -1649,6 +1713,22 @@ m4_assert_numargs(1)
  )
  
  
+dnl  Usage: ABI_SUPPORT(abi)
+dnl
+dnl  A dummy macro which is grepped for by ./configure to know what ABIs
+dnl  are supported in an asm file.
+dnl
+dnl  If multiple non-standard ABIs are supported, several ABI_SUPPORT
+dnl  declarations should be used:
+dnl
+dnl         ABI_SUPPORT(FOOABI)
+dnl         ABI_SUPPORT(BARABI)
+
+define(ABI_SUPPORT,
+m4_assert_numargs(1)
+)
+
+
  dnl  Usage: GMP_NUMB_MASK
  dnl
  dnl  A bit mask for the number part of a limb.  Eg. with 6 bit nails in a
@@ -1660,4 +1740,11 @@ m4_assert_defined(`GMP_NUMB_BITS')
  `m4_hex_lowmask(GMP_NUMB_BITS)')
  
  
+dnl  Usage: m4append(`variable',`value-to-append')
+
+define(`m4append',
+`define(`$1',  defn(`$1')`$2')
+'
+)
+
  divert`'dnl
diff --git a/mpn/generic/add_err1_n.c b/mpn/generic/add_err1_n.c

new file mode 100644 (file)

index 0000000..8ccba70
--- /dev/null
+++ b/mpn/generic/add_err1_n.c
@@ -0,0 +1,90 @@
+/* mpn_add_err1_n -- add_n with one error term
+
+   Contributed by David Harvey.
+
+   THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
+   SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT IT'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2011 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+/*
+  Computes:
+
+  (1) {rp,n} := {up,n} + {vp,n} (just like mpn_add_n) with incoming carry cy,
+  return value is carry out.
+
+  (2) Let c[i+1] = carry from i-th limb addition (c[0] = cy).
+  Computes c[1]*yp[n-1] + ... + c[n]*yp[0], stores two-limb result at ep.
+
+  Requires n >= 1.
+
+  None of the outputs may overlap each other or any of the inputs, except
+  that {rp,n} may be equal to {up,n} or {vp,n}.
+*/
+mp_limb_t
+mpn_add_err1_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp,
+               mp_ptr ep, mp_srcptr yp,
+                mp_size_t n, mp_limb_t cy)
+{
+  mp_limb_t el, eh, ul, vl, yl, zl, rl, sl, cy1, cy2;
+
+  ASSERT (n >= 1);
+  ASSERT (MPN_SAME_OR_SEPARATE_P (rp, up, n));
+  ASSERT (MPN_SAME_OR_SEPARATE_P (rp, vp, n));
+  ASSERT (! MPN_OVERLAP_P (rp, n, yp, n));
+  ASSERT (! MPN_OVERLAP_P (ep, 2, up, n));
+  ASSERT (! MPN_OVERLAP_P (ep, 2, vp, n));
+  ASSERT (! MPN_OVERLAP_P (ep, 2, yp, n));
+  ASSERT (! MPN_OVERLAP_P (ep, 2, rp, n));
+
+  yp += n - 1;         /* yp is consumed from its last limb downwards */
+  el = eh = 0;         /* (eh:el) is the two-limb error accumulator */
+
+  do
+    {
+      yl = *yp--;
+      ul = *up++;
+      vl = *vp++;
+
+      /* ordinary add_n */
+      ADDC_LIMB (cy1, sl, ul, vl);     /* presumably sl = ul + vl, carry-out in cy1 */
+      ADDC_LIMB (cy2, rl, sl, cy);     /* presumably rl = sl + cy, carry-out in cy2 */
+      cy = cy1 | cy2;                  /* carry out of this limb position */
+      *rp++ = rl;
+
+      /* update (eh:el) */
+      zl = (-cy) & yl;                 /* yl if cy is 1, zero if cy is 0 */
+      el += zl;
+      eh += el < zl;                   /* el wrapped, so carry into eh */
+    }
+  while (--n);
+
+#if GMP_NAIL_BITS != 0
+  eh = (eh << GMP_NAIL_BITS) + (el >> GMP_NUMB_BITS);  /* move el's bits above GMP_NUMB_BITS into eh */
+  el &= GMP_NUMB_MASK;
+#endif
+
+  ep[0] = el;          /* low limb of the error term */
+  ep[1] = eh;          /* high limb of the error term */
+
+  return cy;           /* carry out of the last limb addition */
+}
diff --git a/mpn/generic/add_err2_n.c b/mpn/generic/add_err2_n.c

new file mode 100644 (file)

index 0000000..b5dfba7
--- /dev/null
+++ b/mpn/generic/add_err2_n.c
@@ -0,0 +1,106 @@
+/* mpn_add_err2_n -- add_n with two error terms
+
+   Contributed by David Harvey.
+
+   THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
+   SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT IT'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2011 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+/*
+  Computes:
+
+  (1) {rp,n} := {up,n} + {vp,n} (just like mpn_add_n) with incoming carry cy,
+  return value is carry out.
+
+  (2) Let c[i+1] = carry from i-th limb addition (c[0] = cy).
+  Computes c[1]*yp1[n-1] + ... + c[n]*yp1[0],
+           c[1]*yp2[n-1] + ... + c[n]*yp2[0],
+  stores two-limb results at {ep,2} and {ep+2,2} respectively.
+
+  Requires n >= 1.
+
+  None of the outputs may overlap each other or any of the inputs, except
+  that {rp,n} may be equal to {up,n} or {vp,n}.
+*/
+mp_limb_t
+mpn_add_err2_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp,
+                mp_ptr ep, mp_srcptr yp1, mp_srcptr yp2,
+                mp_size_t n, mp_limb_t cy)
+{
+  mp_limb_t el1, eh1, el2, eh2, ul, vl, yl1, yl2, zl1, zl2, rl, sl, cy1, cy2;
+
+  ASSERT (n >= 1);
+  ASSERT (MPN_SAME_OR_SEPARATE_P (rp, up, n));
+  ASSERT (MPN_SAME_OR_SEPARATE_P (rp, vp, n));
+  ASSERT (! MPN_OVERLAP_P (rp, n, yp1, n));
+  ASSERT (! MPN_OVERLAP_P (rp, n, yp2, n));
+  ASSERT (! MPN_OVERLAP_P (ep, 4, up, n));
+  ASSERT (! MPN_OVERLAP_P (ep, 4, vp, n));
+  ASSERT (! MPN_OVERLAP_P (ep, 4, yp1, n));
+  ASSERT (! MPN_OVERLAP_P (ep, 4, yp2, n));
+  ASSERT (! MPN_OVERLAP_P (ep, 4, rp, n));
+
+  yp1 += n - 1;                /* both y operands are consumed from the last limb downwards */
+  yp2 += n - 1;
+  el1 = eh1 = 0;       /* (eh1:el1) accumulates the yp1 error term */
+  el2 = eh2 = 0;       /* (eh2:el2) accumulates the yp2 error term */
+
+  do
+    {
+      yl1 = *yp1--;
+      yl2 = *yp2--;
+      ul = *up++;
+      vl = *vp++;
+
+      /* ordinary add_n */
+      ADDC_LIMB (cy1, sl, ul, vl);     /* presumably sl = ul + vl, carry-out in cy1 */
+      ADDC_LIMB (cy2, rl, sl, cy);     /* presumably rl = sl + cy, carry-out in cy2 */
+      cy = cy1 | cy2;                  /* carry out of this limb position */
+      *rp++ = rl;
+
+      /* update (eh1:el1) */
+      zl1 = (-cy) & yl1;               /* yl1 if cy is 1, zero if cy is 0 */
+      el1 += zl1;
+      eh1 += el1 < zl1;                        /* el1 wrapped, so carry into eh1 */
+
+      /* update (eh2:el2) */
+      zl2 = (-cy) & yl2;               /* same masking, applied to the second y operand */
+      el2 += zl2;
+      eh2 += el2 < zl2;
+    }
+  while (--n);
+
+#if GMP_NAIL_BITS != 0
+  eh1 = (eh1 << GMP_NAIL_BITS) + (el1 >> GMP_NUMB_BITS);       /* move el1's bits above GMP_NUMB_BITS into eh1 */
+  el1 &= GMP_NUMB_MASK;
+  eh2 = (eh2 << GMP_NAIL_BITS) + (el2 >> GMP_NUMB_BITS);
+  el2 &= GMP_NUMB_MASK;
+#endif
+
+  ep[0] = el1;         /* {ep,2} holds the yp1 error term, low limb first */
+  ep[1] = eh1;
+  ep[2] = el2;         /* {ep+2,2} holds the yp2 error term, low limb first */
+  ep[3] = eh2;
+
+  return cy;           /* carry out of the last limb addition */
+}
diff --git a/mpn/generic/add_err3_n.c b/mpn/generic/add_err3_n.c

new file mode 100644 (file)

index 0000000..612b821
--- /dev/null
+++ b/mpn/generic/add_err3_n.c
@@ -0,0 +1,121 @@
+/* mpn_add_err3_n -- add_n with three error terms
+
+   Contributed by David Harvey.
+
+   THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
+   SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT IT'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2011 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+/*
+  Computes:
+
+  (1) {rp,n} := {up,n} + {vp,n} (just like mpn_add_n) with incoming carry cy,
+  return value is carry out.
+
+  (2) Let c[i+1] = carry from i-th limb addition (c[0] = cy).
+  Computes c[1]*yp1[n-1] + ... + c[n]*yp1[0],
+           c[1]*yp2[n-1] + ... + c[n]*yp2[0],
+           c[1]*yp3[n-1] + ... + c[n]*yp3[0],
+  stores two-limb results at {ep,2}, {ep+2,2} and {ep+4,2} respectively.
+
+  Requires n >= 1.
+
+  None of the outputs may overlap each other or any of the inputs, except
+  that {rp,n} may be equal to {up,n} or {vp,n}.
+*/
+mp_limb_t
+mpn_add_err3_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp,
+                mp_ptr ep, mp_srcptr yp1, mp_srcptr yp2, mp_srcptr yp3,
+                mp_size_t n, mp_limb_t cy)
+{
+  mp_limb_t el1, eh1, el2, eh2, el3, eh3, ul, vl, yl1, yl2, yl3, zl1, zl2, zl3, rl, sl, cy1, cy2;
+
+  ASSERT (n >= 1);
+  ASSERT (MPN_SAME_OR_SEPARATE_P (rp, up, n));
+  ASSERT (MPN_SAME_OR_SEPARATE_P (rp, vp, n));
+  ASSERT (! MPN_OVERLAP_P (rp, n, yp1, n));
+  ASSERT (! MPN_OVERLAP_P (rp, n, yp2, n));
+  ASSERT (! MPN_OVERLAP_P (rp, n, yp3, n));
+  ASSERT (! MPN_OVERLAP_P (ep, 6, up, n));
+  ASSERT (! MPN_OVERLAP_P (ep, 6, vp, n));
+  ASSERT (! MPN_OVERLAP_P (ep, 6, yp1, n));
+  ASSERT (! MPN_OVERLAP_P (ep, 6, yp2, n));
+  ASSERT (! MPN_OVERLAP_P (ep, 6, yp3, n));
+  ASSERT (! MPN_OVERLAP_P (ep, 6, rp, n));
+
+  yp1 += n - 1;                /* all three y operands are consumed from the last limb downwards */
+  yp2 += n - 1;
+  yp3 += n - 1;
+  el1 = eh1 = 0;       /* (eh1:el1) accumulates the yp1 error term */
+  el2 = eh2 = 0;       /* (eh2:el2) accumulates the yp2 error term */
+  el3 = eh3 = 0;       /* (eh3:el3) accumulates the yp3 error term */
+
+  do
+    {
+      yl1 = *yp1--;
+      yl2 = *yp2--;
+      yl3 = *yp3--;
+      ul = *up++;
+      vl = *vp++;
+
+      /* ordinary add_n */
+      ADDC_LIMB (cy1, sl, ul, vl);     /* presumably sl = ul + vl, carry-out in cy1 */
+      ADDC_LIMB (cy2, rl, sl, cy);     /* presumably rl = sl + cy, carry-out in cy2 */
+      cy = cy1 | cy2;                  /* carry out of this limb position */
+      *rp++ = rl;
+
+      /* update (eh1:el1) */
+      zl1 = (-cy) & yl1;               /* yl1 if cy is 1, zero if cy is 0 */
+      el1 += zl1;
+      eh1 += el1 < zl1;                        /* el1 wrapped, so carry into eh1 */
+
+      /* update (eh2:el2) */
+      zl2 = (-cy) & yl2;               /* same masking, applied to the second y operand */
+      el2 += zl2;
+      eh2 += el2 < zl2;
+
+      /* update (eh3:el3) */
+      zl3 = (-cy) & yl3;               /* same masking, applied to the third y operand */
+      el3 += zl3;
+      eh3 += el3 < zl3;
+    }
+  while (--n);
+
+#if GMP_NAIL_BITS != 0
+  eh1 = (eh1 << GMP_NAIL_BITS) + (el1 >> GMP_NUMB_BITS);       /* move el1's bits above GMP_NUMB_BITS into eh1 */
+  el1 &= GMP_NUMB_MASK;
+  eh2 = (eh2 << GMP_NAIL_BITS) + (el2 >> GMP_NUMB_BITS);
+  el2 &= GMP_NUMB_MASK;
+  eh3 = (eh3 << GMP_NAIL_BITS) + (el3 >> GMP_NUMB_BITS);
+  el3 &= GMP_NUMB_MASK;
+#endif
+
+  ep[0] = el1;         /* {ep,2} holds the yp1 error term, low limb first */
+  ep[1] = eh1;
+  ep[2] = el2;         /* {ep+2,2} holds the yp2 error term, low limb first */
+  ep[3] = eh2;
+  ep[4] = el3;         /* {ep+4,2} holds the yp3 error term, low limb first */
+  ep[5] = eh3;
+
+  return cy;           /* carry out of the last limb addition */
+}
diff --git a/mpn/generic/add_n_sub_n.c b/mpn/generic/add_n_sub_n.c

new file mode 100644 (file)

index 0000000..21437c6
--- /dev/null
+++ b/mpn/generic/add_n_sub_n.c
@@ -0,0 +1,162 @@
+/* mpn_add_n_sub_n -- Add and Subtract two limb vectors of equal, non-zero length.
+
+   THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
+   SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT IT'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 1999, 2000, 2001, 2006 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+#ifndef L1_CACHE_SIZE
+#define L1_CACHE_SIZE 8192     /* only 68040 has less than this */
+#endif
+
+#define PART_SIZE (L1_CACHE_SIZE / BYTES_PER_MP_LIMB / 6)
+
+
+/* mpn_add_n_sub_n.
+   r1[] = s1[] + s2[]
+   r2[] = s1[] - s2[]
+   All operands have n limbs.
+   In-place operations allowed.  */
+mp_limb_t
+mpn_add_n_sub_n (mp_ptr r1p, mp_ptr r2p, mp_srcptr s1p, mp_srcptr s2p, mp_size_t n)
+{
+  mp_limb_t acyn, acyo;                /* add carry: acyn from this chunk, acyo passed between chunks */
+  mp_limb_t scyn, scyo;                /* subtract borrow: scyn from this chunk, scyo passed between chunks */
+  mp_size_t off;               /* offset in operands */
+  mp_size_t this_n;            /* size of current chunk */
+
+  /* We alternately add and subtract in chunks that fit into the (L1)
+     cache.  Since the chunks are several hundred limbs, the function call
+     overhead is insignificant, but we get much better locality.  */
+
+  /* We have three variants of the inner loop; the proper one is chosen
+     depending on whether r1 or r2 is the same operand as s1 or s2.  */
+
+  if (r1p != s1p && r1p != s2p)
+    {
+      /* r1 is not identical to either input operand.  We can therefore write
+        to r1 directly, without using temporary storage.  */
+      acyo = 0;
+      scyo = 0;
+      for (off = 0; off < n; off += PART_SIZE)
+       {
+         this_n = MIN (n - off, PART_SIZE);
+#if HAVE_NATIVE_mpn_add_nc
+         acyo = mpn_add_nc (r1p + off, s1p + off, s2p + off, this_n, acyo);
+#else
+         acyn = mpn_add_n (r1p + off, s1p + off, s2p + off, this_n);
+         acyo = acyn + mpn_add_1 (r1p + off, r1p + off, this_n, acyo); /* add in the previous chunk's carry */
+#endif
+#if HAVE_NATIVE_mpn_sub_nc
+         scyo = mpn_sub_nc (r2p + off, s1p + off, s2p + off, this_n, scyo);
+#else
+         scyn = mpn_sub_n (r2p + off, s1p + off, s2p + off, this_n);
+         scyo = scyn + mpn_sub_1 (r2p + off, r2p + off, this_n, scyo); /* subtract the previous chunk's borrow */
+#endif
+       }
+    }
+  else if (r2p != s1p && r2p != s2p)
+    {
+      /* r2 is not identical to either input operand.  We can therefore write
+        to r2 directly, without using temporary storage.  */
+      acyo = 0;
+      scyo = 0;
+      for (off = 0; off < n; off += PART_SIZE)
+       {
+         this_n = MIN (n - off, PART_SIZE);
+#if HAVE_NATIVE_mpn_sub_nc
+         scyo = mpn_sub_nc (r2p + off, s1p + off, s2p + off, this_n, scyo);
+#else
+         scyn = mpn_sub_n (r2p + off, s1p + off, s2p + off, this_n);
+         scyo = scyn + mpn_sub_1 (r2p + off, r2p + off, this_n, scyo);
+#endif
+#if HAVE_NATIVE_mpn_add_nc
+         acyo = mpn_add_nc (r1p + off, s1p + off, s2p + off, this_n, acyo);
+#else
+         acyn = mpn_add_n (r1p + off, s1p + off, s2p + off, this_n);
+         acyo = acyn + mpn_add_1 (r1p + off, r1p + off, this_n, acyo);
+#endif
+       }
+    }
+  else
+    {
+      /* r1 and r2 are both identical to input operands (r1==s1 and r2==s2, or
+        vice versa).  The sum goes via tp so the subtract still sees its inputs.  */
+      mp_limb_t tp[PART_SIZE];
+      acyo = 0;
+      scyo = 0;
+      for (off = 0; off < n; off += PART_SIZE)
+       {
+         this_n = MIN (n - off, PART_SIZE);
+#if HAVE_NATIVE_mpn_add_nc
+         acyo = mpn_add_nc (tp, s1p + off, s2p + off, this_n, acyo);
+#else
+         acyn = mpn_add_n (tp, s1p + off, s2p + off, this_n);
+         acyo = acyn + mpn_add_1 (tp, tp, this_n, acyo);
+#endif
+#if HAVE_NATIVE_mpn_sub_nc
+         scyo = mpn_sub_nc (r2p + off, s1p + off, s2p + off, this_n, scyo);
+#else
+         scyn = mpn_sub_n (r2p + off, s1p + off, s2p + off, this_n);
+         scyo = scyn + mpn_sub_1 (r2p + off, r2p + off, this_n, scyo);
+#endif
+         MPN_COPY (r1p + off, tp, this_n);     /* the sum chunk can now overwrite r1 */
+       }
+    }
+
+  return 2 * acyo + scyo;      /* 2 * (carry from the add) + (borrow from the subtract) */
+}
+
+#ifdef MAIN
+#include <stdlib.h>
+#include <stdio.h>
+#include "timing.h"
+
+long cputime ();
+
+int
+main (int argc, char **argv)   /* timing driver, compiled only when MAIN is defined */
+{
+  mp_ptr r1p, r2p, s1p, s2p;
+  double t;
+  mp_size_t n;
+
+  n = strtol (argv[1], 0, 0);  /* operand size in limbs, taken from the first argument */
+
+  r1p = malloc (n * BYTES_PER_MP_LIMB);
+  r2p = malloc (n * BYTES_PER_MP_LIMB);
+  s1p = malloc (n * BYTES_PER_MP_LIMB);
+  s2p = malloc (n * BYTES_PER_MP_LIMB);
+  TIME (t,(mpn_add_n(r1p,s1p,s2p,n),mpn_sub_n(r1p,s1p,s2p,n)));
+  printf ("              separate add and sub: %.3f\n", t);
+  TIME (t,mpn_add_n_sub_n(r1p,r2p,s1p,s2p,n));
+  printf ("combined addsub separate variables: %.3f\n", t);
+  TIME (t,mpn_add_n_sub_n(r1p,r2p,r1p,s2p,n)); /* r1 is also the first source */
+  printf ("        combined addsub r1 overlap: %.3f\n", t);
+  TIME (t,mpn_add_n_sub_n(r1p,r2p,r2p,s2p,n)); /* r2 is also the first source (was a repeat of the r1 case) */
+  printf ("        combined addsub r2 overlap: %.3f\n", t);
+  TIME (t,mpn_add_n_sub_n(r1p,r2p,r1p,r2p,n)); /* both results overwrite the sources */
+  printf ("          combined addsub in-place: %.3f\n", t);
+
+  return 0;
+}
+#endif
diff --git a/mpn/generic/addcnd_n.c b/mpn/generic/addcnd_n.c

new file mode 100644 (file)

index 0000000..256cfbb
--- /dev/null
+++ b/mpn/generic/addcnd_n.c
@@ -0,0 +1,62 @@
+/* mpn_addcnd_n -- Compute R = U + V if CND != 0 or R = U if CND == 0.
+   Both cases should take the same time and perform the exact same memory
+   accesses, since this function is intended to be used where side-channel
+   attack resilience is relevant.
+
+   THIS IS AN INTERNAL FUNCTION WITH A MUTABLE INTERFACE.  IT IS ONLY
+   SAFE TO REACH THIS FUNCTION THROUGH DOCUMENTED INTERFACES.
+
+Copyright 1992, 1993, 1994, 1996, 2000, 2002, 2008, 2009, 2011 Free Software
+Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+mp_limb_t
+mpn_addcnd_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n, mp_limb_t cnd)
+{
+  mp_limb_t ul, vl, sl, rl, cy, cy1, cy2, mask;
+
+  ASSERT (n >= 1);
+  ASSERT (MPN_SAME_OR_SEPARATE_P (rp, up, n));
+  ASSERT (MPN_SAME_OR_SEPARATE_P (rp, vp, n));
+
+  mask = -(mp_limb_t) (cnd != 0);      /* all ones when cnd != 0, otherwise zero */
+  cy = 0;
+  do
+    {
+      ul = *up++;
+      vl = *vp++ & mask;               /* vp is always read; the limb becomes 0 when cnd == 0 */
+#if GMP_NAIL_BITS == 0
+      sl = ul + vl;
+      cy1 = sl < ul;                   /* the sum wrapped, so ul + vl carried */
+      rl = sl + cy;
+      cy2 = rl < sl;                   /* adding the incoming carry wrapped */
+      cy = cy1 | cy2;
+      *rp++ = rl;
+#else
+      rl = ul + vl;
+      rl += cy;
+      cy = rl >> GMP_NUMB_BITS;                /* carry is what spilled above the numb bits */
+      *rp++ = rl & GMP_NUMB_MASK;
+#endif
+    }
+  while (--n != 0);
+
+  return cy;                           /* carry out of the last limb */
+}
diff --git a/mpn/generic/addsub_n.c b/mpn/generic/addsub_n.c

deleted file mode 100644 (file)

index 21437c6..0000000
--- a/mpn/generic/addsub_n.c
+++ /dev/null
@@ -1,162 +0,0 @@
-/* mpn_add_n_sub_n -- Add and Subtract two limb vectors of equal, non-zero length.
-
-   THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
-   SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
-   GUARANTEED THAT IT'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
-
-Copyright 1999, 2000, 2001, 2006 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-License for more details.
-
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
-
-#include "gmp.h"
-#include "gmp-impl.h"
-
-#ifndef L1_CACHE_SIZE
-#define L1_CACHE_SIZE 8192     /* only 68040 has less than this */
-#endif
-
-#define PART_SIZE (L1_CACHE_SIZE / BYTES_PER_MP_LIMB / 6)
-
-
-/* mpn_add_n_sub_n.
-   r1[] = s1[] + s2[]
-   r2[] = s1[] - s2[]
-   All operands have n limbs.
-   In-place operations allowed.  */
-mp_limb_t
-mpn_add_n_sub_n (mp_ptr r1p, mp_ptr r2p, mp_srcptr s1p, mp_srcptr s2p, mp_size_t n)
-{
-  mp_limb_t acyn, acyo;                /* carry for add */
-  mp_limb_t scyn, scyo;                /* carry for subtract */
-  mp_size_t off;               /* offset in operands */
-  mp_size_t this_n;            /* size of current chunk */
-
-  /* We alternatingly add and subtract in chunks that fit into the (L1)
-     cache.  Since the chunks are several hundred limbs, the function call
-     overhead is insignificant, but we get much better locality.  */
-
-  /* We have three variant of the inner loop, the proper loop is chosen
-     depending on whether r1 or r2 are the same operand as s1 or s2.  */
-
-  if (r1p != s1p && r1p != s2p)
-    {
-      /* r1 is not identical to either input operand.  We can therefore write
-        to r1 directly, without using temporary storage.  */
-      acyo = 0;
-      scyo = 0;
-      for (off = 0; off < n; off += PART_SIZE)
-       {
-         this_n = MIN (n - off, PART_SIZE);
-#if HAVE_NATIVE_mpn_add_nc
-         acyo = mpn_add_nc (r1p + off, s1p + off, s2p + off, this_n, acyo);
-#else
-         acyn = mpn_add_n (r1p + off, s1p + off, s2p + off, this_n);
-         acyo = acyn + mpn_add_1 (r1p + off, r1p + off, this_n, acyo);
-#endif
-#if HAVE_NATIVE_mpn_sub_nc
-         scyo = mpn_sub_nc (r2p + off, s1p + off, s2p + off, this_n, scyo);
-#else
-         scyn = mpn_sub_n (r2p + off, s1p + off, s2p + off, this_n);
-         scyo = scyn + mpn_sub_1 (r2p + off, r2p + off, this_n, scyo);
-#endif
-       }
-    }
-  else if (r2p != s1p && r2p != s2p)
-    {
-      /* r2 is not identical to either input operand.  We can therefore write
-        to r2 directly, without using temporary storage.  */
-      acyo = 0;
-      scyo = 0;
-      for (off = 0; off < n; off += PART_SIZE)
-       {
-         this_n = MIN (n - off, PART_SIZE);
-#if HAVE_NATIVE_mpn_sub_nc
-         scyo = mpn_sub_nc (r2p + off, s1p + off, s2p + off, this_n, scyo);
-#else
-         scyn = mpn_sub_n (r2p + off, s1p + off, s2p + off, this_n);
-         scyo = scyn + mpn_sub_1 (r2p + off, r2p + off, this_n, scyo);
-#endif
-#if HAVE_NATIVE_mpn_add_nc
-         acyo = mpn_add_nc (r1p + off, s1p + off, s2p + off, this_n, acyo);
-#else
-         acyn = mpn_add_n (r1p + off, s1p + off, s2p + off, this_n);
-         acyo = acyn + mpn_add_1 (r1p + off, r1p + off, this_n, acyo);
-#endif
-       }
-    }
-  else
-    {
-      /* r1 and r2 are identical to s1 and s2 (r1==s1 and r2==s2 or vice versa)
-        Need temporary storage.  */
-      mp_limb_t tp[PART_SIZE];
-      acyo = 0;
-      scyo = 0;
-      for (off = 0; off < n; off += PART_SIZE)
-       {
-         this_n = MIN (n - off, PART_SIZE);
-#if HAVE_NATIVE_mpn_add_nc
-         acyo = mpn_add_nc (tp, s1p + off, s2p + off, this_n, acyo);
-#else
-         acyn = mpn_add_n (tp, s1p + off, s2p + off, this_n);
-         acyo = acyn + mpn_add_1 (tp, tp, this_n, acyo);
-#endif
-#if HAVE_NATIVE_mpn_sub_nc
-         scyo = mpn_sub_nc (r2p + off, s1p + off, s2p + off, this_n, scyo);
-#else
-         scyn = mpn_sub_n (r2p + off, s1p + off, s2p + off, this_n);
-         scyo = scyn + mpn_sub_1 (r2p + off, r2p + off, this_n, scyo);
-#endif
-         MPN_COPY (r1p + off, tp, this_n);
-       }
-    }
-
-  return 2 * acyo + scyo;
-}
-
-#ifdef MAIN
-#include <stdlib.h>
-#include <stdio.h>
-#include "timing.h"
-
-long cputime ();
-
-int
-main (int argc, char **argv)
-{
-  mp_ptr r1p, r2p, s1p, s2p;
-  double t;
-  mp_size_t n;
-
-  n = strtol (argv[1], 0, 0);
-
-  r1p = malloc (n * BYTES_PER_MP_LIMB);
-  r2p = malloc (n * BYTES_PER_MP_LIMB);
-  s1p = malloc (n * BYTES_PER_MP_LIMB);
-  s2p = malloc (n * BYTES_PER_MP_LIMB);
-  TIME (t,(mpn_add_n(r1p,s1p,s2p,n),mpn_sub_n(r1p,s1p,s2p,n)));
-  printf ("              separate add and sub: %.3f\n", t);
-  TIME (t,mpn_add_n_sub_n(r1p,r2p,s1p,s2p,n));
-  printf ("combined addsub separate variables: %.3f\n", t);
-  TIME (t,mpn_add_n_sub_n(r1p,r2p,r1p,s2p,n));
-  printf ("        combined addsub r1 overlap: %.3f\n", t);
-  TIME (t,mpn_add_n_sub_n(r1p,r2p,r1p,s2p,n));
-  printf ("        combined addsub r2 overlap: %.3f\n", t);
-  TIME (t,mpn_add_n_sub_n(r1p,r2p,r1p,r2p,n));
-  printf ("          combined addsub in-place: %.3f\n", t);
-
-  return 0;
-}
-#endif
diff --git a/mpn/generic/bdiv_qr.c b/mpn/generic/bdiv_qr.c

index 6fc61b79347a58d325ff3d0a10ac452d28af9e8d..8bac03e04169353ea9334c80449fa8e898107eda 100644 (file)
--- a/mpn/generic/bdiv_qr.c
+++ b/mpn/generic/bdiv_qr.c
@@ -7,7 +7,7 @@
     SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
     GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.
  
-Copyright 2006, 2007, 2009 Free Software Foundation, Inc.
+Copyright 2006, 2007, 2009, 2012 Free Software Foundation, Inc.
  
  This file is part of the GNU MP Library.
  
@@ -29,7 +29,7 @@ along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
  
  
  /* Computes Q = N / D mod B^n,
-           R = N - QD.  */
+           R = N - QD.  */
  
  mp_limb_t
  mpn_bdiv_qr (mp_ptr qp, mp_ptr rp,
@@ -40,6 +40,7 @@ mpn_bdiv_qr (mp_ptr qp, mp_ptr rp,
    mp_limb_t di;
    mp_limb_t rh;
  
+  ASSERT (nn > dn);
    if (BELOW_THRESHOLD (dn, DC_BDIV_QR_THRESHOLD) ||
        BELOW_THRESHOLD (nn - dn, DC_BDIV_QR_THRESHOLD))
      {
diff --git a/mpn/generic/binvert.c b/mpn/generic/binvert.c

index f06030cfe71cdda2c248aca780d42e0b365f6078..2ed91f31a50b89bc178524ed74a0e351408a137a 100644 (file)
--- a/mpn/generic/binvert.c
+++ b/mpn/generic/binvert.c
@@ -6,7 +6,8 @@
     SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
     GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.
  
-Copyright (C) 2004, 2005, 2006, 2007, 2009 Free Software Foundation, Inc.
+Copyright (C) 2004, 2005, 2006, 2007, 2009, 2012 Free Software
+Foundation, Inc.
  
  This file is part of the GNU MP Library.
  
@@ -32,14 +33,6 @@ along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
    r[k+1] = r[k] + r[k] - r[k]*(u*r[k])
  */
  
-/* This is intended for constant THRESHOLDs only, where the compiler can
-   completely fold the result.  */
-#define LOG2C(n) \
- (((n) >=    0x1) + ((n) >=    0x2) + ((n) >=    0x4) + ((n) >=    0x8) + \
-  ((n) >=   0x10) + ((n) >=   0x20) + ((n) >=   0x40) + ((n) >=   0x80) + \
-  ((n) >=  0x100) + ((n) >=  0x200) + ((n) >=  0x400) + ((n) >=  0x800) + \
-  ((n) >= 0x1000) + ((n) >= 0x2000) + ((n) >= 0x4000) + ((n) >= 0x8000))
-
  #if TUNE_PROGRAM_BUILD
  #define NPOWS \
   ((sizeof(mp_size_t) > 6 ? 48 : 8*sizeof(mp_size_t)))
diff --git a/mpn/generic/broot.c b/mpn/generic/broot.c

new file mode 100644 (file)

index 0000000..93d3292
--- /dev/null
+++ b/mpn/generic/broot.c
@@ -0,0 +1,186 @@
+/* mpn_broot -- Compute Hensel k-th root
+
+   Contributed to the GNU project by Niels Möller
+
+   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
+   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.
+
+Copyright 2012 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+/* Computes a^e (mod B). Uses right-to-left binary algorithm, since
+   typical use will have e small. */
+static mp_limb_t
+powlimb (mp_limb_t a, mp_limb_t e)
+{
+  mp_limb_t r = 1; /* product of the powers selected so far */
+  mp_limb_t s = a; /* a^(2^i) at step i, reduced by limb wraparound */
+
+  for (r = 1, s = a; e > 0; e >>= 1, s *= s) /* re-init repeats the declarations */
+    if (e & 1) /* bit i of the exponent is set */
+      r *= s;
+
+  return r;
+}
+
+/* Computes a^{1/k - 1} (mod B^n). Both a and k must be odd.
+
+   Iterates
+
+     r' <-- r - r * (a^{k-1} r^k - 1) / k
+
+   If
+
+     a^{k-1} r^k = 1 (mod 2^m),
+
+   then
+
+     a^{k-1} r'^k = 1 (mod 2^{2m}),
+
+   Compute the update term as
+
+     r' = r - (a^{k-1} r^{k+1} - r) / k
+
+   where we still have cancelation of low limbs.
+
+ */
+void
+mpn_broot_invm1 (mp_ptr rp, mp_srcptr ap, mp_size_t n, mp_limb_t k)
+{
+  mp_size_t sizes[GMP_LIMB_BITS * 2];
+  mp_ptr akm1, tp, rnp, ep, scratch; /* NOTE: scratch is never used below */
+  mp_limb_t a0, r0, km1, kp1h, kinv;
+  mp_size_t rn;
+  unsigned i;
+
+  TMP_DECL;
+
+  ASSERT (n > 0);
+  ASSERT (ap[0] & 1);
+  ASSERT (k & 1);
+  ASSERT (k >= 3);
+
+  TMP_MARK;
+
+  akm1 = TMP_ALLOC_LIMBS (4*n); /* n limbs for a^{k-1}, then 3n limbs at tp */
+  tp = akm1 + n;
+
+  km1 = k-1;
+  /* FIXME: Could arrange the iteration so we don't need to compute
+     this up front, computing a^{k-1} * r^k as (a r)^{k-1} * r. Note
+     that we can use wraparound also for a*r, since the low half is
+     unchanged from the previous iteration. Or possibly mulmid. Also,
+     a r = a^{1/k}, so we get that value too, for free? */
+  mpn_powlo (akm1, ap, &km1, 1, n, tp); /* 3 n scratch space */
+
+  a0 = ap[0];
+  binvert_limb (kinv, k); /* kinv is used below as the inverse of k mod B */
+
+  /* 4 bits: a^{1/k - 1} (mod 16):
+
+       a % 8
+       1 3 5 7
+   k%4 +-------
+     1 |1 1 1 1
+     3 |1 9 9 1
+  */
+  r0 = 1 + (((k << 2) & ((a0 << 1) ^ (a0 << 2))) & 8);
+  r0 = kinv * r0 * (k+1 - akm1[0] * powlimb (r0, k & 0x7f)); /* 8 bits */
+  r0 = kinv * r0 * (k+1 - akm1[0] * powlimb (r0, k & 0x7fff)); /* 16 bits */
+  r0 = kinv * r0 * (k+1 - akm1[0] * powlimb (r0, k)); /* 32 bits */
+#if GMP_NUMB_BITS > 32
+  {
+    unsigned prec = 32;
+    do
+      {
+       r0 = kinv * r0 * (k+1 - akm1[0] * powlimb (r0, k));
+       prec *= 2; /* each step doubles the number of correct bits */
+      }
+    while (prec < GMP_NUMB_BITS);
+  }
+#endif
+
+  rp[0] = r0;
+  if (n == 1)
+    {
+      TMP_FREE;
+      return;
+    }
+
+  /* For odd k, (k+1)/2 = k/2+1, and the latter avoids overflow. */
+  kp1h = k/2 + 1;
+
+  /* FIXME: Special case for two limb iteration. */
+  rnp = TMP_ALLOC_LIMBS (2*n + 1); /* rnp gets n limbs, ep the remaining n+1 */
+  ep = rnp + n;
+
+  /* FIXME: Possible to do this on the fly with some bit fiddling. */
+  for (i = 0; n > 1; n = (n + 1)/2) /* records n, ceil(n/2), ..., 2; clobbers n */
+    sizes[i++] = n;
+
+  rn = 1; /* number of limbs of rp that are correct so far */
+
+  while (i-- > 0) /* walk the schedule from the smallest size upwards */
+    {
+      /* Compute r^{k+1}. */
+      mpn_sqr (ep, rp, rn); /* For odd n, writes n+1 limbs in the
+                              final iteration.*/
+      mpn_powlo (rnp, ep, &kp1h, 1, sizes[i], tp);
+
+      /* Multiply by a^{k-1}. Can use wraparound; low part equals
+        r. */
+
+      mpn_mullo_n (ep, rnp, akm1, sizes[i]);
+      ASSERT (mpn_cmp (ep, rp, rn) == 0);
+
+      ASSERT (sizes[i] <= 2*rn); /* precision at most doubles per step */
+      mpn_pi1_bdiv_q_1 (rp + rn, ep + rn, sizes[i] - rn, k, kinv, 0); /* high part / k */
+      mpn_neg (rp + rn, rp + rn, sizes[i] - rn); /* r' = r - (...) / k */
+      rn = sizes[i];
+    }
+  TMP_FREE;
+}
+
+/* Computes a^{1/k} (mod B^n). Both a and k must be odd. */
+void
+mpn_broot (mp_ptr rp, mp_srcptr ap, mp_size_t n, mp_limb_t k)
+{
+  mp_ptr tp;
+  TMP_DECL;
+
+  ASSERT (n > 0);
+  ASSERT (ap[0] & 1);
+  ASSERT (k & 1);
+
+  if (k == 1) /* a^{1/1} = a; also avoids mpn_broot_invm1, which asserts k >= 3 */
+    {
+      MPN_COPY (rp, ap, n);
+      return;
+    }
+
+  TMP_MARK;
+  tp = TMP_ALLOC_LIMBS (n);
+
+  mpn_broot_invm1 (tp, ap, n, k); /* tp = a^{1/k - 1} (mod B^n) */
+  mpn_mullo_n (rp, tp, ap, n); /* rp = a^{1/k - 1} * a = a^{1/k}, n limbs */
+
+  TMP_FREE;
+}
diff --git a/mpn/generic/brootinv.c b/mpn/generic/brootinv.c

new file mode 100644 (file)

index 0000000..3ab547d
--- /dev/null
+++ b/mpn/generic/brootinv.c
@@ -0,0 +1,131 @@
+/* mpn_brootinv, compute r such that r^k * y = 1 (mod 2^b).
+
+   Contributed to the GNU project by Martin Boij (as part of perfpow.c).
+
+Copyright 2009, 2010, 2012 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+/* Computes a^e (mod B). Uses right-to-left binary algorithm, since
+   typical use will have e small. */
+static mp_limb_t
+powlimb (mp_limb_t a, mp_limb_t e)
+{
+  mp_limb_t r = 1; /* product of the powers selected so far */
+  mp_limb_t s = a; /* a^(2^i) at step i, reduced by limb wraparound */
+
+  for (r = 1, s = a; e > 0; e >>= 1, s *= s) /* re-init repeats the declarations */
+    if (e & 1) /* bit i of the exponent is set */
+      r *= s;
+
+  return r;
+}
+
+/* Compute r such that r^k * y = 1 (mod B^n).
+
+   Iterates
+     r' <-- k^{-1} ((k+1) r - r^{k+1} y) (mod 2^b)
+   using Hensel lifting, each time doubling the number of known bits in r.
+
+   Works just for odd k.  Else the Hensel lifting degenerates.
+
+   FIXME:
+
+     (1) Make it work for k == GMP_LIMB_MAX (k+1 below overflows).
+
+     (2) Rewrite iteration as
+          r' <-- r - k^{-1} r (r^k y - 1)
+        and take advantage of the zero low part of r^k y - 1.
+
+     (3) Use wrap-around trick.
+
+     (4) Use a small table to get starting value.
+
+   Scratch need: 5*bn, where bn = ceil (bnb / GMP_NUMB_BITS).
+*/
+
+void
+mpn_brootinv (mp_ptr rp, mp_srcptr yp, mp_size_t bn, mp_limb_t k, mp_ptr tp)
+{
+  mp_ptr tp2, tp3;
+  mp_limb_t kinv, k2, r0, y0;
+  mp_size_t order[GMP_LIMB_BITS + 1]; /* sizes (in limbs) of the lifting steps */
+  int i, d;
+
+  ASSERT (bn > 0);
+  ASSERT ((k & 1) != 0);
+
+  tp2 = tp + bn; /* scratch layout: tp and tp2 get bn limbs each, tp3 the rest */
+  tp3 = tp + 2 * bn;
+  k2 = k + 1; /* wraps to 0 when k is the largest limb value, see FIXME (1) */
+
+  binvert_limb (kinv, k); /* kinv is used below as the inverse of k mod B */
+
+  /* 4-bit initial approximation:
+
+   y%16 | 1  3  5  7  9 11 13 15,
+    k%4 +-----------------------------
+     1  | 1 11 13  7  9  3  5 15
+     3  | 1  3  5  7  9 11 13 15
+
+  */
+  y0 = yp[0];
+
+  r0 = y0 ^ (((y0 << 1) ^ (y0 << 2)) & ~(k << 2) & 8);         /* 4 bits */
+  r0 = kinv * (k2 * r0 - y0 * powlimb(r0, k2 & 0x7f));         /* 8 bits */
+  r0 = kinv * (k2 * r0 - y0 * powlimb(r0, k2 & 0xffff));       /* 16 bits */
+  r0 = kinv * (k2 * r0 - y0 * powlimb(r0, k2));                        /* 32 bits */
+#if GMP_NUMB_BITS > 32
+  {
+    unsigned prec = 32;
+    do
+      {
+       r0 = kinv * (k2 * r0 - y0 * powlimb(r0, k2));
+       prec *= 2; /* each step doubles the number of correct bits */
+      }
+    while (prec < GMP_NUMB_BITS);
+  }
+#endif
+
+  rp[0] = r0;
+  if (bn == 1)
+    return;
+
+  /* This initialization doesn't matter for the result (any garbage is
+     cancelled in the iteration), but proper initialization makes
+     valgrind happier. */
+  MPN_ZERO (rp+1, bn-1);
+
+  d = 0;
+  for (; bn > 1; bn = (bn + 1) >> 1) /* records bn, ceil(bn/2), ..., 2 */
+    order[d++] = bn;
+
+  for (i = d - 1; i >= 0; i--) /* replay the schedule from the smallest size up */
+    {
+      bn = order[i];
+
+      mpn_mul_1 (tp, rp, bn, k2); /* tp = (k+1) r; the returned high limb is dropped */
+
+      mpn_powlo (tp2, rp, &k2, 1, bn, tp3); /* tp2 = r^{k+1}, low bn limbs */
+      mpn_mullo_n (rp, yp, tp2, bn); /* rp = y r^{k+1}, low bn limbs */
+
+      mpn_sub_n (tp2, tp, rp, bn); /* tp2 = (k+1) r - y r^{k+1} */
+      mpn_pi1_bdiv_q_1 (rp, tp2, bn, k, kinv, 0); /* rp = tp2 / k, the new r */
+    }
+}
diff --git a/mpn/generic/bsqrt.c b/mpn/generic/bsqrt.c

new file mode 100644 (file)

index 0000000..a9f49e6
--- /dev/null
+++ b/mpn/generic/bsqrt.c
@@ -0,0 +1,37 @@
+/* mpn_bsqrt, a^{1/2} (mod 2^n).
+
+Copyright 2009, 2010, 2012 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+
+void
+mpn_bsqrt (mp_ptr rp, mp_srcptr ap, mp_bitcnt_t nb, mp_ptr tp)
+{
+  mp_ptr sp;
+  mp_size_t n;
+
+  ASSERT (nb > 0);
+
+  n = nb / GMP_NUMB_BITS; /* NOTE(review): rounds down, so n == 0 for nb < GMP_NUMB_BITS; mpn_bsqrtinv works on 1 + nb / GMP_LIMB_BITS limbs -- confirm */
+  sp = tp + n; /* inverse square root is placed n limbs into the scratch area */
+
+  mpn_bsqrtinv (sp, ap, nb, tp); /* NOTE(review): result area tp + n looks like it overlaps the scratch tp; the "no root exists" return value is ignored -- confirm */
+  mpn_mullo_n (rp, sp, ap, n); /* rp = a^{-1/2} * a = a^{1/2}, low n limbs */
+}
diff --git a/mpn/generic/bsqrtinv.c b/mpn/generic/bsqrtinv.c

new file mode 100644 (file)

index 0000000..2dfd02b
--- /dev/null
+++ b/mpn/generic/bsqrtinv.c
@@ -0,0 +1,94 @@
+/* mpn_bsqrtinv, compute r such that r^2 * y = 1 (mod 2^{b+1}).
+
+   Contributed to the GNU project by Martin Boij (as part of perfpow.c).
+
+Copyright 2009, 2010, 2012 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+/* Compute r such that r^2 * y = 1 (mod 2^{b+1}).
+   Return non-zero if such an integer r exists.
+
+   Iterates
+     r' <-- (3r - r^3 y) / 2
+   using Hensel lifting.  Since we divide by two, the Hensel lifting is
+   somewhat degenerate.  Therefore, we lift from 2^b to 2^{b+1}-1.
+
+   FIXME:
+     (1) Simplify to do precision book-keeping in limbs rather than bits.
+
+     (2) Rewrite iteration as
+          r' <-- r - r (r^2 y - 1) / 2
+        and take advantage of zero low part of r^2 y - 1.
+
+     (3) Use wrap-around trick.
+
+     (4) Use a small table to get starting value.
+*/
+int
+mpn_bsqrtinv (mp_ptr rp, mp_srcptr yp, mp_bitcnt_t bnb, mp_ptr tp)
+{
+  mp_ptr tp2, tp3;
+  mp_limb_t k;
+  mp_size_t bn, order[GMP_LIMB_BITS + 1]; /* order[] holds bit counts, not limb counts */
+  int i, d;
+
+  ASSERT (bnb > 0);
+
+  bn = 1 + bnb / GMP_LIMB_BITS; /* limbs needed to hold bnb + 1 bits */
+
+  tp2 = tp + bn; /* scratch layout: tp and tp2 get bn limbs each, tp3 the rest */
+  tp3 = tp + 2 * bn;
+  k = 3; /* both the multiplier in 3r and the exponent in r^3 */
+
+  rp[0] = 1; /* starting value of the iteration */
+  if (bnb == 1)
+    {
+      if ((yp[0] & 3) != 1) /* y must be 1 mod 4 */
+       return 0;
+    }
+  else
+    {
+      if ((yp[0] & 7) != 1) /* y must be 1 mod 8 */
+       return 0;
+
+      d = 0;
+      for (; bnb != 2; bnb = (bnb + 2) >> 1) /* record precisions from the target down */
+       order[d++] = bnb;
+
+      for (i = d - 1; i >= 0; i--) /* replay them from the smallest up */
+       {
+         bnb = order[i];
+         bn = 1 + bnb / GMP_LIMB_BITS;
+
+         mpn_mul_1 (tp, rp, bn, k); /* tp = 3r; the returned high limb is dropped */
+
+         mpn_powlo (tp2, rp, &k, 1, bn, tp3); /* tp2 = r^3, low bn limbs */
+         mpn_mullo_n (rp, yp, tp2, bn); /* rp = r^3 y, low bn limbs */
+
+#if HAVE_NATIVE_mpn_rsh1sub_n
+         mpn_rsh1sub_n (rp, tp, rp, bn); /* rp = (3r - r^3 y) / 2 in one pass */
+#else
+         mpn_sub_n (tp2, tp, rp, bn); /* tp2 = 3r - r^3 y */
+         mpn_rshift (rp, tp2, bn, 1); /* rp = tp2 / 2 */
+#endif
+       }
+    }
+  return 1;
+}
diff --git a/mpn/generic/comb_tables.c b/mpn/generic/comb_tables.c

new file mode 100644 (file)

index 0000000..de725d0
--- /dev/null
+++ b/mpn/generic/comb_tables.c
@@ -0,0 +1,37 @@
+/* Const tables shared among combinatoric functions.
+
+   THE CONTENTS OF THIS FILE ARE FOR INTERNAL USE AND ARE ALMOST CERTAIN TO
+   BE SUBJECT TO INCOMPATIBLE CHANGES IN FUTURE GNU MP RELEASES.
+
+Copyright 2012 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+/* Entry i contains (i!/2^t) where t is chosen such that the parenthesis
+   is an odd integer. */
+const mp_limb_t __gmp_oddfac_table[] = { ONE_LIMB_ODD_FACTORIAL_TABLE, ONE_LIMB_ODD_FACTORIAL_EXTTABLE };
+
+/* Entry i contains ((2i+1)!!/2^t) where t is chosen such that the parenthesis
+   is an odd integer. */
+const mp_limb_t __gmp_odd2fac_table[] = { ONE_LIMB_ODD_DOUBLEFACTORIAL_TABLE };
+
+/* Entry i contains 2i-popc(2i). */
+const unsigned char __gmp_fac2cnt_table[] = { TABLE_2N_MINUS_POPC_2N };
+
+const mp_limb_t __gmp_limbroots_table[] = { NTH_ROOT_NUMB_MASK_TABLE };
diff --git a/mpn/generic/div_qr_2.c b/mpn/generic/div_qr_2.c

new file mode 100644 (file)

index 0000000..ca61d6b
--- /dev/null
+++ b/mpn/generic/div_qr_2.c
@@ -0,0 +1,323 @@
+/* mpn_div_qr_2 -- Divide natural numbers, producing both remainder and
+   quotient.  The divisor is two limbs.
+
+   Contributed to the GNU project by Torbjorn Granlund and Niels Möller
+
+   THIS FILE CONTAINS INTERNAL FUNCTIONS WITH MUTABLE INTERFACES.  IT IS
+   ONLY SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS
+   ALMOST GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP
+   RELEASE.
+
+
+Copyright 1993, 1994, 1995, 1996, 1999, 2000, 2001, 2002, 2011 Free Software
+Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+#ifndef DIV_QR_2_PI2_THRESHOLD
+/* Disabled unless explicitly tuned. */
+#define DIV_QR_2_PI2_THRESHOLD MP_LIMB_T_MAX
+#endif
+
+#ifndef SANITY_CHECK
+#define SANITY_CHECK 0
+#endif
+
+/* Define some longlong.h-style macros, but for wider operations.
+   * add_sssaaaa is like longlong.h's add_ssaaaa but propagates the
+     carry-out into an additional sum operand.
+   * add_csaac accepts two addends and a carry in, and generates a sum
+     and a carry out.  A little like a "full adder".
+*/
+#if defined (__GNUC__)  && ! defined (__INTEL_COMPILER)
+
+#if (defined (__i386__) || defined (__i486__)) && W_TYPE_SIZE == 32
+#define add_sssaaaa(s2, s1, s0, a1, a0, b1, b0)                                \
+  __asm__ ("add\t%7, %k2\n\tadc\t%5, %k1\n\tadc\t$0, %k0"              \
+          : "=r" (s2), "=&r" (s1), "=&r" (s0)                          \
+          : "0"  ((USItype)(s2)),                                      \
+            "1"  ((USItype)(a1)), "g" ((USItype)(b1)),                 \
+            "%2" ((USItype)(a0)), "g" ((USItype)(b0)))
+#define add_csaac(co, s, a, b, ci)                                     \
+  __asm__ ("bt\t$0, %2\n\tadc\t%5, %k1\n\tadc\t%k0, %k0"               \
+          : "=r" (co), "=r" (s)                                        \
+          : "rm"  ((USItype)(ci)), "0" (CNST_LIMB(0)),                 \
+            "%1" ((USItype)(a)), "g" ((USItype)(b)))
+#endif
+
+#if defined (__amd64__) && W_TYPE_SIZE == 64
+#define add_sssaaaa(s2, s1, s0, a1, a0, b1, b0)                                \
+  __asm__ ("add\t%7, %q2\n\tadc\t%5, %q1\n\tadc\t$0, %q0"              \
+          : "=r" (s2), "=&r" (s1), "=&r" (s0)                          \
+          : "0"  ((UDItype)(s2)),                                      \
+            "1"  ((UDItype)(a1)), "rme" ((UDItype)(b1)),               \
+            "%2" ((UDItype)(a0)), "rme" ((UDItype)(b0)))
+#define add_csaac(co, s, a, b, ci)                                     \
+  __asm__ ("bt\t$0, %2\n\tadc\t%5, %q1\n\tadc\t%q0, %q0"               \
+          : "=r" (co), "=r" (s)                                        \
+          : "rm"  ((UDItype)(ci)), "0" (CNST_LIMB(0)),                 \
+            "%1" ((UDItype)(a)), "g" ((UDItype)(b)))
+#endif
+
+#if HAVE_HOST_CPU_FAMILY_powerpc && !defined (_LONG_LONG_LIMB)
+/* This works fine for 32-bit and 64-bit limbs, except for 64-bit limbs with a
+   processor running in 32-bit mode, since the carry flag then gets the 32-bit
+   carry.  */
+#define add_sssaaaa(s2, s1, s0, a1, a0, b1, b0)                                \
+  __asm__ ("add%I7c\t%2,%6,%7\n\tadde\t%1,%4,%5\n\taddze\t%0,%0"       \
+          : "=r" (s2), "=&r" (s1), "=&r" (s0)                          \
+          : "r"  (s2), "r"  (a1), "r" (b1), "%r" (a0), "rI" (b0))
+#endif
+
+#endif /* __GNUC__ */
+
+#ifndef add_sssaaaa
+#define add_sssaaaa(s2, s1, s0, a1, a0, b1, b0)                                \
+  do {                                                                 \
+    UWtype __s0, __s1, __c0, __c1;                                     \
+    __s0 = (a0) + (b0);                                                        \
+    __s1 = (a1) + (b1);                                                        \
+    __c0 = __s0 < (a0);                                                        \
+    __c1 = __s1 < (a1);                                                        \
+    (s0) = __s0;                                                       \
+    __s1 = __s1 + __c0;                                                        \
+    (s1) = __s1;                                                       \
+    (s2) += __c1 + (__s1 < __c0);                                      \
+  } while (0)
+#endif
+
+#ifndef add_csaac
+#define add_csaac(co, s, a, b, ci)                                     \
+  do {                                                                 \
+    UWtype __s, __c;                                                   \
+    __s = (a) + (b);                                                   \
+    __c = __s < (a);                                                   \
+    __s = __s + (ci);                                                  \
+    (s) = __s;                                                         \
+    (co) = __c + (__s < (ci));                                         \
+  } while (0)
+#endif
+
+/* Typically used with r1, r0 same as n3, n2. Other types of overlap
+   between inputs and outputs not supported. */
+#define udiv_qr_4by2(q1,q0, r1,r0, n3,n2,n1,n0, d1,d0, di1,di0)                \
+  do {                                                                 \
+    mp_limb_t _q3, _q2a, _q2, _q1, _q2c, _q1c, _q1d, _q0;              \
+    mp_limb_t _t1, _t0;                                                        \
+    mp_limb_t _c, _mask;                                               \
+                                                                       \
+    umul_ppmm (_q3,_q2a, n3, di1);                                     \
+    umul_ppmm (_q2,_q1, n2, di1);                                      \
+    umul_ppmm (_q2c,_q1c, n3, di0);                                    \
+    add_sssaaaa (_q3,_q2,_q1, _q2,_q1, _q2c,_q1c);                     \
+    umul_ppmm (_q1d,_q0, n2, di0);                                     \
+    add_sssaaaa (_q3,_q2,_q1, _q2,_q1, _q2a,_q1d);                     \
+                                                                       \
+    add_ssaaaa (r1, r0, n3, n2, 0, 1); /* FIXME: combine as in x86_64 asm */ \
+                                                                       \
+    /* [q3,q2,q1,q0] += [r1,r0,n1,n0] */                               \
+    add_csaac (_c, _q0, _q0, n0, 0);                                   \
+    add_csaac (_c, _q1, _q1, n1, _c);                                  \
+    add_csaac (_c, _q2, _q2, r0, _c);                                  \
+    _q3 = _q3 + r1 + _c;                                               \
+                                                                       \
+    umul_ppmm (_t1,_t0, _q2, d0);                                      \
+    _t1 += _q2 * d1 + _q3 * d0;                                                \
+                                                                       \
+    sub_ddmmss (r1, r0, n1, n0, _t1, _t0);                             \
+                                                                       \
+    _mask = -(mp_limb_t) (r1 >= _q1 & (r1 > _q1 | r0 >= _q0));  /* (r1,r0) >= (q1,q0) */  \
+    add_ssaaaa (r1, r0, r1, r0, d1 & _mask, d0 & _mask);               \
+    sub_ddmmss (_q3, _q2, _q3, _q2, 0, -_mask);                                \
+                                                                       \
+    if (UNLIKELY (r1 >= d1))                                           \
+      {                                                                        \
+       if (r1 > d1 || r0 >= d0)                                        \
+         {                                                             \
+           sub_ddmmss (r1, r0, r1, r0, d1, d0);                        \
+           add_ssaaaa (_q3, _q2, _q3, _q2, 0, 1);                      \
+         }                                                             \
+      }                                                                        \
+    (q1) = _q3;                                                                \
+    (q0) = _q2;                                                                \
+  } while (0)
+
+static void
+invert_4by2 (mp_ptr di, mp_limb_t d1, mp_limb_t d0)
+{
+  mp_limb_t v1, v0, p1, t1, t0, p0, mask;
+  invert_limb (v1, d1);
+  p1 = d1 * v1;
+  /* <1, v1> * d1 = <B-1, p1> */
+  p1 += d0;
+  if (p1 < d0)
+    {
+      v1--;
+      mask = -(mp_limb_t) (p1 >= d1);
+      p1 -= d1;
+      v1 += mask;
+      p1 -= mask & d1;
+    }
+  /* <1, v1> * d1 + d0 = <B-1, p1> */
+  umul_ppmm (t1, p0, d0, v1);
+  p1 += t1;
+  if (p1 < t1)
+    {
+      if (UNLIKELY (p1 >= d1))
+       {
+         if (p1 > d1 || p0 >= d0)
+           {
+             sub_ddmmss (p1, p0, p1, p0, d1, d0);
+             v1--;
+           }
+       }
+      sub_ddmmss (p1, p0, p1, p0, d1, d0);
+      v1--;
+    }
+  /* Now v1 is the 3/2 inverse, <1, v1> * <d1, d0> = <B-1, p1, p0>,
+   * with <p1, p0> + <d1, d0> >= B^2.
+   *
+   * The 4/2 inverse is (B^4 - 1) / <d1, d0> = <1, v1, v0>. The
+   * partial remainder after <1, v1> is
+   *
+   * B^4 - 1 - B <1, v1> <d1, d0> = <B-1, B-1, B-1, B-1> - <B-1, p1, p0, 0>
+   *                              = <~p1, ~p0, B-1>
+   */
+  udiv_qr_3by2 (v0, t1, t0, ~p1, ~p0, MP_LIMB_T_MAX, d1, d0, v1);
+  di[0] = v0;
+  di[1] = v1;
+
+#if SANITY_CHECK
+  {
+    mp_limb_t tp[4];
+    mp_limb_t dp[2];
+    dp[0] = d0;
+    dp[1] = d1;
+    mpn_mul_n (tp, dp, di, 2);
+    ASSERT_ALWAYS (mpn_add_n (tp+2, tp+2, dp, 2) == 0);
+    ASSERT_ALWAYS (tp[2] == MP_LIMB_T_MAX);
+    ASSERT_ALWAYS (tp[3] == MP_LIMB_T_MAX);
+    ASSERT_ALWAYS (mpn_add_n (tp, tp, dp, 2) == 1);
+  }
+#endif
+}
+
+static mp_limb_t
+mpn_div_qr_2n_pi2 (mp_ptr qp, mp_ptr rp, mp_srcptr np, mp_size_t nn,
+                  mp_limb_t d1, mp_limb_t d0, mp_limb_t di1, mp_limb_t di0)
+{
+  mp_limb_t qh;
+  mp_size_t i;
+  mp_limb_t r1, r0;
+
+  ASSERT (nn >= 2);
+  ASSERT (d1 & GMP_NUMB_HIGHBIT);
+
+  r1 = np[nn-1]; /* the two top numerator limbs seed the running remainder */
+  r0 = np[nn-2];
+
+  qh = 0;
+  if (r1 >= d1 && (r1 > d1 || r0 >= d0)) /* <r1,r0> >= <d1,d0> */
+    {
+#if GMP_NAIL_BITS == 0
+      sub_ddmmss (r1, r0, r1, r0, d1, d0);
+#else
+      r0 = r0 - d0;
+      r1 = r1 - d1 - (r0 >> GMP_LIMB_BITS - 1); /* top bit of r0 is the borrow */
+      r0 &= GMP_NUMB_MASK;
+#endif
+      qh = 1; /* the top quotient limb is either 0 or 1 */
+    }
+
+  for (i = nn - 2; i >= 2; i -= 2) /* two numerator limbs per iteration */
+    {
+      mp_limb_t n1, n0, q1, q0;
+      n1 = np[i-1];
+      n0 = np[i-2];
+      udiv_qr_4by2 (q1, q0, r1, r0, r1, r0, n1, n0, d1, d0, di1, di0);
+      qp[i-1] = q1;
+      qp[i-2] = q0;
+    }
+
+  if (i > 0) /* i == 1: one numerator limb left over, happens for odd nn */
+    {
+      mp_limb_t q;
+      udiv_qr_3by2 (q, r1, r0, r1, r0, np[0], d1, d0, di1); /* uses only the high inverse limb */
+      qp[0] = q;
+    }
+  rp[1] = r1;
+  rp[0] = r0;
+
+  return qh;
+}
+
+
+/* Divide num {np,nn} by den {dp,2} and write the nn-2 least
+   significant quotient limbs at qp and the 2 long remainder at rp.
+   Return the most significant limb of the quotient.
+
+   Preconditions:
+   1. qp must either not overlap with the input operands at all, or
+      qp >= np + 2 must hold true.  (This means that it's possible to put
+      the quotient in the high part of {np,nn}, right above the remainder.)
+   2. nn >= 2.  */
+
+mp_limb_t
+mpn_div_qr_2 (mp_ptr qp, mp_ptr rp, mp_srcptr np, mp_size_t nn,
+             mp_srcptr dp)
+{
+  mp_limb_t d1;
+  mp_limb_t d0;
+  gmp_pi1_t dinv; /* only used by the unnormalized branch at the end */
+
+  ASSERT (nn >= 2);
+  ASSERT (! MPN_OVERLAP_P (qp, nn-2, np, nn) || qp >= np + 2);
+  ASSERT_MPN (np, nn);
+  ASSERT_MPN (dp, 2);
+
+  d1 = dp[1]; d0 = dp[0];
+
+  ASSERT (d1 > 0);
+
+  if (UNLIKELY (d1 & GMP_NUMB_HIGHBIT)) /* divisor already normalized */
+    {
+      if (BELOW_THRESHOLD (nn, DIV_QR_2_PI2_THRESHOLD)) /* threshold defaults to MP_LIMB_T_MAX in this file */
+       {
+         gmp_pi1_t dinv; /* shadows the function-scope dinv */
+         invert_pi1 (dinv, d1, d0);
+         return mpn_div_qr_2n_pi1 (qp, rp, np, nn, d1, d0, dinv.inv32);
+       }
+      else
+       {
+         mp_limb_t di[2]; /* di[1] high, di[0] low limb of the inverse */
+         invert_4by2 (di, d1, d0);
+         return mpn_div_qr_2n_pi2 (qp, rp, np, nn, d1, d0, di[1], di[0]);
+       }
+    }
+  else
+    {
+      int shift;
+      count_leading_zeros (shift, d1); /* d1 > 0 is asserted above */
+      d1 = (d1 << shift) | (d0 >> (GMP_LIMB_BITS - shift)); /* presumably shift >= 1 here (high bit of d1 clear), keeping the right-shift count below GMP_LIMB_BITS */
+      d0 <<= shift;
+      invert_pi1 (dinv, d1, d0); /* inverse of the shifted divisor */
+      return mpn_div_qr_2u_pi1 (qp, rp, np, nn, d1, d0, shift, dinv.inv32);
+    }
+}
diff --git a/mpn/generic/div_qr_2n_pi1.c b/mpn/generic/div_qr_2n_pi1.c

new file mode 100644 (file)

index 0000000..ac8dc3d
--- /dev/null
+++ b/mpn/generic/div_qr_2n_pi1.c
@@ -0,0 +1,76 @@
+/* mpn_div_qr_2n_pi1
+
+   Contributed to the GNU project by Torbjorn Granlund and Niels Möller
+
+   THIS FILE CONTAINS INTERNAL FUNCTIONS WITH MUTABLE INTERFACES.  IT IS
+   ONLY SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS
+   ALMOST GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP
+   RELEASE.
+
+
+Copyright 1993, 1994, 1995, 1996, 1999, 2000, 2001, 2002, 2011 Free Software
+Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+
+/* 3/2 loop, for normalized divisor */
+mp_limb_t
+mpn_div_qr_2n_pi1 (mp_ptr qp, mp_ptr rp, mp_srcptr np, mp_size_t nn,
+                  mp_limb_t d1, mp_limb_t d0, mp_limb_t di)
+{
+  mp_limb_t qh;
+  mp_size_t i;
+  mp_limb_t r1, r0;
+
+  ASSERT (nn >= 2);
+  ASSERT (d1 & GMP_NUMB_HIGHBIT);
+
+  np += nn - 2;
+  r1 = np[1];
+  r0 = np[0];
+
+  qh = 0;
+  if (r1 >= d1 && (r1 > d1 || r0 >= d0))
+    {
+#if GMP_NAIL_BITS == 0
+      sub_ddmmss (r1, r0, r1, r0, d1, d0);
+#else
+      r0 = r0 - d0;
+      r1 = r1 - d1 - (r0 >> GMP_LIMB_BITS - 1);
+      r0 &= GMP_NUMB_MASK;
+#endif
+      qh = 1;
+    }
+
+  for (i = nn - 2 - 1; i >= 0; i--)
+    {
+      mp_limb_t n0, q;
+      n0 = np[-1];
+      udiv_qr_3by2 (q, r1, r0, r1, r0, n0, d1, d0, di);
+      np--;
+      qp[i] = q;
+    }
+
+  rp[1] = r1;
+  rp[0] = r0;
+
+  return qh;
+}
diff --git a/mpn/generic/div_qr_2u_pi1.c b/mpn/generic/div_qr_2u_pi1.c

new file mode 100644 (file)

index 0000000..568bf42
--- /dev/null
+++ b/mpn/generic/div_qr_2u_pi1.c
@@ -0,0 +1,67 @@
+/* mpn_div_qr_2u_pi1
+
+   Contributed to the GNU project by Niels Möller
+
+   THIS FILE CONTAINS INTERNAL FUNCTIONS WITH MUTABLE INTERFACES.  IT IS
+   ONLY SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS
+   ALMOST GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP
+   RELEASE.
+
+
+Copyright 2011 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+
+/* 3/2 loop, for unnormalized divisor. Caller must pass shifted d1 and
+   d0, while {np,nn} is shifted on the fly. */
+mp_limb_t
+mpn_div_qr_2u_pi1 (mp_ptr qp, mp_ptr rp, mp_srcptr np, mp_size_t nn,
+                  mp_limb_t d1, mp_limb_t d0, int shift, mp_limb_t di)
+{
+  mp_limb_t qh;
+  mp_limb_t r2, r1, r0;
+  mp_size_t i;
+
+  ASSERT (nn >= 2);
+  ASSERT (d1 & GMP_NUMB_HIGHBIT);
+  ASSERT (shift > 0);
+
+  r2 = np[nn-1] >> (GMP_LIMB_BITS - shift);
+  r1 = (np[nn-1] << shift) | (np[nn-2] >> (GMP_LIMB_BITS - shift));
+  r0 = np[nn-2] << shift;
+
+  udiv_qr_3by2 (qh, r2, r1, r2, r1, r0, d1, d0, di);
+
+  for (i = nn - 2 - 1; i >= 0; i--)
+    {
+      mp_limb_t q;
+      r0 = np[i];
+      r1 |= r0 >> (GMP_LIMB_BITS - shift);
+      r0 <<= shift;
+      udiv_qr_3by2 (q, r2, r1, r2, r1, r0, d1, d0, di);
+      qp[i] = q;
+    }
+
+  rp[0] = (r1 >> shift) | (r2 << (GMP_LIMB_BITS - shift));
+  rp[1] = r2 >> shift;
+
+  return qh;
+}
diff --git a/mpn/generic/divexact.c b/mpn/generic/divexact.c

index c8409b2ad303ccdd100ca8da4d36f068fac70c86..ae80dc7ef77659a6c86e2f7a92897c0681692fb7 100644 (file)
--- a/mpn/generic/divexact.c
+++ b/mpn/generic/divexact.c
@@ -38,7 +38,7 @@ mpn_divexact (mp_ptr qp,
  {
    unsigned shift;
    mp_size_t qn;
-  mp_ptr tp, wp;
+  mp_ptr tp;
    TMP_DECL;
  
    ASSERT (dn > 0);
@@ -67,7 +67,9 @@ mpn_divexact (mp_ptr qp,
  
    if (shift > 0)
      {
-      mp_size_t ss = (dn > qn) ? qn + 1 : dn;
+      mp_ptr wp;
+      mp_size_t ss;
+      ss = (dn > qn) ? qn + 1 : dn;
  
        tp = TMP_ALLOC_LIMBS (ss);
        mpn_rshift (tp, dp, ss, shift);
@@ -77,18 +79,14 @@ mpn_divexact (mp_ptr qp,
          to shift one limb beyond qn. */
        wp = TMP_ALLOC_LIMBS (qn + 1);
        mpn_rshift (wp, np, qn + 1, shift);
-    }
-  else
-    {
-      wp = TMP_ALLOC_LIMBS (qn);
-      MPN_COPY (wp, np, qn);
+      np = wp;
      }
  
    if (dn > qn)
      dn = qn;
  
    tp = TMP_ALLOC_LIMBS (mpn_bdiv_q_itch (qn, dn));
-  mpn_bdiv_q (qp, wp, qn, dp, dn, tp);
+  mpn_bdiv_q (qp, np, qn, dp, dn, tp);
    TMP_FREE;
  }
  
diff --git a/mpn/generic/divis.c b/mpn/generic/divis.c

index a67abdbbdc41b72c2cac8e06956e4d6bda93a78f..e6d08f7c97d229e680f2d221d3a5e0a4446f4db1 100644 (file)
--- a/mpn/generic/divis.c
+++ b/mpn/generic/divis.c
@@ -26,13 +26,13 @@ along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
  #include "longlong.h"
  
  
-/* Determine whether {ap,an} is divisible by {dp,dn}.  Must have both
+/* Determine whether A={ap,an} is divisible by D={dp,dn}.  Must have both
     operands normalized, meaning high limbs non-zero, except that an==0 is
     allowed.
  
-   There usually won't be many low zero bits on d, but the checks for this
+   There usually won't be many low zero bits on D, but the checks for this
     are fast and might pick up a few operand combinations, in particular they
-   might reduce d to fit the single-limb mod_1/modexact_1 code.
+   might reduce D to fit the single-limb mod_1/modexact_1 code.
  
     Future:
  
@@ -41,11 +41,9 @@ along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
     there's no addback, but it would need a multi-precision inverse and so
     might be slower than the plain method (on small sizes at least).
  
-   When d must be normalized (shifted to high bit set), it's possible to
-   just append a low zero limb to "a" rather than bit-shifting as
-   mpn_tdiv_qr does internally, so long as it's already been checked that a
-   has at least as many trailing zeros bits as d.  Or equivalently, pass
-   qxn==1 to mpn_tdiv_qr, if/when it accepts that.  */
+   When D must be normalized (shifted to low bit set), it's possible to suppress
+   the bit-shifting of A down, as long as it's already been checked that A has
+   at least as many trailing zero bits as D.  */
  
  int
  mpn_divisible_p (mp_srcptr ap, mp_size_t an,
@@ -127,7 +125,7 @@ mpn_divisible_p (mp_srcptr ap, mp_size_t an,
    TMP_MARK;
  
    rp = TMP_ALLOC_LIMBS (an + 1);
-  qp = TMP_ALLOC_LIMBS (an - dn + 1); /* FIXME: Could we avoid this */
+  qp = TMP_ALLOC_LIMBS (an - dn + 1); /* FIXME: Could we avoid this? */
  
    count_trailing_zeros (twos, dp[0]);
  
diff --git a/mpn/generic/divrem.c b/mpn/generic/divrem.c

index 1fb45410d894b0440c28bb297a5cc8806d803d11..57ab5250bb46bad11e55c22ab7639397c6efd333 100644 (file)
--- a/mpn/generic/divrem.c
+++ b/mpn/generic/divrem.c
@@ -1,6 +1,5 @@
  /* mpn_divrem -- Divide natural numbers, producing both remainder and
-   quotient.  This is now just a middle layer for calling the new
-   internal mpn_tdiv_qr.
+   quotient.  This is now just a middle layer calling mpn_tdiv_qr.
  
  Copyright 1993, 1994, 1995, 1996, 1997, 1999, 2000, 2001, 2002, 2005 Free
  Software Foundation, Inc.
diff --git a/mpn/generic/divrem_1.c b/mpn/generic/divrem_1.c

index c416946294bf5a773d268e646367db4745c411cf..e66ed80dba5ad84d11bab2bd307b2bf1626093b5 100644 (file)
--- a/mpn/generic/divrem_1.c
+++ b/mpn/generic/divrem_1.c
@@ -156,7 +156,7 @@ mpn_divrem_1 (mp_ptr qp, mp_size_t qxn,
    else
      {
        /* Most significant bit of divisor == 0.  */
-      int norm;
+      int cnt;
  
        /* Skip a division if high < divisor (high quotient 0).  Testing here
          before normalizing will still skip as often as possible.  */
@@ -178,28 +178,28 @@ mpn_divrem_1 (mp_ptr qp, mp_size_t qxn,
           && BELOW_THRESHOLD (n, DIVREM_1_UNNORM_THRESHOLD))
         goto plain;
  
-      count_leading_zeros (norm, d);
-      d <<= norm;
-      r <<= norm;
+      count_leading_zeros (cnt, d);
+      d <<= cnt;
+      r <<= cnt;
  
        if (UDIV_NEEDS_NORMALIZATION
           && BELOW_THRESHOLD (n, DIVREM_1_UNNORM_THRESHOLD))
         {
+         mp_limb_t nshift;
           if (un != 0)
             {
               n1 = up[un - 1] << GMP_NAIL_BITS;
-             r |= (n1 >> (GMP_LIMB_BITS - norm));
+             r |= (n1 >> (GMP_LIMB_BITS - cnt));
               for (i = un - 2; i >= 0; i--)
                 {
                   n0 = up[i] << GMP_NAIL_BITS;
-                 udiv_qrnnd (*qp, r, r,
-                             (n1 << norm) | (n0 >> (GMP_NUMB_BITS - norm)),
-                             d);
+                 nshift = (n1 << cnt) | (n0 >> (GMP_NUMB_BITS - cnt));
+                 udiv_qrnnd (*qp, r, r, nshift, d);
                   r >>= GMP_NAIL_BITS;
                   qp--;
                   n1 = n0;
                 }
-             udiv_qrnnd (*qp, r, r, n1 << norm, d);
+             udiv_qrnnd (*qp, r, r, n1 << cnt, d);
               r >>= GMP_NAIL_BITS;
               qp--;
             }
@@ -209,27 +209,26 @@ mpn_divrem_1 (mp_ptr qp, mp_size_t qxn,
               r >>= GMP_NAIL_BITS;
               qp--;
             }
-         return r >> norm;
+         return r >> cnt;
         }
        else
         {
-         mp_limb_t  dinv;
+         mp_limb_t  dinv, nshift;
           invert_limb (dinv, d);
           if (un != 0)
             {
               n1 = up[un - 1] << GMP_NAIL_BITS;
-             r |= (n1 >> (GMP_LIMB_BITS - norm));
+             r |= (n1 >> (GMP_LIMB_BITS - cnt));
               for (i = un - 2; i >= 0; i--)
                 {
                   n0 = up[i] << GMP_NAIL_BITS;
-                 udiv_qrnnd_preinv (*qp, r, r,
-                                    ((n1 << norm) | (n0 >> (GMP_NUMB_BITS - norm))),
-                                    d, dinv);
+                 nshift = (n1 << cnt) | (n0 >> (GMP_NUMB_BITS - cnt));
+                 udiv_qrnnd_preinv (*qp, r, r, nshift, d, dinv);
                   r >>= GMP_NAIL_BITS;
                   qp--;
                   n1 = n0;
                 }
-             udiv_qrnnd_preinv (*qp, r, r, n1 << norm, d, dinv);
+             udiv_qrnnd_preinv (*qp, r, r, n1 << cnt, d, dinv);
               r >>= GMP_NAIL_BITS;
               qp--;
             }
@@ -239,7 +238,7 @@ mpn_divrem_1 (mp_ptr qp, mp_size_t qxn,
               r >>= GMP_NAIL_BITS;
               qp--;
             }
-         return r >> norm;
+         return r >> cnt;
         }
      }
  }
diff --git a/mpn/generic/divrem_2.c b/mpn/generic/divrem_2.c

index ba761dc36c0e7edc607c96302da4a9a1214e2209..51a1e78710f1f9d405c708516daa438d645e5bac 100644 (file)
--- a/mpn/generic/divrem_2.c
+++ b/mpn/generic/divrem_2.c
@@ -30,150 +30,81 @@ along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
  #include "longlong.h"
  
  
-/* The size where udiv_qrnnd_preinv should be used rather than udiv_qrnnd,
-   meaning the quotient size where that should happen, the quotient size
-   being how many udiv divisions will be done.
-
-   The default is to use preinv always, CPUs where this doesn't suit have
-   tuned thresholds.  Note in particular that preinv should certainly be
-   used if that's the only division available (USE_PREINV_ALWAYS).  */
-
-#ifndef DIVREM_2_THRESHOLD
-#define DIVREM_2_THRESHOLD  0
-#endif
-
-
-/* Divide num (NP/NSIZE) by den (DP/2) and write
-   the NSIZE-2 least significant quotient limbs at QP
-   and the 2 long remainder at NP.  If QEXTRA_LIMBS is
-   non-zero, generate that many fraction bits and append them after the
-   other quotient limbs.
-   Return the most significant limb of the quotient, this is always 0 or 1.
+/* Divide num {np,nn} by den {dp,2} and write the nn-2 least significant
+   quotient limbs at qp and the 2 long remainder at np.  If qxn is non-zero,
+   generate that many fraction limbs and append them after the other quotient
+   limbs.  Return the most significant limb of the quotient, this is always 0
+   or 1.
  
     Preconditions:
-   0. NSIZE >= 2.
     1. The most significant bit of the divisor must be set.
-   2. QP must either not overlap with the input operands at all, or
-      QP + 2 >= NP must hold true.  (This means that it's
-      possible to put the quotient in the high part of NUM, right after the
-      remainder in NUM.
-   3. NSIZE >= 2, even if QEXTRA_LIMBS is non-zero.  */
+   2. qp must either not overlap with the input operands at all, or
+      qp >= np + 2 must hold true.  (This means that it's possible to put
+      the quotient in the high part of {np,nn}, right above the remainder.)
+   3. nn >= 2, even if qxn is non-zero.  */
  
  mp_limb_t
  mpn_divrem_2 (mp_ptr qp, mp_size_t qxn,
               mp_ptr np, mp_size_t nn,
               mp_srcptr dp)
  {
-  mp_limb_t most_significant_q_limb = 0;
+  mp_limb_t most_significant_q_limb;
    mp_size_t i;
-  mp_limb_t n1, n0, n2;
-  mp_limb_t d1, d0;
-  mp_limb_t d1inv;
-  int use_preinv;
+  mp_limb_t r1, r0, d1, d0;
+  gmp_pi1_t di;
  
    ASSERT (nn >= 2);
    ASSERT (qxn >= 0);
    ASSERT (dp[1] & GMP_NUMB_HIGHBIT);
-  ASSERT (! MPN_OVERLAP_P (qp, nn-2+qxn, np, nn) || qp+2 >= np);
+  ASSERT (! MPN_OVERLAP_P (qp, nn-2+qxn, np, nn) || qp >= np+2);
    ASSERT_MPN (np, nn);
    ASSERT_MPN (dp, 2);
  
    np += nn - 2;
    d1 = dp[1];
    d0 = dp[0];
-  n1 = np[1];
-  n0 = np[0];
+  r1 = np[1];
+  r0 = np[0];
  
-  if (n1 >= d1 && (n1 > d1 || n0 >= d0))
+  most_significant_q_limb = 0;
+  if (r1 >= d1 && (r1 > d1 || r0 >= d0))
      {
  #if GMP_NAIL_BITS == 0
-      sub_ddmmss (n1, n0, n1, n0, d1, d0);
+      sub_ddmmss (r1, r0, r1, r0, d1, d0);
  #else
-      n0 = n0 - d0;
-      n1 = n1 - d1 - (n0 >> GMP_LIMB_BITS - 1);
-      n0 &= GMP_NUMB_MASK;
+      r0 = r0 - d0;
+      r1 = r1 - d1 - (r0 >> GMP_LIMB_BITS - 1);
+      r0 &= GMP_NUMB_MASK;
  #endif
        most_significant_q_limb = 1;
      }
  
-  use_preinv = ABOVE_THRESHOLD (qxn + nn - 2, DIVREM_2_THRESHOLD);
-  if (use_preinv)
-    invert_limb (d1inv, d1);
-
-  for (i = qxn + nn - 2 - 1; i >= 0; i--)
-    {
-      mp_limb_t q;
-      mp_limb_t r;
-
-      if (i >= qxn)
-       np--;
-      else
-       np[0] = 0;
+  invert_pi1 (di, d1, d0);
  
-      if (n1 == d1)
-       {
-         /* Q should be either 111..111 or 111..110.  Need special handling
-            of this rare case as normal division would give overflow.  */
-         q = GMP_NUMB_MASK;
-
-         r = (n0 + d1) & GMP_NUMB_MASK;
-         if (r < d1)   /* Carry in the addition? */
-           {
-#if GMP_NAIL_BITS == 0
-             add_ssaaaa (n1, n0, r - d0, np[0], 0, d0);
-#else
-             n0 = np[0] + d0;
-             n1 = (r - d0 + (n0 >> GMP_NUMB_BITS)) & GMP_NUMB_MASK;
-             n0 &= GMP_NUMB_MASK;
-#endif
-             qp[i] = q;
-             continue;
-           }
-         n1 = d0 - (d0 != 0);
-         n0 = -d0 & GMP_NUMB_MASK;
-       }
-      else
-       {
-         if (use_preinv)
-           udiv_qrnnd_preinv (q, r, n1, n0, d1, d1inv);
-         else
-           udiv_qrnnd (q, r, n1, n0 << GMP_NAIL_BITS, d1 << GMP_NAIL_BITS);
-         r >>= GMP_NAIL_BITS;
-         umul_ppmm (n1, n0, d0, q << GMP_NAIL_BITS);
-         n0 >>= GMP_NAIL_BITS;
-       }
+  qp += qxn;
  
-      n2 = np[0];
+  for (i = nn - 2 - 1; i >= 0; i--)
+    {
+      mp_limb_t n0, q;
+      n0 = np[-1];
+      udiv_qr_3by2 (q, r1, r0, r1, r0, n0, d1, d0, di.inv32);
+      np--;
+      qp[i] = q;
+    }
  
-    q_test:
-      if (n1 > r || (n1 == r && n0 > n2))
+  if (UNLIKELY (qxn != 0))
+    {
+      qp -= qxn;
+      for (i = qxn - 1; i >= 0; i--)
         {
-         /* The estimated Q was too large.  */
-         q--;
-
-#if GMP_NAIL_BITS == 0
-         sub_ddmmss (n1, n0, n1, n0, 0, d0);
-#else
-         n0 = n0 - d0;
-         n1 = n1 - (n0 >> GMP_LIMB_BITS - 1);
-         n0 &= GMP_NUMB_MASK;
-#endif
-         r += d1;
-         if (r >= d1)  /* If not carry, test Q again.  */
-           goto q_test;
+         mp_limb_t q;
+         udiv_qr_3by2 (q, r1, r0, r1, r0, 0, d1, d0, di.inv32);
+         qp[i] = q;
         }
-
-      qp[i] = q;
-#if GMP_NAIL_BITS == 0
-      sub_ddmmss (n1, n0, r, n2, n1, n0);
-#else
-      n0 = n2 - n0;
-      n1 = r - n1 - (n0 >> GMP_LIMB_BITS - 1);
-      n0 &= GMP_NUMB_MASK;
-#endif
      }
-  np[1] = n1;
-  np[0] = n0;
+
+  np[1] = r1;
+  np[0] = r0;
  
    return most_significant_q_limb;
  }
diff --git a/mpn/generic/gcd.c b/mpn/generic/gcd.c

index 542e0fe7b8e393be445ba3ba2e10a0b29c9a1cfe..bf69e3a0cf182ad8dfc7d53e00610ab277a09a68 100644 (file)
--- a/mpn/generic/gcd.c
+++ b/mpn/generic/gcd.c
@@ -1,7 +1,7 @@
  /* mpn/gcd.c: mpn_gcd for gcd of two odd integers.
  
  Copyright 1991, 1993, 1994, 1995, 1996, 1997, 1998, 2000, 2001, 2002, 2003,
-2004, 2005, 2008 Free Software Foundation, Inc.
+2004, 2005, 2008, 2010, 2012 Free Software Foundation, Inc.
  
  This file is part of the GNU MP Library.
  
@@ -51,6 +51,76 @@ mp_size_t p_table[P_TABLE_SIZE];
  #define CHOOSE_P(n) (2*(n) / 3)
  #endif
  
+struct gcd_ctx
+{
+  mp_ptr gp;
+  mp_size_t gn;
+};
+
+static void
+gcd_hook (void *p, mp_srcptr gp, mp_size_t gn,
+         mp_srcptr qp, mp_size_t qn, int d)
+{
+  struct gcd_ctx *ctx = (struct gcd_ctx *) p;
+  MPN_COPY (ctx->gp, gp, gn);
+  ctx->gn = gn;
+}
+
+#if GMP_NAIL_BITS > 0
+/* Nail support should be easy, replacing the sub_ddmmss with nails
+ * logic. */
+#error Nails not supported.
+#endif
+
+/* Use binary algorithm to compute G <-- GCD (U, V) for usize, vsize == 2.
+   Both U and V must be odd. */
+static inline mp_size_t
+gcd_2 (mp_ptr gp, mp_srcptr up, mp_srcptr vp)
+{
+  mp_limb_t u0, u1, v0, v1;
+  mp_size_t gn;
+
+  u0 = up[0];
+  u1 = up[1];
+  v0 = vp[0];
+  v1 = vp[1];
+
+  ASSERT (u0 & 1);
+  ASSERT (v0 & 1);
+
+  /* Check for u0 != v0 needed to ensure that argument to
+   * count_trailing_zeros is non-zero. */
+  while (u1 != v1 && u0 != v0)
+    {
+      unsigned long int r;
+      if (u1 > v1)
+       {
+         sub_ddmmss (u1, u0, u1, u0, v1, v0);
+         count_trailing_zeros (r, u0);
+         u0 = ((u1 << (GMP_NUMB_BITS - r)) & GMP_NUMB_MASK) | (u0 >> r);
+         u1 >>= r;
+       }
+      else  /* u1 < v1.  */
+       {
+         sub_ddmmss (v1, v0, v1, v0, u1, u0);
+         count_trailing_zeros (r, v0);
+         v0 = ((v1 << (GMP_NUMB_BITS - r)) & GMP_NUMB_MASK) | (v0 >> r);
+         v1 >>= r;
+       }
+    }
+
+  gp[0] = u0, gp[1] = u1, gn = 1 + (u1 != 0);
+
+  /* If U == V == GCD, done.  Otherwise, compute GCD (V, |U - V|).  */
+  if (u1 == v1 && u0 == v0)
+    return gn;
+
+  v0 = (u0 == v0) ? ((u1 > v1) ? u1-v1 : v1-u1) : ((u0 > v0) ? u0-v0 : v0-u0);
+  gp[0] = mpn_gcd_1 (gp, gn, v0);
+
+  return 1;
+}
+
  mp_size_t
  mpn_gcd (mp_ptr gp, mp_ptr up, mp_size_t usize, mp_ptr vp, mp_size_t n)
  {
@@ -58,13 +128,17 @@ mpn_gcd (mp_ptr gp, mp_ptr up, mp_size_t usize, mp_ptr vp, mp_size_t n)
    mp_size_t scratch;
    mp_size_t matrix_scratch;
  
-  mp_size_t gn;
+  struct gcd_ctx ctx;
    mp_ptr tp;
    TMP_DECL;
  
+  ASSERT (usize >= n);
+  ASSERT (n > 0);
+  ASSERT (vp[n-1] > 0);
+
    /* FIXME: Check for small sizes first, before setting up temporary
       storage etc. */
-  talloc = MPN_GCD_LEHMER_N_ITCH(n);
+  talloc = MPN_GCD_SUBDIV_STEP_ITCH(n);
  
    /* For initial division */
    scratch = usize - n + 1;
@@ -107,11 +181,13 @@ mpn_gcd (mp_ptr gp, mp_ptr up, mp_size_t usize, mp_ptr vp, mp_size_t n)
        if (mpn_zero_p (up, n))
         {
           MPN_COPY (gp, vp, n);
-         TMP_FREE;
-         return n;
+         ctx.gn = n;
+         goto done;
         }
      }
  
+  ctx.gp = gp;
+
  #if TUNE_GCD_P
    while (CHOOSE_P (n) > 0)
  #else
@@ -134,153 +210,90 @@ mpn_gcd (mp_ptr gp, mp_ptr up, mp_size_t usize, mp_ptr vp, mp_size_t n)
        else
         {
           /* Temporary storage n */
-         n = mpn_gcd_subdiv_step (gp, &gn, up, vp, n, tp);
+         n = mpn_gcd_subdiv_step (up, vp, n, 0, gcd_hook, &ctx, tp);
           if (n == 0)
-           {
-             TMP_FREE;
-             return gn;
-           }
+           goto done;
         }
      }
  
-  gn = mpn_gcd_lehmer_n (gp, up, vp, n, tp);
-  TMP_FREE;
-  return gn;
-}
-
-#ifdef TUNE_GCD_P
-#include <stdio.h>
-#include <string.h>
-#include <time.h>
-#include "speed.h"
-
-static int
-compare_double(const void *ap, const void *bp)
-{
-  double a = * (const double *) ap;
-  double b = * (const double *) bp;
-
-  if (a < b)
-    return -1;
-  else if (a > b)
-    return 1;
-  else
-    return 0;
-}
-
-static double
-median (double *v, size_t n)
-{
-  qsort(v, n, sizeof(*v), compare_double);
-
-  return v[n/2];
-}
-
-#define TIME(res, code) do {                           \
-  double time_measurement[5];                          \
-  unsigned time_i;                                     \
-                                                       \
-  for (time_i = 0; time_i < 5; time_i++)               \
-    {                                                  \
-      speed_starttime();                               \
-      code;                                            \
-      time_measurement[time_i] = speed_endtime();      \
-    }                                                  \
-  res = median(time_measurement, 5);                   \
-} while (0)
-
-int
-main(int argc, char *argv)
-{
-  gmp_randstate_t rands;
-  mp_size_t n;
-  mp_ptr ap;
-  mp_ptr bp;
-  mp_ptr up;
-  mp_ptr vp;
-  mp_ptr gp;
-  mp_ptr tp;
-  TMP_DECL;
+  while (n > 2)
+    {
+      struct hgcd_matrix1 M;
+      mp_limb_t uh, ul, vh, vl;
+      mp_limb_t mask;
  
-  /* Unbuffered so if output is redirected to a file it isn't lost if the
-     program is killed part way through.  */
-  setbuf (stdout, NULL);
-  setbuf (stderr, NULL);
+      mask = up[n-1] | vp[n-1];
+      ASSERT (mask > 0);
  
-  gmp_randinit_default (rands);
+      if (mask & GMP_NUMB_HIGHBIT)
+       {
+         uh = up[n-1]; ul = up[n-2];
+         vh = vp[n-1]; vl = vp[n-2];
+       }
+      else
+       {
+         int shift;
  
-  TMP_MARK;
+         count_leading_zeros (shift, mask);
+         uh = MPN_EXTRACT_NUMB (shift, up[n-1], up[n-2]);
+         ul = MPN_EXTRACT_NUMB (shift, up[n-2], up[n-3]);
+         vh = MPN_EXTRACT_NUMB (shift, vp[n-1], vp[n-2]);
+         vl = MPN_EXTRACT_NUMB (shift, vp[n-2], vp[n-3]);
+       }
  
-  ap = TMP_ALLOC_LIMBS (P_TABLE_SIZE);
-  bp = TMP_ALLOC_LIMBS (P_TABLE_SIZE);
-  up = TMP_ALLOC_LIMBS (P_TABLE_SIZE);
-  vp = TMP_ALLOC_LIMBS (P_TABLE_SIZE);
-  gp = TMP_ALLOC_LIMBS (P_TABLE_SIZE);
-  tp = TMP_ALLOC_LIMBS (MPN_GCD_LEHMER_N_ITCH (P_TABLE_SIZE));
+      /* Try an mpn_hgcd2 step */
+      if (mpn_hgcd2 (uh, ul, vh, vl, &M))
+       {
+         n = mpn_matrix22_mul1_inverse_vector (&M, tp, up, vp, n);
+         MP_PTR_SWAP (up, tp);
+       }
+      else
+       {
+         /* mpn_hgcd2 has failed. Then either one of a or b is very
+            small, or the difference is very small. Perform one
+            subtraction followed by one division. */
  
-  mpn_random (ap, P_TABLE_SIZE);
-  mpn_random (bp, P_TABLE_SIZE);
+         /* Temporary storage n */
+         n = mpn_gcd_subdiv_step (up, vp, n, 0, &gcd_hook, &ctx, tp);
+         if (n == 0)
+           goto done;
+       }
+    }
  
-  memset (p_table, 0, sizeof(p_table));
+  ASSERT(up[n-1] | vp[n-1]);
  
-  for (n = 100; n++; n < P_TABLE_SIZE)
+  if (n == 1)
      {
-      mp_size_t p;
-      mp_size_t best_p;
-      double best_time;
-      double lehmer_time;
-
-      if (ap[n-1] == 0)
-       ap[n-1] = 1;
-
-      if (bp[n-1] == 0)
-       bp[n-1] = 1;
-
-      p_table[n] = 0;
-      TIME(lehmer_time, {
-         MPN_COPY (up, ap, n);
-         MPN_COPY (vp, bp, n);
-         mpn_gcd_lehmer_n (gp, up, vp, n, tp);
-       });
+      *gp = mpn_gcd_1(up, 1, vp[0]);
+      ctx.gn = 1;
+      goto done;
+    }
  
-      best_time = lehmer_time;
-      best_p = 0;
+  /* Due to the calling convention for mpn_gcd, at most one can be
+     even. */
  
-      for (p = n * 0.48; p < n * 0.77; p++)
-       {
-         double t;
+  if (! (up[0] & 1))
+    MP_PTR_SWAP (up, vp);
  
-         p_table[n] = p;
+  ASSERT (up[0] & 1);
  
-         TIME(t, {
-             MPN_COPY (up, ap, n);
-             MPN_COPY (vp, bp, n);
-             mpn_gcd (gp, up, n, vp, n);
-           });
+  if (vp[0] == 0)
+    {
+      *gp = mpn_gcd_1 (up, 2, vp[1]);
+      ctx.gn = 1;
+      goto done;
+    }
+  else if (! (vp[0] & 1))
+    {
+      int r;
+      count_trailing_zeros (r, vp[0]);
+      vp[0] = ((vp[1] << (GMP_NUMB_BITS - r)) & GMP_NUMB_MASK) | (vp[0] >> r);
+      vp[1] >>= r;
+    }
  
-         if (t < best_time)
-           {
-             best_time = t;
-             best_p = p;
-           }
-       }
-      printf("%6d %6d %5.3g", n, best_p, (double) best_p / n);
-      if (best_p > 0)
-       {
-         double speedup = 100 * (lehmer_time - best_time) / lehmer_time;
-         printf(" %5.3g%%", speedup);
-         if (speedup < 1.0)
-           {
-             printf(" (ignored)");
-             best_p = 0;
-           }
-       }
-      printf("\n");
+  ctx.gn = gcd_2(gp, up, vp);
  
-      p_table[n] = best_p;
-    }
+done:
    TMP_FREE;
-  gmp_randclear(rands);
-  return 0;
+  return ctx.gn;
  }
-#endif /* TUNE_GCD_P */
diff --git a/mpn/generic/gcd_1.c b/mpn/generic/gcd_1.c

index ab16f4bb23163aa0afeb413781bc64bcc06df18c..8131bdc9223c048f0af7ff7d08157d8e3c1c7a6c 100644 (file)
--- a/mpn/generic/gcd_1.c
+++ b/mpn/generic/gcd_1.c
@@ -1,6 +1,6 @@
  /* mpn_gcd_1 -- mpn and limb greatest common divisor.
  
-Copyright 1994, 1996, 2000, 2001 Free Software Foundation, Inc.
+Copyright 1994, 1996, 2000, 2001, 2009, 2012 Free Software Foundation, Inc.
  
  This file is part of the GNU MP Library.
  
@@ -28,7 +28,13 @@ along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
  #define USE_ZEROTAB 0
  
  #if USE_ZEROTAB
-static const unsigned char zerotab[16] = {
+#define MAXSHIFT 4
+#define MASK ((1 << MAXSHIFT) - 1)
+static const unsigned char zerotab[1 << MAXSHIFT] =
+{
+#if MAXSHIFT > 4
+  5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
+#endif
    4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0
  };
  #endif
@@ -133,8 +139,11 @@ mpn_gcd_1 (mp_srcptr up, mp_size_t size, mp_limb_t vlimb)
    while (ulimb != vlimb)
      {
        int c;
-      mp_limb_t t = ulimb - vlimb;
-      mp_limb_t vgtu = LIMB_HIGHBIT_TO_MASK (t);
+      mp_limb_t t;
+      mp_limb_t vgtu;
+
+      t = ulimb - vlimb;
+      vgtu = LIMB_HIGHBIT_TO_MASK (t);
  
        /* v <-- min (u, v) */
        vlimb += (vgtu & t);
@@ -145,16 +154,16 @@ mpn_gcd_1 (mp_srcptr up, mp_size_t size, mp_limb_t vlimb)
  #if USE_ZEROTAB
        /* Number of trailing zeros is the same no matter if we look at
         * t or ulimb, but using t gives more parallelism. */
-      c = zerotab[t & 15];
+      c = zerotab[t & MASK];
  
-      while (UNLIKELY (c == 4))
+      while (UNLIKELY (c == MAXSHIFT))
         {
-         ulimb >>= 4;
+         ulimb >>= MAXSHIFT;
           if (0)
           strip_u_maybe:
             vlimb >>= 1;
  
-         c = zerotab[ulimb & 15];
+         c = zerotab[ulimb & MASK];
         }
  #else
        if (0)
diff --git a/mpn/generic/gcd_lehmer.c b/mpn/generic/gcd_lehmer.c

deleted file mode 100644 (file)

index 37fd3c5..0000000
--- a/mpn/generic/gcd_lehmer.c
+++ /dev/null
@@ -1,160 +0,0 @@
-/* gcd_lehmer.c.
-
-   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
-   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
-   GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
-
-Copyright 2003, 2004, 2005, 2008 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-License for more details.
-
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
-
-#include "gmp.h"
-#include "gmp-impl.h"
-#include "longlong.h"
-
-/* Use binary algorithm to compute G <-- GCD (U, V) for usize, vsize == 2.
-   Both U and V must be odd. */
-static inline mp_size_t
-gcd_2 (mp_ptr gp, mp_srcptr up, mp_srcptr vp)
-{
-  mp_limb_t u0, u1, v0, v1;
-  mp_size_t gn;
-
-  u0 = up[0];
-  u1 = up[1];
-  v0 = vp[0];
-  v1 = vp[1];
-
-  ASSERT (u0 & 1);
-  ASSERT (v0 & 1);
-
-  /* Check for u0 != v0 needed to ensure that argument to
-   * count_trailing_zeros is non-zero. */
-  while (u1 != v1 && u0 != v0)
-    {
-      unsigned long int r;
-      if (u1 > v1)
-       {
-         u1 -= v1 + (u0 < v0);
-         u0 = (u0 - v0) & GMP_NUMB_MASK;
-         count_trailing_zeros (r, u0);
-         u0 = ((u1 << (GMP_NUMB_BITS - r)) & GMP_NUMB_MASK) | (u0 >> r);
-         u1 >>= r;
-       }
-      else  /* u1 < v1.  */
-       {
-         v1 -= u1 + (v0 < u0);
-         v0 = (v0 - u0) & GMP_NUMB_MASK;
-         count_trailing_zeros (r, v0);
-         v0 = ((v1 << (GMP_NUMB_BITS - r)) & GMP_NUMB_MASK) | (v0 >> r);
-         v1 >>= r;
-       }
-    }
-
-  gp[0] = u0, gp[1] = u1, gn = 1 + (u1 != 0);
-
-  /* If U == V == GCD, done.  Otherwise, compute GCD (V, |U - V|).  */
-  if (u1 == v1 && u0 == v0)
-    return gn;
-
-  v0 = (u0 == v0) ? ((u1 > v1) ? u1-v1 : v1-u1) : ((u0 > v0) ? u0-v0 : v0-u0);
-  gp[0] = mpn_gcd_1 (gp, gn, v0);
-
-  return 1;
-}
-
-/* Temporary storage: n */
-mp_size_t
-mpn_gcd_lehmer_n (mp_ptr gp, mp_ptr ap, mp_ptr bp, mp_size_t n, mp_ptr tp)
-{
-  /* Relax this requirement, and normalize at the start? Must disallow
-     A = B = 0, though. */
-  ASSERT(ap[n-1] > 0 || bp[n-1] > 0);
-
-  while (n > 2)
-    {
-      struct hgcd_matrix1 M;
-      mp_limb_t ah, al, bh, bl;
-      mp_limb_t mask;
-
-      mask = ap[n-1] | bp[n-1];
-      ASSERT (mask > 0);
-
-      if (mask & GMP_NUMB_HIGHBIT)
-       {
-         ah = ap[n-1]; al = ap[n-2];
-         bh = bp[n-1]; bl = bp[n-2];
-       }
-      else
-       {
-         int shift;
-
-         count_leading_zeros (shift, mask);
-         ah = MPN_EXTRACT_NUMB (shift, ap[n-1], ap[n-2]);
-         al = MPN_EXTRACT_NUMB (shift, ap[n-2], ap[n-3]);
-         bh = MPN_EXTRACT_NUMB (shift, bp[n-1], bp[n-2]);
-         bl = MPN_EXTRACT_NUMB (shift, bp[n-2], bp[n-3]);
-       }
-
-      /* Try an mpn_nhgcd2 step */
-      if (mpn_hgcd2 (ah, al, bh, bl, &M))
-       {
-         n = mpn_hgcd_mul_matrix1_inverse_vector (&M, tp, ap, bp, n);
-         MP_PTR_SWAP (ap, tp);
-       }
-      else
-       {
-         /* mpn_hgcd2 has failed. Then either one of a or b is very
-            small, or the difference is very small. Perform one
-            subtraction followed by one division. */
-         mp_size_t gn;
-
-         /* Temporary storage n */
-         n = mpn_gcd_subdiv_step (gp, &gn, ap, bp, n, tp);
-         if (n == 0)
-           return gn;
-       }
-    }
-
-  if (n == 1)
-    {
-      *gp = mpn_gcd_1(ap, 1, bp[0]);
-      return 1;
-    }
-
-  /* Due to the calling convention for mpn_gcd, at most one can be
-     even. */
-
-  if (! (ap[0] & 1))
-    MP_PTR_SWAP (ap, bp);
-
-  ASSERT (ap[0] & 1);
-
-  if (bp[0] == 0)
-    {
-      *gp = mpn_gcd_1 (ap, 2, bp[1]);
-      return 1;
-    }
-  else if (! (bp[0] & 1))
-    {
-      int r;
-      count_trailing_zeros (r, bp[0]);
-      bp[0] = ((bp[1] << (GMP_NUMB_BITS - r)) & GMP_NUMB_MASK) | (bp[0] >> r);
-      bp[1] >>= r;
-    }
-
-  return gcd_2(gp, ap, bp);
-}
diff --git a/mpn/generic/gcd_subdiv_step.c b/mpn/generic/gcd_subdiv_step.c

index 47c0c26c868c87f0bb4055ec3f16a769c03af65a..3db34073c93d2c5b51ece4a50666c1d33596f74f 100644 (file)
--- a/mpn/generic/gcd_subdiv_step.c
+++ b/mpn/generic/gcd_subdiv_step.c
@@ -4,7 +4,7 @@
     SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
     GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
  
-Copyright 2003, 2004, 2005, 2008 Free Software Foundation, Inc.
+Copyright 2003, 2004, 2005, 2008, 2010, 2011 Free Software Foundation, Inc.
  
  This file is part of the GNU MP Library.
  
@@ -21,23 +21,55 @@ License for more details.
  You should have received a copy of the GNU Lesser General Public License
  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
  
+#include <stdlib.h>            /* for NULL */
+
  #include "gmp.h"
  #include "gmp-impl.h"
  #include "longlong.h"
  
  /* Used when mpn_hgcd or mpn_hgcd2 has failed. Then either one of a or
     b is small, or the difference is small. Perform one subtraction
-   followed by one division. If the gcd is found, stores it in gp and
-   *gn, and returns zero. Otherwise, compute the reduced a and b, and
-   return the new size. */
+   followed by one division. The normal case is to compute the reduced
+   a and b, and return the new size.
+
+   If s == 0 (used for gcd and gcdext), returns zero if the gcd is
+   found.
+
+   If s > 0, don't reduce to size <= s, and return zero if no
+   reduction is possible (if either a, b or |a-b| is of size <= s). */
+
+/* The hook function is called as
+
+     hook(ctx, gp, gn, qp, qn, d)
+
+   in the following cases:
+
+   + If A = B at the start, G is the gcd, Q is NULL, d = -1.
+
+   + If one input is zero at the start, G is the gcd, Q is NULL,
+     d = 0 if A = G and d = 1 if B = G.
+
+   Otherwise, if d = 0 we have just subtracted a multiple of A from B,
+   and if d = 1 we have subtracted a multiple of B from A.
+
+   + If A = B after subtraction, G is the gcd, Q is NULL.
+
+   + If we get a zero remainder after division, G is the gcd, Q is the
+     quotient.
+
+   + Otherwise, G is NULL, Q is the quotient (often 1).
+
+ */
  
-/* FIXME: Check when the smaller number is a single limb, and invoke
- * mpn_gcd_1. */
  mp_size_t
-mpn_gcd_subdiv_step (mp_ptr gp, mp_size_t *gn,
-                    mp_ptr ap, mp_ptr bp, mp_size_t n, mp_ptr tp)
+mpn_gcd_subdiv_step (mp_ptr ap, mp_ptr bp, mp_size_t n, mp_size_t s,
+                    gcd_subdiv_step_hook *hook, void *ctx,
+                    mp_ptr tp)
  {
-  mp_size_t an, bn;
+  static const mp_limb_t one = CNST_LIMB(1);
+  mp_size_t an, bn, qn;
+
+  int swapped;
  
    ASSERT (n > 0);
    ASSERT (ap[n-1] > 0 || bp[n-1] > 0);
@@ -46,59 +78,117 @@ mpn_gcd_subdiv_step (mp_ptr gp, mp_size_t *gn,
    MPN_NORMALIZE (ap, an);
    MPN_NORMALIZE (bp, bn);
  
-  if (UNLIKELY (an == 0))
+  swapped = 0;
+
+  /* Arrange so that a < b, subtract b -= a, and maintain
+     normalization. */
+  if (an == bn)
      {
-    return_b:
-      MPN_COPY (gp, bp, bn);
-      *gn = bn;
-      return 0;
+      int c;
+      MPN_CMP (c, ap, bp, an);
+      if (UNLIKELY (c == 0))
+       {
+         /* For gcdext, return the smallest of the two cofactors, so
+            pass d = -1. */
+         if (s == 0)
+           hook (ctx, ap, an, NULL, 0, -1);
+         return 0;
+       }
+      else if (c > 0)
+       {
+         MP_PTR_SWAP (ap, bp);
+         swapped ^= 1;
+       }
      }
-  else if (UNLIKELY (bn == 0))
+  else
      {
-    return_a:
-      MPN_COPY (gp, ap, an);
-      *gn = an;
+      if (an > bn)
+       {
+         MPN_PTR_SWAP (ap, an, bp, bn);
+         swapped ^= 1;
+       }
+    }
+  if (an <= s)
+    {
+      if (s == 0)
+       hook (ctx, bp, bn, NULL, 0, swapped ^ 1);
        return 0;
      }
  
-  /* Arrange so that a > b, subtract an -= bn, and maintain
-     normalization. */
-  if (an < bn)
-    MPN_PTR_SWAP (ap, an, bp, bn);
-  else if (an == bn)
+  ASSERT_NOCARRY (mpn_sub (bp, bp, bn, ap, an));
+  MPN_NORMALIZE (bp, bn);
+  ASSERT (bn > 0);
+
+  if (bn <= s)
      {
-      int c;
-      MPN_CMP (c, ap, bp, an);
-      if (UNLIKELY (c == 0))
-       goto return_a;
-      else if (c < 0)
-       MP_PTR_SWAP (ap, bp);
+      /* Undo subtraction. */
+      mp_limb_t cy = mpn_add (bp, ap, an, bp, bn);
+      if (cy > 0)
+       bp[an] = cy;
+      return 0;
      }
  
-  ASSERT_NOCARRY (mpn_sub (ap, ap, an, bp, bn));
-  MPN_NORMALIZE (ap, an);
-  ASSERT (an > 0);
-
-  /* Arrange so that a > b, and divide a = q b + r */
-  /* FIXME: an < bn happens when we have cancellation. If that is the
-     common case, then we could reverse the roles of a and b to avoid
-     the swap. */
-  if (an < bn)
-    MPN_PTR_SWAP (ap, an, bp, bn);
-  else if (an == bn)
+  /* Arrange so that a < b */
+  if (an == bn)
      {
        int c;
        MPN_CMP (c, ap, bp, an);
        if (UNLIKELY (c == 0))
-       goto return_a;
-      else if (c < 0)
-       MP_PTR_SWAP (ap, bp);
+       {
+         if (s > 0)
+           /* Just record subtraction and return */
+           hook (ctx, NULL, 0, &one, 1, swapped);
+         else
+           /* Found gcd. */
+           hook (ctx, bp, bn, NULL, 0, swapped);
+         return 0;
+       }
+
+      hook (ctx, NULL, 0, &one, 1, swapped);
+
+      if (c > 0)
+       {
+         MP_PTR_SWAP (ap, bp);
+         swapped ^= 1;
+       }
      }
+  else
+    {
+      hook (ctx, NULL, 0, &one, 1, swapped);
  
-  mpn_tdiv_qr (tp, ap, 0, ap, an, bp, bn);
+      if (an > bn)
+       {
+         MPN_PTR_SWAP (ap, an, bp, bn);
+         swapped ^= 1;
+       }
+    }
  
-  if (mpn_zero_p (ap, bn))
-    goto return_b;
+  mpn_tdiv_qr (tp, bp, 0, bp, bn, ap, an);
+  qn = bn - an + 1;
+  bn = an;
+  MPN_NORMALIZE (bp, bn);
+
+  if (UNLIKELY (bn <= s))
+    {
+      if (s == 0)
+       {
+         hook (ctx, ap, an, tp, qn, swapped);
+         return 0;
+       }
+
+      /* Quotient is one too large, so decrement it and add back A. */
+      if (bn > 0)
+       {
+         mp_limb_t cy = mpn_add (bp, ap, an, bp, bn);
+         if (cy)
+           bp[an++] = cy;
+       }
+      else
+       MPN_COPY (bp, ap, an);
+
+      MPN_DECR_U (tp, qn, 1);
+    }
  
-  return bn;
+  hook (ctx, NULL, 0, tp, qn, swapped);
+  return an;
  }
diff --git a/mpn/generic/gcdext.c b/mpn/generic/gcdext.c

index 06e6c138a7414f17582c13e388c79759b4dc3bad..48f7b779935e669ba3c1897b355941405ba1b80a 100644 (file)
--- a/mpn/generic/gcdext.c
+++ b/mpn/generic/gcdext.c
@@ -1,7 +1,7 @@
  /* mpn_gcdext -- Extended Greatest Common Divisor.
  
-Copyright 1996, 1998, 2000, 2001, 2002, 2003, 2004, 2005, 2008, 2009 Free Software
-Foundation, Inc.
+Copyright 1996, 1998, 2000, 2001, 2002, 2003, 2004, 2005, 2008, 2009, 2012 Free
+Software Foundation, Inc.
  
  This file is part of the GNU MP Library.
  
@@ -85,10 +85,10 @@ hgcd_mul_matrix_vector (struct hgcd_matrix *M,
    return n;
  }
  
-#define COMPUTE_V_ITCH(n) (2*(n) + 1)
+#define COMPUTE_V_ITCH(n) (2*(n))
  
  /* Computes |v| = |(g - u a)| / b, where u may be positive or
-   negative, and v is of the opposite sign. a, b are of size n, u and
+   negative, and v is of the opposite sign. max(a, b) is of size n, u and
     v at most size n, and v must have space for n+1 limbs. */
  static mp_size_t
  compute_v (mp_ptr vp,
@@ -108,9 +108,11 @@ compute_v (mp_ptr vp,
  
    size = ABS (usize);
    ASSERT (size <= n);
+  ASSERT (up[size-1] > 0);
  
    an = n;
    MPN_NORMALIZE (ap, an);
+  ASSERT (gn <= an);
  
    if (an >= size)
      mpn_mul (tp, ap, an, up, size);
@@ -118,9 +120,6 @@ compute_v (mp_ptr vp,
      mpn_mul (tp, up, size, ap, an);
  
    size += an;
-  size -= tp[size - 1] == 0;
-
-  ASSERT (gn <= size);
  
    if (usize > 0)
      {
@@ -132,11 +131,11 @@ compute_v (mp_ptr vp,
         return 0;
      }
    else
-    { /* usize < 0 */
-      /* |v| = v = (c - u a) / b = (c + |u| a) / b */
-      mp_limb_t cy = mpn_add (tp, tp, size, gp, gn);
-      if (cy)
-       tp[size++] = cy;
+    { /* |v| = v = (g - u a) / b = (g + |u| a) / b. Since g <= a,
+        (g + |u| a) always fits in (|usize| + an) limbs. */
+
+      ASSERT_NOCARRY (mpn_add (tp, tp, size, gp, gn));
+      size -= (tp[size - 1] == 0);
      }
  
    /* Now divide t / b. There must be no remainder */
@@ -170,7 +169,7 @@ compute_v (mp_ptr vp,
     For the lehmer call after the loop, Let T denote
     GCDEXT_DC_THRESHOLD. For the gcdext_lehmer call, we need T each for
     u, a and b, and 4T+3 scratch space. Next, for compute_v, we need T
-   for u, T+1 for v and 2T + 1 scratch space. In all, 7T + 3 is
+   for u, T+1 for v and 2T scratch space. In all, 7T + 3 is
     sufficient for both operations.
  
  */
@@ -194,6 +193,7 @@ mpn_gcdext (mp_ptr gp, mp_ptr up, mp_size_t *usizep,
    mp_size_t matrix_scratch;
    mp_size_t ualloc = n + 1;
  
+  struct gcdext_ctx ctx;
    mp_size_t un;
    mp_ptr u0;
    mp_ptr u1;
@@ -204,6 +204,7 @@ mpn_gcdext (mp_ptr gp, mp_ptr up, mp_size_t *usizep,
  
    ASSERT (an >= n);
    ASSERT (n > 0);
+  ASSERT (bp[n-1] > 0);
  
    TMP_MARK;
  
@@ -272,6 +273,10 @@ mpn_gcdext (mp_ptr gp, mp_ptr up, mp_size_t *usizep,
    u0 = tp; tp += ualloc;
    u1 = tp; tp += ualloc;
  
+  ctx.gp = gp;
+  ctx.up = up;
+  ctx.usize = usizep;
+
    {
      /* For the first hgcd call, there are no u updates, and it makes
         some sense to use a different choice for p. */
@@ -305,21 +310,22 @@ mpn_gcdext (mp_ptr gp, mp_ptr up, mp_size_t *usizep,
         /* mpn_hgcd has failed. Then either one of a or b is very
            small, or the difference is very small. Perform one
            subtraction followed by one division. */
-       mp_size_t gn;
-       mp_size_t updated_un = 1;
-
         u1[0] = 1;
  
-       /* Temporary storage 2n + 1 */
-       n = mpn_gcdext_subdiv_step (gp, &gn, up, usizep, ap, bp, n,
-                                   u0, u1, &updated_un, tp, tp + n);
+       ctx.u0 = u0;
+       ctx.u1 = u1;
+       ctx.tp = tp + n; /* ualloc */
+       ctx.un = 1;
+
+       /* Temporary storage n */
+       n = mpn_gcd_subdiv_step (ap, bp, n, 0, mpn_gcdext_hook, &ctx, tp);
         if (n == 0)
           {
             TMP_FREE;
-           return gn;
+           return ctx.gn;
           }
  
-       un = updated_un;
+       un = ctx.un;
         ASSERT (un < ualloc);
        }
    }
@@ -361,22 +367,45 @@ mpn_gcdext (mp_ptr gp, mp_ptr up, mp_size_t *usizep,
           /* mpn_hgcd has failed. Then either one of a or b is very
              small, or the difference is very small. Perform one
              subtraction followed by one division. */
-         mp_size_t gn;
-         mp_size_t updated_un = un;
+         ctx.u0 = u0;
+         ctx.u1 = u1;
+         ctx.tp = tp + n; /* ualloc */
+         ctx.un = un;
  
-         /* Temporary storage 2n + 1 */
-         n = mpn_gcdext_subdiv_step (gp, &gn, up, usizep, ap, bp, n,
-                                     u0, u1, &updated_un, tp, tp + n);
+         /* Temporary storage n */
+         n = mpn_gcd_subdiv_step (ap, bp, n, 0, mpn_gcdext_hook, &ctx, tp);
           if (n == 0)
             {
               TMP_FREE;
-             return gn;
+             return ctx.gn;
             }
  
-         un = updated_un;
+         un = ctx.un;
           ASSERT (un < ualloc);
         }
      }
+  /* We have A = ... a + ... b
+            B =  u0 a +  u1 b
+
+            a = u1  A + ... B
+            b = -u0 A + ... B
+
+     with bounds
+
+       |u0|, |u1| <= B / min(a, b)
+
+     We always have u1 > 0, and u0 == 0 is possible only if u1 == 1,
+     in which case the only reduction done so far is a = A - k B for
+     some k.
+
+     Compute g = u a + v b = (u u1 - v u0) A + (...) B
+     Here, u, v are bounded by
+
+       |u| <= b,
+       |v| <= a
+  */
+
+  ASSERT ( (ap[n-1] | bp[n-1]) > 0);
  
    if (UNLIKELY (mpn_cmp (ap, bp, n) == 0))
      {
@@ -406,10 +435,9 @@ mpn_gcdext (mp_ptr gp, mp_ptr up, mp_size_t *usizep,
        TMP_FREE;
        return n;
      }
-  else if (mpn_zero_p (u0, un))
+  else if (UNLIKELY (u0[0] == 0) && un == 1)
      {
        mp_size_t gn;
-      ASSERT (un == 1);
        ASSERT (u1[0] == 1);
  
        /* g = u a + v b = (u u1 - v u0) A + (...) B = u A + (...) B */
@@ -420,23 +448,6 @@ mpn_gcdext (mp_ptr gp, mp_ptr up, mp_size_t *usizep,
      }
    else
      {
-      /* We have A = ... a + ... b
-                B =  u0 a +  u1 b
-
-                a = u1  A + ... B
-                b = -u0 A + ... B
-
-        with bounds
-
-          |u0|, |u1| <= B / min(a, b)
-
-        Compute g = u a + v b = (u u1 - v u0) A + (...) B
-        Here, u, v are bounded by
-
-        |u| <= b,
-        |v| <= a
-      */
-
        mp_size_t u0n;
        mp_size_t u1n;
        mp_size_t lehmer_un;
@@ -456,6 +467,8 @@ mpn_gcdext (mp_ptr gp, mp_ptr up, mp_size_t *usizep,
  
        u0n = un;
        MPN_NORMALIZE (u0, u0n);
+      ASSERT (u0n > 0);
+
        if (lehmer_un == 0)
         {
           /* u == 0  ==>  v = g / b == 1  ==> g = - u0 A + (...) B */
@@ -481,25 +494,12 @@ mpn_gcdext (mp_ptr gp, mp_ptr up, mp_size_t *usizep,
  
        u1n = un;
        MPN_NORMALIZE (u1, u1n);
-
-      /* It's possible that u0 = 1, u1 = 0 */
-      if (u1n == 0)
-       {
-         ASSERT (un == 1);
-         ASSERT (u0[0] == 1);
-
-         /* u1 == 0 ==> u u1 + v u0 = v */
-         MPN_COPY (up, lehmer_vp, lehmer_vn);
-         *usizep = negate ? lehmer_vn : - lehmer_vn;
-
-         TMP_FREE;
-         return gn;
-       }
+      ASSERT (u1n > 0);
  
        ASSERT (lehmer_un + u1n <= ualloc);
        ASSERT (lehmer_vn + u0n <= ualloc);
  
-      /* Now u0, u1, u are non-zero. We may still have v == 0 */
+      /* We may still have v == 0 */
  
        /* Compute u u0 */
        if (lehmer_un <= u1n)
diff --git a/mpn/generic/gcdext_lehmer.c b/mpn/generic/gcdext_lehmer.c

index 8599a4f5549ac99dcdc7ecda2aed8a6d3ade9dbc..101dd498ec1210a9f59a5e9dad0f7a3bc91f23b3 100644 (file)
--- a/mpn/generic/gcdext_lehmer.c
+++ b/mpn/generic/gcdext_lehmer.c
@@ -1,7 +1,7 @@
  /* mpn_gcdext -- Extended Greatest Common Divisor.
  
-Copyright 1996, 1998, 2000, 2001, 2002, 2003, 2004, 2005, 2008, 2009 Free Software
-Foundation, Inc.
+Copyright 1996, 1998, 2000, 2001, 2002, 2003, 2004, 2005, 2008, 2009, 2012 Free
+Software Foundation, Inc.
  
  This file is part of the GNU MP Library.
  
@@ -22,10 +22,114 @@ along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
  #include "gmp-impl.h"
  #include "longlong.h"
  
-/* Temporary storage: 3*(n+1) for u. n+1 for the matrix-vector
-   multiplications (if hgcd2 succeeds). If hgcd fails, n+1 limbs are
-   needed for the division, with most n for the quotient, and n+1 for
-   the product q u0. In all, 4n + 3. */
+/* Here, d is the index of the cofactor to update. FIXME: Could use qn
+   = 0 for the common case q = 1. */
+void
+mpn_gcdext_hook (void *p, mp_srcptr gp, mp_size_t gn,
+                mp_srcptr qp, mp_size_t qn, int d)
+{
+  struct gcdext_ctx *ctx = (struct gcdext_ctx *) p;
+  mp_size_t un = ctx->un;
+
+  if (gp)
+    {
+      mp_srcptr up;
+
+      ASSERT (gn > 0);
+      ASSERT (gp[gn-1] > 0);
+
+      MPN_COPY (ctx->gp, gp, gn);
+      ctx->gn = gn;
+
+      if (d < 0)
+       {
+         int c;
+
+         /* Must return the smallest cofactor, +u1 or -u0 */
+         MPN_CMP (c, ctx->u0, ctx->u1, un);
+         ASSERT (c != 0 || (un == 1 && ctx->u0[0] == 1 && ctx->u1[0] == 1));
+
+         d = c < 0;
+       }
+
+      up = d ? ctx->u0 : ctx->u1;
+
+      MPN_NORMALIZE (up, un);
+      MPN_COPY (ctx->up, up, un);
+
+      *ctx->usize = d ? -un : un;
+    }
+  else
+    {
+      mp_limb_t cy;
+      mp_ptr u0 = ctx->u0;
+      mp_ptr u1 = ctx->u1;
+
+      ASSERT (d >= 0);
+
+      if (d)
+       MP_PTR_SWAP (u0, u1);
+
+      qn -= (qp[qn-1] == 0);
+
+      /* Update u0 += q  * u1 */
+      if (qn == 1)
+       {
+         mp_limb_t q = qp[0];
+
+         if (q == 1)
+           /* A common case. */
+           cy = mpn_add_n (u0, u0, u1, un);
+         else
+           cy = mpn_addmul_1 (u0, u1, un, q);
+       }
+      else
+       {
+         mp_size_t u1n;
+         mp_ptr tp;
+
+         u1n = un;
+         MPN_NORMALIZE (u1, u1n);
+
+         if (u1n == 0)
+           return;
+
+         /* Should always have u1n == un here, and u1 >= u0. The
+            reason is that we alternate adding u0 to u1 and u1 to u0
+            (corresponding to subtractions a - b and b - a), and we
+            can get a large quotient only just after a switch, which
+            means that we'll add (a multiple of) the larger u to the
+            smaller. */
+
+         tp = ctx->tp;
+
+         if (qn > u1n)
+           mpn_mul (tp, qp, qn, u1, u1n);
+         else
+           mpn_mul (tp, u1, u1n, qp, qn);
+
+         u1n += qn;
+         u1n -= tp[u1n-1] == 0;
+
+         if (u1n >= un)
+           {
+             cy = mpn_add (u0, tp, u1n, u0, un);
+             un = u1n;
+           }
+         else
+           /* Note: Unlikely case, maybe never happens? */
+           cy = mpn_add (u0, u0, un, tp, u1n);
+
+       }
+      u0[un] = cy;
+      ctx->un = un + (cy > 0);
+    }
+}
+
+/* Temporary storage: 3*(n+1) for u. If hgcd2 succeeds, we need n for
+   the matrix-vector multiplication adjusting a, b. If hgcd fails, we
+   need at most n for the quotient and n+1 for the u update (reusing
+   the extra u). In all, 4n + 3. */
  
  mp_size_t
  mpn_gcdext_lehmer_n (mp_ptr gp, mp_ptr up, mp_size_t *usize,
@@ -41,8 +145,16 @@ mpn_gcdext_lehmer_n (mp_ptr gp, mp_ptr up, mp_size_t *usize,
     * which correspond to the first column of the inverse
     *
     *   M^{-1} = (u1, -v1; -u0, v0)
+   *
+   * This implies that
+   *
+   *   a =  u1 A (mod B)
+   *   b = -u0 A (mod B)
+   *
+   * where A, B denote the input values.
     */
  
+  struct gcdext_ctx ctx;
    mp_size_t un;
    mp_ptr u0;
    mp_ptr u1;
@@ -55,6 +167,10 @@ mpn_gcdext_lehmer_n (mp_ptr gp, mp_ptr up, mp_size_t *usize,
  
    u1[0] = 1; un = 1;
  
+  ctx.gp = gp;
+  ctx.up = up;
+  ctx.usize = usize;
+
    /* FIXME: Handle n == 2 differently, after the loop? */
    while (n >= 2)
      {
@@ -96,7 +212,7 @@ mpn_gcdext_lehmer_n (mp_ptr gp, mp_ptr up, mp_size_t *usize,
        /* Try an mpn_nhgcd2 step */
        if (mpn_hgcd2 (ah, al, bh, bl, &M))
         {
-         n = mpn_hgcd_mul_matrix1_inverse_vector (&M, tp, ap, bp, n);
+         n = mpn_matrix22_mul1_inverse_vector (&M, tp, ap, bp, n);
           MP_PTR_SWAP (ap, tp);
           un = mpn_hgcd_mul_matrix1_vector(&M, u2, u0, u1, un);
           MP_PTR_SWAP (u0, u2);
@@ -106,17 +222,18 @@ mpn_gcdext_lehmer_n (mp_ptr gp, mp_ptr up, mp_size_t *usize,
           /* mpn_hgcd2 has failed. Then either one of a or b is very
              small, or the difference is very small. Perform one
              subtraction followed by one division. */
-         mp_size_t gn;
-         mp_size_t updated_un = un;
+         ctx.u0 = u0;
+         ctx.u1 = u1;
+         ctx.tp = u2;
+         ctx.un = un;
  
           /* Temporary storage n for the quotient and ualloc for the
              new cofactor. */
-         n = mpn_gcdext_subdiv_step (gp, &gn, up, usize, ap, bp, n,
-                                     u0, u1, &updated_un, tp, u2);
+         n = mpn_gcd_subdiv_step (ap, bp, n, 0, mpn_gcdext_hook, &ctx, tp);
           if (n == 0)
-           return gn;
+           return ctx.gn;
  
-         un = updated_un;
+         un = ctx.un;
         }
      }
    ASSERT_ALWAYS (ap[0] > 0);
diff --git a/mpn/generic/gcdext_subdiv_step.c b/mpn/generic/gcdext_subdiv_step.c

deleted file mode 100644 (file)

index 21a3a3b..0000000
--- a/mpn/generic/gcdext_subdiv_step.c
+++ /dev/null
@@ -1,199 +0,0 @@
-/* gcdext_subdiv_step.c.
-
-   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
-   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
-   GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
-
-Copyright 2003, 2004, 2005, 2008, 2009 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-License for more details.
-
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
-
-#include "gmp.h"
-#include "gmp-impl.h"
-#include "longlong.h"
-
-/* Used when mpn_hgcd or mpn_hgcd2 has failed. Then either one of a or
-   b is small, or the difference is small. Perform one subtraction
-   followed by one division. If the gcd is found, stores it in gp and
-   *gn, and returns zero. Otherwise, compute the reduced a and b,
-   return the new size, and cofactors. */
-
-/* Temporary storage: Needs n limbs for the quotient, at qp. tp must
-   point to an area large enough for the resulting cofactor, plus one
-   limb extra. All in all, 2N + 1 if N is a bound for both inputs and
-   outputs. */
-mp_size_t
-mpn_gcdext_subdiv_step (mp_ptr gp, mp_size_t *gn, mp_ptr up, mp_size_t *usizep,
-                       mp_ptr ap, mp_ptr bp, mp_size_t n,
-                       mp_ptr u0, mp_ptr u1, mp_size_t *unp,
-                       mp_ptr qp, mp_ptr tp)
-{
-  mp_size_t an, bn, un;
-  mp_size_t qn;
-  mp_size_t u0n;
-
-  int swapped;
-
-  an = bn = n;
-
-  ASSERT (an > 0);
-  ASSERT (ap[an-1] > 0 || bp[an-1] > 0);
-
-  MPN_NORMALIZE (ap, an);
-  MPN_NORMALIZE (bp, bn);
-
-  un = *unp;
-
-  swapped = 0;
-
-  if (UNLIKELY (an == 0))
-    {
-    return_b:
-      MPN_COPY (gp, bp, bn);
-      *gn = bn;
-
-      MPN_NORMALIZE (u0, un);
-      MPN_COPY (up, u0, un);
-
-      *usizep = swapped ? un : -un;
-
-      return 0;
-    }
-  else if (UNLIKELY (bn == 0))
-    {
-      MPN_COPY (gp, ap, an);
-      *gn = an;
-
-      MPN_NORMALIZE (u1, un);
-      MPN_COPY (up, u1, un);
-
-      *usizep = swapped ? -un : un;
-
-      return 0;
-    }
-
-  /* Arrange so that a > b, subtract an -= bn, and maintain
-     normalization. */
-  if (an < bn)
-    {
-      MPN_PTR_SWAP (ap, an, bp, bn);
-      MP_PTR_SWAP (u0, u1);
-      swapped ^= 1;
-    }
-  else if (an == bn)
-    {
-      int c;
-      MPN_CMP (c, ap, bp, an);
-      if (UNLIKELY (c == 0))
-       {
-         MPN_COPY (gp, ap, an);
-         *gn = an;
-
-         /* Must return the smallest cofactor, +u1 or -u0 */
-         MPN_CMP (c, u0, u1, un);
-         ASSERT (c != 0 || (un == 1 && u0[0] == 1 && u1[0] == 1));
-
-         if (c < 0)
-           {
-             MPN_NORMALIZE (u0, un);
-             MPN_COPY (up, u0, un);
-             swapped ^= 1;
-           }
-         else
-           {
-             MPN_NORMALIZE_NOT_ZERO (u1, un);
-             MPN_COPY (up, u1, un);
-           }
-
-         *usizep = swapped ? -un : un;
-         return 0;
-       }
-      else if (c < 0)
-       {
-         MP_PTR_SWAP (ap, bp);
-         MP_PTR_SWAP (u0, u1);
-         swapped ^= 1;
-       }
-    }
-  /* Reduce a -= b, u1 += u0 */
-  ASSERT_NOCARRY (mpn_sub (ap, ap, an, bp, bn));
-  MPN_NORMALIZE (ap, an);
-  ASSERT (an > 0);
-
-  u1[un] = mpn_add_n (u1, u1, u0, un);
-  un += (u1[un] > 0);
-
-  /* Arrange so that a > b, and divide a = q b + r */
-  if (an < bn)
-    {
-      MPN_PTR_SWAP (ap, an, bp, bn);
-      MP_PTR_SWAP (u0, u1);
-      swapped ^= 1;
-    }
-  else if (an == bn)
-    {
-      int c;
-      MPN_CMP (c, ap, bp, an);
-      if (UNLIKELY (c == 0))
-       goto return_b;
-      else if (c < 0)
-       {
-         MP_PTR_SWAP (ap, bp);
-         MP_PTR_SWAP (u0, u1);
-         swapped ^= 1;
-       }
-    }
-
-  /* Reduce a -= q b, u1 += q u0 */
-  qn = an - bn + 1;
-  mpn_tdiv_qr (qp, ap, 0, ap, an, bp, bn);
-
-  if (mpn_zero_p (ap, bn))
-    goto return_b;
-
-  n = bn;
-
-  /* Update u1 += q u0 */
-  u0n = un;
-  MPN_NORMALIZE (u0, u0n);
-
-  if (u0n > 0)
-    {
-      qn -= (qp[qn - 1] == 0);
-
-      if (qn > u0n)
-       mpn_mul (tp, qp, qn, u0, u0n);
-      else
-       mpn_mul (tp, u0, u0n, qp, qn);
-
-      if (qn + u0n > un)
-       {
-         mp_size_t u1n = un;
-         un = qn + u0n;
-         un -= (tp[un-1] == 0);
-         u1[un] = mpn_add (u1, tp, un, u1, u1n);
-       }
-      else
-       {
-         u1[un] = mpn_add (u1, u1, un, tp, qn + u0n);
-       }
-
-      un += (u1[un] > 0);
-    }
-
-  *unp = un;
-  return n;
-}
diff --git a/mpn/generic/get_d.c b/mpn/generic/get_d.c

index fdb6e7056241ec3c8f96a2df131673d115a9fa49..c12d17686c4c19b05b4293063b5bfe242e04dc97 100644 (file)
--- a/mpn/generic/get_d.c
+++ b/mpn/generic/get_d.c
@@ -4,7 +4,7 @@
     CERTAIN TO BE SUBJECT TO INCOMPATIBLE CHANGES OR DISAPPEAR COMPLETELY IN
     FUTURE GNU MP RELEASES.
  
-Copyright 2003, 2004, 2007, 2009 Free Software Foundation, Inc.
+Copyright 2003, 2004, 2007, 2009, 2010, 2012 Free Software Foundation, Inc.
  
  This file is part of the GNU MP Library.
  
@@ -29,33 +29,20 @@ along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
  #define _GMP_IEEE_FLOATS 0
  #endif
  
-#if ! _GMP_IEEE_FLOATS
-/* dummy definition, just to let dead code compile */
-union ieee_double_extract {
-  struct {
-    int manh, manl, sig, exp;
-  } s;
-  double d;
-};
-#endif
-
  /* To force use of the generic C code for testing, put
     "#define _GMP_IEEE_FLOATS 0" at this point.  */
  
  
-
  /* In alpha gcc prior to 3.4, signed DI comparisons involving constants are
     rearranged from "x < n" to "x+(-n) < 0", which is of course hopelessly
     wrong if that addition overflows.
  
-   The workaround here avoids this bug by ensuring n is not a literal
-   constant.  Note that this is alpha specific.  The offending transformation
-   is/was in alpha.c alpha_emit_conditional_branch() under "We want to use
-   cmpcc/bcc".
+   The workaround here avoids this bug by ensuring n is not a literal constant.
+   Note that this is alpha specific.  The offending transformation is/was in
+   alpha.c alpha_emit_conditional_branch() under "We want to use cmpcc/bcc".
  
-   Bizarrely, it turns out this happens also with Cray cc on
-   alphaev5-cray-unicosmk2.0.6.X, and has the same solution.  Don't know why
-   or how.  */
+   Bizarrely, this happens also with Cray cc on alphaev5-cray-unicosmk2.0.6.X,
+   and has the same solution.  Don't know why or how.  */
  
  #if HAVE_HOST_CPU_FAMILY_alpha                         \
    && ((defined (__GNUC__) && ! __GMP_GNUC_PREREQ(3,4)) \
@@ -70,55 +57,53 @@ static volatile const long CONST_NEG_1022_SUB_53 = -1022 - 53;
  #endif
  
  
+/* Return the value {ptr,size}*2^exp, and negative if sign<0.  Must have
+   size>=1, and a non-zero high limb ptr[size-1].
  
-/* Return the value {ptr,size}*2^exp, and negative if sign<0.
-   Must have size>=1, and a non-zero high limb ptr[size-1].
-
-   {ptr,size} is truncated towards zero.  This is consistent with other gmp
-   conversions, like mpz_set_f or mpz_set_q, and is easy to implement and
-   test.
+   When we know the fp format, the result is truncated towards zero.  This is
+   consistent with other gmp conversions, like mpz_set_f or mpz_set_q, and is
+   easy to implement and test.
  
-   In the past conversions had attempted (imperfectly) to let the hardware
-   float rounding mode take effect, but that gets tricky since multiple
-   roundings need to be avoided, or taken into account, and denorms mean the
-   effective precision of the mantissa is not constant.  (For reference,
-   mpz_get_d on IEEE systems was ok, except it operated on the absolute
-   value.  mpf_get_d and mpq_get_d suffered from multiple roundings and from
-   not always using enough bits to get the rounding right.)
+   When we do not know the format, such truncation seems much harder.  One
+   would need to defeat any rounding mode, including round-up.
  
     It's felt that GMP is not primarily concerned with hardware floats, and
     really isn't enhanced by getting involved with hardware rounding modes
-   (which could even be some weird unknown style), so something unambiguous
-   and straightforward is best.
+   (which could even be some weird unknown style), so something unambiguous and
+   straightforward is best.
  
  
     The IEEE code below is the usual case, it knows either a 32-bit or 64-bit
     limb and is done with shifts and masks.  The 64-bit case in particular
     should come out nice and compact.
  
-   The generic code works one bit at a time, which will be quite slow, but
-   should support any binary-based "double" and be safe against any rounding
-   mode.  Note in particular it works on IEEE systems too.
+   The generic code used to work one bit at a time, which was not only slow,
+   but implicitly relied upon denorms for intermediates, since the lowest bits'
+   weight of a perfectly valid fp number underflows in non-denorm.  Therefore,
+   the generic code now works limb-per-limb, initially creating a number x such
+   that 1 <= x <= BASE.  (BASE is reached only as result of rounding.)  Then
+   x's exponent is scaled with explicit code (not ldexp to avoid libm
+   dependency).  It is a tap-dance to avoid underflow or overflow, beware!
  
  
     Traps:
  
-   Hardware traps for overflow to infinity, underflow to zero, or
-   unsupported denorms may or may not be taken.  The IEEE code works bitwise
-   and so probably won't trigger them, the generic code works by float
-   operations and so probably will.  This difference might be thought less
-   than ideal, but again its felt straightforward code is better than trying
-   to get intimate with hardware exceptions (of perhaps unknown nature).
+   Hardware traps for overflow to infinity, underflow to zero, or unsupported
+   denorms may or may not be taken.  The IEEE code works bitwise and so
+   probably won't trigger them, the generic code works by float operations and
+   so probably will.  This difference might be thought less than ideal, but
+   again it's felt straightforward code is better than trying to get intimate
+   with hardware exceptions (of perhaps unknown nature).
  
  
     Not done:
  
-   mpz_get_d in the past handled size==1 with a cast limb->double.  This
-   might still be worthwhile there (for up to the mantissa many bits), but
-   for mpn_get_d here, the cost of applying "exp" to the resulting exponent
-   would probably use up any benefit a cast may have over bit twiddling.
-   Also, if the exponent is pushed into denorm range then bit twiddling is
-   the only option, to ensure the desired truncation is obtained.
+   mpz_get_d in the past handled size==1 with a cast limb->double.  This might
+   still be worthwhile there (for up to the mantissa many bits), but for
+   mpn_get_d here, the cost of applying "exp" to the resulting exponent would
+   probably use up any benefit a cast may have over bit twiddling.  Also, if
+   the exponent is pushed into denorm range then bit twiddling is the only
+   option, to ensure the desired truncation is obtained.
  
  
     Other:
@@ -130,9 +115,15 @@ static volatile const long CONST_NEG_1022_SUB_53 = -1022 - 53;
     cast, neither in the IEEE or generic code.  */
  
  
+
+#undef FORMAT_RECOGNIZED
+
  double
  mpn_get_d (mp_srcptr up, mp_size_t size, mp_size_t sign, long exp)
  {
+  int lshift, nbits;
+  mp_limb_t x, mhi, mlo;
+
    ASSERT (size >= 0);
    ASSERT_MPN (up, size);
    ASSERT (size == 0 || up[size-1] != 0);
@@ -144,10 +135,11 @@ mpn_get_d (mp_srcptr up, mp_size_t size, mp_size_t sign, long exp)
       overflow.  After this exp can of course be reduced to anywhere within
       the {up,size} region without underflow.  */
    if (UNLIKELY ((unsigned long) (GMP_NUMB_BITS * size)
-               > (unsigned long) (LONG_MAX - exp)))
+               > ((unsigned long) LONG_MAX - exp)))
      {
-      if (_GMP_IEEE_FLOATS)
-       goto ieee_infinity;
+#if _GMP_IEEE_FLOATS
+      goto ieee_infinity;
+#endif
  
        /* generic */
        exp = LONG_MAX;
@@ -157,334 +149,253 @@ mpn_get_d (mp_srcptr up, mp_size_t size, mp_size_t sign, long exp)
        exp += GMP_NUMB_BITS * size;
      }
  
+#if _GMP_IEEE_FLOATS
+    {
+      union ieee_double_extract u;
  
-#if 1
-{
-  int lshift, nbits;
-  union ieee_double_extract u;
-  mp_limb_t x, mhi, mlo;
-#if GMP_LIMB_BITS == 64
-  mp_limb_t m;
-  up += size;
-  m = *--up;
-  count_leading_zeros (lshift, m);
+      up += size;
  
-  exp -= (lshift - GMP_NAIL_BITS) + 1;
-  m <<= lshift;
+#if GMP_LIMB_BITS == 64
+      mlo = up[-1];
+      count_leading_zeros (lshift, mlo);
  
-  nbits = GMP_LIMB_BITS - lshift;
+      exp -= (lshift - GMP_NAIL_BITS) + 1;
+      mlo <<= lshift;
  
-  if (nbits < 53 && size > 1)
-    {
-      x = *--up;
-      x <<= GMP_NAIL_BITS;
-      x >>= nbits;
-      m |= x;
-      nbits += GMP_NUMB_BITS;
+      nbits = GMP_LIMB_BITS - lshift;
  
-      if (LIMBS_PER_DOUBLE >= 3 && nbits < 53 && size > 2)
+      if (nbits < 53 && size > 1)
         {
-         x = *--up;
+         x = up[-2];
           x <<= GMP_NAIL_BITS;
           x >>= nbits;
-         m |= x;
+         mlo |= x;
           nbits += GMP_NUMB_BITS;
+
+         if (LIMBS_PER_DOUBLE >= 3 && nbits < 53 && size > 2)
+           {
+             x = up[-3];
+             x <<= GMP_NAIL_BITS;
+             x >>= nbits;
+             mlo |= x;
+             nbits += GMP_NUMB_BITS;
+           }
         }
-    }
-  mhi = m >> (32 + 11);
-  mlo = m >> 11;
+      mhi = mlo >> (32 + 11);
+      mlo = mlo >> 11;         /* later implicitly truncated to 32 bits */
  #endif
  #if GMP_LIMB_BITS == 32
-  up += size;
-  x = *--up, size--;
-  count_leading_zeros (lshift, x);
+      x = *--up;
+      count_leading_zeros (lshift, x);
  
-  exp -= (lshift - GMP_NAIL_BITS) + 1;
-  x <<= lshift;
-  mhi = x >> 11;
+      exp -= (lshift - GMP_NAIL_BITS) + 1;
+      x <<= lshift;
+      mhi = x >> 11;
  
-  if (lshift < 11)             /* FIXME: never true if NUMB < 20 bits */
-    {
-      /* All 20 bits in mhi */
-      mlo = x << 21;
-      /* >= 1 bit in mlo */
-      nbits = GMP_LIMB_BITS - lshift - 21;
-    }
-  else
-    {
-      if (size != 0)
+      if (lshift < 11)         /* FIXME: never true if NUMB < 20 bits */
         {
-         nbits = GMP_LIMB_BITS - lshift;
-
-         x = *--up, size--;
-         x <<= GMP_NAIL_BITS;
-         mhi |= x >> nbits >> 11;
-
-         mlo = x << GMP_LIMB_BITS - nbits - 11;
-         nbits = nbits + 11 - GMP_NAIL_BITS;
+         /* All 20 bits in mhi */
+         mlo = x << 21;
+         /* >= 1 bit in mlo */
+         nbits = GMP_LIMB_BITS - lshift - 21;
         }
        else
         {
-         mlo = 0;
-         goto done;
+         if (size > 1)
+           {
+             nbits = GMP_LIMB_BITS - lshift;
+
+             x = *--up, size--;
+             x <<= GMP_NAIL_BITS;
+             mhi |= x >> nbits >> 11;
+
+             mlo = x << GMP_LIMB_BITS - nbits - 11;
+             nbits = nbits + 11 - GMP_NAIL_BITS;
+           }
+         else
+           {
+             mlo = 0;
+             goto done;
+           }
         }
-    }
  
-  if (LIMBS_PER_DOUBLE >= 2 && nbits < 32 && size != 0)
-    {
-      x = *--up, size--;
-      x <<= GMP_NAIL_BITS;
-      x >>= nbits;
-      mlo |= x;
-      nbits += GMP_NUMB_BITS;
+      /* Now all needed bits in mhi have been accumulated.  Add bits to mlo.  */
  
-      if (LIMBS_PER_DOUBLE >= 3 && nbits < 32 && size != 0)
+      if (LIMBS_PER_DOUBLE >= 2 && nbits < 32 && size > 1)
         {
-         x = *--up, size--;
+         x = up[-1];
           x <<= GMP_NAIL_BITS;
           x >>= nbits;
           mlo |= x;
           nbits += GMP_NUMB_BITS;
  
-         if (LIMBS_PER_DOUBLE >= 4 && nbits < 32 && size != 0)
+         if (LIMBS_PER_DOUBLE >= 3 && nbits < 32 && size > 2)
             {
-             x = *--up;
+             x = up[-2];
               x <<= GMP_NAIL_BITS;
               x >>= nbits;
               mlo |= x;
               nbits += GMP_NUMB_BITS;
+
+             if (LIMBS_PER_DOUBLE >= 4 && nbits < 32 && size > 3)
+               {
+                 x = up[-3];
+                 x <<= GMP_NAIL_BITS;
+                 x >>= nbits;
+                 mlo |= x;
+                 nbits += GMP_NUMB_BITS;
+               }
             }
         }
-    }
  
- done:;
+    done:;
  
  #endif
-  {
-    if (UNLIKELY (exp >= CONST_1024))
-      {
-       /* overflow, return infinity */
-      ieee_infinity:
-       mhi = 0;
-       mlo = 0;
-       exp = 1024;
-      }
-    else if (UNLIKELY (exp <= CONST_NEG_1023))
-      {
-       int rshift;
-
-       if (LIKELY (exp <= CONST_NEG_1022_SUB_53))
-         return 0.0;    /* denorm underflows to zero */
-
-       rshift = -1022 - exp;
-       ASSERT (rshift > 0 && rshift < 53);
-#if GMP_LIMB_BITS > 53
-       mlo >>= rshift;
-       mhi = mlo >> 32;
-#else
-       if (rshift >= 32)
-         {
-           mlo = mhi;
-           mhi = 0;
-           rshift -= 32;
-         }
-       lshift = GMP_LIMB_BITS - rshift;
-       mlo = (mlo >> rshift) | (rshift == 0 ? 0 : mhi << lshift);
-       mhi >>= rshift;
-#endif
-       exp = -1023;
-      }
-  }
-  u.s.manh = mhi;
-  u.s.manl = mlo;
-  u.s.exp = exp + 1023;
-  u.s.sig = (sign < 0);
-  return u.d;
-}
-#else
-
-
-#define ONE_LIMB    (GMP_LIMB_BITS == 64 && 2*GMP_NUMB_BITS >= 53)
-#define TWO_LIMBS   (GMP_LIMB_BITS == 32 && 3*GMP_NUMB_BITS >= 53)
-
-  if (_GMP_IEEE_FLOATS && (ONE_LIMB || TWO_LIMBS))
-    {
-      union ieee_double_extract         u;
-      mp_limb_t         m0, m1, m2, rmask;
-      int       lshift, rshift;
-
-      m0 = up[size-1];                     /* high limb */
-      m1 = (size >= 2 ? up[size-2] : 0);   /* second highest limb */
-      count_leading_zeros (lshift, m0);
-
-      /* relative to just under high non-zero bit */
-      exp -= (lshift - GMP_NAIL_BITS) + 1;
-
-      if (ONE_LIMB)
-       {
-         /* lshift to have high of m0 non-zero, and collapse nails */
-         rshift = GMP_LIMB_BITS - lshift;
-         m1 <<= GMP_NAIL_BITS;
-         rmask = GMP_NAIL_BITS == 0 && lshift == 0 ? 0 : MP_LIMB_T_MAX;
-         m0 = (m0 << lshift) | ((m1 >> rshift) & rmask);
-
-         /* rshift back to have bit 53 of m0 the high non-zero */
-         m0 >>= 11;
-       }
-      else /* TWO_LIMBS */
-       {
-         m2 = (size >= 3 ? up[size-3] : 0);  /* third highest limb */
-
-         /* collapse nails from m1 and m2 */
-#if GMP_NAIL_BITS != 0
-         m1 = (m1 << GMP_NAIL_BITS) | (m2 >> (GMP_NUMB_BITS-GMP_NAIL_BITS));
-         m2 <<= 2*GMP_NAIL_BITS;
-#endif
-
-         /* lshift to have high of m0:m1 non-zero, collapse nails from m0 */
-         rshift = GMP_LIMB_BITS - lshift;
-         rmask = (GMP_NAIL_BITS == 0 && lshift == 0 ? 0 : MP_LIMB_T_MAX);
-         m0 = (m0 << lshift) | ((m1 >> rshift) & rmask);
-         m1 = (m1 << lshift) | ((m2 >> rshift) & rmask);
-
-         /* rshift back to have bit 53 of m0:m1 the high non-zero */
-         m1 = (m1 >> 11) | (m0 << (GMP_LIMB_BITS-11));
-         m0 >>= 11;
-       }
-
        if (UNLIKELY (exp >= CONST_1024))
         {
           /* overflow, return infinity */
         ieee_infinity:
-         m0 = 0;
-         m1 = 0;
+         mhi = 0;
+         mlo = 0;
           exp = 1024;
         }
        else if (UNLIKELY (exp <= CONST_NEG_1023))
         {
+         int rshift;
+
           if (LIKELY (exp <= CONST_NEG_1022_SUB_53))
             return 0.0;  /* denorm underflows to zero */
  
           rshift = -1022 - exp;
           ASSERT (rshift > 0 && rshift < 53);
-         if (ONE_LIMB)
-           {
-             m0 >>= rshift;
-           }
-         else /* TWO_LIMBS */
+#if GMP_LIMB_BITS > 53
+         mlo >>= rshift;
+         mhi = mlo >> 32;
+#else
+         if (rshift >= 32)
             {
-             if (rshift >= 32)
-               {
-                 m1 = m0;
-                 m0 = 0;
-                 rshift -= 32;
-               }
-             lshift = GMP_LIMB_BITS - rshift;
-             m1 = (m1 >> rshift) | (rshift == 0 ? 0 : m0 << lshift);
-             m0 >>= rshift;
+             mlo = mhi;
+             mhi = 0;
+             rshift -= 32;
             }
-         exp = -1023;
-       }
-
-      if (ONE_LIMB)
-       {
-#if GMP_LIMB_BITS > 32 /* avoid compiler warning about big shift */
-         u.s.manh = m0 >> 32;
+         lshift = GMP_LIMB_BITS - rshift;
+         mlo = (mlo >> rshift) | (rshift == 0 ? 0 : mhi << lshift);
+         mhi >>= rshift;
  #endif
-         u.s.manl = m0;
-       }
-      else /* TWO_LIMBS */
-       {
-         u.s.manh = m0;
-         u.s.manl = m1;
+         exp = -1023;
         }
-
+      u.s.manh = mhi;
+      u.s.manl = mlo;
        u.s.exp = exp + 1023;
        u.s.sig = (sign < 0);
        return u.d;
      }
-  else
+#define FORMAT_RECOGNIZED 1
+#endif
+
+#if HAVE_DOUBLE_VAX_D
      {
-      /* Non-IEEE or strange limb size, do something generic. */
-
-      mp_size_t             i;
-      mp_limb_t             limb, bit;
-      int           shift;
-      double        base, factor, prev_factor, d, new_d, diff;
-
-      /* "limb" is "up[i]" the limb being examined, "bit" is a mask for the
-        bit being examined, initially the highest non-zero bit.  */
-      i = size-1;
-      limb = up[i];
-      count_leading_zeros (shift, limb);
-      bit = GMP_LIMB_HIGHBIT >> shift;
-
-      /* relative to just under high non-zero bit */
-      exp -= (shift - GMP_NAIL_BITS) + 1;
-
-      /* Power up "factor" to 2^exp, being the value of the "bit" in "limb"
-        being examined.  */
-      base = (exp >= 0 ? 2.0 : 0.5);
-      exp = ABS (exp);
-      factor = 1.0;
-      for (;;)
+      union double_extract u;
+
+      up += size;
+
+      mhi = up[-1];
+
+      count_leading_zeros (lshift, mhi);
+      exp -= lshift;
+      mhi <<= lshift;
+
+      mlo = 0;
+      if (size > 1)
         {
-         if (exp & 1)
+         mlo = up[-2];
+         if (lshift != 0)
+           mhi += mlo >> (GMP_LIMB_BITS - lshift);
+         mlo <<= lshift;
+
+         if (size > 2 && lshift > 8)
             {
-             prev_factor = factor;
-             factor *= base;
-             FORCE_DOUBLE (factor);
-             if (factor == 0.0)
-               return 0.0;     /* underflow */
-             if (factor == prev_factor)
-               {
-                 d = factor;     /* overflow, apparent infinity */
-                 goto generic_done;
-               }
+             x = up[-3];
+             mlo += x >> (GMP_LIMB_BITS - lshift);
             }
-         exp >>= 1;
-         if (exp == 0)
-           break;
-         base *= base;
         }
  
-      /* Add a "factor" for each non-zero bit, working from high to low.
-        Stop if any rounding occurs, hence implementing a truncation.
+      if (UNLIKELY (exp >= 128))
+       {
+         /* overflow, return maximum number */
+         mhi = 0xffffffff;
+         mlo = 0xffffffff;
+         exp = 127;
+       }
+      else if (UNLIKELY (exp < -128))
+       {
+         return 0.0;    /* underflows to zero */
+       }
  
-        Note no attention is paid to DBL_MANT_DIG, since the effective
-        number of bits in the mantissa isn't constant when in denorm range.
-        We also encountered an ARM system with apparently somewhat doubtful
-        software floats where DBL_MANT_DIG claimed 53 bits but only 32
-        actually worked.  */
+      u.s.man3 = mhi >> 24;    /* drop msb, since implicit */
+      u.s.man2 = mhi >> 8;
+      u.s.man1 = (mhi << 8) + (mlo >> 24);
+      u.s.man0 = mlo >> 8;
+      u.s.exp = exp + 128;
+      u.s.sig = sign < 0;
+      return u.d;
+    }
+#define FORMAT_RECOGNIZED 1
+#endif
  
-      d = factor;  /* high bit */
-      for (;;)
+#if ! FORMAT_RECOGNIZED
+    {      /* Non-IEEE or strange limb size, do something generic. */
+      mp_size_t i;
+      double d, weight;
+      unsigned long uexp;
+
+      /* First generate an fp number disregarding exp, instead keeping things
+        within the numb base factor from 1, which should prevent overflow and
+        underflow even for the most exponent limited fp formats.  The
+        termination criteria should be refined, since we now include too many
+        limbs.  */
+      weight = 1/MP_BASE_AS_DOUBLE;
+      d = up[size - 1];
+      for (i = size - 2; i >= 0; i--)
         {
-         factor *= 0.5;  /* next bit */
-         bit >>= 1;
-         if (bit == 0)
-           {
-             /* next limb, if any */
-             i--;
-             if (i < 0)
-               break;
-             limb = up[i];
-             bit = GMP_NUMB_HIGHBIT;
-           }
+         d += up[i] * weight;
+         weight /= MP_BASE_AS_DOUBLE;
+         if (weight == 0)
+           break;
+       }
  
-         if (bit & limb)
-           {
-             new_d = d + factor;
-             FORCE_DOUBLE (new_d);
-             diff = new_d - d;
-             if (diff != factor)
-               break;   /* rounding occured, stop now */
-             d = new_d;
-           }
+      /* Now apply exp.  */
+      exp -= GMP_NUMB_BITS;
+      if (exp > 0)
+       {
+         weight = 2.0;
+         uexp = exp;
+       }
+      else
+       {
+         weight = 0.5;
+         uexp = 1 - (unsigned long) (exp + 1);
         }
+#if 1
+      /* Square-and-multiply exponentiation.  */
+      if (uexp & 1)
+       d *= weight;
+      while (uexp >>= 1)
+       {
+         weight *= weight;
+         if (uexp & 1)
+           d *= weight;
+       }
+#else
+      /* Plain exponentiation.  */
+      while (uexp > 0)
+       {
+         d *= weight;
+         uexp--;
+       }
+#endif
  
-    generic_done:
-      return (sign >= 0 ? d : -d);
+      return sign >= 0 ? d : -d;
      }
  #endif
  }
diff --git a/mpn/generic/get_str.c b/mpn/generic/get_str.c

index ac4fb52a95063d7b7804ab5e8a4ef9128a65ad68..e17497cb0e7d2db49e623bdb7195b7679b4b4ab1 100644 (file)
--- a/mpn/generic/get_str.c
+++ b/mpn/generic/get_str.c
@@ -8,7 +8,7 @@
     GNU MP RELEASE.
  
  Copyright 1991, 1992, 1993, 1994, 1996, 2000, 2001, 2002, 2004, 2006, 2007,
-2008 Free Software Foundation, Inc.
+2008, 2011, 2012 Free Software Foundation, Inc.
  
  This file is part of the GNU MP Library.
  
@@ -350,7 +350,8 @@ mpn_dc_get_str (unsigned char *str, size_t len,
  
  \f
  /* There are no leading zeros on the digits generated at str, but that's not
-   currently a documented feature.  */
+   currently a documented feature.  The current mpz_out_str and mpz_get_str
+   rely on it.  */
  
  size_t
  mpn_get_str (unsigned char *str, int base, mp_ptr up, mp_size_t un)
@@ -439,9 +440,12 @@ mpn_get_str (unsigned char *str, int base, mp_ptr up, mp_size_t un)
      mp_size_t n_pows, xn, pn, exptab[GMP_LIMB_BITS], bexp;
      mp_limb_t cy;
      mp_size_t shift;
+    size_t ndig;
+
+    DIGITS_IN_BASE_PER_LIMB (ndig, un, base);
+    xn = 1 + ndig / mp_bases[base].chars_per_limb; /* FIXME: scalar integer division */
  
      n_pows = 0;
-    xn = 1 + un*(mp_bases[base].chars_per_bit_exactly*GMP_NUMB_BITS)/mp_bases[base].chars_per_limb;
      for (pn = xn; pn != 1; pn = (pn + 1) >> 1)
        {
         exptab[n_pows] = pn;
diff --git a/mpn/generic/hgcd.c b/mpn/generic/hgcd.c

index 709f880eb22d6fa3f5090962a08c938dba4edcce..f51bbde81c2f330e8ca90365d92cb7056812a35f 100644 (file)
--- a/mpn/generic/hgcd.c
+++ b/mpn/generic/hgcd.c
@@ -4,7 +4,7 @@
     SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
     GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
  
-Copyright 2003, 2004, 2005, 2008 Free Software Foundation, Inc.
+Copyright 2003, 2004, 2005, 2008, 2011, 2012 Free Software Foundation, Inc.
  
  This file is part of the GNU MP Library.
  
@@ -25,482 +25,6 @@ along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
  #include "gmp-impl.h"
  #include "longlong.h"
  
-/* For input of size n, matrix elements are of size at most ceil(n/2)
-   - 1, but we need two limbs extra. */
-void
-mpn_hgcd_matrix_init (struct hgcd_matrix *M, mp_size_t n, mp_ptr p)
-{
-  mp_size_t s = (n+1)/2 + 1;
-  M->alloc = s;
-  M->n = 1;
-  MPN_ZERO (p, 4 * s);
-  M->p[0][0] = p;
-  M->p[0][1] = p + s;
-  M->p[1][0] = p + 2 * s;
-  M->p[1][1] = p + 3 * s;
-
-  M->p[0][0][0] = M->p[1][1][0] = 1;
-}
-
-/* Updated column COL, adding in column (1-COL). */
-static void
-hgcd_matrix_update_1 (struct hgcd_matrix *M, unsigned col)
-{
-  mp_limb_t c0, c1;
-  ASSERT (col < 2);
-
-  c0 = mpn_add_n (M->p[0][col], M->p[0][0], M->p[0][1], M->n);
-  c1 = mpn_add_n (M->p[1][col], M->p[1][0], M->p[1][1], M->n);
-
-  M->p[0][col][M->n] = c0;
-  M->p[1][col][M->n] = c1;
-
-  M->n += (c0 | c1) != 0;
-  ASSERT (M->n < M->alloc);
-}
-
-/* Updated column COL, adding in column Q * (1-COL). Temporary
- * storage: qn + n <= M->alloc, where n is the size of the largest
- * element in column 1 - COL. */
-static void
-hgcd_matrix_update_q (struct hgcd_matrix *M, mp_srcptr qp, mp_size_t qn,
-                     unsigned col, mp_ptr tp)
-{
-  ASSERT (col < 2);
-
-  if (qn == 1)
-    {
-      mp_limb_t q = qp[0];
-      mp_limb_t c0, c1;
-
-      c0 = mpn_addmul_1 (M->p[0][col], M->p[0][1-col], M->n, q);
-      c1 = mpn_addmul_1 (M->p[1][col], M->p[1][1-col], M->n, q);
-
-      M->p[0][col][M->n] = c0;
-      M->p[1][col][M->n] = c1;
-
-      M->n += (c0 | c1) != 0;
-    }
-  else
-    {
-      unsigned row;
-
-      /* Carries for the unlikely case that we get both high words
-        from the multiplication and carries from the addition. */
-      mp_limb_t c[2];
-      mp_size_t n;
-
-      /* The matrix will not necessarily grow in size by qn, so we
-        need normalization in order not to overflow M. */
-
-      for (n = M->n; n + qn > M->n; n--)
-       {
-         ASSERT (n > 0);
-         if (M->p[0][1-col][n-1] > 0 || M->p[1][1-col][n-1] > 0)
-           break;
-       }
-
-      ASSERT (qn + n <= M->alloc);
-
-      for (row = 0; row < 2; row++)
-       {
-         if (qn <= n)
-           mpn_mul (tp, M->p[row][1-col], n, qp, qn);
-         else
-           mpn_mul (tp, qp, qn, M->p[row][1-col], n);
-
-         ASSERT (n + qn >= M->n);
-         c[row] = mpn_add (M->p[row][col], tp, n + qn, M->p[row][col], M->n);
-       }
-      if (c[0] | c[1])
-       {
-         M->n = n + qn + 1;
-         M->p[0][col][M->n - 1] = c[0];
-         M->p[1][col][M->n - 1] = c[1];
-       }
-      else
-       {
-         n += qn;
-         n -= (M->p[0][col][n-1] | M->p[1][col][n-1]) == 0;
-         if (n > M->n)
-           M->n = n;
-       }
-    }
-
-  ASSERT (M->n < M->alloc);
-}
-
-/* Multiply M by M1 from the right. Since the M1 elements fit in
-   GMP_NUMB_BITS - 1 bits, M grows by at most one limb. Needs
-   temporary space M->n */
-static void
-hgcd_matrix_mul_1 (struct hgcd_matrix *M, const struct hgcd_matrix1 *M1,
-                  mp_ptr tp)
-{
-  mp_size_t n0, n1;
-
-  /* Could avoid copy by some swapping of pointers. */
-  MPN_COPY (tp, M->p[0][0], M->n);
-  n0 = mpn_hgcd_mul_matrix1_vector (M1, M->p[0][0], tp, M->p[0][1], M->n);
-  MPN_COPY (tp, M->p[1][0], M->n);
-  n1 = mpn_hgcd_mul_matrix1_vector (M1, M->p[1][0], tp, M->p[1][1], M->n);
-
-  /* Depends on zero initialization */
-  M->n = MAX(n0, n1);
-  ASSERT (M->n < M->alloc);
-}
-
-/* Perform a few steps, using some of mpn_hgcd2, subtraction and
-   division. Reduces the size by almost one limb or more, but never
-   below the given size s. Return new size for a and b, or 0 if no
-   more steps are possible.
-
-   If hgcd2 succeds, needs temporary space for hgcd_matrix_mul_1, M->n
-   limbs, and hgcd_mul_matrix1_inverse_vector, n limbs. If hgcd2
-   fails, needs space for the quotient, qn <= n - s + 1 limbs, for and
-   hgcd_matrix_update_q, qn + (size of the appropriate column of M) <=
-   resulting size of $.
-
-   If N is the input size to the calling hgcd, then s = floor(N/2) +
-   1, M->n < N, qn + matrix size <= n - s + 1 + n - s = 2 (n - s) + 1
-   < N, so N is sufficient.
-*/
-
-static mp_size_t
-hgcd_step (mp_size_t n, mp_ptr ap, mp_ptr bp, mp_size_t s,
-          struct hgcd_matrix *M, mp_ptr tp)
-{
-  struct hgcd_matrix1 M1;
-  mp_limb_t mask;
-  mp_limb_t ah, al, bh, bl;
-  mp_size_t an, bn, qn;
-  int col;
-
-  ASSERT (n > s);
-
-  mask = ap[n-1] | bp[n-1];
-  ASSERT (mask > 0);
-
-  if (n == s + 1)
-    {
-      if (mask < 4)
-       goto subtract;
-
-      ah = ap[n-1]; al = ap[n-2];
-      bh = bp[n-1]; bl = bp[n-2];
-    }
-  else if (mask & GMP_NUMB_HIGHBIT)
-    {
-      ah = ap[n-1]; al = ap[n-2];
-      bh = bp[n-1]; bl = bp[n-2];
-    }
-  else
-    {
-      int shift;
-
-      count_leading_zeros (shift, mask);
-      ah = MPN_EXTRACT_NUMB (shift, ap[n-1], ap[n-2]);
-      al = MPN_EXTRACT_NUMB (shift, ap[n-2], ap[n-3]);
-      bh = MPN_EXTRACT_NUMB (shift, bp[n-1], bp[n-2]);
-      bl = MPN_EXTRACT_NUMB (shift, bp[n-2], bp[n-3]);
-    }
-
-  /* Try an mpn_hgcd2 step */
-  if (mpn_hgcd2 (ah, al, bh, bl, &M1))
-    {
-      /* Multiply M <- M * M1 */
-      hgcd_matrix_mul_1 (M, &M1, tp);
-
-      /* Can't swap inputs, so we need to copy. */
-      MPN_COPY (tp, ap, n);
-      /* Multiply M1^{-1} (a;b) */
-      return mpn_hgcd_mul_matrix1_inverse_vector (&M1, ap, tp, bp, n);
-    }
-
- subtract:
-  /* There are two ways in which mpn_hgcd2 can fail. Either one of ah and
-     bh was too small, or ah, bh were (almost) equal. Perform one
-     subtraction step (for possible cancellation of high limbs),
-     followed by one division. */
-
-  /* Since we must ensure that #(a-b) > s, we handle cancellation of
-     high limbs explicitly up front. (FIXME: Or is it better to just
-     subtract, normalize, and use an addition to undo if it turns out
-     the the difference is too small?) */
-  for (an = n; an > s; an--)
-    if (ap[an-1] != bp[an-1])
-      break;
-
-  if (an == s)
-    return 0;
-
-  /* Maintain a > b. When needed, swap a and b, and let col keep track
-     of how to update M. */
-  if (ap[an-1] > bp[an-1])
-    {
-      /* a is largest. In the subtraction step, we need to update
-        column 1 of M */
-      col = 1;
-    }
-  else
-    {
-      MP_PTR_SWAP (ap, bp);
-      col = 0;
-    }
-
-  bn = n;
-  MPN_NORMALIZE (bp, bn);
-  if (bn <= s)
-    return 0;
-
-  /* We have #a, #b > s. When is it possible that #(a-b) < s? For
-     cancellation to happen, the numbers must be of the form
-
-       a = x + 1, 0,            ..., 0,            al
-       b = x    , GMP_NUMB_MAX, ..., GMP_NUMB_MAX, bl
-
-     where al, bl denotes the least significant k limbs. If al < bl,
-     then #(a-b) < k, and if also high(al) != 0, high(bl) != GMP_NUMB_MAX,
-     then #(a-b) = k. If al >= bl, then #(a-b) = k + 1. */
-
-  if (ap[an-1] == bp[an-1] + 1)
-    {
-      mp_size_t k;
-      int c;
-      for (k = an-1; k > s; k--)
-       if (ap[k-1] != 0 || bp[k-1] != GMP_NUMB_MAX)
-         break;
-
-      MPN_CMP (c, ap, bp, k);
-      if (c < 0)
-       {
-         mp_limb_t cy;
-
-         /* The limbs from k and up are cancelled. */
-         if (k == s)
-           return 0;
-         cy = mpn_sub_n (ap, ap, bp, k);
-         ASSERT (cy == 1);
-         an = k;
-       }
-      else
-       {
-         ASSERT_NOCARRY (mpn_sub_n (ap, ap, bp, k));
-         ap[k] = 1;
-         an = k + 1;
-       }
-    }
-  else
-    ASSERT_NOCARRY (mpn_sub_n (ap, ap, bp, an));
-
-  ASSERT (an > s);
-  ASSERT (ap[an-1] > 0);
-  ASSERT (bn > s);
-  ASSERT (bp[bn-1] > 0);
-
-  hgcd_matrix_update_1 (M, col);
-
-  if (an < bn)
-    {
-      MPN_PTR_SWAP (ap, an, bp, bn);
-      col ^= 1;
-    }
-  else if (an == bn)
-    {
-      int c;
-      MPN_CMP (c, ap, bp, an);
-      if (c < 0)
-       {
-         MP_PTR_SWAP (ap, bp);
-         col ^= 1;
-       }
-    }
-
-  /* Divide a / b. */
-  qn = an + 1 - bn;
-
-  /* FIXME: We could use an approximate division, that may return a
-     too small quotient, and only guarantee that the size of r is
-     almost the size of b. FIXME: Let ap and remainder overlap. */
-  mpn_tdiv_qr (tp, ap, 0, ap, an, bp, bn);
-  qn -= (tp[qn -1] == 0);
-
-  /* Normalize remainder */
-  an = bn;
-  for ( ; an > s; an--)
-    if (ap[an-1] > 0)
-      break;
-
-  if (an <= s)
-    {
-      /* Quotient is too large */
-      mp_limb_t cy;
-
-      cy = mpn_add (ap, bp, bn, ap, an);
-
-      if (cy > 0)
-       {
-         ASSERT (bn < n);
-         ap[bn] = cy;
-         bp[bn] = 0;
-         bn++;
-       }
-
-      MPN_DECR_U (tp, qn, 1);
-      qn -= (tp[qn-1] == 0);
-    }
-
-  if (qn > 0)
-    hgcd_matrix_update_q (M, tp, qn, col, tp + qn);
-
-  return bn;
-}
-
-/* Reduces a,b until |a-b| fits in n/2 + 1 limbs. Constructs matrix M
-   with elements of size at most (n+1)/2 - 1. Returns new size of a,
-   b, or zero if no reduction is possible. */
-mp_size_t
-mpn_hgcd_lehmer (mp_ptr ap, mp_ptr bp, mp_size_t n,
-                struct hgcd_matrix *M, mp_ptr tp)
-{
-  mp_size_t s = n/2 + 1;
-  mp_size_t nn;
-
-  ASSERT (n > s);
-  ASSERT (ap[n-1] > 0 || bp[n-1] > 0);
-
-  nn = hgcd_step (n, ap, bp, s, M, tp);
-  if (!nn)
-    return 0;
-
-  for (;;)
-    {
-      n = nn;
-      ASSERT (n > s);
-      nn = hgcd_step (n, ap, bp, s, M, tp);
-      if (!nn )
-       return n;
-    }
-}
-
-/* Multiply M by M1 from the right. Needs 3*(M->n + M1->n) + 5 limbs
-   of temporary storage (see mpn_matrix22_mul_itch). */
-void
-mpn_hgcd_matrix_mul (struct hgcd_matrix *M, const struct hgcd_matrix *M1,
-                    mp_ptr tp)
-{
-  mp_size_t n;
-
-  /* About the new size of M:s elements. Since M1's diagonal elements
-     are > 0, no element can decrease. The new elements are of size
-     M->n + M1->n, one limb more or less. The computation of the
-     matrix product produces elements of size M->n + M1->n + 1. But
-     the true size, after normalization, may be three limbs smaller.
-
-     The reason that the product has normalized size >= M->n + M1->n -
-     2 is subtle. It depends on the fact that M and M1 can be factored
-     as products of (1,1; 0,1) and (1,0; 1,1), and that we can't have
-     M ending with a large power and M1 starting with a large power of
-     the same matrix. */
-
-  /* FIXME: Strassen multiplication gives only a small speedup. In FFT
-     multiplication range, this function could be sped up quite a lot
-     using invariance. */
-  ASSERT (M->n + M1->n < M->alloc);
-
-  ASSERT ((M->p[0][0][M->n-1] | M->p[0][1][M->n-1]
-          | M->p[1][0][M->n-1] | M->p[1][1][M->n-1]) > 0);
-
-  ASSERT ((M1->p[0][0][M1->n-1] | M1->p[0][1][M1->n-1]
-          | M1->p[1][0][M1->n-1] | M1->p[1][1][M1->n-1]) > 0);
-
-  mpn_matrix22_mul (M->p[0][0], M->p[0][1],
-                   M->p[1][0], M->p[1][1], M->n,
-                   M1->p[0][0], M1->p[0][1],
-                   M1->p[1][0], M1->p[1][1], M1->n, tp);
-
-  /* Index of last potentially non-zero limb, size is one greater. */
-  n = M->n + M1->n;
-
-  n -= ((M->p[0][0][n] | M->p[0][1][n] | M->p[1][0][n] | M->p[1][1][n]) == 0);
-  n -= ((M->p[0][0][n] | M->p[0][1][n] | M->p[1][0][n] | M->p[1][1][n]) == 0);
-  n -= ((M->p[0][0][n] | M->p[0][1][n] | M->p[1][0][n] | M->p[1][1][n]) == 0);
-
-  ASSERT ((M->p[0][0][n] | M->p[0][1][n] | M->p[1][0][n] | M->p[1][1][n]) > 0);
-
-  M->n = n + 1;
-}
-
-/* Multiplies the least significant p limbs of (a;b) by M^-1.
-   Temporary space needed: 2 * (p + M->n)*/
-mp_size_t
-mpn_hgcd_matrix_adjust (struct hgcd_matrix *M,
-                       mp_size_t n, mp_ptr ap, mp_ptr bp,
-                       mp_size_t p, mp_ptr tp)
-{
-  /* M^-1 (a;b) = (r11, -r01; -r10, r00) (a ; b)
-     = (r11 a - r01 b; - r10 a + r00 b */
-
-  mp_ptr t0 = tp;
-  mp_ptr t1 = tp + p + M->n;
-  mp_limb_t ah, bh;
-  mp_limb_t cy;
-
-  ASSERT (p + M->n  < n);
-
-  /* First compute the two values depending on a, before overwriting a */
-
-  if (M->n >= p)
-    {
-      mpn_mul (t0, M->p[1][1], M->n, ap, p);
-      mpn_mul (t1, M->p[1][0], M->n, ap, p);
-    }
-  else
-    {
-      mpn_mul (t0, ap, p, M->p[1][1], M->n);
-      mpn_mul (t1, ap, p, M->p[1][0], M->n);
-    }
-
-  /* Update a */
-  MPN_COPY (ap, t0, p);
-  ah = mpn_add (ap + p, ap + p, n - p, t0 + p, M->n);
-
-  if (M->n >= p)
-    mpn_mul (t0, M->p[0][1], M->n, bp, p);
-  else
-    mpn_mul (t0, bp, p, M->p[0][1], M->n);
-
-  cy = mpn_sub (ap, ap, n, t0, p + M->n);
-  ASSERT (cy <= ah);
-  ah -= cy;
-
-  /* Update b */
-  if (M->n >= p)
-    mpn_mul (t0, M->p[0][0], M->n, bp, p);
-  else
-    mpn_mul (t0, bp, p, M->p[0][0], M->n);
-
-  MPN_COPY (bp, t0, p);
-  bh = mpn_add (bp + p, bp + p, n - p, t0 + p, M->n);
-  cy = mpn_sub (bp, bp, n, t1, p + M->n);
-  ASSERT (cy <= bh);
-  bh -= cy;
-
-  if (ah > 0 || bh > 0)
-    {
-      ap[n] = ah;
-      bp[n] = bh;
-      n++;
-    }
-  else
-    {
-      /* The subtraction can reduce the size by at most one limb. */
-      if (ap[n-1] == 0 && bp[n-1] == 0)
-       n--;
-    }
-  ASSERT (ap[n-1] > 0 || bp[n-1] > 0);
-  return n;
-}
  
  /* Size analysis for hgcd:
  
@@ -530,15 +54,14 @@ mpn_hgcd_itch (mp_size_t n)
    mp_size_t nscaled;
  
    if (BELOW_THRESHOLD (n, HGCD_THRESHOLD))
-    return MPN_HGCD_LEHMER_ITCH (n);
+    return n;
  
    /* Get the recursion depth. */
    nscaled = (n - 1) / (HGCD_THRESHOLD - 1);
    count_leading_zeros (count, nscaled);
    k = GMP_LIMB_BITS - count;
  
-  return 20 * ((n+3) / 4) + 22 * k
-    + MPN_HGCD_LEHMER_ITCH (HGCD_THRESHOLD);
+  return 20 * ((n+3) / 4) + 22 * k + HGCD_THRESHOLD;
  }
  
  /* Reduces a,b until |a-b| fits in n/2 + 1 limbs. Constructs matrix M
@@ -550,9 +73,8 @@ mpn_hgcd (mp_ptr ap, mp_ptr bp, mp_size_t n,
           struct hgcd_matrix *M, mp_ptr tp)
  {
    mp_size_t s = n/2 + 1;
-  mp_size_t n2 = (3*n)/4 + 1;
  
-  mp_size_t p, nn;
+  mp_size_t nn;
    int success = 0;
  
    if (n <= s)
@@ -564,76 +86,83 @@ mpn_hgcd (mp_ptr ap, mp_ptr bp, mp_size_t n,
  
    ASSERT ((n+1)/2 - 1 < M->alloc);
  
-  if (BELOW_THRESHOLD (n, HGCD_THRESHOLD))
-    return mpn_hgcd_lehmer (ap, bp, n, M, tp);
-
-  p = n/2;
-  nn = mpn_hgcd (ap + p, bp + p, n - p, M, tp);
-  if (nn > 0)
-    {
-      /* Needs 2*(p + M->n) <= 2*(floor(n/2) + ceil(n/2) - 1)
-        = 2 (n - 1) */
-      n = mpn_hgcd_matrix_adjust (M, p + nn, ap, bp, p, tp);
-      success = 1;
-    }
-  while (n > n2)
+  if (ABOVE_THRESHOLD (n, HGCD_THRESHOLD))
      {
-      /* Needs n + 1 storage */
-      nn = hgcd_step (n, ap, bp, s, M, tp);
-      if (!nn)
-       return success ? n : 0;
-      n = nn;
-      success = 1;
-    }
+      mp_size_t n2 = (3*n)/4 + 1;
+      mp_size_t p = n/2;
  
-  if (n > s + 2)
-    {
-      struct hgcd_matrix M1;
-      mp_size_t scratch;
+      nn = mpn_hgcd_reduce (M, ap, bp, n, p, tp);
+      if (nn)
+       {
+         n = nn;
+         success = 1;
+       }
+
+      /* NOTE: It appears this loop never runs more than once (at
+        least when not recursing to hgcd_appr). */
+      while (n > n2)
+       {
+         /* Needs n + 1 storage */
+         nn = mpn_hgcd_step (n, ap, bp, s, M, tp);
+         if (!nn)
+           return success ? n : 0;
  
-      p = 2*s - n + 1;
-      scratch = MPN_HGCD_MATRIX_INIT_ITCH (n-p);
+         n = nn;
+         success = 1;
+       }
  
-      mpn_hgcd_matrix_init(&M1, n - p, tp);
-      nn = mpn_hgcd (ap + p, bp + p, n - p, &M1, tp + scratch);
-      if (nn > 0)
+      if (n > s + 2)
         {
-         /* We always have max(M) > 2^{-(GMP_NUMB_BITS + 1)} max(M1) */
-         ASSERT (M->n + 2 >= M1.n);
+         struct hgcd_matrix M1;
+         mp_size_t scratch;
  
-         /* Furthermore, assume M ends with a quotient (1, q; 0, 1),
-            then either q or q + 1 is a correct quotient, and M1 will
-            start with either (1, 0; 1, 1) or (2, 1; 1, 1). This
-            rules out the case that the size of M * M1 is much
-            smaller than the expected M->n + M1->n. */
+         p = 2*s - n + 1;
+         scratch = MPN_HGCD_MATRIX_INIT_ITCH (n-p);
  
-         ASSERT (M->n + M1.n < M->alloc);
+         mpn_hgcd_matrix_init(&M1, n - p, tp);
  
-         /* Needs 2 (p + M->n) <= 2 (2*s - n2 + 1 + n2 - s - 1)
-            = 2*s <= 2*(floor(n/2) + 1) <= n + 2. */
-         n = mpn_hgcd_matrix_adjust (&M1, p + nn, ap, bp, p, tp + scratch);
+         /* FIXME: Should use hgcd_reduce, but that may require more
+            scratch space, which requires review. */
  
-         /* We need a bound for of M->n + M1.n. Let n be the original
-            input size. Then
+         nn = mpn_hgcd (ap + p, bp + p, n - p, &M1, tp + scratch);
+         if (nn > 0)
+           {
+             /* We always have max(M) > 2^{-(GMP_NUMB_BITS + 1)} max(M1) */
+             ASSERT (M->n + 2 >= M1.n);
  
-              ceil(n/2) - 1 >= size of product >= M.n + M1.n - 2
+             /* Furthermore, assume M ends with a quotient (1, q; 0, 1),
+                then either q or q + 1 is a correct quotient, and M1 will
+                start with either (1, 0; 1, 1) or (2, 1; 1, 1). This
+                rules out the case that the size of M * M1 is much
+                smaller than the expected M->n + M1->n. */
  
-            and it follows that
+             ASSERT (M->n + M1.n < M->alloc);
  
-              M.n + M1.n <= ceil(n/2) + 1
+             /* Needs 2 (p + M->n) <= 2 (2*s - n2 + 1 + n2 - s - 1)
+                = 2*s <= 2*(floor(n/2) + 1) <= n + 2. */
+             n = mpn_hgcd_matrix_adjust (&M1, p + nn, ap, bp, p, tp + scratch);
  
-            Then 3*(M.n + M1.n) + 5 <= 3 * ceil(n/2) + 8 is the
-            amount of needed scratch space. */
-         mpn_hgcd_matrix_mul (M, &M1, tp + scratch);
-         success = 1;
+             /* We need a bound for M->n + M1.n. Let n be the original
+                input size. Then
+
+                ceil(n/2) - 1 >= size of product >= M.n + M1.n - 2
+
+                and it follows that
+
+                M.n + M1.n <= ceil(n/2) + 1
+
+                Then 3*(M.n + M1.n) + 5 <= 3 * ceil(n/2) + 8 is the
+                amount of needed scratch space. */
+             mpn_hgcd_matrix_mul (M, &M1, tp + scratch);
+             success = 1;
+           }
         }
      }
  
-  /* This really is the base case */
    for (;;)
      {
        /* Needs s+3 < n */
-      nn = hgcd_step (n, ap, bp, s, M, tp);
+      nn = mpn_hgcd_step (n, ap, bp, s, M, tp);
        if (!nn)
         return success ? n : 0;
  
diff --git a/mpn/generic/hgcd2.c b/mpn/generic/hgcd2.c

index ffc8c44f674756e6b978ab2f71defd7a4f0fd666..08641cc527abdf686b0bd4c6dc97b0760edcaa23 100644 (file)
--- a/mpn/generic/hgcd2.c
+++ b/mpn/generic/hgcd2.c
@@ -4,7 +4,7 @@
     SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
     GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
  
-Copyright 1996, 1998, 2000, 2001, 2002, 2003, 2004, 2008 Free Software
+Copyright 1996, 1998, 2000, 2001, 2002, 2003, 2004, 2008, 2012 Free Software
  Foundation, Inc.
  
  This file is part of the GNU MP Library.
@@ -199,7 +199,7 @@ div2 (mp_ptr rp,
  
  /* Reduces a,b until |a-b| (almost) fits in one limb + 1 bit. Constructs
     matrix M. Returns 1 if we make progress, i.e. can perform at least
-   one subtraction. Otherwise returns zero.. */
+   one subtraction. Otherwise returns zero. */
  
  /* FIXME: Possible optimizations:
  
@@ -338,8 +338,6 @@ mpn_hgcd2 (mp_limb_t ah, mp_limb_t al, mp_limb_t bh, mp_limb_t bl,
    for (;;)
      {
        ASSERT (ah >= bh);
-      if (ah == bh)
-       break;
  
        ah -= bh;
        if (ah < (CNST_LIMB (1) << (GMP_LIMB_BITS / 2 + 1)))
@@ -369,8 +367,6 @@ mpn_hgcd2 (mp_limb_t ah, mp_limb_t al, mp_limb_t bh, mp_limb_t bl,
         }
      subtract_a1:
        ASSERT (bh >= ah);
-      if (ah == bh)
-       break;
  
        bh -= ah;
        if (bh < (CNST_LIMB (1) << (GMP_LIMB_BITS / 2 + 1)))
@@ -439,31 +435,3 @@ mpn_hgcd_mul_matrix1_vector (const struct hgcd_matrix1 *M,
    n += (ah | bh) > 0;
    return n;
  }
-
-/* Sets (r;b) = M^{-1}(a;b), with M^{-1} = (u11, -u01; -u10, u00) from
-   the left. Uses three buffers, to avoid a copy. */
-mp_size_t
-mpn_hgcd_mul_matrix1_inverse_vector (const struct hgcd_matrix1 *M,
-                                    mp_ptr rp, mp_srcptr ap, mp_ptr bp, mp_size_t n)
-{
-  mp_limb_t h0, h1;
-
-  /* Compute (r;b) <-- (u11 a - u01 b; -u10 a + u00 b) as
-
-     r  = u11 * a
-     r -= u01 * b
-     b *= u00
-     b -= u10 * a
-  */
-
-  h0 =    mpn_mul_1 (rp, ap, n, M->u[1][1]);
-  h1 = mpn_submul_1 (rp, bp, n, M->u[0][1]);
-  ASSERT (h0 == h1);
-
-  h0 =    mpn_mul_1 (bp, bp, n, M->u[0][0]);
-  h1 = mpn_submul_1 (bp, ap, n, M->u[1][0]);
-  ASSERT (h0 == h1);
-
-  n -= (rp[n-1] | bp[n-1]) == 0;
-  return n;
-}
diff --git a/mpn/generic/hgcd2_jacobi.c b/mpn/generic/hgcd2_jacobi.c

new file mode 100644 (file)

index 0000000..c888deb
--- /dev/null
+++ b/mpn/generic/hgcd2_jacobi.c
@@ -0,0 +1,356 @@
+/* hgcd2_jacobi.c
+
+   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
+   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 1996, 1998, 2000, 2001, 2002, 2003, 2004, 2008, 2011 Free Software
+Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+#if GMP_NAIL_BITS > 0
+#error Nails not supported.
+#endif
+
+/* FIXME: Duplicated in hgcd2.c. Should move to gmp-impl.h, and
+   possibly be renamed. */
+static inline mp_limb_t
+div1 (mp_ptr rp,
+      mp_limb_t n0,
+      mp_limb_t d0)
+{
+  mp_limb_t q = 0;
+
+  if ((mp_limb_signed_t) n0 < 0)
+    {
+      int cnt;
+      for (cnt = 1; (mp_limb_signed_t) d0 >= 0; cnt++)
+       {
+         d0 = d0 << 1;
+       }
+
+      q = 0;
+      while (cnt)
+       {
+         q <<= 1;
+         if (n0 >= d0)
+           {
+             n0 = n0 - d0;
+             q |= 1;
+           }
+         d0 = d0 >> 1;
+         cnt--;
+       }
+    }
+  else
+    {
+      int cnt;
+      for (cnt = 0; n0 >= d0; cnt++)
+       {
+         d0 = d0 << 1;
+       }
+
+      q = 0;
+      while (cnt)
+       {
+         d0 = d0 >> 1;
+         q <<= 1;
+         if (n0 >= d0)
+           {
+             n0 = n0 - d0;
+             q |= 1;
+           }
+         cnt--;
+       }
+    }
+  *rp = n0;
+  return q;
+}
+
+/* Two-limb division optimized for small quotients.  */
+static inline mp_limb_t
+div2 (mp_ptr rp,
+      mp_limb_t nh, mp_limb_t nl,
+      mp_limb_t dh, mp_limb_t dl)
+{
+  mp_limb_t q = 0;
+
+  if ((mp_limb_signed_t) nh < 0)
+    {
+      int cnt;
+      for (cnt = 1; (mp_limb_signed_t) dh >= 0; cnt++)
+       {
+         dh = (dh << 1) | (dl >> (GMP_LIMB_BITS - 1));
+         dl = dl << 1;
+       }
+
+      while (cnt)
+       {
+         q <<= 1;
+         if (nh > dh || (nh == dh && nl >= dl))
+           {
+             sub_ddmmss (nh, nl, nh, nl, dh, dl);
+             q |= 1;
+           }
+         dl = (dh << (GMP_LIMB_BITS - 1)) | (dl >> 1);
+         dh = dh >> 1;
+         cnt--;
+       }
+    }
+  else
+    {
+      int cnt;
+      for (cnt = 0; nh > dh || (nh == dh && nl >= dl); cnt++)
+       {
+         dh = (dh << 1) | (dl >> (GMP_LIMB_BITS - 1));
+         dl = dl << 1;
+       }
+
+      while (cnt)
+       {
+         dl = (dh << (GMP_LIMB_BITS - 1)) | (dl >> 1);
+         dh = dh >> 1;
+         q <<= 1;
+         if (nh > dh || (nh == dh && nl >= dl))
+           {
+             sub_ddmmss (nh, nl, nh, nl, dh, dl);
+             q |= 1;
+           }
+         cnt--;
+       }
+    }
+
+  rp[0] = nl;
+  rp[1] = nh;
+
+  return q;
+}
+
+int
+mpn_hgcd2_jacobi (mp_limb_t ah, mp_limb_t al, mp_limb_t bh, mp_limb_t bl,
+                 struct hgcd_matrix1 *M, unsigned *bitsp)
+{
+  mp_limb_t u00, u01, u10, u11;
+  unsigned bits = *bitsp;
+
+  if (ah < 2 || bh < 2)
+    return 0;
+
+  if (ah > bh || (ah == bh && al > bl))
+    {
+      sub_ddmmss (ah, al, ah, al, bh, bl);
+      if (ah < 2)
+       return 0;
+
+      u00 = u01 = u11 = 1;
+      u10 = 0;
+      bits = mpn_jacobi_update (bits, 1, 1);
+    }
+  else
+    {
+      sub_ddmmss (bh, bl, bh, bl, ah, al);
+      if (bh < 2)
+       return 0;
+
+      u00 = u10 = u11 = 1;
+      u01 = 0;
+      bits = mpn_jacobi_update (bits, 0, 1);
+    }
+
+  if (ah < bh)
+    goto subtract_a;
+
+  for (;;)
+    {
+      ASSERT (ah >= bh);
+      if (ah == bh)
+       goto done;
+
+      if (ah < (CNST_LIMB(1) << (GMP_LIMB_BITS / 2)))
+       {
+         ah = (ah << (GMP_LIMB_BITS / 2) ) + (al >> (GMP_LIMB_BITS / 2));
+         bh = (bh << (GMP_LIMB_BITS / 2) ) + (bl >> (GMP_LIMB_BITS / 2));
+
+         break;
+       }
+
+      /* Subtract a -= q b, and multiply M from the right by (1 q ; 0
+        1), affecting the second column of M. */
+      ASSERT (ah > bh);
+      sub_ddmmss (ah, al, ah, al, bh, bl);
+
+      if (ah < 2)
+       goto done;
+
+      if (ah <= bh)
+       {
+         /* Use q = 1 */
+         u01 += u00;
+         u11 += u10;
+         bits = mpn_jacobi_update (bits, 1, 1);
+       }
+      else
+       {
+         mp_limb_t r[2];
+         mp_limb_t q = div2 (r, ah, al, bh, bl);
+         al = r[0]; ah = r[1];
+         if (ah < 2)
+           {
+             /* A is too small, but q is correct. */
+             u01 += q * u00;
+             u11 += q * u10;
+             bits = mpn_jacobi_update (bits, 1, q & 3);
+             goto done;
+           }
+         q++;
+         u01 += q * u00;
+         u11 += q * u10;
+         bits = mpn_jacobi_update (bits, 1, q & 3);
+       }
+    subtract_a:
+      ASSERT (bh >= ah);
+      if (ah == bh)
+       goto done;
+
+      if (bh < (CNST_LIMB(1) << (GMP_LIMB_BITS / 2)))
+       {
+         ah = (ah << (GMP_LIMB_BITS / 2) ) + (al >> (GMP_LIMB_BITS / 2));
+         bh = (bh << (GMP_LIMB_BITS / 2) ) + (bl >> (GMP_LIMB_BITS / 2));
+
+         goto subtract_a1;
+       }
+
+      /* Subtract b -= q a, and multiply M from the right by (1 0 ; q
+        1), affecting the first column of M. */
+      sub_ddmmss (bh, bl, bh, bl, ah, al);
+
+      if (bh < 2)
+       goto done;
+
+      if (bh <= ah)
+       {
+         /* Use q = 1 */
+         u00 += u01;
+         u10 += u11;
+         bits = mpn_jacobi_update (bits, 0, 1);
+       }
+      else
+       {
+         mp_limb_t r[2];
+         mp_limb_t q = div2 (r, bh, bl, ah, al);
+         bl = r[0]; bh = r[1];
+         if (bh < 2)
+           {
+             /* B is too small, but q is correct. */
+             u00 += q * u01;
+             u10 += q * u11;
+             bits = mpn_jacobi_update (bits, 0, q & 3);
+             goto done;
+           }
+         q++;
+         u00 += q * u01;
+         u10 += q * u11;
+         bits = mpn_jacobi_update (bits, 0, q & 3);
+       }
+    }
+
+  /* NOTE: Since we discard the least significant half limb, we don't
+     get a truly maximal M (corresponding to |a - b| <
+     2^{GMP_LIMB_BITS +1}). */
+  /* Single precision loop */
+  for (;;)
+    {
+      ASSERT (ah >= bh);
+      if (ah == bh)
+       break;
+
+      ah -= bh;
+      if (ah < (CNST_LIMB (1) << (GMP_LIMB_BITS / 2 + 1)))
+       break;
+
+      if (ah <= bh)
+       {
+         /* Use q = 1 */
+         u01 += u00;
+         u11 += u10;
+         bits = mpn_jacobi_update (bits, 1, 1);
+       }
+      else
+       {
+         mp_limb_t r;
+         mp_limb_t q = div1 (&r, ah, bh);
+         ah = r;
+         if (ah < (CNST_LIMB(1) << (GMP_LIMB_BITS / 2 + 1)))
+           {
+             /* A is too small, but q is correct. */
+             u01 += q * u00;
+             u11 += q * u10;
+             bits = mpn_jacobi_update (bits, 1, q & 3);
+             break;
+           }
+         q++;
+         u01 += q * u00;
+         u11 += q * u10;
+         bits = mpn_jacobi_update (bits, 1, q & 3);
+       }
+    subtract_a1:
+      ASSERT (bh >= ah);
+      if (ah == bh)
+       break;
+
+      bh -= ah;
+      if (bh < (CNST_LIMB (1) << (GMP_LIMB_BITS / 2 + 1)))
+       break;
+
+      if (bh <= ah)
+       {
+         /* Use q = 1 */
+         u00 += u01;
+         u10 += u11;
+         bits = mpn_jacobi_update (bits, 0, 1);
+       }
+      else
+       {
+         mp_limb_t r;
+         mp_limb_t q = div1 (&r, bh, ah);
+         bh = r;
+         if (bh < (CNST_LIMB(1) << (GMP_LIMB_BITS / 2 + 1)))
+           {
+             /* B is too small, but q is correct. */
+             u00 += q * u01;
+             u10 += q * u11;
+             bits = mpn_jacobi_update (bits, 0, q & 3);
+             break;
+           }
+         q++;
+         u00 += q * u01;
+         u10 += q * u11;
+         bits = mpn_jacobi_update (bits, 0, q & 3);
+       }
+    }
+
+ done:
+  M->u[0][0] = u00; M->u[0][1] = u01;
+  M->u[1][0] = u10; M->u[1][1] = u11;
+  *bitsp = bits;
+
+  return 1;
+}
diff --git a/mpn/generic/hgcd_appr.c b/mpn/generic/hgcd_appr.c

new file mode 100644 (file)

index 0000000..bb8536a
--- /dev/null
+++ b/mpn/generic/hgcd_appr.c
@@ -0,0 +1,258 @@
+/* hgcd_appr.c.
+
+   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
+   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2011, 2012 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+/* Identical to mpn_hgcd_itch. FIXME: Do we really need to add
+   HGCD_THRESHOLD at the end? */
+mp_size_t
+mpn_hgcd_appr_itch (mp_size_t n)
+{
+  if (BELOW_THRESHOLD (n, HGCD_APPR_THRESHOLD))
+    return n;
+  else
+    {
+      unsigned k;
+      int count;
+      mp_size_t nscaled;
+
+      /* Get the recursion depth. */
+      nscaled = (n - 1) / (HGCD_APPR_THRESHOLD - 1);
+      count_leading_zeros (count, nscaled);
+      k = GMP_LIMB_BITS - count;
+
+      return 20 * ((n+3) / 4) + 22 * k + HGCD_THRESHOLD;
+    }
+}
+
+/* Destroys inputs. */
+int
+mpn_hgcd_appr (mp_ptr ap, mp_ptr bp, mp_size_t n,
+              struct hgcd_matrix *M, mp_ptr tp)
+{
+  mp_size_t s;
+  int success = 0;
+
+  ASSERT (n > 0);
+
+  ASSERT ((ap[n-1] | bp[n-1]) != 0);
+
+  if (n <= 2)
+    /* Implies s = n. A fairly uninteresting case but exercised by the
+       random inputs of the testsuite. */
+    return 0;
+
+  ASSERT ((n+1)/2 - 1 < M->alloc);
+
+  /* We aim for reduction to GMP_NUMB_BITS * s bits. But each time
+     we discard some of the least significant limbs, we must keep one
+     additional bit to account for the truncation error. We maintain
+     the GMP_NUMB_BITS * s - extra_bits as the current target size. */
+
+  s = n/2 + 1;
+  if (BELOW_THRESHOLD (n, HGCD_APPR_THRESHOLD))
+    {
+      unsigned extra_bits = 0;
+
+      while (n > 2)
+       {
+         mp_size_t nn;
+
+         ASSERT (n > s);
+         ASSERT (n <= 2*s);
+
+         nn = mpn_hgcd_step (n, ap, bp, s, M, tp);
+         if (!nn)
+           break;
+
+         n = nn;
+         success = 1;
+
+         /* We can truncate and discard the lower p bits whenever nbits <=
+            2*sbits - p. To account for the truncation error, we must
+            adjust
+
+            sbits <-- sbits + 1 - p,
+
+            rather than just sbits <-- sbits - p. This adjustment makes
+            the produced matrix slightly smaller than it could be. */
+
+         if (GMP_NUMB_BITS * (n + 1) + 2 * extra_bits <= 2*GMP_NUMB_BITS * s)
+           {
+             mp_size_t p = (GMP_NUMB_BITS * (2*s - n) - 2*extra_bits) / GMP_NUMB_BITS;
+
+             if (extra_bits == 0)
+               {
+                 /* We cross a limb boundary and bump s. We can't do that
+                    if the result is that it makes min(U, V)
+                    smaller than 2^{GMP_NUMB_BITS} s. */
+                 if (s + 1 == n
+                     || mpn_zero_p (ap + s + 1, n - s - 1)
+                     || mpn_zero_p (bp + s + 1, n - s - 1))
+                   continue;
+
+                 extra_bits = GMP_NUMB_BITS - 1;
+                 s++;
+               }
+             else
+               {
+                 extra_bits--;
+               }
+
+             /* Drop the p least significant limbs */
+             ap += p; bp += p; n -= p; s -= p;
+           }
+       }
+
+      ASSERT (s > 0);
+
+      if (extra_bits > 0)
+       {
+         /* We can get here only if we have dropped at least one of the
+            least significant bits, so we can decrement ap and bp. We can
+            then shift left extra bits using mpn_rshift. */
+         /* NOTE: In the unlikely case that n is large, it would be
+            preferable to do an initial subdiv step to reduce the size
+            before shifting, but that would mean duplicating
+            mpn_gcd_subdiv_step with a bit count rather than a limb
+            count. */
+         ap--; bp--;
+         ap[0] = mpn_rshift (ap+1, ap+1, n, GMP_NUMB_BITS - extra_bits);
+         bp[0] = mpn_rshift (bp+1, bp+1, n, GMP_NUMB_BITS - extra_bits);
+         n += (ap[n] | bp[n]) > 0;
+
+         ASSERT (success);
+
+         while (n > 2)
+           {
+             mp_size_t nn;
+
+             ASSERT (n > s);
+             ASSERT (n <= 2*s);
+
+             nn = mpn_hgcd_step (n, ap, bp, s, M, tp);
+
+             if (!nn)
+               return 1;
+
+             n = nn;
+           }
+       }
+
+      if (n == 2)
+       {
+         struct hgcd_matrix1 M1;
+         ASSERT (s == 1);
+
+         if (mpn_hgcd2 (ap[1], ap[0], bp[1], bp[0], &M1))
+           {
+             /* Multiply M <- M * M1 */
+             mpn_hgcd_matrix_mul_1 (M, &M1, tp);
+             success = 1;
+           }
+       }
+      return success;
+    }
+  else
+    {
+      mp_size_t n2 = (3*n)/4 + 1;
+      mp_size_t p = n/2;
+      mp_size_t nn;
+
+      nn = mpn_hgcd_reduce (M, ap, bp, n, p, tp);
+      if (nn)
+       {
+         n = nn;
+         /* FIXME: Discard some of the low limbs immediately? */
+         success = 1;
+       }
+
+      while (n > n2)
+       {
+         mp_size_t nn;
+
+         /* Needs n + 1 storage */
+         nn = mpn_hgcd_step (n, ap, bp, s, M, tp);
+         if (!nn)
+           return success;
+
+         n = nn;
+         success = 1;
+       }
+      if (n > s + 2)
+       {
+         struct hgcd_matrix M1;
+         mp_size_t scratch;
+
+         p = 2*s - n + 1;
+         scratch = MPN_HGCD_MATRIX_INIT_ITCH (n-p);
+
+         mpn_hgcd_matrix_init(&M1, n - p, tp);
+         if (mpn_hgcd_appr (ap + p, bp + p, n - p, &M1, tp + scratch))
+           {
+             /* We always have max(M) > 2^{-(GMP_NUMB_BITS + 1)} max(M1) */
+             ASSERT (M->n + 2 >= M1.n);
+
+             /* Furthermore, assume M ends with a quotient (1, q; 0, 1),
+                then either q or q + 1 is a correct quotient, and M1 will
+                start with either (1, 0; 1, 1) or (2, 1; 1, 1). This
+                rules out the case that the size of M * M1 is much
+                smaller than the expected M->n + M1->n. */
+
+             ASSERT (M->n + M1.n < M->alloc);
+
+             /* We need a bound for M->n + M1.n. Let n be the original
+                input size. Then
+
+                ceil(n/2) - 1 >= size of product >= M.n + M1.n - 2
+
+                and it follows that
+
+                M.n + M1.n <= ceil(n/2) + 1
+
+                Then 3*(M.n + M1.n) + 5 <= 3 * ceil(n/2) + 8 is the
+                amount of needed scratch space. */
+             mpn_hgcd_matrix_mul (M, &M1, tp + scratch);
+             return 1;
+           }
+       }
+
+      for(;;)
+       {
+         mp_size_t nn;
+
+         ASSERT (n > s);
+         ASSERT (n <= 2*s);
+
+         nn = mpn_hgcd_step (n, ap, bp, s, M, tp);
+
+         if (!nn)
+           return success;
+
+         n = nn;
+         success = 1;
+       }
+    }
+}
diff --git a/mpn/generic/hgcd_jacobi.c b/mpn/generic/hgcd_jacobi.c

new file mode 100644 (file)

index 0000000..728755a
--- /dev/null
+++ b/mpn/generic/hgcd_jacobi.c
@@ -0,0 +1,233 @@
+/* hgcd_jacobi.c.
+
+   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
+   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2003, 2004, 2005, 2008, 2011, 2012 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+/* This file is almost a copy of hgcd.c, with some added calls to
+   mpn_jacobi_update */
+
+struct hgcd_jacobi_ctx
+{
+  struct hgcd_matrix *M;
+  unsigned *bitsp;
+};
+
+static void
+hgcd_jacobi_hook (void *p, mp_srcptr gp, mp_size_t gn,
+                 mp_srcptr qp, mp_size_t qn, int d)
+{
+  ASSERT (!gp);
+  ASSERT (d >= 0);
+
+  MPN_NORMALIZE (qp, qn);
+  if (qn > 0)
+    {
+      struct hgcd_jacobi_ctx *ctx = (struct hgcd_jacobi_ctx *) p;
+      /* NOTES: This is a bit ugly. A tp area is passed to
+        gcd_subdiv_step, which stores q at the start of that area. We
+        now use the rest. */
+      mp_ptr tp = (mp_ptr) qp + qn;
+
+      mpn_hgcd_matrix_update_q (ctx->M, qp, qn, d, tp);
+      *ctx->bitsp = mpn_jacobi_update (*ctx->bitsp, d, qp[0] & 3);
+    }
+}
+
+/* Perform a few steps, using some of mpn_hgcd2, subtraction and
+   division. Reduces the size by almost one limb or more, but never
+   below the given size s. Return new size for a and b, or 0 if no
+   more steps are possible.
+
+   If hgcd2 succeeds, needs temporary space for hgcd_matrix_mul_1, M->n
+   limbs, and matrix22_mul1_inverse_vector, n limbs. If hgcd2
+   fails, needs space for the quotient, qn <= n - s + 1 limbs, and for
+   hgcd_matrix_update_q, qn + (size of the appropriate column of M) <=
+   resulting size of M.
+
+   If N is the input size to the calling hgcd, then s = floor(N/2) +
+   1, M->n < N, qn + matrix size <= n - s + 1 + n - s = 2 (n - s) + 1
+   < N, so N is sufficient.
+*/
+
+static mp_size_t
+hgcd_jacobi_step (mp_size_t n, mp_ptr ap, mp_ptr bp, mp_size_t s,
+                 struct hgcd_matrix *M, unsigned *bitsp, mp_ptr tp)
+{
+  struct hgcd_matrix1 M1;
+  mp_limb_t mask;
+  mp_limb_t ah, al, bh, bl;
+
+  ASSERT (n > s);
+
+  mask = ap[n-1] | bp[n-1];
+  ASSERT (mask > 0);
+
+  if (n == s + 1)
+    {
+      if (mask < 4)
+       goto subtract;
+
+      ah = ap[n-1]; al = ap[n-2];
+      bh = bp[n-1]; bl = bp[n-2];
+    }
+  else if (mask & GMP_NUMB_HIGHBIT)
+    {
+      ah = ap[n-1]; al = ap[n-2];
+      bh = bp[n-1]; bl = bp[n-2];
+    }
+  else
+    {
+      int shift;
+
+      count_leading_zeros (shift, mask);
+      ah = MPN_EXTRACT_NUMB (shift, ap[n-1], ap[n-2]);
+      al = MPN_EXTRACT_NUMB (shift, ap[n-2], ap[n-3]);
+      bh = MPN_EXTRACT_NUMB (shift, bp[n-1], bp[n-2]);
+      bl = MPN_EXTRACT_NUMB (shift, bp[n-2], bp[n-3]);
+    }
+
+  /* Try an mpn_hgcd2 step */
+  if (mpn_hgcd2_jacobi (ah, al, bh, bl, &M1, bitsp))
+    {
+      /* Multiply M <- M * M1 */
+      mpn_hgcd_matrix_mul_1 (M, &M1, tp);
+
+      /* Can't swap inputs, so we need to copy. */
+      MPN_COPY (tp, ap, n);
+      /* Multiply M1^{-1} (a;b) */
+      return mpn_matrix22_mul1_inverse_vector (&M1, ap, tp, bp, n);
+    }
+
+ subtract:
+  {
+    struct hgcd_jacobi_ctx ctx;
+    ctx.M = M;
+    ctx.bitsp = bitsp;
+
+    return mpn_gcd_subdiv_step (ap, bp, n, s, hgcd_jacobi_hook, &ctx, tp);
+  }
+}
+
+/* Reduces a,b until |a-b| fits in n/2 + 1 limbs. Constructs matrix M
+   with elements of size at most (n+1)/2 - 1. Returns new size of a,
+   b, or zero if no reduction is possible. */
+
+/* Same scratch requirements as for mpn_hgcd. */
+mp_size_t
+mpn_hgcd_jacobi (mp_ptr ap, mp_ptr bp, mp_size_t n,
+                struct hgcd_matrix *M, unsigned *bitsp, mp_ptr tp)
+{
+  mp_size_t s = n/2 + 1;
+
+  mp_size_t nn;
+  int success = 0;
+
+  if (n <= s)
+    /* Happens when n <= 2, a fairly uninteresting case but exercised
+       by the random inputs of the testsuite. */
+    return 0;
+
+  ASSERT ((ap[n-1] | bp[n-1]) > 0);
+
+  ASSERT ((n+1)/2 - 1 < M->alloc);
+
+  if (ABOVE_THRESHOLD (n, HGCD_THRESHOLD))
+    {
+      mp_size_t n2 = (3*n)/4 + 1;
+      mp_size_t p = n/2;
+
+      nn = mpn_hgcd_jacobi (ap + p, bp + p, n - p, M, bitsp, tp);
+      if (nn > 0)
+       {
+         /* Needs 2*(p + M->n) <= 2*(floor(n/2) + ceil(n/2) - 1)
+            = 2 (n - 1) */
+         n = mpn_hgcd_matrix_adjust (M, p + nn, ap, bp, p, tp);
+         success = 1;
+       }
+      while (n > n2)
+       {
+         /* Needs n + 1 storage */
+         nn = hgcd_jacobi_step (n, ap, bp, s, M, bitsp, tp);
+         if (!nn)
+           return success ? n : 0;
+         n = nn;
+         success = 1;
+       }
+
+      if (n > s + 2)
+       {
+         struct hgcd_matrix M1;
+         mp_size_t scratch;
+
+         p = 2*s - n + 1;
+         scratch = MPN_HGCD_MATRIX_INIT_ITCH (n-p);
+
+         mpn_hgcd_matrix_init(&M1, n - p, tp);
+         nn = mpn_hgcd_jacobi (ap + p, bp + p, n - p, &M1, bitsp, tp + scratch);
+         if (nn > 0)
+           {
+             /* We always have max(M) > 2^{-(GMP_NUMB_BITS + 1)} max(M1) */
+             ASSERT (M->n + 2 >= M1.n);
+
+             /* Furthermore, assume M ends with a quotient (1, q; 0, 1),
+                then either q or q + 1 is a correct quotient, and M1 will
+                start with either (1, 0; 1, 1) or (2, 1; 1, 1). This
+                rules out the case that the size of M * M1 is much
+                smaller than the expected M->n + M1->n. */
+
+             ASSERT (M->n + M1.n < M->alloc);
+
+             /* Needs 2 (p + M->n) <= 2 (2*s - n2 + 1 + n2 - s - 1)
+                = 2*s <= 2*(floor(n/2) + 1) <= n + 2. */
+             n = mpn_hgcd_matrix_adjust (&M1, p + nn, ap, bp, p, tp + scratch);
+
+             /* We need a bound for M->n + M1.n. Let n be the original
+                input size. Then
+
+                ceil(n/2) - 1 >= size of product >= M.n + M1.n - 2
+
+                and it follows that
+
+                M.n + M1.n <= ceil(n/2) + 1
+
+                Then 3*(M.n + M1.n) + 5 <= 3 * ceil(n/2) + 8 is the
+                amount of needed scratch space. */
+             mpn_hgcd_matrix_mul (M, &M1, tp + scratch);
+             success = 1;
+           }
+       }
+    }
+
+  for (;;)
+    {
+      /* Needs s+3 < n */
+      nn = hgcd_jacobi_step (n, ap, bp, s, M, bitsp, tp);
+      if (!nn)
+       return success ? n : 0;
+
+      n = nn;
+      success = 1;
+    }
+}
diff --git a/mpn/generic/hgcd_matrix.c b/mpn/generic/hgcd_matrix.c

new file mode 100644 (file)

index 0000000..9eee8ff
--- /dev/null
+++ b/mpn/generic/hgcd_matrix.c
@@ -0,0 +1,255 @@
+/* hgcd_matrix.c.
+
+   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
+   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2003, 2004, 2005, 2008, 2012 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+/* For input of size n, matrix elements are of size at most ceil(n/2)
+   - 1, but we need two limbs extra. */
+void
+mpn_hgcd_matrix_init (struct hgcd_matrix *M, mp_size_t n, mp_ptr p)
+{
+  mp_size_t s = (n+1)/2 + 1;
+  M->alloc = s;
+  M->n = 1;
+  MPN_ZERO (p, 4 * s);
+  M->p[0][0] = p;
+  M->p[0][1] = p + s;
+  M->p[1][0] = p + 2 * s;
+  M->p[1][1] = p + 3 * s;
+
+  M->p[0][0][0] = M->p[1][1][0] = 1;
+}
+
+/* Update column COL, adding in Q * column (1-COL). Temporary storage:
+ * qn + n <= M->alloc, where n is the size of the largest element in
+ * column 1 - COL. */
+void
+mpn_hgcd_matrix_update_q (struct hgcd_matrix *M, mp_srcptr qp, mp_size_t qn,
+                         unsigned col, mp_ptr tp)
+{
+  ASSERT (col < 2);
+
+  if (qn == 1)
+    {
+      mp_limb_t q = qp[0];
+      mp_limb_t c0, c1;
+
+      c0 = mpn_addmul_1 (M->p[0][col], M->p[0][1-col], M->n, q);
+      c1 = mpn_addmul_1 (M->p[1][col], M->p[1][1-col], M->n, q);
+
+      M->p[0][col][M->n] = c0;
+      M->p[1][col][M->n] = c1;
+
+      M->n += (c0 | c1) != 0;
+    }
+  else
+    {
+      unsigned row;
+
+      /* Carries for the unlikely case that we get both high words
+        from the multiplication and carries from the addition. */
+      mp_limb_t c[2];
+      mp_size_t n;
+
+      /* The matrix will not necessarily grow in size by qn, so we
+        need normalization in order not to overflow M. */
+
+      for (n = M->n; n + qn > M->n; n--)
+       {
+         ASSERT (n > 0);
+         if (M->p[0][1-col][n-1] > 0 || M->p[1][1-col][n-1] > 0)
+           break;
+       }
+
+      ASSERT (qn + n <= M->alloc);
+
+      for (row = 0; row < 2; row++)
+       {
+         if (qn <= n)
+           mpn_mul (tp, M->p[row][1-col], n, qp, qn);
+         else
+           mpn_mul (tp, qp, qn, M->p[row][1-col], n);
+
+         ASSERT (n + qn >= M->n);
+         c[row] = mpn_add (M->p[row][col], tp, n + qn, M->p[row][col], M->n);
+       }
+
+      n += qn;
+
+      if (c[0] | c[1])
+       {
+         M->p[0][col][n] = c[0];
+         M->p[1][col][n] = c[1];
+         n++;
+       }
+      else
+       {
+         n -= (M->p[0][col][n-1] | M->p[1][col][n-1]) == 0;
+         ASSERT (n >= M->n);
+       }
+      M->n = n;
+    }
+
+  ASSERT (M->n < M->alloc);
+}
+
+/* Multiply M by M1 from the right. Since the M1 elements fit in
+   GMP_NUMB_BITS - 1 bits, M grows by at most one limb. Needs
+   temporary space M->n */
+void
+mpn_hgcd_matrix_mul_1 (struct hgcd_matrix *M, const struct hgcd_matrix1 *M1,
+                      mp_ptr tp)
+{
+  mp_size_t n0, n1;
+
+  /* Could avoid copy by some swapping of pointers. */
+  MPN_COPY (tp, M->p[0][0], M->n);
+  n0 = mpn_hgcd_mul_matrix1_vector (M1, M->p[0][0], tp, M->p[0][1], M->n);
+  MPN_COPY (tp, M->p[1][0], M->n);
+  n1 = mpn_hgcd_mul_matrix1_vector (M1, M->p[1][0], tp, M->p[1][1], M->n);
+
+  /* Depends on zero initialization */
+  M->n = MAX(n0, n1);
+  ASSERT (M->n < M->alloc);
+}
+
+/* Multiply M by M1 from the right. Needs 3*(M->n + M1->n) + 5 limbs
+   of temporary storage (see mpn_matrix22_mul_itch). */
+void
+mpn_hgcd_matrix_mul (struct hgcd_matrix *M, const struct hgcd_matrix *M1,
+                    mp_ptr tp)
+{
+  mp_size_t n;
+
+  /* About the new size of M's elements. Since M1's diagonal elements
+     are > 0, no element can decrease. The new elements are of size
+     M->n + M1->n, one limb more or less. The computation of the
+     matrix product produces elements of size M->n + M1->n + 1. But
+     the true size, after normalization, may be three limbs smaller.
+
+     The reason that the product has normalized size >= M->n + M1->n -
+     2 is subtle. It depends on the fact that M and M1 can be factored
+     as products of (1,1; 0,1) and (1,0; 1,1), and that we can't have
+     M ending with a large power and M1 starting with a large power of
+     the same matrix. */
+
+  /* FIXME: Strassen multiplication gives only a small speedup. In FFT
+     multiplication range, this function could be sped up quite a lot
+     using invariance. */
+  ASSERT (M->n + M1->n < M->alloc);
+
+  ASSERT ((M->p[0][0][M->n-1] | M->p[0][1][M->n-1]
+          | M->p[1][0][M->n-1] | M->p[1][1][M->n-1]) > 0);
+
+  ASSERT ((M1->p[0][0][M1->n-1] | M1->p[0][1][M1->n-1]
+          | M1->p[1][0][M1->n-1] | M1->p[1][1][M1->n-1]) > 0);
+
+  mpn_matrix22_mul (M->p[0][0], M->p[0][1],
+                   M->p[1][0], M->p[1][1], M->n,
+                   M1->p[0][0], M1->p[0][1],
+                   M1->p[1][0], M1->p[1][1], M1->n, tp);
+
+  /* Index of last potentially non-zero limb, size is one greater. */
+  n = M->n + M1->n;
+
+  n -= ((M->p[0][0][n] | M->p[0][1][n] | M->p[1][0][n] | M->p[1][1][n]) == 0);
+  n -= ((M->p[0][0][n] | M->p[0][1][n] | M->p[1][0][n] | M->p[1][1][n]) == 0);
+  n -= ((M->p[0][0][n] | M->p[0][1][n] | M->p[1][0][n] | M->p[1][1][n]) == 0);
+
+  ASSERT ((M->p[0][0][n] | M->p[0][1][n] | M->p[1][0][n] | M->p[1][1][n]) > 0);
+
+  M->n = n + 1;
+}
+
+/* Multiplies the least significant p limbs of (a;b) by M^-1.
+   Temporary space needed: 2 * (p + M->n)*/
+mp_size_t
+mpn_hgcd_matrix_adjust (const struct hgcd_matrix *M,
+                       mp_size_t n, mp_ptr ap, mp_ptr bp,
+                       mp_size_t p, mp_ptr tp)
+{
+  /* M^-1 (a;b) = (r11, -r01; -r10, r00) (a ; b)
+     = (r11 a - r01 b; - r10 a + r00 b) */
+
+  mp_ptr t0 = tp;
+  mp_ptr t1 = tp + p + M->n;
+  mp_limb_t ah, bh;
+  mp_limb_t cy;
+
+  ASSERT (p + M->n  < n);
+
+  /* First compute the two values depending on a, before overwriting a */
+
+  if (M->n >= p)
+    {
+      mpn_mul (t0, M->p[1][1], M->n, ap, p);
+      mpn_mul (t1, M->p[1][0], M->n, ap, p);
+    }
+  else
+    {
+      mpn_mul (t0, ap, p, M->p[1][1], M->n);
+      mpn_mul (t1, ap, p, M->p[1][0], M->n);
+    }
+
+  /* Update a */
+  MPN_COPY (ap, t0, p);
+  ah = mpn_add (ap + p, ap + p, n - p, t0 + p, M->n);
+
+  if (M->n >= p)
+    mpn_mul (t0, M->p[0][1], M->n, bp, p);
+  else
+    mpn_mul (t0, bp, p, M->p[0][1], M->n);
+
+  cy = mpn_sub (ap, ap, n, t0, p + M->n);
+  ASSERT (cy <= ah);
+  ah -= cy;
+
+  /* Update b */
+  if (M->n >= p)
+    mpn_mul (t0, M->p[0][0], M->n, bp, p);
+  else
+    mpn_mul (t0, bp, p, M->p[0][0], M->n);
+
+  MPN_COPY (bp, t0, p);
+  bh = mpn_add (bp + p, bp + p, n - p, t0 + p, M->n);
+  cy = mpn_sub (bp, bp, n, t1, p + M->n);
+  ASSERT (cy <= bh);
+  bh -= cy;
+
+  if (ah > 0 || bh > 0)
+    {
+      ap[n] = ah;
+      bp[n] = bh;
+      n++;
+    }
+  else
+    {
+      /* The subtraction can reduce the size by at most one limb. */
+      if (ap[n-1] == 0 && bp[n-1] == 0)
+       n--;
+    }
+  ASSERT (ap[n-1] > 0 || bp[n-1] > 0);
+  return n;
+}
diff --git a/mpn/generic/hgcd_reduce.c b/mpn/generic/hgcd_reduce.c

new file mode 100644 (file)

index 0000000..ec201f5
--- /dev/null
+++ b/mpn/generic/hgcd_reduce.c
@@ -0,0 +1,236 @@
+/* hgcd_reduce.c.
+
+   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
+   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2011, 2012 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+/* Computes R -= A * B. Result must be non-negative. The result is
+   normalized, but not below size an, and its size is returned. */
+static mp_size_t
+submul (mp_ptr rp, mp_size_t rn,
+       mp_srcptr ap, mp_size_t an, mp_srcptr bp, mp_size_t bn)
+{
+  mp_ptr tp;
+  TMP_DECL;
+
+  ASSERT (bn > 0);
+  ASSERT (an >= bn);
+  ASSERT (rn >= an);
+  ASSERT (an + bn <= rn + 1);
+
+  TMP_MARK;
+  tp = TMP_ALLOC_LIMBS (an + bn);
+
+  mpn_mul (tp, ap, an, bp, bn);
+  if (an + bn > rn)
+    {
+      ASSERT (tp[rn] == 0);
+      bn--;
+    }
+  ASSERT_NOCARRY (mpn_sub (rp, rp, rn, tp, an + bn));
+  TMP_FREE;
+
+  while (rn > an && (rp[rn-1] == 0))
+    rn--;
+
+  return rn;
+}
+
+/* Computes (a, b)  <--  M^{-1} (a; b) */
+/* FIXME:
+    x Take scratch parameter, and figure out scratch need.
+
+    x Use some fallback for small M->n?
+*/
+static mp_size_t
+hgcd_matrix_apply (const struct hgcd_matrix *M,
+                  mp_ptr ap, mp_ptr bp,
+                  mp_size_t n)
+{
+  mp_size_t an, bn, un, vn, nn;
+  mp_size_t mn[2][2];
+  mp_size_t modn;
+  mp_ptr tp, sp, scratch;
+  mp_limb_t cy;
+  unsigned i, j;
+
+  TMP_DECL;
+
+  ASSERT ( (ap[n-1] | bp[n-1]) > 0);
+
+  an = n;
+  MPN_NORMALIZE (ap, an);
+  bn = n;
+  MPN_NORMALIZE (bp, bn);
+
+  for (i = 0; i < 2; i++)
+    for (j = 0; j < 2; j++)
+      {
+       mp_size_t k;
+       k = M->n;
+       MPN_NORMALIZE (M->p[i][j], k);
+       mn[i][j] = k;
+      }
+
+  ASSERT (mn[0][0] > 0);
+  ASSERT (mn[1][1] > 0);
+  ASSERT ( (mn[0][1] | mn[1][0]) > 0);
+
+  TMP_MARK;
+
+  if (mn[0][1] == 0)
+    {
+      /* A unchanged, M = (1, 0; q, 1) */
+      ASSERT (mn[0][0] == 1);
+      ASSERT (M->p[0][0][0] == 1);
+      ASSERT (mn[1][1] == 1);
+      ASSERT (M->p[1][1][0] == 1);
+
+      /* Put B <-- B - q A */
+      nn = submul (bp, bn, ap, an, M->p[1][0], mn[1][0]);
+    }
+  else if (mn[1][0] == 0)
+    {
+      /* B unchanged, M = (1, q; 0, 1) */
+      ASSERT (mn[0][0] == 1);
+      ASSERT (M->p[0][0][0] == 1);
+      ASSERT (mn[1][1] == 1);
+      ASSERT (M->p[1][1][0] == 1);
+
+      /* Put A  <-- A - q * B */
+      nn = submul (ap, an, bp, bn, M->p[0][1], mn[0][1]);
+    }
+  else
+    {
+      /* A = m00 a + m01 b  ==> a <= A / m00, b <= A / m01.
+        B = m10 a + m11 b  ==> a <= B / m10, b <= B / m11. */
+      un = MIN (an - mn[0][0], bn - mn[1][0]) + 1;
+      vn = MIN (an - mn[0][1], bn - mn[1][1]) + 1;
+
+      nn = MAX (un, vn);
+      /* In the range of interest, mulmod_bnm1 should always beat mullo. */
+      modn = mpn_mulmod_bnm1_next_size (nn + 1);
+
+      scratch = TMP_ALLOC_LIMBS (mpn_mulmod_bnm1_itch (modn, modn, M->n));
+      tp = TMP_ALLOC_LIMBS (modn);
+      sp = TMP_ALLOC_LIMBS (modn);
+
+      ASSERT (n <= 2*modn);
+
+      if (n > modn)
+       {
+         cy = mpn_add (ap, ap, modn, ap + modn, n - modn);
+         MPN_INCR_U (ap, modn, cy);
+
+         cy = mpn_add (bp, bp, modn, bp + modn, n - modn);
+         MPN_INCR_U (bp, modn, cy);
+
+         n = modn;
+       }
+
+      mpn_mulmod_bnm1 (tp, modn, ap, n, M->p[1][1], mn[1][1], scratch);
+      mpn_mulmod_bnm1 (sp, modn, bp, n, M->p[0][1], mn[0][1], scratch);
+
+      /* FIXME: Handle the small n case in some better way. */
+      if (n + mn[1][1] < modn)
+       MPN_ZERO (tp + n + mn[1][1], modn - n - mn[1][1]);
+      if (n + mn[0][1] < modn)
+       MPN_ZERO (sp + n + mn[0][1], modn - n - mn[0][1]);
+
+      cy = mpn_sub_n (tp, tp, sp, modn);
+      MPN_DECR_U (tp, modn, cy);
+
+      ASSERT (mpn_zero_p (tp + nn, modn - nn));
+
+      mpn_mulmod_bnm1 (sp, modn, ap, n, M->p[1][0], mn[1][0], scratch);
+      MPN_COPY (ap, tp, nn);
+      mpn_mulmod_bnm1 (tp, modn, bp, n, M->p[0][0], mn[0][0], scratch);
+
+      if (n + mn[1][0] < modn)
+       MPN_ZERO (sp + n + mn[1][0], modn - n - mn[1][0]);
+      if (n + mn[0][0] < modn)
+       MPN_ZERO (tp + n + mn[0][0], modn - n - mn[0][0]);
+
+      cy = mpn_sub_n (tp, tp, sp, modn);
+      MPN_DECR_U (tp, modn, cy);
+
+      ASSERT (mpn_zero_p (tp + nn, modn - nn));
+      MPN_COPY (bp, tp, nn);
+
+      while ( (ap[nn-1] | bp[nn-1]) == 0)
+       {
+         nn--;
+         ASSERT (nn > 0);
+       }
+    }
+  TMP_FREE;
+
+  return nn;
+}
+
+mp_size_t
+mpn_hgcd_reduce_itch (mp_size_t n, mp_size_t p)
+{
+  mp_size_t itch;
+  if (BELOW_THRESHOLD (n, HGCD_REDUCE_THRESHOLD))
+    {
+      itch = mpn_hgcd_itch (n-p);
+
+      /* For arbitrary p, the storage for _adjust is 2*(p + M->n) = 2 *
+        (p + ceil((n-p)/2) - 1) <= n + p - 1 */
+      if (itch < n + p - 1)
+       itch = n + p - 1;
+    }
+  else
+    {
+      itch = 2*(n-p) + mpn_hgcd_itch (n-p);
+      /* Currently, hgcd_matrix_apply allocates its own storage. */
+    }
+  return itch;
+}
+
+/* FIXME: Document storage need. */
+mp_size_t
+mpn_hgcd_reduce (struct hgcd_matrix *M,
+                mp_ptr ap, mp_ptr bp, mp_size_t n, mp_size_t p,
+                mp_ptr tp)
+{
+  mp_size_t nn;
+  if (BELOW_THRESHOLD (n, HGCD_REDUCE_THRESHOLD))
+    {
+      nn = mpn_hgcd (ap + p, bp + p, n - p, M, tp);
+      if (nn > 0)
+       /* Needs 2*(p + M->n) <= 2*(floor(n/2) + ceil(n/2) - 1)
+          = 2 (n - 1) */
+       return mpn_hgcd_matrix_adjust (M, p + nn, ap, bp, p, tp);
+    }
+  else
+    {
+      MPN_COPY (tp, ap + p, n - p);
+      MPN_COPY (tp + n - p, bp + p, n - p);
+      if (mpn_hgcd_appr (tp, tp + n - p, n - p, M, tp + 2*(n-p)))
+       return hgcd_matrix_apply (M, ap, bp, n);
+    }
+  return 0;
+}
diff --git a/mpn/generic/hgcd_step.c b/mpn/generic/hgcd_step.c

new file mode 100644 (file)

index 0000000..740c56b
--- /dev/null
+++ b/mpn/generic/hgcd_step.c
@@ -0,0 +1,117 @@
+/* hgcd_step.c.
+
+   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
+   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2003, 2004, 2005, 2008, 2011, 2012 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+
+static void
+hgcd_hook (void *p, mp_srcptr gp, mp_size_t gn,
+          mp_srcptr qp, mp_size_t qn, int d)
+{
+  ASSERT (!gp);
+  ASSERT (d >= 0);
+  ASSERT (d <= 1);
+
+  MPN_NORMALIZE (qp, qn);
+  if (qn > 0)
+    {
+      struct hgcd_matrix *M = (struct hgcd_matrix *) p;
+      /* NOTES: This is a bit ugly. A tp area is passed to
+        gcd_subdiv_step, which stores q at the start of that area. We
+        now use the rest. */
+      mp_ptr tp = (mp_ptr) qp + qn;
+      mpn_hgcd_matrix_update_q (M, qp, qn, d, tp);
+    }
+}
+
+/* Perform a few steps, using some of mpn_hgcd2, subtraction and
+   division. Reduces the size by almost one limb or more, but never
+   below the given size s. Return new size for a and b, or 0 if no
+   more steps are possible.
+
+   If hgcd2 succeeds, needs temporary space for hgcd_matrix_mul_1, M->n
+   limbs, and matrix22_mul1_inverse_vector, n limbs. If hgcd2
+   fails, needs space for the quotient, qn <= n - s limbs, and for
+   hgcd_matrix_update_q, qn + (size of the appropriate column of M) <=
+   (resulting size of M) + 1.
+
+   If N is the input size to the calling hgcd, then s = floor(N/2) +
+   1, M->n < N, qn + product size <= n - s + n - s + 1 = 2 (n - s) + 1
+   <= N.
+*/
+
+mp_size_t
+mpn_hgcd_step (mp_size_t n, mp_ptr ap, mp_ptr bp, mp_size_t s,
+              struct hgcd_matrix *M, mp_ptr tp)
+{
+  struct hgcd_matrix1 M1;
+  mp_limb_t mask;
+  mp_limb_t ah, al, bh, bl;
+
+  ASSERT (n > s);
+
+  mask = ap[n-1] | bp[n-1];
+  ASSERT (mask > 0);
+
+  if (n == s + 1)
+    {
+      if (mask < 4)
+       goto subtract;
+
+      ah = ap[n-1]; al = ap[n-2];
+      bh = bp[n-1]; bl = bp[n-2];
+    }
+  else if (mask & GMP_NUMB_HIGHBIT)
+    {
+      ah = ap[n-1]; al = ap[n-2];
+      bh = bp[n-1]; bl = bp[n-2];
+    }
+  else
+    {
+      int shift;
+
+      count_leading_zeros (shift, mask);
+      ah = MPN_EXTRACT_NUMB (shift, ap[n-1], ap[n-2]);
+      al = MPN_EXTRACT_NUMB (shift, ap[n-2], ap[n-3]);
+      bh = MPN_EXTRACT_NUMB (shift, bp[n-1], bp[n-2]);
+      bl = MPN_EXTRACT_NUMB (shift, bp[n-2], bp[n-3]);
+    }
+
+  /* Try an mpn_hgcd2 step */
+  if (mpn_hgcd2 (ah, al, bh, bl, &M1))
+    {
+      /* Multiply M <- M * M1 */
+      mpn_hgcd_matrix_mul_1 (M, &M1, tp);
+
+      /* Can't swap inputs, so we need to copy. */
+      MPN_COPY (tp, ap, n);
+      /* Multiply M1^{-1} (a;b) */
+      return mpn_matrix22_mul1_inverse_vector (&M1, ap, tp, bp, n);
+    }
+
+ subtract:
+
+  return mpn_gcd_subdiv_step (ap, bp, n, s, hgcd_hook, M, tp);
+}
diff --git a/mpn/generic/invert.c b/mpn/generic/invert.c

index dda2500943870eb04766af6fc0a1724ec62096a5..ac62d23722ebedb20d534e30aec9df5b2f7059aa 100644 (file)
--- a/mpn/generic/invert.c
+++ b/mpn/generic/invert.c
@@ -6,7 +6,7 @@
     SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
     GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.
  
-Copyright (C) 2007, 2009, 2010 Free Software Foundation, Inc.
+Copyright (C) 2007, 2009, 2010, 2012 Free Software Foundation, Inc.
  
  This file is part of the GNU MP Library.
  
@@ -23,10 +23,6 @@ License for more details.
  You should have received a copy of the GNU Lesser General Public License
  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
  
-/* FIXME: Remove NULL and TMP_*, as soon as all the callers properly
-   allocate and pass the scratch to the function. */
-#include <stdlib.h>            /* for NULL */
-
  #include "gmp.h"
  #include "gmp-impl.h"
  #include "longlong.h"
@@ -46,9 +42,6 @@ mpn_invert (mp_ptr ip, mp_srcptr dp, mp_size_t n, mp_ptr scratch)
      TMP_DECL;
  
      TMP_MARK;
-    if (scratch == NULL)
-      scratch = TMP_ALLOC_LIMBS (mpn_invert_itch (n));
-
      if (BELOW_THRESHOLD (n, INV_APPR_THRESHOLD))
        {
         /* Maximum scratch needed by this branch: 2*n */
@@ -74,7 +67,7 @@ mpn_invert (mp_ptr ip, mp_srcptr dp, mp_size_t n, mp_ptr scratch)
        ASSERT ( mpn_invert_itch (n) >= mpn_invertappr_itch (n) );
        e = mpn_ni_invertappr (ip, dp, n, scratch);
  
-      if (e) { /* Assume the error can only be "0" (no error) or "1". */
+      if (UNLIKELY (e)) { /* Assume the error can only be "0" (no error) or "1". */
         /* Code to detect and correct the "off by one" approximation. */
         mpn_mul_n (scratch, ip, dp, n);
         ASSERT_NOCARRY (mpn_add_n (scratch + n, scratch + n, dp, n));
diff --git a/mpn/generic/invertappr.c b/mpn/generic/invertappr.c

index 8064956298df072c349f7afc380fd1e20cd566fa..6430d2ea3dc67a03f572baf6160ccaf6ec6d65a3 100644 (file)
--- a/mpn/generic/invertappr.c
+++ b/mpn/generic/invertappr.c
@@ -12,7 +12,7 @@
     SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
     GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.
  
-Copyright (C) 2007, 2009, 2010 Free Software Foundation, Inc.
+Copyright (C) 2007, 2009, 2010, 2012 Free Software Foundation, Inc.
  
  This file is part of the GNU MP Library.
  
@@ -41,14 +41,6 @@ along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
     parts, the use of log_2 (or counting the bits) underestimate the maximum
     number of iterations.  */
  
-/* This is intended for constant THRESHOLDs only, where the compiler
-   can completely fold the result.  */
-#define LOG2C(n) \
- (((n) >=    0x1) + ((n) >=    0x2) + ((n) >=    0x4) + ((n) >=    0x8) + \
-  ((n) >=   0x10) + ((n) >=   0x20) + ((n) >=   0x40) + ((n) >=   0x80) + \
-  ((n) >=  0x100) + ((n) >=  0x200) + ((n) >=  0x400) + ((n) >=  0x800) + \
-  ((n) >= 0x1000) + ((n) >= 0x2000) + ((n) >= 0x4000) + ((n) >= 0x8000))
-
  #if TUNE_PROGRAM_BUILD
  #define NPOWS \
   ((sizeof(mp_size_t) > 6 ? 48 : 8*sizeof(mp_size_t)))
diff --git a/mpn/generic/jacbase.c b/mpn/generic/jacbase.c

index 6972a130d96270c0a49a2a71c2ef8f7a51d167d8..0cd80d83140743823945d8e7ee7f075d6fe4904a 100644 (file)
--- a/mpn/generic/jacbase.c
+++ b/mpn/generic/jacbase.c
@@ -3,7 +3,7 @@
     THIS INTERFACE IS PRELIMINARY AND MIGHT DISAPPEAR OR BE SUBJECT TO
     INCOMPATIBLE CHANGES IN A FUTURE RELEASE OF GMP.
  
-Copyright 1999, 2000, 2001, 2002 Free Software Foundation, Inc.
+Copyright 1999, 2000, 2001, 2002, 2010 Free Software Foundation, Inc.
  
  This file is part of the GNU MP Library.
  
@@ -72,15 +72,15 @@ along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
  #define PROCESS_TWOS_EVEN               \
    {                                     \
      int  two, mask, shift;              \
-                                        \
+                                       \
      two = JACOBI_TWO_U_BIT1 (b);        \
      mask = (~a & 2);                    \
      a >>= 1;                            \
-                                        \
+                                       \
      shift = (~a & 1);                   \
      a >>= shift;                        \
      result_bit1 ^= two ^ (two & mask);  \
-                                        \
+                                       \
      while ((a & 1) == 0)                \
        {                                 \
         a >>= 1;                        \
@@ -91,14 +91,14 @@ along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
  #define PROCESS_TWOS_ANY                \
    {                                     \
      int  two, mask, shift;              \
-                                        \
+                                       \
      two = JACOBI_TWO_U_BIT1 (b);        \
      shift = (~a & 1);                   \
      a >>= shift;                        \
-                                        \
+                                       \
      mask = shift << 1;                  \
      result_bit1 ^= (two & mask);        \
-                                        \
+                                       \
      while ((a & 1) == 0)                \
        {                                 \
         a >>= 1;                        \
@@ -108,9 +108,9 @@ along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
    }
  #endif
  
-
+#if JACOBI_BASE_METHOD < 4
  /* Calculate the value of the Jacobi symbol (a/b) of two mp_limb_t's, but
-   with a restricted range of inputs accepted, namely b>1, b odd, and a<=b.
+   with a restricted range of inputs accepted, namely b>1, b odd.
  
     The initial result_bit1 is taken as a parameter for the convenience of
     mpz_kronecker_ui() et al.  The sign changes both here and in those
@@ -122,17 +122,13 @@ along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
  
     Duplicating the loop body to avoid the MP_LIMB_T_SWAP(a,b) would be
     possible, but a couple of tests suggest it's not a significant speedup,
-   and may even be a slowdown, so what's here is good enough for now.
-
-   Future: The code doesn't demand a<=b actually, so maybe this could be
-   relaxed.  All the places this is used currently call with a<=b though.  */
+   and may even be a slowdown, so what's here is good enough for now. */
  
  int
  mpn_jacobi_base (mp_limb_t a, mp_limb_t b, int result_bit1)
  {
    ASSERT (b & 1);  /* b odd */
    ASSERT (b != 1);
-  ASSERT (a <= b);
  
    if (a == 0)
      return 0;
@@ -141,11 +137,15 @@ mpn_jacobi_base (mp_limb_t a, mp_limb_t b, int result_bit1)
    if (a == 1)
      goto done;
  
+  if (a >= b)
+    goto a_gt_b;
+
    for (;;)
      {
        result_bit1 ^= JACOBI_RECIP_UU_BIT1 (a, b);
        MP_LIMB_T_SWAP (a, b);
  
+    a_gt_b:
        do
         {
           /* working on (a/b), a,b odd, a>=b */
@@ -166,3 +166,67 @@ mpn_jacobi_base (mp_limb_t a, mp_limb_t b, int result_bit1)
   done:
    return JACOBI_BIT1_TO_PN (result_bit1);
  }
+#endif
+
+#if JACOBI_BASE_METHOD == 4
+/* Computes (a/b) for odd b > 1 and any a. The initial bit is taken as a
+ * parameter. We have no need for the convention that the sign is in
+ * bit 1, internally we use bit 0. */
+
+/* FIXME: Could try table-based count_trailing_zeros. */
+int
+mpn_jacobi_base (mp_limb_t a, mp_limb_t b, int bit)
+{
+  int c;
+
+  ASSERT (b & 1);
+  ASSERT (b > 1);
+
+  if (a == 0)
+    /* This is the only line which depends on b > 1 */
+    return 0;
+
+  bit >>= 1;
+
+  /* Below, we represent a and b shifted right so that the least
+     significant one bit is implicit. */
+
+  b >>= 1;
+
+  count_trailing_zeros (c, a);
+  bit ^= c & (b ^ (b >> 1));
+
+  /* We may have c==GMP_LIMB_BITS-1, so we can't use a>>c+1. */
+  a >>= c;
+  a >>= 1;
+
+  do
+    {
+      mp_limb_t t = a - b;
+      mp_limb_t bgta = LIMB_HIGHBIT_TO_MASK (t);
+
+      if (t == 0)
+       return 0;
+
+      /* If b > a, invoke reciprocity */
+      bit ^= (bgta & a & b);
+
+      /* b <-- min (a, b) */
+      b += (bgta & t);
+
+      /* a <-- |a - b| */
+      a = (t ^ bgta) - bgta;
+
+      /* Number of trailing zeros is the same no matter if we look at
+       * t or a, but using t gives more parallelism. */
+      count_trailing_zeros (c, t);
+      c ++;
+      /* (2/b) = -1 if b = 3 or 5 mod 8 */
+      bit ^= c & (b ^ (b >> 1));
+      a >>= c;
+    }
+  while (b > 0);
+
+  return 1-2*(bit & 1);
+}
+#endif /* JACOBI_BASE_METHOD == 4 */
diff --git a/mpn/generic/jacobi.c b/mpn/generic/jacobi.c

new file mode 100644 (file)

index 0000000..478f063
--- /dev/null
+++ b/mpn/generic/jacobi.c
@@ -0,0 +1,284 @@
+/* jacobi.c
+
+   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
+   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 1996, 1998, 2000, 2001, 2002, 2003, 2004, 2008, 2010, 2011 Free Software
+Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+#ifndef JACOBI_DC_THRESHOLD
+#define JACOBI_DC_THRESHOLD GCD_DC_THRESHOLD
+#endif
+
+/* Schönhage's rules:
+ *
+ * Assume r0 = r1 q1 + r2, with r0 odd, and r1 = q2 r2 + r3
+ *
+ * If r1 is odd, then
+ *
+ *   (r1 | r0) = s(r1, r0) (r0 | r1) = s(r1, r0) (r2 | r1)
+ *
+ * where s(x,y) = (-1)^{(x-1)(y-1)/4} = (-1)^[x = y = 3 (mod 4)].
+ *
+ * If r1 is even, r2 must be odd. We have
+ *
+ *   (r1 | r0) = (r1 - r0 | r0) = (-1)^{(r0-1)/2} (r0 - r1 | r0)
+ *             = (-1)^{(r0-1)/2} s(r0, r0 - r1) (r0 | r0 - r1)
+ *             = (-1)^{(r0-1)/2} s(r0, r0 - r1) (r1 | r0 - r1)
+ *
+ * Now, if r1 = 0 (mod 4), then the sign factor is +1, and repeating
+ * q1 times gives
+ *
+ *   (r1 | r0) = (r1 | r2) = (r3 | r2)
+ *
+ * On the other hand, if r1 = 2 (mod 4), the sign factor is
+ * (-1)^{(r0-1)/2}, and repeating q1 times gives the exponent
+ *
+ *   (r0-1)/2 + (r0-r1-1)/2 + ... + (r0 - (q1-1) r1 - 1)/2
+ *   = q1 (r0-1)/2 + q1 (q1-1)/2  (mod 2)
+ *
+ * and we can summarize the even case as
+ *
+ *   (r1 | r0) = t(r1, r0, q1) (r3 | r2)
+ *
+ * where t(x,y,q) = (-1)^{[x = 2 (mod 4)] (q(y-1)/2 + y(q-1)/2)}
+ *
+ * What about termination? The remainder sequence ends with (0|1) = 1
+ * (or (0 | r) = 0 if r != 1). What are the possible cases? If r1 is
+ * odd, r2 may be zero. If r1 is even, then r2 = r0 - q1 r1 is odd and
+ * hence non-zero. We may have r3 = r1 - q2 r2 = 0.
+ *
+ * Examples: (11|15) = - (15|11) = - (4|11)
+ *            (4|11) =    (4| 3) =   (1| 3)
+ *            (1| 3) = (3|1) = (0|1) = 1
+ *
+ *             (2|7) = (2|1) = (0|1) = 1
+ *
+ * Detail:     (2|7) = (2-7|7) = (-1|7)(5|7) = -(7|5) = -(2|5)
+ *             (2|5) = (2-5|5) = (-1|5)(3|5) =  (5|3) =  (2|3)
+ *             (2|3) = (2-3|3) = (-1|3)(1|3) = -(3|1) = -(2|1)
+ *
+ */
+
+/* In principle, the state consists of four variables: e (one bit), a,
+   b (two bits each), d (one bit). Collected factors are (-1)^e. a and
+   b are the least significant bits of the current remainders. d
+   (denominator) is 0 if we're currently subtracting multiplies of a
+   from b, and 1 if we're subtracting b from a.
+
+   e is stored in the least significant bit, while a, b and d are
+   coded as only 13 distinct values in bits 1-4, according to the
+   following table. For rows not mentioning d, the value is either
+   implied, or it doesn't matter. */
+
+#if WANT_ASSERT
+static const struct
+{
+  unsigned char a;
+  unsigned char b;
+} decode_table[13] = {
+  /*  0 */ { 0, 1 },
+  /*  1 */ { 0, 3 },
+  /*  2 */ { 1, 1 },
+  /*  3 */ { 1, 3 },
+  /*  4 */ { 2, 1 },
+  /*  5 */ { 2, 3 },
+  /*  6 */ { 3, 1 },
+  /*  7 */ { 3, 3 }, /* d = 1 */
+  /*  8 */ { 1, 0 },
+  /*  9 */ { 1, 2 },
+  /* 10 */ { 3, 0 },
+  /* 11 */ { 3, 2 },
+  /* 12 */ { 3, 3 }, /* d = 0 */
+};
+#define JACOBI_A(bits) (decode_table[(bits)>>1].a)
+#define JACOBI_B(bits) (decode_table[(bits)>>1].b)
+#endif /* WANT_ASSERT */
+
+const unsigned char jacobi_table[208] = {
+#include "jacobitab.h"
+};
+
+#define BITS_FAIL 31
+
+static void
+jacobi_hook (void *p, mp_srcptr gp, mp_size_t gn,
+            mp_srcptr qp, mp_size_t qn, int d)
+{
+  unsigned *bitsp = (unsigned *) p;
+
+  if (gp)
+    {
+      ASSERT (gn > 0);
+      if (gn != 1 || gp[0] != 1)
+       {
+         *bitsp = BITS_FAIL;
+         return;
+       }
+    }
+
+  if (qp)
+    {
+      ASSERT (qn > 0);
+      ASSERT (d >= 0);
+      *bitsp = mpn_jacobi_update (*bitsp, d, qp[0] & 3);
+    }
+}
+
+#define CHOOSE_P(n) (2*(n) / 3)
+
+int
+mpn_jacobi_n (mp_ptr ap, mp_ptr bp, mp_size_t n, unsigned bits)
+{
+  mp_size_t scratch;
+  mp_size_t matrix_scratch;
+  mp_ptr tp;
+
+  TMP_DECL;
+
+  ASSERT (n > 0);
+  ASSERT ( (ap[n-1] | bp[n-1]) > 0);
+  ASSERT ( (bp[0] | ap[0]) & 1);
+
+  /* FIXME: Check for small sizes first, before setting up temporary
+     storage etc. */
+  scratch = MPN_GCD_SUBDIV_STEP_ITCH(n);
+
+  if (ABOVE_THRESHOLD (n, GCD_DC_THRESHOLD))
+    {
+      mp_size_t hgcd_scratch;
+      mp_size_t update_scratch;
+      mp_size_t p = CHOOSE_P (n);
+      mp_size_t dc_scratch;
+
+      matrix_scratch = MPN_HGCD_MATRIX_INIT_ITCH (n - p);
+      hgcd_scratch = mpn_hgcd_itch (n - p);
+      update_scratch = p + n - 1;
+
+      dc_scratch = matrix_scratch + MAX(hgcd_scratch, update_scratch);
+      if (dc_scratch > scratch)
+       scratch = dc_scratch;
+    }
+
+  TMP_MARK;
+  tp = TMP_ALLOC_LIMBS(scratch);
+
+  while (ABOVE_THRESHOLD (n, JACOBI_DC_THRESHOLD))
+    {
+      struct hgcd_matrix M;
+      mp_size_t p = 2*n/3;
+      mp_size_t matrix_scratch = MPN_HGCD_MATRIX_INIT_ITCH (n - p);
+      mp_size_t nn;
+      mpn_hgcd_matrix_init (&M, n - p, tp);
+
+      nn = mpn_hgcd_jacobi (ap + p, bp + p, n - p, &M, &bits,
+                           tp + matrix_scratch);
+      if (nn > 0)
+       {
+         ASSERT (M.n <= (n - p - 1)/2);
+         ASSERT (M.n + p <= (p + n - 1) / 2);
+         /* Temporary storage 2 (p + M->n) <= p + n - 1. */
+         n = mpn_hgcd_matrix_adjust (&M, p + nn, ap, bp, p, tp + matrix_scratch);
+       }
+      else
+       {
+         /* Temporary storage n */
+         n = mpn_gcd_subdiv_step (ap, bp, n, 0, jacobi_hook, &bits, tp);
+         if (!n)
+           {
+             TMP_FREE;
+             return bits == BITS_FAIL ? 0 : mpn_jacobi_finish (bits);
+           }
+       }
+    }
+
+  while (n > 2)
+    {
+      struct hgcd_matrix1 M;
+      mp_limb_t ah, al, bh, bl;
+      mp_limb_t mask;
+
+      mask = ap[n-1] | bp[n-1];
+      ASSERT (mask > 0);
+
+      if (mask & GMP_NUMB_HIGHBIT)
+       {
+         ah = ap[n-1]; al = ap[n-2];
+         bh = bp[n-1]; bl = bp[n-2];
+       }
+      else
+       {
+         int shift;
+
+         count_leading_zeros (shift, mask);
+         ah = MPN_EXTRACT_NUMB (shift, ap[n-1], ap[n-2]);
+         al = MPN_EXTRACT_NUMB (shift, ap[n-2], ap[n-3]);
+         bh = MPN_EXTRACT_NUMB (shift, bp[n-1], bp[n-2]);
+         bl = MPN_EXTRACT_NUMB (shift, bp[n-2], bp[n-3]);
+       }
+
+      /* Try an mpn_hgcd2_jacobi step */
+      if (mpn_hgcd2_jacobi (ah, al, bh, bl, &M, &bits))
+       {
+         n = mpn_matrix22_mul1_inverse_vector (&M, tp, ap, bp, n);
+         MP_PTR_SWAP (ap, tp);
+       }
+      else
+       {
+         /* mpn_hgcd2 has failed. Then either one of a or b is very
+            small, or the difference is very small. Perform one
+            subtraction followed by one division. */
+         n = mpn_gcd_subdiv_step (ap, bp, n, 0, &jacobi_hook, &bits, tp);
+         if (!n)
+           {
+             TMP_FREE;
+             return bits == BITS_FAIL ? 0 : mpn_jacobi_finish (bits);
+           }
+       }
+    }
+
+  if (bits >= 16)
+    MP_PTR_SWAP (ap, bp);
+
+  ASSERT (bp[0] & 1);
+
+  if (n == 1)
+    {
+      mp_limb_t al, bl;
+      al = ap[0];
+      bl = bp[0];
+
+      TMP_FREE;
+      if (bl == 1)
+       return 1 - 2*(bits & 1);
+      else
+       return mpn_jacobi_base (al, bl, bits << 1);
+    }
+
+  else
+    {
+      int res = mpn_jacobi_2 (ap, bp, bits & 1);
+      TMP_FREE;
+      return res;
+    }
+}
diff --git a/mpn/generic/jacobi_2.c b/mpn/generic/jacobi_2.c

new file mode 100644 (file)

index 0000000..9a8a2b5
--- /dev/null
+++ b/mpn/generic/jacobi_2.c
@@ -0,0 +1,342 @@
+/* jacobi_2.c
+
+   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
+   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 1996, 1998, 2000, 2001, 2002, 2003, 2004, 2008, 2010 Free Software
+Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+#ifndef JACOBI_2_METHOD
+#define JACOBI_2_METHOD 2
+#endif
+
+/* Computes (a / b) where b is odd, and a and b are otherwise arbitrary
+   two-limb numbers. */
+#if JACOBI_2_METHOD == 1
+int
+mpn_jacobi_2 (mp_srcptr ap, mp_srcptr bp, unsigned bit)
+{
+  mp_limb_t ah, al, bh, bl;
+  int c;
+
+  al = ap[0];
+  ah = ap[1];
+  bl = bp[0];
+  bh = bp[1];
+
+  ASSERT (bl & 1);
+
+  bl = ((bh << (GMP_NUMB_BITS - 1)) & GMP_NUMB_MASK) | (bl >> 1);
+  bh >>= 1;
+
+  if ( (bh | bl) == 0)
+    return 1 - 2*(bit & 1);
+
+  if ( (ah | al) == 0)
+    return 0;
+
+  if (al == 0)
+    {
+      al = ah;
+      ah = 0;
+      bit ^= GMP_NUMB_BITS & (bl ^ (bl >> 1));
+    }
+  count_trailing_zeros (c, al);
+  bit ^= c & (bl ^ (bl >> 1));
+
+  c++;
+  if (UNLIKELY (c == GMP_NUMB_BITS))
+    {
+      al = ah;
+      ah = 0;
+    }
+  else
+    {
+      al = ((ah << (GMP_NUMB_BITS - c)) & GMP_NUMB_MASK) | (al >> c);
+      ah >>= c;
+    }
+  while ( (ah | bh) > 0)
+    {
+      mp_limb_t th, tl;
+      mp_limb_t bgta;
+
+      sub_ddmmss (th, tl, ah, al, bh, bl);
+      if ( (tl | th) == 0)
+       return 0;
+
+      bgta = LIMB_HIGHBIT_TO_MASK (th);
+
+      /* If b > a, invoke reciprocity */
+      bit ^= (bgta & al & bl);
+
+      /* b <-- min (a, b) */
+      add_ssaaaa (bh, bl, bh, bl, th & bgta, tl & bgta);
+
+      if ( (bh | bl) == 0)
+       return 1 - 2*(bit & 1);
+
+      /* a <-- |a - b| */
+      al = (bgta ^ tl) - bgta;
+      ah = (bgta ^ th);
+
+      if (UNLIKELY (al == 0))
+       {
+         /* If b > a, al == 0 implies that we have a carry to
+            propagate. */
+         al = ah - bgta;
+         ah = 0;
+         bit ^= GMP_NUMB_BITS & (bl ^ (bl >> 1));
+       }
+      count_trailing_zeros (c, al);
+      c++;
+      bit ^= c & (bl ^ (bl >> 1));
+
+      if (UNLIKELY (c == GMP_NUMB_BITS))
+       {
+         al = ah;
+         ah = 0;
+       }
+      else
+       {
+         al = ((ah << (GMP_NUMB_BITS - c)) & GMP_NUMB_MASK) | (al >> c);
+         ah >>= c;
+       }
+    }
+
+  ASSERT (bl > 0);
+
+  while ( (al | bl) & GMP_LIMB_HIGHBIT)
+    {
+      /* Need an extra comparison to get the mask. */
+      mp_limb_t t = al - bl;
+      mp_limb_t bgta = - (bl > al);
+
+      if (t == 0)
+       return 0;
+
+      /* If b > a, invoke reciprocity */
+      bit ^= (bgta & al & bl);
+
+      /* b <-- min (a, b) */
+      bl += (bgta & t);
+
+      /* a <-- |a - b| */
+      al = (t ^ bgta) - bgta;
+
+      /* Number of trailing zeros is the same no matter if we look at
+       * t or a, but using t gives more parallelism. */
+      count_trailing_zeros (c, t);
+      c ++;
+      /* (2/b) = -1 if b = 3 or 5 mod 8 */
+      bit ^= c & (bl ^ (bl >> 1));
+
+      if (UNLIKELY (c == GMP_NUMB_BITS))
+       return 1 - 2*(bit & 1);
+
+      al >>= c;
+    }
+
+  /* Here we have a little impedance mismatch. Better to inline it? */
+  return mpn_jacobi_base (2*al+1, 2*bl+1, bit << 1);
+}
+#elif JACOBI_2_METHOD == 2
+int
+mpn_jacobi_2 (mp_srcptr ap, mp_srcptr bp, unsigned bit)
+{
+  mp_limb_t ah, al, bh, bl;
+  int c;
+
+  al = ap[0];
+  ah = ap[1];
+  bl = bp[0];
+  bh = bp[1];
+
+  ASSERT (bl & 1);
+
+  /* Use bit 1. */
+  bit <<= 1;
+
+  if (bh == 0 && bl == 1)
+    /* (a|1) = 1 */
+    return 1 - (bit & 2);
+
+  if (al == 0)
+    {
+      if (ah == 0)
+       /* (0|b) = 0, b > 1 */
+       return 0;
+
+      count_trailing_zeros (c, ah);
+      bit ^= ((GMP_NUMB_BITS + c) << 1) & (bl ^ (bl >> 1));
+
+      al = bl;
+      bl = ah >> c;
+
+      if (bl == 1)
+       /* (1|b) = 1 */
+       return 1 - (bit & 2);
+
+      ah = bh;
+
+      bit ^= al & bl;
+
+      goto b_reduced;
+    }
+  if ( (al & 1) == 0)
+    {
+      count_trailing_zeros (c, al);
+
+      al = ((ah << (GMP_NUMB_BITS - c)) & GMP_NUMB_MASK) | (al >> c);
+      ah >>= c;
+      bit ^= (c << 1) & (bl ^ (bl >> 1));
+    }
+  if (ah == 0)
+    {
+      if (bh > 0)
+       {
+         bit ^= al & bl;
+         MP_LIMB_T_SWAP (al, bl);
+         ah = bh;
+         goto b_reduced;
+       }
+      goto ab_reduced;
+    }
+
+  while (bh > 0)
+    {
+      /* Compute (a|b) */
+      while (ah > bh)
+       {
+         sub_ddmmss (ah, al, ah, al, bh, bl);
+         if (al == 0)
+           {
+             count_trailing_zeros (c, ah);
+             bit ^= ((GMP_NUMB_BITS + c) << 1) & (bl ^ (bl >> 1));
+
+             al = bl;
+             bl = ah >> c;
+             ah = bh;
+
+             bit ^= al & bl;
+             goto b_reduced;
+           }
+         count_trailing_zeros (c, al);
+         bit ^= (c << 1) & (bl ^ (bl >> 1));
+         al = ((ah << (GMP_NUMB_BITS - c)) & GMP_NUMB_MASK) | (al >> c);
+         ah >>= c;
+       }
+      if (ah == bh)
+       goto cancel_hi;
+
+      if (ah == 0)
+       {
+         bit ^= al & bl;
+         MP_LIMB_T_SWAP (al, bl);
+         ah = bh;
+         break;
+       }
+
+      bit ^= al & bl;
+
+      /* Compute (b|a) */
+      while (bh > ah)
+       {
+         sub_ddmmss (bh, bl, bh, bl, ah, al);
+         if (bl == 0)
+           {
+             count_trailing_zeros (c, bh);
+             bit ^= ((GMP_NUMB_BITS + c) << 1) & (al ^ (al >> 1));
+
+             bl = bh >> c;
+             bit ^= al & bl;
+             goto b_reduced;
+           }
+         count_trailing_zeros (c, bl);
+         bit ^= (c << 1) & (al ^ (al >> 1));
+         bl = ((bh << (GMP_NUMB_BITS - c)) & GMP_NUMB_MASK) | (bl >> c);
+         bh >>= c;
+       }
+      bit ^= al & bl;
+
+      /* Compute (a|b) */
+      if (ah == bh)
+       {
+       cancel_hi:
+         if (al < bl)
+           {
+             MP_LIMB_T_SWAP (al, bl);
+             bit ^= al & bl;
+           }
+         al -= bl;
+         if (al == 0)
+           return 0;
+
+         count_trailing_zeros (c, al);
+         bit ^= (c << 1) & (bl ^ (bl >> 1));
+         al >>= c;
+
+         if (al == 1)
+           return 1 - (bit & 2);
+
+         MP_LIMB_T_SWAP (al, bl);
+         bit ^= al & bl;
+         break;
+       }
+    }
+
+ b_reduced:
+  /* Compute (a|b), with b a single limb. */
+  ASSERT (bl & 1);
+
+  if (bl == 1)
+    /* (a|1) = 1 */
+    return 1 - (bit & 2);
+
+  while (ah > 0)
+    {
+      ah -= (al < bl);
+      al -= bl;
+      if (al == 0)
+       {
+         if (ah == 0)
+           return 0;
+         count_trailing_zeros (c, ah);
+         bit ^= ((GMP_NUMB_BITS + c) << 1) & (bl ^ (bl >> 1));
+         al = ah >> c;
+         goto ab_reduced;
+       }
+      count_trailing_zeros (c, al);
+
+      al = ((ah << (GMP_NUMB_BITS - c)) & GMP_NUMB_MASK) | (al >> c);
+      ah >>= c;
+      bit ^= (c << 1) & (bl ^ (bl >> 1));
+    }
+ ab_reduced:
+  ASSERT (bl & 1);
+  ASSERT (bl > 1);
+
+  return mpn_jacobi_base (al, bl, bit);
+}
+#else
+#error Unsupported value for JACOBI_2_METHOD
+#endif
diff --git a/mpn/generic/matrix22_mul.c b/mpn/generic/matrix22_mul.c

index 7e710d2a1dee4885d763dd7200b07d3821f10349..6e0e67586f72545c90a7147888081c85089dfce2 100644 (file)
--- a/mpn/generic/matrix22_mul.c
+++ b/mpn/generic/matrix22_mul.c
@@ -149,7 +149,7 @@ mpn_matrix22_mul_strassen (mp_ptr r0, mp_ptr r1, mp_ptr r2, mp_ptr r3, mp_size_t
      {
        s0[rn] = r1[rn] - mpn_sub_n (s0, r1, r0, rn);
        s0s = 1;                         /* s4 = -r0 + r1 - r2 + r3 */
-                                       /* Reverse sign! */
+                                       /* Reverse sign! */
      }
    else
      {
diff --git a/mpn/generic/matrix22_mul1_inverse_vector.c b/mpn/generic/matrix22_mul1_inverse_vector.c

new file mode 100644 (file)

index 0000000..c8a5811
--- /dev/null
+++ b/mpn/generic/matrix22_mul1_inverse_vector.c
@@ -0,0 +1,54 @@
+/* matrix22_mul1_inverse_vector.c
+
+   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
+   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2008, 2010 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+/* Sets (r;b) = M^{-1}(a;b), with M^{-1} = (u11, -u01; -u10, u00) from
+   the left. Uses three buffers, to avoid a copy. */
+mp_size_t
+mpn_matrix22_mul1_inverse_vector (const struct hgcd_matrix1 *M,
+                                 mp_ptr rp, mp_srcptr ap, mp_ptr bp, mp_size_t n)
+{
+  mp_limb_t h0, h1;
+
+  /* Compute (r;b) <-- (u11 a - u01 b; -u10 a + u00 b) as
+
+     r  = u11 * a
+     r -= u01 * b
+     b *= u00
+     b -= u10 * a
+  */
+
+  h0 =    mpn_mul_1 (rp, ap, n, M->u[1][1]);
+  h1 = mpn_submul_1 (rp, bp, n, M->u[0][1]);
+  ASSERT (h0 == h1);
+
+  h0 =    mpn_mul_1 (bp, bp, n, M->u[0][0]);
+  h1 = mpn_submul_1 (bp, ap, n, M->u[1][0]);
+  ASSERT (h0 == h1);
+
+  n -= (rp[n-1] | bp[n-1]) == 0;
+  return n;
+}
diff --git a/mpn/generic/mod_1.c b/mpn/generic/mod_1.c

index c5bbaad2765eca25102b6592ced8606d51f5ca74..66c332ea3831d260b6f90ebbca269025fe14f99a 100644 (file)
--- a/mpn/generic/mod_1.c
+++ b/mpn/generic/mod_1.c
@@ -3,7 +3,7 @@
     Return the single-limb remainder.
     There are no constraints on the value of the divisor.
  
-Copyright 1991, 1993, 1994, 1999, 2000, 2002, 2007, 2008, 2009 Free
+Copyright 1991, 1993, 1994, 1999, 2000, 2002, 2007, 2008, 2009, 2012 Free
  Software Foundation, Inc.
  
  This file is part of the GNU MP Library.
@@ -58,6 +58,27 @@ along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
  #define MOD_1_2_TO_MOD_1_4_THRESHOLD  20
  #endif
  
+#if TUNE_PROGRAM_BUILD && !HAVE_NATIVE_mpn_mod_1_1p
+/* Duplicates declarations in tune/speed.h */
+mp_limb_t mpn_mod_1_1p_1 (mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t [4]);
+mp_limb_t mpn_mod_1_1p_2 (mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t [4]);
+
+void mpn_mod_1_1p_cps_1 (mp_limb_t [4], mp_limb_t);
+void mpn_mod_1_1p_cps_2 (mp_limb_t [4], mp_limb_t);
+
+#undef mpn_mod_1_1p
+#define mpn_mod_1_1p(ap, n, b, pre)                         \
+  (mod_1_1p_method == 1 ? mpn_mod_1_1p_1 (ap, n, b, pre)     \
+   : (mod_1_1p_method == 2 ? mpn_mod_1_1p_2 (ap, n, b, pre)  \
+      : __gmpn_mod_1_1p (ap, n, b, pre)))
+
+#undef mpn_mod_1_1p_cps
+#define mpn_mod_1_1p_cps(pre, b)                               \
+  (mod_1_1p_method == 1 ? mpn_mod_1_1p_cps_1 (pre, b)          \
+   : (mod_1_1p_method == 2 ? mpn_mod_1_1p_cps_2 (pre, b)       \
+      : __gmpn_mod_1_1p_cps (pre, b)))
+#endif /* TUNE_PROGRAM_BUILD && !HAVE_NATIVE_mpn_mod_1_1p */
+
  
  /* The comments in mpn/generic/divrem_1.c apply here too.
  
@@ -118,12 +139,12 @@ mpn_mod_1_unnorm (mp_srcptr up, mp_size_t un, mp_limb_t d)
    if (UDIV_NEEDS_NORMALIZATION
        && BELOW_THRESHOLD (un, MOD_1_UNNORM_THRESHOLD))
      {
+      mp_limb_t nshift;
        for (i = un - 2; i >= 0; i--)
         {
           n0 = up[i] << GMP_NAIL_BITS;
-         udiv_qrnnd (dummy, r, r,
-                     (n1 << cnt) | (n0 >> (GMP_NUMB_BITS - cnt)),
-                     d);
+         nshift = (n1 << cnt) | (n0 >> (GMP_NUMB_BITS - cnt));
+         udiv_qrnnd (dummy, r, r, nshift, d);
           r >>= GMP_NAIL_BITS;
           n1 = n0;
         }
@@ -133,19 +154,18 @@ mpn_mod_1_unnorm (mp_srcptr up, mp_size_t un, mp_limb_t d)
      }
    else
      {
-      mp_limb_t inv;
+      mp_limb_t inv, nshift;
        invert_limb (inv, d);
  
        for (i = un - 2; i >= 0; i--)
         {
           n0 = up[i] << GMP_NAIL_BITS;
-         udiv_qrnnd_preinv (dummy, r, r,
-                            (n1 << cnt) | (n0 >> (GMP_NUMB_BITS - cnt)),
-                            d, inv);
+         nshift = (n1 << cnt) | (n0 >> (GMP_NUMB_BITS - cnt));
+         udiv_rnnd_preinv (r, r, nshift, d, inv);
           r >>= GMP_NAIL_BITS;
           n1 = n0;
         }
-      udiv_qrnnd_preinv (dummy, r, r, n1 << cnt, d, inv);
+      udiv_rnnd_preinv (r, r, n1 << cnt, d, inv);
        r >>= GMP_NAIL_BITS;
        return r >> cnt;
      }
@@ -191,7 +211,7 @@ mpn_mod_1_norm (mp_srcptr up, mp_size_t un, mp_limb_t d)
        for (i = un - 1; i >= 0; i--)
         {
           n0 = up[i] << GMP_NAIL_BITS;
-         udiv_qrnnd_preinv (dummy, r, r, n0, d, inv);
+         udiv_rnnd_preinv (r, r, n0, d, inv);
           r >>= GMP_NAIL_BITS;
         }
        return r;
diff --git a/mpn/generic/mod_1_1.c b/mpn/generic/mod_1_1.c

index c6a61eb6c5d3b3c635e613e8f280fbc36d24ba7d..d0e885d5f54e9b68be88610118d045a6e51ffeab 100644 (file)
--- a/mpn/generic/mod_1_1.c
+++ b/mpn/generic/mod_1_1.c
@@ -1,14 +1,14 @@
  /* mpn_mod_1_1p (ap, n, b, cps)
     Divide (ap,,n) by b.  Return the single-limb remainder.
  
-   Contributed to the GNU project by Torbjorn Granlund.
+   Contributed to the GNU project by Torbjorn Granlund and Niels Möller.
     Based on a suggestion by Peter L. Montgomery.
  
     THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
     SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
     GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
  
-Copyright 2008, 2009 Free Software Foundation, Inc.
+Copyright 2008, 2009, 2010, 2011 Free Software Foundation, Inc.
  
  This file is part of the GNU MP Library.
  
@@ -29,6 +29,111 @@ along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
  #include "gmp-impl.h"
  #include "longlong.h"
  
+#ifndef MOD_1_1P_METHOD
+# define MOD_1_1P_METHOD 1    /* need to make sure this is 2 for asm testing */
+#endif
+
+/* Define some longlong.h-style macros, but for wider operations.
+ * add_mssaaaa is like longlong.h's add_ssaaaa, but also generates
+ * carry out, in the form of a mask. */
+
+#if defined (__GNUC__)
+
+#if HAVE_HOST_CPU_FAMILY_x86 && W_TYPE_SIZE == 32
+#define add_mssaaaa(m, s1, s0, a1, a0, b1, b0)                         \
+  __asm__ (  "add      %6, %k2\n\t"                                    \
+            "adc       %4, %k1\n\t"                                    \
+            "sbb       %k0, %k0"                                       \
+          : "=r" (m), "=r" (s1), "=&r" (s0)                            \
+          : "1"  ((USItype)(a1)), "g" ((USItype)(b1)),                 \
+            "%2" ((USItype)(a0)), "g" ((USItype)(b0)))
+#endif
+
+#if HAVE_HOST_CPU_FAMILY_x86_64 && W_TYPE_SIZE == 64
+#define add_mssaaaa(m, s1, s0, a1, a0, b1, b0)                         \
+  __asm__ (  "add      %6, %q2\n\t"                                    \
+            "adc       %4, %q1\n\t"                                    \
+            "sbb       %q0, %q0"                                       \
+          : "=r" (m), "=r" (s1), "=&r" (s0)                            \
+          : "1"  ((UDItype)(a1)), "rme" ((UDItype)(b1)),               \
+            "%2" ((UDItype)(a0)), "rme" ((UDItype)(b0)))
+#endif
+
+#if defined (__sparc__) && W_TYPE_SIZE == 32
+#define add_mssaaaa(m, sh, sl, ah, al, bh, bl)                         \
+  __asm__ (  "addcc    %r5, %6, %2\n\t"                                \
+            "addxcc    %r3, %4, %1\n\t"                                \
+            "subx      %%g0, %%g0, %0"                                 \
+          : "=r" (m), "=r" (sh), "=&r" (sl)                            \
+          : "rJ" (ah), "rI" (bh), "%rJ" (al), "rI" (bl)                \
+        __CLOBBER_CC)
+#endif
+
+#if defined (__sparc__) && W_TYPE_SIZE == 64
+#define add_mssaaaa(m, sh, sl, ah, al, bh, bl)                         \
+  __asm__ (  "addcc    %r5, %6, %2\n\t"                                \
+            "addccc    %r7, %8, %%g0\n\t"                              \
+            "addccc    %r3, %4, %1\n\t"                                \
+            "clr       %0\n\t"                                         \
+            "movcs     %%xcc, -1, %0"                                  \
+          : "=r" (m), "=r" (sh), "=&r" (sl)                            \
+          : "rJ" (ah), "rI" (bh), "%rJ" (al), "rI" (bl),               \
+            "rJ" ((al) >> 32), "rI" ((bl) >> 32)                       \
+        __CLOBBER_CC)
+#endif
+
+#if HAVE_HOST_CPU_FAMILY_powerpc && !defined (_LONG_LONG_LIMB)
+/* This works fine for 32-bit and 64-bit limbs, except for 64-bit limbs with a
+   processor running in 32-bit mode, since the carry flag then gets the 32-bit
+   carry.  */
+#define add_mssaaaa(m, s1, s0, a1, a0, b1, b0)                         \
+  __asm__ (  "add%I6c  %2, %5, %6\n\t"                                 \
+            "adde      %1, %3, %4\n\t"                                 \
+            "subfe     %0, %0, %0\n\t"                                 \
+            "nor       %0, %0, %0"                                     \
+          : "=r" (m), "=r" (s1), "=&r" (s0)                            \
+          : "r"  (a1), "r" (b1), "%r" (a0), "rI" (b0))
+#endif
+
+#if defined (__s390x__) && W_TYPE_SIZE == 64
+#define add_mssaaaa(m, s1, s0, a1, a0, b1, b0)                         \
+  __asm__ (  "algr     %2, %6\n\t"                                     \
+            "alcgr     %1, %4\n\t"                                     \
+            "lghi      %0, 0\n\t"                                      \
+            "alcgr     %0, %0\n\t"                                     \
+            "lcgr      %0, %0"                                         \
+          : "=r" (m), "=r" (s1), "=&r" (s0)                            \
+          : "1"  ((UDItype)(a1)), "r" ((UDItype)(b1)),                 \
+            "%2" ((UDItype)(a0)), "r" ((UDItype)(b0)) __CLOBBER_CC)
+#endif
+
+#if defined (__arm__) && W_TYPE_SIZE == 32
+#define add_mssaaaa(m, sh, sl, ah, al, bh, bl)                         \
+  __asm__ (  "adds     %2, %5, %6\n\t"                                 \
+            "adcs      %1, %3, %4\n\t"                                 \
+            "movcc     %0, #0\n\t"                                     \
+            "movcs     %0, #-1"                                        \
+          : "=r" (m), "=r" (sh), "=&r" (sl)                            \
+          : "r" (ah), "rI" (bh), "%r" (al), "rI" (bl) __CLOBBER_CC)
+#endif
+#endif /* defined (__GNUC__) */
+
+#ifndef add_mssaaaa
+#define add_mssaaaa(m, s1, s0, a1, a0, b1, b0)                         \
+  do {                                                                 \
+    UWtype __s0, __s1, __c0, __c1;                                     \
+    __s0 = (a0) + (b0);                                                        \
+    __s1 = (a1) + (b1);                                                        \
+    __c0 = __s0 < (a0);                                                        \
+    __c1 = __s1 < (a1);                                                        \
+    (s0) = __s0;                                                       \
+    __s1 = __s1 + __c0;                                                        \
+    (s1) = __s1;                                                       \
+    (m) = - (__c1 + (__s1 < __c0));                                    \
+  } while (0)
+#endif
+
+#if MOD_1_1P_METHOD == 1
  void
  mpn_mod_1_1p_cps (mp_limb_t cps[4], mp_limb_t b)
  {
@@ -41,23 +146,28 @@ mpn_mod_1_1p_cps (mp_limb_t cps[4], mp_limb_t b)
    b <<= cnt;
    invert_limb (bi, b);
  
-  if (UNLIKELY (cnt == 0))
-    B1modb = -b;
-  else
-    B1modb = -b * ((bi >> (GMP_LIMB_BITS-cnt)) | (CNST_LIMB(1) << cnt));
-  ASSERT (B1modb <= b);                /* NB: not fully reduced mod b */
-  udiv_rnd_preinv (B2modb, B1modb, b, bi);
-
    cps[0] = bi;
    cps[1] = cnt;
+
+  B1modb = -b;
+  if (LIKELY (cnt != 0))
+    B1modb *= ((bi >> (GMP_LIMB_BITS-cnt)) | (CNST_LIMB(1) << cnt));
+  ASSERT (B1modb <= b);                /* NB: not fully reduced mod b */
    cps[2] = B1modb >> cnt;
+
+  /* In the normalized case, this can be simplified to
+   *
+   *   B2modb = - b * bi;
+   *   ASSERT (B2modb <= b);    // NB: equality iff b = B/2
+   */
+  udiv_rnnd_preinv (B2modb, B1modb, 0, b, bi);
    cps[3] = B2modb >> cnt;
  }
  
  mp_limb_t
  mpn_mod_1_1p (mp_srcptr ap, mp_size_t n, mp_limb_t b, mp_limb_t bmodb[4])
  {
-  mp_limb_t rh, rl, bi, q, ph, pl, r;
+  mp_limb_t rh, rl, bi, ph, pl, r;
    mp_limb_t B1modb, B2modb;
    mp_size_t i;
    int cnt;
@@ -68,7 +178,8 @@ mpn_mod_1_1p (mp_srcptr ap, mp_size_t n, mp_limb_t b, mp_limb_t bmodb[4])
    B1modb = bmodb[2];
    B2modb = bmodb[3];
  
-  umul_ppmm (ph, pl, ap[n - 1], B1modb);
+  rl = ap[n - 1];
+  umul_ppmm (ph, pl, rl, B1modb);
    add_ssaaaa (rh, rl, ph, pl, 0, ap[n - 2]);
  
    for (i = n - 3; i >= 0; i -= 1)
@@ -84,8 +195,8 @@ mpn_mod_1_1p (mp_srcptr ap, mp_size_t n, mp_limb_t b, mp_limb_t bmodb[4])
        add_ssaaaa (rh, rl, rh, rl, ph, pl);
      }
  
-  bi = bmodb[0];
    cnt = bmodb[1];
+  bi = bmodb[0];
  
    if (LIKELY (cnt != 0))
      rh = (rh << cnt) | (rl >> (GMP_LIMB_BITS - cnt));
@@ -93,7 +204,108 @@ mpn_mod_1_1p (mp_srcptr ap, mp_size_t n, mp_limb_t b, mp_limb_t bmodb[4])
    mask = -(mp_limb_t) (rh >= b);
    rh -= mask & b;
  
-  udiv_qrnnd_preinv (q, r, rh, rl << cnt, b, bi);
+  udiv_rnnd_preinv (r, rh, rl << cnt, b, bi);
+
+  return r >> cnt;
+}
+#endif /* MOD_1_1P_METHOD == 1 */
+
+#if MOD_1_1P_METHOD == 2
+void
+mpn_mod_1_1p_cps (mp_limb_t cps[4], mp_limb_t b)
+{
+  mp_limb_t bi;
+  mp_limb_t B2modb;
+  int cnt;
+
+  count_leading_zeros (cnt, b);
+
+  b <<= cnt;
+  invert_limb (bi, b);
+
+  cps[0] = bi;
+  cps[1] = cnt;
+
+  if (LIKELY (cnt != 0))
+    {
+      mp_limb_t B1modb = -b;
+      B1modb *= ((bi >> (GMP_LIMB_BITS-cnt)) | (CNST_LIMB(1) << cnt));
+      ASSERT (B1modb <= b);            /* NB: not fully reduced mod b */
+      cps[2] = B1modb >> cnt;
+    }
+  B2modb = - b * bi;
+  ASSERT (B2modb <= b);    /* NB: equality iff b = B/2 */
+  cps[3] = B2modb;
+}
+
+mp_limb_t
+mpn_mod_1_1p (mp_srcptr ap, mp_size_t n, mp_limb_t b, mp_limb_t bmodb[4])
+{
+  int cnt;
+  mp_limb_t bi, B1modb;
+  mp_limb_t r0, r1;
+  mp_limb_t r;
+
+  ASSERT (n >= 2);             /* fix tuneup.c if this is changed */
+
+  r0 = ap[n-2];
+  r1 = ap[n-1];
+
+  if (n > 2)
+    {
+      mp_limb_t B2modb, B2mb;
+      mp_limb_t p0, p1;
+      mp_limb_t r2;
+      mp_size_t j;
+
+      B2modb = bmodb[3];
+      B2mb = B2modb - b;
+
+      umul_ppmm (p1, p0, r1, B2modb);
+      add_mssaaaa (r2, r1, r0, r0, ap[n-3], p1, p0);
+
+      for (j = n-4; j >= 0; j--)
+       {
+         mp_limb_t cy;
+         /* mp_limb_t t = r0 + B2mb; */
+         umul_ppmm (p1, p0, r1, B2modb);
+
+         ADDC_LIMB (cy, r0, r0, r2 & B2modb);
+         /* Alternative, for cmov: if (cy) r0 = t; */
+         r0 -= (-cy) & b;
+         add_mssaaaa (r2, r1, r0, r0, ap[j], p1, p0);
+       }
+
+      r1 -= (r2 & b);
+    }
+
+  cnt = bmodb[1];
+
+  if (LIKELY (cnt != 0))
+    {
+      mp_limb_t t;
+      mp_limb_t B1modb = bmodb[2];
+
+      umul_ppmm (r1, t, r1, B1modb);
+      r0 += t;
+      r1 += (r0 < t);
+
+      /* Normalize */
+      r1 = (r1 << cnt) | (r0 >> (GMP_LIMB_BITS - cnt));
+      r0 <<= cnt;
+
+      /* NOTE: Might get r1 == b here, but udiv_rnnd_preinv allows
+        that. */
+    }
+  else
+    {
+      mp_limb_t mask = -(mp_limb_t) (r1 >= b);
+      r1 -= mask & b;
+    }
+
+  bi = bmodb[0];
  
+  udiv_rnnd_preinv (r, r1, r0, b, bi);
    return r >> cnt;
  }
+#endif /* MOD_1_1P_METHOD == 2 */
diff --git a/mpn/generic/mod_1_2.c b/mpn/generic/mod_1_2.c

index c81b2026f7643c5a126f9175b5a2e7a194e68f49..7fec9504c9ff514340146d033e0af79d74bb030e 100644 (file)
--- a/mpn/generic/mod_1_2.c
+++ b/mpn/generic/mod_1_2.c
@@ -3,12 +3,13 @@
     Requires that b < B / 2.
  
     Contributed to the GNU project by Torbjorn Granlund.
+   Based on a suggestion by Peter L. Montgomery.
  
     THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
     SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
     GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
  
-Copyright 2008, 2009 Free Software Foundation, Inc.
+Copyright 2008, 2009, 2010 Free Software Foundation, Inc.
  
  This file is part of the GNU MP Library.
  
@@ -43,15 +44,17 @@ mpn_mod_1s_2p_cps (mp_limb_t cps[5], mp_limb_t b)
    b <<= cnt;
    invert_limb (bi, b);
  
-  B1modb = -b * ((bi >> (GMP_LIMB_BITS-cnt)) | (CNST_LIMB(1) << cnt));
-  ASSERT (B1modb <= b);                /* NB: not fully reduced mod b */
-  udiv_rnd_preinv (B2modb, B1modb, b, bi);
-  udiv_rnd_preinv (B3modb, B2modb, b, bi);
-
    cps[0] = bi;
    cps[1] = cnt;
+
+  B1modb = -b * ((bi >> (GMP_LIMB_BITS-cnt)) | (CNST_LIMB(1) << cnt));
+  ASSERT (B1modb <= b);                /* NB: not fully reduced mod b */
    cps[2] = B1modb >> cnt;
+
+  udiv_rnnd_preinv (B2modb, B1modb, 0, b, bi);
    cps[3] = B2modb >> cnt;
+
+  udiv_rnnd_preinv (B3modb, B2modb, 0, b, bi);
    cps[4] = B3modb >> cnt;
  
  #if WANT_ASSERT
@@ -70,7 +73,7 @@ mpn_mod_1s_2p_cps (mp_limb_t cps[5], mp_limb_t b)
  mp_limb_t
  mpn_mod_1s_2p (mp_srcptr ap, mp_size_t n, mp_limb_t b, mp_limb_t cps[5])
  {
-  mp_limb_t rh, rl, bi, q, ph, pl, ch, cl, r;
+  mp_limb_t rh, rl, bi, ph, pl, ch, cl, r;
    mp_limb_t B1modb, B2modb, B3modb;
    mp_size_t i;
    int cnt;
@@ -88,7 +91,7 @@ mpn_mod_1s_2p (mp_srcptr ap, mp_size_t n, mp_limb_t b, mp_limb_t cps[5])
           rl = ap[n - 1];
           bi = cps[0];
           cnt = cps[1];
-         udiv_qrnnd_preinv (q, r, rl >> (GMP_LIMB_BITS - cnt),
+         udiv_rnnd_preinv (r, rl >> (GMP_LIMB_BITS - cnt),
                              rl << cnt, b, bi);
           return r >> cnt;
         }
@@ -101,8 +104,8 @@ mpn_mod_1s_2p (mp_srcptr ap, mp_size_t n, mp_limb_t b, mp_limb_t cps[5])
      }
    else
      {
-      umul_ppmm (rh, rl, ap[n - 1], B1modb);
-      add_ssaaaa (rh, rl, rh, rl, 0, ap[n - 2]);
+      rh = ap[n - 1];
+      rl = ap[n - 2];
      }
  
    for (i = n - 4; i >= 0; i -= 2)
@@ -122,20 +125,14 @@ mpn_mod_1s_2p (mp_srcptr ap, mp_size_t n, mp_limb_t b, mp_limb_t cps[5])
        add_ssaaaa (rh, rl, rh, rl, ph, pl);
      }
  
-  bi = cps[0];
-  cnt = cps[1];
-
-#if 1
    umul_ppmm (rh, cl, rh, B1modb);
    add_ssaaaa (rh, rl, rh, rl, 0, cl);
-  r = (rh << cnt) | (rl >> (GMP_LIMB_BITS - cnt));
-#else
-  udiv_qrnnd_preinv (q, r, rh >> (GMP_LIMB_BITS - cnt),
-                    (rh << cnt) | (rl >> (GMP_LIMB_BITS - cnt)), b, bi);
-  ASSERT (q <= 2);     /* optimize for small quotient? */
-#endif
  
-  udiv_qrnnd_preinv (q, r, r, rl << cnt, b, bi);
+  cnt = cps[1];
+  bi = cps[0];
+
+  r = (rh << cnt) | (rl >> (GMP_LIMB_BITS - cnt));
+  udiv_rnnd_preinv (r, r, rl << cnt, b, bi);
  
    return r >> cnt;
  }
diff --git a/mpn/generic/mod_1_3.c b/mpn/generic/mod_1_3.c

index c7c629963af65f1bfea98dbce00d52119e2cafbf..75d1cca3cb48cf36fbe1bd414ab07a988ebe389a 100644 (file)
--- a/mpn/generic/mod_1_3.c
+++ b/mpn/generic/mod_1_3.c
@@ -3,12 +3,13 @@
     Requires that d < B / 3.
  
     Contributed to the GNU project by Torbjorn Granlund.
+   Based on a suggestion by Peter L. Montgomery.
  
     THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
     SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
     GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
  
-Copyright 2008, 2009, 2010 Free Software Foundation, Inc.
+Copyright 2008, 2009, 2010, 2013 Free Software Foundation, Inc.
  
  This file is part of the GNU MP Library.
  
@@ -43,17 +44,20 @@ mpn_mod_1s_3p_cps (mp_limb_t cps[6], mp_limb_t b)
    b <<= cnt;
    invert_limb (bi, b);
  
-  B1modb = -b * ((bi >> (GMP_LIMB_BITS-cnt)) | (CNST_LIMB(1) << cnt));
-  ASSERT (B1modb <= b);                /* NB: not fully reduced mod b */
-  udiv_rnd_preinv (B2modb, B1modb, b, bi);
-  udiv_rnd_preinv (B3modb, B2modb, b, bi);
-  udiv_rnd_preinv (B4modb, B3modb, b, bi);
-
    cps[0] = bi;
    cps[1] = cnt;
+
+  B1modb = -b * ((bi >> (GMP_LIMB_BITS-cnt)) | (CNST_LIMB(1) << cnt));
+  ASSERT (B1modb <= b);                /* NB: not fully reduced mod b */
    cps[2] = B1modb >> cnt;
+
+  udiv_rnnd_preinv (B2modb, B1modb, 0, b, bi);
    cps[3] = B2modb >> cnt;
+
+  udiv_rnnd_preinv (B3modb, B2modb, 0, b, bi);
    cps[4] = B3modb >> cnt;
+
+  udiv_rnnd_preinv (B4modb, B3modb, 0, b, bi);
    cps[5] = B4modb >> cnt;
  
  #if WANT_ASSERT
@@ -72,7 +76,7 @@ mpn_mod_1s_3p_cps (mp_limb_t cps[6], mp_limb_t b)
  mp_limb_t
  mpn_mod_1s_3p (mp_srcptr ap, mp_size_t n, mp_limb_t b, mp_limb_t cps[6])
  {
-  mp_limb_t rh, rl, bi, q, ph, pl, ch, cl, r;
+  mp_limb_t rh, rl, bi, ph, pl, ch, cl, r;
    mp_limb_t B1modb, B2modb, B3modb, B4modb;
    mp_size_t i;
    int cnt;
@@ -102,8 +106,8 @@ mpn_mod_1s_3p (mp_srcptr ap, mp_size_t n, mp_limb_t b, mp_limb_t cps[6])
        n -= 1;
        break;
      case 1:    /* n mod 3 = 2 */
-      umul_ppmm (ph, pl, ap[n - 1], B1modb);
-      add_ssaaaa (rh, rl, ph, pl, 0, ap[n - 2]);
+      rh = ap[n - 1];
+      rl = ap[n - 2];
        n -= 2;
        break;
      }
@@ -129,20 +133,14 @@ mpn_mod_1s_3p (mp_srcptr ap, mp_size_t n, mp_limb_t b, mp_limb_t cps[6])
        add_ssaaaa (rh, rl, rh, rl, ph, pl);
      }
  
-  bi = cps[0];
-  cnt = cps[1];
-
-#if 1
    umul_ppmm (rh, cl, rh, B1modb);
    add_ssaaaa (rh, rl, rh, rl, 0, cl);
-  r = (rh << cnt) | (rl >> (GMP_LIMB_BITS - cnt));
-#else
-  udiv_qrnnd_preinv (q, r, rh >> (GMP_LIMB_BITS - cnt),
-                    (rh << cnt) | (rl >> (GMP_LIMB_BITS - cnt)), b, bi);
-  ASSERT (q <= 3);     /* optimize for small quotient? */
-#endif
  
-  udiv_qrnnd_preinv (q, r, r, rl << cnt, b, bi);
+  cnt = cps[1];
+  bi = cps[0];
+
+  r = (rh << cnt) | (rl >> (GMP_LIMB_BITS - cnt));
+  udiv_rnnd_preinv (r, r, rl << cnt, b, bi);
  
    return r >> cnt;
  }
diff --git a/mpn/generic/mod_1_4.c b/mpn/generic/mod_1_4.c

index bde191b9561ad7c589ec18c790a104c14dd4e105..570ff43e217bfe6aad6ff09b14eb08b3fbad235a 100644 (file)
--- a/mpn/generic/mod_1_4.c
+++ b/mpn/generic/mod_1_4.c
@@ -1,14 +1,15 @@
-/* mpn_mod_1s_3p (ap, n, b, cps)
+/* mpn_mod_1s_4p (ap, n, b, cps)
     Divide (ap,,n) by b.  Return the single-limb remainder.
     Requires that d < B / 4.
  
     Contributed to the GNU project by Torbjorn Granlund.
+   Based on a suggestion by Peter L. Montgomery.
  
     THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
     SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
     GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
  
-Copyright 2008, 2009 Free Software Foundation, Inc.
+Copyright 2008, 2009, 2010 Free Software Foundation, Inc.
  
  This file is part of the GNU MP Library.
  
@@ -43,19 +44,23 @@ mpn_mod_1s_4p_cps (mp_limb_t cps[7], mp_limb_t b)
    b <<= cnt;
    invert_limb (bi, b);
  
-  B1modb = -b * ((bi >> (GMP_LIMB_BITS-cnt)) | (CNST_LIMB(1) << cnt));
-  ASSERT (B1modb <= b);                /* NB: not fully reduced mod b */
-  udiv_rnd_preinv (B2modb, B1modb, b, bi);
-  udiv_rnd_preinv (B3modb, B2modb, b, bi);
-  udiv_rnd_preinv (B4modb, B3modb, b, bi);
-  udiv_rnd_preinv (B5modb, B4modb, b, bi);
-
    cps[0] = bi;
    cps[1] = cnt;
+
+  B1modb = -b * ((bi >> (GMP_LIMB_BITS-cnt)) | (CNST_LIMB(1) << cnt));
+  ASSERT (B1modb <= b);                /* NB: not fully reduced mod b */
    cps[2] = B1modb >> cnt;
+
+  udiv_rnnd_preinv (B2modb, B1modb, 0, b, bi);
    cps[3] = B2modb >> cnt;
+
+  udiv_rnnd_preinv (B3modb, B2modb, 0, b, bi);
    cps[4] = B3modb >> cnt;
+
+  udiv_rnnd_preinv (B4modb, B3modb, 0, b, bi);
    cps[5] = B4modb >> cnt;
+
+  udiv_rnnd_preinv (B5modb, B4modb, 0, b, bi);
    cps[6] = B5modb >> cnt;
  
  #if WANT_ASSERT
@@ -74,7 +79,7 @@ mpn_mod_1s_4p_cps (mp_limb_t cps[7], mp_limb_t b)
  mp_limb_t
  mpn_mod_1s_4p (mp_srcptr ap, mp_size_t n, mp_limb_t b, mp_limb_t cps[7])
  {
-  mp_limb_t rh, rl, bi, q, ph, pl, ch, cl, r;
+  mp_limb_t rh, rl, bi, ph, pl, ch, cl, r;
    mp_limb_t B1modb, B2modb, B3modb, B4modb, B5modb;
    mp_size_t i;
    int cnt;
@@ -104,8 +109,8 @@ mpn_mod_1s_4p (mp_srcptr ap, mp_size_t n, mp_limb_t b, mp_limb_t cps[7])
        n -= 1;
        break;
      case 2:
-      umul_ppmm (ph, pl, ap[n - 1], B1modb);
-      add_ssaaaa (rh, rl, ph, pl, 0, ap[n - 2]);
+      rh = ap[n - 1];
+      rl = ap[n - 2];
        n -= 2;
        break;
      case 3:
@@ -142,20 +147,14 @@ mpn_mod_1s_4p (mp_srcptr ap, mp_size_t n, mp_limb_t b, mp_limb_t cps[7])
        add_ssaaaa (rh, rl, rh, rl, ph, pl);
      }
  
-  bi = cps[0];
-  cnt = cps[1];
-
-#if 1
    umul_ppmm (rh, cl, rh, B1modb);
    add_ssaaaa (rh, rl, rh, rl, 0, cl);
-  r = (rh << cnt) | (rl >> (GMP_LIMB_BITS - cnt));
-#else
-  udiv_qrnnd_preinv (q, r, rh >> (GMP_LIMB_BITS - cnt),
-                    (rh << cnt) | (rl >> (GMP_LIMB_BITS - cnt)), b, bi);
-  ASSERT (q <= 4);     /* optimize for small quotient? */
-#endif
  
-  udiv_qrnnd_preinv (q, r, r, rl << cnt, b, bi);
+  cnt = cps[1];
+  bi = cps[0];
+
+  r = (rh << cnt) | (rl >> (GMP_LIMB_BITS - cnt));
+  udiv_rnnd_preinv (r, r, rl << cnt, b, bi);
  
    return r >> cnt;
  }
diff --git a/mpn/generic/mu_bdiv_qr.c b/mpn/generic/mu_bdiv_qr.c

index 312d011a142704f2b1b473ee10db8b8a952c63b8..fabdc4690d16bca3b177707c77d9fb7d967eccc6 100644 (file)
--- a/mpn/generic/mu_bdiv_qr.c
+++ b/mpn/generic/mu_bdiv_qr.c
@@ -8,7 +8,7 @@
     SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
     GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.
  
-Copyright 2005, 2006, 2007, 2009, 2010 Free Software Foundation, Inc.
+Copyright 2005, 2006, 2007, 2009, 2010, 2012 Free Software Foundation, Inc.
  
  This file is part of the GNU MP Library.
  
@@ -64,9 +64,7 @@ mpn_mu_bdiv_qr (mp_ptr qp,
    mp_size_t qn;
    mp_size_t in;
    mp_limb_t cy, c0;
-  int k;
    mp_size_t tn, wn;
-  mp_size_t i;
  
    qn = nn - dn;
  
diff --git a/mpn/generic/mu_div_qr.c b/mpn/generic/mu_div_qr.c

index 34a2abb5e0ad1c93fd059753d4fcb779642b8c2a..b7aaa70b6fc5c3cf88fe350d29ab680ac2a5a218 100644 (file)
--- a/mpn/generic/mu_div_qr.c
+++ b/mpn/generic/mu_div_qr.c
@@ -247,7 +247,7 @@ mpn_preinv_mu_div_qr (mp_ptr qp,
    if (qh != 0)
      mpn_sub_n (rp, np, dp, dn);
    else
-    MPN_COPY (rp, np, dn);
+    MPN_COPY_INCR (rp, np, dn);
  
    if (qn == 0)
      return qh;                 /* Degenerate use.  Should we allow this? */
diff --git a/mpn/generic/mul.c b/mpn/generic/mul.c

index c176b45a27bc5c477f59f6b7db8adbc3eba6f287..8e8ce5817dcc0cbb50e92e17702632c5712b68c3 100644 (file)
--- a/mpn/generic/mul.c
+++ b/mpn/generic/mul.c
@@ -3,7 +3,7 @@
     Contributed to the GNU project by Torbjorn Granlund.
  
  Copyright 1991, 1993, 1994, 1996, 1997, 1999, 2000, 2001, 2002, 2003, 2005,
-2006, 2007, 2009, 2010 Free Software Foundation, Inc.
+2006, 2007, 2009, 2010, 2012 Free Software Foundation, Inc.
  
  This file is part of the GNU MP Library.
  
@@ -28,6 +28,39 @@ along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
  #define MUL_BASECASE_MAX_UN 500
  #endif
  
+/* Areas where the different toom algorithms can be called (extracted
+   from the t-toom*.c files, and ignoring small constant offsets):
+
+   1/6  1/5 1/4 4/13 1/3 3/8 2/5 5/11 1/2 3/5 2/3 3/4 4/5   1 vn/un
+                                        4/7              6/7
+                                      6/11
+                                       |--------------------| toom22 (small)
+                                                           || toom22 (large)
+                                                       |xxxx| toom22 called
+                      |-------------------------------------| toom32
+                                         |xxxxxxxxxxxxxxxx| | toom32 called
+                                               |------------| toom33
+                                                          |x| toom33 called
+             |---------------------------------|            | toom42
+                     |xxxxxxxxxxxxxxxxxxxxxxxx|            | toom42 called
+                                       |--------------------| toom43
+                                               |xxxxxxxxxx|   toom43 called
+         |-----------------------------|                      toom52 (unused)
+                                                   |--------| toom44
+                                                  |xxxxxxxx| toom44 called
+                              |--------------------|        | toom53
+                                        |xxxxxx|              toom53 called
+    |-------------------------|                               toom62 (unused)
+                                           |----------------| toom54 (unused)
+                      |--------------------|                  toom63
+                             |xxxxxxxxx|                   | toom63 called
+                          |---------------------------------| toom6h
+                                                  |xxxxxxxx| toom6h called
+                                  |-------------------------| toom8h (32 bit)
+                 |------------------------------------------| toom8h (64 bit)
+                                                  |xxxxxxxx| toom8h called
+*/
+
  #define TOOM33_OK(an,bn) (6 + 2 * an < 3 * bn)
  #define TOOM44_OK(an,bn) (12 + 3 * an < 4 * bn)
  
@@ -170,7 +203,7 @@ mpn_mul (mp_ptr prodp,
        /* FIXME: This condition (repeated in the loop below) leaves from a vn*vn
          square to a (3vn-1)*vn rectangle.  Leaving such a rectangle is hardly
          wise; we would get better balance by slightly moving the bound.  We
-        will sometimes end up with un < vn, like the the X3 arm below.  */
+        will sometimes end up with un < vn, like in the X3 arm below.  */
        if (un >= 3 * vn)
         {
           mp_limb_t cy;
diff --git a/mpn/generic/mul_basecase.c b/mpn/generic/mul_basecase.c

index 726bd6741fb61e6ca6ddff04541e914464204ed5..0eac22473a759ddbf2a451c3586e82e7482397c8 100644 (file)
--- a/mpn/generic/mul_basecase.c
+++ b/mpn/generic/mul_basecase.c
@@ -4,7 +4,6 @@
     THIS IS AN INTERNAL FUNCTION WITH A MUTABLE INTERFACE.  IT IS ONLY
     SAFE TO REACH THIS FUNCTION THROUGH DOCUMENTED INTERFACES.
  
-
  Copyright 1991, 1992, 1993, 1994, 1996, 1997, 2000, 2001, 2002 Free Software
  Foundation, Inc.
  
diff --git a/mpn/generic/mul_fft.c b/mpn/generic/mul_fft.c

index 798f83df1cf33f242ff5627094dc45da1d699c92..e2555c7d143719f58e0367f4acd2a20a523150d3 100644 (file)
--- a/mpn/generic/mul_fft.c
+++ b/mpn/generic/mul_fft.c
@@ -7,7 +7,7 @@
     GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
  
  Copyright 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008,
-2009, 2010 Free Software Foundation, Inc.
+2009, 2010, 2012 Free Software Foundation, Inc.
  
  This file is part of the GNU MP Library.
  
@@ -64,11 +64,11 @@ along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
  #define HAVE_NATIVE_mpn_add_n_sub_n 1
  #endif
  
-static mp_limb_t mpn_mul_fft_internal
-__GMP_PROTO ((mp_ptr, mp_size_t, int, mp_ptr *, mp_ptr *,
-             mp_ptr, mp_ptr, mp_size_t, mp_size_t, mp_size_t, int **, mp_ptr, int));
-static void mpn_mul_fft_decompose
-__GMP_PROTO ((mp_ptr, mp_ptr *, int, int, mp_srcptr, mp_size_t, int, int, mp_ptr));
+static mp_limb_t mpn_mul_fft_internal (mp_ptr, mp_size_t, int, mp_ptr *,
+                                      mp_ptr *, mp_ptr, mp_ptr, mp_size_t,
+                                      mp_size_t, mp_size_t, int **, mp_ptr, int);
+static void mpn_mul_fft_decompose (mp_ptr, mp_ptr *, int, int, mp_srcptr,
+                                  mp_size_t, int, int, mp_ptr);
  
  
  /* Find the best k to use for a mod 2^(m*GMP_NUMB_BITS)+1 FFT for m >= n.
@@ -81,7 +81,7 @@ __GMP_PROTO ((mp_ptr, mp_ptr *, int, int, mp_srcptr, mp_size_t, int, int, mp_ptr
  
  #if TUNE_PROGRAM_BUILD || (defined (MUL_FFT_TABLE3) && defined (SQR_FFT_TABLE3))
  
-#ifndef FFT_TABLE3_SIZE                /* When tuning, this is define in gmp-impl.h */
+#ifndef FFT_TABLE3_SIZE                /* When tuning this is defined in gmp-impl.h */
  #if defined (MUL_FFT_TABLE3_SIZE) && defined (SQR_FFT_TABLE3_SIZE)
  #if MUL_FFT_TABLE3_SIZE > SQR_FFT_TABLE3_SIZE
  #define FFT_TABLE3_SIZE MUL_FFT_TABLE3_SIZE
diff --git a/mpn/generic/mullo_n.c b/mpn/generic/mullo_n.c

index 24c2c3c5ab4f290bdad55b8678b3e5b5bc2f0550..8c39b2bff0806fea71f1fc697cf32518b5469ca8 100644 (file)
--- a/mpn/generic/mullo_n.c
+++ b/mpn/generic/mullo_n.c
@@ -7,7 +7,7 @@
     FUNCTION THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST GUARANTEED
     THAT IT'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
  
-Copyright 2004, 2005, 2009, 2010 Free Software Foundation, Inc.
+Copyright 2004, 2005, 2009, 2010, 2012 Free Software Foundation, Inc.
  
  This file is part of the GNU MP Library.
  
@@ -40,7 +40,7 @@ along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
  #define MULLO_MUL_N_THRESHOLD MUL_FFT_THRESHOLD
  #endif
  
-#if TUNE_PROGRAM_BUILD
+#if TUNE_PROGRAM_BUILD || WANT_FAT_BINARY
  #define MAYBE_range_basecase 1
  #define MAYBE_range_toom22   1
  #else
diff --git a/mpn/generic/mulmid.c b/mpn/generic/mulmid.c

new file mode 100644 (file)

index 0000000..67afd09
--- /dev/null
+++ b/mpn/generic/mulmid.c
@@ -0,0 +1,245 @@
+/* mpn_mulmid -- middle product
+
+   Contributed by David Harvey.
+
+   THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
+   SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT IT'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2011 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+
+#define CHUNK (200 + MULMID_TOOM42_THRESHOLD)
+
+
+void
+mpn_mulmid (mp_ptr rp,
+            mp_srcptr ap, mp_size_t an,
+            mp_srcptr bp, mp_size_t bn)
+{
+  mp_size_t rn, k;
+  mp_ptr scratch, temp;
+
+  ASSERT (an >= bn);
+  ASSERT (bn >= 1);
+  ASSERT (! MPN_OVERLAP_P (rp, an - bn + 3, ap, an));
+  ASSERT (! MPN_OVERLAP_P (rp, an - bn + 3, bp, bn));
+
+  if (bn < MULMID_TOOM42_THRESHOLD)
+    {
+      /* region not tall enough to make toom42 worthwhile for any portion */
+
+      if (an < CHUNK)
+       {
+         /* region not too wide either, just call basecase directly */
+         mpn_mulmid_basecase (rp, ap, an, bp, bn);
+         return;
+       }
+
+      /* Region quite wide. For better locality, use basecase on chunks:
+
+        AAABBBCC..
+        .AAABBBCC.
+        ..AAABBBCC
+      */
+
+      k = CHUNK - bn + 1;    /* number of diagonals per chunk */
+
+      /* first chunk (marked A in the above diagram) */
+      mpn_mulmid_basecase (rp, ap, CHUNK, bp, bn);
+
+      /* remaining chunks (B, C, etc) */
+      an -= k;
+
+      while (an >= CHUNK)
+       {
+         mp_limb_t t0, t1, cy;
+         ap += k, rp += k;
+         t0 = rp[0], t1 = rp[1];
+         mpn_mulmid_basecase (rp, ap, CHUNK, bp, bn);
+         ADDC_LIMB (cy, rp[0], rp[0], t0);    /* add back saved limbs */
+         MPN_INCR_U (rp + 1, k + 1, t1 + cy);
+         an -= k;
+       }
+
+      if (an >= bn)
+       {
+         /* last remaining chunk */
+         mp_limb_t t0, t1, cy;
+         ap += k, rp += k;
+         t0 = rp[0], t1 = rp[1];
+         mpn_mulmid_basecase (rp, ap, an, bp, bn);
+         ADDC_LIMB (cy, rp[0], rp[0], t0);
+         MPN_INCR_U (rp + 1, an - bn + 2, t1 + cy);
+       }
+
+      return;
+    }
+
+  /* region is tall enough for toom42 */
+
+  rn = an - bn + 1;
+
+  if (rn < MULMID_TOOM42_THRESHOLD)
+    {
+      /* region not wide enough to make toom42 worthwhile for any portion */
+
+      TMP_DECL;
+
+      if (bn < CHUNK)
+       {
+         /* region not too tall either, just call basecase directly */
+         mpn_mulmid_basecase (rp, ap, an, bp, bn);
+         return;
+       }
+
+      /* Region quite tall. For better locality, use basecase on chunks:
+
+        AAAAA....
+        .AAAAA...
+        ..BBBBB..
+        ...BBBBB.
+        ....CCCCC
+      */
+
+      TMP_MARK;
+
+      temp = TMP_ALLOC_LIMBS (rn + 2);
+
+      /* first chunk (marked A in the above diagram) */
+      bp += bn - CHUNK, an -= bn - CHUNK;
+      mpn_mulmid_basecase (rp, ap, an, bp, CHUNK);
+
+      /* remaining chunks (B, C, etc) */
+      bn -= CHUNK;
+
+      while (bn >= CHUNK)
+       {
+         ap += CHUNK, bp -= CHUNK;
+         mpn_mulmid_basecase (temp, ap, an, bp, CHUNK);
+         mpn_add_n (rp, rp, temp, rn + 2);
+         bn -= CHUNK;
+       }
+
+      if (bn)
+       {
+         /* last remaining chunk */
+         ap += CHUNK, bp -= bn;
+         mpn_mulmid_basecase (temp, ap, rn + bn - 1, bp, bn);
+         mpn_add_n (rp, rp, temp, rn + 2);
+       }
+
+      TMP_FREE;
+      return;
+    }
+
+  /* we're definitely going to use toom42 somewhere */
+
+  if (bn > rn)
+    {
+      /* slice region into chunks, use toom42 on all chunks except possibly
+        the last:
+
+         AA....
+         .AA...
+         ..BB..
+         ...BB.
+         ....CC
+      */
+
+      TMP_DECL;
+      TMP_MARK;
+
+      temp = TMP_ALLOC_LIMBS (rn + 2 + mpn_toom42_mulmid_itch (rn));
+      scratch = temp + rn + 2;
+
+      /* first chunk (marked A in the above diagram) */
+      bp += bn - rn;
+      mpn_toom42_mulmid (rp, ap, bp, rn, scratch);
+
+      /* remaining chunks (B, C, etc) */
+      bn -= rn;
+
+      while (bn >= rn)
+        {
+          ap += rn, bp -= rn;
+         mpn_toom42_mulmid (temp, ap, bp, rn, scratch);
+          mpn_add_n (rp, rp, temp, rn + 2);
+          bn -= rn;
+        }
+
+      if (bn)
+        {
+          /* last remaining chunk */
+          ap += rn, bp -= bn;
+         mpn_mulmid (temp, ap, rn + bn - 1, bp, bn);
+          mpn_add_n (rp, rp, temp, rn + 2);
+        }
+
+      TMP_FREE;
+    }
+  else
+    {
+      /* slice region into chunks, use toom42 on all chunks except possibly
+        the last:
+
+         AAABBBCC..
+         .AAABBBCC.
+         ..AAABBBCC
+      */
+
+      TMP_DECL;
+      TMP_MARK;
+
+      scratch = TMP_ALLOC_LIMBS (mpn_toom42_mulmid_itch (bn));
+
+      /* first chunk (marked A in the above diagram) */
+      mpn_toom42_mulmid (rp, ap, bp, bn, scratch);
+
+      /* remaining chunks (B, C, etc) */
+      rn -= bn;
+
+      while (rn >= bn)
+        {
+         mp_limb_t t0, t1, cy;
+          ap += bn, rp += bn;
+          t0 = rp[0], t1 = rp[1];
+          mpn_toom42_mulmid (rp, ap, bp, bn, scratch);
+         ADDC_LIMB (cy, rp[0], rp[0], t0);     /* add back saved limbs */
+         MPN_INCR_U (rp + 1, bn + 1, t1 + cy);
+         rn -= bn;
+        }
+
+      TMP_FREE;
+
+      if (rn)
+        {
+          /* last remaining chunk */
+         mp_limb_t t0, t1, cy;
+          ap += bn, rp += bn;
+          t0 = rp[0], t1 = rp[1];
+          mpn_mulmid (rp, ap, rn + bn - 1, bp, bn);
+         ADDC_LIMB (cy, rp[0], rp[0], t0);
+         MPN_INCR_U (rp + 1, rn + 1, t1 + cy);
+        }
+    }
+}
diff --git a/mpn/generic/mulmid_basecase.c b/mpn/generic/mulmid_basecase.c

new file mode 100644 (file)

index 0000000..e926cc4
--- /dev/null
+++ b/mpn/generic/mulmid_basecase.c
@@ -0,0 +1,72 @@
+/* mpn_mulmid_basecase -- classical middle product algorithm
+
+   Contributed by David Harvey.
+
+   THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
+   SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT IT'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2011 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+/* Middle product of {up,un} and {vp,vn}, write result to {rp,un-vn+3}.
+   Must have un >= vn >= 1.
+
+   Neither input buffer may overlap with the output buffer. */
+
+void
+mpn_mulmid_basecase (mp_ptr rp,
+                     mp_srcptr up, mp_size_t un,
+                     mp_srcptr vp, mp_size_t vn)
+{
+  mp_limb_t lo, hi;  /* last two limbs of output */
+  mp_limb_t cy;
+
+  ASSERT (un >= vn);
+  ASSERT (vn >= 1);
+  ASSERT (! MPN_OVERLAP_P (rp, un - vn + 3, up, un));
+  ASSERT (! MPN_OVERLAP_P (rp, un - vn + 3, vp, vn));
+
+  up += vn - 1;
+  un -= vn - 1;
+
+  /* multiply by first limb, store result */
+  lo = mpn_mul_1 (rp, up, un, vp[0]);
+  hi = 0;
+
+  /* accumulate remaining rows */
+  for (vn--; vn; vn--)
+    {
+      up--, vp++;
+      cy = mpn_addmul_1 (rp, up, un, vp[0]);
+      add_ssaaaa (hi, lo, hi, lo, 0, cy);
+    }
+
+  /* store final limbs */
+#if GMP_NAIL_BITS != 0
+  hi = (hi << GMP_NAIL_BITS) + (lo >> GMP_NUMB_BITS);
+  lo &= GMP_NUMB_MASK;
+#endif
+
+  rp[un] = lo;
+  rp[un + 1] = hi;
+}
diff --git a/mpn/generic/mulmid_n.c b/mpn/generic/mulmid_n.c

new file mode 100644 (file)

index 0000000..b78f007
--- /dev/null
+++ b/mpn/generic/mulmid_n.c
@@ -0,0 +1,51 @@
+/* mpn_mulmid_n -- balanced middle product
+
+   Contributed by David Harvey.
+
+   THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
+   SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT IT'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2011 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+
+void
+mpn_mulmid_n (mp_ptr rp, mp_srcptr ap, mp_srcptr bp, mp_size_t n)
+{
+  ASSERT (n >= 1);
+  ASSERT (! MPN_OVERLAP_P (rp, n + 2, ap, 2*n - 1));
+  ASSERT (! MPN_OVERLAP_P (rp, n + 2, bp, n));
+
+  if (n < MULMID_TOOM42_THRESHOLD)
+    {
+      mpn_mulmid_basecase (rp, ap, 2*n - 1, bp, n);
+    }
+  else
+    {
+      mp_ptr scratch;
+      TMP_DECL;
+      TMP_MARK;
+      scratch = TMP_ALLOC_LIMBS (mpn_toom42_mulmid_itch (n));
+      mpn_toom42_mulmid (rp, ap, bp, n, scratch);
+      TMP_FREE;
+    }
+}
diff --git a/mpn/generic/mulmod_bnm1.c b/mpn/generic/mulmod_bnm1.c

index df0eca815bb4694edabf0ca26bb8a9c5851376d5..44bfe90a90e958fe79ff79065ecdb0288b9f9728 100644 (file)
--- a/mpn/generic/mulmod_bnm1.c
+++ b/mpn/generic/mulmod_bnm1.c
@@ -7,7 +7,7 @@
     SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
     GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
  
-Copyright 2009, 2010 Free Software Foundation, Inc.
+Copyright 2009, 2010, 2012 Free Software Foundation, Inc.
  
  This file is part of the GNU MP Library.
  
diff --git a/mpn/generic/perfpow.c b/mpn/generic/perfpow.c

index 709e2bb69de2174f04579d638c07fa2d289ba09f..b62dee89ab2f4572f9e5c06abaf9cbf653c90623 100644 (file)
--- a/mpn/generic/perfpow.c
+++ b/mpn/generic/perfpow.c
@@ -2,7 +2,7 @@
  
     Contributed to the GNU project by Martin Boij.
  
-Copyright 2009, 2010 Free Software Foundation, Inc.
+Copyright 2009, 2010, 2012 Free Software Foundation, Inc.
  
  This file is part of the GNU MP Library.
  
@@ -26,34 +26,38 @@ along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
  #define SMALL 20
  #define MEDIUM 100
  
-/*
-   Returns non-zero if {np,nn} == {xp,xn} ^ k.
+/* Return non-zero if {np,n} == {xp,xn} ^ k.
     Algorithm:
-       For s = 1, 2, 4, ..., s_max, compute the s least significant
-       limbs of {xp,xn}^k. Stop if they don't match the s least
-       significant limbs of {np,nn}.
-*/
+       For s = 1, 2, 4, ..., s_max, compute the s least significant limbs of
+       {xp,xn}^k. Stop if they don't match the s least significant limbs of
+       {np,n}.
+
+   FIXME: Low xn limbs can be expected to always match, if computed as a mod
+   B^{xn} root. So instead of using mpn_powlo, compute an approximation of the
+   most significant (normalized) limb of {xp,xn} ^ k (and an error bound), and
+   compare to {np,n}. Or use an even cruder approximation based on a
+   fixed-point base 2 logarithm.  */
  static int
-pow_equals (mp_srcptr np, mp_size_t nn,
+pow_equals (mp_srcptr np, mp_size_t n,
             mp_srcptr xp,mp_size_t xn,
             mp_limb_t k, mp_bitcnt_t f,
             mp_ptr tp)
  {
    mp_limb_t *tp2;
-  mp_bitcnt_t y, z, count;
+  mp_bitcnt_t y, z;
    mp_size_t i, bn;
    int ans;
    mp_limb_t h, l;
    TMP_DECL;
  
-  ASSERT (nn > 1 || (nn == 1 && np[0] > 1));
-  ASSERT (np[nn - 1] > 0);
+  ASSERT (n > 1 || (n == 1 && np[0] > 1));
+  ASSERT (np[n - 1] > 0);
    ASSERT (xn > 0);
  
    if (xn == 1 && xp[0] == 1)
      return 0;
  
-  z = 1 + (nn >> 1);
+  z = 1 + (n >> 1);
    for (bn = 1; bn < z; bn <<= 1)
      {
        mpn_powlo (tp, xp, &k, 1, bn, tp + bn);
@@ -63,19 +67,18 @@ pow_equals (mp_srcptr np, mp_size_t nn,
  
    TMP_MARK;
  
-  /* Final check. Estimate the size of {xp,xn}^k before computing
-     the power with full precision.
-     Optimization: It might pay off to make a more accurate estimation of
-     the logarithm of {xp,xn}, rather than using the index of the MSB.
-  */
+  /* Final check. Estimate the size of {xp,xn}^k before computing the power
+     with full precision.  Optimization: It might pay off to make a more
+     accurate estimation of the logarithm of {xp,xn}, rather than using the
+     index of the MSB.  */
  
-  count_leading_zeros (count, xp[xn - 1]);
-  y = xn * GMP_LIMB_BITS - count - 1;  /* msb_index (xp, xn) */
+  MPN_SIZEINBASE_2EXP(y, xp, xn, 1);
+  y -= 1;  /* msb_index (xp, xn) */
  
    umul_ppmm (h, l, k, y);
    h -= l == 0;  l--;   /* two-limb decrement */
  
-  z = f - 1; /* msb_index (np, nn) */
+  z = f - 1; /* msb_index (np, n) */
    if (h == 0 && l <= z)
      {
        mp_limb_t size;
@@ -86,7 +89,7 @@ pow_equals (mp_srcptr np, mp_size_t nn,
        tp2 = TMP_ALLOC_LIMBS (y);
  
        i = mpn_pow_1 (tp, xp, xn, k, tp2);
-      if (i == nn && mpn_cmp (tp, np, nn) == 0)
+      if (i == n && mpn_cmp (tp, np, n) == 0)
         ans = 1;
        else
         ans = 0;
@@ -100,140 +103,39 @@ pow_equals (mp_srcptr np, mp_size_t nn,
    return ans;
  }
  
-/*
-   Computes rp such that rp^k * yp = 1 (mod 2^b).
-   Algorithm:
-       Apply Hensel lifting repeatedly, each time
-       doubling (approx.) the number of known bits in rp.
-*/
-static void
-binv_root (mp_ptr rp, mp_srcptr yp,
-          mp_limb_t k, mp_size_t bn,
-          mp_bitcnt_t b, mp_ptr tp)
-{
-  mp_limb_t *tp2 = tp + bn, *tp3 = tp + 2 * bn, di, k2 = k + 1;
-  mp_bitcnt_t order[GMP_LIMB_BITS * 2];
-  int i, d = 0;
-
-  ASSERT (bn > 0);
-  ASSERT (b > 0);
-  ASSERT ((k & 1) != 0);
-
-  binvert_limb (di, k);
-
-  rp[0] = 1;
-  for (; b != 1; b = (b + 1) >> 1)
-    order[d++] = b;
-
-  for (i = d - 1; i >= 0; i--)
-    {
-      b = order[i];
-      bn = 1 + (b - 1) / GMP_LIMB_BITS;
-
-      mpn_mul_1 (tp, rp, bn, k2);
-
-      mpn_powlo (tp2, rp, &k2, 1, bn, tp3);
-      mpn_mullo_n (rp, yp, tp2, bn);
-
-      mpn_sub_n (tp2, tp, rp, bn);
-      mpn_pi1_bdiv_q_1 (rp, tp2, bn, k, di, 0);
-      if ((b % GMP_LIMB_BITS) != 0)
-       rp[(b - 1) / GMP_LIMB_BITS] &= (((mp_limb_t) 1) << (b % GMP_LIMB_BITS)) - 1;
-    }
-  return;
-}
  
-/*
-   Computes rp such that rp^2 * yp = 1 (mod 2^{b+1}).
-   Returns non-zero if such an integer rp exists.
-*/
-static int
-binv_sqroot (mp_ptr rp, mp_srcptr yp,
-            mp_size_t bn, mp_bitcnt_t b,
-            mp_ptr tp)
-{
-  mp_limb_t k = 3, *tp2 = tp + bn, *tp3 = tp + (bn << 1);
-  mp_bitcnt_t order[GMP_LIMB_BITS * 2];
-  int i, d = 0;
-
-  ASSERT (bn > 0);
-  ASSERT (b > 0);
-
-  rp[0] = 1;
-  if (b == 1)
-    {
-      if ((yp[0] & 3) != 1)
-       return 0;
-    }
-  else
-    {
-      if ((yp[0] & 7) != 1)
-       return 0;
-
-      for (; b != 2; b = (b + 2) >> 1)
-       order[d++] = b;
-
-      for (i = d - 1; i >= 0; i--)
-       {
-         b = order[i];
-         bn = 1 + b / GMP_LIMB_BITS;
-
-         mpn_mul_1 (tp, rp, bn, k);
-
-         mpn_powlo (tp2, rp, &k, 1, bn, tp3);
-         mpn_mullo_n (rp, yp, tp2, bn);
-
-#if HAVE_NATIVE_mpn_rsh1sub_n
-         mpn_rsh1sub_n (rp, tp, rp, bn);
-#else
-         mpn_sub_n (tp2, tp, rp, bn);
-         mpn_rshift (rp, tp2, bn, 1);
-#endif
-         rp[b / GMP_LIMB_BITS] &= (((mp_limb_t) 1) << (b % GMP_LIMB_BITS)) - 1;
-       }
-    }
-  return 1;
-}
-
-/*
-   Returns non-zero if {np,nn} is a kth power.
-*/
+/* Return non-zero if N = {np,n} is a kth power.
+   I = {ip,n} = N^(-1) mod B^n.  */
  static int
  is_kth_power (mp_ptr rp, mp_srcptr np,
-             mp_limb_t k, mp_srcptr yp,
-             mp_size_t nn, mp_bitcnt_t f,
+             mp_limb_t k, mp_srcptr ip,
+             mp_size_t n, mp_bitcnt_t f,
               mp_ptr tp)
  {
-  mp_limb_t x, c;
    mp_bitcnt_t b;
-  mp_size_t i, rn, xn;
+  mp_size_t rn, xn;
  
-  ASSERT (nn > 0);
-  ASSERT (((k & 1) != 0) || (k == 2));
+  ASSERT (n > 0);
+  ASSERT ((k & 1) != 0 || k == 2);
    ASSERT ((np[0] & 1) != 0);
  
    if (k == 2)
      {
        b = (f + 1) >> 1;
        rn = 1 + b / GMP_LIMB_BITS;
-      if (binv_sqroot (rp, yp, rn, b, tp) != 0)
+      if (mpn_bsqrtinv (rp, ip, b, tp) != 0)
         {
+         rp[rn - 1] &= (CNST_LIMB(1) << (b % GMP_LIMB_BITS)) - 1;
           xn = rn;
           MPN_NORMALIZE (rp, xn);
-         if (pow_equals (np, nn, rp, xn, k, f, tp) != 0)
+         if (pow_equals (np, n, rp, xn, k, f, tp) != 0)
             return 1;
  
-         /* Check if (2^b - rp)^2 == np */
-         c = 0;
-         for (i = 0; i < rn; i++)
-           {
-             x = rp[i];
-             rp[i] = -x - c;
-             c |= (x != 0);
-           }
-         rp[rn - 1] &= (((mp_limb_t) 1) << (b % GMP_LIMB_BITS)) - 1;
+         /* Check if (2^b - r)^2 == n */
+         mpn_neg (rp, rp, rn);
+         rp[rn - 1] &= (CNST_LIMB(1) << (b % GMP_LIMB_BITS)) - 1;
           MPN_NORMALIZE (rp, rn);
-         if (pow_equals (np, nn, rp, rn, k, f, tp) != 0)
+         if (pow_equals (np, n, rp, rn, k, f, tp) != 0)
             return 1;
         }
      }
@@ -241,9 +143,11 @@ is_kth_power (mp_ptr rp, mp_srcptr np,
      {
        b = 1 + (f - 1) / k;
        rn = 1 + (b - 1) / GMP_LIMB_BITS;
-      binv_root (rp, yp, k, rn, b, tp);
+      mpn_brootinv (rp, ip, rn, k, tp);
+      if ((b % GMP_LIMB_BITS) != 0)
+       rp[rn - 1] &= (CNST_LIMB(1) << (b % GMP_LIMB_BITS)) - 1;
        MPN_NORMALIZE (rp, rn);
-      if (pow_equals (np, nn, rp, rn, k, f, tp) != 0)
+      if (pow_equals (np, n, rp, rn, k, f, tp) != 0)
         return 1;
      }
    MPN_ZERO (rp, rn); /* Untrash rp */
@@ -251,17 +155,18 @@ is_kth_power (mp_ptr rp, mp_srcptr np,
  }
  
  static int
-perfpow (mp_srcptr np, mp_size_t nn,
+perfpow (mp_srcptr np, mp_size_t n,
          mp_limb_t ub, mp_limb_t g,
          mp_bitcnt_t f, int neg)
  {
-  mp_limb_t *yp, *tp, k = 0, *rp1;
-  int ans = 0;
+  mp_ptr ip, tp, rp;
+  mp_limb_t k;
+  int ans;
    mp_bitcnt_t b;
    gmp_primesieve_t ps;
    TMP_DECL;
  
-  ASSERT (nn > 0);
+  ASSERT (n > 0);
    ASSERT ((np[0] & 1) != 0);
    ASSERT (ub > 0);
  
@@ -269,18 +174,25 @@ perfpow (mp_srcptr np, mp_size_t nn,
    gmp_init_primesieve (&ps);
    b = (f + 3) >> 1;
  
-  yp = TMP_ALLOC_LIMBS (nn);
-  rp1 = TMP_ALLOC_LIMBS (nn);
-  tp = TMP_ALLOC_LIMBS (5 * nn);       /* FIXME */
-  MPN_ZERO (rp1, nn);
-
-  mpn_binvert (yp, np, 1 + (b - 1) / GMP_LIMB_BITS, tp);
+  ip = TMP_ALLOC_LIMBS (n);
+  rp = TMP_ALLOC_LIMBS (n);
+  tp = TMP_ALLOC_LIMBS (5 * n);                /* FIXME */
+  MPN_ZERO (rp, n);
+
+  /* FIXME: It seems the inverse in ip is needed only to get non-inverted
+     roots. I.e., is_kth_power computes n^{1/2} as (n^{-1})^{-1/2} and
+     similarly for kth roots. It should be more efficient to compute n^{1/2} as
+     n * n^{-1/2}, with a mullo instead of a binvert. And we can do something
+     similar for kth roots if we switch to an iteration converging to n^{1/k -
+     1}, and we can then eliminate this binvert call. */
+  mpn_binvert (ip, np, 1 + (b - 1) / GMP_LIMB_BITS, tp);
    if (b % GMP_LIMB_BITS)
-    yp[(b - 1) / GMP_LIMB_BITS] &= (((mp_limb_t) 1) << (b % GMP_LIMB_BITS)) - 1;
+    ip[(b - 1) / GMP_LIMB_BITS] &= (CNST_LIMB(1) << (b % GMP_LIMB_BITS)) - 1;
  
    if (neg)
      gmp_nextprime (&ps);
  
+  ans = 0;
    if (g > 0)
      {
        ub = MIN (ub, g + 1);
@@ -288,7 +200,7 @@ perfpow (mp_srcptr np, mp_size_t nn,
         {
           if ((g % k) == 0)
             {
-             if (is_kth_power (rp1, np, k, yp, nn, f, tp) != 0)
+             if (is_kth_power (rp, np, k, ip, n, f, tp) != 0)
                 {
                   ans = 1;
                   goto ret;
@@ -300,7 +212,7 @@ perfpow (mp_srcptr np, mp_size_t nn,
      {
        while ((k = gmp_nextprime (&ps)) < ub)
         {
-         if (is_kth_power (rp1, np, k, yp, nn, f, tp) != 0)
+         if (is_kth_power (rp, np, k, ip, n, f, tp) != 0)
             {
               ans = 1;
               goto ret;
@@ -314,35 +226,38 @@ perfpow (mp_srcptr np, mp_size_t nn,
  
  static const unsigned short nrtrial[] = { 100, 500, 1000 };
  
-/* Table of (log_{p_i} 2) values, where p_i is
-   the (nrtrial[i] + 1)'th prime number.
-*/
-static const double logs[] = { 0.1099457228193620, 0.0847016403115322, 0.0772048195144415 };
+/* Table of (log_{p_i} 2) values, where p_i is the (nrtrial[i] + 1)'th prime
+   number.  */
+static const double logs[] =
+  { 0.1099457228193620, 0.0847016403115322, 0.0772048195144415 };
  
  int
-mpn_perfect_power_p (mp_srcptr np, mp_size_t nn)
+mpn_perfect_power_p (mp_srcptr np, mp_size_t n)
  {
    mp_size_t ncn, s, pn, xn;
-  mp_limb_t *nc, factor, g = 0;
+  mp_limb_t *nc, factor, g;
    mp_limb_t exp, *prev, *next, d, l, r, c, *tp, cry;
-  mp_bitcnt_t twos = 0, count;
-  int ans, where = 0, neg = 0, trial;
+  mp_bitcnt_t twos, count;
+  int ans, where, neg, trial;
    TMP_DECL;
  
    nc = (mp_ptr) np;
  
-  if (nn < 0)
+  neg = 0;
+  if (n < 0)
      {
        neg = 1;
-      nn = -nn;
+      n = -n;
      }
  
-  if (nn == 0 || (nn == 1 && np[0] == 1))
+  if (n == 0 || (n == 1 && np[0] == 1))
      return 1;
  
    TMP_MARK;
  
-  ncn = nn;
+  g = 0;
+
+  ncn = n;
    twos = mpn_scan1 (np, 0);
    if (twos > 0)
      {
@@ -352,13 +267,13 @@ mpn_perfect_power_p (mp_srcptr np, mp_size_t nn)
           goto ret;
         }
        s = twos / GMP_LIMB_BITS;
-      if (s + 1 == nn && POW2_P (np[s]))
+      if (s + 1 == n && POW2_P (np[s]))
         {
           ans = ! (neg && POW2_P (twos));
           goto ret;
         }
        count = twos % GMP_LIMB_BITS;
-      ncn = nn - s;
+      ncn = n - s;
        nc = TMP_ALLOC_LIMBS (ncn);
        if (count > 0)
         {
@@ -379,6 +294,7 @@ mpn_perfect_power_p (mp_srcptr np, mp_size_t nn)
    else
      trial = 2;
  
+  where = 0;
    factor = mpn_trialdiv (nc, ncn, nrtrial[trial], &where);
  
    if (factor != 0)
@@ -389,10 +305,8 @@ mpn_perfect_power_p (mp_srcptr np, mp_size_t nn)
           MPN_COPY (nc, np, ncn);
         }
  
-      /* Remove factors found by trialdiv.
-        Optimization: Perhaps better to use
-        the strategy in mpz_remove ().
-      */
+      /* Remove factors found by trialdiv.  Optimization: Perhaps better to use
+        the strategy in mpz_remove ().  */
        prev = TMP_ALLOC_LIMBS (ncn + 2);
        next = TMP_ALLOC_LIMBS (ncn + 2);
        tp = TMP_ALLOC_LIMBS (4 * ncn);
@@ -482,8 +396,7 @@ mpn_perfect_power_p (mp_srcptr np, mp_size_t nn)
        while (factor != 0);
      }
  
-  count_leading_zeros (count, nc[ncn-1]);
-  count = GMP_LIMB_BITS * ncn - count;   /* log (nc) + 1 */
+  MPN_SIZEINBASE_2EXP(count, nc, ncn, 1);   /* log (nc) + 1 */
    d = (mp_limb_t) (count * logs[trial] + 1e-9) + 1;
    ans = perfpow (nc, ncn, d, g, count, neg);
  
diff --git a/mpn/generic/perfsqr.c b/mpn/generic/perfsqr.c

index 1c65dfb05c545c859f60658aa267a245ac64f32e..892da1ae729b20f3c81343e818d9b495ef870a47 100644 (file)
--- a/mpn/generic/perfsqr.c
+++ b/mpn/generic/perfsqr.c
@@ -1,8 +1,8 @@
  /* mpn_perfect_square_p(u,usize) -- Return non-zero if U is a perfect square,
     zero otherwise.
  
-Copyright 1991, 1993, 1994, 1996, 1997, 2000, 2001, 2002, 2005 Free Software
-Foundation, Inc.
+Copyright 1991, 1993, 1994, 1996, 1997, 2000, 2001, 2002, 2005, 2012 Free
+Software Foundation, Inc.
  
  This file is part of the GNU MP Library.
  
@@ -102,8 +102,8 @@ along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
  /* FIXME: The %= here isn't good, and might destroy any savings from keeping
     the PERFSQR_MOD_IDX stuff within a limb (rather than needing umul_ppmm).
     Maybe a new sort of mpn_preinv_mod_1 could accept an unnormalized divisor
-   and a shift count, like mpn_preinv_divrem_1.         But mod_34lsub1 is our
-   normal case, so lets not worry too much about mod_1.         */
+   and a shift count, like mpn_preinv_divrem_1.  But mod_34lsub1 is our
+   normal case, so let's not worry too much about mod_1.  */
  #define PERFSQR_MOD_PP(r, up, usize)                                   \
    do {                                                                 \
      if (BELOW_THRESHOLD (usize, PREINV_MOD_1_TO_MOD_1_THRESHOLD))      \
@@ -145,7 +145,7 @@ along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
    } while (0)
  
  /* The expression "(int) idx - GMP_LIMB_BITS < 0" lets the compiler use the
-   sign bit from "idx-GMP_LIMB_BITS", which might help avoid a branch. */
+   sign bit from "idx-GMP_LIMB_BITS", which might help avoid a branch.  */
  #define PERFSQR_MOD_2(r, d, inv, mhi, mlo)                     \
    do {                                                         \
      mp_limb_t  m;                                              \
@@ -185,7 +185,7 @@ mpn_perfect_square_p (mp_srcptr up, mp_size_t usize)
    /* Check that we have even multiplicity of 2, and then check that the rest is
       a possible perfect square.  Leave disabled until we can determine this
       really is an improvement.  It it is, it could completely replace the
-     simple probe above, since this should through out more non-squares, but at
+     simple probe above, since this should throw out more non-squares, but at
       the expense of somewhat more cycles.  */
    {
      mp_limb_t lo;
diff --git a/mpn/generic/popham.c b/mpn/generic/popham.c

index d81ad9dd988d77d53870d78e8bd5af558f576271..4aa0674d570762bf26d87fc6c5d7f65afbd08308 100644 (file)
--- a/mpn/generic/popham.c
+++ b/mpn/generic/popham.c
@@ -1,6 +1,7 @@
  /* mpn_popcount, mpn_hamdist -- mpn bit population count/hamming distance.
  
-Copyright 1994, 1996, 2000, 2001, 2002, 2005, 2011 Free Software Foundation, Inc.
+Copyright 1994, 1996, 2000, 2001, 2002, 2005, 2011, 2012 Free Software
+Foundation, Inc.
  
  This file is part of the GNU MP Library.
  
diff --git a/mpn/generic/powlo.c b/mpn/generic/powlo.c

index 7eb64540da7e033a3625f647c9fc498dbbce09b8..f6cc2bd97e3271639c44d49a8d5fc345249f9a08 100644 (file)
--- a/mpn/generic/powlo.c
+++ b/mpn/generic/powlo.c
@@ -1,6 +1,6 @@
  /* mpn_powlo -- Compute R = U^E mod B^n, where B is the limb base.
  
-Copyright 2007, 2008, 2009 Free Software Foundation, Inc.
+Copyright 2007, 2008, 2009, 2012 Free Software Foundation, Inc.
  
  This file is part of the GNU MP Library.
  
@@ -81,8 +81,7 @@ mpn_powlo (mp_ptr rp, mp_srcptr bp,
  
    TMP_MARK;
  
-  count_leading_zeros (cnt, ep[en - 1]);
-  ebi = (mp_bitcnt_t) en * GMP_LIMB_BITS - cnt;
+  MPN_SIZEINBASE_2EXP(ebi, ep, en, 1);
  
    windowsize = win_size (ebi);
  
diff --git a/mpn/generic/powm.c b/mpn/generic/powm.c

index 8c58795bd9432f1ec9d6c097dc929b960fd323d8..14a73934f5ab521d1e975b216d56bfa91ea3b885 100644 (file)
--- a/mpn/generic/powm.c
+++ b/mpn/generic/powm.c
@@ -6,7 +6,7 @@
     SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
     GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
  
-Copyright 2007, 2008, 2009 Free Software Foundation, Inc.
+Copyright 2007, 2008, 2009, 2010, 2011, 2012 Free Software Foundation, Inc.
  
  This file is part of the GNU MP Library.
  
@@ -74,6 +74,24 @@ along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
  #include "gmp-impl.h"
  #include "longlong.h"
  
+#undef MPN_REDC_1
+#define MPN_REDC_1(rp, up, mp, n, invm)                                        \
+  do {                                                                 \
+    mp_limb_t cy;                                                      \
+    cy = mpn_redc_1 (rp, up, mp, n, invm);                             \
+    if (cy != 0)                                                       \
+      mpn_sub_n (rp, rp, mp, n);                                       \
+  } while (0)
+
+#undef MPN_REDC_2
+#define MPN_REDC_2(rp, up, mp, n, mip)                                 \
+  do {                                                                 \
+    mp_limb_t cy;                                                      \
+    cy = mpn_redc_2 (rp, up, mp, n, mip);                              \
+    if (cy != 0)                                                       \
+      mpn_sub_n (rp, rp, mp, n);                                       \
+  } while (0)
+
  #if HAVE_NATIVE_mpn_addmul_2 || HAVE_NATIVE_mpn_redc_2
  #define WANT_REDC_2 1
  #endif
@@ -155,8 +173,7 @@ mpn_powm (mp_ptr rp, mp_srcptr bp, mp_size_t bn,
  
    TMP_MARK;
  
-  count_leading_zeros (cnt, ep[en - 1]);
-  ebi = (mp_bitcnt_t) en * GMP_LIMB_BITS - cnt;
+  MPN_SIZEINBASE_2EXP(ebi, ep, en, 1);
  
  #if 0
    if (bn < n)
@@ -212,12 +229,12 @@ mpn_powm (mp_ptr rp, mp_srcptr bp, mp_size_t bn,
    mpn_sqr (tp, this_pp, n);
  #if WANT_REDC_2
    if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_2_THRESHOLD))
-    mpn_redc_1 (rp, tp, mp, n, mip[0]);
+    MPN_REDC_1 (rp, tp, mp, n, mip[0]);
    else if (BELOW_THRESHOLD (n, REDC_2_TO_REDC_N_THRESHOLD))
-    mpn_redc_2 (rp, tp, mp, n, mip);
+    MPN_REDC_2 (rp, tp, mp, n, mip);
  #else
    if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_N_THRESHOLD))
-    mpn_redc_1 (rp, tp, mp, n, mip[0]);
+    MPN_REDC_1 (rp, tp, mp, n, mip[0]);
  #endif
    else
      mpn_redc_n (rp, tp, mp, n, mip);
@@ -229,12 +246,12 @@ mpn_powm (mp_ptr rp, mp_srcptr bp, mp_size_t bn,
        this_pp += n;
  #if WANT_REDC_2
        if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_2_THRESHOLD))
-       mpn_redc_1 (this_pp, tp, mp, n, mip[0]);
+       MPN_REDC_1 (this_pp, tp, mp, n, mip[0]);
        else if (BELOW_THRESHOLD (n, REDC_2_TO_REDC_N_THRESHOLD))
-       mpn_redc_2 (this_pp, tp, mp, n, mip);
+       MPN_REDC_2 (this_pp, tp, mp, n, mip);
  #else
        if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_N_THRESHOLD))
-       mpn_redc_1 (this_pp, tp, mp, n, mip[0]);
+       MPN_REDC_1 (this_pp, tp, mp, n, mip[0]);
  #endif
        else
         mpn_redc_n (this_pp, tp, mp, n, mip);
@@ -301,23 +318,51 @@ mpn_powm (mp_ptr rp, mp_srcptr bp, mp_size_t bn,
      {
        if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_2_THRESHOLD))
         {
+         if (REDC_1_TO_REDC_2_THRESHOLD < SQR_BASECASE_THRESHOLD
+             || BELOW_THRESHOLD (n, SQR_BASECASE_THRESHOLD))
+           {
+#undef MPN_MUL_N
+#undef MPN_SQR
+#undef MPN_REDUCE
+#define MPN_MUL_N(r,a,b,n)             mpn_mul_basecase (r,a,n,b,n)
+#define MPN_SQR(r,a,n)                 mpn_mul_basecase (r,a,n,a,n)
+#define MPN_REDUCE(rp,tp,mp,n,mip)     MPN_REDC_1 (rp, tp, mp, n, mip[0])
+             INNERLOOP;
+           }
+         else
+           {
  #undef MPN_MUL_N
  #undef MPN_SQR
  #undef MPN_REDUCE
  #define MPN_MUL_N(r,a,b,n)             mpn_mul_basecase (r,a,n,b,n)
  #define MPN_SQR(r,a,n)                 mpn_sqr_basecase (r,a,n)
-#define MPN_REDUCE(rp,tp,mp,n,mip)     mpn_redc_1 (rp, tp, mp, n, mip[0])
-         INNERLOOP;
+#define MPN_REDUCE(rp,tp,mp,n,mip)     MPN_REDC_1 (rp, tp, mp, n, mip[0])
+             INNERLOOP;
+           }
         }
        else if (BELOW_THRESHOLD (n, MUL_TOOM22_THRESHOLD))
         {
+         if (MUL_TOOM22_THRESHOLD < SQR_BASECASE_THRESHOLD
+             || BELOW_THRESHOLD (n, SQR_BASECASE_THRESHOLD))
+           {
+#undef MPN_MUL_N
+#undef MPN_SQR
+#undef MPN_REDUCE
+#define MPN_MUL_N(r,a,b,n)             mpn_mul_basecase (r,a,n,b,n)
+#define MPN_SQR(r,a,n)                 mpn_mul_basecase (r,a,n,a,n)
+#define MPN_REDUCE(rp,tp,mp,n,mip)     MPN_REDC_2 (rp, tp, mp, n, mip)
+             INNERLOOP;
+           }
+         else
+           {
  #undef MPN_MUL_N
  #undef MPN_SQR
  #undef MPN_REDUCE
  #define MPN_MUL_N(r,a,b,n)             mpn_mul_basecase (r,a,n,b,n)
  #define MPN_SQR(r,a,n)                 mpn_sqr_basecase (r,a,n)
-#define MPN_REDUCE(rp,tp,mp,n,mip)     mpn_redc_2 (rp, tp, mp, n, mip)
-         INNERLOOP;
+#define MPN_REDUCE(rp,tp,mp,n,mip)     MPN_REDC_2 (rp, tp, mp, n, mip)
+             INNERLOOP;
+           }
         }
        else if (BELOW_THRESHOLD (n, REDC_2_TO_REDC_N_THRESHOLD))
         {
@@ -326,7 +371,7 @@ mpn_powm (mp_ptr rp, mp_srcptr bp, mp_size_t bn,
  #undef MPN_REDUCE
  #define MPN_MUL_N(r,a,b,n)             mpn_mul_n (r,a,b,n)
  #define MPN_SQR(r,a,n)                 mpn_sqr (r,a,n)
-#define MPN_REDUCE(rp,tp,mp,n,mip)     mpn_redc_2 (rp, tp, mp, n, mip)
+#define MPN_REDUCE(rp,tp,mp,n,mip)     MPN_REDC_2 (rp, tp, mp, n, mip)
           INNERLOOP;
         }
        else
@@ -344,13 +389,27 @@ mpn_powm (mp_ptr rp, mp_srcptr bp, mp_size_t bn,
      {
        if (BELOW_THRESHOLD (n, MUL_TOOM22_THRESHOLD))
         {
+         if (MUL_TOOM22_THRESHOLD < SQR_BASECASE_THRESHOLD
+             || BELOW_THRESHOLD (n, SQR_BASECASE_THRESHOLD))
+           {
+#undef MPN_MUL_N
+#undef MPN_SQR
+#undef MPN_REDUCE
+#define MPN_MUL_N(r,a,b,n)             mpn_mul_basecase (r,a,n,b,n)
+#define MPN_SQR(r,a,n)                 mpn_mul_basecase (r,a,n,a,n)
+#define MPN_REDUCE(rp,tp,mp,n,mip)     MPN_REDC_1 (rp, tp, mp, n, mip[0])
+             INNERLOOP;
+           }
+         else
+           {
  #undef MPN_MUL_N
  #undef MPN_SQR
  #undef MPN_REDUCE
  #define MPN_MUL_N(r,a,b,n)             mpn_mul_basecase (r,a,n,b,n)
  #define MPN_SQR(r,a,n)                 mpn_sqr_basecase (r,a,n)
-#define MPN_REDUCE(rp,tp,mp,n,mip)     mpn_redc_1 (rp, tp, mp, n, mip[0])
-         INNERLOOP;
+#define MPN_REDUCE(rp,tp,mp,n,mip)     MPN_REDC_1 (rp, tp, mp, n, mip[0])
+             INNERLOOP;
+           }
         }
        else if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_2_THRESHOLD))
         {
@@ -359,7 +418,7 @@ mpn_powm (mp_ptr rp, mp_srcptr bp, mp_size_t bn,
  #undef MPN_REDUCE
  #define MPN_MUL_N(r,a,b,n)             mpn_mul_n (r,a,b,n)
  #define MPN_SQR(r,a,n)                 mpn_sqr (r,a,n)
-#define MPN_REDUCE(rp,tp,mp,n,mip)     mpn_redc_1 (rp, tp, mp, n, mip[0])
+#define MPN_REDUCE(rp,tp,mp,n,mip)     MPN_REDC_1 (rp, tp, mp, n, mip[0])
           INNERLOOP;
         }
        else if (BELOW_THRESHOLD (n, REDC_2_TO_REDC_N_THRESHOLD))
@@ -369,7 +428,7 @@ mpn_powm (mp_ptr rp, mp_srcptr bp, mp_size_t bn,
  #undef MPN_REDUCE
  #define MPN_MUL_N(r,a,b,n)             mpn_mul_n (r,a,b,n)
  #define MPN_SQR(r,a,n)                 mpn_sqr (r,a,n)
-#define MPN_REDUCE(rp,tp,mp,n,mip)     mpn_redc_2 (rp, tp, mp, n, mip)
+#define MPN_REDUCE(rp,tp,mp,n,mip)     MPN_REDC_2 (rp, tp, mp, n, mip)
           INNERLOOP;
         }
        else
@@ -390,23 +449,51 @@ mpn_powm (mp_ptr rp, mp_srcptr bp, mp_size_t bn,
      {
        if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_N_THRESHOLD))
         {
+         if (REDC_1_TO_REDC_N_THRESHOLD < SQR_BASECASE_THRESHOLD
+             || BELOW_THRESHOLD (n, SQR_BASECASE_THRESHOLD))
+           {
+#undef MPN_MUL_N
+#undef MPN_SQR
+#undef MPN_REDUCE
+#define MPN_MUL_N(r,a,b,n)             mpn_mul_basecase (r,a,n,b,n)
+#define MPN_SQR(r,a,n)                 mpn_mul_basecase (r,a,n,a,n)
+#define MPN_REDUCE(rp,tp,mp,n,mip)     MPN_REDC_1 (rp, tp, mp, n, mip[0])
+             INNERLOOP;
+           }
+         else
+           {
  #undef MPN_MUL_N
  #undef MPN_SQR
  #undef MPN_REDUCE
  #define MPN_MUL_N(r,a,b,n)             mpn_mul_basecase (r,a,n,b,n)
  #define MPN_SQR(r,a,n)                 mpn_sqr_basecase (r,a,n)
-#define MPN_REDUCE(rp,tp,mp,n,mip)     mpn_redc_1 (rp, tp, mp, n, mip[0])
-         INNERLOOP;
+#define MPN_REDUCE(rp,tp,mp,n,mip)     MPN_REDC_1 (rp, tp, mp, n, mip[0])
+             INNERLOOP;
+           }
         }
        else if (BELOW_THRESHOLD (n, MUL_TOOM22_THRESHOLD))
         {
+         if (MUL_TOOM22_THRESHOLD < SQR_BASECASE_THRESHOLD
+             || BELOW_THRESHOLD (n, SQR_BASECASE_THRESHOLD))
+           {
+#undef MPN_MUL_N
+#undef MPN_SQR
+#undef MPN_REDUCE
+#define MPN_MUL_N(r,a,b,n)             mpn_mul_basecase (r,a,n,b,n)
+#define MPN_SQR(r,a,n)                 mpn_mul_basecase (r,a,n,a,n)
+#define MPN_REDUCE(rp,tp,mp,n,mip)     mpn_redc_n (rp, tp, mp, n, mip)
+             INNERLOOP;
+           }
+         else
+           {
  #undef MPN_MUL_N
  #undef MPN_SQR
  #undef MPN_REDUCE
  #define MPN_MUL_N(r,a,b,n)             mpn_mul_basecase (r,a,n,b,n)
  #define MPN_SQR(r,a,n)                 mpn_sqr_basecase (r,a,n)
  #define MPN_REDUCE(rp,tp,mp,n,mip)     mpn_redc_n (rp, tp, mp, n, mip)
-         INNERLOOP;
+             INNERLOOP;
+           }
         }
        else
         {
@@ -423,13 +510,27 @@ mpn_powm (mp_ptr rp, mp_srcptr bp, mp_size_t bn,
      {
        if (BELOW_THRESHOLD (n, MUL_TOOM22_THRESHOLD))
         {
+         if (MUL_TOOM22_THRESHOLD < SQR_BASECASE_THRESHOLD
+             || BELOW_THRESHOLD (n, SQR_BASECASE_THRESHOLD))
+           {
+#undef MPN_MUL_N
+#undef MPN_SQR
+#undef MPN_REDUCE
+#define MPN_MUL_N(r,a,b,n)             mpn_mul_basecase (r,a,n,b,n)
+#define MPN_SQR(r,a,n)                 mpn_mul_basecase (r,a,n,a,n)
+#define MPN_REDUCE(rp,tp,mp,n,mip)     MPN_REDC_1 (rp, tp, mp, n, mip[0])
+             INNERLOOP;
+           }
+         else
+           {
  #undef MPN_MUL_N
  #undef MPN_SQR
  #undef MPN_REDUCE
  #define MPN_MUL_N(r,a,b,n)             mpn_mul_basecase (r,a,n,b,n)
  #define MPN_SQR(r,a,n)                 mpn_sqr_basecase (r,a,n)
-#define MPN_REDUCE(rp,tp,mp,n,mip)     mpn_redc_1 (rp, tp, mp, n, mip[0])
-         INNERLOOP;
+#define MPN_REDUCE(rp,tp,mp,n,mip)     MPN_REDC_1 (rp, tp, mp, n, mip[0])
+             INNERLOOP;
+           }
         }
        else if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_N_THRESHOLD))
         {
@@ -438,7 +539,7 @@ mpn_powm (mp_ptr rp, mp_srcptr bp, mp_size_t bn,
  #undef MPN_REDUCE
  #define MPN_MUL_N(r,a,b,n)             mpn_mul_n (r,a,b,n)
  #define MPN_SQR(r,a,n)                 mpn_sqr (r,a,n)
-#define MPN_REDUCE(rp,tp,mp,n,mip)     mpn_redc_1 (rp, tp, mp, n, mip[0])
+#define MPN_REDUCE(rp,tp,mp,n,mip)     MPN_REDC_1 (rp, tp, mp, n, mip[0])
           INNERLOOP;
         }
        else
@@ -461,12 +562,12 @@ mpn_powm (mp_ptr rp, mp_srcptr bp, mp_size_t bn,
  
  #if WANT_REDC_2
    if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_2_THRESHOLD))
-    mpn_redc_1 (rp, tp, mp, n, mip[0]);
+    MPN_REDC_1 (rp, tp, mp, n, mip[0]);
    else if (BELOW_THRESHOLD (n, REDC_2_TO_REDC_N_THRESHOLD))
-    mpn_redc_2 (rp, tp, mp, n, mip);
+    MPN_REDC_2 (rp, tp, mp, n, mip);
  #else
    if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_N_THRESHOLD))
-    mpn_redc_1 (rp, tp, mp, n, mip[0]);
+    MPN_REDC_1 (rp, tp, mp, n, mip[0]);
  #endif
    else
      mpn_redc_n (rp, tp, mp, n, mip);
diff --git a/mpn/generic/powm_sec.c b/mpn/generic/powm_sec.c

index 196030866b52eeee7c47352954ca2aa4f92fb616..ba7c0c72b067d8eb19f551384fa3a00977044664 100644 (file)
--- a/mpn/generic/powm_sec.c
+++ b/mpn/generic/powm_sec.c
@@ -47,15 +47,36 @@ along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
  
     * Choose window size without looping.  (Superoptimize or think(tm).)
  
-   * Call new division functions, not mpn_tdiv_qr.
+   * Call side-channel silent division function for converting to REDC residue.
+
+   * REDC_1_TO_REDC_2_THRESHOLD might actually represent the cutoff between
+     redc_1 and redc_n.  On such systems, we will switch to redc_2 causing
+     slowdown.
  */
  
  #include "gmp.h"
  #include "gmp-impl.h"
  #include "longlong.h"
  
-#define WANT_CACHE_SECURITY 1
+#undef MPN_REDC_1_SEC
+#define MPN_REDC_1_SEC(rp, up, mp, n, invm)                            \
+  do {                                                                 \
+    mp_limb_t cy;                                                      \
+    cy = mpn_redc_1 (rp, up, mp, n, invm);                             \
+    mpn_subcnd_n (rp, rp, mp, n, cy);                                  \
+  } while (0)
  
+#undef MPN_REDC_2_SEC
+#define MPN_REDC_2_SEC(rp, up, mp, n, mip)                             \
+  do {                                                                 \
+    mp_limb_t cy;                                                      \
+    cy = mpn_redc_2 (rp, up, mp, n, mip);                              \
+    mpn_subcnd_n (rp, rp, mp, n, cy);                                  \
+  } while (0)
+
+#if HAVE_NATIVE_mpn_addmul_2 || HAVE_NATIVE_mpn_redc_2
+#define WANT_REDC_2 1
+#endif
  
  /* Define our own mpn squaring function.  We do this since we cannot use a
     native mpn_sqr_basecase over TUNE_SQR_TOOM2_MAX, or a non-native one over
@@ -158,7 +179,7 @@ mpn_local_sqr (mp_ptr rp, mp_srcptr up, mp_size_t n, mp_ptr tp)
  #endif
  
  #define getbit(p,bi) \
-  ((p[(bi - 1) / GMP_LIMB_BITS] >> (bi - 1) % GMP_LIMB_BITS) & 1)
+  ((p[(bi - 1) / GMP_NUMB_BITS] >> (bi - 1) % GMP_NUMB_BITS) & 1)
  
  static inline mp_limb_t
  getbits (const mp_limb_t *p, mp_bitcnt_t bi, int nbits)
@@ -174,50 +195,78 @@ getbits (const mp_limb_t *p, mp_bitcnt_t bi, int nbits)
    else
      {
        bi -= nbits;                     /* bit index of low bit to extract */
-      i = bi / GMP_LIMB_BITS;          /* word index of low bit to extract */
-      bi %= GMP_LIMB_BITS;             /* bit index in low word */
+      i = bi / GMP_NUMB_BITS;          /* word index of low bit to extract */
+      bi %= GMP_NUMB_BITS;             /* bit index in low word */
        r = p[i] >> bi;                  /* extract (low) bits */
-      nbits_in_r = GMP_LIMB_BITS - bi; /* number of bits now in r */
+      nbits_in_r = GMP_NUMB_BITS - bi; /* number of bits now in r */
        if (nbits_in_r < nbits)          /* did we get enough bits? */
         r += p[i + 1] << nbits_in_r;    /* prepend bits from higher word */
        return r & (((mp_limb_t ) 1 << nbits) - 1);
      }
  }
  
+#ifndef POWM_SEC_TABLE
+#if GMP_NUMB_BITS < 50
+#define POWM_SEC_TABLE  2,33,96,780,2741
+#else
+#define POWM_SEC_TABLE  2,130,524,2578
+#endif
+#endif
+
+#if TUNE_PROGRAM_BUILD
+extern int win_size (mp_bitcnt_t);
+#else
  static inline int
  win_size (mp_bitcnt_t eb)
  {
    int k;
-  static mp_bitcnt_t x[] = {0,4,27,100,325,1026,2905,7848,20457,51670,~(mp_bitcnt_t)0};
+  static mp_bitcnt_t x[] = {0,POWM_SEC_TABLE,~(mp_bitcnt_t)0};
    for (k = 1; eb > x[k]; k++)
      ;
    return k;
  }
+#endif
  
-/* Convert U to REDC form, U_r = B^n * U mod M */
+/* Convert U to REDC form, U_r = B^n * U mod M.
+   Uses scratch space at tp of size 2un + n + 1.  */
  static void
  redcify (mp_ptr rp, mp_srcptr up, mp_size_t un, mp_srcptr mp, mp_size_t n, mp_ptr tp)
  {
+#if 0
    mp_ptr qp;
  
-  qp = tp + un + n;
+  qp = tp + un + n;            /* un + n - n + 1 = un + 1 limbs */
  
    MPN_ZERO (tp, n);
    MPN_COPY (tp + n, up, un);
+
    mpn_tdiv_qr (qp, rp, 0L, tp, un + n, mp, n);
+#else
+  /* FIXME: Use passed scratch space instead of allocating our own!  */
+  mp_ptr scratch;
+  TMP_DECL;
+  TMP_MARK;
+
+  MPN_ZERO (tp, n);
+  MPN_COPY (tp + n, up, un);
+
+  scratch = TMP_ALLOC_LIMBS ((un + n) + 2 * n + 2);
+  mpn_sb_div_r_sec (tp, un + n, mp, n, scratch);
+  MPN_COPY (rp, tp, n);
+  TMP_FREE;
+#endif
  }
  
  /* rp[n-1..0] = bp[bn-1..0] ^ ep[en-1..0] mod mp[n-1..0]
-   Requires that mp[n-1..0] is odd.  FIXME: is this true?
-   Requires that ep[en-1..0] is > 1.
-   Uses scratch space at tp of 3n+1 limbs.  */
+   Requires that mp[n-1..0] is odd.
+   Requires that ep[en-1..0] > 1.
+   Uses scratch space at tp as defined by mpn_powm_sec_itch.  */
  void
  mpn_powm_sec (mp_ptr rp, mp_srcptr bp, mp_size_t bn,
               mp_srcptr ep, mp_size_t en,
               mp_srcptr mp, mp_size_t n, mp_ptr tp)
  {
-  mp_limb_t minv;
-  int cnt;
+  mp_limb_t ip[2], *mip;
    mp_bitcnt_t ebi;
    int windowsize, this_windowsize;
    mp_limb_t expbits;
@@ -228,28 +277,63 @@ mpn_powm_sec (mp_ptr rp, mp_srcptr bp, mp_size_t bn,
    ASSERT (en > 1 || (en == 1 && ep[0] > 0));
    ASSERT (n >= 1 && ((mp[0] & 1) != 0));
  
-  count_leading_zeros (cnt, ep[en - 1]);
-  ebi = (mp_bitcnt_t) en * GMP_LIMB_BITS - cnt;
+  MPN_SIZEINBASE_2EXP(ebi, ep, en, 1);
  
    windowsize = win_size (ebi);
  
-  binvert_limb (minv, mp[0]);
-  minv = -minv;
+#if WANT_REDC_2
+  if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_2_THRESHOLD))
+    {
+      mip = ip;
+      binvert_limb (mip[0], mp[0]);
+      mip[0] = -mip[0];
+    }
+  else
+    {
+      mip = ip;
+      mpn_binvert (mip, mp, 2, tp);
+      mip[0] = -mip[0]; mip[1] = ~mip[1];
+    }
+#else
+  mip = ip;
+  binvert_limb (mip[0], mp[0]);
+  mip[0] = -mip[0];
+#endif
+
  
-  pp = tp + 4 * n;
+  pp = tp;
+  tp += (n << windowsize);     /* put tp after power table */
  
+  /* Compute pp[0] table entry */
+  /* scratch: |   n   | 1 |   n+2    |  */
+  /*          | pp[0] | 1 | redcify  |  */
    this_pp = pp;
    this_pp[n] = 1;
-  redcify (this_pp, this_pp + n, 1, mp, n, tp + 6 * n);
+  redcify (this_pp, this_pp + n, 1, mp, n, this_pp + n + 1);
    this_pp += n;
-  redcify (this_pp, bp, bn, mp, n, tp + 6 * n);
+
+  /* Compute pp[1] table entry.  To avoid excessive scratch usage in the
+     degenerate situation where B >> M, we let redcify use scratch space which
+     will later be used by the pp table (element 2 and up).  */
+  /* scratch: |   n   |   n   |  bn + n + 1  |  */
+  /*          | pp[0] | pp[1] |   redcify    |  */
+  redcify (this_pp, bp, bn, mp, n, this_pp + n);
  
    /* Precompute powers of b and put them in the temporary area at pp.  */
+  /* scratch: |   n   |   n   | ...  |                    |   2n      |  */
+  /*          | pp[0] | pp[1] | ...  | pp[2^windowsize-1] |  product  |  */
    for (i = (1 << windowsize) - 2; i > 0; i--)
      {
        mpn_mul_basecase (tp, this_pp, n, pp + n, n);
        this_pp += n;
-      mpn_redc_1_sec (this_pp, tp, mp, n, minv);
+#if WANT_REDC_2
+      if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_2_THRESHOLD))
+       MPN_REDC_1_SEC (this_pp, tp, mp, n, mip[0]);
+      else
+       MPN_REDC_2_SEC (this_pp, tp, mp, n, mip);
+#else
+      MPN_REDC_1_SEC (this_pp, tp, mp, n, mip[0]);
+#endif
      }
  
    expbits = getbits (ep, ebi, windowsize);
@@ -258,83 +342,105 @@ mpn_powm_sec (mp_ptr rp, mp_srcptr bp, mp_size_t bn,
    else
      ebi -= windowsize;
  
-#if WANT_CACHE_SECURITY
    mpn_tabselect (rp, pp, n, 1 << windowsize, expbits);
-#else
-  MPN_COPY (rp, pp + n * expbits, n);
-#endif
  
-  while (ebi != 0)
-    {
-      expbits = getbits (ep, ebi, windowsize);
-      this_windowsize = windowsize;
-      if (ebi < windowsize)
-       {
-         this_windowsize -= windowsize - ebi;
-         ebi = 0;
-       }
-      else
-       ebi -= windowsize;
-
-      do
-       {
-         mpn_local_sqr (tp, rp, n, tp + 2 * n);
-         mpn_redc_1_sec (rp, tp, mp, n, minv);
-         this_windowsize--;
-       }
-      while (this_windowsize != 0);
+  /* Main exponentiation loop.  */
+  /* scratch: |   n   |   n   | ...  |                    |     3n-4n     |  */
+  /*          | pp[0] | pp[1] | ...  | pp[2^windowsize-1] |  loop scratch |  */
+
+#define INNERLOOP                                                      \
+  while (ebi != 0)                                                     \
+    {                                                                  \
+      expbits = getbits (ep, ebi, windowsize);                         \
+      this_windowsize = windowsize;                                    \
+      if (ebi < windowsize)                                            \
+       {                                                               \
+         this_windowsize -= windowsize - ebi;                          \
+         ebi = 0;                                                      \
+       }                                                               \
+      else                                                             \
+       ebi -= windowsize;                                              \
+                                                                       \
+      do                                                               \
+       {                                                               \
+         mpn_local_sqr (tp, rp, n, tp + 2 * n);                        \
+         MPN_REDUCE (rp, tp, mp, n, mip);                              \
+         this_windowsize--;                                            \
+       }                                                               \
+      while (this_windowsize != 0);                                    \
+                                                                       \
+      mpn_tabselect (tp + 2*n, pp, n, 1 << windowsize, expbits);       \
+      mpn_mul_basecase (tp, rp, n, tp + 2*n, n);                       \
+                                                                       \
+      MPN_REDUCE (rp, tp, mp, n, mip);                                 \
+    }
  
-#if WANT_CACHE_SECURITY
-      mpn_tabselect (tp + 2*n, pp, n, 1 << windowsize, expbits);
-      mpn_mul_basecase (tp, rp, n, tp + 2*n, n);
+#if WANT_REDC_2
+  if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_2_THRESHOLD))
+    {
+#undef MPN_MUL_N
+#undef MPN_SQR
+#undef MPN_REDUCE
+#define MPN_MUL_N(r,a,b,n)             mpn_mul_basecase (r,a,n,b,n)
+#define MPN_SQR(r,a,n)                 mpn_sqr_basecase (r,a,n)
+#define MPN_REDUCE(rp,tp,mp,n,mip)     MPN_REDC_1_SEC (rp, tp, mp, n, mip[0])
+      INNERLOOP;
+    }
+  else
+    {
+#undef MPN_MUL_N
+#undef MPN_SQR
+#undef MPN_REDUCE
+#define MPN_MUL_N(r,a,b,n)             mpn_mul_basecase (r,a,n,b,n)
+#define MPN_SQR(r,a,n)                 mpn_sqr_basecase (r,a,n)
+#define MPN_REDUCE(rp,tp,mp,n,mip)     MPN_REDC_2_SEC (rp, tp, mp, n, mip)
+      INNERLOOP;
+    }
  #else
-      mpn_mul_basecase (tp, rp, n, pp + n * expbits, n);
+#undef MPN_MUL_N
+#undef MPN_SQR
+#undef MPN_REDUCE
+#define MPN_MUL_N(r,a,b,n)             mpn_mul_basecase (r,a,n,b,n)
+#define MPN_SQR(r,a,n)                 mpn_sqr_basecase (r,a,n)
+#define MPN_REDUCE(rp,tp,mp,n,mip)     MPN_REDC_1_SEC (rp, tp, mp, n, mip[0])
+  INNERLOOP;
  #endif
-      mpn_redc_1_sec (rp, tp, mp, n, minv);
-    }
  
    MPN_COPY (tp, rp, n);
    MPN_ZERO (tp + n, n);
-  mpn_redc_1_sec (rp, tp, mp, n, minv);
+
+#if WANT_REDC_2
+  if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_2_THRESHOLD))
+    MPN_REDC_1_SEC (rp, tp, mp, n, mip[0]);
+  else
+    MPN_REDC_2_SEC (rp, tp, mp, n, mip);
+#else
+  MPN_REDC_1_SEC (rp, tp, mp, n, mip[0]);
+#endif
    cnd = mpn_sub_n (tp, rp, mp, n);     /* we need just retval */
    mpn_subcnd_n (rp, rp, mp, n, !cnd);
  }
  
-#if ! HAVE_NATIVE_mpn_tabselect
-/* Select entry `which' from table `tab', which has nents entries, each `n'
-   limbs.  Store the selected entry at rp.  Reads entire table to avoid
-   side-channel information leaks.  O(n*nents).
-   FIXME: Move to its own file.  */
-void
-mpn_tabselect (volatile mp_limb_t *rp, volatile mp_limb_t *tab, mp_size_t n,
-              mp_size_t nents, mp_size_t which)
-{
-  mp_size_t k, i;
-  mp_limb_t mask;
-  volatile mp_limb_t *tp;
-
-  for (k = 0; k < nents; k++)
-    {
-      mask = -(mp_limb_t) (which == k);
-      tp = tab + n * k;
-      for (i = 0; i < n; i++)
-       {
-         rp[i] = (rp[i] & ~mask) | (tp[i] & mask);
-       }
-    }
-}
-#endif
-
  mp_size_t
  mpn_powm_sec_itch (mp_size_t bn, mp_size_t en, mp_size_t n)
  {
    int windowsize;
    mp_size_t redcify_itch, itch;
  
+  /* The top scratch usage will either be when reducing B in the 2nd redcify
+     call, or more typically n*2^windowsize + 3n or 4n, in the main loop.  (It
+     is 3n or 4n depending on if we use mpn_local_sqr or a native
+     mpn_sqr_basecase.  We assume 4n always for now.) */
+
    windowsize = win_size (en * GMP_NUMB_BITS); /* slight over-estimate of exp */
-  itch = 4 * n + (n << windowsize);
-  redcify_itch = 2 * bn + n + 1;
-  /* The 6n is due to the placement of reduce scratch 6n into the start of the
-     scratch area.  */
-  return MAX (itch, redcify_itch + 6 * n);
+
+  /* The 2n term is due to pp[0] and pp[1] at the time of the 2nd redcify call,
+     the 2bn + n + 1 term is due to redcify's own usage.  */
+  redcify_itch = (2 * n) + (2 * bn + n + 1);
+
+  /* The n * 2^windowsize term is due to the power table, the 4n term is due to
+     scratch needs of squaring/multiplication in the exponentiation loop.  */
+  itch = (n << windowsize) + (4 * n);
+
+  return MAX (itch, redcify_itch);
  }
diff --git a/mpn/generic/pre_mod_1.c b/mpn/generic/pre_mod_1.c

index 961733ba34453623077387436d3b4b9053915a57..1486d6e2e74c9194f0f74a60d12adee3b5847a9a 100644 (file)
--- a/mpn/generic/pre_mod_1.c
+++ b/mpn/generic/pre_mod_1.c
@@ -34,7 +34,6 @@ mpn_preinv_mod_1 (mp_srcptr up, mp_size_t un, mp_limb_t d, mp_limb_t dinv)
  {
    mp_size_t i;
    mp_limb_t n0, r;
-  mp_limb_t dummy;
  
    ASSERT (un >= 1);
    ASSERT (d & GMP_LIMB_HIGHBIT);
@@ -46,7 +45,7 @@ mpn_preinv_mod_1 (mp_srcptr up, mp_size_t un, mp_limb_t d, mp_limb_t dinv)
    for (i = un - 2; i >= 0; i--)
      {
        n0 = up[i];
-      udiv_qrnnd_preinv (dummy, r, r, n0, d, dinv);
+      udiv_rnnd_preinv (r, r, n0, d, dinv);
      }
    return r;
  }
diff --git a/mpn/generic/random2.c b/mpn/generic/random2.c

index 7d3da9fa4d8edb5355bad19305a2dd0d74705060..a778d4901286e6ea476a0cd5b0f277603b0e37d3 100644 (file)
--- a/mpn/generic/random2.c
+++ b/mpn/generic/random2.c
@@ -1,7 +1,7 @@
  /* mpn_random2 -- Generate random numbers with relatively long strings
     of ones and zeroes.  Suitable for border testing.
  
-Copyright 1992, 1993, 1994, 1996, 2000, 2001, 2002, 2004 Free Software
+Copyright 1992, 1993, 1994, 1996, 2000, 2001, 2002, 2004, 2012 Free Software
  Foundation, Inc.
  
  This file is part of the GNU MP Library.
@@ -22,7 +22,7 @@ along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
  #include "gmp.h"
  #include "gmp-impl.h"
  
-static void gmp_rrandomb __GMP_PROTO ((mp_ptr, gmp_randstate_t, mp_bitcnt_t));
+static void gmp_rrandomb (mp_ptr, gmp_randstate_t, mp_bitcnt_t);
  
  /* Ask _gmp_rand for 32 bits per call unless that's more than a limb can hold.
     Thus, we get the same random number sequence in the common cases.
diff --git a/mpn/generic/redc_1.c b/mpn/generic/redc_1.c

index 177f3932f925c3adf359989562f644c225b0f2fc..8c84aea0e62a2c716d561696b0036c104f6fddb2 100644 (file)
--- a/mpn/generic/redc_1.c
+++ b/mpn/generic/redc_1.c
@@ -1,10 +1,11 @@
-/* mpn_redc_1.  Set cp[] <- up[]/R^n mod mp[].  Clobber up[].
+/* mpn_redc_1.  Set rp[] <- up[]/R^n mod mp[].  Clobber up[].
     mp[] is n limbs; up[] is 2n limbs.
  
     THIS IS AN INTERNAL FUNCTION WITH A MUTABLE INTERFACE.  IT IS ONLY
     SAFE TO REACH THIS FUNCTION THROUGH DOCUMENTED INTERFACES.
  
-Copyright (C) 2000, 2001, 2002, 2004, 2008, 2009 Free Software Foundation, Inc.
+Copyright (C) 2000, 2001, 2002, 2004, 2008, 2009, 2012 Free Software
+Foundation, Inc.
  
  This file is part of the GNU MP Library.
  
@@ -24,7 +25,7 @@ along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
  #include "gmp.h"
  #include "gmp-impl.h"
  
-void
+mp_limb_t
  mpn_redc_1 (mp_ptr rp, mp_ptr up, mp_srcptr mp, mp_size_t n, mp_limb_t invm)
  {
    mp_size_t j;
@@ -40,7 +41,7 @@ mpn_redc_1 (mp_ptr rp, mp_ptr up, mp_srcptr mp, mp_size_t n, mp_limb_t invm)
        up[0] = cy;
        up++;
      }
+
    cy = mpn_add_n (rp, up, up - n, n);
-  if (cy != 0)
-    mpn_sub_n (rp, rp, mp, n);
+  return cy;
  }
diff --git a/mpn/generic/redc_1_sec.c b/mpn/generic/redc_1_sec.c

deleted file mode 100644 (file)

index 3d91438..0000000
--- a/mpn/generic/redc_1_sec.c
+++ /dev/null
@@ -1,45 +0,0 @@
-/* mpn_redc_1_sec.  Set cp[] <- up[]/R^n mod mp[].  Clobber up[].
-   mp[] is n limbs; up[] is 2n limbs.
-
-   THIS IS AN INTERNAL FUNCTION WITH A MUTABLE INTERFACE.  IT IS ONLY
-   SAFE TO REACH THIS FUNCTION THROUGH DOCUMENTED INTERFACES.
-
-Copyright (C) 2000, 2001, 2002, 2004, 2008, 2009 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-License for more details.
-
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
-
-#include "gmp.h"
-#include "gmp-impl.h"
-
-void
-mpn_redc_1_sec (mp_ptr rp, mp_ptr up, mp_srcptr mp, mp_size_t n, mp_limb_t invm)
-{
-  mp_size_t j;
-  mp_limb_t cy;
-
-  ASSERT (n > 0);
-  ASSERT_MPN (up, 2*n);
-
-  for (j = n - 1; j >= 0; j--)
-    {
-      cy = mpn_addmul_1 (up, mp, n, (up[0] * invm) & GMP_NUMB_MASK);
-      ASSERT (up[0] == 0);
-      up[0] = cy;
-      up++;
-    }
-  cy = mpn_add_n (rp, up, up - n, n);
-  mpn_subcnd_n (rp, rp, mp, n, cy);
-}
diff --git a/mpn/generic/redc_2.c b/mpn/generic/redc_2.c

index 2b27586618e9ff046e0e046f6223abb520a91b28..efc1e925444ccda12a55c83aedd6e10cb805a549 100644 (file)
--- a/mpn/generic/redc_2.c
+++ b/mpn/generic/redc_2.c
@@ -1,10 +1,11 @@
-/* mpn_redc_2.  Set cp[] <- up[]/R^n mod mp[].  Clobber up[].
+/* mpn_redc_2.  Set rp[] <- up[]/R^n mod mp[].  Clobber up[].
     mp[] is n limbs; up[] is 2n limbs.
  
     THIS IS AN INTERNAL FUNCTION WITH A MUTABLE INTERFACE.  IT IS ONLY
     SAFE TO REACH THIS FUNCTION THROUGH DOCUMENTED INTERFACES.
  
-Copyright (C) 2000, 2001, 2002, 2004, 2008 Free Software Foundation, Inc.
+Copyright (C) 2000, 2001, 2002, 2004, 2008, 2012 Free Software Foundation,
+Inc.
  
  This file is part of the GNU MP Library.
  
@@ -33,7 +34,8 @@ you lose
  /* For testing purposes, define our own mpn_addmul_2 if there is none already
     available.  */
  #ifndef HAVE_NATIVE_mpn_addmul_2
-mp_limb_t
+#undef mpn_addmul_2
+static mp_limb_t
  mpn_addmul_2 (mp_ptr rp, mp_srcptr up, mp_size_t n, mp_srcptr vp)
  {
    rp[n] = mpn_addmul_1 (rp, up, n, vp[0]);
@@ -66,7 +68,7 @@ mpn_addmul_2 (mp_ptr rp, mp_srcptr up, mp_size_t n, mp_srcptr vp)
    } while (0)
  #endif
  
-void
+mp_limb_t
  mpn_redc_2 (mp_ptr rp, mp_ptr up, mp_srcptr mp, mp_size_t n, mp_srcptr mip)
  {
    mp_limb_t q[2];
@@ -92,7 +94,7 @@ mpn_redc_2 (mp_ptr rp, mp_ptr up, mp_srcptr mp, mp_size_t n, mp_srcptr mip)
        up[n] = upn;
        up += 2;
      }
+
    cy = mpn_add_n (rp, up, up - n, n);
-  if (cy != 0)
-    mpn_sub_n (rp, rp, mp, n);
+  return cy;
  }
diff --git a/mpn/generic/redc_n.c b/mpn/generic/redc_n.c

index 99f618ff4839c81bae4ac4b497686bac9a42741b..debfba2218589aa068089b93410c8ef10e2f268a 100644 (file)
--- a/mpn/generic/redc_n.c
+++ b/mpn/generic/redc_n.c
@@ -1,10 +1,10 @@
-/* mpn_redc_n.  Set cp[] <- up[]/R^n mod mp[].  Clobber up[].
+/* mpn_redc_n.  Set rp[] <- up[]/R^n mod mp[].  Clobber up[].
     mp[] is n limbs; up[] is 2n limbs, the inverse ip[] is n limbs.
  
     THIS IS AN INTERNAL FUNCTION WITH A MUTABLE INTERFACE.  IT IS ONLY
     SAFE TO REACH THIS FUNCTION THROUGH DOCUMENTED INTERFACES.
  
-Copyright (C) 2009 Free Software Foundation, Inc.
+Copyright 2009, 2012 Free Software Foundation, Inc.
  
  This file is part of the GNU MP Library.
  
@@ -32,6 +32,8 @@ along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
      assumption.
  
    * Decrease scratch usage.
+
+  * Consider removing the residue canonicalisation.
  */
  
  void
@@ -43,6 +45,8 @@ mpn_redc_n (mp_ptr rp, mp_ptr up, mp_srcptr mp, mp_size_t n, mp_srcptr ip)
    TMP_DECL;
    TMP_MARK;
  
+  ASSERT (n > 8);
+
    rn = mpn_mulmod_bnm1_next_size (n);
  
    scratch = TMP_ALLOC_LIMBS (n + rn + mpn_mulmod_bnm1_itch (rn, n, n));
diff --git a/mpn/generic/remove.c b/mpn/generic/remove.c

index 427a46fbd1cb04c54bf65e25a5aabcba5ae010c9..446b259b783a0abdabf8c876e970c3c9af42f512 100644 (file)
--- a/mpn/generic/remove.c
+++ b/mpn/generic/remove.c
@@ -7,7 +7,7 @@
     SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
     GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.
  
-Copyright 2009 Free Software Foundation, Inc.
+Copyright 2009, 2012 Free Software Foundation, Inc.
  
  This file is part of the GNU MP Library.
  
@@ -47,6 +47,25 @@ along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
     * If we allow ourselves to clobber U, we could save the other of qp and qp2.
  */
  
+/* FIXME: We need to wrap mpn_bdiv_qr due to the itch interface.  This need
+   indicates a flaw in the current itch mechanism: Which operands not greater
+   than un,un will incur the worst itch?  We need a parallel foo_maxitch set
+   of functions.  */
+static void
+mpn_bdiv_qr_wrap (mp_ptr qp, mp_ptr rp,
+                 mp_srcptr np, mp_size_t nn,
+                 mp_srcptr dp, mp_size_t dn)
+{
+  mp_ptr scratch_out;
+  TMP_DECL;
+
+  TMP_MARK;
+  scratch_out = TMP_ALLOC_LIMBS (mpn_bdiv_qr_itch (nn, dn));
+  mpn_bdiv_qr (qp, rp, np, nn, dp, dn, scratch_out);
+
+  TMP_FREE;
+}
+
  mp_bitcnt_t
  mpn_remove (mp_ptr wp, mp_size_t *wn,
             mp_ptr up, mp_size_t un, mp_ptr vp, mp_size_t vn,
@@ -55,7 +74,7 @@ mpn_remove (mp_ptr wp, mp_size_t *wn,
    mp_ptr    pwpsp[LOG];
    mp_size_t pwpsn[LOG];
    mp_size_t npowers;
-  mp_ptr tp, qp, np, pp, qp2, scratch_out;
+  mp_ptr tp, qp, np, pp, qp2;
    mp_size_t pn, nn, qn, i;
    mp_bitcnt_t pwr;
    TMP_DECL;
@@ -67,25 +86,21 @@ mpn_remove (mp_ptr wp, mp_size_t *wn,
  
    TMP_MARK;
  
-  tp = TMP_ALLOC_LIMBS ((un + vn) / 2); /* remainder */
-  qp = TMP_ALLOC_LIMBS (un);           /* quotient, alternating */
-  qp2 = TMP_ALLOC_LIMBS (un);          /* quotient, alternating */
+  tp = TMP_ALLOC_LIMBS ((un + 1 + vn) / 2); /* remainder */
+  qp = TMP_ALLOC_LIMBS (un + 1);       /* quotient, alternating */
+  qp2 = TMP_ALLOC_LIMBS (un + 1);      /* quotient, alternating */
    np = TMP_ALLOC_LIMBS (un + LOG);     /* powers of V */
    pp = vp;
    pn = vn;
  
-  /* FIXME: This allocation need indicate a flaw in the current itch mechanism:
-     Which operands not greater than un,un will incur the worst itch?  We need
-     a parallel foo_maxitch set of functions.  */
-  scratch_out = TMP_ALLOC_LIMBS (mpn_bdiv_qr_itch (un, un >> 1));
-
    MPN_COPY (qp, up, un);
    qn = un;
  
    npowers = 0;
    while (qn >= pn)
      {
-      mpn_bdiv_qr (qp2, tp, qp, qn, pp, pn, scratch_out);
+      qp[qn] = 0;
+      mpn_bdiv_qr_wrap (qp2, tp, qp, qn + 1, pp, pn);
        if (!mpn_zero_p (tp, pn))
         break;                  /* could not divide by V^npowers */
  
@@ -100,7 +115,7 @@ mpn_remove (mp_ptr wp, mp_size_t *wn,
        if (((mp_bitcnt_t) 2 << npowers) - 1 > cap)
         break;
  
-      nn = 2 * pn - 1;         /* next power will be at least this many limbs */
+      nn = 2 * pn - 1;         /* next power will be at least this large */
        if (nn > qn)
         break;                  /* next power would be overlarge */
  
@@ -123,7 +138,8 @@ mpn_remove (mp_ptr wp, mp_size_t *wn,
        if (pwr + ((mp_bitcnt_t) 1 << i) > cap)
         continue;               /* V^i would bring us past cap */
  
-      mpn_bdiv_qr (qp2, tp, qp, qn, pp, pn, scratch_out);
+      qp[qn] = 0;
+      mpn_bdiv_qr_wrap (qp2, tp, qp, qn + 1, pp, pn);
        if (!mpn_zero_p (tp, pn))
         continue;               /* could not divide by V^i */
  
diff --git a/mpn/generic/rootrem.c b/mpn/generic/rootrem.c

index 272b95c1346518f7d252f97f9c1f0a2ca3caa7aa..b3664121409fd0a57744451a036c5494fda4068a 100644 (file)
--- a/mpn/generic/rootrem.c
+++ b/mpn/generic/rootrem.c
@@ -8,7 +8,7 @@
     ONLY SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT'S ALMOST
     GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
  
-Copyright 2002, 2005, 2009, 2010 Free Software Foundation, Inc.
+Copyright 2002, 2005, 2009, 2010, 2011, 2012 Free Software Foundation, Inc.
  
  This file is part of the GNU MP Library.
  
@@ -79,14 +79,15 @@ mp_size_t
  mpn_rootrem (mp_ptr rootp, mp_ptr remp,
              mp_srcptr up, mp_size_t un, mp_limb_t k)
  {
+  mp_size_t m;
    ASSERT (un > 0);
    ASSERT (up[un - 1] != 0);
    ASSERT (k > 1);
  
-  if ((remp == NULL) && (un / k > 2))
-    /* call mpn_rootrem recursively, padding {up,un} with k zero limbs,
-       which will produce an approximate root with one more limb,
-       so that in most cases we can conclude. */
+  m = (un - 1) / k;            /* ceil(un/k) - 1 */
+  if (remp == NULL && m > 2)
+    /* Pad {up,un} with k zero limbs.  This will produce an approximate root
+       with one more limb, allowing us to compute the exact integral result. */
      {
        mp_ptr sp, wp;
        mp_size_t rn, sn, wn;
@@ -94,21 +95,21 @@ mpn_rootrem (mp_ptr rootp, mp_ptr remp,
        TMP_MARK;
        wn = un + k;
        wp = TMP_ALLOC_LIMBS (wn); /* will contain the padded input */
-      sn = (un - 1) / k + 2; /* ceil(un/k) + 1 */
+      sn = m + 2; /* ceil(un/k) + 1 */
        sp = TMP_ALLOC_LIMBS (sn); /* approximate root of padded input */
        MPN_COPY (wp + k, up, un);
        MPN_ZERO (wp, k);
        rn = mpn_rootrem_internal (sp, NULL, wp, wn, k, 1);
-      /* the approximate root S = {sp,sn} is either the correct root of
-        {sp,sn}, or one too large. Thus unless the least significant limb
-        of S is 0 or 1, we can deduce the root of {up,un} is S truncated by
-        one limb. (In case sp[0]=1, we can deduce the root, but not decide
+      /* The approximate root S = {sp,sn} is either the correct root of
+        {wp,wn}, or 1 too large.  Thus unless the least significant limb of
+        S is 0 or 1, we can deduce the root of {up,un} is S truncated by one
+        limb.  (In case sp[0]=1, we can deduce the root, but not decide
          whether it is exact or not.) */
        MPN_COPY (rootp, sp + 1, sn - 1);
        TMP_FREE;
        return rn;
      }
-  else /* remp <> NULL */
+  else
      {
        return mpn_rootrem_internal (rootp, remp, up, un, k, 0);
      }
@@ -124,7 +125,6 @@ mpn_rootrem_internal (mp_ptr rootp, mp_ptr remp, mp_srcptr up, mp_size_t un,
    mp_limb_t save, save2, cy;
    unsigned long int unb; /* number of significant bits of {up,un} */
    unsigned long int xnb; /* number of significant bits of the result */
-  unsigned int cnt;
    unsigned long b, kk;
    unsigned long sizes[GMP_NUMB_BITS + 1];
    int ni, i;
@@ -134,16 +134,6 @@ mpn_rootrem_internal (mp_ptr rootp, mp_ptr remp, mp_srcptr up, mp_size_t un,
  
    TMP_MARK;
  
-  /* qp and wp need enough space to store S'^k where S' is an approximate
-     root. Since S' can be as large as S+2, the worst case is when S=2 and
-     S'=4. But then since we know the number of bits of S in advance, S'
-     can only be 3 at most. Similarly for S=4, then S' can be 6 at most.
-     So the worst case is S'/S=3/2, thus S'^k <= (3/2)^k * S^k. Since S^k
-     fits in un limbs, the number of extra limbs needed is bounded by
-     ceil(k*log2(3/2)/GMP_NUMB_BITS). */
-#define EXTRA 2 + (mp_size_t) (0.585 * (double) k / (double) GMP_NUMB_BITS)
-  qp = TMP_ALLOC_LIMBS (un + EXTRA); /* will contain quotient and remainder
-                                       of R/(k*S^(k-1)), and S^k */
    if (remp == NULL)
      {
        rp = TMP_ALLOC_LIMBS (un + 1);     /* will contain the remainder */
@@ -155,10 +145,8 @@ mpn_rootrem_internal (mp_ptr rootp, mp_ptr remp, mp_srcptr up, mp_size_t un,
        rp = remp;
      }
    sp = rootp;
-  wp = TMP_ALLOC_LIMBS (un + EXTRA); /* will contain S^(k-1), k*S^(k-1),
-                                       and temporary for mpn_pow_1 */
-  count_leading_zeros (cnt, up[un - 1]);
-  unb = un * GMP_NUMB_BITS - cnt + GMP_NAIL_BITS;
+
+  MPN_SIZEINBASE_2EXP(unb, up, un, 1);
    /* unb is the number of bits of the input U */
  
    xnb = (unb - 1) / k + 1;     /* ceil (unb / k) */
@@ -217,6 +205,19 @@ mpn_rootrem_internal (mp_ptr rootp, mp_ptr remp, mp_srcptr up, mp_size_t un,
       Newton iteration will first compute sizes[ni-1] extra bits,
       then sizes[ni-2], ..., then sizes[0] = b. */
  
+  /* qp and wp need enough space to store S'^k where S' is an approximate
+     root. Since S' can be as large as S+2, the worst case is when S=2 and
+     S'=4. But then since we know the number of bits of S in advance, S'
+     can only be 3 at most. Similarly for S=4, then S' can be 6 at most.
+     So the worst case is S'/S=3/2, thus S'^k <= (3/2)^k * S^k. Since S^k
+     fits in un limbs, the number of extra limbs needed is bounded by
+     ceil(k*log2(3/2)/GMP_NUMB_BITS). */
+#define EXTRA 2 + (mp_size_t) (0.585 * (double) k / (double) GMP_NUMB_BITS)
+  qp = TMP_ALLOC_LIMBS (un + EXTRA); /* will contain quotient and remainder
+                                       of R/(k*S^(k-1)), and S^k */
+  wp = TMP_ALLOC_LIMBS (un + EXTRA); /* will contain S^(k-1), k*S^(k-1),
+                                       and temporary for mpn_pow_1 */
+
    wp[0] = 1; /* {sp,sn}^(k-1) = 1 */
    wn = 1;
    for (i = ni; i != 0; i--)
@@ -292,12 +293,7 @@ mpn_rootrem_internal (mp_ptr rootp, mp_ptr remp, mp_srcptr up, mp_size_t un,
         }
        else
         {
-         mp_ptr tp;
           qn = rn - wn; /* expected quotient size */
-         /* tp must have space for wn limbs.
-            The quotient needs rn-wn+1 limbs, thus quotient+remainder
-            need altogether rn+1 limbs. */
-         tp = qp + qn + 1;     /* put remainder in Q buffer */
           mpn_div_q (qp, rp, rn, wp, wn, scratch);
           qn += qp[qn] != 0;
         }
@@ -393,7 +389,7 @@ mpn_rootrem_internal (mp_ptr rootp, mp_ptr remp, mp_srcptr up, mp_size_t un,
        ASSERT_ALWAYS (rn >= qn);
  
        /* R = R - Q = floor(U/2^kk) - S^k */
-      if ((i > 1) || (approx == 0))
+      if (i > 1 || approx == 0)
         {
           mpn_sub (rp, rp, rn, qp, qn);
           MPN_NORMALIZE (rp, rn);
diff --git a/mpn/generic/sb_div_sec.c b/mpn/generic/sb_div_sec.c

new file mode 100644 (file)

index 0000000..d47e7e2
--- /dev/null
+++ b/mpn/generic/sb_div_sec.c
@@ -0,0 +1,105 @@
+/* mpn_sb_div_qr_sec, mpn_sb_div_r_sec -- Compute Q = floor(U / V), U = U mod
+   V.  Side-channel silent under the assumption that the used instructions are
+   side-channel silent.
+
+   Contributed to the GNU project by Torbjorn Granlund.
+
+   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
+   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2011, 2012 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify it
+under the terms of the GNU Lesser General Public License as published by the
+Free Software Foundation; either version 3 of the License, or (at your option)
+any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License
+for more details.
+
+You should have received a copy of the GNU Lesser General Public License along
+with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+#if OPERATION_sb_div_qr_sec
+/* Needs (nn + dn + 1) + mpn_sbpi1_div_qr_sec's needs of (2nn' - dn + 1) for a
+   total of 3nn + 4 limbs at tp.  Note that mpn_sbpi1_div_qr_sec's nn is one
+   greater than ours, therefore +4 and not just +2.  */
+#define FNAME mpn_sb_div_qr_sec
+#define Q(q) q,
+#endif
+#if OPERATION_sb_div_r_sec
+/* Needs (nn + dn + 1) + mpn_sbpi1_div_r_sec's needs of (dn + 1) for a total of
+   nn + 2dn + 2 limbs at tp.  */
+#define FNAME mpn_sb_div_r_sec
+#define Q(q)
+#endif
+
+void
+FNAME (Q(mp_ptr qp)
+       mp_ptr np, mp_size_t nn,
+       mp_srcptr dp, mp_size_t dn,
+       mp_ptr tp)
+{
+  mp_limb_t d1, d0, qh;
+  unsigned int cnt;
+  mp_ptr np2, dp2;
+  gmp_pi1_t dinv;
+  mp_limb_t inv32;
+  mp_limb_t cy;
+
+  ASSERT (dn >= 1);
+  ASSERT (nn >= dn);
+  ASSERT (dp[dn - 1] != 0);
+
+  d1 = dp[dn - 1];
+  count_leading_zeros (cnt, d1);
+
+  if (cnt != 0)
+    {
+      dp2 = tp;                                        /* dn limbs */
+      mpn_lshift (dp2, dp, dn, cnt);
+
+      np2 = tp + dn;                           /* (nn + 1) limbs */
+      cy = mpn_lshift (np2, np, nn, cnt);
+      np2[nn++] = cy;
+    }
+  else
+    {
+      /* FIXME: Consider copying np->np2 here, adding a 0-limb at the top.
+        That would simplify the underlying sbpi1 function, since then it
+        could assume nn > dn.  */
+      dp2 = (mp_ptr) dp;
+      np2 = np;
+    }
+
+  d0 = dp2[dn - 1];
+  d0 += (~d0 != 0);
+  invert_limb (inv32, d0);
+
+  /* We add nn + dn to tp here, not nn + 1 + dn, as expected.  This is since nn
+     here will have been incremented.  */
+#if OPERATION_sb_div_qr_sec
+  qh = mpn_sbpi1_div_qr_sec (qp, np2, nn, dp2, dn, inv32, tp + nn + dn);
+#else
+  mpn_sbpi1_div_r_sec (np2, nn, dp2, dn, inv32, tp + nn + dn);
+#endif
+
+  if (cnt == 0)
+    ;                          /* we have np = np2 here. */
+  else
+    mpn_rshift (np, np2, dn, cnt);
+
+#if OPERATION_sb_div_qr_sec
+  if (cnt == 0)
+    qp[nn - dn] = qh;
+#endif
+}
diff --git a/mpn/generic/sbpi1_bdiv_q.c b/mpn/generic/sbpi1_bdiv_q.c

index 3d2f743cde84a12a970e4d7be3fe7aa8b588a507..013eb81b5a5da47625b70875d4f9a7a26bb0ae26 100644 (file)
--- a/mpn/generic/sbpi1_bdiv_q.c
+++ b/mpn/generic/sbpi1_bdiv_q.c
@@ -7,7 +7,7 @@
     IT IS ONLY SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS
     ALMOST GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.
  
-Copyright 2005, 2006, 2009 Free Software Foundation, Inc.
+Copyright 2005, 2006, 2009, 2011, 2012 Free Software Foundation, Inc.
  
  This file is part of the GNU MP Library.
  
@@ -58,25 +58,27 @@ mpn_sbpi1_bdiv_q (mp_ptr qp,
    ASSERT (dn > 0);
    ASSERT (nn >= dn);
    ASSERT ((dp[0] & 1) != 0);
+  /* FIXME: Add ASSERTs for allowable overlapping; i.e., that qp = np is OK,
+     but some other N/Q overlaps will not work.  */
  
    for (i = nn - dn; i > 0; i--)
      {
        q = dinv * np[0];
-      qp[0] = ~q;
-      qp++;
        cy = mpn_addmul_1 (np, dp, dn, q);
        mpn_add_1 (np + dn, np + dn, i, cy);
        ASSERT (np[0] == 0);
+      qp[0] = ~q;
+      qp++;
        np++;
      }
  
    for (i = dn; i > 1; i--)
      {
        q = dinv * np[0];
-      qp[0] = ~q;
-      qp++;
        mpn_addmul_1 (np, dp, i, q);
        ASSERT (np[0] == 0);
+      qp[0] = ~q;
+      qp++;
        np++;
      }
  
diff --git a/mpn/generic/sbpi1_bdiv_qr.c b/mpn/generic/sbpi1_bdiv_qr.c

index c20477a3d988262fa0b7d56e97b605c6907752ed..666f801d9b3b7dde35a8c02c31b5962ae8b53f4e 100644 (file)
--- a/mpn/generic/sbpi1_bdiv_qr.c
+++ b/mpn/generic/sbpi1_bdiv_qr.c
@@ -7,7 +7,7 @@
     IT IS ONLY SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS
     ALMOST GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.
  
-Copyright 2006, 2009 Free Software Foundation, Inc.
+Copyright 2006, 2009, 2011, 2012 Free Software Foundation, Inc.
  
  This file is part of the GNU MP Library.
  
@@ -53,6 +53,8 @@ mpn_sbpi1_bdiv_qr (mp_ptr qp,
    ASSERT (dn > 0);
    ASSERT (nn > dn);
    ASSERT ((dp[0] & 1) != 0);
+  /* FIXME: Add ASSERTs for allowable overlapping; i.e., that qp = np is OK,
+     but some other N/Q overlaps will not work.  */
  
    qn = nn - dn;
  
@@ -67,9 +69,8 @@ mpn_sbpi1_bdiv_qr (mp_ptr qp,
           mp_limb_t q;
  
           q = dinv * np[i];
-         qp[i] = ~q;
-
           np[i] = mpn_addmul_1 (np + i, dp, dn, q);
+         qp[i] = ~q;
         }
        rh += mpn_add (np + dn, np + dn, qn, np, dn);
        ql = mpn_add_1 (qp, qp, dn, ql);
@@ -83,9 +84,8 @@ mpn_sbpi1_bdiv_qr (mp_ptr qp,
        mp_limb_t q;
  
        q = dinv * np[i];
-      qp[i] = ~q;
-
        np[i] = mpn_addmul_1 (np + i, dp, dn, q);
+      qp[i] = ~q;
      }
  
    rh += mpn_add_n (np + dn, np + dn, np, qn);
diff --git a/mpn/generic/sbpi1_div_sec.c b/mpn/generic/sbpi1_div_sec.c

new file mode 100644 (file)

index 0000000..60dfe6f
--- /dev/null
+++ b/mpn/generic/sbpi1_div_sec.c
@@ -0,0 +1,162 @@
+/* mpn_sbpi1_div_qr_sec, mpn_sbpi1_div_r_sec -- Compute Q = floor(U / V), U = U
+   mod V.  Side-channel silent under the assumption that the used instructions
+   are side-channel silent.
+
+   Contributed to the GNU project by Torbjorn Granlund.
+
+   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
+   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2011, 2012, 2013 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify it
+under the terms of the GNU Lesser General Public License as published by the
+Free Software Foundation; either version 3 of the License, or (at your option)
+any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License
+for more details.
+
+You should have received a copy of the GNU Lesser General Public License along
+with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+/* This side-channel silent division algorithm reduces the partial remainder by
+   GMP_NUMB_BITS/2 bits at a time, compared to GMP_NUMB_BITS for the main
+   division algorithm.  We do not insist on reducing by exactly
+   GMP_NUMB_BITS/2, but may leave a partial remainder that is D*B^i to 3D*B^i
+   too large (B is the limb base, D is the divisor, and i is the induction
+   variable); the subsequent step will handle the extra partial remainder bits.
+
+   With that partial remainder reduction, each step generates a quotient "half
+   limb".  The outer loop generates two quotient half limbs, an upper (q1h) and
+   a lower (q0h) which are stored sparsely in separate limb arrays.  These
+   arrays are added at the end; using separate arrays avoids data-dependent
+   carry propagation which could else pose a side-channel leakage problem.
+
+   The quotient half limbs may be between -3 and 0 from the accurate value
+   ("accurate" being the one which corresponds to a reduction to a principal
+   partial remainder).  Too small quotient half limbs correspond to too large
+   remainders, which we reduce later, as described above.
+
+   In order to keep quotients from getting too big, corresponding to a negative
+   partial remainder, we use an inverse which is slightly smaller than usual.
+*/
+
+#if OPERATION_sbpi1_div_qr_sec
+/* Needs (dn + 1) + (nn - dn) + (nn - dn) = 2nn - dn + 1 limbs at tp. */
+#define FNAME mpn_sbpi1_div_qr_sec
+#define Q(q) q,
+#define RETTYPE mp_limb_t
+#endif
+#if OPERATION_sbpi1_div_r_sec
+/* Needs (dn + 1) limbs at tp.  */
+#define FNAME mpn_sbpi1_div_r_sec
+#define Q(q)
+#define RETTYPE void
+#endif
+
+RETTYPE
+FNAME (Q(mp_ptr qp)
+       mp_ptr np, mp_size_t nn,
+       mp_srcptr dp, mp_size_t dn,
+       mp_limb_t dinv,
+       mp_ptr tp)
+{
+  mp_limb_t nh, cy, q1h, q0h, dummy, cnd;
+  mp_size_t i;
+  mp_ptr hp;
+#if OPERATION_sbpi1_div_qr_sec
+  mp_limb_t qh;
+  mp_ptr qlp, qhp;
+#endif
+
+  ASSERT (dn >= 1);
+  ASSERT (nn >= dn);
+  ASSERT ((dp[dn - 1] & GMP_NUMB_HIGHBIT) != 0);
+
+  if (nn == dn)
+    {
+      cy = mpn_sub_n (np, np, dp, dn);
+      mpn_addcnd_n (np, np, dp, dn, cy);
+#if OPERATION_sbpi1_div_qr_sec
+      return 1 - cy;
+#else
+      return;
+#endif
+    }
+
+  /* Create a divisor copy shifted half a limb.  */
+  hp = tp;                                     /* (dn + 1) limbs */
+  hp[dn] = mpn_lshift (hp, dp, dn, GMP_NUMB_BITS / 2);
+
+#if OPERATION_sbpi1_div_qr_sec
+  qlp = tp + (dn + 1);                         /* (nn - dn) limbs */
+  qhp = tp + (nn + 1);                         /* (nn - dn) limbs */
+#endif
+
+  np += nn - dn;
+  nh = 0;
+
+  for (i = nn - dn - 1; i >= 0; i--)
+    {
+      np--;
+
+      nh = (nh << GMP_NUMB_BITS/2) + (np[dn] >> GMP_NUMB_BITS/2);
+      umul_ppmm (q1h, dummy, nh, dinv);
+      q1h += nh;
+#if OPERATION_sbpi1_div_qr_sec
+      qhp[i] = q1h;
+#endif
+      mpn_submul_1 (np, hp, dn + 1, q1h);
+
+      nh = np[dn];
+      umul_ppmm (q0h, dummy, nh, dinv);
+      q0h += nh;
+#if OPERATION_sbpi1_div_qr_sec
+      qlp[i] = q0h;
+#endif
+      nh -= mpn_submul_1 (np, dp, dn, q0h);
+    }
+
+  /* 1st adjustment depends on extra high remainder limb.  */
+  cnd = nh != 0;                               /* FIXME: cmp-to-int */
+#if OPERATION_sbpi1_div_qr_sec
+  qlp[0] += cnd;
+#endif
+  nh -= mpn_subcnd_n (np, np, dp, dn, cnd);
+
+  /* 2nd adjustment depends on remainder/divisor comparison as well as whether
+     extra remainder limb was nullified by previous subtract.  */
+  cy = mpn_sub_n (np, np, dp, dn);
+  cy = cy - nh;
+#if OPERATION_sbpi1_div_qr_sec
+  qlp[0] += 1 - cy;
+#endif
+  mpn_addcnd_n (np, np, dp, dn, cy);
+
+  /* 3rd adjustment depends on remainder/divisor comparison.  */
+  cy = mpn_sub_n (np, np, dp, dn);
+#if OPERATION_sbpi1_div_qr_sec
+  qlp[0] += 1 - cy;
+#endif
+  mpn_addcnd_n (np, np, dp, dn, cy);
+
+#if OPERATION_sbpi1_div_qr_sec
+  /* Combine quotient halves into final quotient.  */
+  qh = mpn_lshift (qhp, qhp, nn - dn, GMP_NUMB_BITS/2);
+  qh += mpn_add_n (qp, qhp, qlp, nn - dn);
+
+  return qh;
+#else
+  return;
+#endif
+}
diff --git a/mpn/generic/set_str.c b/mpn/generic/set_str.c

index 83f5ac550d7e7a17ed9a71860429fc00e466fa57..fd3c5957ce1e59d0f638e423679d0599ff2190e9 100644 (file)
--- a/mpn/generic/set_str.c
+++ b/mpn/generic/set_str.c
@@ -10,7 +10,7 @@
     GNU MP RELEASE.
  
  Copyright 1991, 1992, 1993, 1994, 1996, 2000, 2001, 2002, 2004, 2006, 2007,
-2008 Free Software Foundation, Inc.
+2008, 2012, 2013 Free Software Foundation, Inc.
  
  This file is part of the GNU MP Library.
  
@@ -131,8 +131,7 @@ mpn_set_str_compute_powtab (powers_t *powtab, mp_ptr powtab_mem, mp_size_t un, i
    long i, pi;
    mp_size_t n;
    mp_ptr p, t;
-  unsigned normalization_steps;
-  mp_limb_t big_base, big_base_inverted;
+  mp_limb_t big_base;
    int chars_per_limb;
    size_t digits_in_base;
    mp_size_t shift;
@@ -141,8 +140,6 @@ mpn_set_str_compute_powtab (powers_t *powtab, mp_ptr powtab_mem, mp_size_t un, i
  
    chars_per_limb = mp_bases[base].chars_per_limb;
    big_base = mp_bases[base].big_base;
-  big_base_inverted = mp_bases[base].big_base_inverted;
-  count_leading_zeros (normalization_steps, big_base);
  
    p = powtab_mem_ptr;
    powtab_mem_ptr += 1;
@@ -239,7 +236,9 @@ mpn_dc_set_str (mp_ptr rp, const unsigned char *str, size_t str_len,
  
    if (hn == 0)
      {
-      MPN_ZERO (rp, powtab->n + sn);
+      /* Zero +1 limb here, to avoid reading an allocated but uninitialised
+        limb in mpn_incr_u below.  */
+      MPN_ZERO (rp, powtab->n + sn + 1);
      }
    else
      {
diff --git a/mpn/generic/sizeinbase.c b/mpn/generic/sizeinbase.c

index 303359a5cc19a1680a1cd15456744451ea9f44c0..27bb19c6ad5a54f047a51d4c4c2edcc74f56be9e 100644 (file)
--- a/mpn/generic/sizeinbase.c
+++ b/mpn/generic/sizeinbase.c
@@ -4,7 +4,8 @@
     CERTAIN TO BE SUBJECT TO INCOMPATIBLE CHANGES OR DISAPPEAR COMPLETELY IN
     FUTURE GNU MP RELEASES.
  
-Copyright 1991, 1993, 1994, 1995, 2001, 2002 Free Software Foundation, Inc.
+Copyright 1991, 1993, 1994, 1995, 2001, 2002, 2011, 2012 Free Software
+Foundation, Inc.
  
  This file is part of the GNU MP Library.
  
@@ -32,27 +33,7 @@ along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
  size_t
  mpn_sizeinbase (mp_srcptr xp, mp_size_t xsize, int base)
  {
-  int lb_base, cnt;
-  mp_size_t totbits;
-
-  ASSERT (xsize >= 0);
-  ASSERT (base >= 2);
-  ASSERT (base < numberof (mp_bases));
-
-  /* Special case for X == 0.  */
-  if (xsize == 0)
-    return 1;
-
-  /* Calculate the total number of significant bits of X.  */
-  count_leading_zeros (cnt, xp[xsize-1]);
-  totbits = xsize * GMP_LIMB_BITS - cnt;
-
-  if (POW2_P (base))
-    {
-      /* Special case for powers of 2, giving exact result.  */
-      lb_base = mp_bases[base].big_base;
-      return (totbits + lb_base - 1) / lb_base;
-    }
-  else
-    return (size_t) (totbits * mp_bases[base].chars_per_bit_exactly) + 1;
+  size_t  result;
+  MPN_SIZEINBASE (result, xp, xsize, base);
+  return result;
  }
diff --git a/mpn/generic/sqr_basecase.c b/mpn/generic/sqr_basecase.c

index 548033d27bbc180678846261bd7d7d58835fcb98..660ab4c77a8a49210f99005bea4f9a8a0e44e14c 100644 (file)
--- a/mpn/generic/sqr_basecase.c
+++ b/mpn/generic/sqr_basecase.c
@@ -6,7 +6,7 @@
  
  
  Copyright 1991, 1992, 1993, 1994, 1996, 1997, 2000, 2001, 2002, 2003, 2004,
-2005, 2008 Free Software Foundation, Inc.
+2005, 2008, 2010, 2011 Free Software Foundation, Inc.
  
  This file is part of the GNU MP Library.
  
@@ -45,6 +45,30 @@ along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
    } while (0)
  #endif
  
+#if HAVE_NATIVE_mpn_sqr_diag_addlsh1
+#define MPN_SQR_DIAG_ADDLSH1(rp, tp, up, n)                            \
+  mpn_sqr_diag_addlsh1 (rp, tp, up, n)
+#else
+#if HAVE_NATIVE_mpn_addlsh1_n
+#define MPN_SQR_DIAG_ADDLSH1(rp, tp, up, n)                            \
+  do {                                                                 \
+    mp_limb_t cy;                                                      \
+    MPN_SQR_DIAGONAL (rp, up, n);                                      \
+    cy = mpn_addlsh1_n (rp + 1, rp + 1, tp, 2 * n - 2);                        \
+    rp[2 * n - 1] += cy;                                               \
+  } while (0)
+#else
+#define MPN_SQR_DIAG_ADDLSH1(rp, tp, up, n)                            \
+  do {                                                                 \
+    mp_limb_t cy;                                                      \
+    MPN_SQR_DIAGONAL (rp, up, n);                                      \
+    cy = mpn_lshift (tp, tp, 2 * n - 2, 1);                            \
+    cy += mpn_add_n (rp + 1, rp + 1, tp, 2 * n - 2);                   \
+    rp[2 * n - 1] += cy;                                               \
+  } while (0)
+#endif
+#endif
+
  
  #undef READY_WITH_mpn_sqr_basecase
  
@@ -84,9 +108,13 @@ mpn_sqr_basecase (mp_ptr rp, mp_srcptr up, mp_size_t n)
      {
        if (n == 2)
         {
+#if HAVE_NATIVE_mpn_mul_2
+         rp[3] = mpn_mul_2 (rp, up, 2, up);
+#else
           rp[0] = 0;
           rp[1] = 0;
           rp[3] = mpn_addmul_2 (rp, up, 2, up);
+#endif
           return;
         }
  
@@ -101,15 +129,7 @@ mpn_sqr_basecase (mp_ptr rp, mp_srcptr up, mp_size_t n)
        tp[2 * n - 3] = cy;
      }
  
-  MPN_SQR_DIAGONAL (rp, up, n);
-
-#if HAVE_NATIVE_mpn_addlsh1_n
-  cy = mpn_addlsh1_n (rp + 1, rp + 1, tp, 2 * n - 2);
-#else
-  cy = mpn_lshift (tp, tp, 2 * n - 2, 1);
-  cy += mpn_add_n (rp + 1, rp + 1, tp, 2 * n - 2);
-#endif
-  rp[2 * n - 1] += cy;
+  MPN_SQR_DIAG_ADDLSH1 (rp, tp, up, n);
  }
  #define READY_WITH_mpn_sqr_basecase
  #endif
@@ -194,9 +214,13 @@ mpn_sqr_basecase (mp_ptr rp, mp_srcptr up, mp_size_t n)
  
        if (n == 2)
         {
+#if HAVE_NATIVE_mpn_mul_2
+         rp[3] = mpn_mul_2 (rp, up, 2, up);
+#else
           rp[0] = 0;
           rp[1] = 0;
           rp[3] = mpn_addmul_2 (rp, up, 2, up);
+#endif
           return;
         }
  
@@ -283,18 +307,8 @@ mpn_sqr_basecase (mp_ptr rp, mp_srcptr up, mp_size_t n)
           cy = mpn_addmul_1 (tp + 2 * i - 2, up + i, n - i, up[i - 1]);
           tp[n + i - 2] = cy;
         }
-      MPN_SQR_DIAGONAL (rp + 2, up + 1, n - 1);
  
-      {
-       mp_limb_t cy;
-#if HAVE_NATIVE_mpn_addlsh1_n
-       cy = mpn_addlsh1_n (rp + 1, rp + 1, tp, 2 * n - 2);
-#else
-       cy = mpn_lshift (tp, tp, 2 * n - 2, 1);
-       cy += mpn_add_n (rp + 1, rp + 1, tp, 2 * n - 2);
-#endif
-       rp[2 * n - 1] += cy;
-      }
+      MPN_SQR_DIAG_ADDLSH1 (rp, tp, up, n);
      }
  }
  #endif
diff --git a/mpn/generic/sqrmod_bnm1.c b/mpn/generic/sqrmod_bnm1.c

index 698bd6833af14824be2f05a499b3ebed58fb39ce..824cb1108910e1b67f8810c3e8f1170ff24f439b 100644 (file)
--- a/mpn/generic/sqrmod_bnm1.c
+++ b/mpn/generic/sqrmod_bnm1.c
@@ -7,7 +7,7 @@
     SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
     GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
  
-Copyright 2009, 2010 Free Software Foundation, Inc.
+Copyright 2009, 2010, 2012 Free Software Foundation, Inc.
  
  This file is part of the GNU MP Library.
  
diff --git a/mpn/generic/sqrtrem.c b/mpn/generic/sqrtrem.c

index a609a4bef5528c00a93c84e47fd79bbe0cff790c..57041c9058bc61361713e9b421901b8f9c6c74da 100644 (file)
--- a/mpn/generic/sqrtrem.c
+++ b/mpn/generic/sqrtrem.c
@@ -8,7 +8,7 @@
     INTERFACES.  IN FACT, IT IS ALMOST GUARANTEED THAT THEY WILL CHANGE OR
     DISAPPEAR IN A FUTURE GMP RELEASE.
  
-Copyright 1999, 2000, 2001, 2002, 2004, 2005, 2008, 2010 Free Software
+Copyright 1999, 2000, 2001, 2002, 2004, 2005, 2008, 2010, 2012 Free Software
  Foundation, Inc.
  
  This file is part of the GNU MP Library.
@@ -37,56 +37,56 @@ along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
  #include "gmp-impl.h"
  #include "longlong.h"
  
-static const unsigned short invsqrttab[384] =
+static const unsigned char invsqrttab[384] = /* The common 0x100 was removed */
  {
-  0x1ff,0x1fd,0x1fb,0x1f9,0x1f7,0x1f5,0x1f3,0x1f2, /* sqrt(1/80)..sqrt(1/87) */
-  0x1f0,0x1ee,0x1ec,0x1ea,0x1e9,0x1e7,0x1e5,0x1e4, /* sqrt(1/88)..sqrt(1/8f) */
-  0x1e2,0x1e0,0x1df,0x1dd,0x1db,0x1da,0x1d8,0x1d7, /* sqrt(1/90)..sqrt(1/97) */
-  0x1d5,0x1d4,0x1d2,0x1d1,0x1cf,0x1ce,0x1cc,0x1cb, /* sqrt(1/98)..sqrt(1/9f) */
-  0x1c9,0x1c8,0x1c6,0x1c5,0x1c4,0x1c2,0x1c1,0x1c0, /* sqrt(1/a0)..sqrt(1/a7) */
-  0x1be,0x1bd,0x1bc,0x1ba,0x1b9,0x1b8,0x1b7,0x1b5, /* sqrt(1/a8)..sqrt(1/af) */
-  0x1b4,0x1b3,0x1b2,0x1b0,0x1af,0x1ae,0x1ad,0x1ac, /* sqrt(1/b0)..sqrt(1/b7) */
-  0x1aa,0x1a9,0x1a8,0x1a7,0x1a6,0x1a5,0x1a4,0x1a3, /* sqrt(1/b8)..sqrt(1/bf) */
-  0x1a2,0x1a0,0x19f,0x19e,0x19d,0x19c,0x19b,0x19a, /* sqrt(1/c0)..sqrt(1/c7) */
-  0x199,0x198,0x197,0x196,0x195,0x194,0x193,0x192, /* sqrt(1/c8)..sqrt(1/cf) */
-  0x191,0x190,0x18f,0x18e,0x18d,0x18c,0x18c,0x18b, /* sqrt(1/d0)..sqrt(1/d7) */
-  0x18a,0x189,0x188,0x187,0x186,0x185,0x184,0x183, /* sqrt(1/d8)..sqrt(1/df) */
-  0x183,0x182,0x181,0x180,0x17f,0x17e,0x17e,0x17d, /* sqrt(1/e0)..sqrt(1/e7) */
-  0x17c,0x17b,0x17a,0x179,0x179,0x178,0x177,0x176, /* sqrt(1/e8)..sqrt(1/ef) */
-  0x176,0x175,0x174,0x173,0x172,0x172,0x171,0x170, /* sqrt(1/f0)..sqrt(1/f7) */
-  0x16f,0x16f,0x16e,0x16d,0x16d,0x16c,0x16b,0x16a, /* sqrt(1/f8)..sqrt(1/ff) */
-  0x16a,0x169,0x168,0x168,0x167,0x166,0x166,0x165, /* sqrt(1/100)..sqrt(1/107) */
-  0x164,0x164,0x163,0x162,0x162,0x161,0x160,0x160, /* sqrt(1/108)..sqrt(1/10f) */
-  0x15f,0x15e,0x15e,0x15d,0x15c,0x15c,0x15b,0x15a, /* sqrt(1/110)..sqrt(1/117) */
-  0x15a,0x159,0x159,0x158,0x157,0x157,0x156,0x156, /* sqrt(1/118)..sqrt(1/11f) */
-  0x155,0x154,0x154,0x153,0x153,0x152,0x152,0x151, /* sqrt(1/120)..sqrt(1/127) */
-  0x150,0x150,0x14f,0x14f,0x14e,0x14e,0x14d,0x14d, /* sqrt(1/128)..sqrt(1/12f) */
-  0x14c,0x14b,0x14b,0x14a,0x14a,0x149,0x149,0x148, /* sqrt(1/130)..sqrt(1/137) */
-  0x148,0x147,0x147,0x146,0x146,0x145,0x145,0x144, /* sqrt(1/138)..sqrt(1/13f) */
-  0x144,0x143,0x143,0x142,0x142,0x141,0x141,0x140, /* sqrt(1/140)..sqrt(1/147) */
-  0x140,0x13f,0x13f,0x13e,0x13e,0x13d,0x13d,0x13c, /* sqrt(1/148)..sqrt(1/14f) */
-  0x13c,0x13b,0x13b,0x13a,0x13a,0x139,0x139,0x139, /* sqrt(1/150)..sqrt(1/157) */
-  0x138,0x138,0x137,0x137,0x136,0x136,0x135,0x135, /* sqrt(1/158)..sqrt(1/15f) */
-  0x135,0x134,0x134,0x133,0x133,0x132,0x132,0x132, /* sqrt(1/160)..sqrt(1/167) */
-  0x131,0x131,0x130,0x130,0x12f,0x12f,0x12f,0x12e, /* sqrt(1/168)..sqrt(1/16f) */
-  0x12e,0x12d,0x12d,0x12d,0x12c,0x12c,0x12b,0x12b, /* sqrt(1/170)..sqrt(1/177) */
-  0x12b,0x12a,0x12a,0x129,0x129,0x129,0x128,0x128, /* sqrt(1/178)..sqrt(1/17f) */
-  0x127,0x127,0x127,0x126,0x126,0x126,0x125,0x125, /* sqrt(1/180)..sqrt(1/187) */
-  0x124,0x124,0x124,0x123,0x123,0x123,0x122,0x122, /* sqrt(1/188)..sqrt(1/18f) */
-  0x121,0x121,0x121,0x120,0x120,0x120,0x11f,0x11f, /* sqrt(1/190)..sqrt(1/197) */
-  0x11f,0x11e,0x11e,0x11e,0x11d,0x11d,0x11d,0x11c, /* sqrt(1/198)..sqrt(1/19f) */
-  0x11c,0x11b,0x11b,0x11b,0x11a,0x11a,0x11a,0x119, /* sqrt(1/1a0)..sqrt(1/1a7) */
-  0x119,0x119,0x118,0x118,0x118,0x118,0x117,0x117, /* sqrt(1/1a8)..sqrt(1/1af) */
-  0x117,0x116,0x116,0x116,0x115,0x115,0x115,0x114, /* sqrt(1/1b0)..sqrt(1/1b7) */
-  0x114,0x114,0x113,0x113,0x113,0x112,0x112,0x112, /* sqrt(1/1b8)..sqrt(1/1bf) */
-  0x112,0x111,0x111,0x111,0x110,0x110,0x110,0x10f, /* sqrt(1/1c0)..sqrt(1/1c7) */
-  0x10f,0x10f,0x10f,0x10e,0x10e,0x10e,0x10d,0x10d, /* sqrt(1/1c8)..sqrt(1/1cf) */
-  0x10d,0x10c,0x10c,0x10c,0x10c,0x10b,0x10b,0x10b, /* sqrt(1/1d0)..sqrt(1/1d7) */
-  0x10a,0x10a,0x10a,0x10a,0x109,0x109,0x109,0x109, /* sqrt(1/1d8)..sqrt(1/1df) */
-  0x108,0x108,0x108,0x107,0x107,0x107,0x107,0x106, /* sqrt(1/1e0)..sqrt(1/1e7) */
-  0x106,0x106,0x106,0x105,0x105,0x105,0x104,0x104, /* sqrt(1/1e8)..sqrt(1/1ef) */
-  0x104,0x104,0x103,0x103,0x103,0x103,0x102,0x102, /* sqrt(1/1f0)..sqrt(1/1f7) */
-  0x102,0x102,0x101,0x101,0x101,0x101,0x100,0x100  /* sqrt(1/1f8)..sqrt(1/1ff) */
+  0xff,0xfd,0xfb,0xf9,0xf7,0xf5,0xf3,0xf2, /* sqrt(1/80)..sqrt(1/87) */
+  0xf0,0xee,0xec,0xea,0xe9,0xe7,0xe5,0xe4, /* sqrt(1/88)..sqrt(1/8f) */
+  0xe2,0xe0,0xdf,0xdd,0xdb,0xda,0xd8,0xd7, /* sqrt(1/90)..sqrt(1/97) */
+  0xd5,0xd4,0xd2,0xd1,0xcf,0xce,0xcc,0xcb, /* sqrt(1/98)..sqrt(1/9f) */
+  0xc9,0xc8,0xc6,0xc5,0xc4,0xc2,0xc1,0xc0, /* sqrt(1/a0)..sqrt(1/a7) */
+  0xbe,0xbd,0xbc,0xba,0xb9,0xb8,0xb7,0xb5, /* sqrt(1/a8)..sqrt(1/af) */
+  0xb4,0xb3,0xb2,0xb0,0xaf,0xae,0xad,0xac, /* sqrt(1/b0)..sqrt(1/b7) */
+  0xaa,0xa9,0xa8,0xa7,0xa6,0xa5,0xa4,0xa3, /* sqrt(1/b8)..sqrt(1/bf) */
+  0xa2,0xa0,0x9f,0x9e,0x9d,0x9c,0x9b,0x9a, /* sqrt(1/c0)..sqrt(1/c7) */
+  0x99,0x98,0x97,0x96,0x95,0x94,0x93,0x92, /* sqrt(1/c8)..sqrt(1/cf) */
+  0x91,0x90,0x8f,0x8e,0x8d,0x8c,0x8c,0x8b, /* sqrt(1/d0)..sqrt(1/d7) */
+  0x8a,0x89,0x88,0x87,0x86,0x85,0x84,0x83, /* sqrt(1/d8)..sqrt(1/df) */
+  0x83,0x82,0x81,0x80,0x7f,0x7e,0x7e,0x7d, /* sqrt(1/e0)..sqrt(1/e7) */
+  0x7c,0x7b,0x7a,0x79,0x79,0x78,0x77,0x76, /* sqrt(1/e8)..sqrt(1/ef) */
+  0x76,0x75,0x74,0x73,0x72,0x72,0x71,0x70, /* sqrt(1/f0)..sqrt(1/f7) */
+  0x6f,0x6f,0x6e,0x6d,0x6d,0x6c,0x6b,0x6a, /* sqrt(1/f8)..sqrt(1/ff) */
+  0x6a,0x69,0x68,0x68,0x67,0x66,0x66,0x65, /* sqrt(1/100)..sqrt(1/107) */
+  0x64,0x64,0x63,0x62,0x62,0x61,0x60,0x60, /* sqrt(1/108)..sqrt(1/10f) */
+  0x5f,0x5e,0x5e,0x5d,0x5c,0x5c,0x5b,0x5a, /* sqrt(1/110)..sqrt(1/117) */
+  0x5a,0x59,0x59,0x58,0x57,0x57,0x56,0x56, /* sqrt(1/118)..sqrt(1/11f) */
+  0x55,0x54,0x54,0x53,0x53,0x52,0x52,0x51, /* sqrt(1/120)..sqrt(1/127) */
+  0x50,0x50,0x4f,0x4f,0x4e,0x4e,0x4d,0x4d, /* sqrt(1/128)..sqrt(1/12f) */
+  0x4c,0x4b,0x4b,0x4a,0x4a,0x49,0x49,0x48, /* sqrt(1/130)..sqrt(1/137) */
+  0x48,0x47,0x47,0x46,0x46,0x45,0x45,0x44, /* sqrt(1/138)..sqrt(1/13f) */
+  0x44,0x43,0x43,0x42,0x42,0x41,0x41,0x40, /* sqrt(1/140)..sqrt(1/147) */
+  0x40,0x3f,0x3f,0x3e,0x3e,0x3d,0x3d,0x3c, /* sqrt(1/148)..sqrt(1/14f) */
+  0x3c,0x3b,0x3b,0x3a,0x3a,0x39,0x39,0x39, /* sqrt(1/150)..sqrt(1/157) */
+  0x38,0x38,0x37,0x37,0x36,0x36,0x35,0x35, /* sqrt(1/158)..sqrt(1/15f) */
+  0x35,0x34,0x34,0x33,0x33,0x32,0x32,0x32, /* sqrt(1/160)..sqrt(1/167) */
+  0x31,0x31,0x30,0x30,0x2f,0x2f,0x2f,0x2e, /* sqrt(1/168)..sqrt(1/16f) */
+  0x2e,0x2d,0x2d,0x2d,0x2c,0x2c,0x2b,0x2b, /* sqrt(1/170)..sqrt(1/177) */
+  0x2b,0x2a,0x2a,0x29,0x29,0x29,0x28,0x28, /* sqrt(1/178)..sqrt(1/17f) */
+  0x27,0x27,0x27,0x26,0x26,0x26,0x25,0x25, /* sqrt(1/180)..sqrt(1/187) */
+  0x24,0x24,0x24,0x23,0x23,0x23,0x22,0x22, /* sqrt(1/188)..sqrt(1/18f) */
+  0x21,0x21,0x21,0x20,0x20,0x20,0x1f,0x1f, /* sqrt(1/190)..sqrt(1/197) */
+  0x1f,0x1e,0x1e,0x1e,0x1d,0x1d,0x1d,0x1c, /* sqrt(1/198)..sqrt(1/19f) */
+  0x1c,0x1b,0x1b,0x1b,0x1a,0x1a,0x1a,0x19, /* sqrt(1/1a0)..sqrt(1/1a7) */
+  0x19,0x19,0x18,0x18,0x18,0x18,0x17,0x17, /* sqrt(1/1a8)..sqrt(1/1af) */
+  0x17,0x16,0x16,0x16,0x15,0x15,0x15,0x14, /* sqrt(1/1b0)..sqrt(1/1b7) */
+  0x14,0x14,0x13,0x13,0x13,0x12,0x12,0x12, /* sqrt(1/1b8)..sqrt(1/1bf) */
+  0x12,0x11,0x11,0x11,0x10,0x10,0x10,0x0f, /* sqrt(1/1c0)..sqrt(1/1c7) */
+  0x0f,0x0f,0x0f,0x0e,0x0e,0x0e,0x0d,0x0d, /* sqrt(1/1c8)..sqrt(1/1cf) */
+  0x0d,0x0c,0x0c,0x0c,0x0c,0x0b,0x0b,0x0b, /* sqrt(1/1d0)..sqrt(1/1d7) */
+  0x0a,0x0a,0x0a,0x0a,0x09,0x09,0x09,0x09, /* sqrt(1/1d8)..sqrt(1/1df) */
+  0x08,0x08,0x08,0x07,0x07,0x07,0x07,0x06, /* sqrt(1/1e0)..sqrt(1/1e7) */
+  0x06,0x06,0x06,0x05,0x05,0x05,0x04,0x04, /* sqrt(1/1e8)..sqrt(1/1ef) */
+  0x04,0x04,0x03,0x03,0x03,0x03,0x02,0x02, /* sqrt(1/1f0)..sqrt(1/1f7) */
+  0x02,0x02,0x01,0x01,0x01,0x01,0x00,0x00  /* sqrt(1/1f8)..sqrt(1/1ff) */
  };
  
  /* Compute s = floor(sqrt(a0)), and *rp = a0 - s^2.  */
@@ -115,7 +115,7 @@ mpn_sqrtrem1 (mp_ptr rp, mp_limb_t a0)
       iteration convert from 1/sqrt(a) to sqrt(a).  */
  
    abits = a0 >> (GMP_LIMB_BITS - 1 - 8);       /* extract bits for table lookup */
-  x0 = invsqrttab[abits - 0x80];               /* initial 1/sqrt(a) */
+  x0 = 0x100 | invsqrttab[abits - 0x80];       /* initial 1/sqrt(a) */
  
    /* x0 is now an 8 bits approximation of 1/sqrt(a0) */
  
@@ -124,7 +124,7 @@ mpn_sqrtrem1 (mp_ptr rp, mp_limb_t a0)
    t = (mp_limb_signed_t) (CNST_LIMB(0x2000000000000) - 0x30000  - a1 * x0 * x0) >> 16;
    x0 = (x0 << 16) + ((mp_limb_signed_t) (x0 * t) >> (16+2));
  
-  /* x0 is now an 16 bits approximation of 1/sqrt(a0) */
+  /* x0 is now a 16 bits approximation of 1/sqrt(a0) */
  
    t2 = x0 * (a0 >> (32-8));
    t = t2 >> 25;
@@ -246,7 +246,11 @@ mpn_dc_sqrtrem (mp_ptr sp, mp_ptr np, mp_size_t n)
  
        if (c < 0)
         {
+#if HAVE_NATIVE_mpn_addlsh1_n
+         c += mpn_addlsh1_n (np, np, sp, n) + 2 * q;
+#else
           c += mpn_addmul_1 (np, sp, n, CNST_LIMB(2)) + 2 * q;
+#endif
           c -= mpn_sub_1 (np, np, n, CNST_LIMB(1));
           q -= mpn_sub_1 (sp, sp, n, CNST_LIMB(1));
         }
diff --git a/mpn/generic/sub_err1_n.c b/mpn/generic/sub_err1_n.c

new file mode 100644 (file)

index 0000000..ea901bf
--- /dev/null
+++ b/mpn/generic/sub_err1_n.c
@@ -0,0 +1,90 @@
+/* mpn_sub_err1_n -- sub_n with one error term
+
+   Contributed by David Harvey.
+
+   THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
+   SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT IT'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2011 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+/*
+  Computes:
+
+  (1) {rp,n} := {up,n} - {vp,n} (just like mpn_sub_n) with incoming borrow cy,
+  return value is borrow out.
+
+  (2) Let c[i+1] = borrow from i-th limb subtraction (c[0] = cy).
+  Computes c[1]*yp[n-1] + ... + c[n]*yp[0], stores two-limb result at ep.
+
+  Requires n >= 1.
+
+  None of the outputs may overlap each other or any of the inputs, except
+  that {rp,n} may be equal to {up,n} or {vp,n}.
+*/
+mp_limb_t
+mpn_sub_err1_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp,
+               mp_ptr ep, mp_srcptr yp,
+                mp_size_t n, mp_limb_t cy)
+{
+  mp_limb_t el, eh, ul, vl, yl, zl, rl, sl, cy1, cy2;
+
+  ASSERT (n >= 1);   /* the do/while below always runs at least once */
+  ASSERT (MPN_SAME_OR_SEPARATE_P (rp, up, n));
+  ASSERT (MPN_SAME_OR_SEPARATE_P (rp, vp, n));
+  ASSERT (! MPN_OVERLAP_P (rp, n, yp, n));
+  ASSERT (! MPN_OVERLAP_P (ep, 2, up, n));   /* ep receives two limbs */
+  ASSERT (! MPN_OVERLAP_P (ep, 2, vp, n));
+  ASSERT (! MPN_OVERLAP_P (ep, 2, yp, n));
+  ASSERT (! MPN_OVERLAP_P (ep, 2, rp, n));
+
+  yp += n - 1;   /* yp is read from its top limb downwards */
+  el = eh = 0;   /* (eh:el) is the two-limb error accumulator */
+
+  do
+    {
+      yl = *yp--;
+      ul = *up++;
+      vl = *vp++;
+
+      /* ordinary sub_n: rl = ul - vl - cy, new borrow left in cy */
+      SUBC_LIMB (cy1, sl, ul, vl);
+      SUBC_LIMB (cy2, rl, sl, cy);
+      cy = cy1 | cy2;
+      *rp++ = rl;
+
+      /* update (eh:el): add yl when this limb produced a borrow */
+      zl = (-cy) & yl;   /* -cy is all ones when cy == 1, so zl is yl or 0 */
+      el += zl;
+      eh += el < zl;     /* el < zl only if the addition wrapped around */
+    }
+  while (--n);
+
+#if GMP_NAIL_BITS != 0
+  eh = (eh << GMP_NAIL_BITS) + (el >> GMP_NUMB_BITS);   /* move el's bits above GMP_NUMB_BITS into eh */
+  el &= GMP_NUMB_MASK;
+#endif
+
+  ep[0] = el;   /* low limb of the error term */
+  ep[1] = eh;   /* high limb of the error term */
+
+  return cy;    /* borrow out of the last limb */
+}
diff --git a/mpn/generic/sub_err2_n.c b/mpn/generic/sub_err2_n.c

new file mode 100644 (file)

index 0000000..34c6198
--- /dev/null
+++ b/mpn/generic/sub_err2_n.c
@@ -0,0 +1,106 @@
+/* mpn_sub_err2_n -- sub_n with two error terms
+
+   Contributed by David Harvey.
+
+   THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
+   SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT IT'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2011 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+/*
+  Computes:
+
+  (1) {rp,n} := {up,n} - {vp,n} (just like mpn_sub_n) with incoming borrow cy,
+  return value is borrow out.
+
+  (2) Let c[i+1] = borrow from i-th limb subtraction (c[0] = cy).
+  Computes c[1]*yp1[n-1] + ... + c[n]*yp1[0],
+           c[1]*yp2[n-1] + ... + c[n]*yp2[0],
+  stores two-limb results at {ep,2} and {ep+2,2} respectively.
+
+  Requires n >= 1.
+
+  None of the outputs may overlap each other or any of the inputs, except
+  that {rp,n} may be equal to {up,n} or {vp,n}.
+*/
+mp_limb_t
+mpn_sub_err2_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp,
+                mp_ptr ep, mp_srcptr yp1, mp_srcptr yp2,
+                mp_size_t n, mp_limb_t cy)
+{
+  mp_limb_t el1, eh1, el2, eh2, ul, vl, yl1, yl2, zl1, zl2, rl, sl, cy1, cy2;
+
+  ASSERT (n >= 1);   /* the do/while below always runs at least once */
+  ASSERT (MPN_SAME_OR_SEPARATE_P (rp, up, n));
+  ASSERT (MPN_SAME_OR_SEPARATE_P (rp, vp, n));
+  ASSERT (! MPN_OVERLAP_P (rp, n, yp1, n));
+  ASSERT (! MPN_OVERLAP_P (rp, n, yp2, n));
+  ASSERT (! MPN_OVERLAP_P (ep, 4, up, n));   /* ep receives four limbs */
+  ASSERT (! MPN_OVERLAP_P (ep, 4, vp, n));
+  ASSERT (! MPN_OVERLAP_P (ep, 4, yp1, n));
+  ASSERT (! MPN_OVERLAP_P (ep, 4, yp2, n));
+  ASSERT (! MPN_OVERLAP_P (ep, 4, rp, n));
+
+  yp1 += n - 1;   /* both y operands are read from the top limb downwards */
+  yp2 += n - 1;
+  el1 = eh1 = 0;  /* (eh1:el1) accumulates the yp1 error term */
+  el2 = eh2 = 0;  /* (eh2:el2) accumulates the yp2 error term */
+
+  do
+    {
+      yl1 = *yp1--;
+      yl2 = *yp2--;
+      ul = *up++;
+      vl = *vp++;
+
+      /* ordinary sub_n: rl = ul - vl - cy, new borrow left in cy */
+      SUBC_LIMB (cy1, sl, ul, vl);
+      SUBC_LIMB (cy2, rl, sl, cy);
+      cy = cy1 | cy2;
+      *rp++ = rl;
+
+      /* update (eh1:el1): add yl1 when this limb produced a borrow */
+      zl1 = (-cy) & yl1;   /* -cy is all ones when cy == 1, so zl1 is yl1 or 0 */
+      el1 += zl1;
+      eh1 += el1 < zl1;    /* el1 < zl1 only if the addition wrapped around */
+
+      /* update (eh2:el2): same step for the second operand */
+      zl2 = (-cy) & yl2;
+      el2 += zl2;
+      eh2 += el2 < zl2;
+    }
+  while (--n);
+
+#if GMP_NAIL_BITS != 0
+  eh1 = (eh1 << GMP_NAIL_BITS) + (el1 >> GMP_NUMB_BITS);   /* move el1's bits above GMP_NUMB_BITS into eh1 */
+  el1 &= GMP_NUMB_MASK;
+  eh2 = (eh2 << GMP_NAIL_BITS) + (el2 >> GMP_NUMB_BITS);   /* likewise for the second term */
+  el2 &= GMP_NUMB_MASK;
+#endif
+
+  ep[0] = el1;   /* {ep,2}: low, high limb of the yp1 term */
+  ep[1] = eh1;
+  ep[2] = el2;   /* {ep+2,2}: low, high limb of the yp2 term */
+  ep[3] = eh2;
+
+  return cy;     /* borrow out of the last limb */
+}
diff --git a/mpn/generic/sub_err3_n.c b/mpn/generic/sub_err3_n.c

new file mode 100644 (file)

index 0000000..72371ec
--- /dev/null
+++ b/mpn/generic/sub_err3_n.c
@@ -0,0 +1,121 @@
+/* mpn_sub_err3_n -- sub_n with three error terms
+
+   Contributed by David Harvey.
+
+   THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
+   SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT IT'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2011 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+/*
+  Computes:
+
+  (1) {rp,n} := {up,n} - {vp,n} (just like mpn_sub_n) with incoming borrow cy,
+  return value is borrow out.
+
+  (2) Let c[i+1] = borrow from i-th limb subtraction (c[0] = cy).
+  Computes c[1]*yp1[n-1] + ... + c[n]*yp1[0],
+           c[1]*yp2[n-1] + ... + c[n]*yp2[0],
+           c[1]*yp3[n-1] + ... + c[n]*yp3[0],
+  stores two-limb results at {ep,2}, {ep+2,2} and {ep+4,2} respectively.
+
+  Requires n >= 1.
+
+  None of the outputs may overlap each other or any of the inputs, except
+  that {rp,n} may be equal to {up,n} or {vp,n}.
+*/
+mp_limb_t
+mpn_sub_err3_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp,
+                mp_ptr ep, mp_srcptr yp1, mp_srcptr yp2, mp_srcptr yp3,
+                mp_size_t n, mp_limb_t cy)
+{
+  mp_limb_t el1, eh1, el2, eh2, el3, eh3, ul, vl, yl1, yl2, yl3, zl1, zl2, zl3, rl, sl, cy1, cy2;
+
+  ASSERT (n >= 1);   /* the do/while below always runs at least once */
+  ASSERT (MPN_SAME_OR_SEPARATE_P (rp, up, n));
+  ASSERT (MPN_SAME_OR_SEPARATE_P (rp, vp, n));
+  ASSERT (! MPN_OVERLAP_P (rp, n, yp1, n));
+  ASSERT (! MPN_OVERLAP_P (rp, n, yp2, n));
+  ASSERT (! MPN_OVERLAP_P (rp, n, yp3, n));
+  ASSERT (! MPN_OVERLAP_P (ep, 6, up, n));   /* ep receives six limbs */
+  ASSERT (! MPN_OVERLAP_P (ep, 6, vp, n));
+  ASSERT (! MPN_OVERLAP_P (ep, 6, yp1, n));
+  ASSERT (! MPN_OVERLAP_P (ep, 6, yp2, n));
+  ASSERT (! MPN_OVERLAP_P (ep, 6, yp3, n));
+  ASSERT (! MPN_OVERLAP_P (ep, 6, rp, n));
+
+  yp1 += n - 1;   /* all three y operands are read from the top limb downwards */
+  yp2 += n - 1;
+  yp3 += n - 1;
+  el1 = eh1 = 0;  /* (eh1:el1) accumulates the yp1 error term */
+  el2 = eh2 = 0;  /* (eh2:el2) accumulates the yp2 error term */
+  el3 = eh3 = 0;  /* (eh3:el3) accumulates the yp3 error term */
+
+  do
+    {
+      yl1 = *yp1--;
+      yl2 = *yp2--;
+      yl3 = *yp3--;
+      ul = *up++;
+      vl = *vp++;
+
+      /* ordinary sub_n: rl = ul - vl - cy, new borrow left in cy */
+      SUBC_LIMB (cy1, sl, ul, vl);
+      SUBC_LIMB (cy2, rl, sl, cy);
+      cy = cy1 | cy2;
+      *rp++ = rl;
+
+      /* update (eh1:el1): add yl1 when this limb produced a borrow */
+      zl1 = (-cy) & yl1;   /* -cy is all ones when cy == 1, so zl1 is yl1 or 0 */
+      el1 += zl1;
+      eh1 += el1 < zl1;    /* el1 < zl1 only if the addition wrapped around */
+
+      /* update (eh2:el2): same step for the second operand */
+      zl2 = (-cy) & yl2;
+      el2 += zl2;
+      eh2 += el2 < zl2;
+
+      /* update (eh3:el3): same step for the third operand */
+      zl3 = (-cy) & yl3;
+      el3 += zl3;
+      eh3 += el3 < zl3;
+    }
+  while (--n);
+
+#if GMP_NAIL_BITS != 0
+  eh1 = (eh1 << GMP_NAIL_BITS) + (el1 >> GMP_NUMB_BITS);   /* move el1's bits above GMP_NUMB_BITS into eh1 */
+  el1 &= GMP_NUMB_MASK;
+  eh2 = (eh2 << GMP_NAIL_BITS) + (el2 >> GMP_NUMB_BITS);   /* likewise for the second term */
+  el2 &= GMP_NUMB_MASK;
+  eh3 = (eh3 << GMP_NAIL_BITS) + (el3 >> GMP_NUMB_BITS);   /* likewise for the third term */
+  el3 &= GMP_NUMB_MASK;
+#endif
+
+  ep[0] = el1;   /* {ep,2}: low, high limb of the yp1 term */
+  ep[1] = eh1;
+  ep[2] = el2;   /* {ep+2,2}: low, high limb of the yp2 term */
+  ep[3] = eh2;
+  ep[4] = el3;   /* {ep+4,2}: low, high limb of the yp3 term */
+  ep[5] = eh3;
+
+  return cy;     /* borrow out of the last limb */
+}
diff --git a/mpn/generic/subcnd_n.c b/mpn/generic/subcnd_n.c

index 0dcc45641d6d84929d1b8da9136d48e369a192ff..89b719004568e09506e97cf538e781e163c50c34 100644 (file)
--- a/mpn/generic/subcnd_n.c
+++ b/mpn/generic/subcnd_n.c
@@ -1,9 +1,12 @@
  /* mpn_subcnd_n -- Compute R = U - V if CND != 0 or R = U if CND == 0.
+   Both cases should take the same time and perform the exact same memory
+   accesses, since this function is intended to be used where side-channel
+   attack resilience is relevant.
  
     THIS IS AN INTERNAL FUNCTION WITH A MUTABLE INTERFACE.  IT IS ONLY
     SAFE TO REACH THIS FUNCTION THROUGH DOCUMENTED INTERFACES.
  
-Copyright 1992, 1993, 1994, 1996, 2000, 2002, 2008, 2009 Free Software
+Copyright 1992, 1993, 1994, 1996, 2000, 2002, 2008, 2009, 2011 Free Software
  Foundation, Inc.
  
  This file is part of the GNU MP Library.
@@ -24,9 +27,6 @@ along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
  #include "gmp.h"
  #include "gmp-impl.h"
  
-
-#if GMP_NAIL_BITS == 0
-
  mp_limb_t
  mpn_subcnd_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n, mp_limb_t cnd)
  {
@@ -42,44 +42,21 @@ mpn_subcnd_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n, mp_limb_t cnd)
      {
        ul = *up++;
        vl = *vp++ & mask;
+#if GMP_NAIL_BITS == 0
        sl = ul - vl;
        cy1 = sl > ul;
        rl = sl - cy;
        cy2 = rl > sl;
        cy = cy1 | cy2;
        *rp++ = rl;
-    }
-  while (--n != 0);
-
-  return cy;
-}
-
-#endif
-
-#if GMP_NAIL_BITS >= 1
-
-mp_limb_t
-mpn_subcnd_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n, mp_limb_t cnd)
-{
-  mp_limb_t ul, vl, rl, cy, mask;
-
-  ASSERT (n >= 1);
-  ASSERT (MPN_SAME_OR_SEPARATE_P (rp, up, n));
-  ASSERT (MPN_SAME_OR_SEPARATE_P (rp, vp, n));
-
-  mask = -(mp_limb_t) (cnd != 0);
-  cy = 0;
-  do
-    {
-      ul = *up++;
-      vl = *vp++ & mask;
-      rl = ul - vl - cy;
+#else
+      rl = ul - vl;
+      rl -= cy;
        cy = rl >> (GMP_LIMB_BITS - 1);
        *rp++ = rl & GMP_NUMB_MASK;
+#endif
      }
    while (--n != 0);
  
    return cy;
  }
-
-#endif
diff --git a/mpn/generic/tabselect.c b/mpn/generic/tabselect.c

new file mode 100644 (file)

index 0000000..02e52fd
--- /dev/null
+++ b/mpn/generic/tabselect.c
@@ -0,0 +1,48 @@
+/* mpn_tabselect.
+
+   THIS IS AN INTERNAL FUNCTION WITH A MUTABLE INTERFACE.  IT IS ONLY
+   SAFE TO REACH THIS FUNCTION THROUGH DOCUMENTED INTERFACES.
+
+Copyright 2007, 2008, 2009, 2011 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+
+/* Select entry `which' from table `tab', which has nents entries, each `n'
+   limbs.  Store the selected entry at rp.  Reads entire table to avoid
+   side-channel information leaks.  O(n*nents).  rp keeps its old contents
+   if `which' matches no index in [0, nents).  */
+void
+mpn_tabselect (volatile mp_limb_t *rp, volatile mp_limb_t *tab, mp_size_t n,
+              mp_size_t nents, mp_size_t which)
+{
+  mp_size_t k, i;
+  mp_limb_t mask;
+  volatile mp_limb_t *tp;
+
+  for (k = 0; k < nents; k++)   /* visit every entry, wanted or not */
+    {
+      mask = -(mp_limb_t) (which == k);   /* all ones iff k == which, else 0 */
+      tp = tab + n * k;                   /* first limb of entry k */
+      for (i = 0; i < n; i++)   /* each limb: keep rp[i] or take tp[i] */
+       {
+         rp[i] = (rp[i] & ~mask) | (tp[i] & mask);
+       }
+    }
+}
diff --git a/mpn/generic/toom22_mul.c b/mpn/generic/toom22_mul.c

index fc296df93e9bc867825a9e4341bd2b8c7d29c7d9..36328d7d01a9559710d8a6ac06e34396e1f01194 100644 (file)
--- a/mpn/generic/toom22_mul.c
+++ b/mpn/generic/toom22_mul.c
@@ -7,7 +7,7 @@
     SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
     GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
  
-Copyright 2006, 2007, 2008, 2009, 2010 Free Software Foundation, Inc.
+Copyright 2006, 2007, 2008, 2009, 2010, 2012 Free Software Foundation, Inc.
  
  This file is part of the GNU MP Library.
  
@@ -41,7 +41,7 @@ along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
    vinf=      a1 *     b1   # A(inf)*B(inf)
  */
  
-#if TUNE_PROGRAM_BUILD
+#if TUNE_PROGRAM_BUILD || WANT_FAT_BINARY
  #define MAYBE_mul_toom22   1
  #else
  #define MAYBE_mul_toom22                                               \
@@ -80,6 +80,7 @@ mpn_toom22_mul (mp_ptr pp,
                 mp_srcptr bp, mp_size_t bn,
                 mp_ptr scratch)
  {
+  const int __gmpn_cpuvec_initialized = 1;
    mp_size_t n, s, t;
    int vm1_neg;
    mp_limb_t cy, cy2;
diff --git a/mpn/generic/toom2_sqr.c b/mpn/generic/toom2_sqr.c

index 912feda8ae741e3c6908aabd9b55115eb0c1e4d2..60ec5e4fba4d92b405fe6f3283ba9afcd283dd61 100644 (file)
--- a/mpn/generic/toom2_sqr.c
+++ b/mpn/generic/toom2_sqr.c
@@ -6,7 +6,7 @@
     SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
     GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
  
-Copyright 2006, 2007, 2008, 2009, 2010 Free Software Foundation, Inc.
+Copyright 2006, 2007, 2008, 2009, 2010, 2012 Free Software Foundation, Inc.
  
  This file is part of the GNU MP Library.
  
@@ -38,7 +38,7 @@ along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
    vinf=      a1 ^2  # A(inf)^2
  */
  
-#if TUNE_PROGRAM_BUILD
+#if TUNE_PROGRAM_BUILD || WANT_FAT_BINARY
  #define MAYBE_sqr_toom2   1
  #else
  #define MAYBE_sqr_toom2                                                        \
@@ -59,6 +59,7 @@ mpn_toom2_sqr (mp_ptr pp,
                mp_srcptr ap, mp_size_t an,
                mp_ptr scratch)
  {
+  const int __gmpn_cpuvec_initialized = 1;
    mp_size_t n, s;
    mp_limb_t cy, cy2;
    mp_ptr asm1;
diff --git a/mpn/generic/toom32_mul.c b/mpn/generic/toom32_mul.c

index 2f61fad2df9c4ea363ec2c615219ff0bb5ca6ba8..77a4ca44b91c39f4ef2efc31f18b676d6cc4300c 100644 (file)
--- a/mpn/generic/toom32_mul.c
+++ b/mpn/generic/toom32_mul.c
@@ -60,7 +60,7 @@ mpn_toom32_mul (mp_ptr pp,
    mp_size_t n, s, t;
    int vm1_neg;
    mp_limb_t cy;
-  int hi;
+  mp_limb_signed_t hi;
    mp_limb_t ap1_hi, bp1_hi;
  
  #define a0  ap
diff --git a/mpn/generic/toom33_mul.c b/mpn/generic/toom33_mul.c

index cb30df657d3da3c7e15bb04f79d8859ca22f05fe..8efa57d8ea2f220101dc98af10db4367e90804d7 100644 (file)
--- a/mpn/generic/toom33_mul.c
+++ b/mpn/generic/toom33_mul.c
@@ -8,7 +8,7 @@
     SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
     GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
  
-Copyright 2006, 2007, 2008, 2010 Free Software Foundation, Inc.
+Copyright 2006, 2007, 2008, 2010, 2012 Free Software Foundation, Inc.
  
  This file is part of the GNU MP Library.
  
@@ -44,7 +44,7 @@ along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
    vinf=          a2 *         b2  # A(inf)*B(inf)
  */
  
-#if TUNE_PROGRAM_BUILD
+#if TUNE_PROGRAM_BUILD || WANT_FAT_BINARY
  #define MAYBE_mul_basecase 1
  #define MAYBE_mul_toom33   1
  #else
@@ -79,6 +79,7 @@ mpn_toom33_mul (mp_ptr pp,
                 mp_srcptr bp, mp_size_t bn,
                 mp_ptr scratch)
  {
+  const int __gmpn_cpuvec_initialized = 1;
    mp_size_t n, s, t;
    int vm1_neg;
    mp_limb_t cy, vinf0;
diff --git a/mpn/generic/toom3_sqr.c b/mpn/generic/toom3_sqr.c

index 5824b05d7090c8ad3526a7d3acb6ec260fc64eee..3d21851728551de1e4e1eee804c2e99f7d464817 100644 (file)
--- a/mpn/generic/toom3_sqr.c
+++ b/mpn/generic/toom3_sqr.c
@@ -7,7 +7,7 @@
     SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
     GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
  
-Copyright 2006, 2007, 2008, 2009, 2010 Free Software Foundation, Inc.
+Copyright 2006, 2007, 2008, 2009, 2010, 2012 Free Software Foundation, Inc.
  
  This file is part of the GNU MP Library.
  
@@ -41,7 +41,7 @@ along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
    vinf=          a2 ^2 # A(inf)^2
  */
  
-#if TUNE_PROGRAM_BUILD
+#if TUNE_PROGRAM_BUILD || WANT_FAT_BINARY
  #define MAYBE_sqr_basecase 1
  #define MAYBE_sqr_toom3   1
  #else
@@ -68,6 +68,7 @@ mpn_toom3_sqr (mp_ptr pp,
                mp_srcptr ap, mp_size_t an,
                mp_ptr scratch)
  {
+  const int __gmpn_cpuvec_initialized = 1;
    mp_size_t n, s;
    mp_limb_t cy, vinf0;
    mp_ptr gp;
diff --git a/mpn/generic/toom42_mul.c b/mpn/generic/toom42_mul.c

index 99ac175a0439a1946a10afe6d3e00116e59b9e2a..138984ee9e781dfd280fd5355eb2adaa785c25d3 100644 (file)
--- a/mpn/generic/toom42_mul.c
+++ b/mpn/generic/toom42_mul.c
@@ -11,7 +11,7 @@
     SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
     GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
  
-Copyright 2006, 2007, 2008 Free Software Foundation, Inc.
+Copyright 2006, 2007, 2008, 2012 Free Software Foundation, Inc.
  
  This file is part of the GNU MP Library.
  
@@ -61,7 +61,7 @@ mpn_toom42_mul (mp_ptr pp,
    mp_size_t n, s, t;
    int vm1_neg;
    mp_limb_t cy, vinf0;
-  mp_ptr a0_a2, a1_a3;
+  mp_ptr a0_a2;
    mp_ptr as1, asm1, as2;
    mp_ptr bs1, bsm1, bs2;
    TMP_DECL;
@@ -92,7 +92,6 @@ mpn_toom42_mul (mp_ptr pp,
    bs2 = TMP_SALLOC_LIMBS (n + 1);
  
    a0_a2 = pp;
-  a1_a3 = pp + n + 1;
  
    /* Compute as1 and asm1.  */
    vm1_neg = mpn_toom_eval_dgr3_pm1 (as1, asm1, ap, n, s, a0_a2) & 1;
diff --git a/mpn/generic/toom42_mulmid.c b/mpn/generic/toom42_mulmid.c

new file mode 100644 (file)

index 0000000..c77b00f
--- /dev/null
+++ b/mpn/generic/toom42_mulmid.c
@@ -0,0 +1,227 @@
+/* mpn_toom42_mulmid -- toom42 middle product
+
+   Contributed by David Harvey.
+
+   THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
+   SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT IT'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2011 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+
+
+/*
+  Middle product of {ap,2n-1} and {bp,n}, output written to {rp,n+2}.
+
+  Neither ap nor bp may overlap rp.
+
+  Must have n >= 4.
+
+  Amount of scratch space required is given by mpn_toom42_mulmid_itch().
+
+  FIXME: this code assumes that n is small compared to GMP_NUMB_MAX. The exact
+  requirements should be clarified.
+*/
+void
+mpn_toom42_mulmid (mp_ptr rp, mp_srcptr ap, mp_srcptr bp, mp_size_t n,
+                   mp_ptr scratch)
+{
+  mp_limb_t cy, e[12], zh, zl;
+  mp_size_t m;
+  int neg;
+
+  ASSERT (n >= 4);
+  ASSERT (! MPN_OVERLAP_P (rp, n + 2, ap, 2*n - 1));
+  ASSERT (! MPN_OVERLAP_P (rp, n + 2, bp, n));
+
+  ap += n & 1;   /* handle odd row and diagonal later */
+  m = n / 2;
+
+  /* (e0h:e0l) etc are correction terms, in 2's complement */
+#define e0l (e[0])
+#define e0h (e[1])
+#define e1l (e[2])
+#define e1h (e[3])
+#define e2l (e[4])
+#define e2h (e[5])
+#define e3l (e[6])
+#define e3h (e[7])
+#define e4l (e[8])
+#define e4h (e[9])
+#define e5l (e[10])
+#define e5h (e[11])
+
+#define s (scratch + 2)
+#define t (rp + m + 2)
+#define p0 rp
+#define p1 scratch
+#define p2 (rp + m)
+#define next_scratch (scratch + 3*m + 1)
+
+  /*
+            rp                            scratch
+  |---------|-----------|    |---------|---------|----------|
+  0         m         2m+2   0         m         2m        3m+1
+            <----p2---->       <-------------s------------->
+  <----p0----><---t---->     <----p1---->
+  */
+
+  /* compute {s,3m-1} = {a,3m-1} + {a+m,3m-1} and error terms e0, e1, e2, e3 */
+  cy = mpn_add_err1_n (s, ap, ap + m, &e0l, bp + m, m - 1, 0);
+  cy = mpn_add_err2_n (s + m - 1, ap + m - 1, ap + 2*m - 1, &e1l,
+                      bp + m, bp, m, cy);
+  mpn_add_err1_n (s + 2*m - 1, ap + 2*m - 1, ap + 3*m - 1, &e3l, bp, m, cy);
+
+  /* compute t = (-1)^neg * ({b+m,m} - {b,m}) and error terms e4, e5 */
+  if (mpn_cmp (bp + m, bp, m) < 0)
+    {
+      ASSERT_NOCARRY (mpn_sub_err2_n (t, bp, bp + m, &e4l,
+                                     ap + m - 1, ap + 2*m - 1, m, 0));
+      neg = 1;
+    }
+  else
+    {
+      ASSERT_NOCARRY (mpn_sub_err2_n (t, bp + m, bp, &e4l,
+                                     ap + m - 1, ap + 2*m - 1, m, 0));
+      neg = 0;
+    }
+
+  /* recursive middle products. The picture is:
+
+      b[2m-1]   A   A   A   B   B   B   -   -   -   -   -
+      ...       -   A   A   A   B   B   B   -   -   -   -
+      b[m]      -   -   A   A   A   B   B   B   -   -   -
+      b[m-1]    -   -   -   C   C   C   D   D   D   -   -
+      ...       -   -   -   -   C   C   C   D   D   D   -
+      b[0]      -   -   -   -   -   C   C   C   D   D   D
+               a[0]   ...  a[m]  ...  a[2m]    ...    a[4m-2]
+  */
+
+  if (m < MULMID_TOOM42_THRESHOLD)
+    {
+      /* A + B */
+      mpn_mulmid_basecase (p0, s, 2*m - 1, bp + m, m);
+      /* accumulate high limbs of p0 into e1 */
+      ADDC_LIMB (cy, e1l, e1l, p0[m]);
+      e1h += p0[m + 1] + cy;
+      /* (-1)^neg * (B - C)   (overwrites first m limbs of s) */
+      mpn_mulmid_basecase (p1, ap + m, 2*m - 1, t, m);
+      /* C + D   (overwrites t) */
+      mpn_mulmid_basecase (p2, s + m, 2*m - 1, bp, m);
+    }
+  else
+    {
+      /* as above, but use toom42 instead */
+      mpn_toom42_mulmid (p0, s, bp + m, m, next_scratch);
+      ADDC_LIMB (cy, e1l, e1l, p0[m]);
+      e1h += p0[m + 1] + cy;
+      mpn_toom42_mulmid (p1, ap + m, t, m, next_scratch);
+      mpn_toom42_mulmid (p2, s + m, bp, m, next_scratch);
+    }
+
+  /* apply error terms */
+
+  /* -e0 at rp[0] */
+  SUBC_LIMB (cy, rp[0], rp[0], e0l);
+  SUBC_LIMB (cy, rp[1], rp[1], e0h + cy);
+  if (UNLIKELY (cy))
+    {
+      cy = (m > 2) ? mpn_sub_1 (rp + 2, rp + 2, m - 2, 1) : 1;
+      SUBC_LIMB (cy, e1l, e1l, cy);
+      e1h -= cy;
+    }
+
+  /* z = e1 - e2 + high(p0) */
+  SUBC_LIMB (cy, zl, e1l, e2l);
+  zh = e1h - e2h - cy;
+
+  /* z at rp[m] */
+  ADDC_LIMB (cy, rp[m], rp[m], zl);
+  zh = (zh + cy) & GMP_NUMB_MASK;
+  ADDC_LIMB (cy, rp[m + 1], rp[m + 1], zh);
+  cy -= (zh >> (GMP_NUMB_BITS - 1));
+  if (UNLIKELY (cy))
+    {
+      if (cy == 1)
+       mpn_add_1 (rp + m + 2, rp + m + 2, m, 1);
+      else /* cy == -1 */
+       mpn_sub_1 (rp + m + 2, rp + m + 2, m, 1);
+    }
+
+  /* e3 at rp[2*m] */
+  ADDC_LIMB (cy, rp[2*m], rp[2*m], e3l);
+  rp[2*m + 1] = (rp[2*m + 1] + e3h + cy) & GMP_NUMB_MASK;
+
+  /* e4 at p1[0] */
+  ADDC_LIMB (cy, p1[0], p1[0], e4l);
+  ADDC_LIMB (cy, p1[1], p1[1], e4h + cy);
+  if (UNLIKELY (cy))
+    mpn_add_1 (p1 + 2, p1 + 2, m, 1);
+
+  /* -e5 at p1[m] */
+  SUBC_LIMB (cy, p1[m], p1[m], e5l);
+  p1[m + 1] = (p1[m + 1] - e5h - cy) & GMP_NUMB_MASK;
+
+  /* adjustment if p1 ends up negative */
+  cy = (p1[m + 1] >> (GMP_NUMB_BITS - 1));
+
+  /* add (-1)^neg * (p1 - B^m * p1) to output */
+  if (neg)
+    {
+      mpn_sub_1 (rp + m + 2, rp + m + 2, m, cy);
+      mpn_add (rp, rp, 2*m + 2, p1, m + 2);             /* A + C */
+      mpn_sub_n (rp + m, rp + m, p1, m + 2);            /* B + D */
+    }
+  else
+    {
+      mpn_add_1 (rp + m + 2, rp + m + 2, m, cy);
+      mpn_sub (rp, rp, 2*m + 2, p1, m + 2);             /* A + C */
+      mpn_add_n (rp + m, rp + m, p1, m + 2);            /* B + D */
+    }
+
+  /* odd row and diagonal */
+  if (n & 1)
+    {
+      /*
+        Products marked E are already done. We need to do products marked O.
+
+        OOOOO----
+        -EEEEO---
+        --EEEEO--
+        ---EEEEO-
+        ----EEEEO
+       */
+
+      /* first row of O's */
+      cy = mpn_addmul_1 (rp, ap - 1, n, bp[n - 1]);
+      ADDC_LIMB (rp[n + 1], rp[n], rp[n], cy);
+
+      /* O's on diagonal */
+      /* FIXME: should probably define an interface "mpn_mulmid_diag_1"
+         that can handle the sum below. Currently we're relying on
+         mulmid_basecase being pretty fast for a diagonal sum like this,
+        which is true at least for the K8 asm version, but surely false
+        for the generic version. */
+      mpn_mulmid_basecase (e, ap + n - 1, n - 1, bp, n - 1);
+      mpn_add_n (rp + n - 1, rp + n - 1, e, 3);
+    }
+}
diff --git a/mpn/generic/toom43_mul.c b/mpn/generic/toom43_mul.c

index 670049c39497ebf4d146b51b7bbdd7f999b8e76f..6723e29965c5e7fca0849569626fba92059fef55 100644 (file)
--- a/mpn/generic/toom43_mul.c
+++ b/mpn/generic/toom43_mul.c
@@ -101,7 +101,7 @@ mpn_toom43_mul (mp_ptr pp,
  #define b1d   bsm1
  
    /* Compute as2 and asm2.  */
-  flags = toom6_vm2_neg & mpn_toom_eval_dgr3_pm2 (as2, asm2, ap, n, s, a1a3);
+  flags = (enum toom6_flags) (toom6_vm2_neg & mpn_toom_eval_dgr3_pm2 (as2, asm2, ap, n, s, a1a3));
  
    /* Compute bs2 and bsm2.  */
    b1d[n] = mpn_lshift (b1d, b1, n, 1);                 /*       2b1      */
@@ -115,7 +115,7 @@ mpn_toom43_mul (mp_ptr pp,
    if (mpn_cmp (b0b2, b1d, n+1) < 0)
      {
        mpn_add_n_sub_n (bs2, bsm2, b1d, b0b2, n+1);
-      flags ^= toom6_vm2_neg;
+      flags = (enum toom6_flags) (flags ^ toom6_vm2_neg);
      }
    else
      {
@@ -126,7 +126,7 @@ mpn_toom43_mul (mp_ptr pp,
    if (mpn_cmp (b0b2, b1d, n+1) < 0)
      {
        mpn_sub_n (bsm2, b1d, b0b2, n+1);
-      flags ^= toom6_vm2_neg;
+      flags = (enum toom6_flags) (flags ^ toom6_vm2_neg);
      }
    else
      {
@@ -135,7 +135,7 @@ mpn_toom43_mul (mp_ptr pp,
  #endif
  
    /* Compute as1 and asm1.  */
-  flags ^= toom6_vm1_neg & mpn_toom_eval_dgr3_pm1 (as1, asm1, ap, n, s, a0a2);
+  flags = (enum toom6_flags) (flags ^ toom6_vm1_neg & mpn_toom_eval_dgr3_pm1 (as1, asm1, ap, n, s, a0a2));
  
    /* Compute bs1 and bsm1.  */
    bsm1[n] = mpn_add (bsm1, b0, n, b2, t);
@@ -144,7 +144,7 @@ mpn_toom43_mul (mp_ptr pp,
      {
        cy = mpn_add_n_sub_n (bs1, bsm1, b1, bsm1, n);
        bs1[n] = cy >> 1;
-      flags ^= toom6_vm1_neg;
+      flags = (enum toom6_flags) (flags ^ toom6_vm1_neg);
      }
    else
      {
@@ -157,7 +157,7 @@ mpn_toom43_mul (mp_ptr pp,
    if (bsm1[n] == 0 && mpn_cmp (bsm1, b1, n) < 0)
      {
        mpn_sub_n (bsm1, b1, bsm1, n);
-      flags ^= toom6_vm1_neg;
+      flags = (enum toom6_flags) (flags ^ toom6_vm1_neg);
      }
    else
      {
diff --git a/mpn/generic/toom44_mul.c b/mpn/generic/toom44_mul.c

index 01a6053b783276a4299ed45e4c708f2e63527ce5..c77e3592cb90ff65ddf84376942332ca34edf08c 100644 (file)
--- a/mpn/generic/toom44_mul.c
+++ b/mpn/generic/toom44_mul.c
@@ -7,7 +7,7 @@
     SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
     GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
  
-Copyright 2006, 2007, 2008 Free Software Foundation, Inc.
+Copyright 2006, 2007, 2008, 2013 Free Software Foundation, Inc.
  
  This file is part of the GNU MP Library.
  
@@ -55,7 +55,7 @@ along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
  #define MAYBE_mul_toom22                                               \
    (MUL_TOOM44_THRESHOLD < 4 * MUL_TOOM33_THRESHOLD)
  #define MAYBE_mul_toom44                                               \
-  (MUL_FFT_THRESHOLD >= 4 * MUL_TOOM44_THRESHOLD)
+  (MUL_TOOM6H_THRESHOLD >= 4 * MUL_TOOM44_THRESHOLD)
  #endif
  
  #define TOOM44_MUL_N_REC(p, a, b, n, ws)                               \
@@ -148,10 +148,10 @@ mpn_toom44_mul (mp_ptr pp,
       gives roughly 32 n/3 + log term. */
  
    /* Compute apx = a0 + 2 a1 + 4 a2 + 8 a3 and amx = a0 - 2 a1 + 4 a2 - 8 a3.  */
-  flags = toom7_w1_neg & mpn_toom_eval_dgr3_pm2 (apx, amx, ap, n, s, tp);
+  flags = (enum toom7_flags) (toom7_w1_neg & mpn_toom_eval_dgr3_pm2 (apx, amx, ap, n, s, tp));
  
    /* Compute bpx = b0 + 2 b1 + 4 b2 + 8 b3 and bmx = b0 - 2 b1 + 4 b2 - 8 b3.  */
-  flags ^= toom7_w1_neg & mpn_toom_eval_dgr3_pm2 (bpx, bmx, bp, n, t, tp);
+  flags = (enum toom7_flags) (flags ^ toom7_w1_neg & mpn_toom_eval_dgr3_pm2 (bpx, bmx, bp, n, t, tp));
  
    TOOM44_MUL_N_REC (v2, apx, bpx, n + 1, tp);  /* v2,  2n+1 limbs */
    TOOM44_MUL_N_REC (vm2, amx, bmx, n + 1, tp); /* vm2,  2n+1 limbs */
@@ -206,10 +206,10 @@ mpn_toom44_mul (mp_ptr pp,
    TOOM44_MUL_N_REC (vh, apx, bpx, n + 1, tp);  /* vh,  2n+1 limbs */
  
    /* Compute apx = a0 + a1 + a2 + a3 and amx = a0 - a1 + a2 - a3.  */
-  flags |= toom7_w3_neg & mpn_toom_eval_dgr3_pm1 (apx, amx, ap, n, s, tp);
+  flags = (enum toom7_flags) (flags | toom7_w3_neg & mpn_toom_eval_dgr3_pm1 (apx, amx, ap, n, s, tp));
  
    /* Compute bpx = b0 + b1 + b2 + b3 bnd bmx = b0 - b1 + b2 - b3.  */
-  flags ^= toom7_w3_neg & mpn_toom_eval_dgr3_pm1 (bpx, bmx, bp, n, t, tp);
+  flags = (enum toom7_flags) (flags ^ toom7_w3_neg & mpn_toom_eval_dgr3_pm1 (bpx, bmx, bp, n, t, tp));
  
    TOOM44_MUL_N_REC (vm1, amx, bmx, n + 1, tp); /* vm1,  2n+1 limbs */
    /* Clobbers amx, bmx. */
diff --git a/mpn/generic/toom4_sqr.c b/mpn/generic/toom4_sqr.c

index 4050c45807c3c01c1d20eddc3f3c91da0aad4a4e..a97202d48577dabf7e561de51ff235f1751adfd9 100644 (file)
--- a/mpn/generic/toom4_sqr.c
+++ b/mpn/generic/toom4_sqr.c
@@ -6,7 +6,7 @@
     SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
     GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
  
-Copyright 2006, 2007, 2008, 2009, 2010 Free Software Foundation, Inc.
+Copyright 2006, 2007, 2008, 2009, 2010, 2013 Free Software Foundation, Inc.
  
  This file is part of the GNU MP Library.
  
@@ -52,7 +52,7 @@ along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
  #define MAYBE_sqr_toom2                                                        \
    (SQR_TOOM4_THRESHOLD < 4 * SQR_TOOM3_THRESHOLD)
  #define MAYBE_sqr_toom4                                                        \
-  (SQR_FFT_THRESHOLD >= 4 * SQR_TOOM4_THRESHOLD)
+  (SQR_TOOM6_THRESHOLD >= 4 * SQR_TOOM4_THRESHOLD)
  #endif
  
  #define TOOM4_SQR_REC(p, a, n, ws)                                     \
@@ -149,5 +149,5 @@ mpn_toom4_sqr (mp_ptr pp,
    TOOM4_SQR_REC (v0, a0, n, tp);
    TOOM4_SQR_REC (vinf, a3, s, tp);     /* vinf, 2s limbs */
  
-  mpn_toom_interpolate_7pts (pp, n, 0, vm2, vm1, v2, vh, 2*s, tp);
+  mpn_toom_interpolate_7pts (pp, n, (enum toom7_flags) 0, vm2, vm1, v2, vh, 2*s, tp);
  }
diff --git a/mpn/generic/toom52_mul.c b/mpn/generic/toom52_mul.c

index 21040fdbd4707cc696d18019d2b76ecbcf6c18c6..24c4fdd88f32cac48de0f742c14fe2294253e3b6 100644 (file)
--- a/mpn/generic/toom52_mul.c
+++ b/mpn/generic/toom52_mul.c
@@ -102,7 +102,7 @@ mpn_toom52_mul (mp_ptr pp,
  #define a1a3  asm1
  
    /* Compute as2 and asm2.  */
-  flags = toom6_vm2_neg & mpn_toom_eval_pm2 (as2, asm2, 4, ap, n, s, a1a3);
+  flags = (enum toom6_flags) (toom6_vm2_neg & mpn_toom_eval_pm2 (as2, asm2, 4, ap, n, s, a1a3));
  
    /* Compute bs1 and bsm1.  */
    if (t == n)
@@ -113,7 +113,7 @@ mpn_toom52_mul (mp_ptr pp,
        if (mpn_cmp (b0, b1, n) < 0)
         {
           cy = mpn_add_n_sub_n (bs1, bsm1, b1, b0, n);
-         flags ^= toom6_vm1_neg;
+         flags = (enum toom6_flags) (flags ^ toom6_vm1_neg);
         }
        else
         {
@@ -125,7 +125,7 @@ mpn_toom52_mul (mp_ptr pp,
        if (mpn_cmp (b0, b1, n) < 0)
         {
           mpn_sub_n (bsm1, b1, b0, n);
-         flags ^= toom6_vm1_neg;
+         flags = (enum toom6_flags) (flags ^ toom6_vm1_neg);
         }
        else
         {
@@ -140,7 +140,7 @@ mpn_toom52_mul (mp_ptr pp,
         {
           mpn_sub_n (bsm1, b1, b0, t);
           MPN_ZERO (bsm1 + t, n - t);
-         flags ^= toom6_vm1_neg;
+         flags = (enum toom6_flags) (flags ^ toom6_vm1_neg);
         }
        else
         {
@@ -153,7 +153,7 @@ mpn_toom52_mul (mp_ptr pp,
    if (flags & toom6_vm1_neg )
      {
        bsm2[n] = mpn_add (bsm2, bsm1, n, b1, t);
-      flags ^= toom6_vm2_neg;
+      flags = (enum toom6_flags) (flags ^ toom6_vm2_neg);
      }
    else
      {
@@ -163,7 +163,7 @@ mpn_toom52_mul (mp_ptr pp,
           if (mpn_cmp (bsm1, b1, n) < 0)
             {
               mpn_sub_n (bsm2, b1, bsm1, n);
-             flags ^= toom6_vm2_neg;
+             flags = (enum toom6_flags) (flags ^ toom6_vm2_neg);
             }
           else
             {
@@ -176,7 +176,7 @@ mpn_toom52_mul (mp_ptr pp,
             {
               mpn_sub_n (bsm2, b1, bsm1, t);
               MPN_ZERO (bsm2 + t, n - t);
-             flags ^= toom6_vm2_neg;
+             flags = (enum toom6_flags) (flags ^ toom6_vm2_neg);
             }
           else
             {
@@ -186,7 +186,7 @@ mpn_toom52_mul (mp_ptr pp,
      }
  
    /* Compute as1 and asm1.  */
-  flags ^= toom6_vm1_neg & mpn_toom_eval_pm1 (as1, asm1, 4, ap, n, s, a0a2);
+  flags = (enum toom6_flags) (flags ^ toom6_vm1_neg & mpn_toom_eval_pm1 (as1, asm1, 4, ap, n, s, a0a2));
  
    ASSERT (as1[n] <= 4);
    ASSERT (bs1[n] <= 1);
diff --git a/mpn/generic/toom53_mul.c b/mpn/generic/toom53_mul.c

index 8a0807a4c6d37c88ebaf85bf571c46e2316f8d01..d0a9ab439062681a9aafdb81ac8d37d4ab165ae8 100644 (file)
--- a/mpn/generic/toom53_mul.c
+++ b/mpn/generic/toom53_mul.c
@@ -10,7 +10,7 @@
     SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
     GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
  
-Copyright 2006, 2007, 2008 Free Software Foundation, Inc.
+Copyright 2006, 2007, 2008, 2012 Free Software Foundation, Inc.
  
  This file is part of the GNU MP Library.
  
@@ -96,10 +96,10 @@ mpn_toom53_mul (mp_ptr pp,
    gp = pp;
  
    /* Compute as1 and asm1.  */
-  flags = toom7_w3_neg & mpn_toom_eval_pm1 (as1, asm1, 4, ap, n, s, gp);
+  flags = (enum toom7_flags) (toom7_w3_neg & mpn_toom_eval_pm1 (as1, asm1, 4, ap, n, s, gp));
  
    /* Compute as2 and asm2. */
-  flags |= toom7_w1_neg & mpn_toom_eval_pm2 (as2, asm2, 4, ap, n, s, gp);
+  flags = (enum toom7_flags) (flags | toom7_w1_neg & mpn_toom_eval_pm2 (as2, asm2, 4, ap, n, s, gp));
  
    /* Compute ash = 16 a0 + 8 a1 + 4 a2 + 2 a3 + a4
       = 2*(2*(2*(2*a0 + a1) + a2) + a3) + a4  */
@@ -134,7 +134,7 @@ mpn_toom53_mul (mp_ptr pp,
      {
        bs1[n] = mpn_add_n_sub_n (bs1, bsm1, b1, bs1, n) >> 1;
        bsm1[n] = 0;
-      flags ^= toom7_w3_neg;
+      flags = (enum toom7_flags) (flags ^ toom7_w3_neg);
      }
    else
      {
@@ -147,7 +147,7 @@ mpn_toom53_mul (mp_ptr pp,
      {
        mpn_sub_n (bsm1, b1, bs1, n);
        bsm1[n] = 0;
-      flags ^= toom7_w3_neg;
+      flags = (enum toom7_flags) (flags ^ toom7_w3_neg);
      }
    else
      {
@@ -178,7 +178,7 @@ mpn_toom53_mul (mp_ptr pp,
    if (mpn_cmp (bs2, gp, n+1) < 0)
      {
        ASSERT_NOCARRY (mpn_add_n_sub_n (bs2, bsm2, gp, bs2, n+1));
-      flags ^= toom7_w1_neg;
+      flags = (enum toom7_flags) (flags ^ toom7_w1_neg);
      }
    else
      {
@@ -188,7 +188,7 @@ mpn_toom53_mul (mp_ptr pp,
    if (mpn_cmp (bs2, gp, n+1) < 0)
      {
        ASSERT_NOCARRY (mpn_sub_n (bsm2, gp, bs2, n+1));
-      flags ^= toom7_w1_neg;
+      flags = (enum toom7_flags) (flags ^ toom7_w1_neg);
      }
    else
      {
@@ -197,7 +197,7 @@ mpn_toom53_mul (mp_ptr pp,
    mpn_add_n (bs2, bs2, gp, n+1);
  #endif
  
-  /* Compute bsh = 4 b0 + 2 b1 + b0 = 2*(2*b0 + b1)+b0.  */
+  /* Compute bsh = 4 b0 + 2 b1 + b2 = 2*(2*b0 + b1)+b2.  */
  #if HAVE_NATIVE_mpn_addlsh1_n
    cy = mpn_addlsh1_n (bsh, b1, b0, n);
    if (t < n)
diff --git a/mpn/generic/toom54_mul.c b/mpn/generic/toom54_mul.c

new file mode 100644 (file)

index 0000000..afda6c2
--- /dev/null
+++ b/mpn/generic/toom54_mul.c
@@ -0,0 +1,132 @@
+/* Implementation of the algorithm for Toom-Cook 4.5-way.
+
+   Contributed to the GNU project by Marco Bodrato.
+
+   THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
+   SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2009, 2012 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+
+/* Toom-4.5, the splitting 5x4 unbalanced version.
+   Evaluate in: infinity, +4, -4, +2, -2, +1, -1, 0.
+
+  <--s-><--n--><--n--><--n--><--n-->
+   ____ ______ ______ ______ ______
+  |_a4_|__a3__|__a2__|__a1__|__a0__|
+         |b3_|__b2__|__b1__|__b0__|
+         <-t-><--n--><--n--><--n-->
+
+*/
+#define TOOM_54_MUL_N_REC(p, a, b, n, ws)              \
+  do { mpn_mul_n (p, a, b, n);                         \
+  } while (0)
+
+#define TOOM_54_MUL_REC(p, a, na, b, nb, ws)           \
+  do { mpn_mul (p, a, na, b, nb);                      \
+  } while (0)
+
+void
+mpn_toom54_mul (mp_ptr pp,
+               mp_srcptr ap, mp_size_t an,
+               mp_srcptr bp, mp_size_t bn, mp_ptr scratch)
+{
+  mp_size_t n, s, t;
+  int sign;
+
+  /***************************** decomposition *******************************/
+#define a4  (ap + 4 * n)
+#define b3  (bp + 3 * n)
+
+  ASSERT (an >= bn);
+  n = 1 + (4 * an >= 5 * bn ? (an - 1) / (size_t) 5 : (bn - 1) / (size_t) 4);
+
+  s = an - 4 * n;
+  t = bn - 3 * n;
+
+  ASSERT (0 < s && s <= n);
+  ASSERT (0 < t && t <= n);
+  /* Required by mpn_toom_interpolate_8pts. */
+  ASSERT ( s + t >= n );
+  ASSERT ( s + t > 4);
+  ASSERT ( n > 2);
+
+#define   r8    pp                             /* 2n   */
+#define   r7    scratch                                /* 3n+1 */
+#define   r5    (pp + 3*n)                     /* 3n+1 */
+#define   v0    (pp + 3*n)                     /* n+1 */
+#define   v1    (pp + 4*n+1)                   /* n+1 */
+#define   v2    (pp + 5*n+2)                   /* n+1 */
+#define   v3    (pp + 6*n+3)                   /* n+1 */
+#define   r3    (scratch + 3 * n + 1)          /* 3n+1 */
+#define   r1    (pp + 7*n)                     /* s+t <= 2*n */
+#define   ws    (scratch + 6 * n + 2)          /* ??? */
+
+  /* Alloc also 3n+1 limbs for ws... mpn_toom_interpolate_8pts may
+     need all of them, when DO_mpn_sublsh_n uses a scratch  */
+  /********************** evaluation and recursive calls *********************/
+  /* $\pm4$ */
+  sign = mpn_toom_eval_pm2exp (v2, v0, 4, ap, n, s, 2, pp)
+       ^ mpn_toom_eval_pm2exp (v3, v1, 3, bp, n, t, 2, pp);
+  TOOM_54_MUL_N_REC(pp, v0, v1, n + 1, ws); /* A(-4)*B(-4) */
+  TOOM_54_MUL_N_REC(r3, v2, v3, n + 1, ws); /* A(+4)*B(+4) */
+  mpn_toom_couple_handling (r3, 2*n+1, pp, sign, n, 2, 4);
+
+  /* $\pm1$ */
+  sign = mpn_toom_eval_pm1 (v2, v0, 4, ap, n, s,    pp)
+       ^ mpn_toom_eval_dgr3_pm1 (v3, v1, bp, n, t,    pp);
+  TOOM_54_MUL_N_REC(pp, v0, v1, n + 1, ws); /* A(-1)*B(-1) */
+  TOOM_54_MUL_N_REC(r7, v2, v3, n + 1, ws); /* A(1)*B(1) */
+  mpn_toom_couple_handling (r7, 2*n+1, pp, sign, n, 0, 0);
+
+  /* $\pm2$ */
+  sign = mpn_toom_eval_pm2 (v2, v0, 4, ap, n, s, pp)
+       ^ mpn_toom_eval_dgr3_pm2 (v3, v1, bp, n, t, pp);
+  TOOM_54_MUL_N_REC(pp, v0, v1, n + 1, ws); /* A(-2)*B(-2) */
+  TOOM_54_MUL_N_REC(r5, v2, v3, n + 1, ws); /* A(+2)*B(+2) */
+  mpn_toom_couple_handling (r5, 2*n+1, pp, sign, n, 1, 2);
+
+  /* A(0)*B(0) */
+  TOOM_54_MUL_N_REC(pp, ap, bp, n, ws);
+
+  /* Infinity */
+  if (s > t) {
+    TOOM_54_MUL_REC(r1, a4, s, b3, t, ws);
+  } else {
+    TOOM_54_MUL_REC(r1, b3, t, a4, s, ws);
+  };
+
+  mpn_toom_interpolate_8pts (pp, n, r3, r7, s + t, ws);
+
+#undef a4
+#undef b3
+#undef r1
+#undef r3
+#undef r5
+#undef v0
+#undef v1
+#undef v2
+#undef v3
+#undef r7
+#undef r8
+#undef ws
+}
diff --git a/mpn/generic/toom62_mul.c b/mpn/generic/toom62_mul.c

index c01cfba2c7c505ef2dacf94ad84a0f6c965178c5..bf55c9211ca53397260b97ecbcf5d94f13a5a80f 100644 (file)
--- a/mpn/generic/toom62_mul.c
+++ b/mpn/generic/toom62_mul.c
@@ -10,7 +10,7 @@
     SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
     GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
  
-Copyright 2006, 2007, 2008 Free Software Foundation, Inc.
+Copyright 2006, 2007, 2008, 2012 Free Software Foundation, Inc.
  
  This file is part of the GNU MP Library.
  
@@ -97,10 +97,10 @@ mpn_toom62_mul (mp_ptr pp,
    gp = pp;
  
    /* Compute as1 and asm1.  */
-  aflags = toom7_w3_neg & mpn_toom_eval_pm1 (as1, asm1, 5, ap, n, s, gp);
+  aflags = (enum toom7_flags) (toom7_w3_neg & mpn_toom_eval_pm1 (as1, asm1, 5, ap, n, s, gp));
  
    /* Compute as2 and asm2. */
-  aflags |= toom7_w1_neg & mpn_toom_eval_pm2 (as2, asm2, 5, ap, n, s, gp);
+  aflags = (enum toom7_flags) (aflags | toom7_w1_neg & mpn_toom_eval_pm2 (as2, asm2, 5, ap, n, s, gp));
  
    /* Compute ash = 32 a0 + 16 a1 + 8 a2 + 4 a3 + 2 a4 + a5
       = 2*(2*(2*(2*(2*a0 + a1) + a2) + a3) + a4) + a5  */
@@ -144,7 +144,7 @@ mpn_toom62_mul (mp_ptr pp,
        else
         {
           cy = mpn_add_n_sub_n (bs1, bsm1, b0, b1, n);
-         bflags = 0;
+         bflags = (enum toom7_flags) 0;
         }
        bs1[n] = cy >> 1;
  #else
@@ -157,7 +157,7 @@ mpn_toom62_mul (mp_ptr pp,
        else
         {
           mpn_sub_n (bsm1, b0, b1, n);
-         bflags = 0;
+         bflags = (enum toom7_flags) 0;
         }
  #endif
      }
@@ -173,7 +173,7 @@ mpn_toom62_mul (mp_ptr pp,
        else
         {
           mpn_sub (bsm1, b0, n, b1, t);
-         bflags = 0;
+         bflags = (enum toom7_flags) 0;
         }
      }
  
@@ -183,7 +183,7 @@ mpn_toom62_mul (mp_ptr pp,
    if (bflags & toom7_w3_neg)
      {
        bsm2[n] = mpn_add (bsm2, bsm1, n, b1, t);
-      bflags |= toom7_w1_neg;
+      bflags = (enum toom7_flags) (bflags | toom7_w1_neg);
      }
    else
      {
@@ -194,7 +194,7 @@ mpn_toom62_mul (mp_ptr pp,
             {
               ASSERT_NOCARRY (mpn_sub_n (bsm2, b1, bsm1, t));
               MPN_ZERO (bsm2 + t, n + 1 - t);
-             bflags |= toom7_w1_neg;
+             bflags = (enum toom7_flags) (bflags | toom7_w1_neg);
             }
           else
             {
@@ -207,18 +207,18 @@ mpn_toom62_mul (mp_ptr pp,
           if (mpn_cmp (bsm1, b1, n) < 0)
             {
               ASSERT_NOCARRY (mpn_sub_n (bsm2, b1, bsm1, n));
-             bflags |= toom7_w1_neg;
+             bflags = (enum toom7_flags) (bflags | toom7_w1_neg);
             }
           else
             {
-             ASSERT_NOCARRY (mpn_sub (bsm2, bsm1, n, b1, n));
+             ASSERT_NOCARRY (mpn_sub_n (bsm2, bsm1, b1, n));
             }
           bsm2[n] = 0;
         }
      }
  
-  /* Compute bsh, recycling bs1 and bsm1. bsh=bs1+b0;  */
-  mpn_add (bsh, bs1, n + 1, b0, n);
+  /* Compute bsh, recycling bs1. bsh=bs1+b0;  */
+  bsh[n] = bs1[n] + mpn_add_n (bsh, bs1, b0, n);
  
    ASSERT (as1[n] <= 5);
    ASSERT (bs1[n] <= 1);
@@ -293,7 +293,7 @@ mpn_toom62_mul (mp_ptr pp,
    if (s > t)  mpn_mul (vinf, a5, s, b1, t);
    else        mpn_mul (vinf, b1, t, a5, s);
  
-  mpn_toom_interpolate_7pts (pp, n, aflags ^ bflags,
+  mpn_toom_interpolate_7pts (pp, n, (enum toom7_flags) (aflags ^ bflags),
                              vm2, vm1, v2, vh, s + t, scratch_out);
  
    TMP_FREE;
diff --git a/mpn/generic/toom6h_mul.c b/mpn/generic/toom6h_mul.c

index 91ff8330ef0e836436bd126546ce2bbb5408e48c..7090e41d8518b42b9189d3ca1fd508e6c784ce7b 100644 (file)
--- a/mpn/generic/toom6h_mul.c
+++ b/mpn/generic/toom6h_mul.c
@@ -6,7 +6,7 @@
     SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
     GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
  
-Copyright 2009, 2010 Free Software Foundation, Inc.
+Copyright 2009, 2010, 2012 Free Software Foundation, Inc.
  
  This file is part of the GNU MP Library.
  
@@ -48,26 +48,37 @@ along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
    (MUL_FFT_THRESHOLD >= 6 * MUL_TOOM6H_THRESHOLD)
  #endif
  
-#define TOOM6H_MUL_N_REC(p, a, b, n, ws)                               \
+#define TOOM6H_MUL_N_REC(p, a, b, f, p2, a2, b2, n, ws)                        \
    do {                                                                 \
      if (MAYBE_mul_basecase                                             \
-       && BELOW_THRESHOLD (n, MUL_TOOM22_THRESHOLD))                   \
+       && BELOW_THRESHOLD (n, MUL_TOOM22_THRESHOLD)) {                 \
        mpn_mul_basecase (p, a, n, b, n);                                        \
-    else if (MAYBE_mul_toom22                                          \
-            && BELOW_THRESHOLD (n, MUL_TOOM33_THRESHOLD))              \
+      if (f)                                                           \
+       mpn_mul_basecase (p2, a2, n, b2, n);                            \
+    } else if (MAYBE_mul_toom22                                                \
+              && BELOW_THRESHOLD (n, MUL_TOOM33_THRESHOLD)) {          \
        mpn_toom22_mul (p, a, n, b, n, ws);                              \
-    else if (MAYBE_mul_toom33                                          \
-            && BELOW_THRESHOLD (n, MUL_TOOM44_THRESHOLD))              \
+      if (f)                                                           \
+       mpn_toom22_mul (p2, a2, n, b2, n, ws);                          \
+    } else if (MAYBE_mul_toom33                                                \
+              && BELOW_THRESHOLD (n, MUL_TOOM44_THRESHOLD)) {          \
        mpn_toom33_mul (p, a, n, b, n, ws);                              \
-    else if (! MAYBE_mul_toom6h                                                \
-            || BELOW_THRESHOLD (n, MUL_TOOM6H_THRESHOLD))              \
+      if (f)                                                           \
+       mpn_toom33_mul (p2, a2, n, b2, n, ws);                          \
+    } else if (! MAYBE_mul_toom6h                                      \
+              || BELOW_THRESHOLD (n, MUL_TOOM6H_THRESHOLD)) {          \
        mpn_toom44_mul (p, a, n, b, n, ws);                              \
-    else                                                               \
+      if (f)                                                           \
+       mpn_toom44_mul (p2, a2, n, b2, n, ws);                          \
+    } else {                                                           \
        mpn_toom6h_mul (p, a, n, b, n, ws);                              \
+      if (f)                                                           \
+       mpn_toom6h_mul (p2, a2, n, b2, n, ws);                          \
+    }                                                                  \
    } while (0)
  
  #define TOOM6H_MUL_REC(p, a, na, b, nb, ws)            \
-  do { mpn_mul (p, a, na, b, nb);                      \
+  do { mpn_mul (p, a, na, b, nb);                      \
    } while (0)
  
  /* Toom-6.5 , compute the product {pp,an+bn} <- {ap,an} * {bp,bn}
@@ -92,41 +103,50 @@ mpn_toom6h_mul   (mp_ptr pp,
  
    /***************************** decomposition *******************************/
  
-  ASSERT( an >= bn);
+  ASSERT (an >= bn);
    /* Can not handle too much unbalancement */
-  ASSERT( bn >= 42 );
+  ASSERT (bn >= 42);
    /* Can not handle too much unbalancement */
-  ASSERT((an*3 <  bn * 8) || ( bn >= 46 && an*6 <  bn * 17 ));
+  ASSERT ((an*3 <  bn * 8) || (bn >= 46 && an * 6 <  bn * 17));
  
    /* Limit num/den is a rational number between
       (12/11)^(log(4)/log(2*4-1)) and (12/11)^(log(6)/log(2*6-1))             */
  #define LIMIT_numerator (18)
  #define LIMIT_denominat (17)
  
-  if( an * LIMIT_denominat < LIMIT_numerator * bn ) /* is 6*... < 6*... */
-    { p = q = 6; }
-  else if( an * 5 * LIMIT_numerator < LIMIT_denominat * 7 * bn )
-    { p = 7; q = 6; }
-  else if( an * 5 * LIMIT_denominat < LIMIT_numerator * 7 * bn )
-    { p = 7; q = 5; }
-  else if( an * LIMIT_numerator < LIMIT_denominat * 2 * bn )  /* is 4*... < 8*... */
-    { p = 8; q = 5; }
-  else if( an * LIMIT_denominat < LIMIT_numerator * 2 * bn )  /* is 4*... < 8*... */
-    { p = 8; q = 4; }
-  else
-    { p = 9; q = 4; }
-
-  half = (p ^ q) & 1;
-  n = 1 + (q * an >= p * bn ? (an - 1) / (size_t) p : (bn - 1) / (size_t) q);
-  p--; q--;
-
-  s = an - p * n;
-  t = bn - q * n;
-
-  /* With LIMIT = 16/15, the following recover is needed only if bn<=73*/
-  if (half) { /* Recover from badly chosen splitting */
-    if (s<1) {p--; s+=n; half=0;}
-    else if (t<1) {q--; t+=n; half=0;}
+  if (LIKELY (an * LIMIT_denominat < LIMIT_numerator * bn)) /* is 6*... < 6*... */
+    {
+      n = 1 + (an - 1) / (size_t) 6;
+      p = q = 5;
+      half = 0;
+
+      s = an - 5 * n;
+      t = bn - 5 * n;
+    }
+  else {
+    if (an * 5 * LIMIT_numerator < LIMIT_denominat * 7 * bn)
+      { p = 7; q = 6; }
+    else if (an * 5 * LIMIT_denominat < LIMIT_numerator * 7 * bn)
+      { p = 7; q = 5; }
+    else if (an * LIMIT_numerator < LIMIT_denominat * 2 * bn)  /* is 4*... < 8*... */
+      { p = 8; q = 5; }
+    else if (an * LIMIT_denominat < LIMIT_numerator * 2 * bn)  /* is 4*... < 8*... */
+      { p = 8; q = 4; }
+    else
+      { p = 9; q = 4; }
+
+    half = (p ^ q) & 1;
+    n = 1 + (q * an >= p * bn ? (an - 1) / (size_t) p : (bn - 1) / (size_t) q);
+    p--; q--;
+
+    s = an - p * n;
+    t = bn - q * n;
+
+    /* With LIMIT = 16/15, the following recover is needed only if bn<=73*/
+    if (half) { /* Recover from badly chosen splitting */
+      if (UNLIKELY (s<1)) {p--; s+=n; half=0;}
+      else if (UNLIKELY (t<1)) {q--; t+=n; half=0;}
+    }
    }
  #undef LIMIT_numerator
  #undef LIMIT_denominat
@@ -160,39 +180,39 @@ mpn_toom6h_mul   (mp_ptr pp,
    /* $\pm1/2$ */
    sign = mpn_toom_eval_pm2rexp (v2, v0, p, ap, n, s, 1, pp) ^
          mpn_toom_eval_pm2rexp (v3, v1, q, bp, n, t, 1, pp);
-  TOOM6H_MUL_N_REC(pp, v0, v1, n + 1, wse); /* A(-1/2)*B(-1/2)*2^. */
-  TOOM6H_MUL_N_REC(r5, v2, v3, n + 1, wse); /* A(+1/2)*B(+1/2)*2^. */
+  /* A(-1/2)*B(-1/2)*2^. */ /* A(+1/2)*B(+1/2)*2^. */
+  TOOM6H_MUL_N_REC(pp, v0, v1, 2, r5, v2, v3, n + 1, wse);
    mpn_toom_couple_handling (r5, 2 * n + 1, pp, sign, n, 1+half , half);
  
    /* $\pm1$ */
    sign = mpn_toom_eval_pm1 (v2, v0, p, ap, n, s,    pp);
-  if (q == 3)
+  if (UNLIKELY (q == 3))
      sign ^= mpn_toom_eval_dgr3_pm1 (v3, v1, bp, n, t,    pp);
    else
      sign ^= mpn_toom_eval_pm1 (v3, v1, q, bp, n, t,    pp);
-  TOOM6H_MUL_N_REC(pp, v0, v1, n + 1, wse); /* A(-1)*B(-1) */
-  TOOM6H_MUL_N_REC(r3, v2, v3, n + 1, wse); /* A(1)*B(1) */
+  /* A(-1)*B(-1) */ /* A(1)*B(1) */
+  TOOM6H_MUL_N_REC(pp, v0, v1, 2, r3, v2, v3, n + 1, wse);
    mpn_toom_couple_handling (r3, 2 * n + 1, pp, sign, n, 0, 0);
  
    /* $\pm4$ */
    sign = mpn_toom_eval_pm2exp (v2, v0, p, ap, n, s, 2, pp) ^
          mpn_toom_eval_pm2exp (v3, v1, q, bp, n, t, 2, pp);
-  TOOM6H_MUL_N_REC(pp, v0, v1, n + 1, wse); /* A(-4)*B(-4) */
-  TOOM6H_MUL_N_REC(r1, v2, v3, n + 1, wse); /* A(+4)*B(+4) */
+  /* A(-4)*B(-4) */
+  TOOM6H_MUL_N_REC(pp, v0, v1, 2, r1, v2, v3, n + 1, wse); /* A(+4)*B(+4) */
    mpn_toom_couple_handling (r1, 2 * n + 1, pp, sign, n, 2, 4);
  
    /* $\pm1/4$ */
    sign = mpn_toom_eval_pm2rexp (v2, v0, p, ap, n, s, 2, pp) ^
          mpn_toom_eval_pm2rexp (v3, v1, q, bp, n, t, 2, pp);
-  TOOM6H_MUL_N_REC(pp, v0, v1, n + 1, wse); /* A(-1/4)*B(-1/4)*4^. */
-  TOOM6H_MUL_N_REC(r4, v2, v3, n + 1, wse); /* A(+1/4)*B(+1/4)*4^. */
+  /* A(-1/4)*B(-1/4)*4^. */ /* A(+1/4)*B(+1/4)*4^. */
+  TOOM6H_MUL_N_REC(pp, v0, v1, 2, r4, v2, v3, n + 1, wse);
    mpn_toom_couple_handling (r4, 2 * n + 1, pp, sign, n, 2*(1+half), 2*(half));
  
    /* $\pm2$ */
    sign = mpn_toom_eval_pm2 (v2, v0, p, ap, n, s, pp) ^
          mpn_toom_eval_pm2 (v3, v1, q, bp, n, t, pp);
-  TOOM6H_MUL_N_REC(pp, v0, v1, n + 1, wse); /* A(-2)*B(-2) */
-  TOOM6H_MUL_N_REC(r2, v2, v3, n + 1, wse); /* A(+2)*B(+2) */
+  /* A(-2)*B(-2) */ /* A(+2)*B(+2) */
+  TOOM6H_MUL_N_REC(pp, v0, v1, 2, r2, v2, v3, n + 1, wse);
    mpn_toom_couple_handling (r2, 2 * n + 1, pp, sign, n, 1, 2);
  
  #undef v0
@@ -202,11 +222,11 @@ mpn_toom6h_mul   (mp_ptr pp,
  #undef wse
  
    /* A(0)*B(0) */
-  TOOM6H_MUL_N_REC(pp, ap, bp, n, wsi);
+  TOOM6H_MUL_N_REC(pp, ap, bp, 0, pp, ap, bp, n, wsi);
  
    /* Infinity */
-  if( half != 0) {
-    if(s>t) {
+  if (UNLIKELY (half != 0)) {
+    if (s > t) {
        TOOM6H_MUL_REC(r0, ap + p * n, s, bp + q * n, t, wsi);
      } else {
        TOOM6H_MUL_REC(r0, bp + q * n, t, ap + p * n, s, wsi);
diff --git a/mpn/generic/toom8_sqr.c b/mpn/generic/toom8_sqr.c

index e098d2e263ade76a0174ad313d1cf1c56bc9c3a3..d2208aa2c4b990f948f27f2805fe6cd58c333576 100644 (file)
--- a/mpn/generic/toom8_sqr.c
+++ b/mpn/generic/toom8_sqr.c
@@ -6,7 +6,7 @@
     SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
     GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
  
-Copyright 2009 Free Software Foundation, Inc.
+Copyright 2009, 2012 Free Software Foundation, Inc.
  
  This file is part of the GNU MP Library.
  
@@ -82,25 +82,32 @@ along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
    (SQR_TOOM8_MAX >= SQR_TOOM8_THRESHOLD)
  #endif
  
-#define TOOM8_SQR_REC(p, a, n, ws)                                     \
+#define TOOM8_SQR_REC(p, a, f, p2, a2, n, ws)                          \
    do {                                                                 \
      if (MAYBE_sqr_basecase && ( !MAYBE_sqr_above_basecase              \
-       || BELOW_THRESHOLD (n, SQR_TOOM2_THRESHOLD)))                   \
+       || BELOW_THRESHOLD (n, SQR_TOOM2_THRESHOLD))) {                 \
        mpn_sqr_basecase (p, a, n);                                      \
-    else if (MAYBE_sqr_toom2 && ( !MAYBE_sqr_above_toom2               \
-            || BELOW_THRESHOLD (n, SQR_TOOM3_THRESHOLD)))              \
+      if (f) mpn_sqr_basecase (p2, a2, n);                             \
+    } else if (MAYBE_sqr_toom2 && ( !MAYBE_sqr_above_toom2             \
+            || BELOW_THRESHOLD (n, SQR_TOOM3_THRESHOLD))) {            \
        mpn_toom2_sqr (p, a, n, ws);                                     \
-    else if (MAYBE_sqr_toom3 && ( !MAYBE_sqr_above_toom3               \
-            || BELOW_THRESHOLD (n, SQR_TOOM4_THRESHOLD)))              \
+      if (f) mpn_toom2_sqr (p2, a2, n, ws);                            \
+    } else if (MAYBE_sqr_toom3 && ( !MAYBE_sqr_above_toom3             \
+            || BELOW_THRESHOLD (n, SQR_TOOM4_THRESHOLD))) {            \
        mpn_toom3_sqr (p, a, n, ws);                                     \
-    else if (MAYBE_sqr_toom4 && ( !MAYBE_sqr_above_toom4               \
-            || BELOW_THRESHOLD (n, SQR_TOOM6_THRESHOLD)))              \
+      if (f) mpn_toom3_sqr (p2, a2, n, ws);                            \
+    } else if (MAYBE_sqr_toom4 && ( !MAYBE_sqr_above_toom4             \
+            || BELOW_THRESHOLD (n, SQR_TOOM6_THRESHOLD))) {            \
        mpn_toom4_sqr (p, a, n, ws);                                     \
-    else if (! MAYBE_sqr_above_toom6                                   \
-            || BELOW_THRESHOLD (n, SQR_TOOM8_THRESHOLD))               \
+      if (f) mpn_toom4_sqr (p2, a2, n, ws);                            \
+    } else if (! MAYBE_sqr_above_toom6                                 \
+            || BELOW_THRESHOLD (n, SQR_TOOM8_THRESHOLD)) {             \
        mpn_toom6_sqr (p, a, n, ws);                                     \
-    else                                                               \
+      if (f) mpn_toom6_sqr (p2, a2, n, ws);                            \
+    } else {                                                           \
        mpn_toom8_sqr (p, a, n, ws);                                     \
+      if (f) mpn_toom8_sqr (p2, a2, n, ws);                            \
+    }                                                                  \
    } while (0)
  
  void
@@ -139,51 +146,51 @@ mpn_toom8_sqr  (mp_ptr pp, mp_srcptr ap, mp_size_t an, mp_ptr scratch)
    /********************** evaluation and recursive calls *********************/
    /* $\pm1/8$ */
    mpn_toom_eval_pm2rexp (v2, v0, 7, ap, n, s, 3, pp);
-  TOOM8_SQR_REC(pp, v0, n + 1, wse); /* A(-1/8)*B(-1/8)*8^. */
-  TOOM8_SQR_REC(r7, v2, n + 1, wse); /* A(+1/8)*B(+1/8)*8^. */
+  /* A(-1/8)*B(-1/8)*8^. */ /* A(+1/8)*B(+1/8)*8^. */
+  TOOM8_SQR_REC(pp, v0, 2, r7, v2, n + 1, wse);
    mpn_toom_couple_handling (r7, 2 * n + 1 + BIT_CORRECTION, pp, 0, n, 3, 0);
  
    /* $\pm1/4$ */
    mpn_toom_eval_pm2rexp (v2, v0, 7, ap, n, s, 2, pp);
-  TOOM8_SQR_REC(pp, v0, n + 1, wse); /* A(-1/4)*B(-1/4)*4^. */
-  TOOM8_SQR_REC(r5, v2, n + 1, wse); /* A(+1/4)*B(+1/4)*4^. */
+  /* A(-1/4)*B(-1/4)*4^. */ /* A(+1/4)*B(+1/4)*4^. */
+  TOOM8_SQR_REC(pp, v0, 2, r5, v2, n + 1, wse);
    mpn_toom_couple_handling (r5, 2 * n + 1, pp, 0, n, 2, 0);
  
    /* $\pm2$ */
    mpn_toom_eval_pm2 (v2, v0, 7, ap, n, s, pp);
-  TOOM8_SQR_REC(pp, v0, n + 1, wse); /* A(-2)*B(-2) */
-  TOOM8_SQR_REC(r3, v2, n + 1, wse); /* A(+2)*B(+2) */
+  /* A(-2)*B(-2) */ /* A(+2)*B(+2) */
+  TOOM8_SQR_REC(pp, v0, 2, r3, v2, n + 1, wse);
    mpn_toom_couple_handling (r3, 2 * n + 1, pp, 0, n, 1, 2);
  
    /* $\pm8$ */
    mpn_toom_eval_pm2exp (v2, v0, 7, ap, n, s, 3, pp);
-  TOOM8_SQR_REC(pp, v0, n + 1, wse); /* A(-8)*B(-8) */
-  TOOM8_SQR_REC(r1, v2, n + 1, wse); /* A(+8)*B(+8) */
+  /* A(-8)*B(-8) */ /* A(+8)*B(+8) */
+  TOOM8_SQR_REC(pp, v0, 2, r1, v2, n + 1, wse);
    mpn_toom_couple_handling (r1, 2 * n + 1 + BIT_CORRECTION, pp, 0, n, 3, 6);
  
    /* $\pm1/2$ */
    mpn_toom_eval_pm2rexp (v2, v0, 7, ap, n, s, 1, pp);
-  TOOM8_SQR_REC(pp, v0, n + 1, wse); /* A(-1/2)*B(-1/2)*2^. */
-  TOOM8_SQR_REC(r6, v2, n + 1, wse); /* A(+1/2)*B(+1/2)*2^. */
+  /* A(-1/2)*B(-1/2)*2^. */ /* A(+1/2)*B(+1/2)*2^. */
+  TOOM8_SQR_REC(pp, v0, 2, r6, v2, n + 1, wse);
    mpn_toom_couple_handling (r6, 2 * n + 1, pp, 0, n, 1, 0);
  
    /* $\pm1$ */
    mpn_toom_eval_pm1 (v2, v0, 7, ap, n, s,    pp);
-  TOOM8_SQR_REC(pp, v0, n + 1, wse); /* A(-1)*B(-1) */
-  TOOM8_SQR_REC(r4, v2, n + 1, wse); /* A(1)*B(1) */
+  /* A(-1)*B(-1) */ /* A(1)*B(1) */
+  TOOM8_SQR_REC(pp, v0, 2, r4, v2, n + 1, wse);
    mpn_toom_couple_handling (r4, 2 * n + 1, pp, 0, n, 0, 0);
  
    /* $\pm4$ */
    mpn_toom_eval_pm2exp (v2, v0, 7, ap, n, s, 2, pp);
-  TOOM8_SQR_REC(pp, v0, n + 1, wse); /* A(-4)*B(-4) */
-  TOOM8_SQR_REC(r2, v2, n + 1, wse); /* A(+4)*B(+4) */
+  /* A(-4)*B(-4) */ /* A(+4)*B(+4) */
+  TOOM8_SQR_REC(pp, v0, 2, r2, v2, n + 1, wse);
    mpn_toom_couple_handling (r2, 2 * n + 1, pp, 0, n, 2, 4);
  
  #undef v0
  #undef v2
  
    /* A(0)*B(0) */
-  TOOM8_SQR_REC(pp, ap, n, wse);
+  TOOM8_SQR_REC(pp, ap, 0, pp, ap, n, wse);
  
    mpn_toom_interpolate_16pts (pp, r1, r3, r5, r7, n, 2 * s, 0, wse);
  
diff --git a/mpn/generic/toom8h_mul.c b/mpn/generic/toom8h_mul.c

index c73cf6f5c0632bbb7a7f0b0792b85aed3b57c70f..2d8bddc773fdb54ea521e6d719fb2e19e6f2800f 100644 (file)
--- a/mpn/generic/toom8h_mul.c
+++ b/mpn/generic/toom8h_mul.c
@@ -6,7 +6,7 @@
     SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
     GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
  
-Copyright 2009, 2010 Free Software Foundation, Inc.
+Copyright 2009, 2010, 2012 Free Software Foundation, Inc.
  
  This file is part of the GNU MP Library.
  
@@ -60,30 +60,36 @@ along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
    (MUL_FFT_THRESHOLD >= 8 * MUL_TOOM8H_THRESHOLD)
  #endif
  
-#define TOOM8H_MUL_N_REC(p, a, b, n, ws)                               \
+#define TOOM8H_MUL_N_REC(p, a, b, f, p2, a2, b2, n, ws)                        \
    do {                                                                 \
      if (MAYBE_mul_basecase                                             \
-       && BELOW_THRESHOLD (n, MUL_TOOM22_THRESHOLD))                   \
+       && BELOW_THRESHOLD (n, MUL_TOOM22_THRESHOLD)) {                 \
        mpn_mul_basecase (p, a, n, b, n);                                        \
-    else if (MAYBE_mul_toom22                                          \
-            && BELOW_THRESHOLD (n, MUL_TOOM33_THRESHOLD))              \
+      if (f) mpn_mul_basecase (p2, a2, n, b2, n);                      \
+    } else if (MAYBE_mul_toom22                                                \
+            && BELOW_THRESHOLD (n, MUL_TOOM33_THRESHOLD)) {            \
        mpn_toom22_mul (p, a, n, b, n, ws);                              \
-    else if (MAYBE_mul_toom33                                          \
-            && BELOW_THRESHOLD (n, MUL_TOOM44_THRESHOLD))              \
+      if (f) mpn_toom22_mul (p2, a2, n, b2, n, ws);                    \
+    } else if (MAYBE_mul_toom33                                                \
+            && BELOW_THRESHOLD (n, MUL_TOOM44_THRESHOLD)) {            \
        mpn_toom33_mul (p, a, n, b, n, ws);                              \
-    else if (MAYBE_mul_toom44                                          \
-            && BELOW_THRESHOLD (n, MUL_TOOM6H_THRESHOLD))              \
+      if (f) mpn_toom33_mul (p2, a2, n, b2, n, ws);                    \
+    } else if (MAYBE_mul_toom44                                                \
+            && BELOW_THRESHOLD (n, MUL_TOOM6H_THRESHOLD)) {            \
        mpn_toom44_mul (p, a, n, b, n, ws);                              \
-    else if (! MAYBE_mul_toom8h                                                \
-            || BELOW_THRESHOLD (n, MUL_TOOM8H_THRESHOLD))              \
+      if (f) mpn_toom44_mul (p2, a2, n, b2, n, ws);                    \
+    } else if (! MAYBE_mul_toom8h                                      \
+            || BELOW_THRESHOLD (n, MUL_TOOM8H_THRESHOLD)) {            \
        mpn_toom6h_mul (p, a, n, b, n, ws);                              \
-    else                                                               \
+      if (f) mpn_toom6h_mul (p2, a2, n, b2, n, ws);                    \
+    } else {                                                           \
        mpn_toom8h_mul (p, a, n, b, n, ws);                              \
+      if (f) mpn_toom8h_mul (p2, a2, n, b2, n, ws);                    \
+    }                                                                  \
    } while (0)
  
  #define TOOM8H_MUL_REC(p, a, na, b, nb, ws)            \
-  do { mpn_mul (p, a, na, b, nb);                      \
-  } while (0)
+  do { mpn_mul (p, a, na, b, nb); } while (0)
  
  /* Toom-8.5 , compute the product {pp,an+bn} <- {ap,an} * {bp,bn}
     With: an >= bn >= 86, an*5 <  bn * 11.
@@ -111,11 +117,10 @@ mpn_toom8h_mul   (mp_ptr pp,
    /* Can not handle too small operands */
    ASSERT (bn >= 86);
    /* Can not handle too much unbalancement */
-  ASSERT (an*4 <= bn*13);
-  ASSERT (GMP_NUMB_BITS > 12*3 || an*4 <= bn*12);
-  ASSERT (GMP_NUMB_BITS > 11*3 || an*5 <= bn*11);
-  ASSERT (GMP_NUMB_BITS > 10*3 || an*6 <= bn*10);
-  ASSERT (GMP_NUMB_BITS >  9*3 || an*7 <= bn* 9);
+  ASSERT (an <= bn*4);
+  ASSERT (GMP_NUMB_BITS > 11*3 || an*4 <= bn*11);
+  ASSERT (GMP_NUMB_BITS > 10*3 || an*1 <= bn* 2);
+  ASSERT (GMP_NUMB_BITS >  9*3 || an*2 <= bn* 3);
  
    /* Limit num/den is a rational number between
       (16/15)^(log(6)/log(2*6-1)) and (16/15)^(log(8)/log(2*8-1))             */
@@ -127,8 +132,8 @@ mpn_toom8h_mul   (mp_ptr pp,
        half = 0;
        n = 1 + ((an - 1)>>3);
        p = q = 7;
-      s = an - p * n;
-      t = bn - q * n;
+      s = an - 7 * n;
+      t = bn - 7 * n;
      }
    else
      {
@@ -147,7 +152,7 @@ mpn_toom8h_mul   (mp_ptr pp,
        else if (GMP_NUMB_BITS <= 11*3 ||
                an * 4 < 9 * bn)
         { p =11; q = 5; }
-      else if (an *(LIMIT_numerator/3) < LIMIT_denominat * bn )  /* is 4*... <12*... */
+      else if (an *(LIMIT_numerator/3) < LIMIT_denominat * bn)  /* is 4*... <12*... */
         { p =12; q = 5; }
        else if (GMP_NUMB_BITS <= 12*3 ||
                an * 9 < 28 * bn )  /* is 4*... <12*... */
@@ -163,8 +168,8 @@ mpn_toom8h_mul   (mp_ptr pp,
        t = bn - q * n;
  
        if(half) { /* Recover from badly chosen splitting */
-       if (s<1) {p--; s+=n; half=0;}
-       else if (t<1) {q--; t+=n; half=0;}
+       if (UNLIKELY (s<1)) {p--; s+=n; half=0;}
+       else if (UNLIKELY (t<1)) {q--; t+=n; half=0;}
        }
      }
  #undef LIMIT_numerator
@@ -202,53 +207,53 @@ mpn_toom8h_mul   (mp_ptr pp,
    /* $\pm1/8$ */
    sign = mpn_toom_eval_pm2rexp (v2, v0, p, ap, n, s, 3, pp) ^
          mpn_toom_eval_pm2rexp (v3, v1, q, bp, n, t, 3, pp);
-  TOOM8H_MUL_N_REC(pp, v0, v1, n + 1, wse); /* A(-1/8)*B(-1/8)*8^. */
-  TOOM8H_MUL_N_REC(r7, v2, v3, n + 1, wse); /* A(+1/8)*B(+1/8)*8^. */
+  /* A(-1/8)*B(-1/8)*8^. */ /* A(+1/8)*B(+1/8)*8^. */
+  TOOM8H_MUL_N_REC(pp, v0, v1, 2, r7, v2, v3, n + 1, wse);
    mpn_toom_couple_handling (r7, 2 * n + 1 + BIT_CORRECTION, pp, sign, n, 3*(1+half), 3*(half));
  
    /* $\pm1/4$ */
    sign = mpn_toom_eval_pm2rexp (v2, v0, p, ap, n, s, 2, pp) ^
          mpn_toom_eval_pm2rexp (v3, v1, q, bp, n, t, 2, pp);
-  TOOM8H_MUL_N_REC(pp, v0, v1, n + 1, wse); /* A(-1/4)*B(-1/4)*4^. */
-  TOOM8H_MUL_N_REC(r5, v2, v3, n + 1, wse); /* A(+1/4)*B(+1/4)*4^. */
+  /* A(-1/4)*B(-1/4)*4^. */ /* A(+1/4)*B(+1/4)*4^. */
+  TOOM8H_MUL_N_REC(pp, v0, v1, 2, r5, v2, v3, n + 1, wse);
    mpn_toom_couple_handling (r5, 2 * n + 1, pp, sign, n, 2*(1+half), 2*(half));
  
    /* $\pm2$ */
    sign = mpn_toom_eval_pm2 (v2, v0, p, ap, n, s, pp) ^
          mpn_toom_eval_pm2 (v3, v1, q, bp, n, t, pp);
-  TOOM8H_MUL_N_REC(pp, v0, v1, n + 1, wse); /* A(-2)*B(-2) */
-  TOOM8H_MUL_N_REC(r3, v2, v3, n + 1, wse); /* A(+2)*B(+2) */
+  /* A(-2)*B(-2) */ /* A(+2)*B(+2) */
+  TOOM8H_MUL_N_REC(pp, v0, v1, 2, r3, v2, v3, n + 1, wse);
    mpn_toom_couple_handling (r3, 2 * n + 1, pp, sign, n, 1, 2);
  
    /* $\pm8$ */
    sign = mpn_toom_eval_pm2exp (v2, v0, p, ap, n, s, 3, pp) ^
          mpn_toom_eval_pm2exp (v3, v1, q, bp, n, t, 3, pp);
-  TOOM8H_MUL_N_REC(pp, v0, v1, n + 1, wse); /* A(-8)*B(-8) */
-  TOOM8H_MUL_N_REC(r1, v2, v3, n + 1, wse); /* A(+8)*B(+8) */
+  /* A(-8)*B(-8) */ /* A(+8)*B(+8) */
+  TOOM8H_MUL_N_REC(pp, v0, v1, 2, r1, v2, v3, n + 1, wse);
    mpn_toom_couple_handling (r1, 2 * n + 1 + BIT_CORRECTION, pp, sign, n, 3, 6);
  
    /* $\pm1/2$ */
    sign = mpn_toom_eval_pm2rexp (v2, v0, p, ap, n, s, 1, pp) ^
          mpn_toom_eval_pm2rexp (v3, v1, q, bp, n, t, 1, pp);
-  TOOM8H_MUL_N_REC(pp, v0, v1, n + 1, wse); /* A(-1/2)*B(-1/2)*2^. */
-  TOOM8H_MUL_N_REC(r6, v2, v3, n + 1, wse); /* A(+1/2)*B(+1/2)*2^. */
+  /* A(-1/2)*B(-1/2)*2^. */ /* A(+1/2)*B(+1/2)*2^. */
+  TOOM8H_MUL_N_REC(pp, v0, v1, 2, r6, v2, v3, n + 1, wse);
    mpn_toom_couple_handling (r6, 2 * n + 1, pp, sign, n, 1+half, half);
  
    /* $\pm1$ */
    sign = mpn_toom_eval_pm1 (v2, v0, p, ap, n, s,    pp);
-  if (q == 3)
+  if (GMP_NUMB_BITS > 12*3 && UNLIKELY (q == 3))
      sign ^= mpn_toom_eval_dgr3_pm1 (v3, v1, bp, n, t,    pp);
    else
      sign ^= mpn_toom_eval_pm1 (v3, v1, q, bp, n, t,    pp);
-  TOOM8H_MUL_N_REC(pp, v0, v1, n + 1, wse); /* A(-1)*B(-1) */
-  TOOM8H_MUL_N_REC(r4, v2, v3, n + 1, wse); /* A(1)*B(1) */
+  /* A(-1)*B(-1) */ /* A(1)*B(1) */
+  TOOM8H_MUL_N_REC(pp, v0, v1, 2, r4, v2, v3, n + 1, wse);
    mpn_toom_couple_handling (r4, 2 * n + 1, pp, sign, n, 0, 0);
  
    /* $\pm4$ */
    sign = mpn_toom_eval_pm2exp (v2, v0, p, ap, n, s, 2, pp) ^
          mpn_toom_eval_pm2exp (v3, v1, q, bp, n, t, 2, pp);
-  TOOM8H_MUL_N_REC(pp, v0, v1, n + 1, wse); /* A(-4)*B(-4) */
-  TOOM8H_MUL_N_REC(r2, v2, v3, n + 1, wse); /* A(+4)*B(+4) */
+  /* A(-4)*B(-4) */ /* A(+4)*B(+4) */
+  TOOM8H_MUL_N_REC(pp, v0, v1, 2, r2, v2, v3, n + 1, wse);
    mpn_toom_couple_handling (r2, 2 * n + 1, pp, sign, n, 2, 4);
  
  #undef v0
@@ -258,11 +263,11 @@ mpn_toom8h_mul   (mp_ptr pp,
  #undef wse
  
    /* A(0)*B(0) */
-  TOOM8H_MUL_N_REC(pp, ap, bp, n, wsi);
+  TOOM8H_MUL_N_REC(pp, ap, bp, 0, pp, ap, bp, n, wsi);
  
    /* Infinity */
-  if( half != 0) {
-    if(s>t) {
+  if (UNLIKELY (half != 0)) {
+    if (s > t) {
        TOOM8H_MUL_REC(r0, ap + p * n, s, bp + q * n, t, wsi);
      } else {
        TOOM8H_MUL_REC(r0, bp + q * n, t, ap + p * n, s, wsi);
diff --git a/mpn/generic/toom_eval_pm2.c b/mpn/generic/toom_eval_pm2.c

index 7795b0bc0e761753f336e1b1a6e8c53c180ec783..7f410d56e1668f2c82ee1b6c0bcfefbecfb7624b 100644 (file)
--- a/mpn/generic/toom_eval_pm2.c
+++ b/mpn/generic/toom_eval_pm2.c
@@ -43,7 +43,7 @@ do {                                  \
  } while (0)
  #else
  /* The following is not a general substitute for addlsh2.
-   It is correct if d == b, but it is not if d == a.   */
+   It is correct if d == b, but it is not if d == a.  */
  #define DO_addlsh2(d, a, b, n, cy)     \
  do {                                   \
    (cy) <<= 2;                          \
diff --git a/mpn/generic/toom_interpolate_12pts.c b/mpn/generic/toom_interpolate_12pts.c

index 57becc3cc995c2df90d849b540e6ef917dd60585..e4a765bed6d2db55bfd39a8b22f66c6665730a0f 100644 (file)
--- a/mpn/generic/toom_interpolate_12pts.c
+++ b/mpn/generic/toom_interpolate_12pts.c
@@ -6,7 +6,7 @@
     SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
     GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
  
-Copyright 2009, 2010 Free Software Foundation, Inc.
+Copyright 2009, 2010, 2012 Free Software Foundation, Inc.
  
  This file is part of the GNU MP Library.
  
@@ -240,7 +240,7 @@ mpn_toom_interpolate_12pts (mp_ptr pp, mp_ptr r1, mp_ptr r3, mp_ptr r5,
    mpn_sub_n (r4, r4, r5, n3p1); /* can be negative */
    DO_mpn_sublsh_n (r4, r5, n3p1, 8, wsi); /* can be negative */
  #endif
-  /* A division by 2835x4 followsi. Warning: the operand can be negative! */
+  /* A division by 2835x4 follows. Warning: the operand can be negative! */
    mpn_divexact_by2835x4(r4, r4, n3p1);
    if ((r4[n3] & (GMP_NUMB_MAX << (GMP_NUMB_BITS-3))) != 0)
      r4[n3] |= (GMP_NUMB_MAX << (GMP_NUMB_BITS-2));
diff --git a/mpn/generic/toom_interpolate_16pts.c b/mpn/generic/toom_interpolate_16pts.c

index 36ed15dab71fef11814c3845e00e2223e070e97e..445479cd4d321cd6bffba4ea06d70e34ff67cedb 100644 (file)
--- a/mpn/generic/toom_interpolate_16pts.c
+++ b/mpn/generic/toom_interpolate_16pts.c
@@ -378,7 +378,7 @@ mpn_toom_interpolate_16pts (mp_ptr pp, mp_ptr r1, mp_ptr r3, mp_ptr r5, mp_ptr r
    mpn_divexact_by255x188513325(r7, r7, n3p1);
  
    mpn_submul_1 (r5, r7, n3p1, 12567555); /* can be negative */
-  /* A division by 2835x64 followsi. Warning: the operand can be negative! */
+  /* A division by 2835x64 follows. Warning: the operand can be negative! */
    mpn_divexact_by2835x64(r5, r5, n3p1);
    if ((r5[n3] & (GMP_NUMB_MAX << (GMP_NUMB_BITS-7))) != 0)
      r5[n3] |= (GMP_NUMB_MAX << (GMP_NUMB_BITS-6));
@@ -395,7 +395,7 @@ mpn_toom_interpolate_16pts (mp_ptr pp, mp_ptr r1, mp_ptr r3, mp_ptr r5, mp_ptr r
    DO_mpn_addlsh_n (r6, r5, n3p1, 8, wsi); /* can give a carry */
    DO_mpn_sublsh_n (r6, r5, n3p1, 4, wsi); /* can be negative */
  #endif
-  /* A division by 255x4 followsi. Warning: the operand can be negative! */
+  /* A division by 255x4 follows. Warning: the operand can be negative! */
    mpn_divexact_by255x4(r6, r6, n3p1);
    if ((r6[n3] & (GMP_NUMB_MAX << (GMP_NUMB_BITS-3))) != 0)
      r6[n3] |= (GMP_NUMB_MAX << (GMP_NUMB_BITS-2));
diff --git a/mpn/generic/toom_interpolate_5pts.c b/mpn/generic/toom_interpolate_5pts.c

index 1806127940bd956fe41e6b162f9c850e854c329a..8416b641c6479a385948bf1efb0ca5225a51572c 100644 (file)
--- a/mpn/generic/toom_interpolate_5pts.c
+++ b/mpn/generic/toom_interpolate_5pts.c
@@ -126,8 +126,8 @@ mpn_toom_interpolate_5pts (mp_ptr c, mp_ptr v2, mp_ptr vm1,
       result is v2 >= 0 */
    saved = vinf[0];       /* Remember v1's highest byte (will be overwritten). */
    vinf[0] = vinf0;       /* Set the right value for vinf0                     */
-#ifdef HAVE_NATIVE_mpn_sublsh1_n
-  cy = mpn_sublsh1_n (v2, v2, vinf, twor);
+#ifdef HAVE_NATIVE_mpn_sublsh1_n_ip1
+  cy = mpn_sublsh1_n_ip1 (v2, vinf, twor);
  #else
    /* Overwrite unused vm1 */
    cy = mpn_lshift (vm1, vinf, twor, 1);
diff --git a/mpn/generic/toom_interpolate_6pts.c b/mpn/generic/toom_interpolate_6pts.c

index fc9ee230ee94c8ed39cdd3b6c04abd825d0e2075..542fb2a610fa39afdab0bb9c079a3d73edb76df7 100644 (file)
--- a/mpn/generic/toom_interpolate_6pts.c
+++ b/mpn/generic/toom_interpolate_6pts.c
@@ -6,7 +6,7 @@
     SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
     GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
  
-Copyright 2009, 2010 Free Software Foundation, Inc.
+Copyright 2009, 2010, 2012 Free Software Foundation, Inc.
  
  This file is part of the GNU MP Library.
  
@@ -167,11 +167,11 @@ mpn_toom_interpolate_6pts (mp_ptr pp, mp_size_t n, enum toom6_flags flags,
    MPN_INCR_U (pp + 3 * n + 1, n, cy);
  
    /* W2 -= W0<<2 */
-#if HAVE_NATIVE_mpn_sublsh_n || HAVE_NATIVE_mpn_sublsh2_n
-#if HAVE_NATIVE_mpn_sublsh2_n
-  cy = mpn_sublsh2_n(w2, w2, w0, w0n);
+#if HAVE_NATIVE_mpn_sublsh_n || HAVE_NATIVE_mpn_sublsh2_n_ip1
+#if HAVE_NATIVE_mpn_sublsh2_n_ip1
+  cy = mpn_sublsh2_n_ip1 (w2, w0, w0n);
  #else
-  cy = mpn_sublsh_n(w2, w2, w0, w0n, 2);
+  cy = mpn_sublsh_n (w2, w2, w0, w0n, 2);
  #endif
  #else
    /* {W4,2*n+1} is now free and can be overwritten. */
@@ -210,7 +210,7 @@ mpn_toom_interpolate_6pts (mp_ptr pp, mp_size_t n, enum toom6_flags flags,
    embankment = w0[w0n - 1] - 1;
    w0[w0n - 1] = 1;
    if (LIKELY (w0n > n)) {
-    if ( LIKELY(cy4 > cy6) )
+    if (cy4 > cy6)
        MPN_INCR_U (pp + 4 * n, w0n + n, cy4 - cy6);
      else
        MPN_DECR_U (pp + 4 * n, w0n + n, cy6 - cy4);
diff --git a/mpn/generic/toom_interpolate_8pts.c b/mpn/generic/toom_interpolate_8pts.c

index b11af25c2b938ba264bd31cc1e346d111cdf2660..8e9825a6e1800cb88788082f7d7261be31cca22e 100644 (file)
--- a/mpn/generic/toom_interpolate_8pts.c
+++ b/mpn/generic/toom_interpolate_8pts.c
@@ -6,7 +6,7 @@
     SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
     GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
  
-Copyright 2009 Free Software Foundation, Inc.
+Copyright 2009, 2011, 2012 Free Software Foundation, Inc.
  
  This file is part of the GNU MP Library.
  
@@ -54,18 +54,24 @@ along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
  #endif
  #endif
  
+#if HAVE_NATIVE_mpn_sublsh2_n_ip1
+#define DO_mpn_sublsh2_n(dst,src,n,ws) mpn_sublsh2_n_ip1(dst,src,n)
+#else
+#define DO_mpn_sublsh2_n(dst,src,n,ws) DO_mpn_sublsh_n(dst,src,n,2,ws)
+#endif
+
  #if HAVE_NATIVE_mpn_sublsh_n
-#define DO_mpn_sublsh_n(dst,src,n,s,ws) mpn_sublsh_n (dst,src,n,s)
+#define DO_mpn_sublsh_n(dst,src,n,s,ws) mpn_sublsh_n (dst,dst,src,n,s)
  #else
  static mp_limb_t
  DO_mpn_sublsh_n (mp_ptr dst, mp_srcptr src, mp_size_t n, unsigned int s, mp_ptr ws)
  {
-#if USE_MUL_1
+#if USE_MUL_1 && 0
    return mpn_submul_1(dst,src,n,CNST_LIMB(1) <<(s));
  #else
    mp_limb_t __cy;
    __cy = mpn_lshift (ws,src,n,s);
-  return    __cy + mpn_sub_n (dst,dst,ws,n);
+  return __cy + mpn_sub_n (dst,dst,ws,n);
  #endif
  }
  #endif
@@ -146,7 +152,7 @@ mpn_toom_interpolate_8pts (mp_ptr pp, mp_size_t n,
  
    ASSERT_NOCARRY(mpn_divexact_by3 (r5, r5, 3 * n + 1));
  
-  ASSERT_NOCARRY(DO_mpn_sublsh_n (r5, r3, 3 * n + 1, 2, ws));
+  ASSERT_NOCARRY(DO_mpn_sublsh2_n (r5, r3, 3 * n + 1, ws));
  
    /* last interpolation steps... */
    /* ... are mixed with recomposition */
@@ -187,9 +193,9 @@ mpn_toom_interpolate_8pts (mp_ptr pp, mp_size_t n,
  
    cy = mpn_add_1 (pp + 6*n, r3 + n, n, pp[6*n]);
    MPN_INCR_U (r3 + 2*n, n + 1, cy);
-  cy = r3[3*n] + mpn_add_n (pp + 7*n, pp + 7*n, r3 + 2*n, n);
+  cy = mpn_add_n (pp + 7*n, pp + 7*n, r3 + 2*n, n);
    if (LIKELY(spt != n))
-    MPN_INCR_U (pp + 8*n, spt - n, cy);
+    MPN_INCR_U (pp + 8*n, spt - n, cy + r3[3*n]);
    else
-    ASSERT (cy == 0);
+    ASSERT (r3[3*n] | cy == 0);
  }
diff --git a/mpn/generic/trialdiv.c b/mpn/generic/trialdiv.c

index c8f3c5d37666176d9321b3818d7f821af49d9f29..e61bc69fd53c1e44553049ec4b9e811936130d6a 100644 (file)
--- a/mpn/generic/trialdiv.c
+++ b/mpn/generic/trialdiv.c
@@ -6,7 +6,7 @@
     SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
     GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
  
-Copyright 2009 Free Software Foundation, Inc.
+Copyright 2009, 2010, 2012 Free Software Foundation, Inc.
  
  This file is part of the GNU MP Library.
  
@@ -21,20 +21,26 @@ or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
  License for more details.
  
  You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.  */
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
  
  /*
-   Fast, division-free trial division for GMP.
-
-   This function will find the first (smallest) factor represented in
+   This function finds the first (smallest) factor represented in
     trialdivtab.h.  It does not stop the factoring effort just because it has
     reached some sensible limit, such as the square root of the input number.
  
     The caller can limit the factoring effort by passing NPRIMES.  The function
-   well then divide to *at least* that limit.  A position which only
-   mpn_trialdiv can make sense of is returned in the WHERE parameter.  It can
-   be used for restarting the factoring effort; the first call should pass 0
-   here.
+   will then divide until that limit, or perhaps a few primes more.  A position
+   which only mpn_trialdiv can make sense of is returned in the WHERE
+   parameter.  It can be used for restarting the factoring effort; the first
+   call should pass 0 here.
+
+   Input:        1. A non-negative number T = {tp,tn}
+                 2. NPRIMES as described above,
+                 3. *WHERE as described above.
+   Output:       1. *WHERE updated as described above.
+                 2. Return value is non-zero if we found a factor, else zero.
+                    To get the actual prime factor, compute the mod B inverse
+                    of the return value.
  */
  
  #include "gmp.h"
@@ -58,10 +64,6 @@ struct gmp_primes_ptab {
  
  #define PTAB_LINES (sizeof (gmp_primes_ptab) / sizeof (gmp_primes_ptab[0]))
  
-/* Attempt to find a factor of T using trial division.
-   Input: A non-negative number T.
-   Output: non-zero if we found a factor, zero otherwise.  To get the actual
-   prime factor, compute the mod B inverse of the return value.  */
  /* FIXME: We could optimize out one of the outer loop conditions if we
     had a final ptab entry with a huge nd field.  */
  mp_limb_t
@@ -80,12 +82,7 @@ mpn_trialdiv (mp_srcptr tp, mp_size_t tn, mp_size_t nprimes, int *where)
        ppp = gmp_primes_ptab[i].ppp;
        cps = gmp_primes_ptab[i].cps;
  
-#if __GNU_MP_VERSION == 4 && __GNU_MP_VERSION_MINOR < 4
-      if (tn < 4)
-       r = mpn_mod_1 (tp, tn, ppp); /* FIXME */
-      else
-#endif
-       r = mpn_mod_1s_4p (tp, tn, ppp << cps[1], cps);
+      r = mpn_mod_1s_4p (tp, tn, ppp << cps[1], cps);
  
        idx = gmp_primes_ptab[i].idx;
        np = gmp_primes_ptab[i].np;
diff --git a/mpn/ia64/add_n_sub_n.asm b/mpn/ia64/add_n_sub_n.asm

new file mode 100644 (file)

index 0000000..58dd45d
--- /dev/null
+++ b/mpn/ia64/add_n_sub_n.asm
@@ -0,0 +1,297 @@
+dnl  IA-64 mpn_add_n_sub_n -- mpn parallel addition and subtraction.
+
+dnl  Contributed to the GNU project by Torbjorn Granlund.
+
+dnl  Copyright 2010 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C           cycles/limb
+C Itanium:      ?
+C Itanium 2:    2.25
+
+C INPUT PARAMETERS
+define(`sp', `r32')
+define(`dp', `r33')
+define(`up', `r34')
+define(`vp', `r35')
+define(`n',  `r36')
+
+C Some useful aliases for registers we use
+define(`u0',`r16') define(`u1',`r17') define(`u2',`r18') define(`u3',`r19')
+define(`v0',`r20') define(`v1',`r21') define(`v2',`r22') define(`v3',`r23')
+define(`s0',`r24') define(`s1',`r25') define(`s2',`r26') define(`s3',`r27')
+define(`d0',`r28') define(`d1',`r29') define(`d2',`r30') define(`d3',`r31')
+define(`up0',`up')
+define(`up1',`r14')
+define(`vp0',`vp')
+define(`vp1',`r15')
+
+define(`cmpltu',  `cmp.ltu')
+define(`cmpeqor', `cmp.eq.or')
+
+ASM_START()
+PROLOGUE(mpn_add_n_sub_n)
+       .prologue
+       .save   ar.lc, r2
+       .body
+ifdef(`HAVE_ABI_32',`
+       addp4   sp = 0, sp              C                               M I
+       addp4   dp = 0, dp              C                               M I
+       addp4   up = 0, up              C                               M I
+       addp4   vp = 0, vp              C                               M I
+       zxt4    n = n                   C                               I
+       ;;
+')
+
+       and     r9 = 3, n               C r9 = n mod 4                  M I
+       mov.i   r2 = ar.lc              C save caller ar.lc             I0
+       add     up1 = 8, up0            C                               M I
+       add     vp1 = 8, vp0            C                               M I
+       add     r8 = -2, n              C r8 = n - 2                    M I
+       add     r10 = 256, up           C prefetch ptr for up           M I
+       ;;
+       shr.u   r8 = r8, 2              C r8 = (n - 2) >> 2             I0
+       cmp.eq  p10, p0 = 0, r9         C p10: n mod 4 == 0             M I
+       cmp.eq  p11, p0 = 2, r9         C p11: n mod 4 == 2             M I
+       cmp.eq  p12, p0 = 3, r9         C p12: n mod 4 == 3             M I
+       add     r11 = 256, vp           C prefetch ptr for vp           M I
+       ;;
+       mov.i   ar.lc = r8              C loop count                    I0
+  (p10)        br      L(b0)                   C                               B
+  (p11)        br      L(b2)                   C                               B
+  (p12)        br      L(b3)                   C                               B
+
+L(b1): ld8     u3 = [up0], 8           C                               M01
+       add     up1 = 8, up1            C                               M I
+       cmpltu  p14, p15 = 4, n         C p15: n <= 4                   M I
+       ld8     v3 = [vp0], 8           C                               M01
+       add     vp1 = 8, vp1            C                               M I
+       ;;
+       add     s3 = u3, v3             C                               M I
+       sub     d3 = u3, v3             C                               M I
+       mov     r8 = 0                  C zero r8, see L(cj1)           M I
+       ;;
+       cmpltu  p9, p0 = s3, v3         C  carry from add3              M I
+       cmpltu  p13, p0 = u3, v3        C borrow from sub3              M I
+  (p15)        br      L(cj1)                  C                               B
+       st8     [sp] = s3, 8            C                               M23
+       st8     [dp] = d3, 8            C                               M23
+       br      L(c0)                   C                               B
+
+L(b0): cmp.ne  p9, p0 = r0, r0         C no carry in                   M I
+       cmp.ne  p13, p0 = r0, r0        C no borrow in                  M I
+L(c0): ld8     u0 = [up0], 16          C                               M01
+       ld8     u1 = [up1], 16          C                               M01
+       ;;
+       ld8     v0 = [vp0], 16          C                               M01
+       ld8     v1 = [vp1], 16          C                               M01
+       ;;
+       ld8     u2 = [up0], 16          C                               M01
+       ld8     u3 = [up1], 16          C                               M01
+       ;;
+       ld8     v2 = [vp0], 16          C                               M01
+       ld8     v3 = [vp1], 16          C                               M01
+       ;;
+       add     s0 = u0, v0             C                               M I
+       add     s1 = u1, v1             C                               M I
+       sub     d0 = u0, v0             C                               M I
+       sub     d1 = u1, v1             C                               M I
+       ;;
+       cmpltu  p6, p0 = s0, v0         C  carry from add0              M I
+       cmpltu  p7, p0 = s1, v1         C  carry from add1              M I
+       cmpltu  p10, p0 = u0, v0        C borrow from sub0              M I
+       cmpltu  p11, p0 = u1, v1        C borrow from sub1              M I
+       ;;
+       nop     0                       C
+       br.cloop.dptk   L(top)          C                               B
+       br      L(end)                  C                               B
+
+L(b3): ld8     u1 = [up0], 8           C                               M01
+       add     up1 = 8, up1            C                               M I
+       ld8     v1 = [vp0], 8           C                               M01
+       ;;
+       add     vp1 = 8, vp1            C                               M I
+       add     s1 = u1, v1             C                               M I
+       sub     d1 = u1, v1             C                               M I
+       ;;
+       cmpltu  p7, p0 = s1, v1         C  carry from add1              M I
+       cmpltu  p11, p0 = u1, v1        C borrow from sub1              M I
+       ;;
+       st8     [sp] = s1, 8            C                               M23
+       st8     [dp] = d1, 8            C                               M23
+       br      L(c2)                   C                               B
+
+       ALIGN(32)
+L(b2): cmp.ne  p7, p0 = r0, r0         C no carry in                   M I
+       cmp.ne  p11, p0 = r0, r0        C no borrow in                  M I
+       nop     0
+L(c2): ld8     u2 = [up0], 16          C                               M01
+       ld8     u3 = [up1], 16          C                               M01
+       cmpltu  p14, p0 = 4, n          C                               M I
+       ;;
+       ld8     v2 = [vp0], 16          C                               M01
+       ld8     v3 = [vp1], 16          C                               M01
+  (p14)        br      L(gt4)                  C                               B
+       ;;
+       add     s2 = u2, v2             C                               M I
+       add     s3 = u3, v3             C                               M I
+       sub     d2 = u2, v2             C                               M I
+       sub     d3 = u3, v3             C                               M I
+       ;;
+       cmpltu  p8, p0 = s2, v2         C  carry from add2              M I
+       cmpltu  p9, p0 = s3, v3         C  carry from add3              M I
+       cmpltu  p12, p0 = u2, v2        C borrow from sub2              M I
+       cmpltu  p13, p0 = u3, v3        C borrow from sub3              M I
+       br      L(cj2)                  C                               B
+       ;;
+L(gt4):        ld8     u0 = [up0], 16          C                               M01
+       ld8     u1 = [up1], 16          C                               M01
+       ;;
+       ld8     v0 = [vp0], 16          C                               M01
+       ld8     v1 = [vp1], 16          C                               M01
+       ;;
+       add     s2 = u2, v2             C                               M I
+       add     s3 = u3, v3             C                               M I
+       sub     d2 = u2, v2             C                               M I
+       sub     d3 = u3, v3             C                               M I
+       ;;
+       cmpltu  p8, p0 = s2, v2         C  carry from add2              M I
+       cmpltu  p9, p0 = s3, v3         C  carry from add3              M I
+       cmpltu  p12, p0 = u2, v2        C borrow from sub2              M I
+       cmpltu  p13, p0 = u3, v3        C borrow from sub3              M I
+       br.cloop.dptk   L(mid)          C                               B
+
+       ALIGN(32)
+L(top):
+       ld8     u0 = [up0], 16          C                               M01
+       ld8     u1 = [up1], 16          C                               M01
+   (p9)        cmpeqor p6, p0 = -1, s0         C propagate carry               M I
+   (p9)        add     s0 = 1, s0              C add carry in                  M I
+  (p13)        cmpeqor p10, p0 = 0, d0         C propagate borrow              M I
+  (p13)        add     d0 = -1, d0             C subtract borrow in            M I
+       ;;
+       ld8     v0 = [vp0], 16          C                               M01
+       ld8     v1 = [vp1], 16          C                               M01
+   (p6)        cmpeqor p7, p0 = -1, s1         C                               M I
+   (p6)        add     s1 = 1, s1              C                               M I
+  (p10)        cmpeqor p11, p0 = 0, d1         C                               M I
+  (p10)        add     d1 = -1, d1             C                               M I
+       ;;
+       st8     [sp] = s0, 8            C                               M23
+       st8     [dp] = d0, 8            C                               M23
+       add     s2 = u2, v2             C                               M I
+       add     s3 = u3, v3             C                               M I
+       sub     d2 = u2, v2             C                               M I
+       sub     d3 = u3, v3             C                               M I
+       ;;
+       st8     [sp] = s1, 8            C                               M23
+       st8     [dp] = d1, 8            C                               M23
+       cmpltu  p8, p0 = s2, v2         C  carry from add2              M I
+       cmpltu  p9, p0 = s3, v3         C  carry from add3              M I
+       cmpltu  p12, p0 = u2, v2        C borrow from sub2              M I
+       cmpltu  p13, p0 = u3, v3        C borrow from sub3              M I
+       ;;
+L(mid):
+       ld8     u2 = [up0], 16          C                               M01
+       ld8     u3 = [up1], 16          C                               M01
+   (p7)        cmpeqor p8, p0 = -1, s2         C                               M I
+   (p7)        add     s2 = 1, s2              C                               M I
+  (p11)        cmpeqor p12, p0 = 0, d2         C                               M I
+  (p11)        add     d2 = -1, d2             C                               M I
+       ;;
+       ld8     v2 = [vp0], 16          C                               M01
+       ld8     v3 = [vp1], 16          C                               M01
+   (p8)        cmpeqor p9, p0 = -1, s3         C                               M I
+   (p8)        add     s3 = 1, s3              C                               M I
+  (p12)        cmpeqor p13, p0 = 0, d3         C                               M I
+  (p12)        add     d3 = -1, d3             C                               M I
+       ;;
+       st8     [sp] = s2, 8            C                               M23
+       st8     [dp] = d2, 8            C                               M23
+       add     s0 = u0, v0             C                               M I
+       add     s1 = u1, v1             C                               M I
+       sub     d0 = u0, v0             C                               M I
+       sub     d1 = u1, v1             C                               M I
+       ;;
+       st8     [sp] = s3, 8            C                               M23
+       st8     [dp] = d3, 8            C                               M23
+       cmpltu  p6, p0 = s0, v0         C  carry from add0              M I
+       cmpltu  p7, p0 = s1, v1         C  carry from add1              M I
+       cmpltu  p10, p0 = u0, v0        C borrow from sub0              M I
+       cmpltu  p11, p0 = u1, v1        C borrow from sub1              M I
+       ;;
+       lfetch  [r10], 32               C prefetch ahead in up          M?
+       lfetch  [r11], 32               C prefetch ahead in vp          M?
+       br.cloop.dptk   L(top)          C                               B
+       ;;
+
+L(end):
+       nop     0
+       nop     0
+   (p9)        cmpeqor p6, p0 = -1, s0         C                               M I
+   (p9)        add     s0 = 1, s0              C                               M I
+  (p13)        cmpeqor p10, p0 = 0, d0         C                               M I
+  (p13)        add     d0 = -1, d0             C                               M I
+       ;;
+       nop     0
+       nop     0
+   (p6)        cmpeqor p7, p0 = -1, s1         C                               M I
+   (p6)        add     s1 = 1, s1              C                               M I
+  (p10)        cmpeqor p11, p0 = 0, d1         C                               M I
+  (p10)        add     d1 = -1, d1             C                               M I
+       ;;
+       st8     [sp] = s0, 8            C                               M23
+       st8     [dp] = d0, 8            C                               M23
+       add     s2 = u2, v2             C                               M I
+       add     s3 = u3, v3             C                               M I
+       sub     d2 = u2, v2             C                               M I
+       sub     d3 = u3, v3             C                               M I
+       ;;
+       st8     [sp] = s1, 8            C                               M23
+       st8     [dp] = d1, 8            C                               M23
+       cmpltu  p8, p0 = s2, v2         C  carry from add2              M I
+       cmpltu  p9, p0 = s3, v3         C  carry from add3              M I
+       cmpltu  p12, p0 = u2, v2        C borrow from sub2              M I
+       cmpltu  p13, p0 = u3, v3        C borrow from sub3              M I
+       ;;
+L(cj2):
+   (p7)        cmpeqor p8, p0 = -1, s2         C                               M I
+   (p7)        add     s2 = 1, s2              C                               M I
+  (p11)        cmpeqor p12, p0 = 0, d2         C                               M I
+  (p11)        add     d2 = -1, d2             C                               M I
+       mov     r8 = 0                  C zero r8, see L(cj1)           M I
+       nop     0
+       ;;
+       st8     [sp] = s2, 8            C                               M23
+       st8     [dp] = d2, 8            C                               M23
+   (p8)        cmpeqor p9, p0 = -1, s3         C                               M I
+   (p8)        add     s3 = 1, s3              C                               M I
+  (p12)        cmpeqor p13, p0 = 0, d3         C                               M I
+  (p12)        add     d3 = -1, d3             C                               M I
+       ;;
+L(cj1):
+   (p9)        mov     r8 = 2                  C final carry -> bit 1          M I
+       ;;
+       mov.i   ar.lc = r2              C restore ar.lc                 I0
+  (p13)        add     r8 = 1, r8              C final borrow -> bit 0         M I
+       st8     [sp] = s3               C                               M23
+       st8     [dp] = d3               C                               M23
+       br.ret.sptk.many b0             C                               B
+EPILOGUE()
+ASM_END()
diff --git a/mpn/ia64/addmul_1.asm b/mpn/ia64/addmul_1.asm

index 6cd9d2b755d53005a869bae4d8eee9f33f9951ba..27c229de888b61805b3e75f93b267c4765f794aa 100644 (file)
--- a/mpn/ia64/addmul_1.asm
+++ b/mpn/ia64/addmul_1.asm
@@ -1,6 +1,8 @@
  dnl  IA-64 mpn_addmul_1 -- Multiply a limb vector with a limb and add the
  dnl  result to a second limb vector.
  
+dnl  Contributed to the GNU project by Torbjorn Granlund.
+
  dnl  Copyright 2000, 2001, 2002, 2003, 2004, 2005, 2007 Free Software
  dnl  Foundation, Inc.
  
diff --git a/mpn/ia64/addmul_2.asm b/mpn/ia64/addmul_2.asm

index 2c258022aea21b17f1c716f4d4f2bfa63253cf93..5d06fa0da3d0291814c288809628e03bb4096a52 100644 (file)
--- a/mpn/ia64/addmul_2.asm
+++ b/mpn/ia64/addmul_2.asm
@@ -1,7 +1,9 @@
  dnl  IA-64 mpn_addmul_2 -- Multiply a n-limb number with a 2-limb number and
  dnl  add the result to a (n+1)-limb number.
  
-dnl  Copyright 2004, 2005 Free Software Foundation, Inc.
+dnl  Contributed to the GNU project by Torbjorn Granlund.
+
+dnl  Copyright 2004, 2005, 2011 Free Software Foundation, Inc.
  
  dnl  This file is part of the GNU MP Library.
  
@@ -24,16 +26,11 @@ C         cycles/limb
  C Itanium:    3.65
  C Itanium 2:  1.625
  
-C Note that this is very similar to mul_2.asm.  If you change this file,
-C please change that file too.
-
  C TODO
  C  * Clean up variable names, and try to decrease the number of distinct
  C    registers used.
-C  * Cleanup feed-in code to not require zeroing several registers.
-C  * Make sure we don't depend on uninitialized predicate registers.
-C  * We currently cross-jump very aggressively, at the expense of a few cycles
-C    per operation.  Consider changing that.
+C  * Clean up feed-in code to not require zeroing several registers.
+C  * Make sure we don't depend on uninitialised predicate registers.
  C  * Could perhaps save a few cycles by using 1 c/l carry propagation in
  C    wind-down code.
  C  * Ultimately rewrite.  The problem with this code is that it first uses a
@@ -94,564 +91,607 @@ define(`ry',`f50')
  define(`uy',`f51')
  
  ASM_START()
+PROLOGUE(mpn_addmul_2s)
+       .prologue
+       .save   ar.lc, r2
+       .body
+
+ifdef(`HAVE_ABI_32',`
+.mmi;          addp4   rp = 0, rp              C                       M I
+               addp4   up = 0, up              C                       M I
+               addp4   vp = 0, vp              C                       M I
+.mmi;          nop     1
+               nop     1
+               zxt4    n = n                   C                       I
+       ;;')
+
+.mmi;          ldf8    ux = [up], 8            C                       M
+               ldf8    v0 = [vp], 8            C                       M
+               mov     r2 = ar.lc              C                       I0
+.mmi;          ldf8    rx = [rp], 8            C                       M
+               and     r14 = 3, n              C                       M I
+               add     n = -2, n               C                       M I
+       ;;
+.mmi;          ldf8    uy = [up], 8            C                       M
+               ldf8    v1 = [vp]               C                       M
+               shr.u   n = n, 2                C                       I0
+.mmi;          ldf8    ry = [rp], -8           C                       M
+               cmp.eq  p14, p0 = 1, r14        C                       M I
+               cmp.eq  p11, p0 = 2, r14        C                       M I
+       ;;
+.mmi;          add     srp = 16, rp            C                       M I
+               cmp.eq  p15, p0 = 3, r14        C                       M I
+               mov     ar.lc = n               C                       I0
+.bbb;  (p14)   br.dptk L(x01)                  C                       B
+       (p11)   br.dptk L(x10)                  C                       B
+       (p15)   br.dptk L(x11)                  C                       B
+       ;;
+
+L(x00):                cmp.ne  p6, p0 = r0, r0         C suppress initial xma pair
+               mov     fp2a_3 = f0             C zero stands in for the skipped xma.hu
+               br      L(b00)                  C continue in mpn_addmul_2 code
+L(x01):                cmp.ne  p14, p0 = r0, r0        C suppress initial xma pair
+               mov     fp2a_2 = f0             C zero stands in for the skipped xma.hu
+               br      L(b01)                  C continue in mpn_addmul_2 code
+L(x10):                cmp.ne  p11, p0 = r0, r0        C suppress initial xma pair
+               mov     fp2a_1 = f0             C presumably same role in L(b10)
+               br      L(b10)
+L(x11):                cmp.ne  p15, p0 = r0, r0        C suppress initial xma pair
+               mov     fp2a_0 = f0             C presumably same role in L(b11)
+               br      L(b11)
+
+EPILOGUE()
+
  PROLOGUE(mpn_addmul_2)
         .prologue
         .save   ar.lc, r2
         .body
  
-ifdef(`HAVE_ABI_32',
-`      addp4           rp = 0, rp              C                       M I
-       addp4           up = 0, up              C                       M I
-       addp4           vp = 0, vp              C                       M I
-       zxt4            n = n                   C                       I
+ifdef(`HAVE_ABI_32',`
+.mmi;          addp4   rp = 0, rp              C                       M I
+               addp4   up = 0, up              C                       M I
+               addp4   vp = 0, vp              C                       M I
+.mmi;          nop     1
+               nop     1
+               zxt4    n = n                   C                       I
         ;;')
  
-{.mmi          C 00
-       ldf8            ux = [up], 8            C                       M
-       ldf8            v0 = [vp], 8            C                       M
-       mov.i           r2 = ar.lc              C                       I0
-}{.mmi
-       ldf8            rx = [rp], 8            C                       M
-       and             r14 = 3, n              C                       M I
-       add             n = -2, n               C                       M I
-       ;;
-}{.mmi         C 01
-       ldf8            uy = [up], 8            C                       M
-       ldf8            v1 = [vp]               C                       M
-       shr.u           n = n, 2                C                       I0
-}{.mmi
-       ldf8            ry = [rp], -8           C                       M
-       cmp.eq          p10, p0 = 1, r14        C                       M I
-       cmp.eq          p11, p0 = 2, r14        C                       M I
-       ;;
-}{.mmi         C 02
-       add             srp = 16, rp            C                       M I
-       cmp.eq          p12, p0 = 3, r14        C                       M I
-       mov.i           ar.lc = n               C                       I0
-}{.bbb
-  (p10) br.dptk                .Lb01                   C                       B
-  (p11) br.dptk                .Lb10                   C                       B
-  (p12) br.dptk                .Lb11                   C                       B
-       ;;
-}
+.mmi;          ldf8    ux = [up], 8            C                       M
+               ldf8    v0 = [vp], 8            C                       M
+               mov     r2 = ar.lc              C                       I0
+.mmi;          ldf8    rx = [rp], 8            C                       M
+               and     r14 = 3, n              C                       M I
+               add     n = -2, n               C                       M I
+       ;;
+.mmi;          ldf8    uy = [up], 8            C                       M
+               ldf8    v1 = [vp]               C                       M
+               shr.u   n = n, 2                C                       I0
+.mmi;          ldf8    ry = [rp], -8           C                       M
+               cmp.eq  p14, p0 = 1, r14        C                       M I
+               cmp.eq  p11, p0 = 2, r14        C                       M I
+       ;;
+.mmi;          add     srp = 16, rp            C                       M I
+               cmp.eq  p15, p6 = 3, r14        C                       M I
+               mov     ar.lc = n               C                       I0
+.bbb;  (p14)   br.dptk L(b01)                  C                       B
+       (p11)   br.dptk L(b10)                  C                       B
+       (p15)   br.dptk L(b11)                  C                       B
+       ;;
  
         ALIGN(32)
-.Lb00: ldf8            r_1 = [srp], 8
-       ldf8            u_1 = [up], 8
-       mov             acc1_2 = 0
-       mov             pr1_2 = 0
-       mov             pr0_3 = 0
-       cmp.ne          p8, p9 = r0, r0
-       ;;
-       ldf8            r_2 = [srp], 8
-       xma.l           fp0b_3 = ux, v0, rx
-       cmp.ne          p12, p13 = r0, r0
-       ldf8            u_2 = [up], 8
-       xma.hu          fp1a_3 = ux, v0, rx
-       br.cloop.dptk   .grt4
-
-       xma.l           fp0b_0 = uy, v0, ry
-       xma.hu          fp1a_0 = uy, v0, ry
-       ;;
-       getf.sig        acc0 = fp0b_3
-       xma.l           fp1b_3 = ux, v1, fp1a_3
-       xma.hu          fp2a_3 = ux, v1, fp1a_3
-       ;;
-       xma.l           fp0b_1 = u_1, v0, r_1
-       xma.hu          fp1a_1 = u_1, v0, r_1
-       ;;
-       getf.sig        pr0_0 = fp0b_0
-       xma.l           fp1b_0 = uy, v1, fp1a_0
-       xma.hu          fp2a_0 = uy, v1, fp1a_0
-       ;;
-       getf.sig        pr1_3 = fp1b_3
-       getf.sig        acc1_3 = fp2a_3
-       xma.l           fp0b_2 = u_2, v0, r_2
-       xma.hu          fp1a_2 = u_2, v0, r_2
-       br              .Lcj4
-
-.grt4: xma.l           fp0b_0 = uy, v0, ry
-       xma.hu          fp1a_0 = uy, v0, ry
-       ;;
-       ldf8            r_3 = [srp], 8
-       getf.sig        acc0 = fp0b_3
-       xma.l           fp1b_3 = ux, v1, fp1a_3
-       ldf8            u_3 = [up], 8
-       xma.hu          fp2a_3 = ux, v1, fp1a_3
-       ;;
-       xma.l           fp0b_1 = u_1, v0, r_1
-       xma.hu          fp1a_1 = u_1, v0, r_1
-       ;;
-       ldf8            r_0 = [srp], 8
-       getf.sig        pr0_0 = fp0b_0
-       xma.l           fp1b_0 = uy, v1, fp1a_0
-       xma.hu          fp2a_0 = uy, v1, fp1a_0
-       ;;
-       ldf8            u_0 = [up], 8
-       getf.sig        pr1_3 = fp1b_3
-       ;;
-       getf.sig        acc1_3 = fp2a_3
-       xma.l           fp0b_2 = u_2, v0, r_2
-       xma.hu          fp1a_2 = u_2, v0, r_2
-       br              .LL00
+L(b00):
+.mmi;          ldf8    r_1 = [srp], 8
+               ldf8    u_1 = [up], 8
+               mov     acc1_2 = 0
+.mmi;          mov     pr1_2 = 0
+               mov     pr0_3 = 0
+               cmp.ne  p8, p9 = r0, r0
+       ;;
+.mfi;          ldf8    r_2 = [srp], 8
+               xma.l   fp0b_3 = ux, v0, rx
+               cmp.ne  p12, p13 = r0, r0
+.mfb;          ldf8    u_2 = [up], 8
+               xma.hu  fp1b_3 = ux, v0, rx
+               br.cloop.dptk   L(gt4)
+
+               xma.l   fp0b_0 = uy, v0, ry
+               xma.hu  fp1a_0 = uy, v0, ry
+       ;;
+               getfsig acc0 = fp0b_3
+       (p6)    xma.hu  fp2a_3 = ux, v1, fp1b_3         C suppressed for addmul_2s
+       (p6)    xma.l   fp1b_3 = ux, v1, fp1b_3         C suppressed for addmul_2s
+       ;;
+               xma.l   fp0b_1 = u_1, v0, r_1
+               xma.hu  fp1a_1 = u_1, v0, r_1
+       ;;
+               getfsig pr0_0 = fp0b_0
+               xma.l   fp1b_0 = uy, v1, fp1a_0
+               xma.hu  fp2a_0 = uy, v1, fp1a_0
+       ;;
+               getfsig pr1_3 = fp1b_3
+               getfsig acc1_3 = fp2a_3
+               xma.l   fp0b_2 = u_2, v0, r_2
+               xma.hu  fp1a_2 = u_2, v0, r_2
+               br      L(cj4)
+
+L(gt4):                xma.l   fp0b_0 = uy, v0, ry
+               xma.hu  fp1a_0 = uy, v0, ry
+       ;;
+               ldf8    r_3 = [srp], 8
+               getfsig acc0 = fp0b_3
+       (p6)    xma.hu  fp2a_3 = ux, v1, fp1b_3         C suppressed for addmul_2s
+               ldf8    u_3 = [up], 8
+       (p6)    xma.l   fp1b_3 = ux, v1, fp1b_3         C suppressed for addmul_2s
+       ;;
+               xma.l   fp0b_1 = u_1, v0, r_1
+               xma.hu  fp1a_1 = u_1, v0, r_1
+       ;;
+               ldf8    r_0 = [srp], 8
+               getfsig pr0_0 = fp0b_0
+               xma.l   fp1b_0 = uy, v1, fp1a_0
+               xma.hu  fp2a_0 = uy, v1, fp1a_0
+       ;;
+               ldf8    u_0 = [up], 8
+               getfsig pr1_3 = fp1b_3
+               xma.l   fp0b_2 = u_2, v0, r_2
+       ;;
+               getfsig acc1_3 = fp2a_3
+               xma.hu  fp1a_2 = u_2, v0, r_2
+               br      L(00)
  
  
         ALIGN(32)
-.Lb01: ldf8            r_0 = [srp], 8          C M
-       ldf8            u_0 = [up], 8           C M
-       mov             acc1_1 = 0              C M I
-       mov             pr1_1 = 0               C M I
-       mov             pr0_2 = 0               C M I
-       cmp.ne          p6, p7 = r0, r0         C M I
-       ;;
-       ldf8            r_1 = [srp], 8          C M
-       xma.l           fp0b_2 = ux, v0, rx     C F
-       cmp.ne          p10, p11 = r0, r0       C M I
-       ldf8            u_1 = [up], 8           C M
-       xma.hu          fp1a_2 = ux, v0, rx     C F
-       ;;
-       xma.l           fp0b_3 = uy, v0, ry     C F
-       xma.hu          fp1a_3 = uy, v0, ry     C F
-       ;;
-       getf.sig        acc0 = fp0b_2           C M
-       ldf8            r_2 = [srp], 8          C M
-       xma.l           fp1b_2 = ux, v1,fp1a_2  C F
-       xma.hu          fp2a_2 = ux, v1,fp1a_2  C F
-       ldf8            u_2 = [up], 8           C M
-       br.cloop.dptk   .grt5
-
-       xma.l           fp0b_0 = u_0, v0, r_0   C F
-       xma.hu          fp1a_0 = u_0, v0, r_0   C F
-       ;;
-       getf.sig        pr0_3 = fp0b_3          C M
-       xma.l           fp1b_3 = uy, v1,fp1a_3  C F
-       xma.hu          fp2a_3 = uy, v1,fp1a_3  C F
-       ;;
-       getf.sig        pr1_2 = fp1b_2          C M
-       getf.sig        acc1_2 = fp2a_2         C M
-       xma.l           fp0b_1 = u_1, v0, r_1   C F
-       xma.hu          fp1a_1 = u_1, v0, r_1   C F
-       br              .Lcj5
-
-.grt5: xma.l           fp0b_0 = u_0, v0, r_0
-       xma.hu          fp1a_0 = u_0, v0, r_0
-       ;;
-       getf.sig        pr0_3 = fp0b_3
-       ldf8            r_3 = [srp], 8
-       xma.l           fp1b_3 = uy, v1, fp1a_3
-       xma.hu          fp2a_3 = uy, v1, fp1a_3
-       ;;
-       ldf8            u_3 = [up], 8
-       getf.sig        pr1_2 = fp1b_2
-       ;;
-       getf.sig        acc1_2 = fp2a_2
-       xma.l           fp0b_1 = u_1, v0, r_1
-       xma.hu          fp1a_1 = u_1, v0, r_1
-       br              .LL01
+L(b01):
+.mmi;          ldf8    r_0 = [srp], 8          C M
+               ldf8    u_0 = [up], 8           C M
+               mov     acc1_1 = 0              C M I
+.mmi;          mov     pr1_1 = 0               C M I
+               mov     pr0_2 = 0               C M I
+               cmp.ne  p6, p7 = r0, r0         C M I
+       ;;
+.mfi;          ldf8    r_1 = [srp], 8          C M
+               xma.l   fp0b_2 = ux, v0, rx     C F
+               cmp.ne  p10, p11 = r0, r0       C M I
+.mfi;          ldf8    u_1 = [up], 8           C M
+               xma.hu  fp1b_2 = ux, v0, rx     C F
+               nop     1
+       ;;
+               xma.l   fp0b_3 = uy, v0, ry     C F
+               xma.hu  fp1a_3 = uy, v0, ry     C F
+       ;;
+.mmf;          getfsig acc0 = fp0b_2           C M
+               ldf8    r_2 = [srp], 8          C M
+       (p14)   xma.hu  fp2a_2 = ux, v1,fp1b_2  C F     suppressed for addmul_2s
+.mfb;          ldf8    u_2 = [up], 8           C M
+       (p14)   xma.l   fp1b_2 = ux, v1,fp1b_2  C F     suppressed for addmul_2s
+               br.cloop.dptk   L(gt5)
+
+               xma.l   fp0b_0 = u_0, v0, r_0   C F
+               xma.hu  fp1a_0 = u_0, v0, r_0   C F
+       ;;
+               getfsig pr0_3 = fp0b_3          C M
+               xma.l   fp1b_3 = uy, v1,fp1a_3  C F
+               xma.hu  fp2a_3 = uy, v1,fp1a_3  C F
+       ;;
+               getfsig pr1_2 = fp1b_2          C M
+               getfsig acc1_2 = fp2a_2         C M
+               xma.l   fp0b_1 = u_1, v0, r_1   C F
+               xma.hu  fp1a_1 = u_1, v0, r_1   C F
+               br      L(cj5)
+
+L(gt5):                xma.l   fp0b_0 = u_0, v0, r_0
+               xma.hu  fp1a_0 = u_0, v0, r_0
+       ;;
+               getfsig pr0_3 = fp0b_3
+               ldf8    r_3 = [srp], 8
+               xma.l   fp1b_3 = uy, v1, fp1a_3
+               xma.hu  fp2a_3 = uy, v1, fp1a_3
+       ;;
+               ldf8    u_3 = [up], 8
+               getfsig pr1_2 = fp1b_2
+               xma.l   fp0b_1 = u_1, v0, r_1
+       ;;
+               getfsig acc1_2 = fp2a_2
+               xma.hu  fp1a_1 = u_1, v0, r_1
+               br      L(01)
  
  
         ALIGN(32)
-.Lb10:         C 03
-       br.cloop.dptk   .grt2
-               C 04
-               C 05
-               C 06
-       xma.l           fp0b_1 = ux, v0, rx
-       xma.hu          fp1a_1 = ux, v0, rx
-       ;;      C 07
-       xma.l           fp0b_2 = uy, v0, ry
-       xma.hu          fp1a_2 = uy, v0, ry
-       ;;      C 08
-               C 09
-               C 10
-       stf8            [rp] = fp0b_1, 8
-       xma.l           fp1b_1 = ux, v1, fp1a_1
-       xma.hu          fp2a_1 = ux, v1, fp1a_1
-       ;;      C 11
-       getf.sig        acc0 = fp0b_2
-       xma.l           fp1b_2 = uy, v1, fp1a_2
-       xma.hu          fp2a_2 = uy, v1, fp1a_2
-       ;;      C 12
-               C 13
-               C 14
-       getf.sig        pr1_1 = fp1b_1
-               C 15
-       getf.sig        acc1_1 = fp2a_1
-               C 16
-       getf.sig        pr1_2 = fp1b_2
-               C 17
-       getf.sig        r8 = fp2a_2
-       ;;      C 18
-               C 19
-       add             s0 = pr1_1, acc0
-       ;;      C 20
-       st8             [rp] = s0, 8
-       cmp.ltu         p8, p9 = s0, pr1_1
-       sub             r31 = -1, acc1_1
-       ;;      C 21
-       .pred.rel "mutex", p8, p9
-  (p8) add             acc0 = pr1_2, acc1_1, 1
-  (p9) add             acc0 = pr1_2, acc1_1
-  (p8) cmp.leu         p10, p0 = r31, pr1_2
-  (p9) cmp.ltu         p10, p0 = r31, pr1_2
-       ;;      C 22
-       st8             [rp] = acc0, 8
-       mov.i           ar.lc = r2
-  (p10)        add             r8 = 1, r8
-       br.ret.sptk.many b0
-
-
-.grt2: ldf8            r_3 = [srp], 8
-       ldf8            u_3 = [up], 8
-       mov             acc1_0 = 0
-       ;;
-       ldf8            r_0 = [srp], 8
-       xma.l           fp0b_1 = ux, v0, rx
-       mov             pr1_0 = 0
-       ldf8            u_0 = [up], 8
-       xma.hu          fp1a_1 = ux, v0, rx
-       mov             pr0_1 = 0
-       ;;
-       xma.l           fp0b_2 = uy, v0, ry
-       xma.hu          fp1a_2 = uy, v0, ry
-       ;;
-       getf.sig        acc0 = fp0b_1
-       ldf8            r_1 = [srp], 8
-       xma.l           fp1b_1 = ux, v1, fp1a_1
-       xma.hu          fp2a_1 = ux, v1, fp1a_1
-       ;;
-       ldf8            u_1 = [up], 8
-       xma.l           fp0b_3 = u_3, v0, r_3
-       xma.hu          fp1a_3 = u_3, v0, r_3
-       ;;
-       getf.sig        pr0_2 = fp0b_2
-       ldf8            r_2 = [srp], 8
-       xma.l           fp1b_2 = uy, v1, fp1a_2
-       xma.hu          fp2a_2 = uy, v1, fp1a_2
-       ;;
-       ldf8            u_2 = [up], 8
-       getf.sig        pr1_1 = fp1b_1
-       ;;
-       getf.sig        acc1_1 = fp2a_1
-       xma.l           fp0b_0 = u_0, v0, r_0
-       cmp.ne          p8, p9 = r0, r0
-       cmp.ne          p12, p13 = r0, r0
-       xma.hu          fp1a_0 = u_0, v0, r_0
-       br              .LL10
+L(b10):                br.cloop.dptk   L(gt2)
+               xma.l   fp0b_1 = ux, v0, rx
+               xma.hu  fp1b_1 = ux, v0, rx
+       ;;
+               xma.l   fp0b_2 = uy, v0, ry
+               xma.hu  fp1a_2 = uy, v0, ry
+       ;;
+               stf8    [rp] = fp0b_1, 8
+       (p11)   xma.hu  fp2a_1 = ux, v1, fp1b_1         C suppressed for addmul_2s
+       (p11)   xma.l   fp1b_1 = ux, v1, fp1b_1         C suppressed for addmul_2s
+       ;;
+               getfsig acc0 = fp0b_2
+               xma.l   fp1b_2 = uy, v1, fp1a_2
+               xma.hu  fp2a_2 = uy, v1, fp1a_2
+       ;;
+               getfsig pr1_1 = fp1b_1
+               getfsig acc1_1 = fp2a_1
+               mov     ar.lc = r2
+               getfsig pr1_2 = fp1b_2
+               getfsig r8 = fp2a_2
+       ;;
+               add     s0 = pr1_1, acc0
+       ;;
+               st8     [rp] = s0, 8
+               cmp.ltu p8, p9 = s0, pr1_1
+               sub     r31 = -1, acc1_1
+       ;;
+               .pred.rel "mutex", p8, p9
+       (p8)    add     acc0 = pr1_2, acc1_1, 1
+       (p9)    add     acc0 = pr1_2, acc1_1
+       (p8)    cmp.leu p10, p0 = r31, pr1_2
+       (p9)    cmp.ltu p10, p0 = r31, pr1_2
+       ;;
+               st8     [rp] = acc0, 8
+       (p10)   add     r8 = 1, r8
+               br.ret.sptk.many b0
+
+
+L(gt2):
+.mmi;          ldf8    r_3 = [srp], 8
+               ldf8    u_3 = [up], 8
+               mov     acc1_0 = 0
+       ;;
+.mfi;          ldf8    r_0 = [srp], 8
+               xma.l   fp0b_1 = ux, v0, rx
+               mov     pr1_0 = 0
+.mfi;          ldf8    u_0 = [up], 8
+               xma.hu  fp1b_1 = ux, v0, rx
+               mov     pr0_1 = 0
+       ;;
+               xma.l   fp0b_2 = uy, v0, ry
+               xma.hu  fp1a_2 = uy, v0, ry
+       ;;
+               getfsig acc0 = fp0b_1
+               ldf8    r_1 = [srp], 8
+       (p11)   xma.hu  fp2a_1 = ux, v1, fp1b_1         C suppressed for addmul_2s
+       (p11)   xma.l   fp1b_1 = ux, v1, fp1b_1         C suppressed for addmul_2s
+       ;;
+               ldf8    u_1 = [up], 8
+               xma.l   fp0b_3 = u_3, v0, r_3
+               xma.hu  fp1a_3 = u_3, v0, r_3
+       ;;
+               getfsig pr0_2 = fp0b_2
+               ldf8    r_2 = [srp], 8
+               xma.l   fp1b_2 = uy, v1, fp1a_2
+               xma.hu  fp2a_2 = uy, v1, fp1a_2
+       ;;
+               ldf8    u_2 = [up], 8
+               getfsig pr1_1 = fp1b_1
+       ;;
+.mfi;          getfsig acc1_1 = fp2a_1
+               xma.l   fp0b_0 = u_0, v0, r_0
+               cmp.ne  p8, p9 = r0, r0
+.mfb;          cmp.ne  p12, p13 = r0, r0
+               xma.hu  fp1a_0 = u_0, v0, r_0
+               br.cloop.sptk.clr       L(top)
+               br.many L(end)
  
  
         ALIGN(32)
-.Lb11: mov             acc1_3 = 0
-       mov             pr1_3 = 0
-       mov             pr0_0 = 0
-       cmp.ne          p6, p7 = r0, r0
+L(b11):                ldf8    r_2 = [srp], 8
+               mov     pr1_3 = 0
+               mov     pr0_0 = 0
         ;;
-       ldf8            r_2 = [srp], 8
-       ldf8            u_2 = [up], 8
-       br.cloop.dptk   .grt3
+               ldf8    u_2 = [up], 8
+               mov     acc1_3 = 0
+               br.cloop.dptk   L(gt3)
         ;;
-       xma.l           fp0b_0 = ux, v0, rx
-       xma.hu          fp1a_0 = ux, v0, rx
+               cmp.ne  p6, p7 = r0, r0
+               xma.l   fp0b_0 = ux, v0, rx
+               xma.hu  fp1b_0 = ux, v0, rx
         ;;
-       cmp.ne          p10, p11 = r0, r0
-       xma.l           fp0b_1 = uy, v0, ry
-       xma.hu          fp1a_1 = uy, v0, ry
+               cmp.ne  p10, p11 = r0, r0
+               xma.l   fp0b_1 = uy, v0, ry
+               xma.hu  fp1a_1 = uy, v0, ry
         ;;
-       getf.sig        acc0 = fp0b_0
-       xma.l           fp1b_0 = ux, v1, fp1a_0
-       xma.hu          fp2a_0 = ux, v1, fp1a_0
+               getfsig acc0 = fp0b_0
+       (p15)   xma.hu  fp2a_0 = ux, v1, fp1b_0         C suppressed for addmul_2s
+       (p15)   xma.l   fp1b_0 = ux, v1, fp1b_0         C suppressed for addmul_2s
         ;;
-       xma.l           fp0b_2 = u_2, v0, r_2
-       xma.hu          fp1a_2 = u_2, v0, r_2
+               xma.l   fp0b_2 = uy, v1, r_2
+               xma.hu  fp1a_2 = uy, v1, r_2
         ;;
-       getf.sig        pr0_1 = fp0b_1
-       xma.l           fp1b_1 = uy, v1, fp1a_1
-       xma.hu          fp2a_1 = uy, v1, fp1a_1
+               getfsig pr0_1 = fp0b_1
+               xma.l   fp1b_1 = u_2, v0, fp1a_1
+               xma.hu  fp2a_1 = u_2, v0, fp1a_1
         ;;
-       getf.sig        pr1_0 = fp1b_0
-       getf.sig        acc1_0 = fp2a_0
-       br              .Lcj3
+               getfsig pr1_0 = fp1b_0
+               getfsig acc1_0 = fp2a_0
+               br      L(cj3)
  
-.grt3: ldf8            r_3 = [srp], 8
-       xma.l           fp0b_0 = ux, v0, rx
-       cmp.ne          p10, p11 = r0, r0
-       ldf8            u_3 = [up], 8
-       xma.hu          fp1a_0 = ux, v0, rx
+L(gt3):                ldf8    r_3 = [srp], 8
+               xma.l   fp0b_0 = ux, v0, rx
+               cmp.ne  p10, p11 = r0, r0
+               ldf8    u_3 = [up], 8
+               xma.hu  fp1b_0 = ux, v0, rx
+               cmp.ne  p6, p7 = r0, r0
         ;;
-       xma.l           fp0b_1 = uy, v0, ry
-       xma.hu          fp1a_1 = uy, v0, ry
+               xma.l   fp0b_1 = uy, v0, ry
+               xma.hu  fp1a_1 = uy, v0, ry
         ;;
-       getf.sig        acc0 = fp0b_0
-       ldf8            r_0 = [srp], 8
-       xma.l           fp1b_0 = ux, v1, fp1a_0
-       ldf8            u_0 = [up], 8
-       xma.hu          fp2a_0 = ux, v1, fp1a_0
+               getfsig acc0 = fp0b_0
+               ldf8    r_0 = [srp], 8
+       (p15)   xma.hu  fp2a_0 = ux, v1, fp1b_0         C suppressed for addmul_2s
+               ldf8    u_0 = [up], 8
+       (p15)   xma.l   fp1b_0 = ux, v1, fp1b_0         C suppressed for addmul_2s
         ;;
-       xma.l           fp0b_2 = u_2, v0, r_2
-       xma.hu          fp1a_2 = u_2, v0, r_2
+               xma.l   fp0b_2 = u_2, v0, r_2
+               xma.hu  fp1a_2 = u_2, v0, r_2
         ;;
-       getf.sig        pr0_1 = fp0b_1
-       ldf8            r_1 = [srp], 8
-       xma.l           fp1b_1 = uy, v1, fp1a_1
-       xma.hu          fp2a_1 = uy, v1, fp1a_1
+               getfsig pr0_1 = fp0b_1
+               ldf8    r_1 = [srp], 8
+               xma.l   fp1b_1 = uy, v1, fp1a_1
+               xma.hu  fp2a_1 = uy, v1, fp1a_1
         ;;
-       ldf8            u_1 = [up], 8
-       getf.sig        pr1_0 = fp1b_0
+               ldf8    u_1 = [up], 8
+               getfsig pr1_0 = fp1b_0
         ;;
-       getf.sig        acc1_0 = fp2a_0
-       xma.l           fp0b_3 = u_3, v0, r_3
-       xma.hu          fp1a_3 = u_3, v0, r_3
-       br              .LL11
+               getfsig acc1_0 = fp2a_0
+               xma.l   fp0b_3 = u_3, v0, r_3
+               xma.hu  fp1a_3 = u_3, v0, r_3
+               br      L(11)
  
  
  C *** MAIN LOOP START ***
         ALIGN(32)
-.Loop:                                         C 00
-       .pred.rel "mutex", p12, p13
-       getf.sig        pr0_3 = fp0b_3
-       ldf8            r_3 = [srp], 8
-       xma.l           fp1b_3 = u_3, v1, fp1a_3
-  (p12)        add             s0 = pr1_0, acc0, 1
-  (p13)        add             s0 = pr1_0, acc0
-       xma.hu          fp2a_3 = u_3, v1, fp1a_3
+L(top):                                                C 00
+               .pred.rel "mutex", p12, p13
+               getfsig pr0_3 = fp0b_3
+               ldf8    r_3 = [srp], 8
+               xma.l   fp1b_3 = u_3, v1, fp1a_3
+       (p12)   add     s0 = pr1_0, acc0, 1
+       (p13)   add     s0 = pr1_0, acc0
+               xma.hu  fp2a_3 = u_3, v1, fp1a_3
         ;;                                      C 01
-       .pred.rel "mutex", p8, p9
-       .pred.rel "mutex", p12, p13
-       ldf8            u_3 = [up], 8
-       getf.sig        pr1_2 = fp1b_2
-  (p8) cmp.leu         p6, p7 = acc0, pr0_1
-  (p9) cmp.ltu         p6, p7 = acc0, pr0_1
-  (p12)        cmp.leu         p10, p11 = s0, pr1_0
-  (p13)        cmp.ltu         p10, p11 = s0, pr1_0
+               .pred.rel "mutex", p8, p9
+               .pred.rel "mutex", p12, p13
+               ldf8    u_3 = [up], 8
+               getfsig pr1_2 = fp1b_2
+       (p8)    cmp.leu p6, p7 = acc0, pr0_1
+       (p9)    cmp.ltu p6, p7 = acc0, pr0_1
+       (p12)   cmp.leu p10, p11 = s0, pr1_0
+       (p13)   cmp.ltu p10, p11 = s0, pr1_0
         ;;                                      C 02
-       .pred.rel "mutex", p6, p7
-       getf.sig        acc1_2 = fp2a_2
-       st8             [rp] = s0, 8
-       xma.l           fp0b_1 = u_1, v0, r_1
-  (p6) add             acc0 = pr0_2, acc1_0, 1
-  (p7) add             acc0 = pr0_2, acc1_0
-       xma.hu          fp1a_1 = u_1, v0, r_1
+               .pred.rel "mutex", p6, p7
+               getfsig acc1_2 = fp2a_2
+               st8     [rp] = s0, 8
+               xma.l   fp0b_1 = u_1, v0, r_1
+       (p6)    add     acc0 = pr0_2, acc1_0, 1
+       (p7)    add     acc0 = pr0_2, acc1_0
+               xma.hu  fp1a_1 = u_1, v0, r_1
         ;;                                      C 03
-.LL01:
-       .pred.rel "mutex", p10, p11
-       getf.sig        pr0_0 = fp0b_0
-       ldf8            r_0 = [srp], 8
-       xma.l           fp1b_0 = u_0, v1, fp1a_0
-  (p10)        add             s0 = pr1_1, acc0, 1
-  (p11)        add             s0 = pr1_1, acc0
-       xma.hu          fp2a_0 = u_0, v1, fp1a_0
+L(01):
+               .pred.rel "mutex", p10, p11
+               getfsig pr0_0 = fp0b_0
+               ldf8    r_0 = [srp], 8
+               xma.l   fp1b_0 = u_0, v1, fp1a_0
+       (p10)   add     s0 = pr1_1, acc0, 1
+       (p11)   add     s0 = pr1_1, acc0
+               xma.hu  fp2a_0 = u_0, v1, fp1a_0
         ;;                                      C 04
-       .pred.rel "mutex", p6, p7
-       .pred.rel "mutex", p10, p11
-       ldf8            u_0 = [up], 8
-       getf.sig        pr1_3 = fp1b_3
-  (p6) cmp.leu         p8, p9 = acc0, pr0_2
-  (p7) cmp.ltu         p8, p9 = acc0, pr0_2
-  (p10)        cmp.leu         p12, p13 = s0, pr1_1
-  (p11)        cmp.ltu         p12, p13 = s0, pr1_1
+               .pred.rel "mutex", p6, p7
+               .pred.rel "mutex", p10, p11
+               ldf8    u_0 = [up], 8
+               getfsig pr1_3 = fp1b_3
+       (p6)    cmp.leu p8, p9 = acc0, pr0_2
+       (p7)    cmp.ltu p8, p9 = acc0, pr0_2
+       (p10)   cmp.leu p12, p13 = s0, pr1_1
+       (p11)   cmp.ltu p12, p13 = s0, pr1_1
         ;;                                      C 05
-       .pred.rel "mutex", p8, p9
-       getf.sig        acc1_3 = fp2a_3
-       st8             [rp] = s0, 8
-       xma.l           fp0b_2 = u_2, v0, r_2
-  (p8) add             acc0 = pr0_3, acc1_1, 1
-  (p9) add             acc0 = pr0_3, acc1_1
-       xma.hu          fp1a_2 = u_2, v0, r_2
+               .pred.rel "mutex", p8, p9
+               getfsig acc1_3 = fp2a_3
+               st8     [rp] = s0, 8
+               xma.l   fp0b_2 = u_2, v0, r_2
+       (p8)    add     acc0 = pr0_3, acc1_1, 1
+       (p9)    add     acc0 = pr0_3, acc1_1
+               xma.hu  fp1a_2 = u_2, v0, r_2
         ;;                                      C 06
-.LL00:
-       .pred.rel "mutex", p12, p13
-       getf.sig        pr0_1 = fp0b_1
-       ldf8            r_1 = [srp], 8
-       xma.l           fp1b_1 = u_1, v1, fp1a_1
-  (p12)        add             s0 = pr1_2, acc0, 1
-  (p13)        add             s0 = pr1_2, acc0
-       xma.hu          fp2a_1 = u_1, v1, fp1a_1
+L(00):
+               .pred.rel "mutex", p12, p13
+               getfsig pr0_1 = fp0b_1
+               ldf8    r_1 = [srp], 8
+               xma.l   fp1b_1 = u_1, v1, fp1a_1
+       (p12)   add     s0 = pr1_2, acc0, 1
+       (p13)   add     s0 = pr1_2, acc0
+               xma.hu  fp2a_1 = u_1, v1, fp1a_1
         ;;                                      C 07
-       .pred.rel "mutex", p8, p9
-       .pred.rel "mutex", p12, p13
-       ldf8            u_1 = [up], 8
-       getf.sig        pr1_0 = fp1b_0
-  (p8) cmp.leu         p6, p7 = acc0, pr0_3
-  (p9) cmp.ltu         p6, p7 = acc0, pr0_3
-  (p12)        cmp.leu         p10, p11 = s0, pr1_2
-  (p13)        cmp.ltu         p10, p11 = s0, pr1_2
+               .pred.rel "mutex", p8, p9
+               .pred.rel "mutex", p12, p13
+               ldf8    u_1 = [up], 8
+               getfsig pr1_0 = fp1b_0
+       (p8)    cmp.leu p6, p7 = acc0, pr0_3
+       (p9)    cmp.ltu p6, p7 = acc0, pr0_3
+       (p12)   cmp.leu p10, p11 = s0, pr1_2
+       (p13)   cmp.ltu p10, p11 = s0, pr1_2
         ;;                                      C 08
-       .pred.rel "mutex", p6, p7
-       getf.sig        acc1_0 = fp2a_0
-       st8             [rp] = s0, 8
-       xma.l           fp0b_3 = u_3, v0, r_3
-  (p6) add             acc0 = pr0_0, acc1_2, 1
-  (p7) add             acc0 = pr0_0, acc1_2
-       xma.hu          fp1a_3 = u_3, v0, r_3
+               .pred.rel "mutex", p6, p7
+               getfsig acc1_0 = fp2a_0
+               st8     [rp] = s0, 8
+               xma.l   fp0b_3 = u_3, v0, r_3
+       (p6)    add     acc0 = pr0_0, acc1_2, 1
+       (p7)    add     acc0 = pr0_0, acc1_2
+               xma.hu  fp1a_3 = u_3, v0, r_3
         ;;                                      C 09
-.LL11:
-       .pred.rel "mutex", p10, p11
-       getf.sig        pr0_2 = fp0b_2
-       ldf8            r_2 = [srp], 8
-       xma.l           fp1b_2 = u_2, v1, fp1a_2
-  (p10)        add             s0 = pr1_3, acc0, 1
-  (p11)        add             s0 = pr1_3, acc0
-       xma.hu          fp2a_2 = u_2, v1, fp1a_2
+L(11):
+               .pred.rel "mutex", p10, p11
+               getfsig pr0_2 = fp0b_2
+               ldf8    r_2 = [srp], 8
+               xma.l   fp1b_2 = u_2, v1, fp1a_2
+       (p10)   add     s0 = pr1_3, acc0, 1
+       (p11)   add     s0 = pr1_3, acc0
+               xma.hu  fp2a_2 = u_2, v1, fp1a_2
         ;;                                      C 10
-       .pred.rel "mutex", p6, p7
-       .pred.rel "mutex", p10, p11
-       ldf8            u_2 = [up], 8
-       getf.sig        pr1_1 = fp1b_1
-  (p6) cmp.leu         p8, p9 = acc0, pr0_0
-  (p7) cmp.ltu         p8, p9 = acc0, pr0_0
-  (p10)        cmp.leu         p12, p13 = s0, pr1_3
-  (p11)        cmp.ltu         p12, p13 = s0, pr1_3
+               .pred.rel "mutex", p6, p7
+               .pred.rel "mutex", p10, p11
+               ldf8    u_2 = [up], 8
+               getfsig pr1_1 = fp1b_1
+       (p6)    cmp.leu p8, p9 = acc0, pr0_0
+       (p7)    cmp.ltu p8, p9 = acc0, pr0_0
+       (p10)   cmp.leu p12, p13 = s0, pr1_3
+       (p11)   cmp.ltu p12, p13 = s0, pr1_3
         ;;                                      C 11
-       .pred.rel "mutex", p8, p9
-       getf.sig        acc1_1 = fp2a_1
-       st8             [rp] = s0, 8
-       xma.l           fp0b_0 = u_0, v0, r_0
-  (p8) add             acc0 = pr0_1, acc1_3, 1
-  (p9) add             acc0 = pr0_1, acc1_3
-       xma.hu          fp1a_0 = u_0, v0, r_0
-.LL10: br.cloop.dptk   .Loop                   C 12
+               .pred.rel "mutex", p8, p9
+               getfsig acc1_1 = fp2a_1
+               st8     [rp] = s0, 8
+               xma.l   fp0b_0 = u_0, v0, r_0
+       (p8)    add     acc0 = pr0_1, acc1_3, 1
+       (p9)    add     acc0 = pr0_1, acc1_3
+               xma.hu  fp1a_0 = u_0, v0, r_0
+L(10):         br.cloop.sptk.clr       L(top)                  C 12
         ;;
  C *** MAIN LOOP END ***
-
-.Lcj6:
-       .pred.rel "mutex", p12, p13
-       getf.sig        pr0_3 = fp0b_3
-       xma.l           fp1b_3 = u_3, v1, fp1a_3
-  (p12)        add             s0 = pr1_0, acc0, 1
-  (p13)        add             s0 = pr1_0, acc0
-       xma.hu          fp2a_3 = u_3, v1, fp1a_3
-       ;;
-       .pred.rel "mutex", p8, p9
-       .pred.rel "mutex", p12, p13
-       getf.sig        pr1_2 = fp1b_2
-  (p8) cmp.leu         p6, p7 = acc0, pr0_1
-  (p9) cmp.ltu         p6, p7 = acc0, pr0_1
-  (p12)        cmp.leu         p10, p11 = s0, pr1_0
-  (p13)        cmp.ltu         p10, p11 = s0, pr1_0
-       ;;
-       .pred.rel "mutex", p6, p7
-       getf.sig        acc1_2 = fp2a_2
-       st8             [rp] = s0, 8
-       xma.l           fp0b_1 = u_1, v0, r_1
-  (p6) add             acc0 = pr0_2, acc1_0, 1
-  (p7) add             acc0 = pr0_2, acc1_0
-       xma.hu          fp1a_1 = u_1, v0, r_1
-       ;;
-.Lcj5:
-       .pred.rel "mutex", p10, p11
-       getf.sig        pr0_0 = fp0b_0
-       xma.l           fp1b_0 = u_0, v1, fp1a_0
-  (p10)        add             s0 = pr1_1, acc0, 1
-  (p11)        add             s0 = pr1_1, acc0
-       xma.hu          fp2a_0 = u_0, v1, fp1a_0
-       ;;
-       .pred.rel "mutex", p6, p7
-       .pred.rel "mutex", p10, p11
-       getf.sig        pr1_3 = fp1b_3
-  (p6) cmp.leu         p8, p9 = acc0, pr0_2
-  (p7) cmp.ltu         p8, p9 = acc0, pr0_2
-  (p10)        cmp.leu         p12, p13 = s0, pr1_1
-  (p11)        cmp.ltu         p12, p13 = s0, pr1_1
-       ;;
-       .pred.rel "mutex", p8, p9
-       getf.sig        acc1_3 = fp2a_3
-       st8             [rp] = s0, 8
-       xma.l           fp0b_2 = u_2, v0, r_2
-  (p8) add             acc0 = pr0_3, acc1_1, 1
-  (p9) add             acc0 = pr0_3, acc1_1
-       xma.hu          fp1a_2 = u_2, v0, r_2
-       ;;
-.Lcj4:
-       .pred.rel "mutex", p12, p13
-       getf.sig        pr0_1 = fp0b_1
-       xma.l           fp1b_1 = u_1, v1, fp1a_1
-  (p12)        add             s0 = pr1_2, acc0, 1
-  (p13)        add             s0 = pr1_2, acc0
-       xma.hu          fp2a_1 = u_1, v1, fp1a_1
-       ;;
-       .pred.rel "mutex", p8, p9
-       .pred.rel "mutex", p12, p13
-       getf.sig        pr1_0 = fp1b_0
-  (p8) cmp.leu         p6, p7 = acc0, pr0_3
-  (p9) cmp.ltu         p6, p7 = acc0, pr0_3
-  (p12)        cmp.leu         p10, p11 = s0, pr1_2
-  (p13)        cmp.ltu         p10, p11 = s0, pr1_2
-       ;;
-       .pred.rel "mutex", p6, p7
-       getf.sig        acc1_0 = fp2a_0
-       st8             [rp] = s0, 8
-  (p6) add             acc0 = pr0_0, acc1_2, 1
-  (p7) add             acc0 = pr0_0, acc1_2
-       ;;
-.Lcj3:
-       .pred.rel "mutex", p10, p11
-       getf.sig        pr0_2 = fp0b_2
-       xma.l           fp1b_2 = u_2, v1, fp1a_2
-  (p10)        add             s0 = pr1_3, acc0, 1
-  (p11)        add             s0 = pr1_3, acc0
-       xma.hu          fp2a_2 = u_2, v1, fp1a_2
-       ;;
-       .pred.rel "mutex", p6, p7
-       .pred.rel "mutex", p10, p11
-       getf.sig        pr1_1 = fp1b_1
-  (p6) cmp.leu         p8, p9 = acc0, pr0_0
-  (p7) cmp.ltu         p8, p9 = acc0, pr0_0
-  (p10)        cmp.leu         p12, p13 = s0, pr1_3
-  (p11)        cmp.ltu         p12, p13 = s0, pr1_3
-       ;;
-       .pred.rel "mutex", p8, p9
-       getf.sig        acc1_1 = fp2a_1
-       st8             [rp] = s0, 8
-  (p8) add             acc0 = pr0_1, acc1_3, 1
-  (p9) add             acc0 = pr0_1, acc1_3
-       ;;
-.Lcj2:
-       .pred.rel "mutex", p12, p13
-  (p12)        add             s0 = pr1_0, acc0, 1
-  (p13)        add             s0 = pr1_0, acc0
-       ;;
-       .pred.rel "mutex", p8, p9
-       .pred.rel "mutex", p12, p13
-       getf.sig        pr1_2 = fp1b_2
-  (p8) cmp.leu         p6, p7 = acc0, pr0_1
-  (p9) cmp.ltu         p6, p7 = acc0, pr0_1
-  (p12)        cmp.leu         p10, p11 = s0, pr1_0
-  (p13)        cmp.ltu         p10, p11 = s0, pr1_0
-       ;;
-       .pred.rel "mutex", p6, p7
-       getf.sig        acc1_2 = fp2a_2
-       st8             [rp] = s0, 8
-  (p6) add             acc0 = pr0_2, acc1_0, 1
-  (p7) add             acc0 = pr0_2, acc1_0
-       ;;
-       .pred.rel "mutex", p10, p11
-  (p10)        add             s0 = pr1_1, acc0, 1
-  (p11)        add             s0 = pr1_1, acc0
-       ;;
-       .pred.rel "mutex", p6, p7
-       .pred.rel "mutex", p10, p11
-  (p6) cmp.leu         p8, p9 = acc0, pr0_2
-  (p7) cmp.ltu         p8, p9 = acc0, pr0_2
-  (p10)        cmp.leu         p12, p13 = s0, pr1_1
-  (p11)        cmp.ltu         p12, p13 = s0, pr1_1
-       ;;
-       .pred.rel "mutex", p8, p9
-       st8             [rp] = s0, 8
-  (p8) add             acc0 = pr1_2, acc1_1, 1
-  (p9) add             acc0 = pr1_2, acc1_1
-       ;;
-       .pred.rel "mutex", p8, p9
-  (p8) cmp.leu         p10, p11 = acc0, pr1_2
-  (p9) cmp.ltu         p10, p11 = acc0, pr1_2
-  (p12)        add             acc0 = 1, acc0
-       ;;
-       st8             [rp] = acc0, 8
-  (p12)        cmp.eq.or       p10, p0 = 0, acc0
-       mov             r8 = acc1_2
-       ;;
-       .pred.rel "mutex", p10, p11
-  (p10)        add             r8 = 1, r8
-       mov.i           ar.lc = r2
-       br.ret.sptk.many b0
+L(end):
+               .pred.rel "mutex", p12, p13
+.mfi;          getfsig pr0_3 = fp0b_3
+               xma.l   fp1b_3 = u_3, v1, fp1a_3
+       (p12)   add     s0 = pr1_0, acc0, 1
+.mfi;  (p13)   add     s0 = pr1_0, acc0
+               xma.hu  fp2a_3 = u_3, v1, fp1a_3
+               nop     1
+       ;;
+               .pred.rel "mutex", p8, p9
+               .pred.rel "mutex", p12, p13
+.mmi;          getfsig pr1_2 = fp1b_2
+               st8     [rp] = s0, 8
+       (p8)    cmp.leu p6, p7 = acc0, pr0_1
+.mmi;  (p9)    cmp.ltu p6, p7 = acc0, pr0_1
+       (p12)   cmp.leu p10, p11 = s0, pr1_0
+       (p13)   cmp.ltu p10, p11 = s0, pr1_0
+       ;;
+               .pred.rel "mutex", p6, p7
+.mfi;          getfsig acc1_2 = fp2a_2
+               xma.l   fp0b_1 = u_1, v0, r_1
+               nop     1
+.mmf;  (p6)    add     acc0 = pr0_2, acc1_0, 1
+       (p7)    add     acc0 = pr0_2, acc1_0
+               xma.hu  fp1a_1 = u_1, v0, r_1
+       ;;
+L(cj5):
+               .pred.rel "mutex", p10, p11
+.mfi;          getfsig pr0_0 = fp0b_0
+               xma.l   fp1b_0 = u_0, v1, fp1a_0
+       (p10)   add     s0 = pr1_1, acc0, 1
+.mfi;  (p11)   add     s0 = pr1_1, acc0
+               xma.hu  fp2a_0 = u_0, v1, fp1a_0
+               nop     1
+       ;;
+               .pred.rel "mutex", p6, p7
+               .pred.rel "mutex", p10, p11
+.mmi;          getfsig pr1_3 = fp1b_3
+               st8     [rp] = s0, 8
+       (p6)    cmp.leu p8, p9 = acc0, pr0_2
+.mmi;  (p7)    cmp.ltu p8, p9 = acc0, pr0_2
+       (p10)   cmp.leu p12, p13 = s0, pr1_1
+       (p11)   cmp.ltu p12, p13 = s0, pr1_1
+       ;;
+               .pred.rel "mutex", p8, p9
+.mfi;          getfsig acc1_3 = fp2a_3
+               xma.l   fp0b_2 = u_2, v0, r_2
+               nop     1
+.mmf;  (p8)    add     acc0 = pr0_3, acc1_1, 1
+       (p9)    add     acc0 = pr0_3, acc1_1
+               xma.hu  fp1a_2 = u_2, v0, r_2
+       ;;
+L(cj4):
+               .pred.rel "mutex", p12, p13
+.mfi;          getfsig pr0_1 = fp0b_1
+               xma.l   fp1b_1 = u_1, v1, fp1a_1
+       (p12)   add     s0 = pr1_2, acc0, 1
+.mfi;  (p13)   add     s0 = pr1_2, acc0
+               xma.hu  fp2a_1 = u_1, v1, fp1a_1
+               nop     1
+       ;;
+               .pred.rel "mutex", p8, p9
+               .pred.rel "mutex", p12, p13
+.mmi;          getfsig pr1_0 = fp1b_0
+               st8     [rp] = s0, 8
+       (p8)    cmp.leu p6, p7 = acc0, pr0_3
+.mmi;  (p9)    cmp.ltu p6, p7 = acc0, pr0_3
+       (p12)   cmp.leu p10, p11 = s0, pr1_2
+       (p13)   cmp.ltu p10, p11 = s0, pr1_2
+       ;;
+               .pred.rel "mutex", p6, p7
+.mmi;          getfsig acc1_0 = fp2a_0
+       (p6)    add     acc0 = pr0_0, acc1_2, 1
+       (p7)    add     acc0 = pr0_0, acc1_2
+       ;;
+L(cj3):
+               .pred.rel "mutex", p10, p11
+.mfi;          getfsig pr0_2 = fp0b_2
+               xma.l   fp1b_2 = u_2, v1, fp1a_2
+       (p10)   add     s0 = pr1_3, acc0, 1
+.mfi;  (p11)   add     s0 = pr1_3, acc0
+               xma.hu  fp2a_2 = u_2, v1, fp1a_2
+               nop     1
+       ;;
+               .pred.rel "mutex", p6, p7
+               .pred.rel "mutex", p10, p11
+.mmi;          getfsig pr1_1 = fp1b_1
+               st8     [rp] = s0, 8
+       (p6)    cmp.leu p8, p9 = acc0, pr0_0
+.mmi;  (p7)    cmp.ltu p8, p9 = acc0, pr0_0
+       (p10)   cmp.leu p12, p13 = s0, pr1_3
+       (p11)   cmp.ltu p12, p13 = s0, pr1_3
+       ;;
+               .pred.rel "mutex", p8, p9
+.mmi;          getfsig acc1_1 = fp2a_1
+       (p8)    add     acc0 = pr0_1, acc1_3, 1
+       (p9)    add     acc0 = pr0_1, acc1_3
+       ;;
+               .pred.rel "mutex", p12, p13
+.mmi;  (p12)   add     s0 = pr1_0, acc0, 1
+       (p13)   add     s0 = pr1_0, acc0
+               nop     1
+       ;;
+               .pred.rel "mutex", p8, p9
+               .pred.rel "mutex", p12, p13
+.mmi;          getfsig pr1_2 = fp1b_2
+               st8     [rp] = s0, 8
+       (p8)    cmp.leu p6, p7 = acc0, pr0_1
+.mmi;  (p9)    cmp.ltu p6, p7 = acc0, pr0_1
+       (p12)   cmp.leu p10, p11 = s0, pr1_0
+       (p13)   cmp.ltu p10, p11 = s0, pr1_0
+       ;;
+               .pred.rel "mutex", p6, p7
+.mmi;          getfsig r8 = fp2a_2
+       (p6)    add     acc0 = pr0_2, acc1_0, 1
+       (p7)    add     acc0 = pr0_2, acc1_0
+       ;;
+               .pred.rel "mutex", p10, p11
+.mmi;  (p10)   add     s0 = pr1_1, acc0, 1
+       (p11)   add     s0 = pr1_1, acc0
+       (p6)    cmp.leu p8, p9 = acc0, pr0_2
+       ;;
+               .pred.rel "mutex", p10, p11
+.mmi;  (p7)    cmp.ltu p8, p9 = acc0, pr0_2
+       (p10)   cmp.leu p12, p13 = s0, pr1_1
+       (p11)   cmp.ltu p12, p13 = s0, pr1_1
+       ;;
+               .pred.rel "mutex", p8, p9
+.mmi;          st8     [rp] = s0, 8
+       (p8)    add     acc0 = pr1_2, acc1_1, 1
+       (p9)    add     acc0 = pr1_2, acc1_1
+       ;;
+               .pred.rel "mutex", p8, p9
+.mmi;  (p8)    cmp.leu p10, p11 = acc0, pr1_2
+       (p9)    cmp.ltu p10, p11 = acc0, pr1_2
+       (p12)   add     acc0 = 1, acc0
+       ;;
+.mmi;          st8     [rp] = acc0, 8
+       (p12)   cmpeqor p10, p0 = 0, acc0
+               nop     1
+       ;;
+.mib;  (p10)   add     r8 = 1, r8
+               mov     ar.lc = r2
+               br.ret.sptk.many b0
  EPILOGUE()
  ASM_END()
diff --git a/mpn/ia64/aors_n.asm b/mpn/ia64/aors_n.asm

index fd3aaac460e228fc586b88f9cc38989f02d91ed1..d75fef89a3caa370b3e5534cee61f6d4a70ff19c 100644 (file)
--- a/mpn/ia64/aors_n.asm
+++ b/mpn/ia64/aors_n.asm
@@ -1,6 +1,8 @@
  dnl  IA-64 mpn_add_n/mpn_sub_n -- mpn addition and subtraction.
  
-dnl  Copyright 2003, 2004, 2005 Free Software Foundation, Inc.
+dnl  Contributed to the GNU project by Torbjorn Granlund.
+
+dnl  Copyright 2003, 2004, 2005, 2010, 2011 Free Software Foundation, Inc.
  
  dnl  This file is part of the GNU MP Library.
  
@@ -26,586 +28,816 @@ C Itanium 2:    1.25
  C TODO
  C  * Consider using special code for small n, using something like
  C    "switch (8 * (n >= 8) + (n mod 8))" to enter it and feed-in code.
+C  * The non-nc code was trimmed cycle for cycle to its current state.  It is
+C    probably hard to save more than an odd cycle there.  The nc code is much
+C    rawer (since tune/speed doesn't have any applicable direct measurements).
+C  * Without the nc entry points, this becomes around 1800 bytes of object
+C    code; the nc code adds over 1000 bytes.  We should perhaps sacrifice a
+C    few cycles for the non-nc code and let it fall into the nc code.
  
  C INPUT PARAMETERS
-define(`rp',`r32')
-define(`up',`r33')
-define(`vp',`r34')
-define(`n',`r35')
+define(`rp', `r32')
+define(`up', `r33')
+define(`vp', `r34')
+define(`n',  `r35')
+define(`cy', `r36')
  
  ifdef(`OPERATION_add_n',`
    define(ADDSUB,       add)
-  define(PRED,         ltu)
+  define(CND,          ltu)
    define(INCR,         1)
    define(LIM,          -1)
-  define(func, mpn_add_n)
+  define(LIM2,         0)
+  define(func,    mpn_add_n)
+  define(func_nc, mpn_add_nc)
  ')
  ifdef(`OPERATION_sub_n',`
    define(ADDSUB,       sub)
-  define(PRED,         gtu)
+  define(CND,          gtu)
    define(INCR,         -1)
    define(LIM,          0)
-  define(func, mpn_sub_n)
+  define(LIM2,         -1)
+  define(func,    mpn_sub_n)
+  define(func_nc, mpn_sub_nc)
  ')
  
+define(cmpeqor, `cmp.eq.or')
+define(PFDIST, 500)
+
  C Some useful aliases for registers we use
  define(`u0',`r14') define(`u1',`r15') define(`u2',`r16') define(`u3',`r17')
-define(`u4',`r18') define(`u5',`r19') define(`u6',`r20') define(`u7',`r21')
  define(`v0',`r24') define(`v1',`r25') define(`v2',`r26') define(`v3',`r27')
-define(`v4',`r28') define(`v5',`r29') define(`v6',`r30') define(`v7',`r31')
-define(`w0',`r22') define(`w1',`r9') define(`w2',`r8') define(`w3',`r23')
-define(`w4',`r22') define(`w5',`r9') define(`w6',`r8') define(`w7',`r23')
+define(`w0',`r28') define(`w1',`r29') define(`w2',`r30') define(`w3',`r31')
  define(`rpx',`r3')
+define(`upadv',`r20') define(`vpadv',`r21')
  
-MULFUNC_PROLOGUE(mpn_add_n mpn_sub_n)
+MULFUNC_PROLOGUE(mpn_add_n mpn_add_nc mpn_sub_n mpn_sub_nc)
  
  ASM_START()
-PROLOGUE(func)
+PROLOGUE(func_nc)
         .prologue
         .save   ar.lc, r2
         .body
  ifdef(`HAVE_ABI_32',`
-       addp4           rp = 0, rp              C                       M I
-       addp4           up = 0, up              C                       M I
-       addp4           vp = 0, vp              C                       M I
-       zxt4            n = n                   C                       I
+       addp4   rp = 0, rp              C                       M I
+       addp4   up = 0, up              C                       M I
+       addp4   vp = 0, vp              C                       M I
+       zxt4    n = n                   C                       I
         ;;
  ')
-{.mmi          C 00
-       ld8             r11 = [vp], 8           C                       M01
-       ld8             r10 = [up], 8           C                       M01
-       mov.i           r2 = ar.lc              C                       I0
-}
-{.mmi
-       and             r14 = 7, n              C                       M I
-       cmp.lt          p15, p14 = 8, n         C                       M I
-       add             n = -8, n               C                       M I
+
+ {.mmi;        ld8     r11 = [vp], 8           C                       M01
+       ld8     r10 = [up], 8           C                       M01
+       mov     r2 = ar.lc              C                       I0
+}{.mmi;        and     r14 = 7, n              C                       M I
+       cmp.lt  p15, p14 = 8, n         C                       M I
+       add     n = -6, n               C                       M I
         ;;
  }
-{.mmi          C 01
-       cmp.eq          p6, p0 = 1, r14         C                       M I
-       cmp.eq          p7, p0 = 2, r14         C                       M I
-       cmp.eq          p8, p0 = 3, r14         C                       M I
-}
-{.bbb
-   (p6)        br.dptk         .Lb001                  C                       B
-   (p7)        br.dptk         .Lb010                  C                       B
-   (p8)        br.dptk         .Lb011                  C                       B
+.mmi;  add     upadv = PFDIST, up      C Merging these lines into the feed-in
+       add     vpadv = PFDIST, vp      C code could save a cycle per call at
+       mov     r23 = cy                C the expense of code size.
+       ;;
+{.mmi; cmp.eq  p6, p0 = 1, r14         C                       M I
+       cmp.eq  p7, p0 = 2, r14         C                       M I
+       cmp.eq  p8, p0 = 3, r14         C                       M I
+}{.bbb
+   (p6)        br.dptk .Lc001                  C                       B
+   (p7)        br.dptk .Lc010                  C                       B
+   (p8)        br.dptk .Lc011                  C                       B
         ;;
  }
-{.mmi          C 02
-       cmp.eq          p9, p0 = 4, r14         C                       M I
-       cmp.eq          p10, p0 = 5, r14        C                       M I
-       cmp.eq          p11, p0 = 6, r14        C                       M I
-}
-{.bbb
-   (p9)        br.dptk         .Lb100                  C                       B
-  (p10)        br.dptk         .Lb101                  C                       B
-  (p11)        br.dptk         .Lb110                  C                       B
-       ;;
-}              C 03
-{.mmb
-       cmp.eq          p12, p0 = 7, r14        C                       M I
-       add             n = -1, n               C loop count            M I
-  (p12)        br.dptk         .Lb111                  C                       B
+{.mmi; cmp.eq  p9, p0 = 4, r14         C                       M I
+       cmp.eq  p10, p0 = 5, r14        C                       M I
+       cmp.eq  p11, p0 = 6, r14        C                       M I
+}{.bbb
+   (p9)        br.dptk .Lc100                  C                       B
+  (p10)        br.dptk .Lc101                  C                       B
+  (p11)        br.dptk .Lc110                  C                       B
+       ;;
+}{.mmi;        ld8     r19 = [vp], 8           C                       M01
+       ld8     r18 = [up], 8           C                       M01
+       cmp.ne  p13, p0 = 0, cy         C copy cy to p13        M I
+}{.mmb;        cmp.eq  p12, p0 = 7, r14        C                       M I
+       nop     0
+  (p12)        br.dptk .Lc111                  C                       B
+       ;;
  }
  
+.Lc000:
+.mmi;  ld8     v3 = [vp], 8            C                       M01
+       ld8     u3 = [up], 8            C                       M01
+       shr.u   n = n, 3                C                       I0
+       ;;
+.mmi;  add     vpadv = PFDIST, vp      C                       M I
+       ld8     v0 = [vp], 8            C                       M01
+       mov     ar.lc = n               C                       I0
+.mmi;  ld8     u0 = [up], 8            C                       M01
+       ADDSUB  w1 = r10, r11           C                       M I
+       nop     0
+       ;;
+.mmi;  add     upadv = PFDIST, up      C                       M I
+       ld8     v1 = [vp], 8            C                       M01
+       cmp.CND p7, p0 = w1, r10        C                       M I
+.mmi;  ld8     u1 = [up], 8            C                       M01
+       ADDSUB  w2 = r18, r19           C                       M I
+       add     rpx = 8, rp             C                       M I
+       ;;
+.mmi;  ld8     v2 = [vp], 8            C                       M01
+       cmp.CND p8, p0 = w2, r18        C                       M I
+  (p13)        cmpeqor p7, p0 = LIM, w1        C                       M I
+.mmi;  ld8     u2 = [up], 8            C                       M01
+  (p13)        add     w1 = INCR, w1           C                       M I
+       ADDSUB  w3 = u3, v3             C                       M I
+       ;;
+.mmi;  ld8     v3 = [vp], 8            C                       M01
+       cmp.CND p9, p0 = w3, u3         C                       M I
+   (p7)        cmpeqor p8, p0 = LIM, w2        C                       M I
+.mmb;  ld8     u3 = [up], 8            C                       M01
+   (p7)        add     w2 = INCR, w2           C                       M I
+       br      L(m0)
+
+
+.Lc001:
+.mmi;
+  (p15)        ld8     v1 = [vp], 8            C                       M01
+  (p15)        ld8     u1 = [up], 8            C                       M01
+       ADDSUB  w0 = r10, r11           C                       M I
+.mmb;  nop     0
+       nop     0
+  (p15)        br      1f
+       ;;
+.mmi;  cmp.ne  p9, p0 = 0, r23         C                       M I
+       mov     r8 = 0
+       cmp.CND p6, p0 = w0, r10        C                       M I
+       ;;
+.mmb;
+   (p9)        cmpeqor p6, p0 = LIM, w0        C                       M I
+   (p9)        add     w0 = INCR, w0           C                       M I
+       br      L(cj1)                  C                       B
+1:
+.mmi;  ld8     v2 = [vp], 8            C                       M01
+       ld8     u2 = [up], 8            C                       M01
+       shr.u   n = n, 3                C                       I0
+       ;;
+.mmi;  ld8     v3 = [vp], 8            C                       M01
+       ld8     u3 = [up], 8            C                       M01
+       mov     ar.lc = n               C                       I0
+.mmi;  nop     0
+       cmp.ne  p9, p0 = 0, r23         C                       M I
+       nop     0
+       ;;
+.mmi;  ld8     v0 = [vp], 8            C                       M01
+       cmp.CND p6, p0 = w0, r10        C                       M I
+       add     rpx = 16, rp            C                       M I
+.mmb;  ld8     u0 = [up], 8            C                       M01
+       ADDSUB  w1 = u1, v1             C                       M I
+       br      L(c1)                   C                       B
+
+
+.Lc010:
+.mmi;  ld8     v0 = [vp], 8            C                       M01
+       ld8     u0 = [up], 8            C                       M01
+       mov     r8 = 0                  C                       M I
+.mmb;  ADDSUB  w3 = r10, r11           C                       M I
+       cmp.ne  p8, p0 = 0, r23         C                       M I
+  (p15)        br      1f                      C                       B
+       ;;
+.mmi;  cmp.CND p9, p0 = w3, r10        C                       M I
+       ADDSUB  w0 = u0, v0             C                       M I
+   (p8)        add     w3 = INCR, w3           C                       M I
+       ;;
+.mmb;  cmp.CND p6, p0 = w0, u0         C                       M I
+   (p8)        cmpeqor p9, p0 = LIM2, w3       C                       M I
+       br      L(cj2)                  C                       B
+1:
+.mmi;  ld8     v1 = [vp], 8            C                       M01
+       ld8     u1 = [up], 8            C                       M01
+       shr.u   n = n, 3                C                       I0
+       ;;
+.mmi;  ld8     v2 = [vp], 8            C                       M01
+       ld8     u2 = [up], 8            C                       M01
+       mov     ar.lc = n               C                       I0
+       ;;
+.mmi;  ld8     v3 = [vp], 8            C                       M01
+       ld8     u3 = [up], 8            C                       M01
+       cmp.CND p9, p0 = w3, r10        C                       M I
+       ;;
+.mmi;
+   (p8)        cmpeqor p9, p0 = LIM, w3        C                       M I
+   (p8)        add     w3 = INCR, w3           C                       M I
+       ADDSUB  w0 = u0, v0             C                       M I
+.mmb;  add     rpx = 24, rp            C                       M I
+       nop     0
+       br      L(m23)                  C                       B
+
+
+.Lc011:
+.mmi;  ld8     v3 = [vp], 8            C                       M01
+       ld8     u3 = [up], 8            C                       M01
+       shr.u   n = n, 3                C                       I0
+.mmi;  ADDSUB  w2 = r10, r11           C                       M I
+       cmp.ne  p7, p0 = 0, r23         C                       M I
+       nop     0
+       ;;
+.mmb;  ld8     v0 = [vp], 8            C                       M01
+       ld8     u0 = [up], 8            C                       M01
+  (p15)        br      1f                      C                       B
+.mmi;  cmp.CND p8, p0 = w2, r10        C                       M I
+       ADDSUB  w3 = u3, v3             C                       M I
+       nop     0
+       ;;
+.mmb;
+   (p7)        cmpeqor p8, p0 = LIM, w2        C                       M I
+   (p7)        add     w2 = INCR, w2           C                       M I
+       br      L(cj3)                  C                       B
+1:
+.mmi;  ld8     v1 = [vp], 8            C                       M01
+       ld8     u1 = [up], 8            C                       M01
+       ADDSUB  w3 = u3, v3             C                       M I
+       ;;
+.mmi;  ld8     v2 = [vp], 8            C                       M01
+       ld8     u2 = [up], 8            C                       M01
+       cmp.CND p8, p0 = w2, r10        C                       M I
+       ;;
+.mmi;  ld8     v3 = [vp], 8            C                       M01
+       cmp.CND p9, p0 = w3, u3         C                       M I
+       mov     ar.lc = n               C                       I0
+.mmi;  ld8     u3 = [up], 8            C                       M01
+   (p7)        cmpeqor p8, p0 = LIM, w2        C                       M I
+   (p7)        add     w2 = INCR, w2           C                       M I
+       ;;
+.mmi;  add     rpx = 32, rp            C                       M I
+       st8     [rp] = w2, 8            C                       M23
+   (p8)        cmpeqor p9, p0 = LIM, w3        C                       M I
+.mmb;
+   (p8)        add     w3 = INCR, w3           C                       M I
+       ADDSUB  w0 = u0, v0             C                       M I
+       br      L(m23)
+
+
+.Lc100:
+.mmi;  ld8     v2 = [vp], 8            C                       M01
+       ld8     u2 = [up], 8            C                       M01
+       shr.u   n = n, 3                C                       I0
+.mmi;  ADDSUB  w1 = r10, r11           C                       M I
+       nop     0
+       nop     0
+       ;;
+.mmi;  ld8     v3 = [vp], 8            C                       M01
+       ld8     u3 = [up], 8            C                       M01
+       add     rpx = 8, rp             C                       M I
+.mmi;  cmp.ne  p6, p0 = 0, r23         C                       M I
+       cmp.CND p7, p0 = w1, r10        C                       M I
+       nop     0
+       ;;
+.mmi;  ld8     v0 = [vp], 8            C                       M01
+       ld8     u0 = [up], 8            C                       M01
+       ADDSUB  w2 = u2, v2             C                       M I
+.mmb;
+   (p6)        cmpeqor p7, p0 = LIM, w1        C                       M I
+   (p6)        add     w1 = INCR, w1           C                       M I
+  (p14)        br      L(cj4)
+       ;;
+.mmi;  ld8     v1 = [vp], 8            C                       M01
+       ld8     u1 = [up], 8            C                       M01
+       mov     ar.lc = n               C                       I0
+       ;;
+.mmi;  ld8     v2 = [vp], 8            C                       M01
+       cmp.CND p8, p0 = w2, u2         C                       M I
+       nop     0
+.mmi;  ld8     u2 = [up], 8            C                       M01
+       nop     0
+       ADDSUB  w3 = u3, v3             C                       M I
+       ;;
+.mmi;  ld8     v3 = [vp], 8            C                       M01
+       cmp.CND p9, p0 = w3, u3         C                       M I
+   (p7)        cmpeqor p8, p0 = LIM, w2        C                       M I
+.mmb;  ld8     u3 = [up], 8            C                       M01
+   (p7)        add     w2 = INCR, w2           C                       M I
+       br      L(m4)
+
+
+.Lc101:
+.mmi;  ld8     v1 = [vp], 8            C                       M01
+       ld8     u1 = [up], 8            C                       M01
+       shr.u   n = n, 3                C                       I0
+       ;;
+.mmi;  ld8     v2 = [vp], 8            C                       M01
+       ld8     u2 = [up], 8            C                       M01
+       mov     ar.lc = n               C                       I0
+       ;;
+.mmi;  ld8     v3 = [vp], 8            C                       M01
+       ld8     u3 = [up], 8            C                       M01
+       ADDSUB  w0 = r10, r11           C                       M I
+.mmi;  cmp.ne  p9, p0 = 0, r23         C                       M I
+       add     rpx = 16, rp            C                       M I
+       nop     0
+       ;;
+.mmi;  ld8     v0 = [vp], 8            C                       M01
+       cmp.CND p6, p0 = w0, r10        C                       M I
+       ld8     u0 = [up], 8            C                       M01
+.mbb;  ADDSUB  w1 = u1, v1             C                       M I
+  (p15)        br      L(c5)                   C                       B
+       br      L(end)                  C                       B
+
+
+.Lc110:
+.mmi;  ld8     v0 = [vp], 8            C                       M01
+       ld8     u0 = [up], 8            C                       M01
+       shr.u   n = n, 3                C                       I0
+       ;;
+.mmi;  add     upadv = PFDIST, up      C                       M I
+       add     vpadv = PFDIST, vp      C                       M I
+       mov     ar.lc = n               C                       I0
+.mmi;  ld8     v1 = [vp], 8            C                       M01
+       ld8     u1 = [up], 8            C                       M01
+       ADDSUB  w3 = r10, r11           C                       M I
+       ;;
+.mmi;  ld8     v2 = [vp], 8            C                       M01
+       ld8     u2 = [up], 8            C                       M01
+       ADDSUB  w0 = u0, v0             C                       M I
+.mmi;  cmp.CND p9, p0 = w3, r10        C                       M I
+       cmp.ne  p8, p0 = 0, r23         C                       M I
+       add     rpx = 24, rp            C                       M I
+       ;;
+.mmi;  ld8     v3 = [vp], 8            C                       M01
+       ld8     u3 = [up], 8            C                       M01
+       nop     0
+.mmb;
+   (p8)        cmpeqor p9, p0 = LIM, w3        C                       M I
+   (p8)        add     w3 = INCR, w3           C                       M I
+       br      L(m67)                  C                       B
+
+
+.Lc111:
+.mmi;  ld8     v0 = [vp], 8            C                       M01
+       ld8     u0 = [up], 8            C                       M01
+       shr.u   n = n, 3                C                       I0
+       ;;
+.mmi;  add     upadv = PFDIST, up      C                       M I
+       ld8     v1 = [vp], 8            C                       M01
+       mov     ar.lc = n               C                       I0
+.mmi;  ld8     u1 = [up], 8            C                       M01
+       ADDSUB  w2 = r10, r11           C                       M I
+       nop     0
+       ;;
+.mmi;  add     vpadv = PFDIST, vp      C                       M I
+       ld8     v2 = [vp], 8            C                       M01
+       cmp.CND p8, p0 = w2, r10        C                       M I
+.mmi;  ld8     u2 = [up], 8            C                       M01
+       ADDSUB  w3 = r18, r19           C                       M I
+       nop     0
+       ;;
+.mmi;  ld8     v3 = [vp], 8            C                       M01
+       cmp.CND p9, p0 = w3, r18        C                       M I
+  (p13)        cmpeqor p8, p0 = LIM, w2        C                       M I
+.mmi;  ld8     u3 = [up], 8            C                       M01
+  (p13)        add     w2 = INCR, w2           C                       M I
+       nop     0
+       ;;
+.mmi;  add     rpx = 32, rp            C                       M I
+       st8     [rp] = w2, 8            C                       M23
+   (p8)        cmpeqor p9, p0 = LIM, w3        C                       M I
+.mmb;
+   (p8)        add     w3 = INCR, w3           C                       M I
+       ADDSUB  w0 = u0, v0             C                       M I
+       br      L(m67)
  
-.Lb000:        ld8             v2 = [vp], 8            C                       M01
-       ld8             u2 = [up], 8            C                       M01
-       add             rpx = 8, rp             C                       M I
-       ;;
-       ld8             v3 = [vp], 8            C                       M01
-       ld8             u3 = [up], 8            C                       M01
-       ADDSUB          w1 = r10, r11           C                       M I
-       ;;
-       ld8             v4 = [vp], 8            C                       M01
-       ld8             u4 = [up], 8            C                       M01
-       cmp.PRED        p7, p0 = w1, r10        C                       M I
-       ;;
-       ld8             v5 = [vp], 8            C                       M01
-       ld8             u5 = [up], 8            C                       M01
-       ADDSUB          w2 = u2, v2             C                       M I
-       ;;
-       ld8             v6 = [vp], 8            C                       M01
-       ld8             u6 = [up], 8            C                       M01
-       cmp.PRED        p8, p0 = w2, u2         C                       M I
-       ;;
-       ld8             v7 = [vp], 8            C                       M01
-       ld8             u7 = [up], 8            C                       M01
-       ADDSUB          w3 = u3, v3             C                       M I
+EPILOGUE()
+
+ASM_START()
+PROLOGUE(func)
+       .prologue
+       .save   ar.lc, r2
+       .body
+ifdef(`HAVE_ABI_32',`
+       addp4   rp = 0, rp              C                       M I
+       addp4   up = 0, up              C                       M I
+       addp4   vp = 0, vp              C                       M I
+       zxt4    n = n                   C                       I
         ;;
-       ld8             v0 = [vp], 8            C                       M01
-       ld8             u0 = [up], 8            C                       M01
-       cmp.PRED        p9, p0 = w3, u3         C                       M I
-   (p7)        cmp.eq.or       p8, p0 = LIM, w2        C                       M I
-   (p7)        add             w2 = INCR, w2           C                       M I
-  (p14)        br.cond.dptk    .Lcj8                   C                       B
-       ;;
-
-.grt8: ld8             v1 = [vp], 8            C                       M01
-       ld8             u1 = [up], 8            C                       M01
-       shr.u           n = n, 3                C                       I0
-       ;;
-       add             r11 = 512, vp
-       ld8             v2 = [vp], 8            C                       M01
-       add             r10 = 512, up
-       ld8             u2 = [up], 8            C                       M01
-       nop.i           0
-       nop.b           0
-       ;;
-       ld8             v3 = [vp], 8            C                       M01
-       ld8             u3 = [up], 8            C                       M01
-       mov.i           ar.lc = n               C                       I0
-       br              .LL000                  C                       B
+')
  
-.Lb001:        add             rpx = 16, rp            C                       M I
-       ADDSUB          w0 = r10, r11           C                       M I
-  (p15)        br.cond.dpnt    .grt1                   C                       B
+ {.mmi;        ld8     r11 = [vp], 8           C                       M01
+       ld8     r10 = [up], 8           C                       M01
+       mov     r2 = ar.lc              C                       I0
+}{.mmi;        and     r14 = 7, n              C                       M I
+       cmp.lt  p15, p14 = 8, n         C                       M I
+       add     n = -6, n               C                       M I
+       ;;
+}{.mmi;        cmp.eq  p6, p0 = 1, r14         C                       M I
+       cmp.eq  p7, p0 = 2, r14         C                       M I
+       cmp.eq  p8, p0 = 3, r14         C                       M I
+}{.bbb
+   (p6)        br.dptk .Lb001                  C                       B
+   (p7)        br.dptk .Lb010                  C                       B
+   (p8)        br.dptk .Lb011                  C                       B
+       ;;
+}{.mmi;        cmp.eq  p9, p0 = 4, r14         C                       M I
+       cmp.eq  p10, p0 = 5, r14        C                       M I
+       cmp.eq  p11, p0 = 6, r14        C                       M I
+}{.bbb
+   (p9)        br.dptk .Lb100                  C                       B
+  (p10)        br.dptk .Lb101                  C                       B
+  (p11)        br.dptk .Lb110                  C                       B
+       ;;
+}{.mmi;        ld8     r19 = [vp], 8           C                       M01
+       ld8     r18 = [up], 8           C                       M01
+       cmp.ne  p13, p0 = r0, r0        C clear "CF"            M I
+}{.mmb;        cmp.eq  p12, p0 = 7, r14        C                       M I
+       mov     r23 = 0                 C                       M I
+  (p12)        br.dptk .Lb111                  C                       B
         ;;
-       cmp.PRED        p6, p0 = w0, r10        C                       M I
-       mov             r8 = 0                  C                       M I
-       br              .Lcj1                   C                       B
+}
+
+.Lb000:
+.mmi;  ld8     v3 = [vp], 8            C                       M01
+       ld8     u3 = [up], 8            C                       M01
+       shr.u   n = n, 3                C                       I0
+       ;;
+.mmi;  ld8     v0 = [vp], 8            C                       M01
+       ld8     u0 = [up], 8            C                       M01
+       ADDSUB  w1 = r10, r11           C                       M I
+       ;;
+.mmi;  ld8     v1 = [vp], 8            C                       M01
+       cmp.CND p7, p0 = w1, r10        C                       M I
+       mov     ar.lc = n               C                       I0
+.mmi;  ld8     u1 = [up], 8            C                       M01
+       ADDSUB  w2 = r18, r19           C                       M I
+       add     rpx = 8, rp             C                       M I
+       ;;
+.mmi;  add     upadv = PFDIST, up
+       add     vpadv = PFDIST, vp
+       cmp.CND p8, p0 = w2, r18        C                       M I
+.mmi;  ld8     v2 = [vp], 8            C                       M01
+       ld8     u2 = [up], 8            C                       M01
+       ADDSUB  w3 = u3, v3             C                       M I
+       ;;
+.mmi;  ld8     v3 = [vp], 8            C                       M01
+       cmp.CND p9, p0 = w3, u3         C                       M I
+   (p7)        cmpeqor p8, p0 = LIM, w2        C                       M I
+.mmb;  ld8     u3 = [up], 8            C                       M01
+   (p7)        add     w2 = INCR, w2           C                       M I
+       br      L(m0)                   C                       B
+
+
+       ALIGN(32)
+.Lb001:
+.mmi;  ADDSUB  w0 = r10, r11           C                       M I
+  (p15)        ld8     v1 = [vp], 8            C                       M01
+       mov     r8 = 0                  C                       M I
+       ;;
+.mmb;  cmp.CND p6, p0 = w0, r10        C                       M I
+  (p15)        ld8     u1 = [up], 8            C                       M01
+  (p14)        br      L(cj1)                  C                       B
+       ;;
+.mmi;  add     upadv = PFDIST, up
+       add     vpadv = PFDIST, vp
+       shr.u   n = n, 3                C                       I0
+.mmi;  ld8     v2 = [vp], 8            C                       M01
+       ld8     u2 = [up], 8            C                       M01
+       cmp.CND p6, p0 = w0, r10        C                       M I
+       ;;
+.mmi;  ld8     v3 = [vp], 8            C                       M01
+       ld8     u3 = [up], 8            C                       M01
+       mov     ar.lc = n               C                       I0
+       ;;
+.mmi;  ld8     v0 = [vp], 8            C                       M01
+       ld8     u0 = [up], 8            C                       M01
+       ADDSUB  w1 = u1, v1             C                       M I
+       ;;
+.mmi;  ld8     v1 = [vp], 8            C                       M01
+       cmp.CND p7, p0 = w1, u1         C                       M I
+       ADDSUB  w2 = u2, v2             C                       M I
+.mmb;  ld8     u1 = [up], 8            C                       M01
+       add     rpx = 16, rp            C                       M I
+       br      L(m1)                   C                       B
+
+
+       ALIGN(32)
+.Lb010:
+.mmi;  ld8     v0 = [vp], 8            C                       M01
+       ld8     u0 = [up], 8            C                       M01
+       shr.u   n = n, 3                C                       I0
+.mmb;  ADDSUB  w3 = r10, r11           C                       M I
+       nop     0
+  (p15)        br      L(gt2)                  C                       B
+       ;;
+.mmi;  cmp.CND p9, p0 = w3, r10        C                       M I
+       ADDSUB  w0 = u0, v0             C                       M I
+       mov     r8 = 0                  C                       M I
+       ;;
+.mmb;  nop     0
+       cmp.CND p6, p0 = w0, u0         C                       M I
+       br      L(cj2)                  C                       B
+L(gt2):
+.mmi;  ld8     v1 = [vp], 8            C                       M01
+       ld8     u1 = [up], 8            C                       M01
+       nop     0
+       ;;
+.mmi;  add     upadv = PFDIST, up
+       add     vpadv = PFDIST, vp
+       mov     ar.lc = n               C                       I0
+.mmi;  ld8     v2 = [vp], 8            C                       M01
+       ld8     u2 = [up], 8            C                       M01
+       nop     0
+       ;;
+.mmi;  ld8     v3 = [vp], 8            C                       M01
+       cmp.CND p9, p0 = w3, r10        C                       M I
+       ADDSUB  w0 = u0, v0             C                       M I
+.mmb;  ld8     u3 = [up], 8            C                       M01
+       add     rpx = 24, rp            C                       M I
+       br      L(m23)                  C                       B
+
+
+       ALIGN(32)
+.Lb011:                                C NOTE review: looks like the feed-in for n mod 8 == 3 - confirm at dispatch
+.mmi;  ld8     v3 = [vp], 8            C                       M01
+       ld8     u3 = [up], 8            C                       M01
+       ADDSUB  w2 = r10, r11           C                       M I
+       ;;
+.mmb;  ld8     v0 = [vp], 8            C                       M01
+       ld8     u0 = [up], 8            C                       M01
+  (p15)        br      1f                      C                       B
+.mmb;  cmp.CND p8, p0 = w2, r10        C                       M I
+       ADDSUB  w3 = u3, v3             C                       M I
+       br      L(cj3)                  C                       B
+1:                                     C p15 path: load more limbs then set ar.lc and join the loop at m23
+.mmi;  ld8     v1 = [vp], 8            C                       M01
+       ld8     u1 = [up], 8            C                       M01
+       shr.u   n = n, 3                C                       I0
+       ;;
+.mmi;  add     upadv = PFDIST, up      C address later used by lfetch in the loop
+       add     vpadv = PFDIST, vp      C address later used by lfetch in the loop
+       ADDSUB  w3 = u3, v3             C                       M I
+.mmi;  ld8     v2 = [vp], 8            C                       M01
+       ld8     u2 = [up], 8            C                       M01
+       cmp.CND p8, p0 = w2, r10        C                       M I
+       ;;
+.mmi;  ld8     v3 = [vp], 8            C                       M01
+       cmp.CND p9, p0 = w3, u3         C                       M I
+       mov     ar.lc = n               C                       I0
+.mmi;  ld8     u3 = [up], 8            C                       M01
+       nop     0                       C filler slot
+       nop     0                       C filler slot
+       ;;
+.mmi;  add     rpx = 32, rp            C                       M I
+       st8     [rp] = w2, 8            C                       M23
+   (p8)        cmpeqor p9, p0 = LIM, w3        C                       M I
+.mmb;
+   (p8)        add     w3 = INCR, w3           C                       M I
+       ADDSUB  w0 = u0, v0             C                       M I
+       br      L(m23)                  C                       B
+
+
+       ALIGN(32)
+.Lb100:                                C NOTE review: looks like the feed-in for n mod 8 == 4 - confirm at dispatch
+.mmi;  ld8     v2 = [vp], 8            C                       M01
+       ld8     u2 = [up], 8            C                       M01
+       shr.u   n = n, 3                C                       I0
+       ;;
+.mmi;  ld8     v3 = [vp], 8            C                       M01
+       ld8     u3 = [up], 8            C                       M01
+       ADDSUB  w1 = r10, r11           C                       M I
+       ;;
+.mmi;  ld8     v0 = [vp], 8            C                       M01
+       ld8     u0 = [up], 8            C                       M01
+       cmp.CND p7, p0 = w1, r10        C                       M I
+.mmb;  nop     0                       C filler slot
+       ADDSUB  w2 = u2, v2             C                       M I
+  (p14)        br      L(cj4)                  C                       B
+       ;;
+L(gt4):                                C fall-through when p14 is clear: set ar.lc and join the loop at m4
+.mmi;  add     upadv = PFDIST, up      C address later used by lfetch in the loop
+       add     vpadv = PFDIST, vp      C address later used by lfetch in the loop
+       mov     ar.lc = n               C                       I0
+       ld8     v1 = [vp], 8            C                       M01
+       ld8     u1 = [up], 8            C                       M01
+       nop     0                       C filler slot
+       ;;
+.mmi;  ld8     v2 = [vp], 8            C                       M01
+       cmp.CND p8, p0 = w2, u2         C                       M I
+       nop     0                       C filler slot
+.mmi;  ld8     u2 = [up], 8            C                       M01
+       ADDSUB  w3 = u3, v3             C                       M I
+       add     rpx = 8, rp             C                       M I
+       ;;
+.mmi;  ld8     v3 = [vp], 8            C                       M01
+       cmp.CND p9, p0 = w3, u3         C                       M I
+   (p7)        cmpeqor p8, p0 = LIM, w2        C                       M I
+.mmb;  ld8     u3 = [up], 8            C                       M01
+   (p7)        add     w2 = INCR, w2           C                       M I
+       br      L(m4)                   C                       B
+
+
+       ALIGN(32)
+.Lb101:                                C NOTE review: looks like the feed-in for n mod 8 == 5 - confirm at dispatch
+.mmi;  ld8     v1 = [vp], 8            C                       M01
+       ld8     u1 = [up], 8            C                       M01
+       shr.u   n = n, 3                C                       I0
+       ;;
+.mmi;  ld8     v2 = [vp], 8            C                       M01
+       ld8     u2 = [up], 8            C                       M01
+       ADDSUB  w0 = r10, r11           C                       M I
+       ;;
+.mmi;  add     upadv = PFDIST, up      C address later used by lfetch in the loop
+       add     vpadv = PFDIST, vp      C address later used by lfetch in the loop
+       add     rpx = 16, rp            C                       M I
+       ld8     v3 = [vp], 8            C                       M01
+       ld8     u3 = [up], 8            C                       M01
+       nop     0                       C filler slot
+       ;;
+.mmi;  ld8     v0 = [vp], 8            C                       M01
+       cmp.CND p6, p0 = w0, r10        C                       M I
+       nop     0                       C filler slot
+.mmb;  ld8     u0 = [up], 8            C                       M01
+       ADDSUB  w1 = u1, v1             C                       M I
+  (p14)        br      L(cj5)                  C                       B
+       ;;
+L(gt5):                                C fall-through when p14 is clear: set ar.lc and join the loop at m5
+.mmi;  ld8     v1 = [vp], 8            C                       M01
+       cmp.CND p7, p0 = w1, u1         C                       M I
+       mov     ar.lc = n               C                       I0
+.mmb;  ld8     u1 = [up], 8            C                       M01
+       ADDSUB  w2 = u2, v2             C                       M I
+       br      L(m5)                   C                       B
+
+
+       ALIGN(32)
+.Lb110:                                C NOTE review: looks like the feed-in for n mod 8 == 6 - confirm at dispatch
+.mmi;  ld8     v0 = [vp], 8            C                       M01
+       ld8     u0 = [up], 8            C                       M01
+       shr.u   n = n, 3                C                       I0
+       ;;
+.mmi;  ld8     v1 = [vp], 8            C                       M01
+       ld8     u1 = [up], 8            C                       M01
+       ADDSUB  w3 = r10, r11           C                       M I
+       ;;
+.mmi;  add     upadv = PFDIST, up      C address later used by lfetch in the loop
+       add     vpadv = PFDIST, vp      C address later used by lfetch in the loop
+       mov     ar.lc = n               C                       I0
+.mmi;  ld8     v2 = [vp], 8            C                       M01
+       ld8     u2 = [up], 8            C                       M01
+       nop     0                       C filler slot
+       ;;
+.mmi;  ld8     v3 = [vp], 8            C                       M01
+       cmp.CND p9, p0 = w3, r10        C                       M I
+       ADDSUB  w0 = u0, v0             C                       M I
+.mmb;  ld8     u3 = [up], 8            C                       M01
+       add     rpx = 24, rp            C                       M I
+       br      L(m67)                  C                       B
+
+
+       ALIGN(32)
+.Lb111:                                C NOTE review: looks like the feed-in for n mod 8 == 7 - confirm at dispatch
+.mmi;  ld8     v0 = [vp], 8            C                       M01
+       ld8     u0 = [up], 8            C                       M01
+       shr.u   n = n, 3                C                       I0
+       ;;
+.mmi;  ld8     v1 = [vp], 8            C                       M01
+       ld8     u1 = [up], 8            C                       M01
+       ADDSUB  w2 = r10, r11           C                       M I
+       ;;
+.mmi;  ld8     v2 = [vp], 8            C                       M01
+       cmp.CND p8, p0 = w2, r10        C                       M I
+       mov     ar.lc = n               C                       I0
+.mmi;  ld8     u2 = [up], 8            C                       M01
+       ADDSUB  w3 = r18, r19           C                       M I
+       nop     0                       C filler slot
+       ;;
+.mmi;  add     upadv = PFDIST, up      C address later used by lfetch in the loop
+       add     vpadv = PFDIST, vp      C address later used by lfetch in the loop
+       nop     0                       C filler slot
+.mmi;  ld8     v3 = [vp], 8            C                       M01
+       ld8     u3 = [up], 8            C                       M01
+       cmp.CND p9, p0 = w3, r18        C                       M I
+       ;;
+.mmi;  add     rpx = 32, rp            C                       M I
+       st8     [rp] = w2, 8            C                       M23
+   (p8)        cmpeqor p9, p0 = LIM, w3        C                       M I
+.mmb;
+   (p8)        add     w3 = INCR, w3           C                       M I
+       ADDSUB  w0 = u0, v0             C                       M I
+       br      L(m67)                  C                       B
  
-.grt1: ld8             v1 = [vp], 8            C                       M01
-       ld8             u1 = [up], 8            C                       M01
-       shr.u           n = n, 3                C                       I0
-       ;;
-       ld8             v2 = [vp], 8            C                       M01
-       ld8             u2 = [up], 8            C                       M01
-       cmp.ne          p9, p0 = r0, r0         C read near Loop
-       ;;
-       ld8             v3 = [vp], 8            C                       M01
-       ld8             u3 = [up], 8            C                       M01
-       mov.i           ar.lc = n               C                       I0
-       ;;
-       ld8             v4 = [vp], 8            C                       M01
-       ld8             u4 = [up], 8            C                       M01
-       cmp.PRED        p6, p0 = w0, r10        C                       M I
-       ;;
-       ld8             v5 = [vp], 8            C                       M01
-       ld8             u5 = [up], 8            C                       M01
-       ADDSUB          w1 = u1, v1             C                       M I
-       ;;
-       ld8             v6 = [vp], 8            C                       M01
-       ld8             u6 = [up], 8            C                       M01
-       cmp.PRED        p7, p0 = w1, u1         C                       M I
-       ;;
-       ld8             v7 = [vp], 8            C                       M01
-       ld8             u7 = [up], 8            C                       M01
-       ADDSUB          w2 = u2, v2             C                       M I
-       ;;
-       add             r11 = 512, vp
-       ld8             v0 = [vp], 8            C                       M01
-       add             r10 = 512, up
-       ld8             u0 = [up], 8            C                       M01
-       br.cloop.dptk   .Loop                   C                       B
-       br              .Lcj9                   C                       B
-
-.Lb010:        ld8             v0 = [vp], 8            C                       M01
-       ld8             u0 = [up], 8            C                       M01
-       add             rpx = 24, rp            C                       M I
-       ADDSUB          w7 = r10, r11           C                       M I
-  (p15)        br.cond.dpnt    .grt2                   C                       B
-       ;;
-       cmp.PRED        p9, p0 = w7, r10        C                       M I
-       ADDSUB          w0 = u0, v0             C                       M I
-       br              .Lcj2                   C                       B
-
-.grt2: ld8             v1 = [vp], 8            C                       M01
-       ld8             u1 = [up], 8            C                       M01
-       shr.u           n = n, 3                C                       I0
-       ;;
-       ld8             v2 = [vp], 8            C                       M01
-       ld8             u2 = [up], 8            C                       M01
-       ;;
-       ld8             v3 = [vp], 8            C                       M01
-       ld8             u3 = [up], 8            C                       M01
-       mov.i           ar.lc = n               C                       I0
-       ;;
-       ld8             v4 = [vp], 8            C                       M01
-       ld8             u4 = [up], 8            C                       M01
-       ;;
-       ld8             v5 = [vp], 8            C                       M01
-       ld8             u5 = [up], 8            C                       M01
-       cmp.PRED        p9, p0 = w7, r10        C                       M I
-       ;;
-       ld8             v6 = [vp], 8            C                       M01
-       ld8             u6 = [up], 8            C                       M01
-       ADDSUB          w0 = u0, v0             C                       M I
-       ;;
-       add             r11 = 512, vp
-       ld8             v7 = [vp], 8            C                       M01
-       add             r10 = 512, up
-       ld8             u7 = [up], 8            C                       M01
-       br              .LL01x                  C                       B
-
-.Lb011:        ld8             v7 = [vp], 8            C                       M01
-       ld8             u7 = [up], 8            C                       M01
-       ADDSUB          w6 = r10, r11           C                       M I
-       ;;
-       ld8             v0 = [vp], 8            C                       M01
-       ld8             u0 = [up], 8            C                       M01
-  (p15)        br.cond.dpnt    .grt3                   C                       B
-       ;;
-       cmp.PRED        p8, p0 = w6, r10        C                       M I
-       ADDSUB          w7 = u7, v7             C                       M I
-       ;;
-       st8             [rp] = w6, 8            C                       M23
-       cmp.PRED        p9, p0 = w7, u7         C                       M I
-       br              .Lcj3                   C                       B
-
-.grt3: ld8             v1 = [vp], 8            C                       M01
-       ld8             u1 = [up], 8            C                       M01
-       add             rpx = 32, rp            C                       M I
-       ;;
-       ld8             v2 = [vp], 8            C                       M01
-       ld8             u2 = [up], 8            C                       M01
-       shr.u           n = n, 3                C                       I0
-       ;;
-       ld8             v3 = [vp], 8            C                       M01
-       ld8             u3 = [up], 8            C                       M01
-       cmp.PRED        p8, p0 = w6, r10        C                       M I
-       ;;
-       ld8             v4 = [vp], 8            C                       M01
-       ld8             u4 = [up], 8            C                       M01
-       mov.i           ar.lc = n               C                       I0
-       ADDSUB          w7 = u7, v7             C                       M I
-       nop.i           0
-       nop.b           0
-       ;;
-       ld8             v5 = [vp], 8            C                       M01
-       ld8             u5 = [up], 8            C                       M01
-       cmp.PRED        p9, p0 = w7, u7         C                       M I
-       ;;
-       add             r11 = 512, vp
-       ld8             v6 = [vp], 8            C                       M01
-       add             r10 = 512, up
-       ld8             u6 = [up], 8            C                       M01
-   (p8)        cmp.eq.or       p9, p0 = LIM, w7        C                       M I
-       ;;
-       ld8             v7 = [vp], 8            C                       M01
-       ld8             u7 = [up], 8            C                       M01
-   (p8)        add             w7 = INCR, w7           C                       M I
-       st8             [rp] = w6, 8            C                       M23
-       ADDSUB          w0 = u0, v0             C                       M I
-       br              .LL01x                  C                       B
-
-.Lb100:        ld8             v6 = [vp], 8            C                       M01
-       ld8             u6 = [up], 8            C                       M01
-       add             rpx = 8, rp             C                       M I
-       ;;
-       ld8             v7 = [vp], 8            C                       M01
-       ld8             u7 = [up], 8            C                       M01
-       ADDSUB          w5 = r10, r11           C                       M I
-       ;;
-       ld8             v0 = [vp], 8            C                       M01
-       ld8             u0 = [up], 8            C                       M01
-  (p15)        br.cond.dpnt    .grt4                   C                       B
-       ;;
-       cmp.PRED        p7, p0 = w5, r10        C                       M I
-       ADDSUB          w6 = u6, v6             C                       M I
-       ;;
-       cmp.PRED        p8, p0 = w6, u6         C                       M I
-       ADDSUB          w7 = u7, v7             C                       M I
-       br              .Lcj4                   C                       B
-
-.grt4: ld8             v1 = [vp], 8            C                       M01
-       ld8             u1 = [up], 8            C                       M01
-       shr.u           n = n, 3                C                       I0
-       cmp.PRED        p7, p0 = w5, r10        C                       M I
-       ;;
-       ld8             v2 = [vp], 8            C                       M01
-       ld8             u2 = [up], 8            C                       M01
-       ADDSUB          w6 = u6, v6             C                       M I
-       ;;
-       ld8             v3 = [vp], 8            C                       M01
-       ld8             u3 = [up], 8            C                       M01
-       cmp.PRED        p8, p0 = w6, u6         C                       M I
-       ;;
-       ld8             v4 = [vp], 8            C                       M01
-       ld8             u4 = [up], 8            C                       M01
-       mov.i           ar.lc = n               C                       I0
-       ;;
-       ld8             v5 = [vp], 8            C                       M01
-       ld8             u5 = [up], 8            C                       M01
-       ADDSUB          w7 = u7, v7             C                       M I
-       ;;
-       add             r11 = 512, vp
-       ld8             v6 = [vp], 8            C                       M01
-       add             r10 = 512, up
-       ld8             u6 = [up], 8            C                       M01
-       cmp.PRED        p9, p0 = w7, u7         C                       M I
-       ;;
-       ld8             v7 = [vp], 8            C                       M01
-       ld8             u7 = [up], 8            C                       M01
-   (p7)        cmp.eq.or       p8, p0 = LIM, w6        C                       M I
-   (p7)        add             w6 = INCR, w6           C                       M I
-       br              .LL100                  C                       B
-
-.Lb101:        ld8             v5 = [vp], 8            C                       M01
-       ld8             u5 = [up], 8            C                       M01
-       add             rpx = 16, rp            C                       M I
-       ;;
-       ld8             v6 = [vp], 8            C                       M01
-       ld8             u6 = [up], 8            C                       M01
-       ADDSUB          w4 = r10, r11           C                       M I
-       ;;
-       ld8             v7 = [vp], 8            C                       M01
-       ld8             u7 = [up], 8            C                       M01
-       cmp.PRED        p6, p0 = w4, r10        C                       M I
-       ;;
-       ld8             v0 = [vp], 8            C                       M01
-       ld8             u0 = [up], 8            C                       M01
-       ADDSUB          w5 = u5, v5             C                       M I
-       shr.u           n = n, 3                C                       I0
-  (p15)        br.cond.dpnt    .grt5                   C                       B
-       ;;
-       cmp.PRED        p7, p0 = w5, u5         C                       M I
-       ADDSUB          w6 = u6, v6             C                       M I
-       br              .Lcj5                   C                       B
-
-.grt5: ld8             v1 = [vp], 8            C                       M01
-       ld8             u1 = [up], 8            C                       M01
-       ;;
-       ld8             v2 = [vp], 8            C                       M01
-       ld8             u2 = [up], 8            C                       M01
-       mov.i           ar.lc = n               C                       I0
-       ;;
-       ld8             v3 = [vp], 8            C                       M01
-       ld8             u3 = [up], 8            C                       M01
-       cmp.PRED        p7, p0 = w5, u5         C                       M I
-       ;;
-       ld8             v4 = [vp], 8            C                       M01
-       ld8             u4 = [up], 8            C                       M01
-       ADDSUB          w6 = u6, v6             C                       M I
-       ;;
-       add             r11 = 512, vp
-       ld8             v5 = [vp], 8            C                       M01
-       add             r10 = 512, up
-       ld8             u5 = [up], 8            C                       M01
-       br              .LL101                  C                       B
-
-.Lb110:        ld8             v4 = [vp], 8            C                       M01
-       ld8             u4 = [up], 8            C                       M01
-       add             rpx = 24, rp            C                       M I
-       ;;
-       ld8             v5 = [vp], 8            C                       M01
-       ld8             u5 = [up], 8            C                       M01
-       ADDSUB          w3 = r10, r11           C                       M I
-       ;;
-       ld8             v6 = [vp], 8            C                       M01
-       ld8             u6 = [up], 8            C                       M01
-       shr.u           n = n, 3                C                       I0
-       ;;
-       ld8             v7 = [vp], 8            C                       M01
-       ld8             u7 = [up], 8            C                       M01
-       cmp.PRED        p9, p0 = w3, r10        C                       M I
-       ;;
-       ld8             v0 = [vp], 8            C                       M01
-       ld8             u0 = [up], 8            C                       M01
-       ADDSUB          w4 = u4, v4             C                       M I
-  (p14)        br.cond.dptk    .Lcj67                  C                       B
-       ;;
-
-.grt6: ld8             v1 = [vp], 8            C                       M01
-       ld8             u1 = [up], 8            C                       M01
-       mov.i           ar.lc = n               C                       I0
-       cmp.PRED        p9, p0 = w3, r10        C                       M I
-       nop.i           0
-       nop.b           0
-       ;;
-       ld8             v2 = [vp], 8            C                       M01
-       ld8             u2 = [up], 8            C                       M01
-       ADDSUB          w4 = u4, v4             C                       M I
-       ;;
-       add             r11 = 512, vp
-       ld8             v3 = [vp], 8            C                       M01
-       add             r10 = 512, up
-       ld8             u3 = [up], 8            C                       M01
-       br              .LL11x                  C                       B
-
-.Lb111:        ld8             v3 = [vp], 8            C                       M01
-       ld8             u3 = [up], 8            C                       M01
-       add             rpx = 32, rp            C                       M I
-       ;;
-       ld8             v4 = [vp], 8            C                       M01
-       ld8             u4 = [up], 8            C                       M01
-       ADDSUB          w2 = r10, r11           C                       M I
-       ;;
-       ld8             v5 = [vp], 8            C                       M01
-       ld8             u5 = [up], 8            C                       M01
-       cmp.PRED        p8, p0 = w2, r10        C                       M I
-       ;;
-       ld8             v6 = [vp], 8            C                       M01
-       ld8             u6 = [up], 8            C                       M01
-       ADDSUB          w3 = u3, v3             C                       M I
-       ;;
-       ld8             v7 = [vp], 8            C                       M01
-       ld8             u7 = [up], 8            C                       M01
-       cmp.PRED        p9, p0 = w3, u3         C                       M I
-       ;;
-       ld8             v0 = [vp], 8            C                       M01
-       ld8             u0 = [up], 8            C                       M01
-  (p15)        br.cond.dpnt    .grt7                   C                       B
-       ;;
-       st8             [rp] = w2, 8            C                       M23
-   (p8)        cmp.eq.or       p9, p0 = LIM, w3        C                       M I
-   (p8)        add             w3 = INCR, w3           C                       M I
-       ADDSUB          w4 = u4, v4             C                       M I
-       br              .Lcj67                  C                       B
-
-.grt7: ld8             v1 = [vp], 8            C                       M01
-       ld8             u1 = [up], 8            C                       M01
-       shr.u           n = n, 3                C                       I0
-   (p8)        cmp.eq.or       p9, p0 = LIM, w3        C                       M I
-       nop.i           0
-       nop.b           0
-       ;;
-       add             r11 = 512, vp
-       ld8             v2 = [vp], 8            C                       M01
-       add             r10 = 512, up
-       ld8             u2 = [up], 8            C                       M01
-   (p8)        add             w3 = INCR, w3           C                       M I
-       nop.b           0
-       ;;
-       ld8             v3 = [vp], 8            C                       M01
-       ld8             u3 = [up], 8            C                       M01
-       mov.i           ar.lc = n               C                       I0
-       st8             [rp] = w2, 8            C                       M23
-       ADDSUB          w4 = u4, v4             C                       M I
-       br              .LL11x                  C                       B
  
  C *** MAIN LOOP START ***
         ALIGN(32)
-.Loop: ld8             v1 = [vp], 8            C                       M01
-       cmp.PRED        p7, p0 = w1, u1         C                       M I
-   (p9)        cmp.eq.or       p6, p0 = LIM, w0        C                       M I
-       ld8             u1 = [up], 8            C                       M01
-   (p9)        add             w0 = INCR, w0           C                       M I
-       ADDSUB          w2 = u2, v2             C                       M I
-       ;;
-       ld8             v2 = [vp], 8            C                       M01
-       cmp.PRED        p8, p0 = w2, u2         C                       M I
-   (p6)        cmp.eq.or       p7, p0 = LIM, w1        C                       M I
-       ld8             u2 = [up], 8            C                       M01
-   (p6)        add             w1 = INCR, w1           C                       M I
-       ADDSUB          w3 = u3, v3             C                       M I
-       ;;
-       st8             [rp] = w0, 8            C                       M23
-       ld8             v3 = [vp], 8            C                       M01
-       cmp.PRED        p9, p0 = w3, u3         C                       M I
-   (p7)        cmp.eq.or       p8, p0 = LIM, w2        C                       M I
-       ld8             u3 = [up], 8            C                       M01
-   (p7)        add             w2 = INCR, w2           C                       M I
-       ;;
-.LL000:        st8             [rp] = w1, 16           C                       M23
-       st8             [rpx] = w2, 32          C                       M23
-   (p8)        cmp.eq.or       p9, p0 = LIM, w3        C                       M I
-       lfetch          [r10], 64
-   (p8)        add             w3 = INCR, w3           C                       M I
-       ADDSUB          w4 = u4, v4             C                       M I
-       ;;
-.LL11x:        st8             [rp] = w3, 8            C                       M23
-       ld8             v4 = [vp], 8            C                       M01
-       cmp.PRED        p6, p0 = w4, u4         C                       M I
-       ld8             u4 = [up], 8            C                       M01
-       ADDSUB          w5 = u5, v5             C                       M I
-       ;;
-       ld8             v5 = [vp], 8            C                       M01
-       cmp.PRED        p7, p0 = w5, u5         C                       M I
-   (p9)        cmp.eq.or       p6, p0 = LIM, w4        C                       M I
-       ld8             u5 = [up], 8            C                       M01
-   (p9)        add             w4 = INCR, w4           C                       M I
-       ADDSUB          w6 = u6, v6             C                       M I
-       ;;
-.LL101:        ld8             v6 = [vp], 8            C                       M01
-       cmp.PRED        p8, p0 = w6, u6         C                       M I
-   (p6)        cmp.eq.or       p7, p0 = LIM, w5        C                       M I
-       ld8             u6 = [up], 8            C                       M01
-   (p6)        add             w5 = INCR, w5           C                       M I
-       ADDSUB          w7 = u7, v7             C                       M I
-       ;;
-       st8             [rp] = w4, 8            C                       M23
-       ld8             v7 = [vp], 8            C                       M01
-       cmp.PRED        p9, p0 = w7, u7         C                       M I
-   (p7)        cmp.eq.or       p8, p0 = LIM, w6        C                       M I
-       ld8             u7 = [up], 8            C                       M01
-   (p7)        add             w6 = INCR, w6           C                       M I
-       ;;
-.LL100:        st8             [rp] = w5, 16           C                       M23
-       st8             [rpx] = w6, 32          C                       M23
-   (p8)        cmp.eq.or       p9, p0 = LIM, w7        C                       M I
-       lfetch          [r11], 64
-   (p8)        add             w7 = INCR, w7           C                       M I
-       ADDSUB          w0 = u0, v0             C                       M I
-       ;;
-.LL01x:        st8             [rp] = w7, 8            C                       M23
-       ld8             v0 = [vp], 8            C                       M01
-       cmp.PRED        p6, p0 = w0, u0         C                       M I
-       ld8             u0 = [up], 8            C                       M01
-       ADDSUB          w1 = u1, v1             C                       M I
-       br.cloop.dptk   .Loop                   C                       B
+L(top):
+L(c5): ld8     v1 = [vp], 8            C                       M01
+       cmp.CND p7, p0 = w1, u1         C                       M I
+   (p9)        cmpeqor p6, p0 = LIM, w0        C                       M I
+       ld8     u1 = [up], 8            C                       M01
+   (p9)        add     w0 = INCR, w0           C                       M I
+       ADDSUB  w2 = u2, v2             C                       M I
+       ;;
+L(m5): ld8     v2 = [vp], 8            C                       M01
+       cmp.CND p8, p0 = w2, u2         C                       M I
+   (p6)        cmpeqor p7, p0 = LIM, w1        C                       M I
+       ld8     u2 = [up], 8            C                       M01
+   (p6)        add     w1 = INCR, w1           C                       M I
+       ADDSUB  w3 = u3, v3             C                       M I
+       ;;
+       st8     [rp] = w0, 8            C                       M23
+       ld8     v3 = [vp], 8            C                       M01
+       cmp.CND p9, p0 = w3, u3         C                       M I
+   (p7)        cmpeqor p8, p0 = LIM, w2        C                       M I
+       ld8     u3 = [up], 8            C                       M01
+   (p7)        add     w2 = INCR, w2           C                       M I
+       ;;
+L(m4): st8     [rp] = w1, 16           C                       M23
+       st8     [rpx] = w2, 32          C                       M23
+   (p8)        cmpeqor p9, p0 = LIM, w3        C                       M I
+       lfetch  [upadv], 64
+   (p8)        add     w3 = INCR, w3           C                       M I
+       ADDSUB  w0 = u0, v0             C                       M I
+       ;;
+L(m23):        st8     [rp] = w3, 8            C                       M23
+       ld8     v0 = [vp], 8            C                       M01
+       cmp.CND p6, p0 = w0, u0         C                       M I
+       ld8     u0 = [up], 8            C                       M01
+       ADDSUB  w1 = u1, v1             C                       M I
+       nop.b   0
+       ;;
+L(c1): ld8     v1 = [vp], 8            C                       M01
+       cmp.CND p7, p0 = w1, u1         C                       M I
+   (p9)        cmpeqor p6, p0 = LIM, w0        C                       M I
+       ld8     u1 = [up], 8            C                       M01
+   (p9)        add     w0 = INCR, w0           C                       M I
+       ADDSUB  w2 = u2, v2             C                       M I
+       ;;
+L(m1): ld8     v2 = [vp], 8            C                       M01
+       cmp.CND p8, p0 = w2, u2         C                       M I
+   (p6)        cmpeqor p7, p0 = LIM, w1        C                       M I
+       ld8     u2 = [up], 8            C                       M01
+   (p6)        add     w1 = INCR, w1           C                       M I
+       ADDSUB  w3 = u3, v3             C                       M I
+       ;;
+       st8     [rp] = w0, 8            C                       M23
+       ld8     v3 = [vp], 8            C                       M01
+       cmp.CND p9, p0 = w3, u3         C                       M I
+   (p7)        cmpeqor p8, p0 = LIM, w2        C                       M I
+       ld8     u3 = [up], 8            C                       M01
+   (p7)        add     w2 = INCR, w2           C                       M I
+       ;;
+L(m0): st8     [rp] = w1, 16           C                       M23
+       st8     [rpx] = w2, 32          C                       M23
+   (p8)        cmpeqor p9, p0 = LIM, w3        C                       M I
+       lfetch  [vpadv], 64
+   (p8)        add     w3 = INCR, w3           C                       M I
+       ADDSUB  w0 = u0, v0             C                       M I
+       ;;
+L(m67):        st8     [rp] = w3, 8            C                       M23
+       ld8     v0 = [vp], 8            C                       M01
+       cmp.CND p6, p0 = w0, u0         C                       M I
+       ld8     u0 = [up], 8            C                       M01
+       ADDSUB  w1 = u1, v1             C                       M I
+       br.cloop.dptk   L(top)          C                       B
         ;;
  C *** MAIN LOOP END ***
  
-       cmp.PRED        p7, p0 = w1, u1         C                       M I
-   (p9)        cmp.eq.or       p6, p0 = LIM, w0        C                       M I
-   (p9)        add             w0 = INCR, w0           C                       M I
-       ADDSUB          w2 = u2, v2             C                       M I
-       ;;
-.Lcj9: cmp.PRED        p8, p0 = w2, u2         C                       M I
-   (p6)        cmp.eq.or       p7, p0 = LIM, w1        C                       M I
-       st8             [rp] = w0, 8            C                       M23
-   (p6)        add             w1 = INCR, w1           C                       M I
-       ADDSUB          w3 = u3, v3             C                       M I
-       ;;
-       cmp.PRED        p9, p0 = w3, u3         C                       M I
-   (p7)        cmp.eq.or       p8, p0 = LIM, w2        C                       M I
-   (p7)        add             w2 = INCR, w2           C                       M I
-       ;;
-.Lcj8: st8             [rp] = w1, 16           C                       M23
-       st8             [rpx] = w2, 32          C                       M23
-   (p8)        cmp.eq.or       p9, p0 = LIM, w3        C                       M I
-   (p8)        add             w3 = INCR, w3           C                       M I
-       ADDSUB          w4 = u4, v4             C                       M I
-       ;;
-.Lcj67:        st8             [rp] = w3, 8            C                       M23
-       cmp.PRED        p6, p0 = w4, u4         C                       M I
-       ADDSUB          w5 = u5, v5             C                       M I
-       ;;
-       cmp.PRED        p7, p0 = w5, u5         C                       M I
-   (p9)        cmp.eq.or       p6, p0 = LIM, w4        C                       M I
-   (p9)        add             w4 = INCR, w4           C                       M I
-       ADDSUB          w6 = u6, v6             C                       M I
-       ;;
-.Lcj5: cmp.PRED        p8, p0 = w6, u6         C                       M I
-   (p6)        cmp.eq.or       p7, p0 = LIM, w5        C                       M I
-       st8             [rp] = w4, 8            C                       M23
-   (p6)        add             w5 = INCR, w5           C                       M I
-       ADDSUB          w7 = u7, v7             C                       M I
-       ;;
-.Lcj4: cmp.PRED        p9, p0 = w7, u7         C                       M I
-   (p7)        cmp.eq.or       p8, p0 = LIM, w6        C                       M I
-   (p7)        add             w6 = INCR, w6           C                       M I
-       ;;
-       st8             [rp] = w5, 16           C                       M23
-       st8             [rpx] = w6, 32          C                       M23
-.Lcj3:
-   (p8)        cmp.eq.or       p9, p0 = LIM, w7        C                       M I
-   (p8)        add             w7 = INCR, w7           C                       M I
-       ADDSUB          w0 = u0, v0             C                       M I
-       ;;
-.Lcj2: st8             [rp] = w7, 8            C                       M23
-       cmp.PRED        p6, p0 = w0, u0         C                       M I
-       ;;
-   (p9)        cmp.eq.or       p6, p0 = LIM, w0        C                       M I
-   (p9)        add             w0 = INCR, w0           C                       M I
-       mov             r8 = 0                  C                       M I
-       ;;
-.Lcj1: st8             [rp] = w0, 8            C                       M23
-       mov.i           ar.lc = r2              C                       I0
-   (p6)        mov             r8 = 1                  C                       M I
-       br.ret.sptk.many b0                     C                       B
+L(end):
+.mmi;
+   (p9)        cmpeqor p6, p0 = LIM, w0        C                       M I
+   (p9)        add     w0 = INCR, w0           C                       M I
+       mov     ar.lc = r2              C                       I0
+L(cj5):
+.mmi;  cmp.CND p7, p0 = w1, u1         C                       M I
+       ADDSUB  w2 = u2, v2             C                       M I
+       nop     0
+       ;;
+.mmi;  st8     [rp] = w0, 8            C                       M23
+   (p6)        cmpeqor p7, p0 = LIM, w1        C                       M I
+   (p6)        add     w1 = INCR, w1           C                       M I
+L(cj4):
+.mmi;  cmp.CND p8, p0 = w2, u2         C                       M I
+       ADDSUB  w3 = u3, v3             C                       M I
+       nop     0
+       ;;
+.mmi;  st8     [rp] = w1, 8            C                       M23
+   (p7)        cmpeqor p8, p0 = LIM, w2        C                       M I
+   (p7)        add     w2 = INCR, w2           C                       M I
+L(cj3):
+.mmi;  cmp.CND p9, p0 = w3, u3         C                       M I
+       ADDSUB  w0 = u0, v0             C                       M I
+       nop     0
+       ;;
+.mmi;  st8     [rp] = w2, 8            C                       M23
+   (p8)        cmpeqor p9, p0 = LIM, w3        C                       M I
+   (p8)        add     w3 = INCR, w3           C                       M I
+.mmi;  cmp.CND p6, p0 = w0, u0         C                       M I
+       nop     0
+       mov     r8 = 0                  C                       M I
+       ;;
+L(cj2):
+.mmi;  st8     [rp] = w3, 8            C                       M23
+   (p9)        cmpeqor p6, p0 = LIM, w0        C                       M I
+   (p9)        add     w0 = INCR, w0           C                       M I
+       ;;
+L(cj1):
+.mmb;  st8     [rp] = w0, 8            C                       M23
+   (p6)        mov     r8 = 1                  C                       M I
+       br.ret.sptk.many b0             C                       B
  EPILOGUE()
  ASM_END()
diff --git a/mpn/ia64/aorslsh1_n.asm b/mpn/ia64/aorslsh1_n.asm

index 5348149c87ec1ee2bbc9a0cd6720f4c84b72d91b..01eff964ffde717979ff2881b1dd37ddfca44869 100644 (file)
--- a/mpn/ia64/aorslsh1_n.asm
+++ b/mpn/ia64/aorslsh1_n.asm
@@ -1,6 +1,8 @@
  dnl  IA-64 mpn_addlsh1_n/mpn_sublsh1_n -- rp[] = up[] +- (vp[] << 1).
  
-dnl  Copyright 2003, 2004, 2005 Free Software Foundation, Inc.
+dnl  Contributed to the GNU project by Torbjorn Granlund.
+
+dnl  Copyright 2003, 2004, 2005, 2010 Free Software Foundation, Inc.
  
  dnl  This file is part of the GNU MP Library.
  
@@ -23,301 +25,27 @@ C           cycles/limb
  C Itanium:      3.0
  C Itanium 2:    1.5
  
-C TODO
-C  * Use shladd in feed-in code (for mpn_addlsh1_n).
  
-C INPUT PARAMETERS
-define(`rp',`r32')
-define(`up',`r33')
-define(`vp',`r34')
-define(`n',`r35')
+define(LSH,            1)
+define(RSH,            63)
  
  ifdef(`OPERATION_addlsh1_n',`
    define(ADDSUB,       add)
-  define(PRED,        ltu)
+  define(ADDP,         1)
+  define(CND,         ltu)
    define(INCR,        1)
    define(LIM,         -1)
    define(func, mpn_addlsh1_n)
  ')
  ifdef(`OPERATION_sublsh1_n',`
    define(ADDSUB,       sub)
-  define(PRED,        gtu)
+  define(CND,         gtu)
    define(INCR,        -1)
    define(LIM,         0)
    define(func, mpn_sublsh1_n)
  ')
  
-C Some useful aliases for registers we use
-define(`u0',`r14') define(`u1',`r15') define(`u2',`r16') define(`u3',`r17')
-define(`v0',`r18') define(`v1',`r19') define(`v2',`r20') define(`v3',`r21')
-define(`w0',`r22') define(`w1',`r23') define(`w2',`r24') define(`w3',`r25')
-define(`s0',`r26') define(`s1',`r27') define(`s2',`r28') define(`s3',`r29')
-define(`x0',`r30') define(`x1',`r31') define(`x2',`r30') define(`x3',`r31')
  
  MULFUNC_PROLOGUE(mpn_addlsh1_n mpn_sublsh1_n)
  
-ASM_START()
-PROLOGUE(func)
-       .prologue
-       .save   ar.lc, r2
-       .body
-ifdef(`HAVE_ABI_32',`
-       addp4           rp = 0, rp              C                       M I
-       addp4           up = 0, up              C                       M I
-       addp4           vp = 0, vp              C                       M I
-       zxt4            n = n                   C                       I
-       ;;
-')
- {.mmi;        ld8             r11 = [vp], 8           C                       M01
-       ld8             r10 = [up], 8           C                       M01
-       mov.i           r2 = ar.lc              C                       I0
-}{.mmi;        and             r14 = 3, n              C                       M I
-       cmp.lt          p15, p0 = 4, n          C                       M I
-       add             n = -4, n               C                       M I
-       ;;
-}{.mmi;        cmp.eq          p6, p0 = 1, r14         C                       M I
-       cmp.eq          p7, p0 = 2, r14         C                       M I
-       cmp.eq          p8, p0 = 3, r14         C                       M I
-}{.bbb
-  (p6) br.dptk         .Lb01                   C                       B
-  (p7) br.dptk         .Lb10                   C                       B
-  (p8) br.dptk         .Lb11                   C                       B
-}
-
-.Lb00: ld8             v0 = [vp], 8            C                       M01
-       ld8             u0 = [up], 8            C                       M01
-       shr.u           n = n, 2                C                       I0
-       ;;
-       ld8             v1 = [vp], 8            C                       M01
-       ld8             u1 = [up], 8            C                       M01
-       add             x3 = r11, r11           C                       M I
-       ;;
-       ld8             v2 = [vp], 8            C                       M01
-       ld8             u2 = [up], 8            C                       M01
-       ADDSUB          w3 = r10, x3            C                       M I
-  (p15)        br.dpnt         .grt4                   C                       B
-       ;;
-       shrp            x0 = v0, r11, 63        C                       I0
-       cmp.PRED        p8, p0 = w3, r10        C                       M I
-       ;;
-       shrp            x1 = v1, v0, 63         C                       I0
-       ADDSUB          w0 = u0, x0             C                       M I
-       ;;
-       cmp.PRED        p6, p0 = w0, u0         C                       M I
-       ADDSUB          w1 = u1, x1             C                       M I
-       br              .Lcj4                   C                       B
-
-.grt4: ld8             v3 = [vp], 8            C                       M01
-       shrp            x0 = v0, r11, 63        C                       I0
-       cmp.PRED        p8, p0 = w3, r10        C                       M I
-       add             n = -1, n
-       ;;
-       ld8             u3 = [up], 8            C                       M01
-       mov.i           ar.lc = n               C                       I0
-       shrp            x1 = v1, v0, 63         C                       I0
-       ld8             v0 = [vp], 8            C                       M01
-       ADDSUB          w0 = u0, x0             C                       M I
-       ;;
-       cmp.PRED        p6, p0 = w0, u0         C                       M I
-       ld8             u0 = [up], 8            C                       M01
-       ADDSUB          w1 = u1, x1             C                       M I
-       br              .LL00                   C                       B
-
-.Lb01: add             x2 = r11, r11           C                       M I
-       shr.u           n = n, 2                C                       I0
-  (p15)        br.dpnt         .grt1                   C                       B
-       ;;
-       ADDSUB          w2 = r10, x2            C                       M I
-       shr.u           r8 = r11, 63            C retval                I0
-       ;;
-       cmp.PRED        p6, p0 = w2, r10        C                       M I
-       ;;
-       st8             [rp] = w2, 8            C                       M23
-   (p6)        add             r8 = 1, r8              C                       M I
-       br.ret.sptk.many b0                     C                       B
-
-.grt1: ld8             v3 = [vp], 8            C                       M01
-       ld8             u3 = [up], 8            C                       M01
-       mov.i           ar.lc = n               C FIXME swap with next  I0
-       ;;
-       ld8             v0 = [vp], 8            C                       M01
-       ld8             u0 = [up], 8            C                       M01
-       ADDSUB          w2 = r10, x2
-       ;;
-       ld8             v1 = [vp], 8            C                       M01
-       ld8             u1 = [up], 8            C                       M01
-       shrp            x3 = v3, r11, 63        C                       I0
-       ;;
-       ld8             v2 = [vp], 8            C                       M01
-       ld8             u2 = [up], 8            C                       M01
-       cmp.PRED        p6, p0 = w2, r10        C                       M I
-       ADDSUB          w3 = u3, x3             C                       M I
-       br.cloop.dptk   .grt5                   C                       B
-       ;;
-       shrp            x0 = v0, v3, 63         C                       I0
-       cmp.PRED        p8, p0 = w3, u3         C                       M I
-       br              .Lcj5                   C                       B
-
-.grt5: shrp            x0 = v0, v3, 63         C                       I0
-       ld8             v3 = [vp], 8            C                       M01
-       cmp.PRED        p8, p0 = w3, u3         C                       M I
-       br              .LL01                   C                       B
-
-.Lb10: ld8             v2 = [vp], 8            C                       M01
-       ld8             u2 = [up], 8            C                       M01
-       shr.u           n = n, 2                C                       I0
-       add             x1 = r11, r11           C                       M I
-  (p15)        br.dpnt         .grt2                   C                       B
-       ;;
-       ADDSUB          w1 = r10, x1            C                       M I
-       shrp            x2 = v2, r11, 63        C                       I0
-       ;;
-       cmp.PRED        p8, p0 = w1, r10        C                       M I
-       ADDSUB          w2 = u2, x2             C                       M I
-       shr.u           r8 = v2, 63             C retval                I0
-       ;;
-       cmp.PRED        p6, p0 = w2, u2         C                       M I
-       br              .Lcj2                   C                       B
-
-.grt2: ld8             v3 = [vp], 8            C                       M01
-       ld8             u3 = [up], 8            C                       M01
-       mov.i           ar.lc = n               C                       I0
-       ;;
-       ld8             v0 = [vp], 8            C                       M01
-       ld8             u0 = [up], 8            C                       M01
-       ADDSUB          w1 = r10, x1            C                       M I
-       ;;
-       ld8             v1 = [vp], 8            C                       M01
-       shrp            x2 = v2, r11, 63        C                       I0
-       cmp.PRED        p8, p0 = w1, r10        C                       M I
-       ;;
-       ld8             u1 = [up], 8            C                       M01
-       shrp            x3 = v3, v2, 63         C                       I0
-       ld8             v2 = [vp], 8            C                       M01
-       ADDSUB          w2 = u2, x2             C                       M I
-       ;;
-       cmp.PRED        p6, p0 = w2, u2         C                       M I
-       ld8             u2 = [up], 8            C                       M01
-       ADDSUB          w3 = u3, x3             C                       M I
-       br.cloop.dpnt   .Loop                   C                       B
-       br              .Lskip                  C                       B
-
-.Lb11: ld8             v1 = [vp], 8            C                       M01
-       ld8             u1 = [up], 8            C                       M01
-       shr.u           n = n, 2                C                       I0
-       add             x0 = r11, r11           C                       M I
-       ;;
-       ld8             v2 = [vp], 8            C                       M01
-       ld8             u2 = [up], 8            C                       M01
-  (p15)        br.dpnt         .grt3                   C                       B
-       ;;
-
-       shrp            x1 = v1, r11, 63        C                       I0
-       ADDSUB          w0 = r10, x0            C                       M I
-       ;;
-       cmp.PRED        p6, p0 = w0, r10        C                       M I
-       ADDSUB          w1 = u1, x1             C                       M I
-       ;;
-       shrp            x2 = v2, v1, 63         C                       I0
-       cmp.PRED        p8, p0 = w1, u1         C                       M I
-       br              .Lcj3                   C                       B
-
-.grt3: ld8             v3 = [vp], 8            C                       M01
-       ld8             u3 = [up], 8            C                       M01
-       mov.i           ar.lc = n               C                       I0
-       shrp            x1 = v1, r11, 63        C                       I0
-       ADDSUB          w0 = r10, x0            C                       M I
-       ;;
-       ld8             v0 = [vp], 8            C                       M01
-       cmp.PRED        p6, p0 = w0, r10        C                       M I
-       ld8             u0 = [up], 8            C                       M01
-       ADDSUB          w1 = u1, x1             C                       M I
-       ;;
-       shrp            x2 = v2, v1, 63         C                       I0
-       ld8             v1 = [vp], 8            C                       M01
-       cmp.PRED        p8, p0 = w1, u1         C                       M I
-       br              .LL11                   C                       B
-
-
-C *** MAIN LOOP START ***
-       ALIGN(32)
-.Loop: st8             [rp] = w1, 8            C                       M23
-       shrp            x0 = v0, v3, 63         C                       I0
-   (p8)        cmp.eq.or       p6, p0 = LIM, w2        C                       M I
-   (p8)        add             w2 = INCR, w2           C                       M I
-       ld8             v3 = [vp], 8            C                       M01
-       cmp.PRED        p8, p0 = w3, u3         C                       M I
-       ;;
-.LL01: ld8             u3 = [up], 8            C                       M01
-       shrp            x1 = v1, v0, 63         C                       I0
-   (p6)        cmp.eq.or       p8, p0 = LIM, w3        C                       M I
-   (p6)        add             w3 = INCR, w3           C                       M I
-       ld8             v0 = [vp], 8            C                       M01
-       ADDSUB          w0 = u0, x0             C                       M I
-       ;;
-       st8             [rp] = w2, 8            C                       M23
-       cmp.PRED        p6, p0 = w0, u0         C                       M I
-       ld8             u0 = [up], 8            C                       M01
-       ADDSUB          w1 = u1, x1             C                       M I
-       ;;
-.LL00: st8             [rp] = w3, 8            C                       M23
-       shrp            x2 = v2, v1, 63         C                       I0
-   (p8)        cmp.eq.or       p6, p0 = LIM, w0        C                       M I
-   (p8)        add             w0 = INCR, w0           C                       M I
-       ld8             v1 = [vp], 8            C                       M01
-       cmp.PRED        p8, p0 = w1, u1         C                       M I
-       ;;
-.LL11: ld8             u1 = [up], 8            C                       M01
-       shrp            x3 = v3, v2, 63         C                       I0
-   (p6)        cmp.eq.or       p8, p0 = LIM, w1        C                       M I
-   (p6)        add             w1 = INCR, w1           C                       M I
-       ld8             v2 = [vp], 8            C                       M01
-       ADDSUB          w2 = u2, x2             C                       M I
-       ;;
-       st8             [rp] = w0, 8            C                       M23
-       cmp.PRED        p6, p0 = w2, u2         C                       M I
-       ld8             u2 = [up], 8            C                       M01
-       ADDSUB          w3 = u3, x3             C                       M I
-       br.cloop.dptk   .Loop                   C                       B
-       ;;
-C *** MAIN LOOP END ***
-
-.Lskip:        st8             [rp] = w1, 8            C                       M23
-       shrp            x0 = v0, v3, 63         C                       I0
-   (p8)        cmp.eq.or       p6, p0 = LIM, w2        C                       M I
-   (p8)        add             w2 = INCR, w2           C                       M I
-       cmp.PRED        p8, p0 = w3, u3         C                       M I
-       ;;
-.Lcj5: shrp            x1 = v1, v0, 63         C                       I0
-   (p6)        cmp.eq.or       p8, p0 = LIM, w3        C                       M I
-   (p6)        add             w3 = INCR, w3           C                       M I
-       ADDSUB          w0 = u0, x0             C                       M I
-       ;;
-       st8             [rp] = w2, 8            C                       M23
-       cmp.PRED        p6, p0 = w0, u0         C                       M I
-       ADDSUB          w1 = u1, x1             C                       M I
-       ;;
-.Lcj4: st8             [rp] = w3, 8            C                       M23
-       shrp            x2 = v2, v1, 63         C                       I0
-   (p8)        cmp.eq.or       p6, p0 = LIM, w0        C                       M I
-   (p8)        add             w0 = INCR, w0           C                       M I
-       cmp.PRED        p8, p0 = w1, u1         C                       M I
-       ;;
-.Lcj3: shr.u           r8 = v2, 63             C                       I0
-   (p6)        cmp.eq.or       p8, p0 = LIM, w1        C                       M I
-   (p6)        add             w1 = INCR, w1           C                       M I
-       ADDSUB          w2 = u2, x2             C                       M I
-       ;;
-       st8             [rp] = w0, 8            C                       M23
-       cmp.PRED        p6, p0 = w2, u2         C                       M I
-       ;;
-.Lcj2: st8             [rp] = w1, 8            C                       M23
-   (p8)        cmp.eq.or       p6, p0 = LIM, w2        C                       M I
-   (p8)        add             w2 = INCR, w2           C                       M I
-       ;;
-.Lcj1: st8             [rp] = w2, 8            C                       M23
-       mov.i           ar.lc = r2              C                       I0
-   (p6)        add             r8 = 1, r8              C                       M I
-       br.ret.sptk.many b0                     C                       B
-EPILOGUE()
-ASM_END()
+include_mpn(`ia64/aorslshC_n.asm')
diff --git a/mpn/ia64/aorslsh2_n.asm b/mpn/ia64/aorslsh2_n.asm

new file mode 100644 (file)

index 0000000..6c7c732
--- /dev/null
+++ b/mpn/ia64/aorslsh2_n.asm
@@ -0,0 +1,51 @@
+dnl  IA-64 mpn_addlsh2_n/mpn_sublsh2_n -- rp[] = up[] +- (vp[] << 2).
+
+dnl  Contributed to the GNU project by Torbjorn Granlund.
+
+dnl  Copyright 2003, 2004, 2005, 2010 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C           cycles/limb
+C Itanium:      3.0
+C Itanium 2:    1.5
+
+
+define(LSH,            2)
+define(RSH,            62)
+
+ifdef(`OPERATION_addlsh2_n',`
+  define(ADDSUB,       add)
+  define(ADDP,         1)
+  define(CND,         ltu)
+  define(INCR,        1)
+  define(LIM,         -1)
+  define(func, mpn_addlsh2_n)
+')
+ifdef(`OPERATION_sublsh2_n',`
+  define(ADDSUB,       sub)
+  define(CND,         gtu)
+  define(INCR,        -1)
+  define(LIM,         0)
+  define(func, mpn_sublsh2_n)
+')
+
+
+MULFUNC_PROLOGUE(mpn_addlsh2_n mpn_sublsh2_n)
+
+include_mpn(`ia64/aorslshC_n.asm')
diff --git a/mpn/ia64/aorslshC_n.asm b/mpn/ia64/aorslshC_n.asm

new file mode 100644 (file)

index 0000000..c304100
--- /dev/null
+++ b/mpn/ia64/aorslshC_n.asm
@@ -0,0 +1,360 @@
+dnl  IA-64 mpn_addlshC_n/mpn_sublshC_n -- rp[] = up[] +- (vp[] << C).
+
+dnl  Contributed to the GNU project by Torbjorn Granlund.
+
+dnl  Copyright 2003, 2004, 2005, 2010 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+C           cycles/limb
+C Itanium:      ?
+C Itanium 2:    1.5
+
+C TODO
+C  * Use shladd in feed-in code (for mpn_addlshC_n).
+
+C INPUT PARAMETERS
+define(`rp', `r32')
+define(`up', `r33')
+define(`vp', `r34')
+define(`n',  `r35')
+
+define(cmpeqor, `cmp.eq.or')
+define(PFDIST, 500)
+
+define(`u0',`r14') define(`u1',`r15') define(`u2',`r16') define(`u3',`r17')
+define(`v0',`r18') define(`v1',`r19') define(`v2',`r20') define(`v3',`r21')
+define(`w0',`r22') define(`w1',`r23') define(`w2',`r24') define(`w3',`r25')
+define(`s0',`r26') define(`s1',`r27') define(`s2',`r28') define(`s3',`r29')
+define(`x0',`r30') define(`x1',`r31') define(`x2',`r30') define(`x3',`r31')
+
+
+ASM_START()
+PROLOGUE(func)
+       .prologue
+       .save   ar.lc, r2
+       .body
+ifdef(`HAVE_ABI_32',`
+       addp4   rp = 0, rp              C                       M I
+       addp4   up = 0, up              C                       M I
+       addp4   vp = 0, vp              C                       M I
+       zxt4    n = n                   C                       I
+       ;;
+')
+ {.mmi;        ld8     r11 = [vp], 8           C                       M01
+       ld8     r10 = [up], 8           C                       M01
+       mov.i   r2 = ar.lc              C                       I0
+}{.mmi;        and     r14 = 3, n              C                       M I
+       cmp.lt  p15, p0 = 4, n          C                       M I
+       add     n = -5, n               C                       M I
+       ;;
+}{.mmi;        cmp.eq  p6, p0 = 1, r14         C                       M I
+       cmp.eq  p7, p0 = 2, r14         C                       M I
+       cmp.eq  p8, p0 = 3, r14         C                       M I
+}{.bbb
+  (p6) br.dptk .Lb01                   C                       B
+  (p7) br.dptk .Lb10                   C                       B
+  (p8) br.dptk .Lb11                   C                       B
+}
+
+.Lb00: ld8     v0 = [vp], 8            C                       M01
+       ld8     u0 = [up], 8            C                       M01
+       shr.u   n = n, 2                C                       I0
+       ;;
+.mmi;  ld8     v1 = [vp], 8            C                       M01
+       ld8     u1 = [up], 8            C                       M01
+       shl     x3 = r11, LSH           C                       I0
+       ;;
+.mmi;  ld8     v2 = [vp], 8            C                       M01
+       ld8     u2 = [up], 8            C                       M01
+       shrp    x0 = v0, r11, RSH       C                       I0
+.mmb;  ADDSUB  w3 = r10, x3            C                       M I
+       nop     0
+  (p15)        br.dpnt .grt4                   C                       B
+       ;;
+.mii;  cmp.CND p7, p0 = w3, r10        C                       M I
+       shrp    x1 = v1, v0, RSH        C                       I0
+       ADDSUB  w0 = u0, x0             C                       M I
+       ;;
+.mii;  cmp.CND p8, p0 = w0, u0         C                       M I
+       shrp    x2 = v2, v1, RSH        C                       I0
+       ADDSUB  w1 = u1, x1             C                       M I
+.mmb;  nop     0
+       nop     0
+       br      .Lcj4                   C                       B
+
+ALIGN(32)
+.grt4: ld8     v3 = [vp], 8            C                       M01
+       shrp    x0 = v0, r11, RSH       C                       I0
+       cmp.CND p8, p0 = w3, r10        C                       M I
+       ;;
+.mmi;  ld8     u3 = [up], 8            C                       M01
+       add     r11 = PFDIST, vp
+       shrp    x1 = v1, v0, RSH        C                       I0
+.mmi;  ld8     v0 = [vp], 8            C                       M01
+       ADDSUB  w0 = u0, x0             C                       M I
+       nop     0
+       ;;
+.mmi;  cmp.CND p6, p0 = w0, u0         C                       M I
+       add     r10 = PFDIST, up
+       mov.i   ar.lc = n               C                       I0
+.mmb;  ADDSUB  w1 = u1, x1             C                       M I
+       ld8     u0 = [up], 8            C                       M01
+       br      .LL00                   C                       B
+
+
+       ALIGN(32)
+.Lb01:
+ifdef(`ADDP',
+`      shladd  w2 = r11, LSH, r10      C                       M I
+       shr.u   r8 = r11, RSH           C retval                I0
+  (p15)        br.dpnt .grt1                   C                       B
+       ;;
+',`
+       shl     x2 = r11, LSH           C                       I0
+  (p15)        br.dpnt .grt1                   C                       B
+       ;;
+       ADDSUB  w2 = r10, x2            C                       M I
+       shr.u   r8 = r11, RSH           C retval                I0
+       ;;
+')
+       cmp.CND p6, p0 = w2, r10        C                       M I
+       br              .Lcj1
+
+.grt1: ld8     v3 = [vp], 8            C                       M01
+       ld8     u3 = [up], 8            C                       M01
+       shr.u   n = n, 2                C                       I0
+       ;;
+       ld8     v0 = [vp], 8            C                       M01
+       ld8     u0 = [up], 8            C                       M01
+       mov.i   ar.lc = n               C FIXME swap with next  I0
+ifdef(`ADDP',
+`',`
+       ADDSUB  w2 = r10, x2
+')
+       ;;
+.mmi;  ld8     v1 = [vp], 8            C                       M01
+       ld8     u1 = [up], 8            C                       M01
+       shrp    x3 = v3, r11, RSH       C                       I0
+       ;;
+.mmi;  ld8     v2 = [vp], 8            C                       M01
+       ld8     u2 = [up], 8            C                       M01
+       shrp    x0 = v0, v3, RSH        C                       I0
+.mmb;  cmp.CND p6, p0 = w2, r10        C                       M I
+       ADDSUB  w3 = u3, x3             C                       M I
+       br.cloop.dptk   .grt5           C                       B
+       ;;
+.mmi;  cmp.CND p7, p0 = w3, u3         C                       M I
+       ADDSUB  w0 = u0, x0             C                       M I
+       shrp    x1 = v1, v0, RSH        C                       I0
+.mmb;  nop     0
+       nop     0
+       br      .Lcj5                   C                       B
+.grt5:
+.mmi;  add     r10 = PFDIST, up
+       add     r11 = PFDIST, vp
+       shrp    x0 = v0, v3, RSH        C                       I0
+.mmb;  ld8     v3 = [vp], 8            C                       M01
+       cmp.CND p8, p0 = w3, u3         C                       M I
+       br      .LL01                   C                       B
+
+       ALIGN(32)
+.Lb10: ld8     v2 = [vp], 8            C                       M01
+       ld8     u2 = [up], 8            C                       M01
+       shl     x1 = r11, LSH           C                       I0
+.mmb;  nop     0
+       nop     0
+  (p15)        br.dpnt .grt2                   C                       B
+       ;;
+.mmi;  ADDSUB  w1 = r10, x1            C                       M I
+       nop     0
+       shrp    x2 = v2, r11, RSH       C                       I0
+       ;;
+.mmi;  cmp.CND p9, p0 = w1, r10        C                       M I
+       ADDSUB  w2 = u2, x2             C                       M I
+       shr.u   r8 = v2, RSH            C retval                I0
+       ;;
+.mmb;  cmp.CND p6, p0 = w2, u2         C                       M I
+       nop     0
+       br      .Lcj2                   C                       B
+
+.grt2: ld8     v3 = [vp], 8            C                       M01
+       ld8     u3 = [up], 8            C                       M01
+       shr.u   n = n, 2                C                       I0
+       ;;
+.mmi;  ld8     v0 = [vp], 8            C                       M01
+       ld8     u0 = [up], 8            C                       M01
+       mov.i   ar.lc = n               C                       I0
+.mmi;  ADDSUB  w1 = r10, x1            C                       M I
+       nop     0
+       nop     0
+       ;;
+.mii;  ld8     v1 = [vp], 8            C                       M01
+       shrp    x2 = v2, r11, RSH       C                       I0
+       cmp.CND p8, p0 = w1, r10        C                       M I
+       ;;
+.mmi;  add     r10 = PFDIST, up
+       ld8     u1 = [up], 8            C                       M01
+       shrp    x3 = v3, v2, RSH        C                       I0
+.mmi;  add     r11 = PFDIST, vp
+       ld8     v2 = [vp], 8            C                       M01
+       ADDSUB  w2 = u2, x2             C                       M I
+       ;;
+.mmi;  cmp.CND p6, p0 = w2, u2         C                       M I
+       ld8     u2 = [up], 8            C                       M01
+       shrp    x0 = v0, v3, RSH        C                       I0
+.mbb;  ADDSUB  w3 = u3, x3             C                       M I
+       br.cloop.dpnt   L(top)          C                       B
+       br      L(end)                  C                       B
+
+.Lb11: ld8     v1 = [vp], 8            C                       M01
+       ld8     u1 = [up], 8            C                       M01
+       shl     x0 = r11, LSH           C                       I0
+       ;;
+.mmi;  ld8     v2 = [vp], 8            C                       M01
+       ld8     u2 = [up], 8            C                       M01
+       shr.u   n = n, 2                C                       I0
+.mmb;  nop     0
+       nop     0
+  (p15)        br.dpnt .grt3                   C                       B
+       ;;
+.mii;  nop     0
+       shrp    x1 = v1, r11, RSH       C                       I0
+       ADDSUB  w0 = r10, x0            C                       M I
+       ;;
+.mii;  cmp.CND p8, p0 = w0, r10        C                       M I
+       shrp    x2 = v2, v1, RSH        C                       I0
+       ADDSUB  w1 = u1, x1             C                       M I
+       ;;
+.mmb;  cmp.CND p9, p0 = w1, u1         C                       M I
+       ADDSUB  w2 = u2, x2             C                       M I
+       br      .Lcj3                   C                       B
+.grt3:
+.mmi;  ld8     v3 = [vp], 8            C                       M01
+       ld8     u3 = [up], 8            C                       M01
+       shrp    x1 = v1, r11, RSH       C                       I0
+.mmi;  ADDSUB  w0 = r10, x0            C                       M I
+       nop     0
+       nop     0
+       ;;
+.mmi;  ld8     v0 = [vp], 8            C                       M01
+       cmp.CND p6, p0 = w0, r10        C                       M I
+       mov.i   ar.lc = n               C                       I0
+.mmi;  ld8     u0 = [up], 8            C                       M01
+       ADDSUB  w1 = u1, x1             C                       M I
+       nop       0
+       ;;
+.mmi;  add     r10 = PFDIST, up
+       add     r11 = PFDIST, vp
+       shrp    x2 = v2, v1, RSH        C                       I0
+.mmb;  ld8     v1 = [vp], 8            C                       M01
+       cmp.CND p8, p0 = w1, u1         C                       M I
+       br      .LL11                   C                       B
+
+
+C *** MAIN LOOP START ***
+       ALIGN(32)
+L(top):        st8     [rp] = w1, 8            C                       M23
+       lfetch  [r10], 32
+   (p8)        cmpeqor p6, p0 = LIM, w2        C                       M I
+   (p8)        add     w2 = INCR, w2           C                       M I
+       ld8     v3 = [vp], 8            C                       M01
+       cmp.CND p8, p0 = w3, u3         C                       M I
+       ;;
+.LL01: ld8     u3 = [up], 8            C                       M01
+       shrp    x1 = v1, v0, RSH        C                       I0
+   (p6)        cmpeqor p8, p0 = LIM, w3        C                       M I
+   (p6)        add     w3 = INCR, w3           C                       M I
+       ld8     v0 = [vp], 8            C                       M01
+       ADDSUB  w0 = u0, x0             C                       M I
+       ;;
+       st8     [rp] = w2, 8            C                       M23
+       cmp.CND p6, p0 = w0, u0         C                       M I
+       nop.b   0
+       ld8     u0 = [up], 8            C                       M01
+       lfetch  [r11], 32
+       ADDSUB  w1 = u1, x1             C                       M I
+       ;;
+.LL00: st8     [rp] = w3, 8            C                       M23
+       shrp    x2 = v2, v1, RSH        C                       I0
+   (p8)        cmpeqor p6, p0 = LIM, w0        C                       M I
+   (p8)        add     w0 = INCR, w0           C                       M I
+       ld8     v1 = [vp], 8            C                       M01
+       cmp.CND p8, p0 = w1, u1         C                       M I
+       ;;
+.LL11: ld8     u1 = [up], 8            C                       M01
+       shrp    x3 = v3, v2, RSH        C                       I0
+   (p6)        cmpeqor p8, p0 = LIM, w1        C                       M I
+   (p6)        add     w1 = INCR, w1           C                       M I
+       ld8     v2 = [vp], 8            C                       M01
+       ADDSUB  w2 = u2, x2             C                       M I
+       ;;
+.mmi;  st8     [rp] = w0, 8            C                       M23
+       cmp.CND p6, p0 = w2, u2         C                       M I
+       shrp    x0 = v0, v3, RSH        C                       I0
+       ld8     u2 = [up], 8            C                       M01
+       ADDSUB  w3 = u3, x3             C                       M I
+       br.cloop.dptk   L(top)          C                       B
+       ;;
+C *** MAIN LOOP END ***
+
+L(end):
+.mmi;  st8     [rp] = w1, 8            C                       M23
+   (p8)        cmpeqor p6, p0 = LIM, w2        C                       M I
+       shrp    x1 = v1, v0, RSH        C                       I0
+.mmi;
+   (p8)        add     w2 = INCR, w2           C                       M I
+       cmp.CND p7, p0 = w3, u3         C                       M I
+       ADDSUB  w0 = u0, x0             C                       M I
+       ;;
+.Lcj5:
+.mmi;  st8     [rp] = w2, 8            C                       M23
+   (p6)        cmpeqor p7, p0 = LIM, w3        C                       M I
+       shrp    x2 = v2, v1, RSH        C                       I0
+.mmi;
+   (p6)        add     w3 = INCR, w3           C                       M I
+       cmp.CND p8, p0 = w0, u0         C                       M I
+       ADDSUB  w1 = u1, x1             C                       M I
+       ;;
+.Lcj4:
+.mmi;  st8     [rp] = w3, 8            C                       M23
+   (p7)        cmpeqor p8, p0 = LIM, w0        C                       M I
+       mov.i   ar.lc = r2              C                       I0
+.mmi;
+   (p7)        add     w0 = INCR, w0           C                       M I
+       cmp.CND p9, p0 = w1, u1         C                       M I
+       ADDSUB  w2 = u2, x2             C                       M I
+       ;;
+.Lcj3:
+.mmi;  st8     [rp] = w0, 8            C                       M23
+   (p8)        cmpeqor p9, p0 = LIM, w1        C                       M I
+       shr.u   r8 = v2, RSH            C                       I0
+.mmi;
+   (p8)        add     w1 = INCR, w1           C                       M I
+       cmp.CND p6, p0 = w2, u2         C                       M I
+       nop     0
+       ;;
+.Lcj2:
+.mmi;  st8     [rp] = w1, 8            C                       M23
+   (p9)        cmpeqor p6, p0 = LIM, w2        C                       M I
+   (p9)        add     w2 = INCR, w2           C                       M I
+       ;;
+.Lcj1:
+.mmb;  st8     [rp] = w2               C                       M23
+   (p6)        add     r8 = 1, r8              C                       M I
+       br.ret.sptk.many b0             C                       B
+EPILOGUE()
+ASM_END()
diff --git a/mpn/ia64/bdiv_dbm1c.asm b/mpn/ia64/bdiv_dbm1c.asm

index 6ff4fdaaf9c9f258dea70f8e660ca3dc2c33c666..6cd98e7da759a55129e6d65d020e0e7a65ef41be 100644 (file)
--- a/mpn/ia64/bdiv_dbm1c.asm
+++ b/mpn/ia64/bdiv_dbm1c.asm
@@ -1,5 +1,7 @@
  dnl  IA-64 mpn_bdiv_dbm1.
  
+dnl  Contributed to the GNU project by Torbjorn Granlund.
+
  dnl  Copyright 2008, 2009 Free Software Foundation, Inc.
  
  dnl  This file is part of the GNU MP Library.
diff --git a/mpn/ia64/copyd.asm b/mpn/ia64/copyd.asm

index 759629e4a741196c0e9b87871d269e6b1aebdf68..5ceb83866cdcc312cc00b4015a4b58c9df9df5b0 100644 (file)
--- a/mpn/ia64/copyd.asm
+++ b/mpn/ia64/copyd.asm
@@ -1,5 +1,7 @@
  dnl  IA-64 mpn_copyd -- copy limb vector, decrementing.
  
+dnl  Contributed to the GNU project by Torbjorn Granlund.
+
  dnl  Copyright 2001, 2002, 2004 Free Software Foundation, Inc.
  
  dnl  This file is part of the GNU MP Library.
diff --git a/mpn/ia64/copyi.asm b/mpn/ia64/copyi.asm

index 11451dc08d0198453c8b7cd815ee49e48d51ee90..b8d26198664fce10b59d9df51e440dc7640374d2 100644 (file)
--- a/mpn/ia64/copyi.asm
+++ b/mpn/ia64/copyi.asm
@@ -1,5 +1,7 @@
  dnl  IA-64 mpn_copyi -- copy limb vector, incrementing.
  
+dnl  Contributed to the GNU project by Torbjorn Granlund.
+
  dnl  Copyright 2001, 2002, 2004 Free Software Foundation, Inc.
  
  dnl  This file is part of the GNU MP Library.
diff --git a/mpn/ia64/dive_1.asm b/mpn/ia64/dive_1.asm

index 9b9d085c0ce317213e9fcbe12d287b9e580dcc11..d9cd49c5f81a2131f3addddf401a1965fd945f76 100644 (file)
--- a/mpn/ia64/dive_1.asm
+++ b/mpn/ia64/dive_1.asm
@@ -1,6 +1,8 @@
  dnl  IA-64 mpn_divexact_1 -- mpn by limb exact division.
  
-dnl  Copyright 2003, 2004, 2005 Free Software Foundation, Inc.
+dnl  Contributed to the GNU project by Torbjorn Granlund and Kevin Ryde.
+
+dnl  Copyright 2003, 2004, 2005, 2010 Free Software Foundation, Inc.
  
  dnl  This file is part of the GNU MP Library.
  
@@ -177,22 +179,28 @@ ifdef(`HAVE_ABI_32',
         ld8             r21 = [up], 8
         br              .Lent
  
-.Loop: ld8             r21 = [up], 8
+.Ltop: ld8             r21 = [up], 8
         xma.l           f12 = f9, f8, f10       C q = c * -inverse + si
+       nop.b           0
         ;;
  .Lent: add             r16 = 160, up
         shl             r22 = r21, lshift
+       nop.b           0
         ;;
         stf8            [rp] = f12, 8
         xma.hu          f9 = f12, f6, f9        C c = high(q * divisor + c)
+       nop.b           0
+       nop.m           0
         xmpy.l          f10 = f11, f7           C si = ulimb * inverse
+       nop.b           0
         ;;
         or              r31 = r22, r23
         shr.u           r23 = r21, rshift
+       nop.b           0
         ;;
         lfetch          [r16]
         setf.sig        f11 = r31
-       br.cloop.sptk.few.clr .Loop
+       br.cloop.sptk.few.clr .Ltop
  
  
         xma.l           f12 = f9, f8, f10       C q = c * -inverse + si
diff --git a/mpn/ia64/divrem_1.asm b/mpn/ia64/divrem_1.asm

index aa50ac902bceb43dbcce898209b4d1b4abe43db6..53b994a4ccdc499aa02f2b9eacb0f84fa8f2a4b7 100644 (file)
--- a/mpn/ia64/divrem_1.asm
+++ b/mpn/ia64/divrem_1.asm
@@ -1,6 +1,8 @@
  dnl  IA-64 mpn_divrem_1 and mpn_preinv_divrem_1 -- Divide an mpn number by an
  dnl  unnormalized limb.
  
+dnl  Contributed to the GNU project by Torbjorn Granlund.
+
  dnl  Copyright 2002, 2004, 2005 Free Software Foundation, Inc.
  
  dnl  This file is part of the GNU MP Library.
diff --git a/mpn/ia64/divrem_2.asm b/mpn/ia64/divrem_2.asm

index da3e9d64b767fd121d82745fe1b840fb09af09e0..b72741cf13cd9d32b74e83b028ce756ab4cf0709 100644 (file)
--- a/mpn/ia64/divrem_2.asm
+++ b/mpn/ia64/divrem_2.asm
@@ -1,12 +1,12 @@
-dnl  IA-64 mpn_divrem_2 -- Divide an n-limb number by a 2-limb number.
+dnl  IA-64 mpn_divrem_2 -- Divide an mpn number by a normalized 2-limb number.
  
-dnl  Copyright 2004, 2005 Free Software Foundation, Inc.
+dnl  Copyright 2010, 2013 Free Software Foundation, Inc.
  
  dnl  This file is part of the GNU MP Library.
  
  dnl  The GNU MP Library is free software; you can redistribute it and/or modify
  dnl  it under the terms of the GNU Lesser General Public License as published
-dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  by the Free Software Foundation; either version 2.1 of the License, or (at
  dnl  your option) any later version.
  
  dnl  The GNU MP Library is distributed in the hope that it will be useful, but
@@ -15,45 +15,22 @@ dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
  dnl  License for more details.
  
  dnl  You should have received a copy of the GNU Lesser General Public License
-dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+dnl  along with the GNU MP Library; see the file COPYING.LIB.  If not, write
+dnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+dnl  Boston, MA 02110-1301, USA.
  
  include(`../config.m4')
  
-C         cycles/limb
-C Itanium:    63
-C Itanium 2:  46
+C               norm   frac
+C itanium 1
+C itanium 2     29     29
  
  
  C TODO
-C  * Further optimize the loop.  We could probably do some more trickery with
-C    arithmetic in the FPU, or perhaps use a non-zero addend of xma in more
-C    places.
-C  * Software pipeline for perhaps 5 saved cycles, around the end and start of
-C    the loop.
-C  * Schedule code outside of loop better.
-C  * Update the comments.  They are now using the same name for the same
-C    logical quantity.
-C  * Handle conditional zeroing of r31 in loop more cleanly.
-C  * Inline mpn_invert_limb and schedule its insns across the entire init code.
-C  * Ultimately, use 2-limb, or perhaps 3-limb or 4-limb inverse.
+C  * Inline and interleave limb inversion code with loop setup code.
+C  * We should use explicit bundling in much of the code, since it typically
+C    cuts some cycles with the GNU assembler.
  
-define(`qp',`r32')
-define(`qxn',`r33')
-define(`np',`r34')
-define(`nn',`r35')
-define(`dp',`r36')
-
-define(`fnh',`f11')
-define(`fminus1',`f10')
-define(`fd0',`f13')
-define(`fd1',`f14')
-define(`d0',`r39')
-define(`d1',`r36')
-define(`fnl',`f32')
-define(`fdinv',`f12')
-
-define(`R1',`r38') define(`R0',`r37')
-define(`P1',`r28') define(`P0',`r27')
  
  ASM_START()
  
@@ -61,204 +38,233 @@ C HP's assembler requires these declarations for importing mpn_invert_limb
         .global mpn_invert_limb
         .type   mpn_invert_limb,@function
  
+C INPUT PARAMETERS
+C qp   = r32
+C fn   = r33
+C np   = r34
+C nn   = r35
+C dp   = r36
+
+define(`f0x1', `f15')
+
+ASM_START()
  PROLOGUE(mpn_divrem_2)
         .prologue
-       .save ar.pfs, r42
-       .save ar.lc, r44
-       .save rp, r41
  ifdef(`HAVE_ABI_32',
-`      addp4           qp = 0, qp              C M I
-       addp4           np = 0, np              C M I
-       addp4           dp = 0, dp              C M I
-       zxt4            nn = nn                 C I
-       zxt4            qxn = qxn               C I
+`      addp4           r32 = 0, r32            C M I
+       addp4           r34 = 0, r34            C M I
+       addp4           r36 = 0, r36            C M I
+       zxt4            r35 = r35               C I
+       zxt4            r33 = r33               C I
         ;;
  ')
-
-       alloc           r42 = ar.pfs, 5,8,1,0   C M2
-       ld8             d0 = [dp], 8            C M0M1  d0
-       mov             r44 = ar.lc             C I0
-       shladd          np = nn, 3, np          C M I
-       ;;
-       ld8             d1 = [dp]               C M0M1  d1
-       mov             r41 = b0                C I0
-       add             r15 = -8, np            C M I
-       add             np = -16, np            C M I
-       mov             r40 = r0                C M I
-       ;;
-       ld8             R1 = [r15]              C M0M1  n1
-       ld8             R0 = [r34], -8          C M0M1  n0
-       ;;
-       cmp.ltu         p6, p0 = d1, R1         C M I
-       cmp.eq          p8, p0 = d1, R1         C M I
-       ;;
-  (p8) cmp.leu         p6, p0 = d0, R0
-       cmp.ltu         p8, p9 = R0, d0
-  (p6) br.cond.dpnt    .L_high_limb_1          C FIXME: inline!
+       .save ar.pfs, r42
+       alloc    r42 = ar.pfs, 5, 9, 1, 0
+       shladd   r34 = r35, 3, r34
+       adds     r14 = 8, r36
+       mov      r43 = r1
+       ;;
+       adds     r15 = -8, r34
+       ld8      r39 = [r14]
+       .save ar.lc, r45
+       mov      r45 = ar.lc
+       adds     r14 = -16, r34
+       mov      r40 = r0
+       adds     r34 = -24, r34
+       ;;
+       ld8      r38 = [r15]
+       .save rp, r41
+       mov      r41 = b0
+       .body
+       ld8      r36 = [r36]
+       ld8      r37 = [r14]
+       ;;
+       cmp.gtu  p6, p7 = r39, r38
+  (p6) br.cond.dptk .L8
+       ;;
+       cmp.leu  p8, p9 = r36, r37
+       cmp.geu  p6, p7 = r39, r38
+       ;;
+  (p8) cmp4.ne.and.orcm p6, p7 = 0, r0
+  (p7) br.cond.dptk .L51
  .L8:
-
-       mov             r45 = d1
-       br.call.sptk.many b0 = mpn_invert_limb  C FIXME: inline+schedule
+       add      r14 = r33, r35         // nn + fn
+       mov      r46 = r39              // argument to mpn_invert_limb
         ;;
-       setf.sig        fd1 = d1                C d1
-       setf.sig        fd0 = d0                C d0
-       add             r14 = r33, r35          C nn + qxn
+       adds     r35 = -3, r14
         ;;
-       setf.sig        fdinv = r8              C dinv
-       mov             r9 = -1
-       add             r35 = -3, r14
+       cmp.gt   p12, p0 = r0, r35
+  (p12)        br.cond.dpnt L(end)
+       br.call.sptk.many b0 = mpn_invert_limb
         ;;
-       setf.sig        fminus1 = r9
-       cmp.gt          p6, p0 = r0, r35
-       shladd          qp = r35, 3, qp
-       mov             ar.lc = r35
-       mov             r31 = 0                 C n0
-  (p6) br.cond.dpnt    .Ldone
+       setf.sig f11 = r8               // di (non-final)
+       setf.sig f34 = r39              // d1
+       setf.sig f33 = r36              // d0
+       mov      r1 = r43
         ;;
-       ALIGN(16)
-C *** MAIN LOOP START ***
-.Loop:         C 00
-       mov             r15 = R0                C nadj = n10
-       cmp.le          p14, p15 = 0, R0        C check high bit of R0
-       cmp.le          p8, p0 = r33, r35       C dividend limbs remaining?
-       ;;      C 01
-       .pred.rel "mutex", p14, p15
-  (p8) ld8             r31 = [r34], -8         C n0
-  (p15)        add             r15 = d1, R0            C nadj = n10 + d1
-  (p15)        add             r14 = 1, R1             C nh + (nl:63)
-  (p14)        mov             r14 = R1                C nh
-       cmp.eq          p6, p0 = d1, R1         C nh == d1
-  (p6) br.cond.spnt    .L_R1_eq_d1
-       ;;      C 02
-       setf.sig        f8 = r14                C n2 + (nl:63)
-       setf.sig        f15 = r15               C nadj
-       sub             r23 = -1, R1            C r23 = ~nh
-       ;;      C 03
-       setf.sig        fnh = r23
-       setf.sig        fnl = R0
-       ;;      C 08
-       xma.hu          f7 = fdinv, f8, f15     C xh = HI(dinv*(nh-nmask)+nadj)
-       ;;      C 12
-       xma.l           f7 = f7, fminus1, fnh   C nh + xh
-       ;;      C 16
-       getf.sig        r14 = f7
-       xma.hu          f9 = f7, fd1, fnl       C xh = HI(q1*d1+nl)
-       xma.l           f33 = f7, fd1, fnl      C xh = LO(q1*d1+nl)
-       ;;      C 20
-       getf.sig        r16 = f9
-       sub             r24 = d1, R1
-               C 21
-       getf.sig        r17 = f33
-       ;;      C 25
-       cmp.eq          p6, p7 = r16, r24
-       ;;      C 26
-       .pred.rel "mutex", p6, p7
-  (p6) xma.l           f8 = f7, fminus1, f0    C f8 = -f7
-  (p7) xma.l           f8 = f7,fminus1,fminus1 C f8 = -f7-1
-       ;;      C 27
-       .pred.rel "mutex", p6, p7
-  (p6) sub             r18 = 0, r14            C q = -q1
-  (p7) sub             r18 = -1, r14           C q = -q1-1
-  (p6) add             r14 = 0, r17            C n1 = xl
-  (p7) add             r14 = d1, r17           C n1 = xl + d1
-       ;;      C 30
-       xma.hu          f9 = fd0, f8, f0        C d0*(-f7-1) = -d0*f7-d0
-       xma.l           f35 = fd0, f8, f0
-       ;;      C 34
-       getf.sig        P1 = f9         C P1
-               C 35
-       getf.sig        P0 = f35                C P0
-       ;;
-.L_adj:                C 40
-       cmp.ltu         p8, p0 = r31, P0        C p8 = cy from low limb
-       cmp.ltu         p6, p0 = r14, P1        C p6 = prel cy from high limb
-       sub             R0 = r31, P0
-       sub             R1 = r14, P1
-       ;;      C 41
-  (p8) cmp.eq.or       p6, p0 = 0, R1          C p6 = final cy from high limb
-  (p8) add             R1 = -1, R1
-       cmp.ne          p10, p0 = r0, r0        C clear p10 FIXME: use unc below!
-       cmp.ne          p13, p0 = r0, r0        C clear p13 FIXME: use unc below!
-       ;;      C 42
-  (p6) add             R0 = R0, d0
-  (p6) add             R1 = R1, d1
-  (p6) add             r18 = -1, r18           C q--
-       ;;      C 43
-  (p6) cmp.ltu         p10, p0 = R0, d0
-  (p6) cmp.ltu         p0, p13 = R1, d1
-       ;;      C 44
-  (p10)        cmp.ne.and      p0, p13 = -1, R1        C p13 = !cy
-  (p10)        add             R1 = 1, R1
-  (p13)        br.cond.spnt    .L_two_too_big          C jump if not cy
-       ;;      C 45
-       st8             [qp] = r18, -8
-       add             r35 = -1, r35
-       mov             r31 = 0                 C n0, next iteration
-       br.cloop.sptk   .Loop
-C *** MAIN LOOP END ***
-       ;;
-.Ldone:
-       mov             r8 = r40
-       mov             b0 = r41
-       add             r21 = 8, r34
-       add             r22 = 16, r34
-       ;;
-       st8             [r21] = R0
-       st8             [r22] = R1
-       mov             ar.pfs = r42
-       mov             ar.lc = r44
-       br.ret.sptk.many b0
-
-.L_high_limb_1:
-       .pred.rel "mutex", p8, p9
-       sub             R0 = R0, d0
-  (p8) sub             R1 = R1, d1, 1
-  (p9) sub             R1 = R1, d1
-       mov             r40 = 1
-       br.sptk         .L8
+       mov      r17 = 1
+       setf.sig f9 = r38               // n2
+       xma.l    f6 = f11, f34, f0      // t0 = LO(di * d1)
+       ;;
+       setf.sig f10 = r37              // n1
+       setf.sig f15 = r17              // 1
+       xma.hu   f8 = f11, f33, f0      // s0 = HI(di * d0)
+       ;;
+       getf.sig r17 = f6
+       getf.sig r16 = f8
+       mov      ar.lc = r35
+       ;;
+       sub      r18 = r0, r39          // -d1
+       add      r14 = r17, r36
+       ;;
+       setf.sig f14 = r18              // -d1
+       cmp.leu  p8, p9 = r17, r14
+       add      r16 = r14, r16
+       ;;
+  (p9) adds     r19 = 0, r0
+  (p8) adds     r19 = -1, r0
+       cmp.gtu  p6, p7 = r14, r16
+       ;;
+  (p6) adds     r19 = 1, r19
+       ;;
+ifelse(1,1,`
+       cmp.gt   p7, p6 = r0, r19
+       ;;
+  (p6) adds     r8 = -1, r8            // di--
+  (p6) sub      r14 = r16, r39         // t0 -= d1
+  (p6) cmp.ltu  p6, p7 = r16, r39      // cy for: t0 - d1
+       ;;
+  (p6) cmp.gt   p9, p8 = 1, r19
+  (p7) cmp.gt   p9, p8 = 0, r19
+  (p6) adds     r19 = -1, r19          // t1 -= cy
+       mov      r16 = r14
+       ;;
+  (p8) adds     r8 = -1, r8            // di--
+  (p8) sub      r14 = r16, r39         // t0 -= d1
+  (p8) cmp.ltu  p8, p9 = r16, r39      // cy for: t0 - d1
+       ;;
+  (p8) cmp.gt   p7, p6 = 1, r19
+  (p9) cmp.gt   p7, p6 = 0, r19
+  (p8) adds     r19 = -1, r19          // t1 -= cy
+       mov      r16 = r14
+       ;;
+  (p6) adds     r8 = -1, r8            // di--
+  (p6) sub      r14 = r16, r39         // t0 -= d1
+  (p6) cmp.ltu  p6, p7 = r16, r39      // cy for: t0 - d1
+       ;;
+  (p6) cmp.gt   p9, p8 = 1, r19
+  (p7) cmp.gt   p9, p8 = 0, r19
+  (p6) adds     r19 = -1, r19          // t1 -= cy
+       mov      r16 = r14
+       ;;
+  (p8) adds     r8 = -1, r8            // di--
+  (p8) sub      r14 = r16, r39         // t0 -= d1
+  (p8) cmp.ltu  p8, p9 = r16, r39      // cy for: t0 - d1
+       ;;
+  (p8) adds     r19 = -1, r19          // t1 -= cy
+       mov      r16 = r14
+',`
+       cmp.gt   p8, p9 = r0, r19
+  (p8) br.cond.dpnt .L46
+.L52:
+       cmp.leu  p6, p7 = r39, r16
+       sub      r14 = r16, r39
+       adds     r8 = -1, r8
+       ;;
+  (p7) adds     r19 = -1, r19
+       mov      r16 = r14
+       ;;
+  (p7) cmp.gt   p8, p9 = r0, r19
+  (p9) br.cond.dptk .L52
+.L46:
+')
+       setf.sig f32 = r8               // di
+       shladd   r32 = r35, 3, r32
         ;;
  
-.L_two_too_big:
-       add             R0 = R0, d0
-       add             R1 = R1, d1
-       ;;
-       add             r18 = -1, r18           C q--
-       cmp.ltu         p10, p0 = R0, d0
-       ;;
-  (p10)        add             R1 = 1, R1
-       st8             [qp] = r18, -8
-       add             r35 = -1, r35
-       mov             r31 = 0                 C n0, next iteration
-       br.cloop.sptk   .Loop
-       br.sptk         .Ldone
-
-.L_R1_eq_d1:
-       add             r14 = R0, d1            C r = R0 + d1
-       mov             r18 = -1                C q = -1
+       ALIGN(16)
+L(top):        nop 0
+       nop 0
+       cmp.gt   p8, p9 = r33, r35
+       ;;
+ (p8)  mov      r37 = r0
+ (p9)  ld8      r37 = [r34], -8
+       xma.hu   f8 = f9, f32, f10      //                              0,29
+       xma.l    f12 = f9, f32, f10     //                              0
+       ;;
+       getf.sig r20 = f12              // q0                           4
+       xma.l    f13 = f15, f8, f9      // q += n2                      4
+       sub      r8 = -1, r36           // bitnot d0
+       ;;
+       getf.sig r18 = f13              //                              8
+       xma.l    f7 = f14, f13, f10     //                              8
+       xma.l    f6 = f33, f13, f33     // t0 = LO(d0*q+d0)             8
+       xma.hu   f9 = f33, f13, f33     // t1 = HI(d0*q+d0)             9
         ;;
-       cmp.leu         p6, p0 = R0, r14
- (p6)  br.cond.spnt    .L20                    C jump unless cy
+       getf.sig r38 = f7               // n1                           12
+       getf.sig r16 = f6               //                              13
+       getf.sig r19 = f9               //                              14
         ;;
-       sub             P1 = r14, d0
-       add             R0 = r31, d0
+       sub      r38 = r38, r39         // n1 -= d1                     17
         ;;
-       cmp.ltu         p8, p9 = R0, r31
+       cmp.ne   p9, p0 = r0, r0        // clear p9
+       cmp.leu  p10, p11 = r16, r37    // cy for: n0 - t0              18
         ;;
+       sub      r37 = r37, r16         // n0 -= t0                     19
+  (p11)        sub      r38 = r38, r19, 1      // n1 -= t1 - cy                19
+  (p10)        sub      r38 = r38, r19         // n1 -= t1                     19
+       ;;
+       cmp.gtu  p6, p7 = r20, r38      // n1 >= q0                     20
+       ;;
+  (p7) cmp.ltu  p9, p0 = r8, r37       //                              21
+  (p6) add      r18 = 1, r18           //
+  (p7) add      r37 = r37, r36         //                              21
+  (p7) add      r38 = r38, r39         //                              21
+       ;;
+       setf.sig f10 = r37              // n1                           22
+  (p9) add      r38 = 1, r38           //                              22
+       ;;
+       setf.sig f9 = r38               // n2                           23
+       cmp.gtu  p6, p7 = r39, r38      //                              23
+  (p7) br.cond.spnt L(fix)
+L(bck):        st8      [r32] = r18, -8
+       adds     r35 = -1, r35
+       br.cloop.sptk.few L(top)
+       ;;
+
+L(end):        add     r14 = 8, r34
+       add     r15 = 16, r34
+       mov      b0 = r41
+       ;;
+       st8     [r14] = r37
+       st8     [r15] = r38
+       mov      ar.pfs = r42
+       mov      r8 = r40
+       mov      ar.lc = r45
+       br.ret.sptk.many b0
+       ;;
+.L51:
         .pred.rel "mutex", p8, p9
-       st8             [qp] = r18, -8
-  (p8) add             R1 = r0, P1, 1          C R1 = n1 - P1 - cy
-  (p9) add             R1 = r0, P1             C R1 = n1 - P1
-       add             r35 = -1, r35
-       mov             r31 = 0                 C n0, next iteration
-       br.cloop.sptk   .Loop
-       br.sptk         .Ldone
-       ;;
-.L20:  cmp.ne          p6, p7 = 0, d0
-       ;;
-       .pred.rel "mutex", p6, p7
-  (p6) add             P1 = -1, d0
-  (p7) mov             P1 = d0
-       sub             P0 = r0, d0
-       br.sptk         .L_adj
+       sub      r37 = r37, r36
+  (p9) sub      r38 = r38, r39, 1
+  (p8) sub      r38 = r38, r39
+       adds     r40 = 1, r0
+       br .L8
+       ;;
+
+L(fix):        cmp.geu  p6, p7 = r39, r38
+       cmp.leu  p8, p9 = r36, r37
+       ;;
+  (p8) cmp4.ne.and.orcm p6, p7 = 0, r0
+  (p6) br.cond.dptk L(bck)
+       sub      r37 = r37, r36
+  (p9) sub      r38 = r38, r39, 1
+  (p8) sub      r38 = r38, r39
+       adds     r18 = 1, r18
+       ;;
+       setf.sig f9 = r38               // n2
+       setf.sig f10 = r37              // n1
+       br       L(bck)
+
  EPILOGUE()
  ASM_END()
diff --git a/mpn/ia64/gcd_1.asm b/mpn/ia64/gcd_1.asm

index c6efa5def5e57be542fd7b401819483aee40c4d2..3a173dda07e35beee71e4017901644e3e1d3748a 100644 (file)
--- a/mpn/ia64/gcd_1.asm
+++ b/mpn/ia64/gcd_1.asm
@@ -1,6 +1,9 @@
  dnl  Itanium-2 mpn_gcd_1 -- mpn by 1 gcd.
  
-dnl  Copyright 2002, 2003, 2004, 2005 Free Software Foundation, Inc.
+dnl  Contributed to the GNU project by Kevin Ryde, innerloop by Torbjorn
+dnl  Granlund.
+
+dnl  Copyright 2002, 2003, 2004, 2005, 2012 Free Software Foundation, Inc.
  
  dnl  This file is part of the GNU MP Library.
  
@@ -21,8 +24,8 @@ include(`../config.m4')
  
  
  C           cycles/bitpair (1x1 gcd)
-C Itanium:      14 (approx)
-C Itanium 2:     6.3
+C Itanium:       ?
+C Itanium 2:     5.8  (trimmable to 5.64 with huge ctz_table)
  
  
  C mpn_gcd_1 (mp_srcptr xp, mp_size_t xsize, mp_limb_t y);
@@ -45,29 +48,13 @@ C The main loop consists of transforming x,y to abs(x-y),min(x,y), and then
  C stripping factors of 2 from abs(x-y).  Those factors of two are
  C determined from just y-x, without the abs(), since there's the same
  C number of trailing zeros on n or -n in twos complement.  That makes the
-C dependent chain
-C
-C      cycles
-C        1    sub     x-y and x-y-1
-C        3    andcm   (x-y-1)&~(x-y)
-C        2    popcnt  trailing zeros
-C        3    shr.u   strip abs(x-y)
-C       ---
-C        9
+C dependent chain 8 cycles deep.
  C
  C The selection of x-y versus y-x for abs(x-y), and the selection of the
-C minimum of x and y, is done in parallel with the above.
+C minimum of x and y, is done in parallel with the critical path.
  C
  C The algorithm takes about 0.68 iterations per bit (two N bit operands) on
-C average, hence the final 6.3 cycles/bitpair.
-C
-C The loop is not as fast as one might hope, since there's extra latency
-C from andcm going across to the `multimedia' popcnt, and vice versa from
-C multimedia shr.u back to the integer sub.
-C
-C The loop branch is .sptk.clr since we usually expect a good number of
-C iterations, and the iterations are data dependent so it's unlikely past
-C results will predict anything much about the future.
+C average, hence the final 5.8 cycles/bitpair.
  C
  C Not done:
  C
@@ -88,13 +75,10 @@ C only going down I0), perhaps it'd be possible to shift left instead,
  C using add.  That would mean keeping track of the lowest not-yet-zeroed
  C bit, using some sort of mask.
  C
-C Itanium-1:
-C
-C This code is not designed for itanium-1 and in fact doesn't run well on
-C that chip.  The loop seems to be about 21 cycles, probably because we end
-C up with a 10 cycle replay for not forcibly scheduling the shr.u latency.
-C Lack of branch hints might introduce a couple of bubbles too.
-C
+C TODO:
+C  * Once mod_1_N exists in assembly for Itanium, add conditional calls.
+C  * Call bmod_1 even for n=1 when up[0] >> v0 (like other gcd_1 impls).
+C  * Probably avoid popcnt also outside of loop, instead use ctz_table.
  
  ASM_START()
         .explicit                               C What does this mean?
@@ -103,6 +87,18 @@ C HP's assembler requires these declarations for importing mpn_modexact_1c_odd
         .global mpn_modexact_1c_odd
         .type   mpn_modexact_1c_odd,@function
  
+C ctz_table[n] is the number of trailing zeros on n, or MAXSHIFT if n==0.
+
+deflit(MAXSHIFT, 7)
+deflit(MASK, eval((m4_lshift(1,MAXSHIFT))-1))
+
+       .section        ".rodata"
+ctz_table:
+       .byte   MAXSHIFT
+forloop(i,1,MASK,
+`      .byte   m4_count_trailing_zeros(i)
+')
+
  PROLOGUE(mpn_gcd_1)
  
                 C r32   xp
@@ -146,13 +142,9 @@ ifdef(`HAVE_ABI_32',
  
                 mov     out_carry = 0
  
-               C
-
                 popcnt  y_twos = y_twos         C I0  y twos
                 ;;
  
-               C
-
  { .mmi;                add     x_orig_one = -1, x_orig C M0  orig x-1
                 shr.u   out_divisor = y, y_twos C I0  y without twos
  }{             shr.u   y = y, y_twos           C I1  y without twos
@@ -169,63 +161,61 @@ ifdef(`HAVE_ABI_32',
                 mov     b0 = save_rp            C I0
  }              ;;
  
-               C
-
                 popcnt  x_orig = x_orig         C I0  orig x twos
-
                 popcnt  r9 = r9                 C I0  x twos
                 ;;
  
-               C
-
  {              cmp.lt  p7,p0 = x_orig, y_twos  C M0  orig x_twos < y_twos
                 shr.u   x = x, r9               C I0  x odd
  }              ;;
  
  {      (p7)    mov     y_twos = x_orig         C M0  common twos
                 add     r10 = -1, y             C I0  y-1
-       (p6)    br.dpnt.few .Ldone_y            C B0  x%y==0 then result y
-}              ;;
-
-               C
-
-
-               C No noticable difference in speed for the loop aligned to
-               C 32 or just 16.
-.Ltop:
-               C r8    x
-               C r10  y-1
-               C r34   y
-               C r38   common twos, for use at end
-
-{  .mmi;       cmp.gtu p8,p9 = x, y    C M0  x>y
-               cmp.ne  p10,p0 = x, y   C M1  x==y
-               sub     r9 = y, x       C I0  d = y - x
-}{ .mmi;       sub     r10 = r10, x    C M2  d-1 = y - x - 1
+       (p6)    br.dpnt.few L(done_y)           C B0  x%y==0 then result y
  }              ;;
  
-{ .mmi;        .pred.rel "mutex", p8, p9
-       (p8)    sub     x = x, y        C M0  x>y  use x=x-y, y unchanged
-       (p9)    mov     y = x           C M1  y>=x use y=x
-       (p9)    mov     x = r9          C I0  y>=x use x=y-x
-}{ .mmi;       andcm   r9 = r10, r9    C M2  (d-1)&~d
+               addl    r22 = @ltoffx(ctz_table#), r1
                 ;;
-
-               add     r10 = -1, y     C M0  new y-1
-               popcnt  r9 = r9         C I0  twos on x-y
-}              ;;
-
-{              shr.u   x = x, r9       C I0   new x without twos
-       (p10)   br.sptk.few.clr .Ltop
-}              ;;
-
+               ld8.mov r22 = [r22], ctz_table#
+               br      L(ent)
+
+
+               ALIGN(32)
+L(top):                .pred.rel "mutex", p6,p7
+.mmi;          and     r20 = MASK, r19
+       (p7)    mov     y = x
+       (p6)    sub     x = x, y
+.mmi;  (p7)    mov     x = r19
+               nop     0
+               nop     0
+               ;;
+L(mid):
+.mmb;          add     r21 = r22, r20
+               cmp.eq  p10,p0 = 0, r20
+       (p10)   br.spnt.few.clr  L(shift_alot)
+               ;;
+.mmi;          ld1     r16 = [r21]
+               ;;
+               nop     0
+               shr.u   x = x, r16
+               ;;
+L(ent):
+.mmi;          sub     r19 = y, x
+               cmp.gtu p6,p7 = x, y
+               cmp.ne  p8,p0 = x, y
+.mmb;          nop     0
+               nop     0
+       (p8)    br.sptk.few.clr L(top)
  
  
                 C result is y
-.Ldone_y:
-               shl     r8 = y, y_twos          C I   common factors of 2
-               ;;
+L(done_y):
                 mov     ar.pfs = save_pfs       C I0
+               shl     r8 = y, y_twos          C I   common factors of 2
                 br.ret.sptk.many b0
  
+L(shift_alot):
+               extr.u  r20 = x, MAXSHIFT, MAXSHIFT
+               shr.u   x = x, MAXSHIFT
+               br      L(mid)
  EPILOGUE()
diff --git a/mpn/ia64/gmp-mparam.h b/mpn/ia64/gmp-mparam.h

index 323c167864efb70754300d4ada358eb285e5b4fe..1e0ccb97c3da6aa7d818a95c108ddccad1d9f568 100644 (file)
--- a/mpn/ia64/gmp-mparam.h
+++ b/mpn/ia64/gmp-mparam.h
@@ -1,6 +1,6 @@
  /* gmp-mparam.h -- Compiler/machine parameter header file.
  
-Copyright 2000, 2001, 2002, 2003, 2004, 2005, 2009, 2010 Free Software
+Copyright 2000, 2001, 2002, 2003, 2004, 2005, 2009, 2010, 2011 Free Software
  Foundation, Inc.
  
  This file is part of the GNU MP Library.
@@ -21,187 +21,169 @@ along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
  #define GMP_LIMB_BITS 64
  #define BYTES_PER_MP_LIMB 8
  
-/* 1300MHz Itanium2 (babe.fsffrance.org) */
-
+/* 900MHz Itanium2 (titanic.gmplib.org) */
  
+#define MOD_1_1P_METHOD                      2
  #define MOD_1_NORM_THRESHOLD                 0  /* always */
  #define MOD_1_UNNORM_THRESHOLD               0  /* always */
-#define MOD_1N_TO_MOD_1_1_THRESHOLD          4
-#define MOD_1U_TO_MOD_1_1_THRESHOLD          8
-#define MOD_1_1_TO_MOD_1_2_THRESHOLD         0
-#define MOD_1_2_TO_MOD_1_4_THRESHOLD        22
-#define PREINV_MOD_1_TO_MOD_1_THRESHOLD     22
+#define MOD_1N_TO_MOD_1_1_THRESHOLD          3
+#define MOD_1U_TO_MOD_1_1_THRESHOLD          5
+#define MOD_1_1_TO_MOD_1_2_THRESHOLD        26
+#define MOD_1_2_TO_MOD_1_4_THRESHOLD         0  /* never mpn_mod_1s_2p */
+#define PREINV_MOD_1_TO_MOD_1_THRESHOLD     10
  #define USE_PREINV_DIVREM_1                  1  /* native */
+#define DIV_QR_2_PI2_THRESHOLD              12
  #define DIVEXACT_1_THRESHOLD                 0  /* always (native) */
  #define BMOD_1_TO_MOD_1_THRESHOLD        MP_SIZE_T_MAX  /* never */
  
-#define MUL_TOOM22_THRESHOLD                44
-#define MUL_TOOM33_THRESHOLD                89
-#define MUL_TOOM44_THRESHOLD               232
-#define MUL_TOOM6H_THRESHOLD               351
-#define MUL_TOOM8H_THRESHOLD               454
+#define MUL_TOOM22_THRESHOLD                36
+#define MUL_TOOM33_THRESHOLD               129
+#define MUL_TOOM44_THRESHOLD               214
+#define MUL_TOOM6H_THRESHOLD               318
+#define MUL_TOOM8H_THRESHOLD               430
+
+#define MUL_TOOM32_TO_TOOM43_THRESHOLD     121
+#define MUL_TOOM32_TO_TOOM53_THRESHOLD     138
+#define MUL_TOOM42_TO_TOOM53_THRESHOLD     121
+#define MUL_TOOM42_TO_TOOM63_THRESHOLD     145
+#define MUL_TOOM43_TO_TOOM54_THRESHOLD     203
  
-#define MUL_TOOM32_TO_TOOM43_THRESHOLD     101
-#define MUL_TOOM32_TO_TOOM53_THRESHOLD     160
-#define MUL_TOOM42_TO_TOOM53_THRESHOLD     138
-#define MUL_TOOM42_TO_TOOM63_THRESHOLD     159
+#define SQR_BASECASE_THRESHOLD              11
+#define SQR_TOOM2_THRESHOLD                 84
+#define SQR_TOOM3_THRESHOLD                131
+#define SQR_TOOM4_THRESHOLD                494
+#define SQR_TOOM6_THRESHOLD                  0  /* always */
+#define SQR_TOOM8_THRESHOLD                  0  /* always */
  
-#define SQR_BASECASE_THRESHOLD              26
-#define SQR_TOOM2_THRESHOLD                119
-#define SQR_TOOM3_THRESHOLD                141
-#define SQR_TOOM4_THRESHOLD                282
-#define SQR_TOOM6_THRESHOLD                375
-#define SQR_TOOM8_THRESHOLD                527
+#define MULMID_TOOM42_THRESHOLD             98
  
-#define MULMOD_BNM1_THRESHOLD               24
-#define SQRMOD_BNM1_THRESHOLD               19
+#define MULMOD_BNM1_THRESHOLD               21
+#define SQRMOD_BNM1_THRESHOLD               25
  
-#define MUL_FFT_MODF_THRESHOLD             888  /* k = 5 */
+#define MUL_FFT_MODF_THRESHOLD             468  /* k = 5 */
  #define MUL_FFT_TABLE3                                      \
-  { {    888, 5}, {     31, 6}, {     16, 5}, {     33, 6}, \
-    {     17, 5}, {     35, 6}, {     28, 7}, {     15, 6}, \
-    {     33, 7}, {     17, 6}, {     35, 7}, {     19, 6}, \
-    {     39, 7}, {     29, 8}, {     15, 7}, {     33, 8}, \
-    {     17, 7}, {     37, 8}, {     19, 7}, {     41, 8}, \
-    {     21, 7}, {     43, 8}, {     23, 7}, {     47, 8}, \
-    {     27, 7}, {     55, 8}, {     31, 7}, {     63, 8}, \
-    {     37, 9}, {     19, 8}, {     43, 9}, {     23, 8}, \
-    {     51, 9}, {     27, 8}, {     55, 9}, {     31, 8}, \
-    {     63, 9}, {     35, 8}, {     71, 9}, {     39, 8}, \
-    {     79, 9}, {     43,10}, {     23, 9}, {     47, 8}, \
-    {     95, 9}, {     55,10}, {     31, 9}, {     71,10}, \
+  { {    476, 5}, {     27, 6}, {     14, 5}, {     29, 6}, \
+    {     33, 7}, {     17, 6}, {     37, 7}, {     19, 6}, \
+    {     39, 7}, {     21, 6}, {     43, 7}, {     33, 8}, \
+    {     17, 7}, {     37, 8}, {     19, 7}, {     39, 8}, \
+    {     21, 7}, {     43, 8}, {     37, 9}, {     19, 8}, \
+    {     43, 9}, {     23, 8}, {     51, 9}, {     27, 8}, \
+    {     57, 9}, {     31, 8}, {     63, 9}, {     43,10}, \
+    {     23, 9}, {     59,10}, {     31, 9}, {     71,10}, \
      {     39, 9}, {     83,10}, {     47, 9}, {     99,10}, \
-    {     55,11}, {     31,10}, {     63, 9}, {    127,10}, \
-    {     71, 9}, {    143,10}, {     87,11}, {     47,10}, \
+    {     55,11}, {     31,10}, {     87,11}, {     47,10}, \
      {    111,12}, {     31,11}, {     63,10}, {    143,11}, \
-    {     79,10}, {    167,11}, {     95,10}, {    199,11}, \
-    {    111,12}, {     63,11}, {    127,10}, {    255,11}, \
-    {    143,10}, {    287,11}, {    159,10}, {    319,12}, \
-    {     95,11}, {    223,13}, {     63,12}, {    127,11}, \
-    {    287,12}, {    159,11}, {    335,12}, {    191,11}, \
-    {    383,10}, {    767,11}, {    399,12}, {    223,13}, \
-    {    127,12}, {    255,11}, {    511,10}, {   1023,12}, \
-    {    287,11}, {    575,10}, {   1151,12}, {    319,11}, \
-    {    639,10}, {   1279,11}, {    671,13}, {    191,12}, \
-    {    383,11}, {    767,10}, {   1535,12}, {    415,11}, \
-    {    831,14}, {    127,13}, {    255,12}, {    511,11}, \
-    {   1023,12}, {    543,11}, {   1087,12}, {    575,13}, \
-    {    319,12}, {    639,11}, {   1279,12}, {    671,11}, \
-    {   1343,12}, {    703,11}, {   1471,13}, {    383,12}, \
-    {    767,11}, {   1535,12}, {    799,11}, {   1599,12}, \
-    {    831,13}, {    447,12}, {    959,14}, {    255,13}, \
-    {    511,12}, {   1055,11}, {   2111,12}, {   1087,13}, \
-    {    575,12}, {   1215,11}, {   2431,12}, {   1247,13}, \
-    {    639,12}, {   1279,11}, {   2559,12}, {   1343,13}, \
-    {    703,12}, {   1471,14}, {    383,13}, {    767,12}, \
-    {   1599,13}, {    831,12}, {   1663,11}, {   3327,12}, \
-    {   1727,13}, {    895,12}, {   1791,13}, {    959,15}, \
-    {    255,14}, {    511,13}, {   1023,12}, {   2047,13}, \
-    {   1087,12}, {   2175,13}, {   1151,12}, {   2303,13}, \
-    {   1215,11}, {   4863,12}, {   2495,14}, {    639,13}, \
-    {   1343,12}, {   2687,13}, {   1471,12}, {   2943,14}, \
-    {    767,13}, {   1599,12}, {   3199,13}, {   1727,12}, \
-    {   3455,14}, {    895,13}, {   1983,12}, {   3967,15}, \
-    {    511,14}, {   1023,13}, {   2111,12}, {   4223,13}, \
-    {   2239,12}, {   4479,13}, {   2495,14}, {   1279,13}, \
-    {   2751,14}, {   1407,13}, {   2943,15}, {    767,14}, \
-    {   1535,13}, {   3199,14}, {   1663,13}, {   3455,14}, \
-    {   1791,12}, {   7167,14}, {   1919,13}, {   3967,16}, \
-    {    511,15}, {   1023,14}, {   2175,13}, {   4351,14}, \
-    {   2431,15}, {   1279,14}, {   2943,13}, {   5887,15}, \
-    {   1535,14}, {   3199,13}, {   6399,14}, {  16384,15}, \
-    {  32768,16}, {  65536,17}, { 131072,18}, { 262144,19}, \
-    { 524288,20}, {1048576,21}, {2097152,22}, {4194304,23}, \
-    {8388608,24} }
-#define MUL_FFT_TABLE3_SIZE 217
-#define MUL_FFT_THRESHOLD                 9856
-
-#define SQR_FFT_MODF_THRESHOLD             751  /* k = 5 */
+    {     79,10}, {    167,11}, {     95,10}, {    191,11}, \
+    {    111,12}, {     63,11}, {    143,10}, {    287, 9}, \
+    {    575,10}, {    303,11}, {    159,10}, {    319,12}, \
+    {     95,11}, {    191,10}, {    399,11}, {    207,10}, \
+    {    431,13}, {     63,12}, {    127,11}, {    271,10}, \
+    {    543,11}, {    287,10}, {    575,11}, {    303,12}, \
+    {    159,11}, {    335,10}, {    671,11}, {    367,12}, \
+    {    191,11}, {    399,10}, {    799,11}, {    431,12}, \
+    {    223,11}, {    447,13}, {    127,12}, {    255,11}, \
+    {    543,12}, {    287,11}, {    607,12}, {    319,11}, \
+    {    671,12}, {    351,11}, {    703,13}, {    191,12}, \
+    {    415,11}, {    863,12}, {    447,14}, {    127,13}, \
+    {    255,12}, {    607,13}, {    319,12}, {    735,13}, \
+    {    383,12}, {    799,11}, {   1599,12}, {    863,13}, \
+    {    447,12}, {    927,11}, {   1855,14}, {    255,13}, \
+    {    511,12}, {   1055,13}, {    575,12}, {   1215,13}, \
+    {    639,12}, {   1279,13}, {    703,14}, {    383,13}, \
+    {    767,12}, {   1535,13}, {    831,12}, {   1663,13}, \
+    {    895,12}, {   1791,15}, {    255,14}, {    511,13}, \
+    {   1087,12}, {   2175,13}, {   1215,14}, {    639,13}, \
+    {   1343,12}, {   2687,13}, {   1471,14}, {    767,13}, \
+    {   1599,12}, {   3199,13}, {   1663,14}, {    895,13}, \
+    {   1855,15}, {    511,14}, {   1023,13}, {   2175,14}, \
+    {   1151,13}, {   2431,14}, {   1279,13}, {   2687,14}, \
+    {   1407,15}, {    767,14}, {   1535,13}, {   3199,14}, \
+    {   1663,13}, {   3455,14}, {   1791,16}, {  65536,17}, \
+    { 131072,18}, { 262144,19}, { 524288,20}, {1048576,21}, \
+    {2097152,22}, {4194304,23}, {8388608,24} }
+#define MUL_FFT_TABLE3_SIZE 155
+#define MUL_FFT_THRESHOLD                 6272
+
+#define SQR_FFT_MODF_THRESHOLD             440  /* k = 5 */
  #define SQR_FFT_TABLE3                                      \
-  { {    751, 5}, {     35, 6}, {     18, 5}, {     37, 6}, \
-    {     29, 7}, {     15, 6}, {     33, 7}, {     17, 6}, \
-    {     35, 7}, {     29, 8}, {     15, 7}, {     37, 8}, \
-    {     19, 7}, {     41, 8}, {     21, 7}, {     43, 8}, \
-    {     23, 7}, {     47, 8}, {     43, 9}, {     23, 8}, \
-    {     51, 9}, {     27, 8}, {     55, 9}, {     31, 8}, \
-    {     63, 9}, {     39, 8}, {     79, 9}, {     43,10}, \
-    {     23, 9}, {     47, 8}, {     95, 9}, {     55,10}, \
-    {     31, 9}, {     67,10}, {     39, 9}, {     83,10}, \
+  { {    436, 5}, {     14, 4}, {     29, 5}, {     31, 6}, \
+    {     35, 7}, {     18, 6}, {     37, 7}, {     37, 8}, \
+    {     19, 7}, {     40, 8}, {     37, 9}, {     19, 8}, \
+    {     43, 9}, {     23, 8}, {     49, 9}, {     27, 8}, \
+    {     57, 9}, {     43,10}, {     23, 9}, {     55,10}, \
+    {     31, 9}, {     71,10}, {     39, 9}, {     83,10}, \
      {     47, 9}, {     99,10}, {     55,11}, {     31,10}, \
-    {     63, 9}, {    127,10}, {     79,11}, {     47,10}, \
-    {    103,12}, {     31,11}, {     63,10}, {    143,11}, \
-    {     79,10}, {    159,11}, {     95,10}, {    199,11}, \
-    {    111,12}, {     63,11}, {    127,10}, {    255,11}, \
-    {    143,10}, {    287,11}, {    159,12}, {     95,11}, \
-    {    191,10}, {    383,11}, {    207,13}, {     63,12}, \
-    {    127,11}, {    255,10}, {    511,11}, {    271,12}, \
-    {    159,11}, {    319,10}, {    639,11}, {    335,12}, \
-    {    191,11}, {    383,10}, {    767,12}, {    223,13}, \
-    {    127,11}, {    511,10}, {   1023,11}, {    527,12}, \
-    {    287,11}, {    575,10}, {   1151,11}, {    591,12}, \
-    {    319,11}, {    639,13}, {    191,12}, {    383,11}, \
-    {    767,10}, {   1535,11}, {    799,10}, {   1599, 9}, \
-    {   3199,14}, {    127,13}, {    255,12}, {    511, 9}, \
-    {   4095,10}, {   2111,12}, {    543,11}, {   1087,10}, \
-    {   2239,12}, {    575,10}, {   2303,13}, {    319,12}, \
-    {    671,11}, {   1471,13}, {    383,11}, {   1599,12}, \
-    {    831,11}, {   1663,12}, {    863,10}, {   3455,13}, \
-    {    447,12}, {    895,11}, {   1791,14}, {    255,13}, \
-    {    511,12}, {   1023,11}, {   2111,12}, {   1087,11}, \
-    {   2239,13}, {    575,12}, {   1215,11}, {   2495,13}, \
-    {    639,12}, {   1343,13}, {    703,12}, {   1407,14}, \
-    {    383,13}, {    767,12}, {   1599,13}, {    831,12}, \
-    {   1727,11}, {   3455,12}, {   1791,15}, {    255,14}, \
-    {    511,13}, {   1023,12}, {   2111,11}, {   4223,12}, \
-    {   2239,11}, {   4479,10}, {   8959,11}, {   4607,13}, \
-    {   1215,14}, {    639,13}, {   1343,12}, {   2815,13}, \
-    {   1471,12}, {   2943,14}, {    767,13}, {   1599,12}, \
-    {   3199,13}, {   1727,12}, {   3455,14}, {    895,13}, \
-    {   1855,12}, {   3711,13}, {   1983,12}, {   3967,15}, \
-    {    511,14}, {   1023,13}, {   2111,12}, {   4223,13}, \
-    {   2239,12}, {   4479,14}, {   1151,13}, {   2495,14}, \
-    {   1279,13}, {   2687,14}, {   1407,13}, {   2943,15}, \
-    {    767,14}, {   1535,13}, {   3071,14}, {   1663,13}, \
-    {   3327,14}, {   1791,16}, {    511,15}, {   1023,14}, \
-    {   2047,13}, {   4223,14}, {   2175,13}, {   4479,12}, \
-    {   8959,14}, {   2303,13}, {   4735,14}, {   2431,15}, \
-    {   1279,14}, {   2943,15}, {   1535,14}, {   3071,13}, \
-    {   6143,14}, {  16384,15}, {  32768,16}, {  65536,17}, \
+    {     87,11}, {     47,10}, {    111,12}, {     31,11}, \
+    {     63,10}, {    135,11}, {     79,10}, {    167,11}, \
+    {     95,10}, {    191,11}, {    111,12}, {     63,11}, \
+    {    127,10}, {    255,11}, {    143,10}, {    287, 9}, \
+    {    575,10}, {    303,11}, {    159,10}, {    319,12}, \
+    {     95,11}, {    191,10}, {    399,11}, {    207,10}, \
+    {    431,13}, {     63,12}, {    127,11}, {    271,10}, \
+    {    543,11}, {    303,12}, {    159,11}, {    335,10}, \
+    {    671,11}, {    367,10}, {    735,12}, {    191,11}, \
+    {    399,10}, {    799,11}, {    431,12}, {    223,11}, \
+    {    463,13}, {    127,12}, {    255,11}, {    543,12}, \
+    {    287,11}, {    607,12}, {    319,11}, {    671,12}, \
+    {    351,11}, {    735,13}, {    191,12}, {    383,11}, \
+    {    799,12}, {    415,11}, {    863,12}, {    447,11}, \
+    {    895,14}, {    127,13}, {    255,12}, {    543,11}, \
+    {   1087,12}, {    607,13}, {    319,12}, {    735,13}, \
+    {    383,12}, {    863,13}, {    447,12}, {    959,14}, \
+    {    255,13}, {    511,12}, {   1087,13}, {    575,12}, \
+    {   1183,13}, {    639,12}, {   1279,13}, {    703,12}, \
+    {   1407,14}, {    383,13}, {    767,12}, {   1535,13}, \
+    {    831,12}, {   1663,13}, {    895,12}, {   1791,13}, \
+    {    959,15}, {    255,14}, {    511,13}, {   1087,12}, \
+    {   2175,13}, {   1215,14}, {    639,13}, {   1343,12}, \
+    {   2687,13}, {   1471,14}, {    767,13}, {   1663,14}, \
+    {    895,13}, {   1919,15}, {    511,14}, {   1023,13}, \
+    {   2175,14}, {   1151,13}, {   2431,14}, {   1279,13}, \
+    {   2687,14}, {   1407,15}, {    767,14}, {   1535,13}, \
+    {   3199,14}, {   1663,13}, {   3455,14}, {   1791,13}, \
+    {   8192,14}, {  16384,15}, {  32768,16}, {  65536,17}, \
      { 131072,18}, { 262144,19}, { 524288,20}, {1048576,21}, \
      {2097152,22}, {4194304,23}, {8388608,24} }
-#define SQR_FFT_TABLE3_SIZE 203
-#define SQR_FFT_THRESHOLD                 7552
+#define SQR_FFT_TABLE3_SIZE 151
+#define SQR_FFT_THRESHOLD                 4032
  
-#define MULLO_BASECASE_THRESHOLD            17
-#define MULLO_DC_THRESHOLD                  91
-#define MULLO_MUL_N_THRESHOLD            19187
+#define MULLO_BASECASE_THRESHOLD             0  /* always */
+#define MULLO_DC_THRESHOLD                  62
+#define MULLO_MUL_N_THRESHOLD            12322
  
-#define DC_DIV_QR_THRESHOLD                 72
-#define DC_DIVAPPR_Q_THRESHOLD             254
-#define DC_BDIV_QR_THRESHOLD               117
-#define DC_BDIV_Q_THRESHOLD                292
+#define DC_DIV_QR_THRESHOLD                 55
+#define DC_DIVAPPR_Q_THRESHOLD             220
+#define DC_BDIV_QR_THRESHOLD                92
+#define DC_BDIV_Q_THRESHOLD                252
  
-#define INV_MULMOD_BNM1_THRESHOLD           86
-#define INV_NEWTON_THRESHOLD               178
-#define INV_APPR_THRESHOLD                 179
+#define INV_MULMOD_BNM1_THRESHOLD           70
+#define INV_NEWTON_THRESHOLD               156
+#define INV_APPR_THRESHOLD                 154
  
-#define BINV_NEWTON_THRESHOLD              300
-#define REDC_1_TO_REDC_2_THRESHOLD           2
-#define REDC_2_TO_REDC_N_THRESHOLD         167
+#define BINV_NEWTON_THRESHOLD              248
+#define REDC_1_TO_REDC_2_THRESHOLD           0  /* always */
+#define REDC_2_TO_REDC_N_THRESHOLD         149
  
-#define MU_DIV_QR_THRESHOLD               1787
-#define MU_DIVAPPR_Q_THRESHOLD            1470
+#define MU_DIV_QR_THRESHOLD               1142
+#define MU_DIVAPPR_Q_THRESHOLD            1142
  #define MUPI_DIV_QR_THRESHOLD                0  /* always */
-#define MU_BDIV_QR_THRESHOLD              1787
-#define MU_BDIV_Q_THRESHOLD               2089
-
-#define MATRIX22_STRASSEN_THRESHOLD         27
-#define HGCD_THRESHOLD                     139
-#define GCD_DC_THRESHOLD                   469
-#define GCDEXT_DC_THRESHOLD                496
-#define JACOBI_BASE_METHOD                   1
-
-#define GET_STR_DC_THRESHOLD                14
-#define GET_STR_PRECOMPUTE_THRESHOLD        22
-#define SET_STR_DC_THRESHOLD              1474
-#define SET_STR_PRECOMPUTE_THRESHOLD      3495
+#define MU_BDIV_QR_THRESHOLD              1142
+#define MU_BDIV_Q_THRESHOLD               1470
+
+#define POWM_SEC_TABLE  2,29,298,1897
+
+#define MATRIX22_STRASSEN_THRESHOLD         19
+#define HGCD_THRESHOLD                     115
+#define HGCD_APPR_THRESHOLD                181
+#define HGCD_REDUCE_THRESHOLD             3014
+#define GCD_DC_THRESHOLD                   555
+#define GCDEXT_DC_THRESHOLD                368
+#define JACOBI_BASE_METHOD                   4
+
+#define GET_STR_DC_THRESHOLD                13
+#define GET_STR_PRECOMPUTE_THRESHOLD        21
+#define SET_STR_DC_THRESHOLD              1216
+#define SET_STR_PRECOMPUTE_THRESHOLD      3170
+
+#define FAC_DSC_THRESHOLD                  746
+#define FAC_ODD_THRESHOLD                    0  /* always */
diff --git a/mpn/ia64/hamdist.asm b/mpn/ia64/hamdist.asm

index 92dffcebecadcf6d1c4d959b0cdc81dc96f58424..e838800ceebe930162aaa3ab6dd26afb3fc67c6e 100644 (file)
--- a/mpn/ia64/hamdist.asm
+++ b/mpn/ia64/hamdist.asm
@@ -1,7 +1,9 @@
  dnl  IA-64 mpn_hamdist -- mpn hamming distance.
  
+dnl  Contributed to the GNU project by Torbjorn Granlund.
+
  dnl  Copyright 2003, 2004, 2005 Free Software Foundation, Inc.
-dnl
+
  dnl  This file is part of the GNU MP Library.
  
  dnl  The GNU MP Library is free software; you can redistribute it and/or modify
diff --git a/mpn/ia64/ia64-defs.m4 b/mpn/ia64/ia64-defs.m4

index 2a8b5cffb290cf16ede07fb1c6adcf7bbbb7969b..419adc437baf04ff030f89c082f06f5b4f869e3c 100644 (file)
--- a/mpn/ia64/ia64-defs.m4
+++ b/mpn/ia64/ia64-defs.m4
@@ -120,5 +120,17 @@ define(`ASSERT_label_counter',eval(ASSERT_label_counter+1))
  ')')
  define(`ASSERT_label_counter',1)
  
+define(`getfsig', `getf.sig')dnl  Dot-free aliases: each name below expands to the dotted IA-64 mnemonic.
+define(`setfsig', `setf.sig')
+define(`cmpeq',   `cmp.eq')
+define(`cmpne',   `cmp.ne')
+define(`cmpltu',  `cmp.ltu')
+define(`cmpleu',  `cmp.leu')
+define(`cmpgtu',  `cmp.gtu')
+define(`cmpgeu',  `cmp.geu')
+define(`cmple',   `cmp.le')
+define(`cmpgt',   `cmp.gt')
+define(`cmpeqor', `cmp.eq.or')
+define(`cmpequc', `cmp.eq.unc')dnl  NOTE(review): why dot-free spellings are needed is not visible in this hunk -- confirm at the call sites.
  
  divert
diff --git a/mpn/ia64/invert_limb.asm b/mpn/ia64/invert_limb.asm

index ca987ba0d819801ae9e18acf6d7390523c4953c7..976a89c3dd8bed948421bdabe16fdcc7ba9e9b48 100644 (file)
--- a/mpn/ia64/invert_limb.asm
+++ b/mpn/ia64/invert_limb.asm
@@ -1,5 +1,7 @@
  dnl  IA-64 mpn_invert_limb -- Invert a normalized limb.
  
+dnl  Contributed to the GNU project by Torbjorn Granlund and Kevin Ryde.
+
  dnl  Copyright 2000, 2002, 2004 Free Software Foundation, Inc.
  
  dnl  This file is part of the GNU MP Library.
diff --git a/mpn/ia64/logops_n.asm b/mpn/ia64/logops_n.asm

index 3ab9d2518b5aa52052a151f21b5c6ebc03626209..5e69f3ba78ac29babe0befc26c3d3781f38ecea6 100644 (file)
--- a/mpn/ia64/logops_n.asm
+++ b/mpn/ia64/logops_n.asm
@@ -1,6 +1,8 @@
  dnl  IA-64 mpn_and_n, mpn_andn_n, mpn_nand_n, mpn_ior_n, mpn_iorn_n,
  dnl  mpn_nior_n, mpn_xor_n, mpn_xnor_n -- mpn bitwise logical operations.
  
+dnl  Contributed to the GNU project by Torbjorn Granlund.
+
  dnl  Copyright 2003, 2004, 2005 Free Software Foundation, Inc.
  dnl
  dnl  This file is part of the GNU MP Library.
diff --git a/mpn/ia64/lorrshift.asm b/mpn/ia64/lorrshift.asm

index 59badebc6af96750ecf817191f3ab0ff80ce468d..80167acc3170a6050678e2ddc23a55f774d3267e 100644 (file)
--- a/mpn/ia64/lorrshift.asm
+++ b/mpn/ia64/lorrshift.asm
@@ -1,5 +1,7 @@
  dnl  IA-64 mpn_lshift/mpn_rshift.
  
+dnl  Contributed to the GNU project by Torbjorn Granlund.
+
  dnl  Copyright 2000, 2001, 2002, 2003, 2004, 2005 Free Software Foundation,
  dnl  Inc.
  
@@ -21,20 +23,25 @@ dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
  include(`../config.m4')
  
  C           cycles/limb
-C Itanium:      2.0
-C Itanium 2:    1.0
+C Itanium:      2
+C Itanium 2:    1
  
  C This code is scheduled deeply since the plain shift instructions shr and shl
  C have a latency of 4 (on Itanium) or 3 (on Itanium 2).  Poor scheduling of
  C these instructions cause a 10 cycle replay trap on Itanium.
  
-C TODO
-C  * Optimize function entry and feed-in code.
+C The ld8 scheduling should probably be decreased to make the function smaller.
+C Good lfetch will make sure we never stall anyway.
+
+C We should actually issue the first ld8 at cycle 0, and the first BSH/FSH pair
+C at cycle 2.  Judicious use of predicates could allow us to issue more ld8's
+C in the prologue.
+
  
  C INPUT PARAMETERS
-define(`rp',`r32')
-define(`up',`r33')
-define(`n',`r34')
+define(`rp', `r32')
+define(`up', `r33')
+define(`n',  `r34')
  define(`cnt',`r35')
  
  define(`tnc',`r9')
@@ -61,284 +68,279 @@ MULFUNC_PROLOGUE(mpn_lshift mpn_rshift)
  ASM_START()
  PROLOGUE(func)
         .prologue
-       .save           ar.lc, r2
+       .save   ar.lc, r2
         .body
  ifdef(`HAVE_ABI_32',
-`      addp4           rp = 0, rp              C                       M I
-       addp4           up = 0, up              C                       M I
-       sxt4            n = n                   C                       M I
-       zxt4            cnt = cnt               C                       I
+`      addp4   rp = 0, rp              C                       M I
+       addp4   up = 0, up              C               M I
+       sxt4    n = n                   C               M I
+       zxt4    cnt = cnt               C               I
         ;;
  ')
  
- {.mmi;        cmp.lt          p14, p15 = 4, n         C                       M I
-       and             r14 = 3, n              C                       M I
-       mov.i           r2 = ar.lc              C                       I0
-}{.mmi;        add             r15 = -1, n             C                       M I
-       sub             tnc = 64, cnt           C                       M I
-       add             r16 = -5, n
-       ;;
-}{.mmi;        cmp.eq          p6, p0 = 1, r14         C                       M I
-       cmp.eq          p7, p0 = 2, r14         C                       M I
-       shr.u           n = r16, 2              C                       I0
-}{.mmi;        cmp.eq          p8, p0 = 3, r14         C                       M I
+ {.mmi;        cmp.lt  p14, p15 = 4, n         C               M I
+       and     r14 = 3, n              C               M I
+       mov.i   r2 = ar.lc              C               I0
+}{.mmi;        add     r15 = -1, n             C               M I
+       sub     tnc = 64, cnt           C               M I
+       add     r16 = -5, n
+       ;;
+}{.mmi;        cmp.eq  p6, p0 = 1, r14         C               M I
+       cmp.eq  p7, p0 = 2, r14         C               M I
+       shr.u   n = r16, 2              C               I0
+}{.mmi;        cmp.eq  p8, p0 = 3, r14         C               M I
  ifdef(`OPERATION_lshift',
-`      shladd          up = r15, 3, up         C                       M I
-       shladd          rp = r15, 3, rp')       C                       M I
+`      shladd  up = r15, 3, up         C               M I
+       shladd  rp = r15, 3, rp')       C               M I
         ;;
-}{.mmi;        add             r11 = POFF, up          C                       M I
-       ld8             r10 = [up], UPD         C                       M01
-       mov.i           ar.lc = n               C                       I0
+}{.mmi;        add     r11 = POFF, up          C               M I
+       ld8     r10 = [up], UPD         C               M01
+       mov.i   ar.lc = n               C               I0
  }{.bbb;
-   (p6)        br.dptk         .Lb01
-   (p7)        br.dptk         .Lb10
-   (p8)        br.dptk         .Lb11
-       ;;
-}
+   (p6)        br.dptk .Lb01
+   (p7)        br.dptk .Lb10
+   (p8)        br.dptk .Lb11
+       ;; }
  
-.Lb00: ld8             r19 = [up], UPD
+.Lb00: ld8     r19 = [up], UPD
+       ;;
+       ld8     r16 = [up], UPD
         ;;
-       ld8             r16 = [up], UPD
+       ld8     r17 = [up], UPD
+       BSH     r8 = r10, tnc           C function return value
         ;;
-       ld8             r17 = [up], UPD
-       BSH             r8 = r10, tnc           C function return value
+       FSH     r24 = r10, cnt
+       BSH     r25 = r19, tnc
    (p14)        br.cond.dptk    .grt4
-
-       FSH             r24 = r10, cnt
-       BSH             r25 = r19, tnc
         ;;
-       FSH             r26 = r19, cnt
-       BSH             r27 = r16, tnc
+       FSH     r26 = r19, cnt
+       BSH     r27 = r16, tnc
         ;;
-       FSH             r20 = r16, cnt
-       BSH             r21 = r17, tnc
+       FSH     r20 = r16, cnt
+       BSH     r21 = r17, tnc
         ;;
-       or              r14 = r25, r24
-       FSH             r22 = r17, cnt
-       BSH             r23 = r10, tnc
-       br              .Lr4
+       or      r14 = r25, r24
+       FSH     r22 = r17, cnt
+       BSH     r23 = r10, tnc
+       br      .Lr4
  
-.grt4: FSH             r24 = r10, cnt
-       BSH             r25 = r19, tnc
-       ;;
-       ld8             r18 = [up], UPD
-       FSH             r26 = r19, cnt
-       BSH             r27 = r16, tnc
+.grt4: ld8     r18 = [up], UPD
+       FSH     r26 = r19, cnt
+       BSH     r27 = r16, tnc
         ;;
-       ld8             r19 = [up], UPD
-       FSH             r20 = r16, cnt
-       BSH             r21 = r17, tnc
+       ld8     r19 = [up], UPD
+       FSH     r20 = r16, cnt
+       BSH     r21 = r17, tnc
         ;;
-       ld8             r16 = [up], UPD
-       FSH             r22 = r17, cnt
-       BSH             r23 = r18, tnc
+       ld8     r16 = [up], UPD
+       FSH     r22 = r17, cnt
+       BSH     r23 = r18, tnc
         ;;
-       or              r14 = r25, r24
-       ld8             r17 = [up], UPD
+       or      r14 = r25, r24
+       ld8     r17 = [up], UPD
         br.cloop.dpnt   .Ltop
-       br              .Lbot
+       br      .Lbot
  
  .Lb01:
-  (p15)        BSH             r8 = r10, tnc           C function return value I
-  (p15)        FSH             r22 = r10, cnt          C                       I
-  (p15)        br.cond.dptk    .Lr1                    C return                B
+  (p15)        BSH     r8 = r10, tnc           C function return value I
+  (p15)        FSH     r22 = r10, cnt          C               I
+  (p15)        br.cond.dptk    .Lr1            C return        B
  
-.grt1: ld8             r18 = [up], UPD
+.grt1: ld8     r18 = [up], UPD
         ;;
-       ld8             r19 = [up], UPD
-       BSH             r8 = r10, tnc           C function return value
+       ld8     r19 = [up], UPD
+       BSH     r8 = r10, tnc           C function return value
         ;;
-       ld8             r16 = [up], UPD
-       FSH             r22 = r10, cnt
-       BSH             r23 = r18, tnc
+       ld8     r16 = [up], UPD
+       FSH     r22 = r10, cnt
+       BSH     r23 = r18, tnc
         ;;
-       ld8             r17 = [up], UPD
+       ld8     r17 = [up], UPD
+       FSH     r24 = r18, cnt
+       BSH     r25 = r19, tnc
         br.cloop.dpnt   .grt5
         ;;
-
-       FSH             r24 = r18, cnt
-       BSH             r25 = r19, tnc
+       or      r15 = r23, r22
+       FSH     r26 = r19, cnt
+       BSH     r27 = r16, tnc
         ;;
-       or              r15 = r23, r22
-       FSH             r26 = r19, cnt
-       BSH             r27 = r16, tnc
-       ;;
-       FSH             r20 = r16, cnt
-       BSH             r21 = r17, tnc
-       br              .Lr5
+       FSH     r20 = r16, cnt
+       BSH     r21 = r17, tnc
+       br      .Lr5
  
-.grt5: FSH             r24 = r18, cnt
-       BSH             r25 = r19, tnc
-       ;;
-       ld8             r18 = [up], UPD
-       FSH             r26 = r19, cnt
-       BSH             r27 = r16, tnc
+.grt5: ld8     r18 = [up], UPD
+       FSH     r26 = r19, cnt
+       BSH     r27 = r16, tnc
         ;;
-       ld8             r19 = [up], UPD
-       FSH             r20 = r16, cnt
-       BSH             r21 = r17, tnc
+       ld8     r19 = [up], UPD
+       FSH     r20 = r16, cnt
+       BSH     r21 = r17, tnc
         ;;
-       or              r15 = r23, r22
-       ld8             r16 = [up], UPD
-       br              .LL01
+       or      r15 = r23, r22
+       ld8     r16 = [up], UPD
+       br      .LL01
  
  
-.Lb10: ld8             r17 = [up], UPD
+.Lb10: ld8     r17 = [up], UPD
    (p14)        br.cond.dptk    .grt2
  
-       BSH             r8 = r10, tnc           C function return value
+       BSH     r8 = r10, tnc           C function return value
         ;;
-       FSH             r20 = r10, cnt
-       BSH             r21 = r17, tnc
+       FSH     r20 = r10, cnt
+       BSH     r21 = r17, tnc
         ;;
-       or              r14 = r21, r20
-       FSH             r22 = r17, cnt
-       br              .Lr2                    C return
+       or      r14 = r21, r20
+       FSH     r22 = r17, cnt
+       br      .Lr2                    C return
  
-.grt2: ld8             r18 = [up], UPD
-       BSH             r8 = r10, tnc           C function return value
+.grt2: ld8     r18 = [up], UPD
+       BSH     r8 = r10, tnc           C function return value
         ;;
-       ld8             r19 = [up], UPD
-       FSH             r20 = r10, cnt
-       BSH             r21 = r17, tnc
+       ld8     r19 = [up], UPD
+       FSH     r20 = r10, cnt
+       BSH     r21 = r17, tnc
         ;;
-       ld8             r16 = [up], UPD
-       FSH             r22 = r17, cnt
-       BSH             r23 = r18, tnc
+       ld8     r16 = [up], UPD
+       FSH     r22 = r17, cnt
+       BSH     r23 = r18, tnc
         ;;
-       ld8             r17 = [up], UPD
+ {.mmi;        ld8     r17 = [up], UPD
+       or      r14 = r21, r20
+       FSH     r24 = r18, cnt
+}{.mib;        nop     0
+       BSH     r25 = r19, tnc
         br.cloop.dpnt   .grt6
-       ;;
+       ;; }
  
-       or              r14 = r21, r20
-       FSH             r24 = r18, cnt
-       BSH             r25 = r19, tnc
-       ;;
-       FSH             r26 = r19, cnt
-       BSH             r27 = r16, tnc
-       br              .Lr6
+       FSH     r26 = r19, cnt
+       BSH     r27 = r16, tnc
+       br      .Lr6
  
-.grt6: or              r14 = r21, r20
-       FSH             r24 = r18, cnt
-       BSH             r25 = r19, tnc
+.grt6: ld8     r18 = [up], UPD
+       FSH     r26 = r19, cnt
+       BSH     r27 = r16, tnc
         ;;
-       ld8             r18 = [up], UPD
-       FSH             r26 = r19, cnt
-       BSH             r27 = r16, tnc
-       ;;
-       ld8             r19 = [up], UPD
-       br              .LL10
+       ld8     r19 = [up], UPD
+       br      .LL10
  
  
-.Lb11: ld8             r16 = [up], UPD
+.Lb11: ld8     r16 = [up], UPD
         ;;
-       ld8             r17 = [up], UPD
-       BSH             r8 = r10, tnc           C function return value
+       ld8     r17 = [up], UPD
+       BSH     r8 = r10, tnc           C function return value
    (p14)        br.cond.dptk    .grt3
         ;;
  
-       FSH             r26 = r10, cnt
-       BSH             r27 = r16, tnc
+       FSH     r26 = r10, cnt
+       BSH     r27 = r16, tnc
         ;;
-       FSH             r20 = r16, cnt
-       BSH             r21 = r17, tnc
+       FSH     r20 = r16, cnt
+       BSH     r21 = r17, tnc
         ;;
-       or              r15 = r27, r26
-       FSH             r22 = r17, cnt
-       br              .Lr3                    C return
+       or      r15 = r27, r26
+       FSH     r22 = r17, cnt
+       br      .Lr3                    C return
  
-.grt3: ld8             r18 = [up], UPD
-       FSH             r26 = r10, cnt
-       BSH             r27 = r16, tnc
+.grt3: ld8     r18 = [up], UPD
+       FSH     r26 = r10, cnt
+       BSH     r27 = r16, tnc
         ;;
-       ld8             r19 = [up], UPD
-       FSH             r20 = r16, cnt
-       BSH             r21 = r17, tnc
+       ld8     r19 = [up], UPD
+       FSH     r20 = r16, cnt
+       BSH     r21 = r17, tnc
         ;;
-       ld8             r16 = [up], UPD
-       FSH             r22 = r17, cnt
-       BSH             r23 = r18, tnc
+       ld8     r16 = [up], UPD
+       FSH     r22 = r17, cnt
+       BSH     r23 = r18, tnc
         ;;
-       ld8             r17 = [up], UPD
+       ld8     r17 = [up], UPD
         br.cloop.dpnt   .grt7
  
-       or              r15 = r27, r26
-       FSH             r24 = r18, cnt
-       BSH             r25 = r19, tnc
-       br              .Lr7
+       or      r15 = r27, r26
+       FSH     r24 = r18, cnt
+       BSH     r25 = r19, tnc
+       br      .Lr7
  
-.grt7: or              r15 = r27, r26
-       FSH             r24 = r18, cnt
-       BSH             r25 = r19, tnc
-       ld8             r18 = [up], UPD
-       br              .LL11
+.grt7: or      r15 = r27, r26
+       FSH     r24 = r18, cnt
+       BSH     r25 = r19, tnc
+       ld8     r18 = [up], UPD
+       br      .LL11
  
  C *** MAIN LOOP START ***
         ALIGN(32)
  .Ltop:
- {.mmi;        st8             [rp] = r14, UPD         C M2
-       or              r15 = r27, r26          C M3
-       FSH             r24 = r18, cnt          C I0
-}{.mmi;        ld8             r18 = [up], UPD         C M1
-       lfetch          [r11], PUPD
-       BSH             r25 = r19, tnc          C I1
+ {.mmi;        st8     [rp] = r14, UPD         C M2
+       or      r15 = r27, r26          C M3
+       FSH     r24 = r18, cnt          C I0
+}{.mmi;        ld8     r18 = [up], UPD         C M1
+       lfetch  [r11], PUPD
+       BSH     r25 = r19, tnc          C I1
         ;; }
  .LL11:
- {.mmi;        st8             [rp] = r15, UPD
-       or              r14 = r21, r20
-       FSH             r26 = r19, cnt
-}{.mmi;        ld8             r19 = [up], UPD
-       nop.m           0
-       BSH             r27 = r16, tnc
+ {.mmi;        st8     [rp] = r15, UPD
+       or      r14 = r21, r20
+       FSH     r26 = r19, cnt
+}{.mmi;        ld8     r19 = [up], UPD
+       nop.m   0
+       BSH     r27 = r16, tnc
         ;; }
  .LL10:
- {.mmi;        st8             [rp] = r14, UPD
-       or              r15 = r23, r22
-       FSH             r20 = r16, cnt
-}{.mmi;        ld8             r16 = [up], UPD
-       nop.m           0
-       BSH             r21 = r17, tnc
+ {.mmi;        st8     [rp] = r14, UPD
+       or      r15 = r23, r22
+       FSH     r20 = r16, cnt
+}{.mmi;        ld8     r16 = [up], UPD
+       nop.m   0
+       BSH     r21 = r17, tnc
         ;; }
  .LL01:
- {.mmi;        st8             [rp] = r15, UPD
-       or              r14 = r25, r24
-       FSH             r22 = r17, cnt
-}{.mib;        ld8             r17 = [up], UPD
-       BSH             r23 = r18, tnc
+ {.mmi;        st8     [rp] = r15, UPD
+       or      r14 = r25, r24
+       FSH     r22 = r17, cnt
+}{.mib;        ld8     r17 = [up], UPD
+       BSH     r23 = r18, tnc
         br.cloop.dptk   .Ltop
         ;; }
-
  C *** MAIN LOOP END ***
  
-.Lbot: or              r15 = r27, r26
-       FSH             r24 = r18, cnt
-       BSH             r25 = r19, tnc
-       st8             [rp] = r14, UPD
-       ;;
-.Lr7:  or              r14 = r21, r20
-       FSH             r26 = r19, cnt
-       BSH             r27 = r16, tnc
-       st8             [rp] = r15, UPD
-       ;;
-.Lr6:  or              r15 = r23, r22
-       FSH             r20 = r16, cnt
-       BSH             r21 = r17, tnc
-       st8             [rp] = r14, UPD
-       ;;
-.Lr5:  st8             [rp] = r15, UPD
-       or              r14 = r25, r24
-       FSH             r22 = r17, cnt
+.Lbot:
+ {.mmi;        st8     [rp] = r14, UPD
+       or      r15 = r27, r26
+       FSH     r24 = r18, cnt
+}{.mib;        nop     0
+       BSH     r25 = r19, tnc
+       nop     0
+       ;; }
+.Lr7:
+ {.mmi;        st8     [rp] = r15, UPD
+       or      r14 = r21, r20
+       FSH     r26 = r19, cnt
+}{.mib;        nop     0
+       BSH     r27 = r16, tnc
+       nop     0
+       ;; }
+.Lr6:
+ {.mmi;        st8     [rp] = r14, UPD
+       or      r15 = r23, r22
+       FSH     r20 = r16, cnt
+}{.mib;        nop     0
+       BSH     r21 = r17, tnc
+       nop     0
+       ;; }
+.Lr5:  st8     [rp] = r15, UPD
+       or      r14 = r25, r24
+       FSH     r22 = r17, cnt
         ;;
-.Lr4:  or              r15 = r27, r26
-       st8             [rp] = r14, UPD
+.Lr4:  st8     [rp] = r14, UPD
+       or      r15 = r27, r26
         ;;
-.Lr3:  or              r14 = r21, r20
-       st8             [rp] = r15, UPD
+.Lr3:  st8     [rp] = r15, UPD
+       or      r14 = r21, r20
         ;;
-.Lr2:  st8             [rp] = r14, UPD
+.Lr2:  st8     [rp] = r14, UPD
         ;;
-.Lr1:  st8             [rp] = r22, UPD         C                       M23
-       mov             ar.lc = r2              C                       I0
-       br.ret.sptk.many b0                     C                       B
+.Lr1:  st8     [rp] = r22, UPD         C               M23
+       mov     ar.lc = r2              C               I0
+       br.ret.sptk.many b0             C               B
  EPILOGUE(func)
  ASM_END()
diff --git a/mpn/ia64/lshiftc.asm b/mpn/ia64/lshiftc.asm

new file mode 100644 (file)

index 0000000..2cf1900
--- /dev/null
+++ b/mpn/ia64/lshiftc.asm
@@ -0,0 +1,451 @@
+dnl  IA-64 mpn_lshiftc.
+
+dnl  Contributed to the GNU project by Torbjorn Granlund.
+
+dnl  Copyright 2000, 2001, 2002, 2003, 2004, 2005, 2010 Free Software
+dnl  Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C           cycles/limb
+C Itanium:      ?
+C Itanium 2:    1.25
+
+C This code is scheduled deeply since the plain shift instructions shr and shl
+C have a latency of 4 (on Itanium) or 3 (on Itanium 2).  Poor scheduling of
+C these instructions causes a 10 cycle replay trap on Itanium.
+
+C The ld8 scheduling should probably be decreased to make the function smaller.
+C Good lfetch will make sure we never stall anyway.
+
+C We should actually issue the first ld8 at cycle 0, and the first BSH/FSH pair
+C at cycle 2.  Judicious use of predicates could allow us to issue more ld8's
+C in the prologue.
+
+
+C INPUT PARAMETERS
+define(`rp', `r32')
+define(`up', `r33')
+define(`n',  `r34')
+define(`cnt',`r35')
+
+define(`tnc',`r9')
+
+define(`FSH',`shl')
+define(`BSH',`shr.u')
+define(`UPD',`-8')
+define(`POFF',`-512')
+define(`PUPD',`-32')
+define(`func',`mpn_lshiftc')
+
+ASM_START()
+PROLOGUE(mpn_lshiftc)         C rp[] = ~(up[] << cnt) for n limbs, walking from limb n-1 downwards
+       .prologue
+       .save   ar.lc, r2
+       .body
+ifdef(`HAVE_ABI_32',
+`      addp4   rp = 0, rp              C                               M I
+       addp4   up = 0, up              C                               M I
+       sxt4    n = n                   C                               M I
+       zxt4    cnt = cnt               C                               I
+       ;;
+')
+
+ {.mmi;        nop     0                       C                               M I
+       and     r14 = 3, n              C r14 = n mod 4, selects feed-in path   M I
+       mov.i   r2 = ar.lc              C save ar.lc, restored before return    I0
+}{.mmi;        add     r15 = -1, n             C r15 = n - 1                   M I
+       sub     tnc = 64, cnt           C tnc = 64 - cnt                        M I
+       nop     0
+       ;;
+}{.mmi;        cmp.eq  p6, p0 = 1, r14         C                               M I
+       cmp.eq  p7, p0 = 2, r14         C                               M I
+       shr.u   n = r15, 2              C n = (n-1)/4, becomes ar.lc below      I0
+}{.mmi;        cmp.eq  p8, p0 = 3, r14         C                               M I
+       shladd  up = r15, 3, up         C up += 8*(n-1), i.e. limb n-1          M I
+       shladd  rp = r15, 3, rp         C rp += 8*(n-1), i.e. limb n-1          M I
+       ;;
+}{.mmi;        add     r11 = POFF, up          C r11 = lfetch pointer, POFF bytes from up      M I
+       ld8     r10 = [up], UPD         C r10 = limb n-1, up then steps by UPD  M01
+       mov.i   ar.lc = n               C loop count = (n-1)/4                  I0
+}{.bbb;
+   (p6)        br.dptk .Lb01           C n mod 4 = 1
+   (p7)        br.dptk .Lb10           C n mod 4 = 2
+   (p8)        br.dptk .Lb11           C n mod 4 = 3
+       ;; }
+
+.Lb00:                         C n mod 4 = 0 (fall-through case)
+       ld8     r19 = [up], UPD
+       ;;
+       ld8     r16 = [up], UPD
+       ;;
+       ld8     r17 = [up], UPD
+       BSH     r8 = r10, tnc           C r8 = limb n-1 >> tnc, the function return value
+       br.cloop.dptk   L(gt4)          C taken when n > 4
+       ;;
+       FSH     r24 = r10, cnt
+       BSH     r25 = r19, tnc
+       ;;
+       FSH     r26 = r19, cnt
+       BSH     r27 = r16, tnc
+       ;;
+       FSH     r20 = r16, cnt
+       BSH     r21 = r17, tnc
+       ;;
+       or      r14 = r25, r24
+       FSH     r22 = r17, cnt
+       ;;
+       or      r15 = r27, r26
+       sub     r31 = -1, r14           C r31 = ~r14, the complemented limb to store
+       br      .Lr4
+
+L(gt4):
+ {.mmi;        nop     0
+       nop     0
+       FSH     r24 = r10, cnt
+}{.mmi;        ld8     r18 = [up], UPD
+       nop     0
+       BSH     r25 = r19, tnc
+       ;; }
+ {.mmi;        nop     0
+       nop     0
+       FSH     r26 = r19, cnt
+}{.mmi;        ld8     r19 = [up], UPD
+       nop     0
+       BSH     r27 = r16, tnc
+       ;; }
+ {.mmi;        nop     0
+       nop     0
+       FSH     r20 = r16, cnt
+}{.mmi;        ld8     r16 = [up], UPD
+       nop     0
+       BSH     r21 = r17, tnc
+       ;; }
+ {.mmi;        nop     0
+       or      r14 = r25, r24
+       FSH     r22 = r17, cnt
+}{.mib;        ld8     r17 = [up], UPD
+       BSH     r23 = r18, tnc
+       br.cloop.dptk   L(gt8)          C taken when n > 8
+       ;; }
+ {.mmi;        nop     0
+       or      r15 = r27, r26
+       FSH     r24 = r18, cnt
+}{.mib;        sub     r31 = -1, r14
+       BSH     r25 = r19, tnc
+       br      .Lr8 }
+
+L(gt8):
+       or      r15 = r27, r26
+       FSH     r24 = r18, cnt
+       ld8     r18 = [up], UPD
+       sub     r31 = -1, r14
+       BSH     r25 = r19, tnc
+       br      .LL00
+
+.Lb01:                         C n mod 4 = 1
+       br.cloop.dptk   L(gt1)          C taken when n > 1
+       ;;
+       BSH     r8 = r10, tnc           C function return value
+       FSH     r22 = r10, cnt
+       ;;
+       sub     r31 = -1, r22
+       br      .Lr1
+       ;;
+L(gt1):
+       ld8     r18 = [up], UPD
+       BSH     r8 = r10, tnc           C function return value
+       FSH     r22 = r10, cnt
+       ;;
+       ld8     r19 = [up], UPD
+       ;;
+       ld8     r16 = [up], UPD
+       ;;
+       ld8     r17 = [up], UPD
+       BSH     r23 = r18, tnc
+       br.cloop.dptk   L(gt5)          C taken when n > 5
+       ;;
+       nop     0
+       FSH     r24 = r18, cnt
+       BSH     r25 = r19, tnc
+       ;;
+       nop     0
+       FSH     r26 = r19, cnt
+       BSH     r27 = r16, tnc
+       ;;
+       or      r15 = r23, r22
+       FSH     r20 = r16, cnt
+       BSH     r21 = r17, tnc
+       ;;
+       or      r14 = r25, r24
+       FSH     r22 = r17, cnt
+       sub     r31 = -1, r15
+       br      .Lr5
+
+L(gt5):
+ {.mmi;        nop     0
+       nop     0
+       FSH     r24 = r18, cnt
+}{.mmi;        ld8     r18 = [up], UPD
+       nop     0
+       BSH     r25 = r19, tnc
+       ;; }
+ {.mmi;        nop     0
+       nop     0
+       FSH     r26 = r19, cnt
+}{.mmi;        ld8     r19 = [up], UPD
+       nop     0
+       BSH     r27 = r16, tnc
+       ;; }
+ {.mmi;        nop     0
+       or      r15 = r23, r22
+       FSH     r20 = r16, cnt
+}{.mmi;        ld8     r16 = [up], UPD
+       nop     0
+       BSH     r21 = r17, tnc
+       ;; }
+ {.mmi;        or      r14 = r25, r24
+       sub     r31 = -1, r15
+       FSH     r22 = r17, cnt
+}{.mib;        ld8     r17 = [up], UPD
+       BSH     r23 = r18, tnc
+       br      L(end)
+       ;; }
+
+.Lb10:                         C n mod 4 = 2
+       ld8     r17 = [up], UPD
+       br.cloop.dptk   L(gt2)          C taken when n > 2
+       ;;
+       BSH     r8 = r10, tnc           C function return value
+       FSH     r20 = r10, cnt
+       ;;
+       BSH     r21 = r17, tnc
+       FSH     r22 = r17, cnt
+       ;;
+       or      r14 = r21, r20
+       ;;
+       sub     r31 = -1, r14
+       br      .Lr2
+       ;;
+L(gt2):
+       ld8     r18 = [up], UPD
+       BSH     r8 = r10, tnc           C function return value
+       FSH     r20 = r10, cnt
+       ;;
+       ld8     r19 = [up], UPD
+       ;;
+       ld8     r16 = [up], UPD
+       BSH     r21 = r17, tnc
+       FSH     r22 = r17, cnt
+       ;;
+       ld8     r17 = [up], UPD
+       BSH     r23 = r18, tnc
+       br.cloop.dptk   L(gt6)          C taken when n > 6
+       ;;
+       nop     0
+       FSH     r24 = r18, cnt
+       BSH     r25 = r19, tnc
+       ;;
+       or      r14 = r21, r20
+       FSH     r26 = r19, cnt
+       BSH     r27 = r16, tnc
+       ;;
+ {.mmi;        nop     0
+       or      r15 = r23, r22
+       FSH     r20 = r16, cnt
+}{.mib;        sub     r31 = -1, r14
+       BSH     r21 = r17, tnc
+       br      .Lr6
+       ;; }
+L(gt6):
+ {.mmi;        nop     0
+       nop     0
+       FSH     r24 = r18, cnt
+}{.mmi;        ld8     r18 = [up], UPD
+       nop     0
+       BSH     r25 = r19, tnc
+       ;; }
+ {.mmi; nop   0
+       or      r14 = r21, r20
+       FSH     r26 = r19, cnt
+}{.mmi;        ld8     r19 = [up], UPD
+       nop     0
+       BSH     r27 = r16, tnc
+       ;; }
+ {.mmi;        or      r15 = r23, r22
+       sub     r31 = -1, r14
+       FSH     r20 = r16, cnt
+}{.mib;        ld8     r16 = [up], UPD
+       BSH     r21 = r17, tnc
+       br      .LL10
+}
+
+.Lb11:                         C n mod 4 = 3
+       ld8     r16 = [up], UPD
+       ;;
+       ld8     r17 = [up], UPD
+       BSH     r8 = r10, tnc           C function return value
+       FSH     r26 = r10, cnt
+       br.cloop.dptk   L(gt3)          C taken when n > 3
+       ;;
+       BSH     r27 = r16, tnc
+       ;;
+       FSH     r20 = r16, cnt
+       BSH     r21 = r17, tnc
+       ;;
+       FSH     r22 = r17, cnt
+       ;;
+       or      r15 = r27, r26
+       ;;
+       or      r14 = r21, r20
+       sub     r31 = -1, r15
+       br      .Lr3
+       ;;
+L(gt3):
+       ld8     r18 = [up], UPD
+       ;;
+       ld8     r19 = [up], UPD
+       BSH     r27 = r16, tnc
+       ;;
+ {.mmi;        nop     0
+       nop     0
+       FSH     r20 = r16, cnt
+}{.mmi;        ld8     r16 = [up], UPD
+       nop     0
+       BSH     r21 = r17, tnc
+       ;; }
+ {.mmi nop     0                       C NOTE(review): no ";" after {.mmi here, unlike every other bundle opener -- confirm intended
+       nop     0
+       FSH     r22 = r17, cnt
+}{.mib;        ld8     r17 = [up], UPD
+       BSH     r23 = r18, tnc
+       br.cloop.dptk   L(gt7)          C taken when n > 7
+       ;; }
+       or      r15 = r27, r26
+       FSH     r24 = r18, cnt
+       BSH     r25 = r19, tnc
+       ;;
+ {.mmi;        nop     0
+       or      r14 = r21, r20
+       FSH     r26 = r19, cnt
+}{.mib;        sub     r31 = -1, r15
+       BSH     r27 = r16, tnc
+       br      .Lr7
+}
+L(gt7):
+ {.mmi;        nop     0
+       or      r15 = r27, r26
+       FSH     r24 = r18, cnt
+}{.mmi;        ld8     r18 = [up], UPD
+       nop     0
+       BSH     r25 = r19, tnc
+       ;; }
+ {.mmi;        or      r14 = r21, r20
+       sub     r31 = -1, r15
+       FSH     r26 = r19, cnt
+}{.mib;        ld8     r19 = [up], UPD
+       BSH     r27 = r16, tnc
+       br      .LL11
+}
+
+C *** MAIN LOOP START ***
+       ALIGN(32)
+L(top):                        C each iteration stores 4 complemented limbs and loads 4 new ones
+.LL01:
+ {.mmi;        st8     [rp] = r31, UPD         C M2
+       or      r15 = r27, r26          C M3
+       FSH     r24 = r18, cnt          C I0
+}{.mmi;        ld8     r18 = [up], UPD         C M0
+       sub     r31 = -1, r14           C M1
+       BSH     r25 = r19, tnc          C I1
+       ;; }
+.LL00:
+ {.mmi;        st8     [rp] = r31, UPD
+       or      r14 = r21, r20
+       FSH     r26 = r19, cnt
+}{.mmi;        ld8     r19 = [up], UPD
+       sub     r31 = -1, r15
+       BSH     r27 = r16, tnc
+       ;; }
+.LL11:
+ {.mmi;        st8     [rp] = r31, UPD
+       or      r15 = r23, r22
+       FSH     r20 = r16, cnt
+}{.mmi;        ld8     r16 = [up], UPD
+       sub     r31 = -1, r14
+       BSH     r21 = r17, tnc
+       ;; }
+.LL10:
+ {.mmi;        st8     [rp] = r31, UPD
+       or      r14 = r25, r24
+       FSH     r22 = r17, cnt
+}{.mmi;        ld8     r17 = [up], UPD
+       sub     r31 = -1, r15
+       BSH     r23 = r18, tnc
+       ;; }
+L(end):        lfetch          [r11], PUPD     C prefetch at r11, which then steps by PUPD
+       br.cloop.dptk   L(top)
+C *** MAIN LOOP END ***
+
+ {.mmi;        st8     [rp] = r31, UPD         C wind-down: no more loads, one st8 per remaining limb
+       or      r15 = r27, r26
+       FSH     r24 = r18, cnt
+}{.mib;        sub     r31 = -1, r14
+       BSH     r25 = r19, tnc
+       nop     0
+       ;; }
+.Lr8:                          C .LrK labels: entry points with K limbs still to be stored
+ {.mmi;        st8     [rp] = r31, UPD
+       or      r14 = r21, r20
+       FSH     r26 = r19, cnt
+}{.mib;        sub     r31 = -1, r15
+       BSH     r27 = r16, tnc
+       nop     0
+       ;; }
+.Lr7:
+ {.mmi;        st8     [rp] = r31, UPD
+       or      r15 = r23, r22
+       FSH     r20 = r16, cnt
+}{.mib;        sub     r31 = -1, r14
+       BSH     r21 = r17, tnc
+       nop     0
+       ;; }
+.Lr6:  st8     [rp] = r31, UPD
+       or      r14 = r25, r24
+       FSH     r22 = r17, cnt
+       sub     r31 = -1, r15
+       ;;
+.Lr5:  st8     [rp] = r31, UPD
+       or      r15 = r27, r26
+       sub     r31 = -1, r14
+       ;;
+.Lr4:  st8     [rp] = r31, UPD
+       or      r14 = r21, r20
+       sub     r31 = -1, r15
+       ;;
+.Lr3:  st8     [rp] = r31, UPD
+       sub     r31 = -1, r14
+       ;;
+.Lr2:  st8     [rp] = r31, UPD
+       sub     r31 = -1, r22           C r31 = ~r22, the final limb to store
+       ;;
+.Lr1:  st8     [rp] = r31, UPD         C                               M23
+       mov     ar.lc = r2              C restore ar.lc saved on entry  I0
+       br.ret.sptk.many b0             C                               B
+EPILOGUE(func)
+ASM_END()
diff --git a/mpn/ia64/mod_34lsub1.asm b/mpn/ia64/mod_34lsub1.asm

new file mode 100644 (file)

index 0000000..9f181ca
--- /dev/null
+++ b/mpn/ia64/mod_34lsub1.asm
@@ -0,0 +1,224 @@
+dnl  IA-64 mpn_mod_34lsub1
+
+dnl  Contributed to the GNU project by Torbjorn Granlund.
+
+dnl  Copyright 2003, 2004, 2005, 2010 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C           cycles/limb
+C Itanium:      ?
+C Itanium 2:    1
+
+
+C INPUT PARAMETERS
+define(`up', `r32')
+define(`n',  `r33')
+
+C Some useful aliases for registers we use
+define(`u0',`r14') define(`u1',`r15') define(`u2',`r16')
+define(`a0',`r17') define(`a1',`r18') define(`a2',`r19')
+define(`c0',`r20') define(`c1',`r21') define(`c2',`r22')
+
+C This is a fairly simple-minded implementation.  One could approach 0.67 c/l
+C with a more sophisticated implementation.  If we're really crazy, we could
+C super-unroll, storing carries just in predicate registers, then copy them to
+C a general register, and population count them from there.  That'd bring us
+C close to 3 insn/limb, for nearly 0.5 c/l.
+
+C Computing n/3 needs 16 cycles, which is a lot of startup overhead.
+C We therefore use a plain while-style loop:
+C      add             n = -3, n
+C      cmp.le          p9, p0 = 3, n
+C  (p9)        br.cond         .Loop
+C Alternatively, we could table n/3 for, say, n < 256, and predicate the
+C 16-cycle code.
+
+C The summing-up code at the end was written quickly, and could surely be
+C vastly improved.
+
+ASM_START()
+PROLOGUE(mpn_mod_34lsub1)
+       .prologue
+       .save   ar.lc, r2               C NOTE(review): no visible copy of ar.lc into r2 in this routine; confirm unwind info
+       .body
+ifdef(`HAVE_ABI_32',`
+       addp4           up = 0, up              C                       M I
+       zxt4            n = n                   C                       I
+       ;;
+')
+C The ifelse(0,1,...) block below never expands; it is the only place that writes ar.lc (presumably the n/3 loop count discussed above).
+ifelse(0,1,`
+       movl            r14 = 0xAAAAAAAAAAAAAAAB
+       ;;
+       setf.sig        f6 = r14
+       setf.sig        f7 = r33
+       ;;
+       xmpy.hu         f6 = f6, f7
+       ;;
+       getf.sig        r8 = f6
+       ;;
+       shr.u           r8 = r8, 1              C Loop count
+       ;;
+       mov.i           ar.lc = r8
+')
+
+       ld8     u0 = [up], 8            C u0 = first limb, up advances by 8
+       cmp.ne  p9, p0 = 1, n           C p9 = (n != 1)
+  (p9) br      L(gt1)
+       ;;
+       shr.u   r8 = u0, 48             C n == 1: bits 48..63 of the limb
+       dep.z   r27 = u0, 0, 48         C n == 1: low 48 bits of the limb
+       ;;
+       add     r8 = r8, r27            C return value = high part + low part
+       br.ret.sptk.many b0
+
+C n != 1: zero the accumulators a0-a2 and the borrow counters c0-c2, then preload u1.
+L(gt1):
+.mmi;  nop.m   0
+       mov     a0 = 0
+       add     n = -2, n               C u0 is loaded above and u1 below, so n = limbs not yet loaded
+.mmi;  mov     c0 = 0
+       mov     c1 = 0
+       mov     c2 = 0
+       ;;
+.mmi;  ld8     u1 = [up], 8
+       mov     a1 = 0
+       cmp.ltu p6, p0 = r0, r0         C clear p6
+.mmb;  cmp.gt  p9, p0 = 3, n           C p9 = (n < 3): not enough limbs for one loop pass
+       mov     a2 = 0
+  (p9) br.cond.dptk    L(end)
+       ;;
+
+       ALIGN(32)
+L(top):
+.mmi;  ld8     u2 = [up], 8
+  (p6) add     c0 = 1, c0              C count the borrow recorded by the previous a2 step
+       cmp.ltu p7, p0 = a0, u0         C p7 = (a0 < u0), i.e. a0 - u0 borrows
+.mmb;  sub     a0 = a0, u0             C limbs are subtracted from, not added to, the accumulators
+       add     n = -3, n               C three limbs are loaded per pass
+       nop.b   0
+       ;;
+.mmi;  ld8     u0 = [up], 8
+  (p7) add     c1 = 1, c1              C borrow out of a0 is counted in c1
+       cmp.ltu p8, p0 = a1, u1         C p8 = (a1 < u1)
+.mmb;  sub     a1 = a1, u1
+       cmp.le  p9, p0 = 3, n           C p9 = (n >= 3): another full pass is possible
+       nop.b   0
+       ;;
+.mmi;  ld8     u1 = [up], 8
+  (p8) add     c2 = 1, c2              C borrow out of a1 is counted in c2
+       cmp.ltu p6, p0 = a2, u2         C p6 = (a2 < u2), consumed at the next c0 update
+.mmb;  sub     a2 = a2, u2
+       nop.m   0
+dnl    br.cloop.dptk   L(top)
+  (p9) br.cond.dptk    L(top)
+       ;;
+
+L(end):
+       cmp.eq  p10, p0 = 0, n          C p10 = no limbs left to load
+       cmp.eq  p11, p0 = 1, n          C p11 = exactly one limb left to load
+  (p10)        br      L(0)
+
+L(2):
+.mmi;  ld8     u2 = [up], 8            C reached only when n != 0
+  (p6) add     c0 = 1, c0
+       cmp.ltu p7, p0 = a0, u0
+.mmb;  sub     a0 = a0, u0
+       nop.m   0
+  (p11)        br      L(1)
+       ;;
+       ld8     u0 = [up], 8            C second remaining limb
+  (p7) add     c1 = 1, c1
+       cmp.ltu p8, p0 = a1, u1
+       sub     a1 = a1, u1
+       ;;
+  (p8) add     c2 = 1, c2
+       cmp.ltu p6, p0 = a2, u2
+       sub     a2 = a2, u2
+       ;;
+  (p6) add     c0 = 1, c0
+       cmp.ltu p7, p0 = a0, u0
+       sub     a0 = a0, u0
+       ;;
+  (p7) add     c1 = 1, c1
+       br      L(com)
+
+C One limb was left to load: u0 has been subtracted already, finish with u1 and u2.
+L(1):
+  (p7) add     c1 = 1, c1
+       cmp.ltu p8, p0 = a1, u1
+       sub     a1 = a1, u1
+       ;;
+  (p8) add     c2 = 1, c2
+       cmp.ltu p6, p0 = a2, u2
+       sub     a2 = a2, u2
+       ;;
+  (p6) add     c0 = 1, c0
+       br      L(com)
+
+C No limbs were left to load: subtract the two pending limbs u0 and u1.
+L(0):
+  (p6) add     c0 = 1, c0
+       cmp.ltu p7, p0 = a0, u0
+       sub     a0 = a0, u0
+       ;;
+  (p7) add     c1 = 1, c1
+       cmp.ltu p8, p0 = a1, u1
+       sub     a1 = a1, u1
+       ;;
+  (p8) add     c2 = 1, c2
+
+L(com):
+C |     a2    |     a1    |     a0    |
+C |        |        |        |        |
+       shr.u   r24 = a0, 48            C 16 bits
+       shr.u   r25 = a1, 32            C 32 bits
+       shr.u   r26 = a2, 16            C 48 bits
+       ;;
+       shr.u   r10 = c0, 48            C 16 bits, always zero
+       shr.u   r11 = c1, 32            C 32 bits
+       shr.u   r30 = c2, 16            C 48 bits
+       ;;
+       dep.z   r27 = a0,  0, 48        C 48 bits
+       dep.z   r28 = a1, 16, 32        C 48 bits
+       dep.z   r29 = a2, 32, 16        C 48 bits
+       dep.z   r31 = c0,  0, 48        C 48 bits
+       dep.z   r14 = c1, 16, 32        C 48 bits
+       dep.z   r15 = c2, 32, 16        C 48 bits
+       ;;
+.mmi;  add     r24 = r24, r25          C r24, r26, r28: partial sums of the six accumulator pieces
+       add     r26 = r26, r27
+       add     r28 = r28, r29
+.mmi;  add     r10 = r10, r11          C r10, r30, r14: partial sums of the six borrow-count pieces
+       add     r30 = r30, r31
+       add     r14 = r14, r15
+       ;;
+       movl    r8 = 0xffffffffffff0    C 16 * (2^48 - 1); presumably keeps the subtraction below non-negative
+       add     r24 = r24, r26
+       add     r10 = r10, r30
+       ;;
+       add     r24 = r24, r28          C r24 = sum of all accumulator pieces
+       add     r10 = r10, r14          C r10 = sum of all borrow-count pieces
+       ;;
+       sub     r8 = r8, r24            C accumulators hold negated sums, so they are subtracted
+       ;;
+       add     r8 = r8, r10            C borrow counts are added back; result returned in r8
+       br.ret.sptk.many b0
+EPILOGUE()
+ASM_END()
diff --git a/mpn/ia64/mode1o.asm b/mpn/ia64/mode1o.asm

index 6b3626ebe6e80a1b780269323066c78ccf65868f..16ca97368a04f6b332b87a1c8d530da8a2d4ce08 100644 (file)
--- a/mpn/ia64/mode1o.asm
+++ b/mpn/ia64/mode1o.asm
@@ -1,5 +1,7 @@
  dnl  Itanium-2 mpn_modexact_1c_odd -- mpn by 1 exact remainder.
  
+dnl  Contributed to the GNU project by Kevin Ryde.
+
  dnl  Copyright 2003, 2004, 2005 Free Software Foundation, Inc.
  dnl
  dnl  This file is part of the GNU MP Library.
diff --git a/mpn/ia64/mul_1.asm b/mpn/ia64/mul_1.asm

index 8df8d93f8e3e48154847d5b7418d66afd8412392..f020ae1ea16c3271351250ba26df1706f3eac984 100644 (file)
--- a/mpn/ia64/mul_1.asm
+++ b/mpn/ia64/mul_1.asm
@@ -1,6 +1,8 @@
  dnl  IA-64 mpn_mul_1, mpn_mul_1c -- Multiply a limb vector with a limb and
  dnl  store the result in a second limb vector.
  
+dnl  Contributed to the GNU project by Torbjorn Granlund.
+
  dnl  Copyright 2000, 2001, 2002, 2003, 2004, 2006, 2007 Free Software
  dnl  Foundation, Inc.
  
@@ -541,7 +543,6 @@ C *** MAIN LOOP END ***
     (p6)        cmp.leu         p8, p9 = r24, r17
     (p7)        cmp.ltu         p8, p9 = r24, r17
         ;;
-       .pred.rel "mutex",p8,p9
     (p8)        add             r8 = 1, r8
         mov.i           ar.lc = r2
         br.ret.sptk.many b0
diff --git a/mpn/ia64/mul_2.asm b/mpn/ia64/mul_2.asm

index b0d4ef70a15bea8b65408357731ed1862b8ac22f..a0abdac02fe9d5d7b68e870db6ff5047f3fadcaf 100644 (file)
--- a/mpn/ia64/mul_2.asm
+++ b/mpn/ia64/mul_2.asm
@@ -1,7 +1,9 @@
  dnl  IA-64 mpn_mul_2 -- Multiply a n-limb number with a 2-limb number and store
  dnl  store the result to a (n+1)-limb number.
  
-dnl  Copyright 2004 Free Software Foundation, Inc.
+dnl  Contributed to the GNU project by Torbjorn Granlund.
+
+dnl  Copyright 2004, 2011 Free Software Foundation, Inc.
  
  dnl  This file is part of the GNU MP Library.
  
@@ -21,19 +23,14 @@ dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
  include(`../config.m4')
  
  C         cycles/limb
-C Itanium:    3.15
-C Itanium 2:  1.625
-
-C Note that this is very similar to addmul_2.asm.  If you change this file,
-C please change that file too.
+C Itanium:    ?
+C Itanium 2:  1.5
  
  C TODO
  C  * Clean up variable names, and try to decrease the number of distinct
  C    registers used.
-C  * Cleanup feed-in code to not require zeroing several registers.
+C  * Clean up feed-in code to not require zeroing several registers.
  C  * Make sure we don't depend on uninitialized predicate registers.
-C  * We currently cross-jump very aggressively, at the expense of a few cycles
-C    per operation.  Consider changing that.
  C  * Could perhaps save a few cycles by using 1 c/l carry propagation in
  C    wind-down code.
  C  * Ultimately rewrite.  The problem with this code is that it first uses a
@@ -94,598 +91,519 @@ PROLOGUE(mpn_mul_2)
         .save   ar.lc, r2
         .body
  
-ifdef(`HAVE_ABI_32',
-`      addp4           rp = 0, rp              C                       M I
-       addp4           up = 0, up              C                       M I
-       addp4           vp = 0, vp              C                       M I
-       zxt4            n = n                   C                       I
+ifdef(`HAVE_ABI_32',`
+.mmi;          addp4   rp = 0, rp              C                       M I
+               addp4   up = 0, up              C                       M I
+               addp4   vp = 0, vp              C                       M I
+.mmi;          nop     1
+               nop     1
+               zxt4    n = n                   C                       I
         ;;')
  
-{.mmi          C 00
-       ldf8            ux = [up], 8            C                       M
-       ldf8            v0 = [vp], 8            C                       M
-       mov.i           r2 = ar.lc              C                       I0
-}{.mmi
-       nop             0                       C                       M
-       and             r14 = 3, n              C                       M I
-       add             n = -2, n               C                       M I
-       ;;
-}{.mmi         C 01
-       ldf8            uy = [up], 8            C                       M
-       ldf8            v1 = [vp]               C                       M
-       shr.u           n = n, 2                C                       I
-}{.mmi
-       nop             0                       C                       M
-       cmp.eq          p10, p0 = 1, r14        C                       M I
-       cmp.eq          p11, p0 = 2, r14        C                       M I
-       ;;
-}{.mmi         C 02
-       nop             0                       C                       M
-       cmp.eq          p12, p0 = 3, r14        C                       M I
-       mov.i           ar.lc = n               C                       I0
-}{.bbb
-  (p10) br.dptk                .Lb01                   C                       B
-  (p11) br.dptk                .Lb10                   C                       B
-  (p12) br.dptk                .Lb11                   C                       B
-       ;;
-}
+.mmi;          ldf8    ux = [up], 8            C                       M
+               ldf8    v0 = [vp], 8            C                       M
+               mov     r2 = ar.lc              C                       I0
+.mmi;          nop     1                       C                       M
+               and     r14 = 3, n              C                       M I
+               add     n = -2, n               C                       M I
+       ;;
+.mmi;          ldf8    uy = [up], 8            C                       M
+               ldf8    v1 = [vp]               C                       M
+               shr.u   n = n, 2                C                       I
+.mmi;          nop     1                       C                       M
+               cmp.eq  p10, p0 = 1, r14        C                       M I
+               cmp.eq  p11, p0 = 2, r14        C                       M I
+       ;;
+.mmi;          nop     1                       C                       M
+               cmp.eq  p12, p0 = 3, r14        C                       M I
+               mov     ar.lc = n               C                       I0
+.bbb;  (p10)   br.dptk L(b01)                  C                       B
+       (p11)   br.dptk L(b10)                  C                       B
+       (p12)   br.dptk L(b11)                  C                       B
+       ;;
  
         ALIGN(32)
-.Lb00: ldf8            u_1 = [up], 8
-       mov             acc1_2 = 0
-       mov             pr1_2 = 0
-       mov             pr0_3 = 0
-       cmp.ne          p8, p9 = r0, r0
+L(b00):                ldf8    u_1 = [up], 8
+               mov     acc1_2 = 0
+               mov     pr1_2 = 0
+               mov     pr0_3 = 0
+               cmp.ne  p8, p9 = r0, r0
         ;;
-       xma.l           fp0b_3 = ux, v0, f0
-       cmp.ne          p12, p13 = r0, r0
-       ldf8            u_2 = [up], 8
-       xma.hu          fp1a_3 = ux, v0, f0
-       br.cloop.dptk   .grt4
+               xma.l   fp0b_3 = ux, v0, f0
+               cmp.ne  p12, p13 = r0, r0
+               ldf8    u_2 = [up], 8
+               xma.hu  fp1a_3 = ux, v0, f0
+               br.cloop.dptk   L(gt4)
  
-       xma.l           fp0b_0 = uy, v0, f0
-       xma.hu          fp1a_0 = uy, v0, f0
+               xma.l   fp0b_0 = uy, v0, f0
+               xma.hu  fp1a_0 = uy, v0, f0
         ;;
-       getf.sig        acc0 = fp0b_3
-       xma.l           fp1b_3 = ux, v1, fp1a_3
-       xma.hu          fp2a_3 = ux, v1, fp1a_3
+               getfsig acc0 = fp0b_3
+               xma.l   fp1b_3 = ux, v1, fp1a_3
+               xma.hu  fp2a_3 = ux, v1, fp1a_3
         ;;
-       xma.l           fp0b_1 = u_1, v0, f0
-       xma.hu          fp1a_1 = u_1, v0, f0
+               xma.l   fp0b_1 = u_1, v0, f0
+               xma.hu  fp1a_1 = u_1, v0, f0
         ;;
-       getf.sig        pr0_0 = fp0b_0
-       xma.l           fp1b_0 = uy, v1, fp1a_0
-       xma.hu          fp2a_0 = uy, v1, fp1a_0
+               getfsig pr0_0 = fp0b_0
+               xma.l   fp1b_0 = uy, v1, fp1a_0
+               xma.hu  fp2a_0 = uy, v1, fp1a_0
         ;;
-       getf.sig        pr1_3 = fp1b_3
-       getf.sig        acc1_3 = fp2a_3
-       xma.l           fp0b_2 = u_2, v0, f0
-       xma.hu          fp1a_2 = u_2, v0, f0
-       br              .Lcj4
+               getfsig pr1_3 = fp1b_3
+               getfsig acc1_3 = fp2a_3
+               xma.l   fp0b_2 = u_2, v0, f0
+               xma.hu  fp1a_2 = u_2, v0, f0
+               br      L(cj4)
  
-.grt4: xma.l           fp0b_0 = uy, v0, f0
-       xma.hu          fp1a_0 = uy, v0, f0
+L(gt4):                xma.l   fp0b_0 = uy, v0, f0
+               xma.hu  fp1a_0 = uy, v0, f0
         ;;
-       getf.sig        acc0 = fp0b_3
-       xma.l           fp1b_3 = ux, v1, fp1a_3
-       ldf8            u_3 = [up], 8
-       xma.hu          fp2a_3 = ux, v1, fp1a_3
+               getfsig acc0 = fp0b_3
+               xma.l   fp1b_3 = ux, v1, fp1a_3
+               ldf8    u_3 = [up], 8
+               xma.hu  fp2a_3 = ux, v1, fp1a_3
         ;;
-       xma.l           fp0b_1 = u_1, v0, f0
-       xma.hu          fp1a_1 = u_1, v0, f0
+               xma.l   fp0b_1 = u_1, v0, f0
+               xma.hu  fp1a_1 = u_1, v0, f0
         ;;
-       getf.sig        pr0_0 = fp0b_0
-       xma.l           fp1b_0 = uy, v1, fp1a_0
-       xma.hu          fp2a_0 = uy, v1, fp1a_0
+               getfsig pr0_0 = fp0b_0
+               xma.l   fp1b_0 = uy, v1, fp1a_0
+               xma.hu  fp2a_0 = uy, v1, fp1a_0
         ;;
-       ldf8            u_0 = [up], 8
-       getf.sig        pr1_3 = fp1b_3
+               ldf8    u_0 = [up], 8
+               getfsig pr1_3 = fp1b_3
+               xma.l   fp0b_2 = u_2, v0, f0
         ;;
-       getf.sig        acc1_3 = fp2a_3
-       xma.l           fp0b_2 = u_2, v0, f0
-       xma.hu          fp1a_2 = u_2, v0, f0
-       br              .LL00
+               getfsig acc1_3 = fp2a_3
+               xma.hu  fp1a_2 = u_2, v0, f0
+               br      L(00)
  
  
         ALIGN(32)
-.Lb01: ldf8            u_0 = [up], 8           C M
-       mov             acc1_1 = 0              C M I
-       mov             pr1_1 = 0               C M I
-       mov             pr0_2 = 0               C M I
-       cmp.ne          p6, p7 = r0, r0         C M I
+L(b01):                ldf8    u_0 = [up], 8           C M
+               mov     acc1_1 = 0              C M I
+               mov     pr1_1 = 0               C M I
+               mov     pr0_2 = 0               C M I
+               cmp.ne  p6, p7 = r0, r0         C M I
         ;;
-       xma.l           fp0b_2 = ux, v0, f0     C F
-       cmp.ne          p10, p11 = r0, r0       C M I
-       ldf8            u_1 = [up], 8           C M
-       xma.hu          fp1a_2 = ux, v0, f0     C F
+               xma.l   fp0b_2 = ux, v0, f0     C F
+               cmp.ne  p10, p11 = r0, r0       C M I
+               ldf8    u_1 = [up], 8           C M
+               xma.hu  fp1a_2 = ux, v0, f0     C F
         ;;
-       xma.l           fp0b_3 = uy, v0, f0     C F
-       xma.hu          fp1a_3 = uy, v0, f0     C F
+               xma.l   fp0b_3 = uy, v0, f0     C F
+               xma.hu  fp1a_3 = uy, v0, f0     C F
         ;;
-       getf.sig        acc0 = fp0b_2           C M
-       xma.l           fp1b_2 = ux, v1,fp1a_2  C F
-       xma.hu          fp2a_2 = ux, v1,fp1a_2  C F
-       ldf8            u_2 = [up], 8           C M
-       br.cloop.dptk   .grt5
+               getfsig acc0 = fp0b_2           C M
+               xma.l   fp1b_2 = ux, v1,fp1a_2  C F
+               ldf8    u_2 = [up], 8           C M
+               xma.hu  fp2a_2 = ux, v1,fp1a_2  C F
+               br.cloop.dptk   L(gt5)
  
-       xma.l           fp0b_0 = u_0, v0, f0    C F
-       xma.hu          fp1a_0 = u_0, v0, f0    C F
+               xma.l   fp0b_0 = u_0, v0, f0    C F
+               xma.hu  fp1a_0 = u_0, v0, f0    C F
         ;;
-       getf.sig        pr0_3 = fp0b_3          C M
-       xma.l           fp1b_3 = uy, v1,fp1a_3  C F
-       xma.hu          fp2a_3 = uy, v1,fp1a_3  C F
+               getfsig pr0_3 = fp0b_3          C M
+               xma.l   fp1b_3 = uy, v1,fp1a_3  C F
+               xma.hu  fp2a_3 = uy, v1,fp1a_3  C F
         ;;
-       getf.sig        pr1_2 = fp1b_2          C M
-       getf.sig        acc1_2 = fp2a_2         C M
-       xma.l           fp0b_1 = u_1, v0, f0    C F
-       xma.hu          fp1a_1 = u_1, v0, f0    C F
-       br              .Lcj5
+               getfsig pr1_2 = fp1b_2          C M
+               getfsig acc1_2 = fp2a_2         C M
+               xma.l   fp0b_1 = u_1, v0, f0    C F
+               xma.hu  fp1a_1 = u_1, v0, f0    C F
+               br      L(cj5)
  
-.grt5: xma.l           fp0b_0 = u_0, v0, f0
-       xma.hu          fp1a_0 = u_0, v0, f0
+L(gt5):                xma.l   fp0b_0 = u_0, v0, f0
+               xma.hu  fp1a_0 = u_0, v0, f0
         ;;
-       getf.sig        pr0_3 = fp0b_3
-       xma.l           fp1b_3 = uy, v1, fp1a_3
-       xma.hu          fp2a_3 = uy, v1, fp1a_3
+               getfsig pr0_3 = fp0b_3
+               xma.l   fp1b_3 = uy, v1, fp1a_3
+               xma.hu  fp2a_3 = uy, v1, fp1a_3
         ;;
-       ldf8            u_3 = [up], 8
-       getf.sig        pr1_2 = fp1b_2
+               ldf8    u_3 = [up], 8
+               getfsig pr1_2 = fp1b_2
+               xma.l   fp0b_1 = u_1, v0, f0
         ;;
-       getf.sig        acc1_2 = fp2a_2
-       xma.l           fp0b_1 = u_1, v0, f0
-       xma.hu          fp1a_1 = u_1, v0, f0
-       br              .LL01
+               getfsig acc1_2 = fp2a_2
+               xma.hu  fp1a_1 = u_1, v0, f0
+               br      L(01)
  
  
-C We have two variants for n = 2.  They turn out to run at exactly the same
-C speed.  But the first, odd variant might allow one cycle to be trimmed.
         ALIGN(32)
-ifdef(`',`
-.Lb10:         C 03
-       br.cloop.dptk   .grt2
-               C 04
-               C 05
-               C 06
-       xma.l           fp0b_1 = ux, v0, f0     C 0
-       xma.hu          fp1a_1 = ux, v0, f0     C 1
-       ;;      C 07
-       xma.l           fp0b_2 = uy, v0, f0     C 1
-       xma.l           fp1b_1 = ux, v1, f0     C 1
-       ;;      C 08
-       xma.hu          fp1a_2 = uy, v0, f0     C 2
-       xma.hu          fp2a_1 = ux, v1, f0     C 2
-       ;;      C 09
-       xma.l           fp1b_2 = uy, v1, f0     C 2
-       xma.hu          fp2a_2 = uy, v1, f0     C 3
-       ;;      C 10
-       getf.sig        r16 = fp1a_1
-       stf8            [rp] = fp0b_1, 8
-       ;;      C 11
-       getf.sig        r17 = fp0b_2
-               C 12
-       getf.sig        r18 = fp1b_1
-               C 13
-       getf.sig        r19 = fp1a_2
-               C 14
-       getf.sig        r20 = fp2a_1
-               C 15
-       getf.sig        r21 = fp1b_2
-       ;;      C 16
-       getf.sig        r8 = fp2a_2
-       add             r24 = r16, r17
-       ;;      C 17
-       cmp.ltu         p6, p7 = r24, r16
-       add             r26 = r24, r18
-       ;;      C 18
-       cmp.ltu         p8, p9 = r26, r24
-       ;;      C 19
-       st8             [rp] = r26, 8
-  (p6) add             r25 = r19, r20, 1
-  (p7) add             r25 = r19, r20
-       ;;      C 20
-  (p8) add             r27 = r25, r21, 1
-  (p9) add             r27 = r25, r21
-  (p6) cmp.leu         p10, p0 = r25, r19
-  (p7) cmp.ltu         p10, p0 = r25, r19
-       ;;      C 21
-  (p10)        add             r8 = 1, r8
-  (p8) cmp.leu         p12, p0 = r27, r25
-  (p9) cmp.ltu         p12, p0 = r27, r25
-       ;;      C 22
-       st8             [rp] = r27, 8
-       mov.i           ar.lc = r2
-  (p12)        add             r8 = 1, r8
-       br.ret.sptk.many b0
-')
-
-.Lb10:         C 03
-       br.cloop.dptk   .grt2
-               C 04
-               C 05
-               C 06
-       xma.l           fp0b_1 = ux, v0, f0
-       xma.hu          fp1a_1 = ux, v0, f0
-       ;;      C 07
-       xma.l           fp0b_2 = uy, v0, f0
-       xma.hu          fp1a_2 = uy, v0, f0
-       ;;      C 08
-               C 09
-               C 10
-       stf8            [rp] = fp0b_1, 8
-       xma.l           fp1b_1 = ux, v1, fp1a_1
-       xma.hu          fp2a_1 = ux, v1, fp1a_1
-       ;;      C 11
-       getf.sig        acc0 = fp0b_2
-       xma.l           fp1b_2 = uy, v1, fp1a_2
-       xma.hu          fp2a_2 = uy, v1, fp1a_2
-       ;;      C 12
-               C 13
-               C 14
-       getf.sig        pr1_1 = fp1b_1
-               C 15
-       getf.sig        acc1_1 = fp2a_1
-               C 16
-       getf.sig        pr1_2 = fp1b_2
-               C 17
-       getf.sig        r8 = fp2a_2
-       ;;      C 18
-               C 19
-       add             s0 = pr1_1, acc0
-       ;;      C 20
-       st8             [rp] = s0, 8
-       cmp.ltu         p8, p9 = s0, pr1_1
-       sub             r31 = -1, acc1_1
-       ;;      C 21
-       .pred.rel "mutex", p8, p9
-  (p8) add             acc0 = pr1_2, acc1_1, 1
-  (p9) add             acc0 = pr1_2, acc1_1
-  (p8) cmp.leu         p10, p0 = r31, pr1_2
-  (p9) cmp.ltu         p10, p0 = r31, pr1_2
-       ;;      C 22
-       st8             [rp] = acc0, 8
-       mov.i           ar.lc = r2
-  (p10)        add             r8 = 1, r8
-       br.ret.sptk.many b0
-
-
-.grt2: ldf8            u_3 = [up], 8
-       mov             acc1_0 = 0
-       mov             pr1_0 = 0
-       ;;
-       mov             pr0_1 = 0
-       xma.l           fp0b_1 = ux, v0, f0
-       ldf8            u_0 = [up], 8
-       xma.hu          fp1a_1 = ux, v0, f0
-       ;;
-       xma.l           fp0b_2 = uy, v0, f0
-       xma.hu          fp1a_2 = uy, v0, f0
-       ;;
-       getf.sig        acc0 = fp0b_1
-       xma.l           fp1b_1 = ux, v1, fp1a_1
-       xma.hu          fp2a_1 = ux, v1, fp1a_1
-       ;;
-       ldf8            u_1 = [up], 8
-       xma.l           fp0b_3 = u_3, v0, f0
-       xma.hu          fp1a_3 = u_3, v0, f0
-       ;;
-       getf.sig        pr0_2 = fp0b_2
-       xma.l           fp1b_2 = uy, v1, fp1a_2
-       xma.hu          fp2a_2 = uy, v1, fp1a_2
-       ;;
-       ldf8            u_2 = [up], 8
-       getf.sig        pr1_1 = fp1b_1
-       ;;
-       getf.sig        acc1_1 = fp2a_1
-       xma.l           fp0b_0 = u_0, v0, f0
-       cmp.ne          p8, p9 = r0, r0
-       cmp.ne          p12, p13 = r0, r0
-       xma.hu          fp1a_0 = u_0, v0, f0
-       br              .LL10
+L(b10):                br.cloop.dptk   L(gt2)
+               xma.l   fp0b_1 = ux, v0, f0
+               xma.hu  fp1a_1 = ux, v0, f0
+       ;;
+               xma.l   fp0b_2 = uy, v0, f0
+               xma.hu  fp1a_2 = uy, v0, f0
+       ;;
+               stf8    [rp] = fp0b_1, 8
+               xma.l   fp1b_1 = ux, v1, fp1a_1
+               xma.hu  fp2a_1 = ux, v1, fp1a_1
+       ;;
+               getfsig acc0 = fp0b_2
+               xma.l   fp1b_2 = uy, v1, fp1a_2
+               xma.hu  fp2a_2 = uy, v1, fp1a_2
+       ;;
+               getfsig pr1_1 = fp1b_1
+               getfsig acc1_1 = fp2a_1
+               mov     ar.lc = r2
+               getfsig pr1_2 = fp1b_2
+               getfsig r8 = fp2a_2
+       ;;
+               add     s0 = pr1_1, acc0
+       ;;
+               st8     [rp] = s0, 8
+               cmp.ltu p8, p9 = s0, pr1_1
+               sub     r31 = -1, acc1_1
+       ;;
+               .pred.rel "mutex", p8, p9
+       (p8)    add     acc0 = pr1_2, acc1_1, 1
+       (p9)    add     acc0 = pr1_2, acc1_1
+       (p8)    cmp.leu p10, p0 = r31, pr1_2
+       (p9)    cmp.ltu p10, p0 = r31, pr1_2
+       ;;
+               st8     [rp] = acc0, 8
+       (p10)   add     r8 = 1, r8
+               br.ret.sptk.many b0
+
+L(gt2):                ldf8    u_3 = [up], 8
+               mov     acc1_0 = 0
+               mov     pr1_0 = 0
+       ;;
+               mov     pr0_1 = 0
+               xma.l   fp0b_1 = ux, v0, f0
+               ldf8    u_0 = [up], 8
+               xma.hu  fp1a_1 = ux, v0, f0
+       ;;
+               xma.l   fp0b_2 = uy, v0, f0
+               xma.hu  fp1a_2 = uy, v0, f0
+       ;;
+               getfsig acc0 = fp0b_1
+               xma.l   fp1b_1 = ux, v1, fp1a_1
+               xma.hu  fp2a_1 = ux, v1, fp1a_1
+       ;;
+               ldf8    u_1 = [up], 8
+               xma.l   fp0b_3 = u_3, v0, f0
+               xma.hu  fp1a_3 = u_3, v0, f0
+       ;;
+               getfsig pr0_2 = fp0b_2
+               xma.l   fp1b_2 = uy, v1, fp1a_2
+               xma.hu  fp2a_2 = uy, v1, fp1a_2
+       ;;
+               ldf8    u_2 = [up], 8
+               getfsig pr1_1 = fp1b_1
+       ;;
+.mfi;          getfsig acc1_1 = fp2a_1
+               xma.l   fp0b_0 = u_0, v0, f0
+               cmp.ne  p8, p9 = r0, r0
+.mfb;          cmp.ne  p12, p13 = r0, r0
+               xma.hu  fp1a_0 = u_0, v0, f0
+               br      L(10)
  
  
         ALIGN(32)
-.Lb11: mov             acc1_3 = 0
-       mov             pr1_3 = 0
-       mov             pr0_0 = 0
-       cmp.ne          p6, p7 = r0, r0
-       ;;
-       ldf8            u_2 = [up], 8
-       br.cloop.dptk   .grt3
+L(b11):                mov     acc1_3 = 0
+               mov     pr1_3 = 0
+               mov     pr0_0 = 0
+               ldf8    u_2 = [up], 8
+               cmp.ne  p6, p7 = r0, r0
+               br.cloop.dptk   L(gt3)
         ;;
-       xma.l           fp0b_0 = ux, v0, f0
-       xma.hu          fp1a_0 = ux, v0, f0
+               xma.l   fp0b_0 = ux, v0, f0
+               xma.hu  fp1a_0 = ux, v0, f0
         ;;
-       cmp.ne          p10, p11 = r0, r0
-       xma.l           fp0b_1 = uy, v0, f0
-       xma.hu          fp1a_1 = uy, v0, f0
+               cmp.ne  p10, p11 = r0, r0
+               xma.l   fp0b_1 = uy, v0, f0
+               xma.hu  fp1a_1 = uy, v0, f0
         ;;
-       getf.sig        acc0 = fp0b_0
-       xma.l           fp1b_0 = ux, v1, fp1a_0
-       xma.hu          fp2a_0 = ux, v1, fp1a_0
+               getfsig acc0 = fp0b_0
+               xma.l   fp1b_0 = ux, v1, fp1a_0
+               xma.hu  fp2a_0 = ux, v1, fp1a_0
         ;;
-       xma.l           fp0b_2 = u_2, v0, f0
-       xma.hu          fp1a_2 = u_2, v0, f0
+               xma.l   fp0b_2 = u_2, v0, f0
+               xma.hu  fp1a_2 = u_2, v0, f0
         ;;
-       getf.sig        pr0_1 = fp0b_1
-       xma.l           fp1b_1 = uy, v1, fp1a_1
-       xma.hu          fp2a_1 = uy, v1, fp1a_1
+               getfsig pr0_1 = fp0b_1
+               xma.l   fp1b_1 = uy, v1, fp1a_1
+               xma.hu  fp2a_1 = uy, v1, fp1a_1
         ;;
-       getf.sig        pr1_0 = fp1b_0
-       getf.sig        acc1_0 = fp2a_0
-       br              .Lcj3
+               getfsig pr1_0 = fp1b_0
+               getfsig acc1_0 = fp2a_0
+               br      L(cj3)
  
-.grt3: xma.l           fp0b_0 = ux, v0, f0
-       cmp.ne          p10, p11 = r0, r0
-       ldf8            u_3 = [up], 8
-       xma.hu          fp1a_0 = ux, v0, f0
+L(gt3):                xma.l   fp0b_0 = ux, v0, f0
+               cmp.ne  p10, p11 = r0, r0
+               ldf8    u_3 = [up], 8
+               xma.hu  fp1a_0 = ux, v0, f0
         ;;
-       xma.l           fp0b_1 = uy, v0, f0
-       xma.hu          fp1a_1 = uy, v0, f0
+               xma.l   fp0b_1 = uy, v0, f0
+               xma.hu  fp1a_1 = uy, v0, f0
         ;;
-       getf.sig        acc0 = fp0b_0
-       xma.l           fp1b_0 = ux, v1, fp1a_0
-       ldf8            u_0 = [up], 8
-       xma.hu          fp2a_0 = ux, v1, fp1a_0
+               getfsig acc0 = fp0b_0
+               xma.l   fp1b_0 = ux, v1, fp1a_0
+               ldf8    u_0 = [up], 8
+               xma.hu  fp2a_0 = ux, v1, fp1a_0
         ;;
-       xma.l           fp0b_2 = u_2, v0, f0
-       xma.hu          fp1a_2 = u_2, v0, f0
+               xma.l   fp0b_2 = u_2, v0, f0
+               xma.hu  fp1a_2 = u_2, v0, f0
         ;;
-       getf.sig        pr0_1 = fp0b_1
-       xma.l           fp1b_1 = uy, v1, fp1a_1
-       xma.hu          fp2a_1 = uy, v1, fp1a_1
+               getfsig pr0_1 = fp0b_1
+               xma.l   fp1b_1 = uy, v1, fp1a_1
+               xma.hu  fp2a_1 = uy, v1, fp1a_1
         ;;
-       ldf8            u_1 = [up], 8
-       getf.sig        pr1_0 = fp1b_0
+               ldf8    u_1 = [up], 8
+               getfsig pr1_0 = fp1b_0
         ;;
-       getf.sig        acc1_0 = fp2a_0
-       xma.l           fp0b_3 = u_3, v0, f0
-       xma.hu          fp1a_3 = u_3, v0, f0
-       br              .LL11
+               getfsig acc1_0 = fp2a_0
+               xma.l   fp0b_3 = u_3, v0, f0
+               xma.hu  fp1a_3 = u_3, v0, f0
+               br      L(11)
  
  
  C *** MAIN LOOP START ***
         ALIGN(32)
-.Loop:                                         C 00
-       .pred.rel "mutex", p12, p13
-       getf.sig        pr0_3 = fp0b_3
-       xma.l           fp1b_3 = u_3, v1, fp1a_3
-  (p12)        add             s0 = pr1_0, acc0, 1
-  (p13)        add             s0 = pr1_0, acc0
-       xma.hu          fp2a_3 = u_3, v1, fp1a_3
+L(top):                                                C 00
+               .pred.rel "mutex", p8, p9
+               .pred.rel "mutex", p12, p13
+               ldf8    u_3 = [up], 8
+               getfsig pr1_2 = fp1b_2
+       (p8)    cmp.leu p6, p7 = acc0, pr0_1
+       (p9)    cmp.ltu p6, p7 = acc0, pr0_1
+       (p12)   cmp.leu p10, p11 = s0, pr1_0
+       (p13)   cmp.ltu p10, p11 = s0, pr1_0
         ;;                                      C 01
-       .pred.rel "mutex", p8, p9
-       .pred.rel "mutex", p12, p13
-       ldf8            u_3 = [up], 8
-       getf.sig        pr1_2 = fp1b_2
-  (p8) cmp.leu         p6, p7 = acc0, pr0_1
-  (p9) cmp.ltu         p6, p7 = acc0, pr0_1
-  (p12)        cmp.leu         p10, p11 = s0, pr1_0
-  (p13)        cmp.ltu         p10, p11 = s0, pr1_0
+               .pred.rel "mutex", p6, p7
+               getfsig acc1_2 = fp2a_2
+               st8     [rp] = s0, 8
+               xma.l   fp0b_1 = u_1, v0, f0
+       (p6)    add     acc0 = pr0_2, acc1_0, 1
+       (p7)    add     acc0 = pr0_2, acc1_0
+               xma.hu  fp1a_1 = u_1, v0, f0
         ;;                                      C 02
-       .pred.rel "mutex", p6, p7
-       getf.sig        acc1_2 = fp2a_2
-       st8             [rp] = s0, 8
-       xma.l           fp0b_1 = u_1, v0, f0
-  (p6) add             acc0 = pr0_2, acc1_0, 1
-  (p7) add             acc0 = pr0_2, acc1_0
-       xma.hu          fp1a_1 = u_1, v0, f0
+L(01):
+               .pred.rel "mutex", p10, p11
+               getfsig pr0_0 = fp0b_0
+               xma.l   fp1b_0 = u_0, v1, fp1a_0
+       (p10)   add     s0 = pr1_1, acc0, 1
+       (p11)   add     s0 = pr1_1, acc0
+               xma.hu  fp2a_0 = u_0, v1, fp1a_0
+               nop     1
         ;;                                      C 03
-.LL01:
-       .pred.rel "mutex", p10, p11
-       getf.sig        pr0_0 = fp0b_0
-       xma.l           fp1b_0 = u_0, v1, fp1a_0
-  (p10)        add             s0 = pr1_1, acc0, 1
-  (p11)        add             s0 = pr1_1, acc0
-       xma.hu          fp2a_0 = u_0, v1, fp1a_0
+               .pred.rel "mutex", p6, p7
+               .pred.rel "mutex", p10, p11
+               ldf8    u_0 = [up], 8
+               getfsig pr1_3 = fp1b_3
+       (p6)    cmp.leu p8, p9 = acc0, pr0_2
+       (p7)    cmp.ltu p8, p9 = acc0, pr0_2
+       (p10)   cmp.leu p12, p13 = s0, pr1_1
+       (p11)   cmp.ltu p12, p13 = s0, pr1_1
         ;;                                      C 04
-       .pred.rel "mutex", p6, p7
-       .pred.rel "mutex", p10, p11
-       ldf8            u_0 = [up], 8
-       getf.sig        pr1_3 = fp1b_3
-  (p6) cmp.leu         p8, p9 = acc0, pr0_2
-  (p7) cmp.ltu         p8, p9 = acc0, pr0_2
-  (p10)        cmp.leu         p12, p13 = s0, pr1_1
-  (p11)        cmp.ltu         p12, p13 = s0, pr1_1
+               .pred.rel "mutex", p8, p9
+               getfsig acc1_3 = fp2a_3
+               st8     [rp] = s0, 8
+               xma.l   fp0b_2 = u_2, v0, f0
+       (p8)    add     acc0 = pr0_3, acc1_1, 1
+       (p9)    add     acc0 = pr0_3, acc1_1
+               xma.hu  fp1a_2 = u_2, v0, f0
         ;;                                      C 05
-       .pred.rel "mutex", p8, p9
-       getf.sig        acc1_3 = fp2a_3
-       st8             [rp] = s0, 8
-       xma.l           fp0b_2 = u_2, v0, f0
-  (p8) add             acc0 = pr0_3, acc1_1, 1
-  (p9) add             acc0 = pr0_3, acc1_1
-       xma.hu          fp1a_2 = u_2, v0, f0
+L(00):
+               .pred.rel "mutex", p12, p13
+               getfsig pr0_1 = fp0b_1
+               xma.l   fp1b_1 = u_1, v1, fp1a_1
+       (p12)   add     s0 = pr1_2, acc0, 1
+       (p13)   add     s0 = pr1_2, acc0
+               xma.hu  fp2a_1 = u_1, v1, fp1a_1
+               nop     1
         ;;                                      C 06
-.LL00:
-       .pred.rel "mutex", p12, p13
-       getf.sig        pr0_1 = fp0b_1
-       xma.l           fp1b_1 = u_1, v1, fp1a_1
-  (p12)        add             s0 = pr1_2, acc0, 1
-  (p13)        add             s0 = pr1_2, acc0
-       xma.hu          fp2a_1 = u_1, v1, fp1a_1
+               .pred.rel "mutex", p8, p9
+               .pred.rel "mutex", p12, p13
+               ldf8    u_1 = [up], 8
+               getfsig pr1_0 = fp1b_0
+       (p8)    cmp.leu p6, p7 = acc0, pr0_3
+       (p9)    cmp.ltu p6, p7 = acc0, pr0_3
+       (p12)   cmp.leu p10, p11 = s0, pr1_2
+       (p13)   cmp.ltu p10, p11 = s0, pr1_2
         ;;                                      C 07
-       .pred.rel "mutex", p8, p9
-       .pred.rel "mutex", p12, p13
-       ldf8            u_1 = [up], 8
-       getf.sig        pr1_0 = fp1b_0
-  (p8) cmp.leu         p6, p7 = acc0, pr0_3
-  (p9) cmp.ltu         p6, p7 = acc0, pr0_3
-  (p12)        cmp.leu         p10, p11 = s0, pr1_2
-  (p13)        cmp.ltu         p10, p11 = s0, pr1_2
+               .pred.rel "mutex", p6, p7
+               getfsig acc1_0 = fp2a_0
+               st8     [rp] = s0, 8
+               xma.l   fp0b_3 = u_3, v0, f0
+       (p6)    add     acc0 = pr0_0, acc1_2, 1
+       (p7)    add     acc0 = pr0_0, acc1_2
+               xma.hu  fp1a_3 = u_3, v0, f0
         ;;                                      C 08
-       .pred.rel "mutex", p6, p7
-       getf.sig        acc1_0 = fp2a_0
-       st8             [rp] = s0, 8
-       xma.l           fp0b_3 = u_3, v0, f0
-  (p6) add             acc0 = pr0_0, acc1_2, 1
-  (p7) add             acc0 = pr0_0, acc1_2
-       xma.hu          fp1a_3 = u_3, v0, f0
+L(11):
+               .pred.rel "mutex", p10, p11
+               getfsig pr0_2 = fp0b_2
+               xma.l   fp1b_2 = u_2, v1, fp1a_2
+       (p10)   add     s0 = pr1_3, acc0, 1
+       (p11)   add     s0 = pr1_3, acc0
+               xma.hu  fp2a_2 = u_2, v1, fp1a_2
+               nop     1
         ;;                                      C 09
-.LL11:
-       .pred.rel "mutex", p10, p11
-       getf.sig        pr0_2 = fp0b_2
-       xma.l           fp1b_2 = u_2, v1, fp1a_2
-  (p10)        add             s0 = pr1_3, acc0, 1
-  (p11)        add             s0 = pr1_3, acc0
-       xma.hu          fp2a_2 = u_2, v1, fp1a_2
+               .pred.rel "mutex", p6, p7
+               .pred.rel "mutex", p10, p11
+               ldf8    u_2 = [up], 8
+               getfsig pr1_1 = fp1b_1
+       (p6)    cmp.leu p8, p9 = acc0, pr0_0
+       (p7)    cmp.ltu p8, p9 = acc0, pr0_0
+       (p10)   cmp.leu p12, p13 = s0, pr1_3
+       (p11)   cmp.ltu p12, p13 = s0, pr1_3
         ;;                                      C 10
-       .pred.rel "mutex", p6, p7
-       .pred.rel "mutex", p10, p11
-       ldf8            u_2 = [up], 8
-       getf.sig        pr1_1 = fp1b_1
-  (p6) cmp.leu         p8, p9 = acc0, pr0_0
-  (p7) cmp.ltu         p8, p9 = acc0, pr0_0
-  (p10)        cmp.leu         p12, p13 = s0, pr1_3
-  (p11)        cmp.ltu         p12, p13 = s0, pr1_3
+               .pred.rel "mutex", p8, p9
+               getfsig acc1_1 = fp2a_1
+               st8     [rp] = s0, 8
+               xma.l   fp0b_0 = u_0, v0, f0
+       (p8)    add     acc0 = pr0_1, acc1_3, 1
+       (p9)    add     acc0 = pr0_1, acc1_3
+               xma.hu  fp1a_0 = u_0, v0, f0
         ;;                                      C 11
-       .pred.rel "mutex", p8, p9
-       getf.sig        acc1_1 = fp2a_1
-       st8             [rp] = s0, 8
-       xma.l           fp0b_0 = u_0, v0, f0
-  (p8) add             acc0 = pr0_1, acc1_3, 1
-  (p9) add             acc0 = pr0_1, acc1_3
-       xma.hu          fp1a_0 = u_0, v0, f0
-.LL10: br.cloop.dptk   .Loop                   C 12
+L(10):
+               .pred.rel "mutex", p12, p13
+               getfsig pr0_3 = fp0b_3
+               xma.l   fp1b_3 = u_3, v1, fp1a_3
+       (p12)   add     s0 = pr1_0, acc0, 1
+       (p13)   add     s0 = pr1_0, acc0
+               xma.hu  fp2a_3 = u_3, v1, fp1a_3
+               br.cloop.dptk   L(top)
         ;;
  C *** MAIN LOOP END ***
  
-.Lcj6:
-       .pred.rel "mutex", p12, p13
-       getf.sig        pr0_3 = fp0b_3
-       xma.l           fp1b_3 = u_3, v1, fp1a_3
-  (p12)        add             s0 = pr1_0, acc0, 1
-  (p13)        add             s0 = pr1_0, acc0
-       xma.hu          fp2a_3 = u_3, v1, fp1a_3
-       ;;
-       .pred.rel "mutex", p8, p9
-       .pred.rel "mutex", p12, p13
-       getf.sig        pr1_2 = fp1b_2
-  (p8) cmp.leu         p6, p7 = acc0, pr0_1
-  (p9) cmp.ltu         p6, p7 = acc0, pr0_1
-  (p12)        cmp.leu         p10, p11 = s0, pr1_0
-  (p13)        cmp.ltu         p10, p11 = s0, pr1_0
-       ;;
-       .pred.rel "mutex", p6, p7
-       getf.sig        acc1_2 = fp2a_2
-       st8             [rp] = s0, 8
-       xma.l           fp0b_1 = u_1, v0, f0
-  (p6) add             acc0 = pr0_2, acc1_0, 1
-  (p7) add             acc0 = pr0_2, acc1_0
-       xma.hu          fp1a_1 = u_1, v0, f0
-       ;;
-.Lcj5:
-       .pred.rel "mutex", p10, p11
-       getf.sig        pr0_0 = fp0b_0
-       xma.l           fp1b_0 = u_0, v1, fp1a_0
-  (p10)        add             s0 = pr1_1, acc0, 1
-  (p11)        add             s0 = pr1_1, acc0
-       xma.hu          fp2a_0 = u_0, v1, fp1a_0
-       ;;
-       .pred.rel "mutex", p6, p7
-       .pred.rel "mutex", p10, p11
-       getf.sig        pr1_3 = fp1b_3
-  (p6) cmp.leu         p8, p9 = acc0, pr0_2
-  (p7) cmp.ltu         p8, p9 = acc0, pr0_2
-  (p10)        cmp.leu         p12, p13 = s0, pr1_1
-  (p11)        cmp.ltu         p12, p13 = s0, pr1_1
-       ;;
-       .pred.rel "mutex", p8, p9
-       getf.sig        acc1_3 = fp2a_3
-       st8             [rp] = s0, 8
-       xma.l           fp0b_2 = u_2, v0, f0
-  (p8) add             acc0 = pr0_3, acc1_1, 1
-  (p9) add             acc0 = pr0_3, acc1_1
-       xma.hu          fp1a_2 = u_2, v0, f0
-       ;;
-.Lcj4:
-       .pred.rel "mutex", p12, p13
-       getf.sig        pr0_1 = fp0b_1
-       xma.l           fp1b_1 = u_1, v1, fp1a_1
-  (p12)        add             s0 = pr1_2, acc0, 1
-  (p13)        add             s0 = pr1_2, acc0
-       xma.hu          fp2a_1 = u_1, v1, fp1a_1
-       ;;
-       .pred.rel "mutex", p8, p9
-       .pred.rel "mutex", p12, p13
-       getf.sig        pr1_0 = fp1b_0
-  (p8) cmp.leu         p6, p7 = acc0, pr0_3
-  (p9) cmp.ltu         p6, p7 = acc0, pr0_3
-  (p12)        cmp.leu         p10, p11 = s0, pr1_2
-  (p13)        cmp.ltu         p10, p11 = s0, pr1_2
-       ;;
-       .pred.rel "mutex", p6, p7
-       getf.sig        acc1_0 = fp2a_0
-       st8             [rp] = s0, 8
-  (p6) add             acc0 = pr0_0, acc1_2, 1
-  (p7) add             acc0 = pr0_0, acc1_2
-       ;;
-.Lcj3:
-       .pred.rel "mutex", p10, p11
-       getf.sig        pr0_2 = fp0b_2
-       xma.l           fp1b_2 = u_2, v1, fp1a_2
-  (p10)        add             s0 = pr1_3, acc0, 1
-  (p11)        add             s0 = pr1_3, acc0
-       xma.hu          fp2a_2 = u_2, v1, fp1a_2
-       ;;
-       .pred.rel "mutex", p6, p7
-       .pred.rel "mutex", p10, p11
-       getf.sig        pr1_1 = fp1b_1
-  (p6) cmp.leu         p8, p9 = acc0, pr0_0
-  (p7) cmp.ltu         p8, p9 = acc0, pr0_0
-  (p10)        cmp.leu         p12, p13 = s0, pr1_3
-  (p11)        cmp.ltu         p12, p13 = s0, pr1_3
-       ;;
-       .pred.rel "mutex", p8, p9
-       getf.sig        acc1_1 = fp2a_1
-       st8             [rp] = s0, 8
-  (p8) add             acc0 = pr0_1, acc1_3, 1
-  (p9) add             acc0 = pr0_1, acc1_3
-       ;;
-       .pred.rel "mutex", p12, p13
-  (p12)        add             s0 = pr1_0, acc0, 1
-  (p13)        add             s0 = pr1_0, acc0
-       ;;
-       .pred.rel "mutex", p8, p9
-       .pred.rel "mutex", p12, p13
-       getf.sig        pr1_2 = fp1b_2
-  (p8) cmp.leu         p6, p7 = acc0, pr0_1
-  (p9) cmp.ltu         p6, p7 = acc0, pr0_1
-  (p12)        cmp.leu         p10, p11 = s0, pr1_0
-  (p13)        cmp.ltu         p10, p11 = s0, pr1_0
-       ;;
-       .pred.rel "mutex", p6, p7
-       getf.sig        acc1_2 = fp2a_2
-       st8             [rp] = s0, 8
-  (p6) add             acc0 = pr0_2, acc1_0, 1
-  (p7) add             acc0 = pr0_2, acc1_0
-       ;;
-       .pred.rel "mutex", p10, p11
-  (p10)        add             s0 = pr1_1, acc0, 1
-  (p11)        add             s0 = pr1_1, acc0
-       ;;
-       .pred.rel "mutex", p6, p7
-       .pred.rel "mutex", p10, p11
-  (p6) cmp.leu         p8, p9 = acc0, pr0_2
-  (p7) cmp.ltu         p8, p9 = acc0, pr0_2
-  (p10)        cmp.leu         p12, p13 = s0, pr1_1
-  (p11)        cmp.ltu         p12, p13 = s0, pr1_1
-       ;;
-       .pred.rel "mutex", p8, p9
-       st8             [rp] = s0, 8
-  (p8) add             acc0 = pr1_2, acc1_1, 1
-  (p9) add             acc0 = pr1_2, acc1_1
-       ;;
-       .pred.rel "mutex", p8, p9
-  (p8) cmp.leu         p10, p11 = acc0, pr1_2
-  (p9) cmp.ltu         p10, p11 = acc0, pr1_2
-  (p12)        add             acc0 = 1, acc0
-       ;;
-       st8             [rp] = acc0, 8
-  (p12)        cmp.eq.or       p10, p0 = 0, acc0
-       mov             r8 = acc1_2
-       ;;
-       .pred.rel "mutex", p10, p11
-  (p10)        add             r8 = 1, r8
-       mov.i           ar.lc = r2
-       br.ret.sptk.many b0
+               .pred.rel "mutex", p8, p9
+               .pred.rel "mutex", p12, p13
+.mmi;          getfsig pr1_2 = fp1b_2
+               st8     [rp] = s0, 8
+       (p8)    cmp.leu p6, p7 = acc0, pr0_1
+.mmi;  (p9)    cmp.ltu p6, p7 = acc0, pr0_1
+       (p12)   cmp.leu p10, p11 = s0, pr1_0
+       (p13)   cmp.ltu p10, p11 = s0, pr1_0
+       ;;
+               .pred.rel "mutex", p6, p7
+.mfi;          getfsig acc1_2 = fp2a_2
+               xma.l   fp0b_1 = u_1, v0, f0
+               nop     1
+.mmf;  (p6)    add     acc0 = pr0_2, acc1_0, 1
+       (p7)    add     acc0 = pr0_2, acc1_0
+               xma.hu  fp1a_1 = u_1, v0, f0
+       ;;
+L(cj5):
+               .pred.rel "mutex", p10, p11
+.mfi;          getfsig pr0_0 = fp0b_0
+               xma.l   fp1b_0 = u_0, v1, fp1a_0
+       (p10)   add     s0 = pr1_1, acc0, 1
+.mfi;  (p11)   add     s0 = pr1_1, acc0
+               xma.hu  fp2a_0 = u_0, v1, fp1a_0
+               nop     1
+       ;;
+               .pred.rel "mutex", p6, p7
+               .pred.rel "mutex", p10, p11
+.mmi;          getfsig pr1_3 = fp1b_3
+               st8     [rp] = s0, 8
+       (p6)    cmp.leu p8, p9 = acc0, pr0_2
+.mmi;  (p7)    cmp.ltu p8, p9 = acc0, pr0_2
+       (p10)   cmp.leu p12, p13 = s0, pr1_1
+       (p11)   cmp.ltu p12, p13 = s0, pr1_1
+       ;;
+               .pred.rel "mutex", p8, p9
+.mfi;          getfsig acc1_3 = fp2a_3
+               xma.l   fp0b_2 = u_2, v0, f0
+               nop     1
+.mmf;  (p8)    add     acc0 = pr0_3, acc1_1, 1
+       (p9)    add     acc0 = pr0_3, acc1_1
+               xma.hu  fp1a_2 = u_2, v0, f0
+       ;;
+L(cj4):
+               .pred.rel "mutex", p12, p13
+.mfi;          getfsig pr0_1 = fp0b_1
+               xma.l   fp1b_1 = u_1, v1, fp1a_1
+       (p12)   add     s0 = pr1_2, acc0, 1
+.mfi;  (p13)   add     s0 = pr1_2, acc0
+               xma.hu  fp2a_1 = u_1, v1, fp1a_1
+               nop     1
+       ;;
+               .pred.rel "mutex", p8, p9
+               .pred.rel "mutex", p12, p13
+.mmi;          getfsig pr1_0 = fp1b_0
+               st8     [rp] = s0, 8
+       (p8)    cmp.leu p6, p7 = acc0, pr0_3
+.mmi;  (p9)    cmp.ltu p6, p7 = acc0, pr0_3
+       (p12)   cmp.leu p10, p11 = s0, pr1_2
+       (p13)   cmp.ltu p10, p11 = s0, pr1_2
+       ;;
+               .pred.rel "mutex", p6, p7
+.mmi;          getfsig acc1_0 = fp2a_0
+       (p6)    add     acc0 = pr0_0, acc1_2, 1
+       (p7)    add     acc0 = pr0_0, acc1_2
+       ;;
+L(cj3):
+               .pred.rel "mutex", p10, p11
+.mfi;          getfsig pr0_2 = fp0b_2
+               xma.l   fp1b_2 = u_2, v1, fp1a_2
+       (p10)   add     s0 = pr1_3, acc0, 1
+.mfi;  (p11)   add     s0 = pr1_3, acc0
+               xma.hu  fp2a_2 = u_2, v1, fp1a_2
+               nop     1
+       ;;
+               .pred.rel "mutex", p6, p7
+               .pred.rel "mutex", p10, p11
+.mmi;          getfsig pr1_1 = fp1b_1
+               st8     [rp] = s0, 8
+       (p6)    cmp.leu p8, p9 = acc0, pr0_0
+.mmi;  (p7)    cmp.ltu p8, p9 = acc0, pr0_0
+       (p10)   cmp.leu p12, p13 = s0, pr1_3
+       (p11)   cmp.ltu p12, p13 = s0, pr1_3
+       ;;
+               .pred.rel "mutex", p8, p9
+.mmi;          getfsig acc1_1 = fp2a_1
+       (p8)    add     acc0 = pr0_1, acc1_3, 1
+       (p9)    add     acc0 = pr0_1, acc1_3
+       ;;
+               .pred.rel "mutex", p12, p13
+.mmi;  (p12)   add     s0 = pr1_0, acc0, 1
+       (p13)   add     s0 = pr1_0, acc0
+               nop     1
+       ;;
+               .pred.rel "mutex", p8, p9
+               .pred.rel "mutex", p12, p13
+.mmi;          getfsig pr1_2 = fp1b_2
+               st8     [rp] = s0, 8
+       (p8)    cmp.leu p6, p7 = acc0, pr0_1
+.mmi;  (p9)    cmp.ltu p6, p7 = acc0, pr0_1
+       (p12)   cmp.leu p10, p11 = s0, pr1_0
+       (p13)   cmp.ltu p10, p11 = s0, pr1_0
+       ;;
+               .pred.rel "mutex", p6, p7
+.mmi;          getfsig r8 = fp2a_2
+       (p6)    add     acc0 = pr0_2, acc1_0, 1
+       (p7)    add     acc0 = pr0_2, acc1_0
+       ;;
+               .pred.rel "mutex", p10, p11
+.mmi;  (p10)   add     s0 = pr1_1, acc0, 1
+       (p11)   add     s0 = pr1_1, acc0
+       (p6)    cmp.leu p8, p9 = acc0, pr0_2
+       ;;
+               .pred.rel "mutex", p10, p11
+.mmi;  (p7)    cmp.ltu p8, p9 = acc0, pr0_2
+       (p10)   cmp.leu p12, p13 = s0, pr1_1
+       (p11)   cmp.ltu p12, p13 = s0, pr1_1
+       ;;
+               .pred.rel "mutex", p8, p9
+.mmi;          st8     [rp] = s0, 8
+       (p8)    add     acc0 = pr1_2, acc1_1, 1
+       (p9)    add     acc0 = pr1_2, acc1_1
+       ;;
+               .pred.rel "mutex", p8, p9
+.mmi;  (p8)    cmp.leu p10, p11 = acc0, pr1_2
+       (p9)    cmp.ltu p10, p11 = acc0, pr1_2
+       (p12)   add     acc0 = 1, acc0
+       ;;
+.mmi;          st8     [rp] = acc0, 8
+       (p12)   cmpeqor p10, p0 = 0, acc0
+               nop     1
+       ;;
+.mib;  (p10)   add     r8 = 1, r8
+               mov     ar.lc = r2
+               br.ret.sptk.many b0
  EPILOGUE()
  ASM_END()
diff --git a/mpn/ia64/popcount.asm b/mpn/ia64/popcount.asm

index a02bf4346cb1caaab1d7ecb9368f48d724c919be..fc2f4b91621c50e79b15f102855ea2eaa096065d 100644 (file)
--- a/mpn/ia64/popcount.asm
+++ b/mpn/ia64/popcount.asm
@@ -1,5 +1,7 @@
  dnl  IA-64 mpn_popcount -- mpn population count.
  
+dnl  Contributed to the GNU project by Torbjorn Granlund.
+
  dnl  Copyright 2000, 2001, 2002, 2003, 2004, 2005 Free Software Foundation,
  dnl  Inc.
  
diff --git a/mpn/ia64/rsh1aors_n.asm b/mpn/ia64/rsh1aors_n.asm

index 366b5c50bb38d30e768c8d05b551c917c515097c..d7531178b6533bbdc7424928fbf8c2514668f94d 100644 (file)
--- a/mpn/ia64/rsh1aors_n.asm
+++ b/mpn/ia64/rsh1aors_n.asm
@@ -1,5 +1,7 @@
  dnl  IA-64 mpn_rsh1add_n/mpn_rsh1sub_n -- rp[] = (up[] +- vp[]) >> 1.
  
+dnl  Contributed to the GNU project by Torbjorn Granlund.
+
  dnl  Copyright 2003, 2004, 2005 Free Software Foundation, Inc.
  
  dnl  This file is part of the GNU MP Library.
diff --git a/mpn/ia64/sqr_diag_addlsh1.asm b/mpn/ia64/sqr_diag_addlsh1.asm

new file mode 100644 (file)

index 0000000..591945a
--- /dev/null
+++ b/mpn/ia64/sqr_diag_addlsh1.asm
@@ -0,0 +1,133 @@
+dnl  IA-64 mpn_sqr_diag_addlsh1 -- squares of up[] limbs plus tp[] shifted left one bit.
+
+dnl  Contributed to the GNU project by Torbjorn Granlund.
+
+dnl  Copyright 2010, 2011 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C           cycles/limb
+C Itanium:      ?
+C Itanium 2:    2      Unrolling could bring it to 1.5 + epsilon
+
+C Exact performance table.  The 2nd line is this code, the 3rd line is ctop-
+C less code.  In an assembly sqr_basecase, the ctop-full numbers will become a
+C few cycles better since we can mitigate the many I0 instructions.
+C
+C 1   2   3   4   5   6   7   8   9  10  11  12  13  14  15  16  17  18  19  20
+C -  20  22  24  26  28  30  32  34  36  38  40  42  44  46  48  50  52  54  56 Needs updating
+C -  13  16  17  18  20  21  23  25  26  30  31  31  33  34  36  38  39  42  43
+
+C We should keep in mind that this code takes linear time in a O(n^2) context
+C and that it will only be used under SQR_TOOM2_THRESHOLD, which might become
+C around 60.  Keeping overhead down for smallish operands (< 10) is more
+C important than optimal cycle counts.
+
+C TODO
+C  * Make sure we don't depend on uninitialised r-registers, f-registers, or
+C    p-registers.
+C  * Optimise by doing first two loop iterations in function header.
+
+C INPUT PARAMETERS
+define(`rp_param', `r32')  define(`rp', `r14')         C size: 2n
+define(`tp_param', `r33')  define(`tp', `r15')         C size: 2n - 2
+define(`up_param', `r34')  define(`up', `r31')         C size: n
+define(`n',  `r35')
+
+
+ASM_START()
+PROLOGUE(mpn_sqr_diag_addlsh1)
+
+       .prologue
+       .save   ar.pfs, r2
+       .save   ar.lc, r3
+       .body
+
+.mmi;          alloc   r2 = ar.pfs, 4,24,0,24  C r2 = saved ar.pfs     M
+               nop     4711
+               mov     r3 = ar.lc              C r3 = saved ar.lc      I0
+.mmi;          mov     tp = tp_param           C                       M I
+               mov     up = up_param           C                       M I
+               mov     rp = rp_param           C                       M I
+       ;;
+.mmi;          ld8     r36 = [tp], 8           C load tp[0]            M
+               add     r20 = -2, n             C r20 = n - 2           M I
+               mov     r9 = ar.ec              C r9 = saved ar.ec      I0
+       ;;
+.mmi;          ld8     r32 = [tp], 8           C load tp[1]            M
+               mov     r16 = 0                 C                       M I
+               mov     ar.ec = 7               C epilogue count        I0
+       ;;
+.mmi;          nop     4711
+               mov     r44 = 0                 C                       M I
+               mov     ar.lc = r20             C loop count = n - 2    I0
+       ;;
+.mii;          mov     r33 = 0
+               mov     r10 = pr                C r10 = saved pr        I0
+               mov     pr.rot = 0x30000        C set p16 and p17       I0
+       ;;
+               br.cexit.spnt.few.clr   L(end)
+
+dnl *** MAIN LOOP START ***
+       ALIGN(32)
+L(top):
+.mfi;  (p18)   ldf8    f33 = [up], 8           C                       M
+       (p20)   xma.l   f36 = f35, f35, f42     C                       F
+       (p41)   cmpequc p50, p0 = -1, r44       C                       M I
+.mfi;          setfsig f40 = r16               C                       M23
+       (p20)   xma.hu  f38 = f35, f35, f42     C                       F
+       (p23)   add     r50 = r41, r49          C                       M I
+       ;;
+.mmi;  (p16)   ld8     r36 = [tp], 8           C                       M
+       (p23)   cmpltu  p40, p0 = r50, r41      C cyout hi              M I
+       (p19)   shrp    r45 = r38, r35, 63      C non-critical          I0
+.mmi;  (p21)   getfsig r39 = f39               C hi                    M2
+       (p24)   st8     [rp] = r51, 8           C hi                    M23
+       (p41)   add     r44 = 1, r44            C                       M I
+       ;;
+.mmi;  (p16)   ld8     r32 = [tp], 8           C                       M
+       (p50)   cmpeqor p40, p0 = -1, r50       C cyout hi              M I
+       (p17)   shrp    r16 = r33, r37, 63      C critical              I0
+.mmi;  (p21)   getfsig r42 = f37               C lo                    M2
+       (p23)   st8     [rp] = r44, 8           C lo                    M23
+       (p50)   add     r50 = 1, r50            C                       M I
+       ;;
+               br.ctop.sptk.few.clr L(top)     C                       B
+dnl *** MAIN LOOP END ***
+       ;;
+L(end):
+.mmi;          nop     4711
+       (p41)   add     r44 = 1, r44            C                       M I
+               shr.u   r48 = r39, 63           C                       I0
+       ;;
+.mmi;          st8     [rp] = r51, 8           C                       M23
+       (p41)   cmpequc p6, p0 = 0, r44         C                       M I
+               add     r50 = r41, r48          C                       M I
+       ;;
+.mmi;          st8     [rp] = r44, 8           C                       M23
+       (p6)    add     r50 = 1, r50            C                       M I
+               mov     ar.lc = r3              C restore ar.lc         I0
+       ;;
+.mii;          st8     [rp] = r50              C                       M23
+               mov     ar.ec = r9              C restore ar.ec         I0
+               mov     pr = r10                C restore pr            I0
+       ;;
+.mib;          nop     4711
+               mov     ar.pfs = r2             C restore ar.pfs        I0
+               br.ret.sptk.many b0             C                       B
+EPILOGUE()
diff --git a/mpn/ia64/sqr_diagonal.asm b/mpn/ia64/sqr_diagonal.asm

deleted file mode 100644 (file)

index 50307d4..0000000
--- a/mpn/ia64/sqr_diagonal.asm
+++ /dev/null
@@ -1,79 +0,0 @@
-dnl  IA-64 mpn_sqr_diagonal.  Helper for sqr_basecase.
-
-dnl  Copyright 2001, 2002, 2004 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of the GNU Lesser General Public License as published
-dnl  by the Free Software Foundation; either version 3 of the License, or (at
-dnl  your option) any later version.
-
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-dnl  License for more details.
-
-dnl  You should have received a copy of the GNU Lesser General Public License
-dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C         cycles/limb
-C Itanium:    4
-C Itanium 2:  2
-
-C TODO
-C  * Perhaps avoid ctop loop.  Unfortunately, a cloop loop running at 1 c/l
-C    would need prohibitive 8-way unrolling.
-C  * Instead of messing too much with this, write a nifty mpn_sqr_basecase.
-
-C INPUT PARAMETERS
-C rp = r32
-C sp = r33
-C n = r34
-
-ASM_START()
-PROLOGUE(mpn_sqr_diagonal)
-       .prologue
-       .save   ar.lc, r2
-       .save   pr, r15
-       .body
-ifdef(`HAVE_ABI_32',
-`      addp4   r32 = 0, r32
-       addp4   r33 = 0, r33
-       zxt4    r34 = r34
-       ;;
-')
-       ldf8            f32 = [r33], 8          C M     load rp[0] early
-       mov             r2 = ar.lc              C I0
-       mov             r14 = ar.ec             C I0
-       mov             r15 = pr                C I0
-       add             r19 = -1, r34           C M I   decr n
-       add             r18 = 8, r32            C M I   rp for high limb
-       ;;
-       mov             ar.lc = r19             C I0
-       mov             ar.ec = 5               C I0
-       mov             pr.rot = 1<<16          C I0
-       ;;
-       br.cexit.spnt   .Ldone                  C B
-       ;;
-       ALIGN(32)
-.Loop:
-  (p16)        ldf8            f32 = [r33], 8          C M
-  (p19)        xma.l           f36 = f35, f35, f0      C F
-  (p21)        stf8            [r32] = f38, 16         C M2 M3
-  (p19)        xma.hu          f40 = f35, f35, f0      C F
-  (p21)        stf8            [r18] = f42, 16         C M2 M3
-       br.ctop.dptk    .Loop                   C B
-       ;;
-.Ldone:
-       stf8            [r32] = f38             C M2 M3
-       stf8            [r18] = f42             C M2 M3
-       mov             ar.ec = r14             C I0
-       ;;
-       mov             pr = r15, 0x1ffff       C I0
-       mov             ar.lc = r2              C I0
-       br.ret.sptk.many b0                     C B
-EPILOGUE(mpn_sqr_diagonal)
-ASM_END()
diff --git a/mpn/ia64/submul_1.asm b/mpn/ia64/submul_1.asm

index ae46e55d7514747a5ed7ebcee7dc764e22e06aa2..2cb7c680aaf91d59f7ee9ff53abd4c4f8010d74a 100644 (file)
--- a/mpn/ia64/submul_1.asm
+++ b/mpn/ia64/submul_1.asm
@@ -1,6 +1,8 @@
  dnl  IA-64 mpn_submul_1 -- Multiply a limb vector with a limb and subtract the
  dnl  result from a second limb vector.
  
+dnl  Contributed to the GNU project by Torbjorn Granlund.
+
  dnl  Copyright 2000, 2001, 2002, 2003, 2004 Free Software Foundation, Inc.
  
  dnl  This file is part of the GNU MP Library.
diff --git a/mpn/ia64/tabselect.asm b/mpn/ia64/tabselect.asm

new file mode 100644 (file)

index 0000000..cc5b49b
--- /dev/null
+++ b/mpn/ia64/tabselect.asm
@@ -0,0 +1,149 @@
+dnl  IA-64 mpn_tabselect.
+
+dnl  Copyright 2011 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C           cycles/limb
+C Itanium:       ?
+C Itanium 2:     2.5
+
+C NOTES
+C  * Using software pipelining could trivially yield 2 c/l without unrolling,
+C    or 1+epsilon with unrolling.  (This code was modelled after the powerpc64
+C    code, for simplicity.)
+C  * ar.lc is used as the inner loop counter.  It is a preserved register, so
+C    it is saved in r2 on entry and restored just before returning.
+
+C mpn_tabselect (mp_limb_t *rp, mp_limb_t *tp, mp_size_t n, mp_size_t nents, mp_size_t which)
+C
+C Store table entry number which (counting from 0) at rp[0..n-1].  The table
+C at tp holds nents entries of n limbs each.  Every entry is read; an entry is
+C merged into rp under a mask that is all ones for the selected entry and zero
+C for the others.
+define(`rp',     `r32')
+define(`tp',     `r33')
+define(`n',      `r34')
+define(`nents',  `r35')
+define(`which',  `r36')
+
+define(`mask',   `r8')
+
+define(`rp1',     `r32')
+define(`tp1',     `r33')
+define(`rp2',     `r14')
+define(`tp2',     `r15')
+
+ASM_START()
+       TEXT
+       ALIGN(16)
+PROLOGUE(mpn_tabselect)
+       .prologue
+       .save   ar.lc, r2
+       .body
+ifdef(`HAVE_ABI_32',`
+.mmi;  addp4   rp = 0, rp              C                       M I
+       addp4   tp = 0, tp              C                       M I
+       zxt4    n = n                   C                       I
+.mii;  nop     0
+       zxt4    nents = nents           C                       I
+       zxt4    which = which           C                       I
+       ;;
+')
+.mmi;  add     rp2 = 8, rp1
+       add     tp2 = 8, tp1
+       mov     r2 = ar.lc              C save ar.lc            I0
+.mmi;  add     r6 = -2, n
+       nop     0
+       nop     0
+       ;;
+.mmi;  cmp.eq  p10, p0 = 1, n
+       and     r9 = 1, n               C n mod 2, tested below to set p8
+       shr.u   r6 = r6, 1              C inner loop count
+       ;;
+.mmi;  cmp.eq  p8, p0 = 0, r9          C p8 = n is even
+       sub     which = nents, which    C selected when which equals nents below
+       shl     n = n, 3                C n in bytes, for rewinding rp
+       ;;
+
+L(outer):
+.mmi;  cmp.eq  p6, p7 = which, nents   C are we at the selected table entry?
+       nop     0
+       mov     ar.lc = r6              C                       I0
+       ;;
+.mmb;
+  (p6) mov     mask = -1
+  (p7) mov     mask = 0
+  (p8) br.dptk L(top)                  C branch to loop entry if n even
+       ;;
+
+.mmi;  ld8     r16 = [tp1], 8
+       add     tp2 = 8, tp2
+       nop     0
+       ;;
+.mmi;  ld8     r18 = [rp1]
+       and     r16 = r16, mask
+       nop     0
+       ;;
+.mmi;  andcm   r18 = r18, mask
+       ;;
+       or      r16 = r16, r18
+       nop     0
+       ;;
+.mmb;  st8     [rp1] = r16, 8
+       add     rp2 = 8, rp2
+  (p10)        br.dpnt L(end)
+
+       ALIGN(32)
+L(top):
+.mmi;  ld8     r16 = [tp1], 16
+       ld8     r17 = [tp2], 16
+       nop     0
+       ;;
+.mmi;  ld8     r18 = [rp1]
+       and     r16 = r16, mask
+       nop     0
+.mmi;  ld8     r19 = [rp2]
+       and     r17 = r17, mask
+       nop     0
+       ;;
+.mmi;  andcm   r18 = r18, mask
+       andcm   r19 = r19, mask
+       nop     0
+       ;;
+.mmi;  or      r16 = r16, r18
+       or      r17 = r17, r19
+       nop     0
+       ;;
+.mmb;  st8     [rp1] = r16, 16
+       st8     [rp2] = r17, 16
+       br.cloop.dptk   L(top)
+       ;;
+L(end):
+.mmi;  sub     rp1 = rp1, n            C move rp back to beginning
+       sub     rp2 = rp2, n            C move rp back to beginning
+       cmp.ne  p9, p0 = 1, nents
+.mmb;  add     nents = -1, nents
+       nop     0
+  (p9) br.dptk L(outer)
+       ;;
+
+.mib;  nop     0
+       mov     ar.lc = r2              C restore ar.lc         I0
+       br.ret.sptk.many b0
+EPILOGUE()
diff --git a/mpn/m68k/mc68020/aorsmul_1.asm b/mpn/m68k/mc68020/aorsmul_1.asm

index 17866602f83452870efc0302b3f8f91065ebeb6c..521c36c0c26126212a31f2f084cab594c2fd4237 100644 (file)
--- a/mpn/m68k/mc68020/aorsmul_1.asm
+++ b/mpn/m68k/mc68020/aorsmul_1.asm
@@ -1,19 +1,19 @@
  dnl  mc68020 mpn_addmul_1, mpn_submul_1 -- add or subtract mpn multiple.
  
-dnl  Copyright 1992, 1994, 1996, 1999, 2000, 2001, 2002 Free Software
+dnl  Copyright 1992, 1994, 1996, 1999, 2000, 2001, 2002, 2011 Free Software
  dnl  Foundation, Inc.
  dnl
  dnl  This file is part of the GNU MP Library.
  dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or
-dnl  modify it under the terms of the GNU Lesser General Public License as
-dnl  published by the Free Software Foundation; either version 3 of the
-dnl  License, or (at your option) any later version.
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
  dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful,
-dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
-dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-dnl  Lesser General Public License for more details.
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
  dnl
  dnl  You should have received a copy of the GNU Lesser General Public License
  dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
@@ -45,6 +45,7 @@ define(s1_ptr,  `a1')
  define(s1_size, `d2')
  define(s2_limb, `d4')
  
+MULFUNC_PROLOGUE(mpn_addmul_1 mpn_submul_1)
  
  PROLOGUE(M4_function_1)
  
diff --git a/mpn/minithres/gmp-mparam.h b/mpn/minithres/gmp-mparam.h

index 47f0f6c7479dcc0169c7e5e4aca2a43d11d0ac99..9f22ed9d24a274b82bf931b7b050d693e3f44257 100644 (file)
--- a/mpn/minithres/gmp-mparam.h
+++ b/mpn/minithres/gmp-mparam.h
@@ -1,7 +1,7 @@
  /* Minimal values gmp-mparam.h -- Compiler/machine parameter header file.
  
-Copyright 1991, 1993, 1994, 2000, 2006, 2008, 2009, 2010 Free Software
-Foundation, Inc.
+Copyright 1991, 1993, 1994, 2000, 2006, 2008, 2009, 2010, 2012 Free
+Software Foundation, Inc.
  
  This file is part of the GNU MP Library.
  
@@ -23,19 +23,19 @@ along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
  
  #define MOD_1_NORM_THRESHOLD                 0  /* always */
  #define MOD_1_UNNORM_THRESHOLD               0  /* always */
-#define MOD_1_1_THRESHOLD                    2
-#define MOD_1_2_THRESHOLD                    3
-#define MOD_1_4_THRESHOLD                    4
+#define MOD_1N_TO_MOD_1_1_THRESHOLD          2
+#define MOD_1U_TO_MOD_1_1_THRESHOLD          2
+#define MOD_1_1_TO_MOD_1_2_THRESHOLD         3
+#define MOD_1_2_TO_MOD_1_4_THRESHOLD         4
+#define PREINV_MOD_1_TO_MOD_1_THRESHOLD      1
  #define USE_PREINV_DIVREM_1                  1  /* native */
-#define USE_PREINV_MOD_1                     1
-#define DIVREM_2_THRESHOLD                   0  /* always */
  #define DIVEXACT_1_THRESHOLD                 0  /* always (native) */
-#define MODEXACT_1_ODD_THRESHOLD             0  /* always (native) */
+#define BMOD_1_TO_MOD_1_THRESHOLD            3
  
  #define MUL_TOOM22_THRESHOLD                 8
  #define MUL_TOOM33_THRESHOLD                20
  #define MUL_TOOM44_THRESHOLD                24
-#define MUL_TOOM6H_THRESHOLD               200 /* FIXME */
+#define MUL_TOOM6H_THRESHOLD                70 /* FIXME */
  #define MUL_TOOM8H_THRESHOLD                86
  
  #define MUL_TOOM32_TO_TOOM43_THRESHOLD      50 /* FIXME */
@@ -47,7 +47,7 @@ along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
  #define SQR_TOOM2_THRESHOLD                  8
  #define SQR_TOOM3_THRESHOLD                 20
  #define SQR_TOOM4_THRESHOLD                 24
-#define SQR_TOOM6H_THRESHOLD               200 /* FIXME */
+#define SQR_TOOM6H_THRESHOLD                70 /* FIXME */
  #define SQR_TOOM8H_THRESHOLD                86
  
  #define MULMOD_BNM1_THRESHOLD            10
@@ -75,7 +75,7 @@ along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
  #define INV_APPR_THRESHOLD                   4
  
  #define BINV_NEWTON_THRESHOLD                6
-#define REDC_1_TO_REDC_N_THRESHOLD           4
+#define REDC_1_TO_REDC_N_THRESHOLD           9
  
  #define MU_DIV_QR_THRESHOLD                  8
  #define MU_DIVAPPR_Q_THRESHOLD               8
@@ -93,3 +93,6 @@ along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
  #define GET_STR_PRECOMPUTE_THRESHOLD        10
  #define SET_STR_THRESHOLD                   64
  #define SET_STR_PRECOMPUTE_THRESHOLD       100
+
+#define FAC_ODD_THRESHOLD                    0  /* always */
+#define FAC_DSC_THRESHOLD                   70
diff --git a/mpn/mips64/add_n.asm b/mpn/mips64/add_n.asm

index 1a3978c3f95a7f393892b5b770328f3c95746f9a..d6cdf9370544cfbc9e45c3055a9fd1a9bb8a5cf3 100644 (file)
--- a/mpn/mips64/add_n.asm
+++ b/mpn/mips64/add_n.asm
@@ -1,7 +1,7 @@
  dnl  MIPS64 mpn_add_n -- Add two limb vectors of the same length > 0 and store
  dnl  sum in a third limb vector.
  
-dnl  Copyright 1995, 2000, 2001, 2002 Free Software Foundation, Inc.
+dnl  Copyright 1995, 2000, 2001, 2002, 2011 Free Software Foundation, Inc.
  
  dnl  This file is part of the GNU MP Library.
  
@@ -27,6 +27,17 @@ C s2_ptr     $6
  C size         $7
  
  ASM_START()
+PROLOGUE(mpn_add_nc)
+       ld      $10,0($5)       C preload first limb via $5 (presumably s1_ptr; confirm)
+       ld      $11,0($6)       C preload first limb of s2
+
+       daddiu  $7,$7,-1        C size - 1, one limb has already been loaded
+       and     $9,$7,4-1       C number of limbs in first loop
+       beq     $9,$0,.L0       C if multiple of 4 limbs, skip first loop
+        move   $2,$8           C branch delay slot: seed $2 from $8 (presumably the carry-in argument; confirm)
+       b       .Loop0          C label is not defined in this entry point (presumably in mpn_add_n below)
+        dsubu  $7,$7,$9        C branch delay slot: remove the first-loop limbs from the count
+EPILOGUE()
  PROLOGUE(mpn_add_n)
         ld      $10,0($5)
         ld      $11,0($6)
@@ -109,4 +120,4 @@ PROLOGUE(mpn_add_n)
         sd      $11,0($4)
         j       $31
         or      $2,$2,$8
-EPILOGUE(mpn_add_n)
+EPILOGUE()
diff --git a/mpn/mips64/sub_n.asm b/mpn/mips64/sub_n.asm

index b28c1ced9c1e2e22649baacd134d81518228948c..1419cbfd1dde2c5c4bd0fb8547a90b9088ba7149 100644 (file)
--- a/mpn/mips64/sub_n.asm
+++ b/mpn/mips64/sub_n.asm
@@ -1,7 +1,7 @@
  dnl  MIPS64 mpn_sub_n -- Subtract two limb vectors of the same length > 0 and
  dnl  store difference in a third limb vector.
  
-dnl  Copyright 1995, 2000, 2001, 2002 Free Software Foundation, Inc.
+dnl  Copyright 1995, 2000, 2001, 2002, 2011 Free Software Foundation, Inc.
  
  dnl  This file is part of the GNU MP Library.
  
@@ -27,6 +27,17 @@ C s2_ptr     $6
  C size         $7
  
  ASM_START()
+PROLOGUE(mpn_sub_nc)
+       ld      $10,0($5)       C preload first limb via $5 (presumably s1_ptr; confirm)
+       ld      $11,0($6)       C preload first limb of s2
+
+       daddiu  $7,$7,-1        C size - 1, one limb has already been loaded
+       and     $9,$7,4-1       C number of limbs in first loop
+       beq     $9,$0,.L0       C if multiple of 4 limbs, skip first loop
+        move   $2,$8           C branch delay slot: seed $2 from $8 (presumably the borrow-in argument; confirm)
+       b       .Loop0          C label is not defined in this entry point (presumably in mpn_sub_n below)
+        dsubu  $7,$7,$9        C branch delay slot: remove the first-loop limbs from the count
+EPILOGUE()
  PROLOGUE(mpn_sub_n)
         ld      $10,0($5)
         ld      $11,0($6)
@@ -109,4 +120,4 @@ PROLOGUE(mpn_sub_n)
         sd      $11,0($4)
         j       $31
         or      $2,$2,$8
-EPILOGUE(mpn_sub_n)
+EPILOGUE()
diff --git a/mpn/pa32/hppa2_0/gmp-mparam.h b/mpn/pa32/hppa2_0/gmp-mparam.h

index 44543c74898123eba28bb0f8a5f7f8abb65b5add..d25a84bc0c4495d128a9d835eb6700dc65ee571e 100644 (file)
--- a/mpn/pa32/hppa2_0/gmp-mparam.h
+++ b/mpn/pa32/hppa2_0/gmp-mparam.h
@@ -24,36 +24,35 @@ along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
  /* 552 MHz PA8600 (gcc61.fsffrance.org) */
  
  #define DIVREM_1_NORM_THRESHOLD              3
-#define DIVREM_1_UNNORM_THRESHOLD            4
+#define DIVREM_1_UNNORM_THRESHOLD            3
  #define MOD_1_NORM_THRESHOLD                 3
  #define MOD_1_UNNORM_THRESHOLD               4
-#define MOD_1N_TO_MOD_1_1_THRESHOLD         14
+#define MOD_1N_TO_MOD_1_1_THRESHOLD         11
  #define MOD_1U_TO_MOD_1_1_THRESHOLD          8
-#define MOD_1_1_TO_MOD_1_2_THRESHOLD         0
-#define MOD_1_2_TO_MOD_1_4_THRESHOLD        18
-#define PREINV_MOD_1_TO_MOD_1_THRESHOLD     22
+#define MOD_1_1_TO_MOD_1_2_THRESHOLD         0  /* never mpn_mod_1_1p */
+#define MOD_1_2_TO_MOD_1_4_THRESHOLD        22
+#define PREINV_MOD_1_TO_MOD_1_THRESHOLD     28
  #define USE_PREINV_DIVREM_1                  1
-#define DIVREM_2_THRESHOLD                   0  /* always */
  #define DIVEXACT_1_THRESHOLD                 0  /* always */
-#define BMOD_1_TO_MOD_1_THRESHOLD           31
+#define BMOD_1_TO_MOD_1_THRESHOLD           36
  
-#define MUL_TOOM22_THRESHOLD                15
-#define MUL_TOOM33_THRESHOLD                91
-#define MUL_TOOM44_THRESHOLD               154
-#define MUL_TOOM6H_THRESHOLD               204
-#define MUL_TOOM8H_THRESHOLD               482
+#define MUL_TOOM22_THRESHOLD                18
+#define MUL_TOOM33_THRESHOLD                65
+#define MUL_TOOM44_THRESHOLD               166
+#define MUL_TOOM6H_THRESHOLD               202
+#define MUL_TOOM8H_THRESHOLD               333
  
-#define MUL_TOOM32_TO_TOOM43_THRESHOLD      97
-#define MUL_TOOM32_TO_TOOM53_THRESHOLD     103
-#define MUL_TOOM42_TO_TOOM53_THRESHOLD     109
-#define MUL_TOOM42_TO_TOOM63_THRESHOLD     103
+#define MUL_TOOM32_TO_TOOM43_THRESHOLD     105
+#define MUL_TOOM32_TO_TOOM53_THRESHOLD     138
+#define MUL_TOOM42_TO_TOOM53_THRESHOLD     105
+#define MUL_TOOM42_TO_TOOM63_THRESHOLD     102
  
-#define SQR_BASECASE_THRESHOLD               6
-#define SQR_TOOM2_THRESHOLD                 47
+#define SQR_BASECASE_THRESHOLD               7
+#define SQR_TOOM2_THRESHOLD                 55
  #define SQR_TOOM3_THRESHOLD                 93
  #define SQR_TOOM4_THRESHOLD                250
-#define SQR_TOOM6_THRESHOLD                278
-#define SQR_TOOM8_THRESHOLD                502
+#define SQR_TOOM6_THRESHOLD                306
+#define SQR_TOOM8_THRESHOLD                527
  
  #define MULMOD_BNM1_THRESHOLD               13
  #define SQRMOD_BNM1_THRESHOLD               15
@@ -124,34 +123,34 @@ along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
  #define SQR_FFT_THRESHOLD                 1600
  
  #define MULLO_BASECASE_THRESHOLD             0  /* always */
-#define MULLO_DC_THRESHOLD                  90
-#define MULLO_MUL_N_THRESHOLD             4167
+#define MULLO_DC_THRESHOLD                 116
+#define MULLO_MUL_N_THRESHOLD             3574
  
  #define DC_DIV_QR_THRESHOLD                100
-#define DC_DIVAPPR_Q_THRESHOLD             342
-#define DC_BDIV_QR_THRESHOLD               119
-#define DC_BDIV_Q_THRESHOLD                246
+#define DC_DIVAPPR_Q_THRESHOLD             348
+#define DC_BDIV_QR_THRESHOLD               109
+#define DC_BDIV_Q_THRESHOLD                254
  
-#define INV_MULMOD_BNM1_THRESHOLD           12
-#define INV_NEWTON_THRESHOLD               274
-#define INV_APPR_THRESHOLD                 268
+#define INV_MULMOD_BNM1_THRESHOLD           34
+#define INV_NEWTON_THRESHOLD               276
+#define INV_APPR_THRESHOLD                 276
  
-#define BINV_NEWTON_THRESHOLD              327
-#define REDC_1_TO_REDC_N_THRESHOLD          70
+#define BINV_NEWTON_THRESHOLD              278
+#define REDC_1_TO_REDC_N_THRESHOLD          78
  
  #define MU_DIV_QR_THRESHOLD                979
-#define MU_DIVAPPR_Q_THRESHOLD            1142
-#define MUPI_DIV_QR_THRESHOLD              100
-#define MU_BDIV_QR_THRESHOLD               667
+#define MU_DIVAPPR_Q_THRESHOLD             263
+#define MUPI_DIV_QR_THRESHOLD              102
+#define MU_BDIV_QR_THRESHOLD               807
  #define MU_BDIV_Q_THRESHOLD               1187
  
-#define MATRIX22_STRASSEN_THRESHOLD         15
-#define HGCD_THRESHOLD                      99
-#define GCD_DC_THRESHOLD                   372
-#define GCDEXT_DC_THRESHOLD                241
+#define MATRIX22_STRASSEN_THRESHOLD         11
+#define HGCD_THRESHOLD                     100
+#define GCD_DC_THRESHOLD                   379
+#define GCDEXT_DC_THRESHOLD                249
  #define JACOBI_BASE_METHOD                   2
  
  #define GET_STR_DC_THRESHOLD                 7
-#define GET_STR_PRECOMPUTE_THRESHOLD        14
-#define SET_STR_DC_THRESHOLD               224
-#define SET_STR_PRECOMPUTE_THRESHOLD       788
+#define GET_STR_PRECOMPUTE_THRESHOLD        16
+#define SET_STR_DC_THRESHOLD               270
+#define SET_STR_PRECOMPUTE_THRESHOLD       782
diff --git a/mpn/pa64/gmp-mparam.h b/mpn/pa64/gmp-mparam.h

index 428615ffebcb85ab4dbe3877572ac2b8e4b35799..081757acadd4e3c565661784be7343178cdc7d89 100644 (file)
--- a/mpn/pa64/gmp-mparam.h
+++ b/mpn/pa64/gmp-mparam.h
@@ -25,38 +25,43 @@ with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
  
  #define DIVREM_1_NORM_THRESHOLD              0  /* always */
  #define DIVREM_1_UNNORM_THRESHOLD            0  /* always */
+#define MOD_1_1P_METHOD                      2
  #define MOD_1_NORM_THRESHOLD                 0  /* always */
  #define MOD_1_UNNORM_THRESHOLD               0  /* always */
-#define MOD_1N_TO_MOD_1_1_THRESHOLD      MP_SIZE_T_MAX  /* never */
-#define MOD_1U_TO_MOD_1_1_THRESHOLD      MP_SIZE_T_MAX
-#define MOD_1_1_TO_MOD_1_2_THRESHOLD     MP_SIZE_T_MAX
-#define MOD_1_2_TO_MOD_1_4_THRESHOLD     MP_SIZE_T_MAX
-#define PREINV_MOD_1_TO_MOD_1_THRESHOLD  MP_SIZE_T_MAX  /* never */
+#define MOD_1N_TO_MOD_1_1_THRESHOLD          4
+#define MOD_1U_TO_MOD_1_1_THRESHOLD         10
+#define MOD_1_1_TO_MOD_1_2_THRESHOLD         0  /* never mpn_mod_1_1p */
+#define MOD_1_2_TO_MOD_1_4_THRESHOLD        14
+#define PREINV_MOD_1_TO_MOD_1_THRESHOLD     11
  #define USE_PREINV_DIVREM_1                  1
-#define DIVREM_2_THRESHOLD                   0  /* always */
+#define DIV_QR_2_PI2_THRESHOLD              21
  #define DIVEXACT_1_THRESHOLD                 0  /* always */
  #define BMOD_1_TO_MOD_1_THRESHOLD        MP_SIZE_T_MAX  /* never */
  
-#define MUL_TOOM22_THRESHOLD                30
-#define MUL_TOOM33_THRESHOLD               113
-#define MUL_TOOM44_THRESHOLD               195
+#define MUL_TOOM22_THRESHOLD                31
+#define MUL_TOOM33_THRESHOLD               114
+#define MUL_TOOM44_THRESHOLD               179
  #define MUL_TOOM6H_THRESHOLD               222
-#define MUL_TOOM8H_THRESHOLD               236
+#define MUL_TOOM8H_THRESHOLD               296
  
  #define MUL_TOOM32_TO_TOOM43_THRESHOLD     130
  #define MUL_TOOM32_TO_TOOM53_THRESHOLD     229
-#define MUL_TOOM42_TO_TOOM53_THRESHOLD     132
+#define MUL_TOOM42_TO_TOOM53_THRESHOLD     129
  #define MUL_TOOM42_TO_TOOM63_THRESHOLD      54
  
-#define SQR_BASECASE_THRESHOLD               4
-#define SQR_TOOM2_THRESHOLD                 54
-#define SQR_TOOM3_THRESHOLD                169
-#define SQR_TOOM4_THRESHOLD                280
-#define SQR_TOOM6_THRESHOLD                280
-#define SQR_TOOM8_THRESHOLD                296
+#define SQR_BASECASE_THRESHOLD               5
+#define SQR_TOOM2_THRESHOLD                 58
+#define SQR_TOOM3_THRESHOLD                153
+#define SQR_TOOM4_THRESHOLD                278
+#define SQR_TOOM6_THRESHOLD                  0  /* always */
+#define SQR_TOOM8_THRESHOLD                  0  /* always */
+
+#define MULMID_TOOM42_THRESHOLD             56
  
  #define MULMOD_BNM1_THRESHOLD               15
-#define SQRMOD_BNM1_THRESHOLD               17
+#define SQRMOD_BNM1_THRESHOLD               19
+
+#define POWM_SEC_TABLE  2,23,228,1084
  
  #define MUL_FFT_MODF_THRESHOLD             336  /* k = 5 */
  #define MUL_FFT_TABLE3                                      \
@@ -197,31 +202,33 @@ with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
  #define SQR_FFT_THRESHOLD                 1856
  
  #define MULLO_BASECASE_THRESHOLD             0  /* always */
-#define MULLO_DC_THRESHOLD                 125
+#define MULLO_DC_THRESHOLD                 113
  #define MULLO_MUL_N_THRESHOLD             4658
  
  #define DC_DIV_QR_THRESHOLD                123
  #define DC_DIVAPPR_Q_THRESHOLD             372
  #define DC_BDIV_QR_THRESHOLD               142
-#define DC_BDIV_Q_THRESHOLD                309
+#define DC_BDIV_Q_THRESHOLD                312
  
-#define INV_MULMOD_BNM1_THRESHOLD           56
+#define INV_MULMOD_BNM1_THRESHOLD           58
  #define INV_NEWTON_THRESHOLD               315
-#define INV_APPR_THRESHOLD                 318
+#define INV_APPR_THRESHOLD                 315
  
-#define BINV_NEWTON_THRESHOLD              363
-#define REDC_1_TO_REDC_N_THRESHOLD         102
+#define BINV_NEWTON_THRESHOLD              360
+#define REDC_1_TO_REDC_N_THRESHOLD         101
  
  #define MU_DIV_QR_THRESHOLD                979
-#define MU_DIVAPPR_Q_THRESHOLD             998
-#define MUPI_DIV_QR_THRESHOLD                0  /* always */
-#define MU_BDIV_QR_THRESHOLD               942
-#define MU_BDIV_Q_THRESHOLD               1334
+#define MU_DIVAPPR_Q_THRESHOLD            1142
+#define MUPI_DIV_QR_THRESHOLD               93
+#define MU_BDIV_QR_THRESHOLD               889
+#define MU_BDIV_Q_THRESHOLD               1187
  
  #define MATRIX22_STRASSEN_THRESHOLD          9
-#define HGCD_THRESHOLD                     240
-#define GCD_DC_THRESHOLD                   689
-#define GCDEXT_DC_THRESHOLD                538
+#define HGCD_THRESHOLD                     234
+#define HGCD_APPR_THRESHOLD                300
+#define HGCD_REDUCE_THRESHOLD             1553
+#define GCD_DC_THRESHOLD                   684
+#define GCDEXT_DC_THRESHOLD                525
  #define JACOBI_BASE_METHOD                   2
  
  #define GET_STR_DC_THRESHOLD                21
diff --git a/mpn/powerpc32/750/gmp-mparam.h b/mpn/powerpc32/750/gmp-mparam.h

index 8e460423e1fea0672f54c4ab9e4426d5d42c390a..fca0fec1b56e5541576ccba4f8efb0cb1f92b307 100644 (file)
--- a/mpn/powerpc32/750/gmp-mparam.h
+++ b/mpn/powerpc32/750/gmp-mparam.h
@@ -156,7 +156,7 @@ along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
  #define DC_BDIV_QR_THRESHOLD                35
  #define DC_BDIV_Q_THRESHOLD                 88
  
-#define INV_MULMOD_BNM1_THRESHOLD           76
+#define INV_MULMOD_BNM1_THRESHOLD           42
  #define INV_NEWTON_THRESHOLD               149
  #define INV_APPR_THRESHOLD                 125
  
diff --git a/mpn/powerpc32/aors_n.asm b/mpn/powerpc32/aors_n.asm

index f9e9b50d522ac0738322ff8eb717a09094585011..12115a9e9d5362b678b855f3b885fe267a7fcfb6 100644 (file)
--- a/mpn/powerpc32/aors_n.asm
+++ b/mpn/powerpc32/aors_n.asm
@@ -19,14 +19,17 @@ dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
  
  include(`../config.m4')
  
-C                cycles/limb
-C 603e:              ?
-C 604e:              ?         old: 3.25
-C 75x (G3):          ?         old: 3.5
-C 7400,7410 (G4):    3.25
-C 744x,745x (G4+):   4
-C power4/ppc970:     ?         old: 2.0
-C power5:            ?         old: 2.5
+C                   cycles/limb
+C 603e:                  ?
+C 604e:                  ?             old: 3.25
+C 75x (G3):              ?             old: 3.5
+C 7400,7410 (G4):        3.25
+C 744x,745x (G4+):       4
+C POWER3/PPC630          2
+C POWER4/PPC970          2.4
+C POWER5                 2.75
+C POWER6               40-140
+C POWER7                 3
  
  C INPUT PARAMETERS
  define(`rp',   `r3')
diff --git a/mpn/powerpc32/gmp-mparam.h b/mpn/powerpc32/gmp-mparam.h

index 7502c51b17d3c7f28ba20f90b1d9db2535c48fc0..b478a471241b2e5b9a38f3754f2618ac518716c8 100644 (file)
--- a/mpn/powerpc32/gmp-mparam.h
+++ b/mpn/powerpc32/gmp-mparam.h
@@ -1,7 +1,7 @@
  /* PowerPC-32 gmp-mparam.h -- Compiler/machine parameter header file.
  
  Copyright 1991, 1993, 1994, 1999, 2000, 2001, 2002, 2003, 2004, 2008, 2009,
-2010, 2012 Free Software Foundation, Inc.
+2010 Free Software Foundation, Inc.
  
  This file is part of the GNU MP Library.
  
@@ -34,16 +34,18 @@ along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
  
  #define DIVREM_1_NORM_THRESHOLD              0  /* always */
  #define DIVREM_1_UNNORM_THRESHOLD            0  /* always */
+#define MOD_1_1P_METHOD                      1
  #define MOD_1_NORM_THRESHOLD                 0  /* always */
  #define MOD_1_UNNORM_THRESHOLD               0  /* always */
-#define MOD_1N_TO_MOD_1_1_THRESHOLD          7
-#define MOD_1U_TO_MOD_1_1_THRESHOLD          8
-#define MOD_1_1_TO_MOD_1_2_THRESHOLD         0
-#define MOD_1_2_TO_MOD_1_4_THRESHOLD        36
-#define PREINV_MOD_1_TO_MOD_1_THRESHOLD     37
+#define MOD_1N_TO_MOD_1_1_THRESHOLD          8
+#define MOD_1U_TO_MOD_1_1_THRESHOLD          6
+#define MOD_1_1_TO_MOD_1_2_THRESHOLD         8
+#define MOD_1_2_TO_MOD_1_4_THRESHOLD        49
+#define PREINV_MOD_1_TO_MOD_1_THRESHOLD     18
  #define USE_PREINV_DIVREM_1                  1
+#define DIV_QR_2_PI2_THRESHOLD           MP_SIZE_T_MAX  /* never */
  #define DIVEXACT_1_THRESHOLD                 0  /* always */
-#define BMOD_1_TO_MOD_1_THRESHOLD           69
+#define BMOD_1_TO_MOD_1_THRESHOLD           66
  
  #define MUL_TOOM22_THRESHOLD                14
  #define MUL_TOOM33_THRESHOLD                73
@@ -52,19 +54,22 @@ along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
  #define MUL_TOOM8H_THRESHOLD               236
  
  #define MUL_TOOM32_TO_TOOM43_THRESHOLD      73
-#define MUL_TOOM32_TO_TOOM53_THRESHOLD      71
+#define MUL_TOOM32_TO_TOOM53_THRESHOLD      72
  #define MUL_TOOM42_TO_TOOM53_THRESHOLD      73
  #define MUL_TOOM42_TO_TOOM63_THRESHOLD      72
+#define MUL_TOOM43_TO_TOOM54_THRESHOLD      82
  
-#define SQR_BASECASE_THRESHOLD               0  /* always */
-#define SQR_TOOM2_THRESHOLD                 24
+#define SQR_BASECASE_THRESHOLD               4
+#define SQR_TOOM2_THRESHOLD                 26
  #define SQR_TOOM3_THRESHOLD                 77
-#define SQR_TOOM4_THRESHOLD                130
+#define SQR_TOOM4_THRESHOLD                136
  #define SQR_TOOM6_THRESHOLD                189
  #define SQR_TOOM8_THRESHOLD                284
  
-#define MULMOD_BNM1_THRESHOLD               10
-#define SQRMOD_BNM1_THRESHOLD               13
+#define MULMID_TOOM42_THRESHOLD             32
+
+#define MULMOD_BNM1_THRESHOLD                9
+#define SQRMOD_BNM1_THRESHOLD               14
  
  #define MUL_FFT_MODF_THRESHOLD             284  /* k = 5 */
  #define MUL_FFT_TABLE3                                      \
@@ -111,9 +116,9 @@ along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
      {   1535,12}, {   3071,13}, {   1919,12}, {   3839,15}, \
      {  32768,16} }
  #define MUL_FFT_TABLE3_SIZE 165
-#define MUL_FFT_THRESHOLD                 3712
+#define MUL_FFT_THRESHOLD                 3392
  
-#define SQR_FFT_MODF_THRESHOLD             248  /* k = 5 */
+#define SQR_FFT_MODF_THRESHOLD             236  /* k = 5 */
  #define SQR_FFT_TABLE3                                      \
    { {    248, 5}, {     17, 6}, {      9, 5}, {     19, 6}, \
      {     17, 7}, {      9, 6}, {     20, 7}, {     11, 6}, \
@@ -155,37 +160,44 @@ along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
      {    767,13}, {   1535,12}, {   3199,13}, {   1919,15}, \
      {  32768,16} }
  #define SQR_FFT_TABLE3_SIZE 153
-#define SQR_FFT_THRESHOLD                 2688
+#define SQR_FFT_THRESHOLD                 2368
  
  #define MULLO_BASECASE_THRESHOLD             0  /* always */
  #define MULLO_DC_THRESHOLD                  45
  #define MULLO_MUL_N_THRESHOLD             6633
  
  #define DC_DIV_QR_THRESHOLD                 43
-#define DC_DIVAPPR_Q_THRESHOLD             154
-#define DC_BDIV_QR_THRESHOLD                55
+#define DC_DIVAPPR_Q_THRESHOLD             153
+#define DC_BDIV_QR_THRESHOLD                54
  #define DC_BDIV_Q_THRESHOLD                124
  
  #define INV_MULMOD_BNM1_THRESHOLD           42
  #define INV_NEWTON_THRESHOLD               179
  #define INV_APPR_THRESHOLD                 157
  
-#define BINV_NEWTON_THRESHOLD              232
+#define BINV_NEWTON_THRESHOLD              204
  #define REDC_1_TO_REDC_N_THRESHOLD          54
  
-#define MU_DIV_QR_THRESHOLD               1057
-#define MU_DIVAPPR_Q_THRESHOLD            1142
-#define MUPI_DIV_QR_THRESHOLD               83
-#define MU_BDIV_QR_THRESHOLD               872
-#define MU_BDIV_Q_THRESHOLD               1142
+#define MU_DIV_QR_THRESHOLD                998
+#define MU_DIVAPPR_Q_THRESHOLD            1037
+#define MUPI_DIV_QR_THRESHOLD               84
+#define MU_BDIV_QR_THRESHOLD               748
+#define MU_BDIV_Q_THRESHOLD                942
+
+#define POWM_SEC_TABLE  4,23,164,616,1812
  
-#define MATRIX22_STRASSEN_THRESHOLD         15
-#define HGCD_THRESHOLD                     122
+#define MATRIX22_STRASSEN_THRESHOLD         11
+#define HGCD_THRESHOLD                     118
+#define HGCD_APPR_THRESHOLD                167
+#define HGCD_REDUCE_THRESHOLD             1679
  #define GCD_DC_THRESHOLD                   339
-#define GCDEXT_DC_THRESHOLD                278
-#define JACOBI_BASE_METHOD                   1
+#define GCDEXT_DC_THRESHOLD                273
+#define JACOBI_BASE_METHOD                   4
  
-#define GET_STR_DC_THRESHOLD                17
-#define GET_STR_PRECOMPUTE_THRESHOLD        38
+#define GET_STR_DC_THRESHOLD                12
+#define GET_STR_PRECOMPUTE_THRESHOLD        27
  #define SET_STR_DC_THRESHOLD               781
  #define SET_STR_PRECOMPUTE_THRESHOLD      1505
+
+#define FAC_DSC_THRESHOLD                  141
+#define FAC_ODD_THRESHOLD                   34
diff --git a/mpn/powerpc32/invert_limb.asm b/mpn/powerpc32/invert_limb.asm

new file mode 100644 (file)

index 0000000..84c5de3
--- /dev/null
+++ b/mpn/powerpc32/invert_limb.asm
@@ -0,0 +1,131 @@
+dnl  PowerPC-32 mpn_invert_limb -- Invert a normalized limb.
+
+dnl  Copyright 2011 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C               cycles/limb
+C 603e:                      ?
+C 604e:                      ?
+C 75x (G3):          ?
+C 7400,7410 (G4):     ?
+C 744x,745x (G4+):   32
+C power4/ppc970:      ?
+C power5:            ?
+
+EXTERN(approx_tab)
+
+ASM_START()
+PROLOGUE(mpn_invert_limb)
+       rlwinm  r6, r3, 11, 22, 30      C extract bits 30..22 to pos 2^1 (byte offset of a halfword entry)
+       srwi    r10, r3, 11             C extract bits 31..11
+       LEA(    r9, approx_tab)         C N.B. clobbers r0 for ELF and Darwin
+       lhzx    r9, r9, r6              C w2, 16-bit starting approximation from the 512-entry approx_tab
+       addi    r0, r10, 1              C (d >> 11) + 1
+       mullw   r11, r9, r9             C w2^2, fits in 32 bits since w2 is a zero-extended halfword
+       slwi    r9, r9, 4               C w2 << 4
+       mulhwu  r7, r11, r0             C high word of w2^2 * ((d >> 11) + 1)
+       rlwinm  r11, r3, 0, 31, 31      C extract bit 0
+       addi    r0, r9, -1              C (w2 << 4) - 1
+       srwi    r9, r3, 1               C d >> 1
+       subf    r0, r7, r0              C w1 = (w2 << 4) - 1 - high word computed above
+       add     r9, r9, r11             C d31 = (d >> 1) + (d & 1)
+       mullw   r9, r0, r9              C w1 * d31 (low 32 bits)
+       srwi    r10, r0, 1              C w1 >> 1
+       neg     r11, r11                C turn bit 0 of d into a 0 / all-ones mask
+       and     r11, r10, r11           C w1 >> 1 if d is odd, else 0
+       subf    r11, r9, r11            C masked (w1 >> 1) minus w1 * d31, mod 2^32
+       mulhwu  r9, r11, r0             C high word of that difference times w1
+       slwi    r0, r0, 15              C w1 << 15
+       srwi    r9, r9, 1               C halve the high word
+       add     r0, r9, r0              C w0
+       mullw   r10, r0, r3             C low word of w0 * d
+       mulhwu  r9, r0, r3              C high word of w0 * d
+       addc    r11, r10, r3            C low word + d, only the carry out is used
+       adde    r3, r9, r3              C high word + d + carry
+       subf    r3, r3, r0              C result in r3 = w0 - (high word + d + carry)
+       blr
+EPILOGUE()
+
+DEF_OBJECT(approx_tab)
+       .short 0x7fe1,0x7fa1,0x7f61,0x7f22,0x7ee3,0x7ea4,0x7e65,0x7e27
+       .short 0x7de9,0x7dab,0x7d6d,0x7d30,0x7cf3,0x7cb6,0x7c79,0x7c3d
+       .short 0x7c00,0x7bc4,0x7b89,0x7b4d,0x7b12,0x7ad7,0x7a9c,0x7a61
+       .short 0x7a27,0x79ec,0x79b2,0x7979,0x793f,0x7906,0x78cc,0x7894
+       .short 0x785b,0x7822,0x77ea,0x77b2,0x777a,0x7742,0x770b,0x76d3
+       .short 0x769c,0x7665,0x762f,0x75f8,0x75c2,0x758c,0x7556,0x7520
+       .short 0x74ea,0x74b5,0x7480,0x744b,0x7416,0x73e2,0x73ad,0x7379
+       .short 0x7345,0x7311,0x72dd,0x72aa,0x7277,0x7243,0x7210,0x71de
+       .short 0x71ab,0x7179,0x7146,0x7114,0x70e2,0x70b1,0x707f,0x704e
+       .short 0x701c,0x6feb,0x6fba,0x6f8a,0x6f59,0x6f29,0x6ef9,0x6ec8
+       .short 0x6e99,0x6e69,0x6e39,0x6e0a,0x6ddb,0x6dab,0x6d7d,0x6d4e
+       .short 0x6d1f,0x6cf1,0x6cc2,0x6c94,0x6c66,0x6c38,0x6c0a,0x6bdd
+       .short 0x6bb0,0x6b82,0x6b55,0x6b28,0x6afb,0x6acf,0x6aa2,0x6a76
+       .short 0x6a49,0x6a1d,0x69f1,0x69c6,0x699a,0x696e,0x6943,0x6918
+       .short 0x68ed,0x68c2,0x6897,0x686c,0x6842,0x6817,0x67ed,0x67c3
+       .short 0x6799,0x676f,0x6745,0x671b,0x66f2,0x66c8,0x669f,0x6676
+       .short 0x664d,0x6624,0x65fc,0x65d3,0x65aa,0x6582,0x655a,0x6532
+       .short 0x650a,0x64e2,0x64ba,0x6493,0x646b,0x6444,0x641c,0x63f5
+       .short 0x63ce,0x63a7,0x6381,0x635a,0x6333,0x630d,0x62e7,0x62c1
+       .short 0x629a,0x6275,0x624f,0x6229,0x6203,0x61de,0x61b8,0x6193
+       .short 0x616e,0x6149,0x6124,0x60ff,0x60da,0x60b6,0x6091,0x606d
+       .short 0x6049,0x6024,0x6000,0x5fdc,0x5fb8,0x5f95,0x5f71,0x5f4d
+       .short 0x5f2a,0x5f07,0x5ee3,0x5ec0,0x5e9d,0x5e7a,0x5e57,0x5e35
+       .short 0x5e12,0x5def,0x5dcd,0x5dab,0x5d88,0x5d66,0x5d44,0x5d22
+       .short 0x5d00,0x5cde,0x5cbd,0x5c9b,0x5c7a,0x5c58,0x5c37,0x5c16
+       .short 0x5bf5,0x5bd4,0x5bb3,0x5b92,0x5b71,0x5b51,0x5b30,0x5b10
+       .short 0x5aef,0x5acf,0x5aaf,0x5a8f,0x5a6f,0x5a4f,0x5a2f,0x5a0f
+       .short 0x59ef,0x59d0,0x59b0,0x5991,0x5972,0x5952,0x5933,0x5914
+       .short 0x58f5,0x58d6,0x58b7,0x5899,0x587a,0x585b,0x583d,0x581f
+       .short 0x5800,0x57e2,0x57c4,0x57a6,0x5788,0x576a,0x574c,0x572e
+       .short 0x5711,0x56f3,0x56d5,0x56b8,0x569b,0x567d,0x5660,0x5643
+       .short 0x5626,0x5609,0x55ec,0x55cf,0x55b2,0x5596,0x5579,0x555d
+       .short 0x5540,0x5524,0x5507,0x54eb,0x54cf,0x54b3,0x5497,0x547b
+       .short 0x545f,0x5443,0x5428,0x540c,0x53f0,0x53d5,0x53b9,0x539e
+       .short 0x5383,0x5368,0x534c,0x5331,0x5316,0x52fb,0x52e0,0x52c6
+       .short 0x52ab,0x5290,0x5276,0x525b,0x5240,0x5226,0x520c,0x51f1
+       .short 0x51d7,0x51bd,0x51a3,0x5189,0x516f,0x5155,0x513b,0x5121
+       .short 0x5108,0x50ee,0x50d5,0x50bb,0x50a2,0x5088,0x506f,0x5056
+       .short 0x503c,0x5023,0x500a,0x4ff1,0x4fd8,0x4fbf,0x4fa6,0x4f8e
+       .short 0x4f75,0x4f5c,0x4f44,0x4f2b,0x4f13,0x4efa,0x4ee2,0x4eca
+       .short 0x4eb1,0x4e99,0x4e81,0x4e69,0x4e51,0x4e39,0x4e21,0x4e09
+       .short 0x4df1,0x4dda,0x4dc2,0x4daa,0x4d93,0x4d7b,0x4d64,0x4d4d
+       .short 0x4d35,0x4d1e,0x4d07,0x4cf0,0x4cd8,0x4cc1,0x4caa,0x4c93
+       .short 0x4c7d,0x4c66,0x4c4f,0x4c38,0x4c21,0x4c0b,0x4bf4,0x4bde
+       .short 0x4bc7,0x4bb1,0x4b9a,0x4b84,0x4b6e,0x4b58,0x4b41,0x4b2b
+       .short 0x4b15,0x4aff,0x4ae9,0x4ad3,0x4abd,0x4aa8,0x4a92,0x4a7c
+       .short 0x4a66,0x4a51,0x4a3b,0x4a26,0x4a10,0x49fb,0x49e5,0x49d0
+       .short 0x49bb,0x49a6,0x4990,0x497b,0x4966,0x4951,0x493c,0x4927
+       .short 0x4912,0x48fe,0x48e9,0x48d4,0x48bf,0x48ab,0x4896,0x4881
+       .short 0x486d,0x4858,0x4844,0x482f,0x481b,0x4807,0x47f3,0x47de
+       .short 0x47ca,0x47b6,0x47a2,0x478e,0x477a,0x4766,0x4752,0x473e
+       .short 0x472a,0x4717,0x4703,0x46ef,0x46db,0x46c8,0x46b4,0x46a1
+       .short 0x468d,0x467a,0x4666,0x4653,0x4640,0x462c,0x4619,0x4606
+       .short 0x45f3,0x45e0,0x45cd,0x45ba,0x45a7,0x4594,0x4581,0x456e
+       .short 0x455b,0x4548,0x4536,0x4523,0x4510,0x44fe,0x44eb,0x44d8
+       .short 0x44c6,0x44b3,0x44a1,0x448f,0x447c,0x446a,0x4458,0x4445
+       .short 0x4433,0x4421,0x440f,0x43fd,0x43eb,0x43d9,0x43c7,0x43b5
+       .short 0x43a3,0x4391,0x437f,0x436d,0x435c,0x434a,0x4338,0x4327
+       .short 0x4315,0x4303,0x42f2,0x42e0,0x42cf,0x42bd,0x42ac,0x429b
+       .short 0x4289,0x4278,0x4267,0x4256,0x4244,0x4233,0x4222,0x4211
+       .short 0x4200,0x41ef,0x41de,0x41cd,0x41bc,0x41ab,0x419a,0x418a
+       .short 0x4179,0x4168,0x4157,0x4147,0x4136,0x4125,0x4115,0x4104
+       .short 0x40f4,0x40e3,0x40d3,0x40c2,0x40b2,0x40a2,0x4091,0x4081
+       .short 0x4071,0x4061,0x4050,0x4040,0x4030,0x4020,0x4010,0x4000
+END_OBJECT(approx_tab)
+ASM_END()
diff --git a/mpn/powerpc32/lshift.asm b/mpn/powerpc32/lshift.asm

index e3061731460578a5431d3c0418b6cfc692d7f31a..00dcc7029b2c2a689959c2ef54e971f5b7268ed9 100644 (file)
--- a/mpn/powerpc32/lshift.asm
+++ b/mpn/powerpc32/lshift.asm
@@ -38,7 +38,7 @@ C cnt r6
  
  ASM_START()
  PROLOGUE(mpn_lshift)
-       cmpwi   cr0, r5, 12     C more than 12 limbs?
+       cmpwi   cr0, r5, 30     C more than 30 limbs?
         slwi    r0, r5, 2
         add     r4, r4, r0      C make r4 point at end of s1
         add     r7, r3, r0      C make r7 point at end of res
@@ -153,4 +153,4 @@ L(loopU):
         stw     r12, -20(r7)
         lmw     r24, -32(r1)    C restore registers
         blr
-EPILOGUE(mpn_lshift)
+EPILOGUE()
diff --git a/mpn/powerpc32/lshiftc.asm b/mpn/powerpc32/lshiftc.asm

new file mode 100644 (file)

index 0000000..3d7a82a
--- /dev/null
+++ b/mpn/powerpc32/lshiftc.asm
@@ -0,0 +1,158 @@
+dnl  PowerPC-32 mpn_lshiftc.
+
+dnl  Copyright 1995, 1998, 2000, 2002, 2003, 2004, 2005, 2010 Free Software
+dnl  Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C                cycles/limb
+C 603e:            ?
+C 604e:            3.0
+C 75x (G3):        3.0
+C 7400,7410 (G4):  3.0
+C 7445,7455 (G4+): 2.5
+C 7447,7457 (G4+): 2.25
+C power4/ppc970:   2.5
+C power5:          2.5
+
+C INPUT PARAMETERS
+C rp   r3
+C up   r4
+C n    r5
+C cnt  r6
+
+ASM_START()
+PROLOGUE(mpn_lshiftc)
+       cmpwi   cr0, r5, 30     C more than 30 limbs?
+       slwi    r0, r5, 2       C r0 = 4*n, operand size in bytes
+       add     r4, r4, r0      C make r4 point at end of s1
+       add     r7, r3, r0      C make r7 point at end of res
+       bgt     L(BIG)          C branch if more than 30 limbs
+
+       mtctr   r5              C copy size into CTR
+       subfic  r8, r6, 32      C r8 = 32 - cnt, the complementary shift
+       lwzu    r11, -4(r4)     C load first s1 limb
+       srw     r3, r11, r8     C compute function return value
+       bdz     L(end1)         C single limb: only the low part remains
+
+L(oop):        lwzu    r10, -4(r4)
+       slw     r9, r11, r6
+       srw     r12, r10, r8
+       nor     r9, r9, r12     C complement of the combined limb
+       stwu    r9, -4(r7)
+       bdz     L(end2)
+       lwzu    r11, -4(r4)
+       slw     r9, r10, r6
+       srw     r12, r11, r8
+       nor     r9, r9, r12     C complement of the combined limb
+       stwu    r9, -4(r7)
+       bdnz    L(oop)
+
+L(end1):
+       slw     r0, r11, r6
+       nor     r0, r0, r0      C complement the lowest result limb
+       stw     r0, -4(r7)
+       blr
+L(end2):
+       slw     r0, r10, r6
+       nor     r0, r0, r0      C complement the lowest result limb
+       stw     r0, -4(r7)
+       blr
+
+L(BIG):
+       stmw    r24, -32(r1)    C save registers we are supposed to preserve
+       lwzu    r9, -4(r4)
+       subfic  r8, r6, 32      C r8 = 32 - cnt, the complementary shift
+       srw     r3, r9, r8      C compute function return value
+       slw     r0, r9, r6      C r0 = high part, merged with next limb below
+       addi    r5, r5, -1      C one limb already loaded, n-1 remain
+
+       andi.   r10, r5, 3      C count for spill loop
+       beq     L(e)
+       mtctr   r10
+       lwzu    r28, -4(r4)
+       bdz     L(xe0)
+
+L(loop0):
+       slw     r12, r28, r6
+       srw     r24, r28, r8
+       lwzu    r28, -4(r4)
+       nor     r24, r0, r24
+       stwu    r24, -4(r7)
+       mr      r0, r12
+       bdnz    L(loop0)        C taken at most once!
+
+L(xe0):        slw     r12, r28, r6
+       srw     r24, r28, r8
+       nor     r24, r0, r24
+       stwu    r24, -4(r7)
+       mr      r0, r12
+
+L(e):  srwi    r5, r5, 2       C count for unrolled loop
+       addi    r5, r5, -1      C last group of 4 is handled after the loop
+       mtctr   r5
+       lwz     r28, -4(r4)
+       lwz     r29, -8(r4)
+       lwz     r30, -12(r4)
+       lwzu    r31, -16(r4)
+
+L(loopU):
+       slw     r9, r28, r6
+       srw     r24, r28, r8
+       lwz     r28, -4(r4)
+       slw     r10, r29, r6
+       srw     r25, r29, r8
+       lwz     r29, -8(r4)
+       slw     r11, r30, r6
+       srw     r26, r30, r8
+       lwz     r30, -12(r4)
+       slw     r12, r31, r6
+       srw     r27, r31, r8
+       lwzu    r31, -16(r4)
+       nor     r24, r0, r24
+       stw     r24, -4(r7)
+       nor     r25, r9, r25
+       stw     r25, -8(r7)
+       nor     r26, r10, r26
+       stw     r26, -12(r7)
+       nor     r27, r11, r27
+       stwu    r27, -16(r7)
+       mr      r0, r12
+       bdnz    L(loopU)
+
+       slw     r9, r28, r6
+       srw     r24, r28, r8
+       slw     r10, r29, r6
+       srw     r25, r29, r8
+       slw     r11, r30, r6
+       srw     r26, r30, r8
+       slw     r12, r31, r6
+       srw     r27, r31, r8
+       nor     r24, r0, r24
+       stw     r24, -4(r7)
+       nor     r25, r9, r25
+       stw     r25, -8(r7)
+       nor     r26, r10, r26
+       stw     r26, -12(r7)
+       nor     r27, r11, r27
+       stw     r27, -16(r7)
+       nor     r12, r12, r12   C complement the lowest result limb
+       stw     r12, -20(r7)
+       lmw     r24, -32(r1)    C restore registers
+       blr
+EPILOGUE()
diff --git a/mpn/powerpc32/p3-p7/aors_n.asm b/mpn/powerpc32/p3-p7/aors_n.asm

new file mode 100644 (file)

index 0000000..6999182
--- /dev/null
+++ b/mpn/powerpc32/p3-p7/aors_n.asm
@@ -0,0 +1,176 @@
+dnl  PowerPC-32 mpn_add_n/mpn_sub_n -- mpn addition and subtraction.
+
+dnl  Copyright 1999, 2000, 2001, 2003, 2004, 2005, 2007, 2011 Free Software
+dnl  Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C                   cycles/limb
+C POWER3/PPC630          1.5
+C POWER4/PPC970          2
+C POWER5                 2
+C POWER6                 2.78
+C POWER7               2.15-2.87
+
+C This code is based on powerpc64/aors_n.asm.
+
+C INPUT PARAMETERS
+C rp   r3
+C up   r4
+C vp   r5
+C n    r6
+
+ifdef(`OPERATION_add_n',`
+  define(ADDSUBC,      adde)
+  define(ADDSUB,       addc)
+  define(func,         mpn_add_n)
+  define(func_nc,      mpn_add_nc)
+  define(GENRVAL,      `addi   r3, r3, 1')     C r3 = (CA - 1) + 1 = carry out
+  define(SETCBR,       `addic  r0, $1, -1')    C CA = 1 iff the argument register is nonzero
+  define(CLRCB,                `addic  r0, r0, 0')     C adding 0 never carries, so CA = 0
+')
+ifdef(`OPERATION_sub_n',`
+  define(ADDSUBC,      subfe)
+  define(ADDSUB,       subfc)
+  define(func,         mpn_sub_n)
+  define(func_nc,      mpn_sub_nc)
+  define(GENRVAL,      `neg    r3, r3')        C r3 = -(CA - 1) = borrow out
+  define(SETCBR,       `subfic r0, $1, 0')     C CA = 1 iff the argument register is zero (no borrow)
+  define(CLRCB,                `addic  r0, r1, -1')    C CA = 1 (no borrow); relies on r1 being nonzero
+')
+
+MULFUNC_PROLOGUE(mpn_add_n mpn_add_nc mpn_sub_n mpn_sub_nc)
+
+ASM_START()
+PROLOGUE(func_nc)
+       SETCBR(r7)              C seed CA from r7 (presumably the carry-in argument)
+       b       L(ent)          C join the common body inside func
+EPILOGUE()
+
+PROLOGUE(func)
+       CLRCB                   C start with no carry/borrow in
+L(ent):        stw     r31, -4(r1)     C save r28-r31 just below the stack pointer
+       stw     r30, -8(r1)
+       stw     r29, -12(r1)
+       stw     r28, -16(r1)
+
+       rlwinm. r0, r6, 0,30,31 C r0 = n & 3, set cr0
+       cmpwi   cr6, r0, 2      C cr6 tells 1, 2 and 3 leftover limbs apart
+       addi    r6, r6, 3       C compute count...
+       srwi    r6, r6, 2       C ...for ctr
+       mtctr   r6              C copy count into ctr
+       beq     cr0, L(b00)     C n & 3 == 0
+       blt     cr6, L(b01)     C n & 3 == 1
+       beq     cr6, L(b10)     C n & 3 == 2, else fall into b11
+
+L(b11):        lwz     r8, 0(r4)       C load s1 limb
+       lwz     r9, 0(r5)       C load s2 limb
+       lwz     r10, 4(r4)      C load s1 limb
+       lwz     r11, 4(r5)      C load s2 limb
+       lwz     r12, 8(r4)      C load s1 limb
+       addi    r4, r4, 12
+       lwz     r0, 8(r5)       C load s2 limb
+       addi    r5, r5, 12
+       ADDSUBC r29, r9, r8
+       ADDSUBC r30, r11, r10
+       ADDSUBC r31, r0, r12
+       stw     r29, 0(r3)
+       stw     r30, 4(r3)
+       stw     r31, 8(r3)
+       addi    r3, r3, 12
+       bdnz    L(go)           C more limbs left: continue 4 at a time
+       b       L(ret)
+
+L(b01):        lwz     r12, 0(r4)      C load s1 limb
+       addi    r4, r4, 4
+       lwz     r0, 0(r5)       C load s2 limb
+       addi    r5, r5, 4
+       ADDSUBC r31, r0, r12    C add
+       stw     r31, 0(r3)
+       addi    r3, r3, 4
+       bdnz    L(go)           C more limbs left: continue 4 at a time
+       b       L(ret)
+
+L(b10):        lwz     r10, 0(r4)      C load s1 limb
+       lwz     r11, 0(r5)      C load s2 limb
+       lwz     r12, 4(r4)      C load s1 limb
+       addi    r4, r4, 8
+       lwz     r0, 4(r5)       C load s2 limb
+       addi    r5, r5, 8
+       ADDSUBC r30, r11, r10   C add
+       ADDSUBC r31, r0, r12    C add
+       stw     r30, 0(r3)
+       stw     r31, 4(r3)
+       addi    r3, r3, 8
+       bdnz    L(go)           C more limbs left: continue 4 at a time
+       b       L(ret)
+
+L(b00):        C n is a multiple of 4; CA was already set up at entry
+L(go): lwz     r6, 0(r4)       C load s1 limb
+       lwz     r7, 0(r5)       C load s2 limb
+       lwz     r8, 4(r4)       C load s1 limb
+       lwz     r9, 4(r5)       C load s2 limb
+       lwz     r10, 8(r4)      C load s1 limb
+       lwz     r11, 8(r5)      C load s2 limb
+       lwz     r12, 12(r4)     C load s1 limb
+       lwz     r0, 12(r5)      C load s2 limb
+       bdz     L(end)
+
+       addi    r4, r4, 16
+       addi    r5, r5, 16
+
+       ALIGN(16)
+L(top):        ADDSUBC r28, r7, r6
+       lwz     r6, 0(r4)       C load s1 limb
+       lwz     r7, 0(r5)       C load s2 limb
+       ADDSUBC r29, r9, r8
+       lwz     r8, 4(r4)       C load s1 limb
+       lwz     r9, 4(r5)       C load s2 limb
+       ADDSUBC r30, r11, r10
+       lwz     r10, 8(r4)      C load s1 limb
+       lwz     r11, 8(r5)      C load s2 limb
+       ADDSUBC r31, r0, r12
+       lwz     r12, 12(r4)     C load s1 limb
+       lwz     r0, 12(r5)      C load s2 limb
+       stw     r28, 0(r3)
+       addi    r4, r4, 16
+       stw     r29, 4(r3)
+       addi    r5, r5, 16
+       stw     r30, 8(r3)
+       stw     r31, 12(r3)
+       addi    r3, r3, 16
+       bdnz    L(top)          C decrement ctr and loop back
+
+L(end):        ADDSUBC r28, r7, r6
+       ADDSUBC r29, r9, r8
+       ADDSUBC r30, r11, r10
+       ADDSUBC r31, r0, r12
+       stw     r28, 0(r3)
+       stw     r29, 4(r3)
+       stw     r30, 8(r3)
+       stw     r31, 12(r3)
+
+L(ret):        lwz     r31, -4(r1)     C restore r28-r31
+       lwz     r30, -8(r1)
+       lwz     r29, -12(r1)
+       lwz     r28, -16(r1)
+
+       subfe   r3, r0, r0      C r3 = CA - 1
+       GENRVAL                 C turn that into the 0/1 carry or borrow
+       blr
+EPILOGUE()
diff --git a/mpn/powerpc32/p3/gmp-mparam.h b/mpn/powerpc32/p3/gmp-mparam.h

new file mode 100644 (file)

index 0000000..3dd33ad
--- /dev/null
+++ b/mpn/powerpc32/p3/gmp-mparam.h
@@ -0,0 +1,145 @@
+/* PowerPC-32 gmp-mparam.h -- Compiler/machine parameter header file.
+
+Copyright 1991, 1993, 1994, 1999, 2000, 2001, 2002, 2003, 2004, 2008, 2009,
+2010, 2011 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#define GMP_LIMB_BITS 32
+#define BYTES_PER_MP_LIMB 4
+
+/* 450 MHz POWER3 */
+
+#define DIVREM_1_NORM_THRESHOLD              0  /* always */
+#define DIVREM_1_UNNORM_THRESHOLD            0  /* always */
+#define MOD_1_1P_METHOD                      2
+#define MOD_1_NORM_THRESHOLD                 0  /* always */
+#define MOD_1_UNNORM_THRESHOLD               0  /* always */
+#define MOD_1N_TO_MOD_1_1_THRESHOLD          3
+#define MOD_1U_TO_MOD_1_1_THRESHOLD          2
+#define MOD_1_1_TO_MOD_1_2_THRESHOLD        12
+#define MOD_1_2_TO_MOD_1_4_THRESHOLD        18
+#define PREINV_MOD_1_TO_MOD_1_THRESHOLD      8
+#define USE_PREINV_DIVREM_1                  1
+#define DIVEXACT_1_THRESHOLD                 0  /* always */
+#define BMOD_1_TO_MOD_1_THRESHOLD        MP_SIZE_T_MAX  /* never */
+
+#define MUL_TOOM22_THRESHOLD                10
+#define MUL_TOOM33_THRESHOLD                38
+#define MUL_TOOM44_THRESHOLD                58
+#define MUL_TOOM6H_THRESHOLD               129
+#define MUL_TOOM8H_THRESHOLD               212
+
+#define MUL_TOOM32_TO_TOOM43_THRESHOLD      65
+#define MUL_TOOM32_TO_TOOM53_THRESHOLD      63
+#define MUL_TOOM42_TO_TOOM53_THRESHOLD      59
+#define MUL_TOOM42_TO_TOOM63_THRESHOLD      64
+
+#define SQR_BASECASE_THRESHOLD               0  /* always */
+#define SQR_TOOM2_THRESHOLD                 14
+#define SQR_TOOM3_THRESHOLD                 53
+#define SQR_TOOM4_THRESHOLD                 76
+#define SQR_TOOM6_THRESHOLD                106
+#define SQR_TOOM8_THRESHOLD                284
+
+#define MULMOD_BNM1_THRESHOLD                9
+#define SQRMOD_BNM1_THRESHOLD                9
+
+#define MUL_FFT_MODF_THRESHOLD             220  /* k = 5 */
+#define MUL_FFT_TABLE3                                      \
+  { {    220, 5}, {     13, 6}, {      7, 5}, {     15, 6}, \
+    {      9, 5}, {     19, 6}, {     13, 7}, {      7, 6}, \
+    {     16, 7}, {     13, 8}, {      7, 7}, {     19, 8}, \
+    {     11, 7}, {     23, 9}, {      7, 8}, {     15, 7}, \
+    {     33, 8}, {     23, 9}, {     15, 8}, {     35, 9}, \
+    {     23,10}, {     15, 9}, {     31, 8}, {     67, 9}, \
+    {     39, 8}, {     79, 9}, {     47,10}, {     31, 9}, \
+    {     63, 8}, {    127, 9}, {     71, 8}, {    143, 9}, \
+    {     79,10}, {     47,11}, {     31,10}, {     63, 9}, \
+    {    127, 8}, {    255, 9}, {    143,10}, {     79, 9}, \
+    {    159, 8}, {    319, 9}, {    175, 8}, {    351,10}, \
+    {     95, 9}, {    191, 8}, {    383,10}, {    111,11}, \
+    {     63,10}, {    127, 9}, {    255,10}, {    143, 9}, \
+    {    287, 8}, {    575,10}, {    159, 9}, {    319,10}, \
+    {    175, 9}, {    351,11}, {     95,10}, {    191, 9}, \
+    {    383,10}, {    207, 9}, {    415,12}, {     63,11}, \
+    {    127,10}, {    255, 9}, {    511,10}, {    287, 9}, \
+    {    575,11}, {    159,10}, {    351, 9}, {    703, 8}, \
+    {   1407,11}, {    191,10}, {    415,11}, {    223,10}, \
+    {    447, 9}, {    895,12}, {   4096,13}, {   8192,14}, \
+    {  16384,15}, {  32768,16} }
+#define MUL_FFT_TABLE3_SIZE 82
+#define MUL_FFT_THRESHOLD                 2688
+
+#define SQR_FFT_MODF_THRESHOLD             176  /* k = 5 */
+#define SQR_FFT_TABLE3                                      \
+  { {    176, 5}, {     13, 6}, {      7, 5}, {     15, 6}, \
+    {     13, 7}, {      7, 6}, {     16, 7}, {      9, 6}, \
+    {     19, 7}, {     11, 6}, {     23, 7}, {     13, 8}, \
+    {      7, 7}, {     19, 8}, {     11, 7}, {     23, 9}, \
+    {      7, 8}, {     15, 7}, {     31, 8}, {     23, 9}, \
+    {     15, 8}, {     39, 9}, {     23,10}, {     15, 9}, \
+    {     31, 8}, {     63, 9}, {     39, 8}, {     79, 9}, \
+    {     47, 8}, {     95,10}, {     31, 9}, {     63, 8}, \
+    {    127, 9}, {     71, 8}, {    143, 7}, {    287, 6}, \
+    {    575, 9}, {     79, 8}, {    159,10}, {     47, 9}, \
+    {     95,11}, {     31,10}, {     63, 9}, {    127, 8}, \
+    {    255, 9}, {    143, 8}, {    287, 7}, {    575,10}, \
+    {     79, 9}, {    159, 8}, {    319, 9}, {    175,10}, \
+    {     95, 9}, {    191, 8}, {    383,10}, {    111, 9}, \
+    {    223,11}, {     63,10}, {    127, 9}, {    255,10}, \
+    {    143, 9}, {    287, 8}, {    575,10}, {    159, 9}, \
+    {    319,10}, {    175,11}, {     95,10}, {    191, 9}, \
+    {    383,10}, {    223,12}, {     63,11}, {    127,10}, \
+    {    287, 9}, {    575,11}, {    159,10}, {    351, 9}, \
+    {    703, 8}, {   1407,11}, {    191,10}, {    383,11}, \
+    {    223,10}, {    447, 9}, {    895,12}, {   4096,13}, \
+    {   8192,14}, {  16384,15}, {  32768,16} }
+#define SQR_FFT_TABLE3_SIZE 87
+#define SQR_FFT_THRESHOLD                 1728
+
+#define MULLO_BASECASE_THRESHOLD             2
+#define MULLO_DC_THRESHOLD                  33
+#define MULLO_MUL_N_THRESHOLD             5240
+
+#define DC_DIV_QR_THRESHOLD                 32
+#define DC_DIVAPPR_Q_THRESHOLD             123
+#define DC_BDIV_QR_THRESHOLD                34
+#define DC_BDIV_Q_THRESHOLD                 84
+
+#define INV_MULMOD_BNM1_THRESHOLD           42
+#define INV_NEWTON_THRESHOLD               129
+#define INV_APPR_THRESHOLD                 124
+
+#define BINV_NEWTON_THRESHOLD              148
+#define REDC_1_TO_REDC_N_THRESHOLD          38
+
+#define MU_DIV_QR_THRESHOLD                748
+#define MU_DIVAPPR_Q_THRESHOLD             748
+#define MUPI_DIV_QR_THRESHOLD               59
+#define MU_BDIV_QR_THRESHOLD               562
+#define MU_BDIV_Q_THRESHOLD                654
+
+#define MATRIX22_STRASSEN_THRESHOLD         11
+#define HGCD_THRESHOLD                      76
+#define GCD_DC_THRESHOLD                   205
+#define GCDEXT_DC_THRESHOLD                174
+#define JACOBI_BASE_METHOD                   1
+
+#define GET_STR_DC_THRESHOLD                14
+#define GET_STR_PRECOMPUTE_THRESHOLD        27
+#define SET_STR_DC_THRESHOLD               181
+#define SET_STR_PRECOMPUTE_THRESHOLD       525
diff --git a/mpn/powerpc32/p4/gmp-mparam.h b/mpn/powerpc32/p4/gmp-mparam.h

new file mode 100644 (file)

index 0000000..bb46419
--- /dev/null
+++ b/mpn/powerpc32/p4/gmp-mparam.h
@@ -0,0 +1,144 @@
+/* PowerPC-32 gmp-mparam.h -- Compiler/machine parameter header file.
+
+Copyright 1991, 1993, 1994, 1999, 2000, 2001, 2002, 2003, 2004, 2008, 2009,
+2010, 2011 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+/* 1800 MHz PowerPC-970 */
+
+#define DIVREM_1_NORM_THRESHOLD              0  /* always */
+#define DIVREM_1_UNNORM_THRESHOLD            0  /* always */
+#define MOD_1_1P_METHOD                      1
+#define MOD_1_NORM_THRESHOLD                 0  /* always */
+#define MOD_1_UNNORM_THRESHOLD               0  /* always */
+#define MOD_1N_TO_MOD_1_1_THRESHOLD          7
+#define MOD_1U_TO_MOD_1_1_THRESHOLD          6
+#define MOD_1_1_TO_MOD_1_2_THRESHOLD         9
+#define MOD_1_2_TO_MOD_1_4_THRESHOLD        42
+#define PREINV_MOD_1_TO_MOD_1_THRESHOLD     14
+#define USE_PREINV_DIVREM_1                  1
+#define DIVEXACT_1_THRESHOLD                 0  /* always */
+#define BMOD_1_TO_MOD_1_THRESHOLD           46
+
+#define MUL_TOOM22_THRESHOLD                20
+#define MUL_TOOM33_THRESHOLD                73
+#define MUL_TOOM44_THRESHOLD               121
+#define MUL_TOOM6H_THRESHOLD               222
+#define MUL_TOOM8H_THRESHOLD               363
+
+#define MUL_TOOM32_TO_TOOM43_THRESHOLD      84
+#define MUL_TOOM32_TO_TOOM53_THRESHOLD     107
+#define MUL_TOOM42_TO_TOOM53_THRESHOLD      81
+#define MUL_TOOM42_TO_TOOM63_THRESHOLD      88
+
+#define SQR_BASECASE_THRESHOLD               0  /* always */
+#define SQR_TOOM2_THRESHOLD                 30
+#define SQR_TOOM3_THRESHOLD                 74
+#define SQR_TOOM4_THRESHOLD                160
+#define SQR_TOOM6_THRESHOLD                222
+#define SQR_TOOM8_THRESHOLD                357
+
+#define MULMOD_BNM1_THRESHOLD               16
+#define SQRMOD_BNM1_THRESHOLD               18
+
+#define MUL_FFT_MODF_THRESHOLD             444  /* k = 5 */
+#define MUL_FFT_TABLE3                                      \
+  { {    444, 5}, {     15, 6}, {      8, 5}, {     17, 6}, \
+    {      9, 5}, {     19, 6}, {     10, 5}, {     21, 6}, \
+    {     21, 7}, {     11, 6}, {     24, 7}, {     13, 6}, \
+    {     27, 7}, {     15, 6}, {     31, 7}, {     21, 8}, \
+    {     11, 7}, {     27, 8}, {     15, 7}, {     33, 8}, \
+    {     19, 7}, {     39, 8}, {     23, 7}, {     47, 8}, \
+    {     27, 9}, {     15, 8}, {     39, 9}, {     23, 8}, \
+    {     51,10}, {     15, 9}, {     31, 8}, {     67, 9}, \
+    {     39, 8}, {     79, 9}, {     47, 8}, {     95, 9}, \
+    {     55,10}, {     31, 9}, {     79,10}, {     47, 9}, \
+    {     95,11}, {     31,10}, {     63, 9}, {    135,10}, \
+    {     79, 9}, {    159, 8}, {    319,10}, {     95, 8}, \
+    {    383,10}, {    111,11}, {     63,10}, {    127, 9}, \
+    {    255, 8}, {    511,10}, {    143, 9}, {    287, 8}, \
+    {    575, 9}, {    303,10}, {    159, 9}, {    319,11}, \
+    {     95, 9}, {    383,12}, {     63,11}, {    127,10}, \
+    {    255, 9}, {    511,10}, {    271, 9}, {    543, 8}, \
+    {   1087,10}, {    287, 9}, {    575,10}, {    303,11}, \
+    {    159,10}, {    319, 9}, {    639,10}, {    335, 9}, \
+    {    671,10}, {    351, 9}, {    703, 8}, {   1407,10}, \
+    {    383, 9}, {    767,10}, {    415, 9}, {    831,11}, \
+    {    223,10}, {    447,12}, {   4096,13}, {   8192,14}, \
+    {  16384,15}, {  32768,16} }
+#define MUL_FFT_TABLE3_SIZE 90
+#define MUL_FFT_THRESHOLD                 4736
+
+#define SQR_FFT_MODF_THRESHOLD             308  /* k = 5 */
+#define SQR_FFT_TABLE3                                      \
+  { {    308, 5}, {     15, 6}, {      8, 5}, {     19, 6}, \
+    {     10, 5}, {     21, 6}, {     21, 7}, {     11, 6}, \
+    {     24, 7}, {     13, 6}, {     27, 7}, {     15, 6}, \
+    {     31, 7}, {     21, 8}, {     11, 7}, {     27, 8}, \
+    {     15, 7}, {     33, 8}, {     19, 7}, {     39, 8}, \
+    {     27, 9}, {     15, 8}, {     39, 9}, {     23, 8}, \
+    {     47,10}, {     15, 9}, {     31, 8}, {     63, 9}, \
+    {     39, 8}, {     79, 9}, {     47,10}, {     31, 9}, \
+    {     79,10}, {     47, 9}, {     95,11}, {     31,10}, \
+    {     63, 9}, {    127, 8}, {    255, 9}, {    135,10}, \
+    {     79, 9}, {    159, 8}, {    319, 9}, {    175,10}, \
+    {     95, 9}, {    191, 8}, {    383, 9}, {    207,11}, \
+    {     63,10}, {    127, 9}, {    255, 8}, {    511, 9}, \
+    {    271,10}, {    143, 9}, {    287, 8}, {    575,10}, \
+    {    159, 9}, {    319,10}, {    175,11}, {     95,10}, \
+    {    191, 9}, {    383,10}, {    207,12}, {     63,11}, \
+    {    127,10}, {    255, 9}, {    511,10}, {    271, 9}, \
+    {    543, 8}, {   1087,10}, {    287, 9}, {    575,11}, \
+    {    159,10}, {    319, 9}, {    639,10}, {    351, 9}, \
+    {    703,11}, {    191,10}, {    383, 9}, {    767,10}, \
+    {    415, 9}, {    831,11}, {    223,10}, {    447,12}, \
+    {   4096,13}, {   8192,14}, {  16384,15}, {  32768,16} }
+#define SQR_FFT_TABLE3_SIZE 88
+#define SQR_FFT_THRESHOLD                 3520
+
+#define MULLO_BASECASE_THRESHOLD             0  /* always */
+#define MULLO_DC_THRESHOLD                  62
+#define MULLO_MUL_N_THRESHOLD             8907
+
+#define DC_DIV_QR_THRESHOLD                 53
+#define DC_DIVAPPR_Q_THRESHOLD             216
+#define DC_BDIV_QR_THRESHOLD                67
+#define DC_BDIV_Q_THRESHOLD                180
+
+#define INV_MULMOD_BNM1_THRESHOLD           58
+#define INV_NEWTON_THRESHOLD               226
+#define INV_APPR_THRESHOLD                 228
+
+#define BINV_NEWTON_THRESHOLD              252
+#define REDC_1_TO_REDC_N_THRESHOLD          67
+
+#define MU_DIV_QR_THRESHOLD               1187
+#define MU_DIVAPPR_Q_THRESHOLD            1308
+#define MUPI_DIV_QR_THRESHOLD              114
+#define MU_BDIV_QR_THRESHOLD              1017
+#define MU_BDIV_Q_THRESHOLD               1187
+
+#define MATRIX22_STRASSEN_THRESHOLD         15
+#define HGCD_THRESHOLD                      97
+#define GCD_DC_THRESHOLD                   386
+#define GCDEXT_DC_THRESHOLD                298
+#define JACOBI_BASE_METHOD                   4
+
+#define GET_STR_DC_THRESHOLD                11
+#define GET_STR_PRECOMPUTE_THRESHOLD        24
+#define SET_STR_DC_THRESHOLD               318
+#define SET_STR_PRECOMPUTE_THRESHOLD       929
diff --git a/mpn/powerpc32/p5/gmp-mparam.h b/mpn/powerpc32/p5/gmp-mparam.h

new file mode 100644 (file)

index 0000000..ba210ec
--- /dev/null
+++ b/mpn/powerpc32/p5/gmp-mparam.h
@@ -0,0 +1,146 @@
+/* PowerPC-32 gmp-mparam.h -- Compiler/machine parameter header file.
+
+Copyright 1991, 1993, 1994, 1999, 2000, 2001, 2002, 2003, 2004, 2008, 2009,
+2010, 2011 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#define GMP_LIMB_BITS 32
+#define BYTES_PER_MP_LIMB 4
+
+/* 1650 MHz POWER5 */
+
+#define DIVREM_1_NORM_THRESHOLD              0  /* always */
+#define DIVREM_1_UNNORM_THRESHOLD            0  /* always */
+#define MOD_1_1P_METHOD                      1
+#define MOD_1_NORM_THRESHOLD                 0  /* always */
+#define MOD_1_UNNORM_THRESHOLD               0  /* always */
+#define MOD_1N_TO_MOD_1_1_THRESHOLD          8
+#define MOD_1U_TO_MOD_1_1_THRESHOLD          6
+#define MOD_1_1_TO_MOD_1_2_THRESHOLD         9
+#define MOD_1_2_TO_MOD_1_4_THRESHOLD        50
+#define PREINV_MOD_1_TO_MOD_1_THRESHOLD     18
+#define USE_PREINV_DIVREM_1                  1
+#define DIV_QR_2_PI2_THRESHOLD           MP_SIZE_T_MAX  /* never */
+#define DIVEXACT_1_THRESHOLD                 0  /* always */
+#define BMOD_1_TO_MOD_1_THRESHOLD           61
+
+#define MUL_TOOM22_THRESHOLD                22
+#define MUL_TOOM33_THRESHOLD                57
+#define MUL_TOOM44_THRESHOLD               130
+#define MUL_TOOM6H_THRESHOLD               189
+#define MUL_TOOM8H_THRESHOLD               309
+
+#define MUL_TOOM32_TO_TOOM43_THRESHOLD      89
+#define MUL_TOOM32_TO_TOOM53_THRESHOLD      99
+#define MUL_TOOM42_TO_TOOM53_THRESHOLD      83
+#define MUL_TOOM42_TO_TOOM63_THRESHOLD      88
+
+#define SQR_BASECASE_THRESHOLD               6
+#define SQR_TOOM2_THRESHOLD                 40
+#define SQR_TOOM3_THRESHOLD                 77
+#define SQR_TOOM4_THRESHOLD                124
+#define SQR_TOOM6_THRESHOLD                140
+#define SQR_TOOM8_THRESHOLD                238
+
+#define MULMID_TOOM42_THRESHOLD             40
+
+#define MULMOD_BNM1_THRESHOLD               15
+#define SQRMOD_BNM1_THRESHOLD               16
+
+#define POWM_SEC_TABLE  4,29,252,840,2080
+
+#define MUL_FFT_MODF_THRESHOLD             412  /* k = 5 */
+#define MUL_FFT_TABLE3                                      \
+  { {    412, 5}, {     21, 6}, {     11, 5}, {     23, 6}, \
+    {     12, 5}, {     25, 6}, {     21, 7}, {     11, 6}, \
+    {     25, 7}, {     13, 6}, {     27, 7}, {     21, 8}, \
+    {     11, 7}, {     27, 8}, {     15, 7}, {     33, 8}, \
+    {     19, 7}, {     39, 8}, {     23, 7}, {     47, 8}, \
+    {     27, 9}, {     15, 8}, {     39, 9}, {     23, 8}, \
+    {     51,10}, {     15, 9}, {     31, 8}, {     67, 9}, \
+    {     39, 8}, {     79, 9}, {     55,10}, {     31, 9}, \
+    {     79,10}, {     47, 9}, {     95,11}, {     31,10}, \
+    {     63, 9}, {    135,10}, {     79, 9}, {    159,10}, \
+    {     95,11}, {     63,10}, {    127, 9}, {    255,10}, \
+    {    143, 9}, {    287,10}, {    159,11}, {     95,10}, \
+    {    191,12}, {     63,11}, {    127,10}, {    255, 9}, \
+    {    511,10}, {    271, 9}, {    543,10}, {    287,11}, \
+    {    159,10}, {    335, 9}, {    671,10}, {    351, 9}, \
+    {    703,11}, {    191,10}, {    383, 9}, {    767,10}, \
+    {    415, 9}, {    831,11}, {    223,12}, {   4096,13}, \
+    {   8192,14}, {  16384,15}, {  32768,16} }
+#define MUL_FFT_TABLE3_SIZE 71
+#define MUL_FFT_THRESHOLD                 4736
+
+#define SQR_FFT_MODF_THRESHOLD             340  /* k = 5 */
+#define SQR_FFT_TABLE3                                      \
+  { {    340, 5}, {     21, 6}, {     11, 5}, {     23, 6}, \
+    {     21, 7}, {     11, 6}, {     24, 7}, {     13, 6}, \
+    {     27, 7}, {     21, 8}, {     11, 7}, {     27, 8}, \
+    {     15, 7}, {     33, 8}, {     19, 7}, {     39, 8}, \
+    {     23, 7}, {     47, 8}, {     27, 9}, {     15, 8}, \
+    {     39, 9}, {     23, 8}, {     47,10}, {     15, 9}, \
+    {     31, 8}, {     67, 9}, {     47,10}, {     31, 9}, \
+    {     71,10}, {     47,11}, {     31,10}, {     63, 9}, \
+    {    127, 8}, {    255, 9}, {    135,10}, {     79, 9}, \
+    {    159,10}, {     95, 9}, {    191,11}, {     63,10}, \
+    {    127, 9}, {    255, 8}, {    511, 9}, {    271,10}, \
+    {    143, 9}, {    287, 8}, {    575, 9}, {    303,10}, \
+    {    159,11}, {     95,10}, {    191,12}, {     63,11}, \
+    {    127,10}, {    255, 9}, {    511,10}, {    271, 9}, \
+    {    543,10}, {    287, 9}, {    575,10}, {    303,11}, \
+    {    159,10}, {    319, 9}, {    639,10}, {    335, 9}, \
+    {    671,10}, {    351,11}, {    191,10}, {    383, 9}, \
+    {    767,10}, {    415,11}, {    223,10}, {    447,12}, \
+    {   4096,13}, {   8192,14}, {  16384,15}, {  32768,16} }
+#define SQR_FFT_TABLE3_SIZE 76
+#define SQR_FFT_THRESHOLD                 3712
+
+#define MULLO_BASECASE_THRESHOLD             2
+#define MULLO_DC_THRESHOLD                  68
+#define MULLO_MUL_N_THRESHOLD             9236
+
+#define DC_DIV_QR_THRESHOLD                 69
+#define DC_DIVAPPR_Q_THRESHOLD             220
+#define DC_BDIV_QR_THRESHOLD                75
+#define DC_BDIV_Q_THRESHOLD                188
+
+#define INV_MULMOD_BNM1_THRESHOLD           54
+#define INV_NEWTON_THRESHOLD               230
+#define INV_APPR_THRESHOLD                 230
+
+#define BINV_NEWTON_THRESHOLD              278
+#define REDC_1_TO_REDC_N_THRESHOLD          87
+
+#define MU_DIV_QR_THRESHOLD               1210
+#define MU_DIVAPPR_Q_THRESHOLD            1308
+#define MUPI_DIV_QR_THRESHOLD              106
+#define MU_BDIV_QR_THRESHOLD              1017
+#define MU_BDIV_Q_THRESHOLD               1210
+
+#define MATRIX22_STRASSEN_THRESHOLD         14
+#define HGCD_THRESHOLD                     110
+#define HGCD_APPR_THRESHOLD                138
+#define HGCD_REDUCE_THRESHOLD             2578
+#define GCD_DC_THRESHOLD                   408
+#define GCDEXT_DC_THRESHOLD                298
+#define JACOBI_BASE_METHOD                   4
+
+#define GET_STR_DC_THRESHOLD                13
+#define GET_STR_PRECOMPUTE_THRESHOLD        24
+#define SET_STR_DC_THRESHOLD               527
+#define SET_STR_PRECOMPUTE_THRESHOLD      1090
diff --git a/mpn/powerpc32/p6/gmp-mparam.h b/mpn/powerpc32/p6/gmp-mparam.h

new file mode 100644 (file)

index 0000000..529a66d
--- /dev/null
+++ b/mpn/powerpc32/p6/gmp-mparam.h
@@ -0,0 +1,155 @@
+/* PowerPC-32 gmp-mparam.h -- Compiler/machine parameter header file.
+
+Copyright 1991, 1993, 1994, 1999, 2000, 2001, 2002, 2003, 2004, 2008, 2009,
+2010, 2011 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#define GMP_LIMB_BITS 32
+#define BYTES_PER_MP_LIMB 4
+
+/* 3500 MHz POWER6 */
+
+#define DIVREM_1_NORM_THRESHOLD              0  /* always */
+#define DIVREM_1_UNNORM_THRESHOLD            0  /* always */
+#define MOD_1_1P_METHOD                      2
+#define MOD_1_NORM_THRESHOLD                 3
+#define MOD_1_UNNORM_THRESHOLD               0  /* always */
+#define MOD_1N_TO_MOD_1_1_THRESHOLD          3
+#define MOD_1U_TO_MOD_1_1_THRESHOLD          3
+#define MOD_1_1_TO_MOD_1_2_THRESHOLD     MP_SIZE_T_MAX
+#define MOD_1_2_TO_MOD_1_4_THRESHOLD         0  /* never mpn_mod_1s_2p */
+#define PREINV_MOD_1_TO_MOD_1_THRESHOLD      8
+#define USE_PREINV_DIVREM_1                  1
+#define DIV_QR_2_PI2_THRESHOLD           MP_SIZE_T_MAX  /* never */
+#define DIVEXACT_1_THRESHOLD                 0  /* always */
+#define BMOD_1_TO_MOD_1_THRESHOLD        MP_SIZE_T_MAX  /* never */
+
+#define MUL_TOOM22_THRESHOLD                19
+#define MUL_TOOM33_THRESHOLD                55
+#define MUL_TOOM44_THRESHOLD                88
+#define MUL_TOOM6H_THRESHOLD               137
+#define MUL_TOOM8H_THRESHOLD               181
+
+#define MUL_TOOM32_TO_TOOM43_THRESHOLD      57
+#define MUL_TOOM32_TO_TOOM53_THRESHOLD      56
+#define MUL_TOOM42_TO_TOOM53_THRESHOLD      57
+#define MUL_TOOM42_TO_TOOM63_THRESHOLD      56
+
+#define SQR_BASECASE_THRESHOLD               0  /* always */
+#define SQR_TOOM2_THRESHOLD                 30
+#define SQR_TOOM3_THRESHOLD                 56
+#define SQR_TOOM4_THRESHOLD                130
+#define SQR_TOOM6_THRESHOLD                189
+#define SQR_TOOM8_THRESHOLD                296
+
+#define MULMID_TOOM42_THRESHOLD             26
+
+#define MULMOD_BNM1_THRESHOLD                7
+#define SQRMOD_BNM1_THRESHOLD               12
+
+#define POWM_SEC_TABLE  2,26,127,453,1068
+
+#define MUL_FFT_MODF_THRESHOLD             212  /* k = 5 */
+#define MUL_FFT_TABLE3                                      \
+  { {    212, 5}, {     13, 6}, {      7, 5}, {     15, 6}, \
+    {     13, 7}, {      7, 6}, {     16, 7}, {      9, 6}, \
+    {     19, 7}, {     13, 8}, {      7, 7}, {     19, 8}, \
+    {     11, 7}, {     25, 9}, {      7, 8}, {     15, 7}, \
+    {     31, 8}, {     19, 7}, {     39, 8}, {     23, 9}, \
+    {     15, 8}, {     39, 9}, {     23, 8}, {     47,10}, \
+    {     15, 9}, {     31, 8}, {     63, 9}, {     39, 8}, \
+    {     79, 9}, {     47,10}, {     31, 9}, {     63, 8}, \
+    {    127, 9}, {     71, 8}, {    143, 7}, {    287, 9}, \
+    {     79,10}, {     47,11}, {     31,10}, {     63, 9}, \
+    {    127, 8}, {    255, 7}, {    511, 9}, {    143, 8}, \
+    {    287,10}, {     79, 9}, {    159, 8}, {    319, 9}, \
+    {    175, 8}, {    351,10}, {     95, 9}, {    191, 8}, \
+    {    383, 9}, {    207,10}, {    111,11}, {     63,10}, \
+    {    127, 9}, {    255, 8}, {    511,10}, {    143, 9}, \
+    {    287, 8}, {    575,10}, {    159, 9}, {    319,10}, \
+    {    175, 9}, {    351,11}, {     95,10}, {    191, 9}, \
+    {    383,10}, {    207, 9}, {    415,12}, {     63,11}, \
+    {    127,10}, {    255, 9}, {    511,10}, {    287, 9}, \
+    {    575,11}, {    159,10}, {    351, 9}, {    703,11}, \
+    {    191,10}, {    415, 9}, {    831,11}, {    223,10}, \
+    {    447,12}, {   4096,13}, {   8192,14}, {  16384,15}, \
+    {  32768,16} }
+#define MUL_FFT_TABLE3_SIZE 89
+#define MUL_FFT_THRESHOLD                 1728
+
+#define SQR_FFT_MODF_THRESHOLD             184  /* k = 5 */
+#define SQR_FFT_TABLE3                                      \
+  { {    184, 5}, {      6, 4}, {     13, 5}, {     13, 6}, \
+    {      7, 5}, {     15, 6}, {     13, 7}, {      7, 6}, \
+    {     16, 7}, {      9, 6}, {     19, 7}, {     11, 6}, \
+    {     23, 7}, {     13, 8}, {      7, 7}, {     19, 8}, \
+    {     11, 7}, {     23, 9}, {      7, 8}, {     23, 9}, \
+    {     15, 8}, {     39, 9}, {     23,10}, {     15, 9}, \
+    {     31, 8}, {     63, 9}, {     39, 8}, {     79, 9}, \
+    {     47,10}, {     31, 9}, {     63, 8}, {    127, 7}, \
+    {    255, 9}, {     71, 8}, {    143, 7}, {    287, 6}, \
+    {    575, 9}, {     79,10}, {     47,11}, {     31,10}, \
+    {     63, 9}, {    127, 8}, {    255, 9}, {    143, 8}, \
+    {    287, 7}, {    575,10}, {     79, 9}, {    159, 8}, \
+    {    319, 9}, {    175, 8}, {    351,10}, {     95, 9}, \
+    {    191, 8}, {    383, 9}, {    207,10}, {    111, 9}, \
+    {    223,11}, {     63,10}, {    127, 9}, {    255,10}, \
+    {    143, 9}, {    287, 8}, {    575,10}, {    159, 9}, \
+    {    319,10}, {    175, 9}, {    351,11}, {     95,10}, \
+    {    191, 9}, {    383,10}, {    207, 9}, {    415,10}, \
+    {    223,12}, {     63,11}, {    127,10}, {    255, 9}, \
+    {    511,10}, {    287, 9}, {    575,11}, {    159,10}, \
+    {    351, 9}, {    703, 8}, {   1407,11}, {    191,10}, \
+    {    415,11}, {    223,10}, {    447, 9}, {    895,12}, \
+    {   4096,13}, {   8192,14}, {  16384,15}, {  32768,16} }
+#define SQR_FFT_TABLE3_SIZE 92
+#define SQR_FFT_THRESHOLD                 1600
+
+#define MULLO_BASECASE_THRESHOLD             2
+#define MULLO_DC_THRESHOLD                  57
+#define MULLO_MUL_N_THRESHOLD             3176
+
+#define DC_DIV_QR_THRESHOLD                 52
+#define DC_DIVAPPR_Q_THRESHOLD             187
+#define DC_BDIV_QR_THRESHOLD                64
+#define DC_BDIV_Q_THRESHOLD                146
+
+#define INV_MULMOD_BNM1_THRESHOLD           68
+#define INV_NEWTON_THRESHOLD               182
+#define INV_APPR_THRESHOLD                 182
+
+#define BINV_NEWTON_THRESHOLD              186
+#define REDC_1_TO_REDC_N_THRESHOLD          60
+
+#define MU_DIV_QR_THRESHOLD                924
+#define MU_DIVAPPR_Q_THRESHOLD             807
+#define MUPI_DIV_QR_THRESHOLD               73
+#define MU_BDIV_QR_THRESHOLD               667
+#define MU_BDIV_Q_THRESHOLD                823
+
+#define MATRIX22_STRASSEN_THRESHOLD          8
+#define HGCD_THRESHOLD                      61
+#define HGCD_APPR_THRESHOLD                 50
+#define HGCD_REDUCE_THRESHOLD              974
+#define GCD_DC_THRESHOLD                   195
+#define GCDEXT_DC_THRESHOLD                134
+#define JACOBI_BASE_METHOD                   4
+
+#define GET_STR_DC_THRESHOLD                 9
+#define GET_STR_PRECOMPUTE_THRESHOLD        21
+#define SET_STR_DC_THRESHOLD               190
+#define SET_STR_PRECOMPUTE_THRESHOLD       411
diff --git a/mpn/powerpc32/p7/gmp-mparam.h b/mpn/powerpc32/p7/gmp-mparam.h

new file mode 100644 (file)

index 0000000..bd18d40
--- /dev/null
+++ b/mpn/powerpc32/p7/gmp-mparam.h
@@ -0,0 +1,149 @@
+/* PowerPC-32 gmp-mparam.h -- Compiler/machine parameter header file.
+
+Copyright 1991, 1993, 1994, 1999, 2000, 2001, 2002, 2003, 2004, 2008, 2009,
+2010, 2011 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#define GMP_LIMB_BITS 32
+#define BYTES_PER_MP_LIMB 4
+
+/* 3550 MHz POWER7/T4 */
+
+#define DIVREM_1_NORM_THRESHOLD              0  /* always */
+#define DIVREM_1_UNNORM_THRESHOLD            0  /* always */
+#define MOD_1_1P_METHOD                      1
+#define MOD_1_NORM_THRESHOLD                 0  /* always */
+#define MOD_1_UNNORM_THRESHOLD               0  /* always */
+#define MOD_1N_TO_MOD_1_1_THRESHOLD          7
+#define MOD_1U_TO_MOD_1_1_THRESHOLD          7
+#define MOD_1_1_TO_MOD_1_2_THRESHOLD         0  /* never mpn_mod_1_1p */
+#define MOD_1_2_TO_MOD_1_4_THRESHOLD        34
+#define PREINV_MOD_1_TO_MOD_1_THRESHOLD     15
+#define USE_PREINV_DIVREM_1                  1
+#define DIV_QR_2_PI2_THRESHOLD           MP_SIZE_T_MAX  /* never */
+#define DIVEXACT_1_THRESHOLD                 0  /* always */
+#define BMOD_1_TO_MOD_1_THRESHOLD           34
+
+#define MUL_TOOM22_THRESHOLD                20
+#define MUL_TOOM33_THRESHOLD                89
+#define MUL_TOOM44_THRESHOLD               130
+#define MUL_TOOM6H_THRESHOLD               286
+#define MUL_TOOM8H_THRESHOLD               363
+
+#define MUL_TOOM32_TO_TOOM43_THRESHOLD     121
+#define MUL_TOOM32_TO_TOOM53_THRESHOLD     114
+#define MUL_TOOM42_TO_TOOM53_THRESHOLD      89
+#define MUL_TOOM42_TO_TOOM63_THRESHOLD     113
+
+#define SQR_BASECASE_THRESHOLD               4
+#define SQR_TOOM2_THRESHOLD                 50
+#define SQR_TOOM3_THRESHOLD                 89
+#define SQR_TOOM4_THRESHOLD                154
+#define SQR_TOOM6_THRESHOLD                222
+#define SQR_TOOM8_THRESHOLD                381
+
+#define MULMID_TOOM42_THRESHOLD             40
+
+#define MULMOD_BNM1_THRESHOLD               18
+#define SQRMOD_BNM1_THRESHOLD               17
+
+#define POWM_SEC_TABLE  4,35,225,780,2212
+
+#define MUL_FFT_MODF_THRESHOLD             476  /* k = 5 */
+#define MUL_FFT_TABLE3                                      \
+  { {    476, 5}, {     21, 6}, {     11, 5}, {     23, 6}, \
+    {     12, 5}, {     25, 6}, {     13, 5}, {     27, 6}, \
+    {     14, 5}, {     29, 6}, {     21, 7}, {     11, 6}, \
+    {     25, 7}, {     13, 6}, {     29, 7}, {     15, 6}, \
+    {     31, 7}, {     17, 6}, {     35, 7}, {     19, 6}, \
+    {     39, 7}, {     21, 8}, {     11, 7}, {     27, 8}, \
+    {     15, 7}, {     35, 8}, {     19, 7}, {     41, 8}, \
+    {     27, 9}, {     15, 8}, {     39, 9}, {     23, 8}, \
+    {     51,10}, {     15, 9}, {     31, 8}, {     67, 9}, \
+    {     39, 8}, {     79, 9}, {     47, 8}, {     95, 9}, \
+    {     55,10}, {     31, 9}, {     79,10}, {     47, 9}, \
+    {     95,11}, {     31,10}, {     63, 9}, {    135,10}, \
+    {     79, 9}, {    159,10}, {     95,11}, {     63,10}, \
+    {    159,11}, {     95,12}, {     63,11}, {    127,10}, \
+    {    255, 9}, {    511,10}, {    271, 9}, {    543, 8}, \
+    {   1087,11}, {    159,10}, {    319, 9}, {    639,10}, \
+    {    335, 9}, {    671, 8}, {   1343,10}, {    351,11}, \
+    {    191,10}, {    415, 9}, {    831,10}, {    431,11}, \
+    {    223,12}, {   4096,13}, {   8192,14}, {  16384,15}, \
+    {  32768,16} }
+#define MUL_FFT_TABLE3_SIZE 77
+#define MUL_FFT_THRESHOLD                 5312
+
+#define SQR_FFT_MODF_THRESHOLD             344  /* k = 5 */
+#define SQR_FFT_TABLE3                                      \
+  { {    344, 5}, {     21, 6}, {     11, 5}, {     23, 6}, \
+    {     21, 7}, {     11, 6}, {     24, 7}, {     13, 6}, \
+    {     27, 7}, {     15, 6}, {     31, 7}, {     21, 8}, \
+    {     11, 7}, {     27, 8}, {     15, 7}, {     33, 8}, \
+    {     19, 7}, {     39, 8}, {     27, 9}, {     15, 8}, \
+    {     39, 9}, {     23, 8}, {     47,10}, {     15, 9}, \
+    {     31, 8}, {     63, 9}, {     39, 8}, {     79, 9}, \
+    {     47,10}, {     31, 9}, {     79,10}, {     47,11}, \
+    {     31,10}, {     63, 9}, {    135,10}, {     79, 9}, \
+    {    159,10}, {     95, 9}, {    191,11}, {     63,10}, \
+    {    127, 9}, {    255, 8}, {    511, 9}, {    271,10}, \
+    {    143, 9}, {    287, 8}, {    575, 9}, {    303,10}, \
+    {    159,11}, {     95,10}, {    191,12}, {     63,11}, \
+    {    127,10}, {    255, 9}, {    511,10}, {    271, 9}, \
+    {    543, 8}, {   1087,10}, {    287, 9}, {    575,10}, \
+    {    303,11}, {    159,10}, {    319, 9}, {    639,10}, \
+    {    335, 9}, {    671,10}, {    351, 9}, {    703,11}, \
+    {    191,10}, {    383, 9}, {    767,10}, {    415, 9}, \
+    {    831,11}, {    223,10}, {    447,12}, {   4096,13}, \
+    {   8192,14}, {  16384,15}, {  32768,16} }
+#define SQR_FFT_TABLE3_SIZE 79
+#define SQR_FFT_THRESHOLD                 3712
+
+#define MULLO_BASECASE_THRESHOLD             2
+#define MULLO_DC_THRESHOLD                  34
+#define MULLO_MUL_N_THRESHOLD            10323
+
+#define DC_DIV_QR_THRESHOLD                 52
+#define DC_DIVAPPR_Q_THRESHOLD             202
+#define DC_BDIV_QR_THRESHOLD                68
+#define DC_BDIV_Q_THRESHOLD                152
+
+#define INV_MULMOD_BNM1_THRESHOLD           66
+#define INV_NEWTON_THRESHOLD               226
+#define INV_APPR_THRESHOLD                 189
+
+#define BINV_NEWTON_THRESHOLD              292
+#define REDC_1_TO_REDC_N_THRESHOLD          79
+
+#define MU_DIV_QR_THRESHOLD               1442
+#define MU_DIVAPPR_Q_THRESHOLD            1442
+#define MUPI_DIV_QR_THRESHOLD               91
+#define MU_BDIV_QR_THRESHOLD              1308
+#define MU_BDIV_Q_THRESHOLD               1442
+
+#define MATRIX22_STRASSEN_THRESHOLD         16
+#define HGCD_THRESHOLD                     126
+#define HGCD_APPR_THRESHOLD                139
+#define HGCD_REDUCE_THRESHOLD             2681
+#define GCD_DC_THRESHOLD                   573
+#define GCDEXT_DC_THRESHOLD                448
+#define JACOBI_BASE_METHOD                   4
+
+#define GET_STR_DC_THRESHOLD                 9
+#define GET_STR_PRECOMPUTE_THRESHOLD        20
+#define SET_STR_DC_THRESHOLD               834
+#define SET_STR_PRECOMPUTE_THRESHOLD      1888
diff --git a/mpn/powerpc32/rshift.asm b/mpn/powerpc32/rshift.asm

index b069a93d124e7ee4153157a5828cf0cd6d749839..d9a74011c6b76eb8b6759883124b3ca3a85637ef 100644 (file)
--- a/mpn/powerpc32/rshift.asm
+++ b/mpn/powerpc32/rshift.asm
@@ -38,7 +38,7 @@ C cnt r6
  
  ASM_START()
  PROLOGUE(mpn_rshift)
-       cmpwi   cr0, r5, 12     C more than 12 limbs?
+       cmpwi   cr0, r5, 30     C more than 30 limbs?
         addi    r7, r3, -4      C dst-4
         bgt     L(BIG)          C branch if more than 12 limbs
  
@@ -151,4 +151,4 @@ L(loopU):
         stw     r12, 20(r7)
         lmw     r24, -32(r1)    C restore registers
         blr
-EPILOGUE(mpn_rshift)
+EPILOGUE()
diff --git a/mpn/powerpc32/tabselect.asm b/mpn/powerpc32/tabselect.asm

new file mode 100644 (file)

index 0000000..155a7b4
--- /dev/null
+++ b/mpn/powerpc32/tabselect.asm
@@ -0,0 +1,98 @@
+dnl  PowerPC-32 mpn_tabselect.
+
+dnl  Copyright 2011 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C                  cycles/limb
+C 603e:              ?
+C 604e:              ?
+C 75x (G3):          ?
+C 7400,7410 (G4):    ?
+C 744x,745x (G4+):   ?
+C power4/ppc970:     3.3
+C power5:            ?
+
+C NOTES
+C  * This has not been tuned for any specific processor.  Its speed should not
+C    be too bad, though.
+C  * Using VMX could result in significant speedup for certain CPUs.
+
+C mpn_tabselect (mp_limb_t *rp, mp_limb_t *tp, mp_size_t n, mp_size_t nents, mp_size_t which)
+define(`rp',     `r3')
+define(`tp',     `r4')
+define(`n',      `r5')
+define(`nents',  `r6')
+define(`which',  `r7')
+
+define(`mask',   `r8')
+
+ASM_START()
+       TEXT
+       ALIGN(16)
+PROLOGUE(mpn_tabselect)
+       addi    r0, n, 1
+       srwi    r0, r0, 1               C inner loop count = ceil(n/2)
+       andi.   r9, n, 1                C cr0.eq iff n even; tested at L(outer)
+       subf    which, nents, which     C which -= nents, bias for test below
+       slwi    n, n, 2                 C n = 4*n, bytes per entry (for L(end))
+
+L(outer):
+       mtctr   r0                      C put inner loop count in ctr
+
+       add     r9, which, nents        C are we at the selected table entry?
+       addic   r9, r9, -1              C set CA (carry) iff not selected entry
+       subfe   mask, r0, r0            C mask = CA - 1: all ones iff selected
+
+       beq     cr0, L(top)             C branch to loop entry if n even
+
+       lwz     r9, 0(tp)               C n odd: handle one limb first
+       addi    tp, tp, 4
+       and     r9, r9, mask            C keep tp limb iff selected
+       lwz     r11, 0(rp)
+       andc    r11, r11, mask          C keep rp limb iff not selected
+       or      r9, r9, r11
+       stw     r9, 0(rp)
+       addi    rp, rp, 4
+       bdz     L(end)                  C ctr--, skip loop if n was 1
+
+       ALIGN(16)
+L(top):        lwz     r9, 0(tp)               C main loop: two limbs per pass
+       lwz     r10, 4(tp)
+       addi    tp, tp, 8
+       nop
+       and     r9, r9, mask
+       and     r10, r10, mask
+       lwz     r11, 0(rp)
+       lwz     r12, 4(rp)
+       andc    r11, r11, mask
+       andc    r12, r12, mask
+       or      r9, r9, r11
+       or      r10, r10, r12
+       stw     r9, 0(rp)
+       stw     r10, 4(rp)
+       addi    rp, rp, 8
+       bdnz    L(top)
+
+L(end):        subf    rp, n, rp               C move rp back to beginning
+       cmpwi   cr6, nents, 1           C was this the last table entry?
+       addi    nents, nents, -1
+       bne     cr6, L(outer)           C if not, go on to the next entry
+
+       blr
+EPILOGUE()
diff --git a/mpn/powerpc32/vmx/mod_34lsub1.asm b/mpn/powerpc32/vmx/mod_34lsub1.asm

index 8aee6f81de2f62a2665c650a740421b3114d8ffc..3c561fc8baad044c37e78380670fbc647d471d80 100644 (file)
--- a/mpn/powerpc32/vmx/mod_34lsub1.asm
+++ b/mpn/powerpc32/vmx/mod_34lsub1.asm
@@ -1,6 +1,7 @@
  dnl  PowerPC-32 mpn_mod_34lsub1 -- mpn remainder mod 2^24-1.
  
-dnl  Copyright 2002, 2003, 2005, 2006, 2007 Free Software Foundation, Inc.
+dnl  Copyright 2002, 2003, 2005, 2006, 2007, 2012 Free Software Foundation,
+dnl  Inc.
  
  dnl  This file is part of the GNU MP Library.
  
@@ -135,15 +136,15 @@ L(large):
  
         andi.   r7, up, 15
         vxor    a0, v0, v0
-       lis     r0, 0xaaaa
+       lis     r9, 0xaaaa
         vxor    a1, v0, v0
-       ori     r0, r0, 0xaaab
+       ori     r9, r9, 0xaaab
         vxor    a2, v0, v0
         li      r5, 16
         vxor    c0, v0, v0
         li      r6, 32
         vxor    c1, v0, v0
-       LEAL(   r11, cnsts)
+       LEAL(   r11, cnsts)             C CAUTION clobbers r0 for elf, darwin
         vxor    c2, v0, v0
         vxor    z, v0, v0
  
@@ -158,7 +159,7 @@ L(large):
         vsldoi  a2, z, a2, 12
  
         addi    n, n, 9
-       mulhwu  r0, n, r0
+       mulhwu  r0, n, r9
         srwi    r0, r0, 3               C r0 = floor(n/12)
         mtctr   r0
  
@@ -174,7 +175,7 @@ L(na4):     bne     cr7, L(na8)
         vsldoi  a1, z, a1, 8
  
         addi    n, n, 6
-       mulhwu  r0, n, r0
+       mulhwu  r0, n, r9
         srwi    r0, r0, 3               C r0 = floor(n/12)
         mtctr   r0
  
@@ -188,7 +189,7 @@ L(na8):
         vsldoi  a0, z, a0, 4
  
         addi    n, n, 3
-       mulhwu  r0, n, r0
+       mulhwu  r0, n, r9
         srwi    r0, r0, 3               C r0 = floor(n/12)
         mtctr   r0
  
@@ -197,7 +198,7 @@ L(na8):
         b       L(0)
  
  L(aligned16):
-       mulhwu  r0, n, r0
+       mulhwu  r0, n, r9
         srwi    r0, r0, 3               C r0 = floor(n/12)
         mtctr   r0
  
diff --git a/mpn/powerpc32/vmx/popcount.asm b/mpn/powerpc32/vmx/popcount.asm

index 62fcaaee4a7f50ae4ed5f02ee3009c9ae249627a..3e0db3336b6257b93a172491212273972d785b75 100644 (file)
--- a/mpn/powerpc32/vmx/popcount.asm
+++ b/mpn/powerpc32/vmx/popcount.asm
@@ -21,8 +21,3 @@ include(`../config.m4')
  
  MULFUNC_PROLOGUE(mpn_popcount)
  include_mpn(`powerpc64/vmx/popcount.asm')
-
-C                   cycles/limb
-C 7400,7410 (G4):       2.75
-C 744x,745x (G4+):      2.25
-C 970 (G5):             5.3
diff --git a/mpn/powerpc64/README b/mpn/powerpc64/README

index 757357b4d8fd67aaaab1bb96a5bcd32f11736c37..020ad23cd506fe4e0a6aeb06ecb62316e117f944 100644 (file)
--- a/mpn/powerpc64/README
+++ b/mpn/powerpc64/README
@@ -113,7 +113,7 @@ Memory:               2 ld/st.  Stores go to the L2 cache, which can sustain just
                   one store per cycle.
                   L1 load latency: to gregs 3-4 cycles, to fregs 5-6 cycles.
                   Operations that modify the address register might be split
-                 to use also a an integer issue slot.
+                 to use also an integer issue slot.
  Simple integer:          2 operations every cycle, latency 2.
  Integer multiply: 2 operations every 6th cycle, latency 7 cycles.
  Integer divide:          ?
diff --git a/mpn/powerpc64/aix.m4 b/mpn/powerpc64/aix.m4

index 589686a868988d7b29230885c7e21c8e7956d973..88c89b018a42aaaaf27def9992b5d9adb8dc42b5 100644 (file)
--- a/mpn/powerpc64/aix.m4
+++ b/mpn/powerpc64/aix.m4
@@ -1,7 +1,8 @@
  divert(-1)
  dnl  m4 macros for AIX 64-bit assembly.
  
-dnl  Copyright 2000, 2001, 2002, 2005, 2006 Free Software Foundation, Inc.
+dnl  Copyright 2000, 2001, 2002, 2005, 2006, 2010, 2012 Free Software
+dnl  Foundation, Inc.
  dnl
  dnl  This file is part of the GNU MP Library.
  dnl
@@ -19,7 +20,7 @@ dnl  You should have received a copy of the GNU Lesser General Public License
  dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
  
  define(`ASM_START',
-       `.machine       "ppc64"
+       `.machine       "any"
         .toc')
  
  dnl  Called: PROLOGUE_cpu(GSYM_PREFIX`'foo)
@@ -35,8 +36,7 @@ m4_assert_numargs(1)
         .csect  [DS], 3
  $1:
         .llong  .$1, TOC[tc0], 0
-       .csect  [PR]
-       .align  4
+       .csect  .$1[PR], 6
  .$1:')
  
  define(`EPILOGUE_cpu',
diff --git a/mpn/powerpc64/com.asm b/mpn/powerpc64/com.asm

index 4fb2e65d747103922f5d7f48da3ab04c8e489508..cb89bade2cc9e785f6e0a93293cedeb40afaeff0 100644 (file)
--- a/mpn/powerpc64/com.asm
+++ b/mpn/powerpc64/com.asm
@@ -19,9 +19,12 @@ dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
  
  include(`../config.m4')
  
-C              cycles/limb
-C POWER3/PPC630:     1?
-C POWER4/PPC970:     1.6
+C                  cycles/limb
+C POWER3/PPC630          1?
+C POWER4/PPC970          1.6
+C POWER5                 ?
+C POWER6                 ?
+C POWER7                 1.45
  
  C TODO
  C  * 8-way unrolling brings timing down to about 1.3 cycles/limb.
diff --git a/mpn/powerpc64/copyd.asm b/mpn/powerpc64/copyd.asm

index 6a46a433c9b335108a15f5890c499bdc79a68270..256e7dc1224ebf9c53ed2c0b36bc60a7c6ef0028 100644 (file)
--- a/mpn/powerpc64/copyd.asm
+++ b/mpn/powerpc64/copyd.asm
@@ -19,9 +19,12 @@ dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
  
  include(`../config.m4')
  
-C              cycles/limb
-C POWER3/PPC630:     1
-C POWER4/PPC970:     1
+C                  cycles/limb
+C POWER3/PPC630          1
+C POWER4/PPC970          1
+C POWER5                 ?
+C POWER6                 ?
+C POWER7                 1.4
  
  C INPUT PARAMETERS
  C rp   r3
diff --git a/mpn/powerpc64/copyi.asm b/mpn/powerpc64/copyi.asm

index 5cb7e485653a8cdd2ea5ce6d55a7ae4199cb0ae5..31d1fc2e78b18bbd4be22b52b7a5962bb76ab34f 100644 (file)
--- a/mpn/powerpc64/copyi.asm
+++ b/mpn/powerpc64/copyi.asm
@@ -19,9 +19,12 @@ dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
  
  include(`../config.m4')
  
-C              cycles/limb
-C POWER3/PPC630:     1
-C POWER4/PPC970:     1
+C                  cycles/limb
+C POWER3/PPC630          1
+C POWER4/PPC970          1
+C POWER5                 ?
+C POWER6                 ?
+C POWER7                 1.4
  
  C INPUT PARAMETERS
  C rp   r3
diff --git a/mpn/powerpc64/darwin.m4 b/mpn/powerpc64/darwin.m4

index 10055be13a5cdc18aa50f01d2f77b778a20c65c2..c0c0b2e84364f24769022e7e621f24133e00069a 100644 (file)
--- a/mpn/powerpc64/darwin.m4
+++ b/mpn/powerpc64/darwin.m4
@@ -30,7 +30,7 @@ define(`PROLOGUE_cpu',
  m4_assert_numargs(1)
  `      .text
         .globl  $1
-       .align  4
+       .align  5
  $1:')
  
  define(`EPILOGUE_cpu',
diff --git a/mpn/powerpc64/elf.m4 b/mpn/powerpc64/elf.m4

index e6da11f90c711fd736672d6b440e27968ab31891..c8d9015a8fdfdd669beb4e215d5fb8e8edb25cb9 100644 (file)
--- a/mpn/powerpc64/elf.m4
+++ b/mpn/powerpc64/elf.m4
@@ -36,7 +36,7 @@ $1:
         .size   $1, 24
         .type   .$1, @function
         .section        ".text"
-       .align  4
+       .align  5
  .$1:')
  
  define(`EPILOGUE_cpu',
diff --git a/mpn/powerpc64/logops_n.asm b/mpn/powerpc64/logops_n.asm

index 917b59f4557a32f1cc69a7c9cca90f2ed3da7e3a..2caa2c7c638a6b2acfa2466f45977b3ee8c5b927 100644 (file)
--- a/mpn/powerpc64/logops_n.asm
+++ b/mpn/powerpc64/logops_n.asm
@@ -20,9 +20,12 @@ dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
  
  include(`../config.m4')
  
-C              cycles/limb
-C POWER3/PPC630:     1.75
-C POWER4/PPC970:     2.10
+C                  cycles/limb
+C POWER3/PPC630          1.75
+C POWER4/PPC970          2.10
+C POWER5                 ?
+C POWER6                 ?
+C POWER7                 1.75
  
  C   n     POWER3/PPC630   POWER4/PPC970
  C     1               15.00           15.33
diff --git a/mpn/powerpc64/lshift.asm b/mpn/powerpc64/lshift.asm

index 41e5ddd8e55b7299951770f1c7e08db6a703c8f5..eb70c498316fa54344cab2327b1315c1055da0d2 100644 (file)
--- a/mpn/powerpc64/lshift.asm
+++ b/mpn/powerpc64/lshift.asm
@@ -1,6 +1,6 @@
  dnl  PowerPC-64 mpn_lshift -- rp[] = up[] << cnt
  
-dnl  Copyright 2003, 2005 Free Software Foundation, Inc.
+dnl  Copyright 2003, 2005, 2010, 2011 Free Software Foundation, Inc.
  
  dnl  This file is part of the GNU MP Library.
  
@@ -19,98 +19,178 @@ dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
  
  include(`../config.m4')
  
-C              cycles/limb
-C POWER3/PPC630:     1.5
-C POWER4/PPC970:     3.0
+C                   cycles/limb
+C POWER3/PPC630          ?
+C POWER4/PPC970          ?
+C POWER5                 2.25
+C POWER6                 9.75
+C POWER7                 2.15
  
-C INPUT PARAMETERS
-define(`rp',`r3')
-define(`up',`r4')
-define(`n',`r5')
-define(`cnt',`r6')
+C TODO
+C  * Try to reduce the number of needed live registers
+C  * Micro-optimise header code
+C  * Keep in synch with rshift.asm and lshiftc.asm
  
-define(`tnc',`r5')
-define(`v0',`r0')
-define(`v1',`r7')
-define(`u0',`r8')
-define(`u1',`r9')
-define(`h0',`r10')
-define(`h1',`r11')
+C INPUT PARAMETERS
+define(`rp',  `r3')
+define(`up',  `r4')
+define(`n',   `r5')
+define(`cnt', `r6')
  
+define(`tnc',`r0')
+define(`u0',`r30')
+define(`u1',`r31')
+define(`retval',`r5')
  
  ASM_START()
  PROLOGUE(mpn_lshift)
+       std     r31, -8(r1)
+       std     r30, -16(r1)
+       subfic  tnc, cnt, 64
+       sldi    r7, n, 3        C byte count corresponding to n
+       add     up, up, r7      C up = up + n
+       add     rp, rp, r7      C rp = rp + n
+       rldicl. r30, n, 0,62    C r30 = n & 3, set cr0
+       cmpdi   cr6, r30, 2
+       addi    r31, n, 3       C compute count...
+       ld      r10, -8(up)     C load 1st limb for b00...b11
+       srd     retval, r10, tnc
  ifdef(`HAVE_ABI_mode32',
-`      rldicl  r7, r5, 0, 32   C zero extend n
-       mtctr   r7',            C copy n to count register
-`      mtctr   n')             C copy n to count register
-
-ifdef(`HAVE_ABI_mode32',
-`      rldic   r0, n, 3, 32',  C byte count corresponding to n
-`      rldicr  r0, n, 3, 60')  C byte count corresponding to n
-
-       add     rp, rp, r0      C rp = rp + n
-       add     up, up, r0      C up = up + n
-       addi    rp, rp, 8       C rp now points 16 beyond end
-       addi    up, up, -8      C up now points to last limb
-       subfic  tnc, cnt, 64    C reverse shift count
-
-       ld      u0, 0(up)
-       sld     h0, u0, cnt
-       srd     r12, u0, tnc    C return value
-       bdz     L(1)            C jump for n = 1
-
-       ld      u1, -8(up)
-       bdz     L(2)            C jump for n = 2
-
-       ldu     u0, -16(up)
-       bdz     L(end)          C jump for n = 3
-
-L(oop):        srd     v1, u1, tnc
-       sld     h1, u1, cnt
-       ld      u1, -8(up)
-       or      h0, v1, h0
-       stdu    h0, -16(rp)
-
-       bdz     L(exit)
-
-       srd     v0, u0, tnc
-       sld     h0, u0, cnt
-       ldu     u0, -16(up)
-       or      h1, v0, h1
-       std     h1, -8(rp)
-
-       bdnz    L(oop)
-
-L(end):        srd     v1, u1, tnc
-       sld     h1, u1, cnt
-       or      h0, v1, h0
-       stdu    h0, -16(rp)
-       srd     v0, u0, tnc
-       sld     h0, u0, cnt
-       or      h1, v0, h1
-       std     h1, -8(rp)
-L(1):  std     h0, -16(rp)
-ifdef(`HAVE_ABI_mode32',
-`      srdi    r3, r12, 32
-       mr      r4, r12
-',`    mr      r3, r12
-')
-       blr
-
-L(exit):       srd     v0, u0, tnc
-       sld     h0, u0, cnt
-       or      h1, v0, h1
-       std     h1, -8(rp)
-L(2):  srd     v1, u1, tnc
-       sld     h1, u1, cnt
-       or      h0, v1, h0
-       stdu    h0, -16(rp)
-       std     h1, -8(rp)
+`      rldicl  r31, r31, 62,34',       C ...branch count
+`      srdi    r31, r31, 2')   C ...for ctr
+       mtctr   r31             C copy count into ctr
+       beq     cr0, L(b00)
+       blt     cr6, L(b01)
+       ld      r11, -16(up)    C load 2nd limb for b10 and b11
+       beq     cr6, L(b10)
+
+       ALIGN(16)
+L(b11):        sld     r8, r10, cnt
+       srd     r9, r11, tnc
+       ld      u1, -24(up)
+       addi    up, up, -24
+       sld     r12, r11, cnt
+       srd     r7, u1, tnc
+       addi    rp, rp, 16
+       bdnz    L(gt3)
+
+       or      r11, r8, r9
+       sld     r8, u1, cnt
+       b       L(cj3)
+
+       ALIGN(16)
+L(gt3):        ld      u0, -8(up)
+       or      r11, r8, r9
+       sld     r8, u1, cnt
+       srd     r9, u0, tnc
+       ld      u1, -16(up)
+       or      r10, r12, r7
+       b       L(L11)
+
+       ALIGN(32)
+L(b10):        sld     r12, r10, cnt
+       addi    rp, rp, 24
+       srd     r7, r11, tnc
+       bdnz    L(gt2)
+
+       sld     r8, r11, cnt
+       or      r10, r12, r7
+       b       L(cj2)
+
+L(gt2):        ld      u0, -24(up)
+       sld     r8, r11, cnt
+       srd     r9, u0, tnc
+       ld      u1, -32(up)
+       or      r10, r12, r7
+       sld     r12, u0, cnt
+       srd     r7, u1, tnc
+       ld      u0, -40(up)
+       or      r11, r8, r9
+       addi    up, up, -16
+       b       L(L10)
+
+       ALIGN(16)
+L(b00):        ld      u1, -16(up)
+       sld     r12, r10, cnt
+       srd     r7, u1, tnc
+       ld      u0, -24(up)
+       sld     r8, u1, cnt
+       srd     r9, u0, tnc
+       ld      u1, -32(up)
+       or      r10, r12, r7
+       sld     r12, u0, cnt
+       srd     r7, u1, tnc
+       addi    rp, rp, 8
+       bdz     L(cj4)
+
+L(gt4):        addi    up, up, -32
+       ld      u0, -8(up)
+       or      r11, r8, r9
+       b       L(L00)
+
+       ALIGN(16)
+L(b01):        bdnz    L(gt1)
+       sld     r8, r10, cnt
+       std     r8, -8(rp)
+       b       L(ret)
+
+L(gt1):        ld      u0, -16(up)
+       sld     r8, r10, cnt
+       srd     r9, u0, tnc
+       ld      u1, -24(up)
+       sld     r12, u0, cnt
+       srd     r7, u1, tnc
+       ld      u0, -32(up)
+       or      r11, r8, r9
+       sld     r8, u1, cnt
+       srd     r9, u0, tnc
+       ld      u1, -40(up)
+       addi    up, up, -40
+       or      r10, r12, r7
+       bdz     L(end)
+
+       ALIGN(32)
+L(top):        sld     r12, u0, cnt
+       srd     r7, u1, tnc
+       ld      u0, -8(up)
+       std     r11, -8(rp)
+       or      r11, r8, r9
+L(L00):        sld     r8, u1, cnt
+       srd     r9, u0, tnc
+       ld      u1, -16(up)
+       std     r10, -16(rp)
+       or      r10, r12, r7
+L(L11):        sld     r12, u0, cnt
+       srd     r7, u1, tnc
+       ld      u0, -24(up)
+       std     r11, -24(rp)
+       or      r11, r8, r9
+L(L10):        sld     r8, u1, cnt
+       srd     r9, u0, tnc
+       ld      u1, -32(up)
+       addi    up, up, -32
+       std     r10, -32(rp)
+       addi    rp, rp, -32
+       or      r10, r12, r7
+       bdnz    L(top)
+
+       ALIGN(32)
+L(end):        sld     r12, u0, cnt
+       srd     r7, u1, tnc
+       std     r11, -8(rp)
+L(cj4):        or      r11, r8, r9
+       sld     r8, u1, cnt
+       std     r10, -16(rp)
+L(cj3):        or      r10, r12, r7
+       std     r11, -24(rp)
+L(cj2):        std     r10, -32(rp)
+       std     r8, -40(rp)
+
+L(ret):        ld      r31, -8(r1)
+       ld      r30, -16(r1)
  ifdef(`HAVE_ABI_mode32',
-`      srdi    r3, r12, 32
-       mr      r4, r12
-',`    mr      r3, r12
-')
+`      srdi    r3, retval, 32
+       mr      r4, retval
+',`    mr      r3, retval')
         blr
  EPILOGUE()
diff --git a/mpn/powerpc64/lshiftc.asm b/mpn/powerpc64/lshiftc.asm

new file mode 100644 (file)

index 0000000..8f470a5
--- /dev/null
+++ b/mpn/powerpc64/lshiftc.asm
@@ -0,0 +1,198 @@
+dnl  PowerPC-64 mpn_lshiftc -- rp[] = ~up[] << cnt
+
+dnl  Copyright 2003, 2005, 2010 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C                   cycles/limb
+C POWER3/PPC630          ?
+C POWER4/PPC970          ?
+C POWER5                 2.25
+C POWER6                 9.5
+C POWER7                 2.15
+
+C TODO
+C  * Try to reduce the number of needed live registers
+C  * Micro-optimise header code
+C  * Keep in synch with lshift.asm and rshift.asm
+
+C INPUT PARAMETERS
+define(`rp',  `r3')
+define(`up',  `r4')
+define(`n',   `r5')
+define(`cnt', `r6')
+
+define(`tnc',`r0')
+define(`u0',`r30')
+define(`u1',`r31')
+define(`retval',`r5')
+
+ASM_START()
+PROLOGUE(mpn_lshiftc)          C rp[] = ~up[] << cnt over n limbs, working from the most significant limb down; returns the bits shifted out of the top limb
+       std     r31, -8(r1)     C save r31 (u1) below the stack pointer; reloaded at L(ret)
+       std     r30, -16(r1)    C save r30 (u0) likewise
+       subfic  tnc, cnt, 64    C tnc = 64 - cnt, the complementary right-shift count; NOTE(review): presumably 1 <= cnt <= 63 - confirm against callers
+       sldi    r7, n, 3        C byte count corresponding to n
+       add     up, up, r7      C up = up + n
+       add     rp, rp, r7      C rp = rp + n
+       rldicl. r30, n, 0,62    C r30 = n & 3, set cr0
+       cmpdi   cr6, r30, 2     C cr6 = (n & 3) compared with 2
+       addi    r31, n, 3       C compute count...
+       ld      r10, -8(up)     C load 1st limb for b00...b11
+       srd     retval, r10, tnc        C return value = top limb >> tnc (not complemented); retval is r5 like n, and n is not read after this
+       srdi    r31, r31, 2     C ...for ctr
+       mtctr   r31             C copy count into ctr
+       beq     cr0, L(b00)     C n & 3 == 0
+       blt     cr6, L(b01)     C n & 3 == 1
+       ld      r11, -16(up)    C load 2nd limb for b10 and b11
+       beq     cr6, L(b10)     C n & 3 == 2; otherwise fall through to b11
+
+       ALIGN(16)
+L(b11):        sld     r8, r10, cnt    C n & 3 == 3: r8 and r9 are the two halves of result limb n-1
+       srd     r9, r11, tnc
+       ld      u1, -24(up)     C third limb from the top
+       addi    up, up, -24
+       sld     r12, r11, cnt
+       srd     r7, u1, tnc
+       addi    rp, rp, 16      C bias rp so the shared store offsets at L(L11) and L(cj3) hit the top of rp[]
+       bdnz    L(gt3)          C more than 3 limbs: enter the loop
+
+       nor     r11, r8, r9     C n == 3: finish through the tail at L(cj3)
+       sld     r8, u1, cnt
+       nor     r8, r8, r8      C lowest result limb = ~(up[0] << cnt)
+       b       L(cj3)
+
+       ALIGN(16)
+L(gt3):        ld      u0, -8(up)      C preload the next limbs and join the loop at L(L11)
+       nor     r11, r8, r9
+       sld     r8, u1, cnt
+       srd     r9, u0, tnc
+       ld      u1, -16(up)
+       nor     r10, r12, r7
+       b       L(L11)
+
+       ALIGN(32)
+L(b10):        sld     r12, r10, cnt   C n & 3 == 2
+       addi    rp, rp, 24      C bias rp so the shared store offsets at L(L10) and L(cj2) hit the top of rp[]
+       srd     r7, r11, tnc
+       bdnz    L(gt2)          C more than 2 limbs: enter the loop
+
+       sld     r8, r11, cnt
+       nor     r10, r12, r7
+       nor     r8, r8, r8      C lowest result limb = ~(up[0] << cnt)
+       b       L(cj2)
+
+L(gt2):        ld      u0, -24(up)
+       sld     r8, r11, cnt
+       srd     r9, u0, tnc
+       ld      u1, -32(up)
+       nor     r10, r12, r7
+       sld     r12, u0, cnt
+       srd     r7, u1, tnc
+       ld      u0, -40(up)
+       nor     r11, r8, r9
+       addi    up, up, -16     C bias up for the load offsets used from L(L10) onwards
+       b       L(L10)
+
+       ALIGN(16)
+L(b00):        ld      u1, -16(up)     C n & 3 == 0
+       sld     r12, r10, cnt
+       srd     r7, u1, tnc
+       ld      u0, -24(up)
+       sld     r8, u1, cnt
+       srd     r9, u0, tnc
+       ld      u1, -32(up)
+       nor     r10, r12, r7
+       sld     r12, u0, cnt
+       srd     r7, u1, tnc
+       addi    rp, rp, 8       C bias rp so the shared store offsets at L(L00) and L(cj4) hit the top of rp[]
+       bdz     L(cj4)          C n == 4: no loop iterations, go straight to the tail
+
+L(gt4):        addi    up, up, -32
+       ld      u0, -8(up)
+       nor     r11, r8, r9
+       b       L(L00)
+
+       ALIGN(16)
+L(b01):        bdnz    L(gt1)          C n & 3 == 1; taken when n > 1
+       sld     r8, r10, cnt
+       nor     r8, r8, r8      C n == 1: the single result limb is ~(up[0] << cnt)
+       std     r8, -8(rp)
+       b       L(ret)
+
+L(gt1):        ld      u0, -16(up)
+       sld     r8, r10, cnt
+       srd     r9, u0, tnc
+       ld      u1, -24(up)
+       sld     r12, u0, cnt
+       srd     r7, u1, tnc
+       ld      u0, -32(up)
+       nor     r11, r8, r9
+       sld     r8, u1, cnt
+       srd     r9, u0, tnc
+       ld      u1, -40(up)
+       addi    up, up, -40
+       nor     r10, r12, r7
+       bdz     L(end)          C n == 5: skip the loop
+
+       ALIGN(32)
+L(top):        sld     r12, u0, cnt    C main loop: four result limbs per iteration
+       srd     r7, u1, tnc
+       ld      u0, -8(up)
+       std     r11, -8(rp)
+       nor     r11, r8, r9     C result limb = ~((higher limb << cnt) | (lower limb >> tnc))
+L(L00):        sld     r8, u1, cnt     C loop entry for the n & 3 == 0 feed-in
+       srd     r9, u0, tnc
+       ld      u1, -16(up)
+       std     r10, -16(rp)
+       nor     r10, r12, r7
+L(L11):        sld     r12, u0, cnt    C loop entry for the n & 3 == 3 feed-in
+       srd     r7, u1, tnc
+       ld      u0, -24(up)
+       std     r11, -24(rp)
+       nor     r11, r8, r9
+L(L10):        sld     r8, u1, cnt     C loop entry for the n & 3 == 2 feed-in
+       srd     r9, u0, tnc
+       ld      u1, -32(up)
+       addi    up, up, -32
+       std     r10, -32(rp)
+       addi    rp, rp, -32
+       nor     r10, r12, r7
+       bdnz    L(top)
+
+       ALIGN(32)
+L(end):        sld     r12, u0, cnt    C tail: flush the limbs still held in registers
+       srd     r7, u1, tnc
+       std     r11, -8(rp)
+L(cj4):        nor     r11, r8, r9     C tail entry for n == 4
+       sld     r8, u1, cnt
+       std     r10, -16(rp)
+       nor     r8, r8, r8      C lowest result limb: its low cnt bits come out as ones
+L(cj3):        nor     r10, r12, r7    C tail entry for n == 3
+       std     r11, -24(rp)
+L(cj2):        std     r10, -32(rp)    C tail entry for n == 2
+       std     r8, -40(rp)
+
+L(ret):        ld      r31, -8(r1)     C restore r31
+       ld      r30, -16(r1)    C restore r30
+ifdef(`HAVE_ABI_mode32',
+`      srdi    r3, retval, 32
+       mr      r4, retval
+',`    mr      r3, retval')    C under HAVE_ABI_mode32 r3 = retval >> 32 and r4 = retval; otherwise r3 = retval
+       blr
+EPILOGUE()
diff --git a/mpn/powerpc64/mode32/p4/gmp-mparam.h b/mpn/powerpc64/mode32/p4/gmp-mparam.h

new file mode 100644 (file)

index 0000000..db296f0
--- /dev/null
+++ b/mpn/powerpc64/mode32/p4/gmp-mparam.h
@@ -0,0 +1,162 @@
+/* PowerPC-64 gmp-mparam.h -- Compiler/machine parameter header file.
+
+Copyright 2008, 2009, 2011 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+/* 1800 MHz PPC970 */
+
+#define DIVREM_1_NORM_THRESHOLD              0  /* always */
+#define DIVREM_1_UNNORM_THRESHOLD            0  /* always */
+#define MOD_1_1P_METHOD                      1
+#define MOD_1_NORM_THRESHOLD                 0  /* always */
+#define MOD_1_UNNORM_THRESHOLD               0  /* always */
+#define MOD_1N_TO_MOD_1_1_THRESHOLD          7
+#define MOD_1U_TO_MOD_1_1_THRESHOLD          5
+#define MOD_1_1_TO_MOD_1_2_THRESHOLD         6
+#define MOD_1_2_TO_MOD_1_4_THRESHOLD        46
+#define PREINV_MOD_1_TO_MOD_1_THRESHOLD     14
+#define USE_PREINV_DIVREM_1                  1
+#define DIV_QR_2_PI2_THRESHOLD              12
+#define DIVEXACT_1_THRESHOLD                 0  /* always */
+#define BMOD_1_TO_MOD_1_THRESHOLD           90
+
+#define MUL_TOOM22_THRESHOLD                16
+#define MUL_TOOM33_THRESHOLD                57
+#define MUL_TOOM44_THRESHOLD                94
+#define MUL_TOOM6H_THRESHOLD               125
+#define MUL_TOOM8H_THRESHOLD               187
+
+#define MUL_TOOM32_TO_TOOM43_THRESHOLD      65
+#define MUL_TOOM32_TO_TOOM53_THRESHOLD      99
+#define MUL_TOOM42_TO_TOOM53_THRESHOLD      61
+#define MUL_TOOM42_TO_TOOM63_THRESHOLD      56
+#define MUL_TOOM43_TO_TOOM54_THRESHOLD      70
+
+#define SQR_BASECASE_THRESHOLD               4
+#define SQR_TOOM2_THRESHOLD                 30
+#define SQR_TOOM3_THRESHOLD                 98
+#define SQR_TOOM4_THRESHOLD                136
+#define SQR_TOOM6_THRESHOLD                180
+#define SQR_TOOM8_THRESHOLD                272
+
+#define MULMID_TOOM42_THRESHOLD             34
+
+#define MULMOD_BNM1_THRESHOLD               12
+#define SQRMOD_BNM1_THRESHOLD               13
+
+#define MUL_FFT_MODF_THRESHOLD             244  /* k = 5 */
+#define MUL_FFT_TABLE3                                      \
+  { {    244, 5}, {     13, 6}, {      7, 5}, {     15, 6}, \
+    {     15, 7}, {      8, 6}, {     17, 7}, {      9, 6}, \
+    {     19, 7}, {     13, 8}, {      7, 7}, {     17, 8}, \
+    {      9, 7}, {     20, 8}, {     11, 7}, {     23, 8}, \
+    {     13, 7}, {     29, 8}, {     19, 9}, {     11, 8}, \
+    {     27,10}, {      7, 9}, {     15, 8}, {     33, 9}, \
+    {     19, 8}, {     39, 9}, {     23, 8}, {     47, 9}, \
+    {     27,10}, {     15, 9}, {     39,10}, {     23, 9}, \
+    {     47,11}, {     15,10}, {     31, 9}, {     67,10}, \
+    {     39, 9}, {     83,10}, {     47, 9}, {     95, 8}, \
+    {    191, 9}, {     99,10}, {     55,11}, {     31,10}, \
+    {     63, 9}, {    127, 8}, {    255,10}, {     71, 9}, \
+    {    143, 8}, {    287,10}, {     79, 9}, {    159, 8}, \
+    {    319,11}, {     47,10}, {     95, 9}, {    191, 8}, \
+    {    383,10}, {    103,12}, {     31,11}, {     63,10}, \
+    {    127, 9}, {    255, 8}, {    511,10}, {    143, 9}, \
+    {    287,11}, {     79,10}, {    159, 9}, {    319, 8}, \
+    {    639,10}, {    175, 9}, {    351, 8}, {    703,11}, \
+    {     95,10}, {    191, 9}, {    383, 8}, {    767,10}, \
+    {    207, 9}, {    415,10}, {    223, 9}, {    447,12}, \
+    {     63,11}, {    127,10}, {    255, 9}, {    511,11}, \
+    {    143,10}, {    287, 9}, {    575, 8}, {   1151,11}, \
+    {    159,10}, {    319, 9}, {    639,11}, {    175,10}, \
+    {    351, 9}, {    703,12}, {     95,11}, {    191,10}, \
+    {    383, 9}, {    767,11}, {    207,10}, {    415, 9}, \
+    {    831,11}, {    223,10}, {    447,13}, {   8192,14}, \
+    {  16384,15}, {  32768,16} }
+#define MUL_FFT_TABLE3_SIZE 106
+#define MUL_FFT_THRESHOLD                 2688
+
+#define SQR_FFT_MODF_THRESHOLD             212  /* k = 5 */
+#define SQR_FFT_TABLE3                                      \
+  { {    212, 5}, {     13, 6}, {     15, 7}, {      8, 6}, \
+    {     17, 7}, {      9, 6}, {     19, 7}, {     13, 8}, \
+    {      7, 7}, {     17, 8}, {      9, 7}, {     20, 8}, \
+    {     11, 7}, {     23, 8}, {     13, 7}, {     27, 9}, \
+    {      7, 8}, {     21, 9}, {     11, 8}, {     25,10}, \
+    {      7, 9}, {     15, 8}, {     33, 9}, {     19, 8}, \
+    {     39, 9}, {     23, 8}, {     47, 9}, {     27,10}, \
+    {     15, 9}, {     39,10}, {     23, 9}, {     47,11}, \
+    {     15,10}, {     31, 9}, {     63, 8}, {    127, 9}, \
+    {     67,10}, {     39, 9}, {     79, 8}, {    159,10}, \
+    {     47, 9}, {     95, 8}, {    191,11}, {     31,10}, \
+    {     63, 9}, {    127, 8}, {    255,10}, {     71, 9}, \
+    {    143, 8}, {    287,10}, {     79, 9}, {    159, 8}, \
+    {    319,11}, {     47, 9}, {    191, 8}, {    383,12}, \
+    {     31,11}, {     63,10}, {    127, 9}, {    255, 8}, \
+    {    511,10}, {    143, 9}, {    287, 8}, {    575,11}, \
+    {     79,10}, {    159, 9}, {    319, 8}, {    639,10}, \
+    {    175, 9}, {    351, 8}, {    703,10}, {    191, 9}, \
+    {    383, 8}, {    767,10}, {    207, 9}, {    415,11}, \
+    {    111,10}, {    223,12}, {     63,11}, {    127,10}, \
+    {    255, 9}, {    511,11}, {    143,10}, {    287, 9}, \
+    {    575,11}, {    159,10}, {    319, 9}, {    639,11}, \
+    {    175,10}, {    351, 9}, {    703, 8}, {   1407,11}, \
+    {    191,10}, {    383, 9}, {    767,11}, {    207,10}, \
+    {    415,11}, {    223,10}, {    447,13}, {   8192,14}, \
+    {  16384,15}, {  32768,16} }
+#define SQR_FFT_TABLE3_SIZE 102
+#define SQR_FFT_THRESHOLD                 1984
+
+#define MULLO_BASECASE_THRESHOLD             0  /* always */
+#define MULLO_DC_THRESHOLD                  55
+#define MULLO_MUL_N_THRESHOLD             5240
+
+#define DC_DIV_QR_THRESHOLD                 27
+#define DC_DIVAPPR_Q_THRESHOLD             108
+#define DC_BDIV_QR_THRESHOLD                51
+#define DC_BDIV_Q_THRESHOLD                126
+
+#define INV_MULMOD_BNM1_THRESHOLD           38
+#define INV_NEWTON_THRESHOLD               129
+#define INV_APPR_THRESHOLD                 116
+
+#define BINV_NEWTON_THRESHOLD              198
+#define REDC_1_TO_REDC_N_THRESHOLD          51
+
+#define MU_DIV_QR_THRESHOLD                807
+#define MU_DIVAPPR_Q_THRESHOLD             807
+#define MUPI_DIV_QR_THRESHOLD               54
+#define MU_BDIV_QR_THRESHOLD               748
+#define MU_BDIV_Q_THRESHOLD                872
+
+#define POWM_SEC_TABLE  4,35,152,780,2145
+
+#define MATRIX22_STRASSEN_THRESHOLD         11
+#define HGCD_THRESHOLD                     104
+#define HGCD_APPR_THRESHOLD                118
+#define HGCD_REDUCE_THRESHOLD             1329
+#define GCD_DC_THRESHOLD                   268
+#define GCDEXT_DC_THRESHOLD                241
+#define JACOBI_BASE_METHOD                   4
+
+#define GET_STR_DC_THRESHOLD                 9
+#define GET_STR_PRECOMPUTE_THRESHOLD        18
+#define SET_STR_DC_THRESHOLD               996
+#define SET_STR_PRECOMPUTE_THRESHOLD      2170
+
+#define FAC_DSC_THRESHOLD                  442
+#define FAC_ODD_THRESHOLD                   26
diff --git a/mpn/powerpc64/mode32/sqr_diagonal.asm b/mpn/powerpc64/mode32/sqr_diagonal.asm

new file mode 100644 (file)

index 0000000..01cfa9d
--- /dev/null
+++ b/mpn/powerpc64/mode32/sqr_diagonal.asm
@@ -0,0 +1,107 @@
+dnl  PowerPC-64 mpn_sqr_diagonal.
+
+dnl  Copyright 2001, 2002, 2003, 2005, 2006, 2010 Free Software Foundation,
+dnl  Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C                  cycles/limb
+C POWER3/PPC630                18
+C POWER4/PPC970                 ?
+C POWER5                7.25
+C POWER6                9.5
+
+C INPUT PARAMETERS
+define(`rp',  r3)
+define(`up',  r4)
+define(`n',   r5)
+
+ASM_START()
+PROLOGUE(mpn_sqr_diagonal)     C for each of the n limbs: rp[2i] = low 64 bits and rp[2i+1] = high 64 bits of up[i] squared
+ifdef(`HAVE_ABI_mode32',
+`      rldicl  n, n, 0, 32')           C zero extend n
+
+       rldicl. r0, n, 0,62             C r0 = n & 3, set cr0
+       addi    n, n, 3                 C compute count...
+       cmpdi   cr6, r0, 2              C cr6 = (n & 3) compared with 2
+       srdi    n, n, 2                 C ...for ctr
+       mtctr   n                       C copy count into ctr
+       beq     cr0, L(b00)             C n & 3 == 0; NOTE(review): n = 0 would also land here with ctr = 0 - presumably callers pass n >= 1
+       blt     cr6, L(b01)             C n & 3 == 1
+       beq     cr6, L(b10)             C n & 3 == 2; otherwise fall through to b11
+
+L(b11):        ld      r0, 0(up)       C n & 3 == 3: square three limbs then join the loop body at L(11)
+       ld      r10, 8(up)
+       ld      r12, 16(up)
+       addi    rp, rp, -16             C bias rp so the store at 16(rp) lands on the original rp
+       mulld   r7, r0, r0              C low 64 bits of the square
+       mulhdu  r8, r0, r0              C high 64 bits of the square
+       mulld   r9, r10, r10
+       mulhdu  r10, r10, r10
+       mulld   r11, r12, r12
+       mulhdu  r12, r12, r12
+       addi    up, up, 24
+       b       L(11)
+
+       ALIGN(16)
+L(b01):        ld      r0, 0(up)       C n & 3 == 1: square one limb then join at L(01)
+       addi    rp, rp, -48             C bias rp so the store at 48(rp) lands on the original rp
+       addi    up, up, 8
+       mulld   r11, r0, r0
+       mulhdu  r12, r0, r0
+       b       L(01)
+
+       ALIGN(16)
+L(b10):        ld      r0, 0(up)       C n & 3 == 2: square two limbs then join at L(10)
+       ld      r12, 8(up)
+       addi    rp, rp, -32             C bias rp so the store at 32(rp) lands on the original rp
+       addi    up, up, 16
+       mulld   r9, r0, r0
+       mulhdu  r10, r0, r0
+       mulld   r11, r12, r12
+       mulhdu  r12, r12, r12
+       b       L(10)
+
+       ALIGN(32)
+L(b00):                                C n & 3 == 0: enter the loop directly
+L(top):        ld      r0, 0(up)       C main loop: four limbs in and eight words out per iteration
+       ld      r8, 8(up)
+       ld      r10, 16(up)
+       ld      r12, 24(up)
+       mulld   r5, r0, r0              C r5 is also n; safe to overwrite since the count already sits in ctr
+       mulhdu  r6, r0, r0
+       mulld   r7, r8, r8
+       mulhdu  r8, r8, r8
+       mulld   r9, r10, r10
+       mulhdu  r10, r10, r10
+       mulld   r11, r12, r12
+       mulhdu  r12, r12, r12
+       addi    up, up, 32
+       std     r5, 0(rp)               C low word of the first square
+       std     r6, 8(rp)               C high word of the first square
+L(11): std     r7, 16(rp)              C join point for the n & 3 == 3 feed-in
+       std     r8, 24(rp)
+L(10): std     r9, 32(rp)              C join point for the n & 3 == 2 feed-in
+       std     r10, 40(rp)
+L(01): std     r11, 48(rp)             C join point for the n & 3 == 1 feed-in
+       std     r12, 56(rp)
+       addi    rp, rp, 64              C advance rp by the eight words of a full iteration
+       bdnz    L(top)
+
+       blr
+EPILOGUE()
diff --git a/mpn/powerpc64/mode64/addlsh1_n.asm b/mpn/powerpc64/mode64/addlsh1_n.asm

deleted file mode 100644 (file)

index 15182e1..0000000
--- a/mpn/powerpc64/mode64/addlsh1_n.asm
+++ /dev/null
@@ -1,82 +0,0 @@
-dnl  PowerPC-64 mpn_addlsh1_n -- rp[] = up[] + (vp[] << 1)
-
-dnl  Copyright 2003, 2005 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of the GNU Lesser General Public License as published
-dnl  by the Free Software Foundation; either version 3 of the License, or (at
-dnl  your option) any later version.
-
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-dnl  License for more details.
-
-dnl  You should have received a copy of the GNU Lesser General Public License
-dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C              cycles/limb
-C POWER3/PPC630:     2         (1.5 c/l should be possible)
-C POWER4/PPC970:     4         (2.0 c/l should be possible)
-
-C INPUT PARAMETERS
-C rp   r3
-C up   r4
-C vp   r5
-C n    r6
-
-define(`rp',`r3')
-define(`up',`r4')
-define(`vp',`r5')
-
-define(`s0',`r6')
-define(`s1',`r7')
-define(`u0',`r8')
-define(`v0',`r10')
-define(`v1',`r11')
-
-ASM_START()
-PROLOGUE(mpn_addlsh1_n)
-       mtctr   r6              C copy n in ctr
-       addic   r31, r31, 0     C clear cy
-
-       ld      v0, 0(vp)       C load v limb
-       ld      u0, 0(up)       C load u limb
-       addi    up, up, -8      C update up
-       addi    rp, rp, -8      C update rp
-       sldi    s1, v0, 1
-       bdz     L(end)          C If done, skip loop
-
-L(oop):        ld      v1, 8(vp)       C load v limb
-       adde    s1, s1, u0      C add limbs with cy, set cy
-       std     s1, 8(rp)       C store result limb
-       srdi    s0, v0, 63      C shift down previous v limb
-       ldu     u0, 16(up)      C load u limb and update up
-       rldimi  s0, v1, 1, 0    C left shift v limb and merge with prev v limb
-
-       bdz     L(exit)         C decrement ctr and exit if done
-
-       ldu     v0, 16(vp)      C load v limb and update vp
-       adde    s0, s0, u0      C add limbs with cy, set cy
-       stdu    s0, 16(rp)      C store result limb and update rp
-       srdi    s1, v1, 63      C shift down previous v limb
-       ld      u0, 8(up)       C load u limb
-       rldimi  s1, v0, 1, 0    C left shift v limb and merge with prev v limb
-
-       bdnz    L(oop)          C decrement ctr and loop back
-
-L(end):        adde    r7, s1, u0
-       std     r7, 8(rp)       C store last result limb
-       srdi    r3, v0, 63
-       addze   r3, r3
-       blr
-L(exit):       adde    r7, s0, u0
-       std     r7, 16(rp)      C store last result limb
-       srdi    r3, v1, 63
-       addze   r3, r3
-       blr
-EPILOGUE()
diff --git a/mpn/powerpc64/mode64/addmul_1.asm b/mpn/powerpc64/mode64/addmul_1.asm

deleted file mode 100644 (file)

index cadab3a..0000000
--- a/mpn/powerpc64/mode64/addmul_1.asm
+++ /dev/null
@@ -1,185 +0,0 @@
-dnl  PowerPC-64 mpn_addmul_1 -- Multiply a limb vector with a limb and add
-dnl  the result to a second limb vector.
-
-dnl  Copyright 1999, 2000, 2001, 2003, 2004, 2005, 2006 Free Software
-dnl  Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of the GNU Lesser General Public License as published
-dnl  by the Free Software Foundation; either version 3 of the License, or (at
-dnl  your option) any later version.
-
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-dnl  License for more details.
-
-dnl  You should have received a copy of the GNU Lesser General Public License
-dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C              cycles/limb
-C POWER3/PPC630:    6-18
-C POWER4/PPC970:     8
-C POWER5:            8
-
-C TODO
-C  * Reduce the number of registers used.  Some mul destination registers could
-C    be coalesced.
-C  * Delay std for preserving registers, and suppress them for n=1.
-C  * Write faster feed-in code.  If nothing else, avoid one or two up updates.
-
-C INPUT PARAMETERS
-define(`rp', `r3')
-define(`up', `r4')
-define(`n', `r5')
-define(`vl', `r6')
-
-ASM_START()
-PROLOGUE(mpn_addmul_1)
-       std     r31, -8(r1)
-       std     r30, -16(r1)
-       std     r29, -24(r1)
-       std     r28, -32(r1)
-       std     r27, -40(r1)
-       std     r26, -48(r1)
-
-       rldicl. r0, n, 0,62     C r0 = n & 3, set cr0
-       cmpdi   cr6, r0, 2
-       addi    n, n, 3         C compute count...
-       srdi    n, n, 2         C ...for ctr
-       mtctr   n               C copy count into ctr
-       beq     cr0, L(b00)
-       blt     cr6, L(b01)
-       beq     cr6, L(b10)
-
-L(b11):        ld      r26, 0(up)
-       ld      r28, 0(rp)
-       addi    up, up, 8
-       nop
-       mulld   r0, r26, r6
-       mulhdu  r12, r26, r6
-       addc    r0, r0, r28
-       std     r0, 0(rp)
-       addi    rp, rp, 8
-       b       L(fic)
-
-L(b00):        ld      r26, 0(up)
-       ld      r27, 8(up)
-       ld      r28, 0(rp)
-       ld      r29, 8(rp)
-       addi    up, up, 16
-       nop
-       mulld   r0, r26, r6
-       mulhdu  r5, r26, r6
-       mulld   r7, r27, r6
-       mulhdu  r8, r27, r6
-       addc    r7, r7, r5
-       addze   r12, r8
-       addc    r0, r0, r28
-       std     r0, 0(rp)
-       adde    r7, r7, r29
-       std     r7, 8(rp)
-       addi    rp, rp, 16
-       b       L(fic)
-
-L(b01):        bdnz    L(gt1)
-       ld      r26, 0(up)
-       ld      r28, 0(rp)
-       mulld   r0, r26, r6
-       mulhdu  r8, r26, r6
-       addc    r0, r0, r28
-       std     r0, 0(rp)
-       b       L(ret)
-L(gt1):        ld      r26, 0(up)
-       ld      r27, 8(up)
-       mulld   r0, r26, r6
-       mulhdu  r5, r26, r6
-       ld      r26, 16(up)
-       ld      r28, 0(rp)
-       mulld   r7, r27, r6
-       mulhdu  r8, r27, r6
-       ld      r29, 8(rp)
-       ld      r30, 16(rp)
-       mulld   r9, r26, r6
-       mulhdu  r10, r26, r6
-       addc    r7, r7, r5
-       adde    r9, r9, r8
-       addze   r12, r10
-       addc    r0, r0, r28
-       std     r0, 0(rp)
-       adde    r7, r7, r29
-       std     r7, 8(rp)
-       adde    r9, r9, r30
-       std     r9, 16(rp)
-       addi    up, up, 24
-       addi    rp, rp, 24
-       b       L(fic)
-
-L(b10):        addic   r0, r0, 0
-       li      r12, 0          C cy_limb = 0
-L(fic):        ld      r26, 0(up)
-       ld      r27, 8(up)
-       addi    up, up, 16
-       bdz     L(end)
-                               C registers dying
-L(top):        mulld   r0, r26, r6     C
-       mulhdu  r5, r26, r6     C 26
-       ld      r26, 0(up)      C
-       ld      r28, 0(rp)      C
-       mulld   r7, r27, r6     C
-       mulhdu  r8, r27, r6     C 27
-       ld      r27, 8(up)      C
-       ld      r29, 8(rp)      C
-       adde    r0, r0, r12     C 0 12
-       adde    r7, r7, r5      C 5 7
-       mulld   r9, r26, r6     C
-       mulhdu  r10, r26, r6    C 26
-       ld      r26, 16(up)     C
-       ld      r30, 16(rp)     C
-       mulld   r11, r27, r6    C
-       mulhdu  r12, r27, r6    C 27
-       ld      r27, 24(up)     C
-       ld      r31, 24(rp)     C
-       adde    r9, r9, r8      C 8 9
-       adde    r11, r11, r10   C 10 11
-       addze   r12, r12        C 12
-       addc    r0, r0, r28     C 0 28
-       std     r0, 0(rp)       C 0
-       adde    r7, r7, r29     C 7 29
-       std     r7, 8(rp)       C 7
-       adde    r9, r9, r30     C 9 30
-       std     r9, 16(rp)      C 9
-       adde    r11, r11, r31   C 11 31
-       std     r11, 24(rp)     C 11
-       addi    up, up, 32      C
-       addi    rp, rp, 32      C
-       bdnz    L(top)          C
-
-L(end):        mulld   r0, r26, r6
-       mulhdu  r5, r26, r6
-       ld      r28, 0(rp)
-       nop
-       mulld   r7, r27, r6
-       mulhdu  r8, r27, r6
-       ld      r29, 8(rp)
-       nop
-       adde    r0, r0, r12
-       adde    r7, r7, r5
-       addze   r8, r8
-       addc    r0, r0, r28
-       std     r0, 0(rp)
-       adde    r7, r7, r29
-       std     r7, 8(rp)
-L(ret):        addze   r3, r8
-       ld      r31, -8(r1)
-       ld      r30, -16(r1)
-       ld      r29, -24(r1)
-       ld      r28, -32(r1)
-       ld      r27, -40(r1)
-       ld      r26, -48(r1)
-       blr
-EPILOGUE()
diff --git a/mpn/powerpc64/mode64/aors_n.asm b/mpn/powerpc64/mode64/aors_n.asm

index 42b6d79472cc507647dc4a6603828ac310d2c404..8c30871c2dd1642684c5e8c0aaca393ed3fbd588 100644 (file)
--- a/mpn/powerpc64/mode64/aors_n.asm
+++ b/mpn/powerpc64/mode64/aors_n.asm
@@ -1,6 +1,6 @@
  dnl  PowerPC-64 mpn_add_n/mpn_sub_n -- mpn addition and subtraction.
  
-dnl  Copyright 1999, 2000, 2001, 2003, 2004, 2005, 2007 Free Software
+dnl  Copyright 1999, 2000, 2001, 2003, 2004, 2005, 2007, 2011 Free Software
  dnl  Foundation, Inc.
  
  dnl  This file is part of the GNU MP Library.
@@ -20,37 +20,12 @@ dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
  
  include(`../config.m4')
  
-C              cycles/limb
-C POWER3/PPC630:     1.5
-C POWER4/PPC970:     2
-
-C   n     POWER3/PPC630   POWER4/PPC970
-C     1               17.00           19.00
-C     2                9.00           10.49
-C     3                5.33            7.66
-C     4                4.50            5.14
-C     5                4.20            4.80
-C     6                3.83            4.33
-C     7                3.00            3.99
-C     8                2.87            3.55
-C     9                2.89            3.40
-C    10                2.60            3.42
-C    11                2.45            3.15
-C    12                2.41            2.99
-C    13                2.46            3.01
-C    14                2.42            2.97
-C    15                2.20            2.85
-C    50                1.78            2.44
-C   100                1.83            2.20
-C   200                1.55            2.12
-C   400                1.53            2.05
-C  1000                1.98            2.02#
-C  2000                1.50#           2.04
-C  4000                2.55            2.50
-C  8000                2.70            2.45
-C 16000                2.65            5.94
-C 32000                2.62           16.41
-C 64000                2.73           18.94
+C                   cycles/limb
+C POWER3/PPC630          1.5
+C POWER4/PPC970          2
+C POWER5                 2
+C POWER6                 2.63
+C POWER7               2.25-2.87
  
  C This code is a little bit slower for POWER3/PPC630 than the simple code used
  C previously, but it is much faster for POWER4/PPC970.  The reason for the
@@ -162,7 +137,8 @@ L(go):      ld      r6, 0(r4)       C load s1 limb
         addi    r4, r4, 32
         addi    r5, r5, 32
  
-L(oop):        ADDSUBC r28, r7, r6
+       ALIGN(16)
+L(top):        ADDSUBC r28, r7, r6
         ld      r6, 0(r4)       C load s1 limb
         ld      r7, 0(r5)       C load s2 limb
         ADDSUBC r29, r9, r8
@@ -181,7 +157,7 @@ L(oop):     ADDSUBC r28, r7, r6
         std     r30, 16(r3)
         std     r31, 24(r3)
         addi    r3, r3, 32
-       bdnz    L(oop)          C decrement ctr and loop back
+       bdnz    L(top)          C decrement ctr and loop back
  
  L(end):        ADDSUBC r28, r7, r6
         ADDSUBC r29, r9, r8
diff --git a/mpn/powerpc64/mode64/aorscnd_n.asm b/mpn/powerpc64/mode64/aorscnd_n.asm

new file mode 100644 (file)

index 0000000..ad143b6
--- /dev/null
+++ b/mpn/powerpc64/mode64/aorscnd_n.asm
@@ -0,0 +1,185 @@
+dnl  PowerPC-64 mpn_addcnd_n/mpn_subcnd_n.
+
+dnl  Copyright 1999, 2000, 2001, 2003, 2004, 2005, 2007, 2011, 2012 Free
+dnl  Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C                   cycles/limb
+C POWER3/PPC630          ?
+C POWER4/PPC970          2.25
+C POWER5                 ?
+C POWER6                 3
+C POWER7                 ?
+
+C INPUT PARAMETERS
+define(`rp',   `r3')
+define(`up',   `r4')
+define(`vp',   `r5')
+define(`n',    `r6')
+define(`cnd',  `r7')
+
+ifdef(`OPERATION_addcnd_n',`
+  define(ADDSUBC,      adde)  C limb operation with carry in
+  define(ADDSUB,       addc)  C limb operation without carry in
+  define(func,         mpn_addcnd_n)
+  define(GENRVAL,      `addi   r3, r3, 1')  C turns CA - 1 into CA
+  define(SETCBR,       `addic  r0, $1, -1')  C not referenced in this file
+  define(CLRCB,                `addic  r0, r0, 0')  C adding 0 leaves CA clear
+')
+ifdef(`OPERATION_subcnd_n',`
+  define(ADDSUBC,      subfe)  C limb operation with carry in
+  define(ADDSUB,       subfc)  C limb operation without carry in
+  define(func,         mpn_subcnd_n)
+  define(GENRVAL,      `neg    r3, r3')  C turns CA - 1 into 1 - CA, the borrow
+  define(SETCBR,       `subfic r0, $1, 0')  C not referenced in this file
+  define(CLRCB,                `addic  r0, r1, -1')  C sets CA if r1 is nonzero; r1 presumably the stack pointer
+')
+
+MULFUNC_PROLOGUE(mpn_addcnd_n mpn_subcnd_n)
+
+ASM_START()
+PROLOGUE(func)
+       std     r31, -8(r1)     C save r27-r31 below r1; reloaded at L(ret)
+       std     r30, -16(r1)
+       std     r29, -24(r1)
+       std     r28, -32(r1)
+       std     r27, -40(r1)
+
+       subfic  cnd, cnd, 0     C CA = 1 iff cnd == 0
+       subfe   cnd, cnd, cnd   C cnd = CA - 1: all ones iff cnd was nonzero
+
+       rldicl. r0, r6, 0,62    C r0 = n & 3, set cr0
+       cmpdi   cr6, r0, 2      C cr6 = (n & 3) compared with 2
+       addi    r6, r6, 3       C compute count...
+       srdi    r6, r6, 2       C ...for ctr
+       mtctr   r6              C copy count into ctr
+       beq     cr0, L(b00)     C n & 3 == 0
+       blt     cr6, L(b01)     C n & 3 == 1
+       beq     cr6, L(b10)     C n & 3 == 2, otherwise fall into L(b11)
+
+L(b11):        ld      r8, 0(up)       C load s1 limb
+       ld      r9, 0(vp)       C load s2 limb
+       ld      r10, 8(up)      C load s1 limb
+       ld      r11, 8(vp)      C load s2 limb
+       ld      r12, 16(up)     C load s1 limb
+       addi    up, up, 24      C step up past the 3 limbs
+       ld      r0, 16(vp)      C load s2 limb
+       addi    vp, vp, 24      C step vp past the 3 limbs
+       and     r9, r9, cnd     C s2 limbs are masked with cnd before use
+       and     r11, r11, cnd
+       and     r0, r0, cnd
+       ADDSUB  r29, r9, r8     C first limb: no carry in
+       ADDSUBC r30, r11, r10   C following limbs: carry in
+       ADDSUBC r31, r0, r12
+       std     r29, 0(rp)
+       std     r30, 8(rp)
+       std     r31, 16(rp)
+       addi    rp, rp, 24
+       bdnz    L(go)           C limbs remain: continue four at a time
+       b       L(ret)
+
+L(b01):        ld      r12, 0(up)      C load s1 limb
+       addi    up, up, 8
+       ld      r0, 0(vp)       C load s2 limb
+       addi    vp, vp, 8
+       and     r0, r0, cnd     C mask s2 limb with cnd
+       ADDSUB  r31, r0, r12    C add
+       std     r31, 0(rp)
+       addi    rp, rp, 8
+       bdnz    L(go)           C limbs remain: continue four at a time
+       b       L(ret)
+
+L(b10):        ld      r10, 0(up)      C load s1 limb
+       ld      r11, 0(vp)      C load s2 limb
+       ld      r12, 8(up)      C load s1 limb
+       addi    up, up, 16
+       ld      r0, 8(vp)       C load s2 limb
+       addi    vp, vp, 16
+       and     r11, r11, cnd   C mask s2 limbs with cnd
+       and     r0, r0, cnd
+       ADDSUB  r30, r11, r10   C add
+       ADDSUBC r31, r0, r12    C add
+       std     r30, 0(rp)
+       std     r31, 8(rp)
+       addi    rp, rp, 16
+       bdnz    L(go)           C limbs remain: continue four at a time
+       b       L(ret)
+
+L(b00):        CLRCB                   C clear/set cy
+L(go): ld      r6, 0(up)       C load s1 limb
+       ld      r27, 0(vp)      C load s2 limb
+       ld      r8, 8(up)       C load s1 limb
+       ld      r9, 8(vp)       C load s2 limb
+       ld      r10, 16(up)     C load s1 limb
+       ld      r11, 16(vp)     C load s2 limb
+       ld      r12, 24(up)     C load s1 limb
+       ld      r0, 24(vp)      C load s2 limb
+       and     r27, r27, cnd   C mask the four s2 limbs with cnd
+       and     r9, r9, cnd
+       and     r11, r11, cnd
+       and     r0, r0, cnd
+       bdz     L(end)          C last group of four: skip the loop
+
+       addi    up, up, 32
+       addi    vp, vp, 32
+
+L(top):        ADDSUBC r28, r27, r6    C combine limbs loaded earlier...
+       ld      r6, 0(up)       C load s1 limb
+       ld      r27, 0(vp)      C load s2 limb
+       ADDSUBC r29, r9, r8     C ...while the next four are being loaded
+       ld      r8, 8(up)       C load s1 limb
+       ld      r9, 8(vp)       C load s2 limb
+       ADDSUBC r30, r11, r10
+       ld      r10, 16(up)     C load s1 limb
+       ld      r11, 16(vp)     C load s2 limb
+       ADDSUBC r31, r0, r12
+       ld      r12, 24(up)     C load s1 limb
+       ld      r0, 24(vp)      C load s2 limb
+       std     r28, 0(rp)
+       addi    up, up, 32
+       std     r29, 8(rp)
+       addi    vp, vp, 32
+       std     r30, 16(rp)
+       std     r31, 24(rp)
+       addi    rp, rp, 32
+       and     r27, r27, cnd   C mask the newly loaded s2 limbs
+       and     r9, r9, cnd
+       and     r11, r11, cnd
+       and     r0, r0, cnd
+       bdnz    L(top)          C decrement ctr and loop back
+
+L(end):        ADDSUBC r28, r27, r6    C combine the final four limbs
+       ADDSUBC r29, r9, r8
+       ADDSUBC r30, r11, r10
+       ADDSUBC r31, r0, r12
+       std     r28, 0(rp)
+       std     r29, 8(rp)
+       std     r30, 16(rp)
+       std     r31, 24(rp)
+
+L(ret):        ld      r31, -8(r1)     C restore r27-r31
+       ld      r30, -16(r1)
+       ld      r29, -24(r1)
+       ld      r28, -32(r1)
+       ld      r27, -40(r1)
+
+       subfe   r3, r0, r0      C -cy
+       GENRVAL                 C r3 = carry for add, borrow for sub
+       blr
+EPILOGUE()
diff --git a/mpn/powerpc64/mode64/aorslsh1_n.asm b/mpn/powerpc64/mode64/aorslsh1_n.asm

new file mode 100644 (file)

index 0000000..0021309
--- /dev/null
+++ b/mpn/powerpc64/mode64/aorslsh1_n.asm
@@ -0,0 +1,44 @@
+dnl  PowerPC-64 mpn_addlsh1_n and mpn_sublsh1_n.
+
+dnl  Copyright 2003, 2005, 2009, 2010 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+define(LSH,            1)  C left shift count applied to the v limbs
+define(RSH,            63)  C 64 - LSH, right shift that recovers the bits shifted out
+
+ifdef(`OPERATION_addlsh1_n',`
+  define(ADDSUBC,      addc)  C limb operation without carry in
+  define(ADDSUBE,      adde)  C limb operation with carry in
+  define(INITCY,       `addic  $1, r1, 0')  C adding 0 leaves CA clear
+  define(RETVAL,       `addze  r3, $1')  C r3 = argument + CA
+  define(func, mpn_addlsh1_n)
+')
+ifdef(`OPERATION_sublsh1_n',`
+  define(ADDSUBC,      subfc)  C limb operation without carry in
+  define(ADDSUBE,      subfe)  C limb operation with carry in
+  define(INITCY,       `addic  $1, r1, -1')  C sets CA if r1 is nonzero; r1 presumably the stack pointer
+  define(RETVAL,       `subfze r3, $1
+                       neg     r3, r3')  C r3 = argument + 1 - CA
+  define(func, mpn_sublsh1_n)
+')
+
+MULFUNC_PROLOGUE(mpn_addlsh1_n mpn_sublsh1_n)
+
+include_mpn(`powerpc64/mode64/aorslshC_n.asm')  C shared code, parameterised by the macros above
diff --git a/mpn/powerpc64/mode64/aorslsh2_n.asm b/mpn/powerpc64/mode64/aorslsh2_n.asm

new file mode 100644 (file)

index 0000000..99de79c
--- /dev/null
+++ b/mpn/powerpc64/mode64/aorslsh2_n.asm
@@ -0,0 +1,44 @@
+dnl  PowerPC-64 mpn_addlsh2_n and mpn_sublsh2_n.
+
+dnl  Copyright 2003, 2005, 2009, 2010 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+define(LSH,            2)  C left shift count applied to the v limbs
+define(RSH,            62)  C 64 - LSH, right shift that recovers the bits shifted out
+
+ifdef(`OPERATION_addlsh2_n',`
+  define(ADDSUBC,      addc)  C limb operation without carry in
+  define(ADDSUBE,      adde)  C limb operation with carry in
+  define(INITCY,       `addic  $1, r1, 0')  C adding 0 leaves CA clear
+  define(RETVAL,       `addze  r3, $1')  C r3 = argument + CA
+  define(func, mpn_addlsh2_n)
+')
+ifdef(`OPERATION_sublsh2_n',`
+  define(ADDSUBC,      subfc)  C limb operation without carry in
+  define(ADDSUBE,      subfe)  C limb operation with carry in
+  define(INITCY,       `addic  $1, r1, -1')  C sets CA if r1 is nonzero; r1 presumably the stack pointer
+  define(RETVAL,       `subfze r3, $1
+                       neg     r3, r3')  C r3 = argument + 1 - CA
+  define(func, mpn_sublsh2_n)
+')
+
+MULFUNC_PROLOGUE(mpn_addlsh2_n mpn_sublsh2_n)
+
+include_mpn(`powerpc64/mode64/aorslshC_n.asm')  C shared code, parameterised by the macros above
diff --git a/mpn/powerpc64/mode64/aorslshC_n.asm b/mpn/powerpc64/mode64/aorslshC_n.asm

new file mode 100644 (file)

index 0000000..3776d3e
--- /dev/null
+++ b/mpn/powerpc64/mode64/aorslshC_n.asm
@@ -0,0 +1,156 @@
+dnl  PowerPC-64 mpn_addlshC_n and mpn_sublshC_n, where C is a small constant.
+
+dnl  Copyright 2003, 2005, 2009, 2010 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+C                  cycles/limb
+C POWER3/PPC630          1.83   (1.5 c/l should be possible)
+C POWER4/PPC970          3      (2.0 c/l should be possible)
+C POWER5                 3
+C POWER6              3.5-47
+C POWER7                 3
+
+C STATUS
+C  * Try combining upx+up, and vpx+vp.
+C  * The worst case 47 c/l for POWER6 happens if the 3rd operand for ldx is
+C    greater than the 2nd operand.  Yes, this addition is non-commutative wrt
+C    performance.
+
+C INPUT PARAMETERS
+define(`rp', `r3')
+define(`up', `r4')
+define(`vp', `r5')
+define(`n',  `r6')
+
+define(`rpx', `r6')
+define(`upx', `r7')
+define(`vpx', `r12')
+
+define(`s0', `r0')  define(`s1', `r9')
+define(`u0', `r8')
+define(`v0', `r10') define(`v1', `r11')
+
+
+ASM_START()
+PROLOGUE(func)
+       cmpldi  cr0, n, 13
+       bgt     L(big)          C n > 13: use the two-pointer code at L(big)
+
+       mtctr   n               C copy n in ctr
+       INITCY( r0)             C init cy: clear for add, set for sub
+
+       ld      v0, 0(vp)       C load v limb
+       ld      u0, 0(up)       C load u limb
+       addi    up, up, -8      C update up
+       addi    rp, rp, -8      C update rp
+       sldi    s1, v0, LSH     C left shift first v limb
+       bdz     L(ex1)          C If done, skip loop
+
+       ALIGN(16)
+L(lo0):        ld      v1, 8(vp)       C load v limb
+       ADDSUBE s1, s1, u0      C add limbs with cy, set cy
+       ldu     u0, 16(up)      C load u limb and update up
+       srdi    s0, v0, RSH     C shift down previous v limb
+       std     s1, 8(rp)       C store result limb
+       rldimi  s0, v1, LSH, 0  C left shift v limb and merge with prev v limb
+       bdz     L(ex0)          C decrement ctr and exit if done
+       ldu     v0, 16(vp)      C load v limb and update vp
+       ADDSUBE s0, s0, u0      C add limbs with cy, set cy
+       ld      u0, 8(up)       C load u limb
+       srdi    s1, v1, RSH     C shift down previous v limb
+       stdu    s0, 16(rp)      C store result limb and update rp
+       rldimi  s1, v0, LSH, 0  C left shift v limb and merge with prev v limb
+       bdnz    L(lo0)          C decrement ctr and loop back
+
+L(ex1):        ADDSUBE r7, s1, u0      C final limb, with cy
+       std     r7, 8(rp)       C store last result limb
+       srdi    r0, v0, RSH     C bits shifted out of the last v limb
+       RETVAL( r0)             C return them adjusted by cy
+       blr
+L(ex0):        ADDSUBE r7, s0, u0      C final limb, with cy
+       std     r7, 16(rp)      C store last result limb
+       srdi    r0, v1, RSH     C bits shifted out of the last v limb
+       RETVAL( r0)             C return them adjusted by cy
+       blr
+
+
+L(big):        rldicl. r0, n, 0,63     C r0 = n & 1, set cr0
+       addi    r6, n, -1       C compute count...
+       srdi    r6, r6, 1       C ...for ctr
+       mtctr   r6              C copy count into ctr
+       beq     cr0, L(b0)      C n even
+
+L(b1): ld      v1, 0(vp)
+       ld      u0, 0(up)
+       sldi    s1, v1, LSH     C left shift first v limb
+       srdi    s0, v1, RSH     C bits shifted out, merged into the next limb
+       ld      v0, 8(vp)
+       ADDSUBC s1, s1, u0      C add limbs without cy, set cy
+       addi    rpx, rp, -16    C rpx overwrites n (both are r6); ctr already set
+       addi    rp, rp, -8
+       sub     upx, up, rp     C up, vp, upx, vpx become offsets which the
+       sub     vpx, vp, rp     C ldx insns below add to rp or rpx
+       sub     up, up, rpx
+       sub     vp, vp, rpx
+       addi    up, up, 8
+       addi    upx, upx, 16
+       addi    vp, vp, 16
+       addi    vpx, vpx, 24
+       b       L(mid)          C enter the loop at its second half
+
+L(b0): ld      v0, 0(vp)
+       ld      u0, 0(up)
+       sldi    s0, v0, LSH     C left shift first v limb
+       srdi    s1, v0, RSH     C bits shifted out, merged into the next limb
+       ld      v1, 8(vp)
+       ADDSUBC s0, s0, u0      C add limbs without cy, set cy
+       addi    rpx, rp, -8     C rpx overwrites n (both are r6); ctr already set
+       addi    rp, rp, -16
+       sub     upx, up, rpx    C up, vp, upx, vpx become offsets which the
+       sub     vpx, vp, rpx    C ldx insns below add to rp or rpx
+       sub     up, up, rp
+       sub     vp, vp, rp
+       addi    up, up, 8
+       addi    upx, upx, 16
+       addi    vp, vp, 16
+       addi    vpx, vpx, 24
+
+       ALIGN(32)
+L(top):        ldx     u0, rp, up      C load u limb from rp + up
+       ldx     v0, rp, vp      C load v limb from rp + vp
+       rldimi  s1, v1, LSH, 0  C left shift v limb and merge with prev v limb
+       stdu    s0, 16(rp)      C store result limb and update rp
+       srdi    s0, v1, RSH     C shift down previous v limb
+       ADDSUBE s1, s1, u0      C add limbs with cy, set cy
+L(mid):        ldx     u0, rpx, upx    C load u limb from rpx + upx
+       ldx     v1, rpx, vpx    C load v limb from rpx + vpx
+       rldimi  s0, v0, LSH, 0  C left shift v limb and merge with prev v limb
+       stdu    s1, 16(rpx)     C store result limb and update rpx
+       srdi    s1, v0, RSH     C shift down previous v limb
+       ADDSUBE s0, s0, u0      C add limbs with cy, set cy
+       bdnz    L(top)          C decrement CTR and loop back
+
+       ldx     u0, rp, up      C load last u limb
+       rldimi  s1, v1, LSH, 0  C left shift v limb and merge with prev v limb
+       std     s0, 16(rp)
+       srdi    s0, v1, RSH     C s0 is r0: bits shifted out of the last v limb
+       ADDSUBE s1, s1, u0      C add limbs with cy, set cy
+       std     s1, 24(rp)      C store last result limb
+
+       RETVAL( r0)             C return the shifted-out bits adjusted by cy
+       blr
+EPILOGUE()
diff --git a/mpn/powerpc64/mode64/aorsmul_1.asm b/mpn/powerpc64/mode64/aorsmul_1.asm

new file mode 100644 (file)

index 0000000..2b5432d
--- /dev/null
+++ b/mpn/powerpc64/mode64/aorsmul_1.asm
@@ -0,0 +1,215 @@
+dnl  PowerPC-64 mpn_addmul_1 and mpn_submul_1.
+
+dnl  Copyright 1999, 2000, 2001, 2003, 2004, 2005, 2006, 2010, 2011, 2012
+dnl  Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C               mpn_addmul_1    mpn_submul_1
+C               cycles/limb     cycles/limb
+C POWER3/PPC630   6-18             6-18
+C POWER4/PPC970    8                8.3
+C POWER5           8                8.25
+C POWER6          16.25            16.75
+C POWER7           3.77             4.9
+
+C TODO
+C  * Try to reduce the number of needed live registers
+C  * Add support for _1c entry points
+
+C INPUT PARAMETERS
+define(`rp', `r3')
+define(`up', `r4')
+define(`n',  `r5')
+define(`vl', `r6')
+
+ifdef(`OPERATION_addmul_1',`
+  define(ADDSUBC,      adde)  C combine with the rp limb, carry in
+  define(ADDSUB,       addc)  C combine with the rp limb, no carry in
+  define(func,         mpn_addmul_1)
+  define(func_nc,      mpn_addmul_1c)  C FIXME: not really supported
+  define(SM,           `')  C expands to nothing: no extra insns for addmul
+')
+ifdef(`OPERATION_submul_1',`
+  define(ADDSUBC,      subfe)  C combine with the rp limb, carry in
+  define(ADDSUB,       subfc)  C combine with the rp limb, no carry in
+  define(func,         mpn_submul_1)
+  define(func_nc,      mpn_submul_1c)  C FIXME: not really supported
+  define(SM,           `$1')  C emits its argument: insns needed for submul only
+')
+
+MULFUNC_PROLOGUE(mpn_addmul_1 mpn_submul_1)
+
+ASM_START()
+PROLOGUE(func)
+       std     r31, -8(r1)     C save r27-r31 below r1, interleaved with count setup
+       rldicl. r0, n, 0,62     C r0 = n & 3, set cr0
+       std     r30, -16(r1)
+       cmpdi   cr6, r0, 2      C cr6 = (n & 3) compared with 2
+       std     r29, -24(r1)
+       addi    n, n, 3         C compute count...
+       std     r28, -32(r1)
+       srdi    n, n, 2         C ...for ctr
+       std     r27, -40(r1)
+       mtctr   n               C copy count into ctr
+       beq     cr0, L(b00)     C n & 3 == 0
+       blt     cr6, L(b01)     C n & 3 == 1
+       beq     cr6, L(b10)     C n & 3 == 2, otherwise fall into L(b11)
+
+L(b11):        ld      r9, 0(up)
+       ld      r28, 0(rp)
+       mulld   r0, r9, r6      C low half of up limb * vl
+       mulhdu  r12, r9, r6     C high half, becomes cy_limb for the next limb
+       ADDSUB  r0, r0, r28     C combine with the rp limb
+       std     r0, 0(rp)
+       addi    rp, rp, 8
+       ld      r9, 8(up)       C preload the next two up limbs
+       ld      r27, 16(up)
+       addi    up, up, 24
+SM(`   subfe   r11, r11, r11 ')
+       b       L(bot)
+
+       ALIGN(16)
+L(b00):        ld      r9, 0(up)
+       ld      r27, 8(up)
+       ld      r28, 0(rp)
+       ld      r29, 8(rp)
+       mulld   r0, r9, r6      C low and high halves of two products
+       mulhdu  r5, r9, r6
+       mulld   r7, r27, r6
+       mulhdu  r8, r27, r6
+       addc    r7, r7, r5      C chain the high half into the next low half
+       addze   r12, r8         C cy_limb = last high half + carry
+       ADDSUB  r0, r0, r28     C combine with the rp limbs
+       std     r0, 0(rp)
+       ADDSUBC r7, r7, r29
+       std     r7, 8(rp)
+       addi    rp, rp, 16
+       ld      r9, 16(up)      C preload the next two up limbs
+       ld      r27, 24(up)
+       addi    up, up, 32
+SM(`   subfe   r11, r11, r11 ')
+       b       L(bot)
+
+       ALIGN(16)
+L(b01):        bdnz    L(gt1)          C taken when n > 1
+       ld      r9, 0(up)       C n == 1: a single limb
+       ld      r11, 0(rp)
+       mulld   r0, r9, r6
+       mulhdu  r8, r9, r6
+       ADDSUB  r0, r0, r11
+       std     r0, 0(rp)
+SM(`   subfe   r11, r11, r11 ')
+SM(`   addic   r11, r11, 1 ')
+       addze   r3, r8          C return high half plus carry
+       blr                     C r27-r31 are not written on this path
+L(gt1):        ld      r9, 0(up)
+       ld      r27, 8(up)
+       mulld   r0, r9, r6      C low and high halves of three products
+       mulhdu  r5, r9, r6
+       mulld   r7, r27, r6
+       mulhdu  r8, r27, r6
+       ld      r9, 16(up)
+       ld      r28, 0(rp)
+       ld      r29, 8(rp)
+       ld      r30, 16(rp)
+       mulld   r11, r9, r6
+       mulhdu  r10, r9, r6
+       addc    r7, r7, r5      C chain each high half into the next low half
+       adde    r11, r11, r8
+       addze   r12, r10        C cy_limb = last high half + carry
+       ADDSUB  r0, r0, r28     C combine with the rp limbs
+       std     r0, 0(rp)
+       ADDSUBC r7, r7, r29
+       std     r7, 8(rp)
+       ADDSUBC r11, r11, r30
+       std     r11, 16(rp)
+       addi    rp, rp, 24
+       ld      r9, 24(up)      C preload the next two up limbs
+       ld      r27, 32(up)
+       addi    up, up, 40
+SM(`   subfe   r11, r11, r11 ')
+       b       L(bot)
+
+L(b10):        addic   r0, r0, 0       C adding 0 leaves CA clear
+       li      r12, 0          C cy_limb = 0
+       ld      r9, 0(up)
+       ld      r27, 8(up)
+       bdz     L(end)          C n == 2: only the tail code is needed
+       addi    up, up, 16
+
+       ALIGN(16)
+L(top):        mulld   r0, r9, r6
+       mulhdu  r5, r9, r6      C 9
+       mulld   r7, r27, r6
+       mulhdu  r8, r27, r6     C 27
+       ld      r9, 0(up)
+       ld      r28, 0(rp)
+       ld      r27, 8(up)
+       ld      r29, 8(rp)
+       adde    r0, r0, r12     C 0 12
+       adde    r7, r7, r5      C 5 7
+       mulld   r5, r9, r6
+       mulhdu  r10, r9, r6     C 9
+       mulld   r11, r27, r6
+       mulhdu  r12, r27, r6    C 27
+       ld      r9, 16(up)
+       ld      r30, 16(rp)
+       ld      r27, 24(up)
+       ld      r31, 24(rp)
+       adde    r5, r5, r8      C 8 5
+       adde    r11, r11, r10   C 10 11
+       addze   r12, r12        C 12
+       ADDSUB  r0, r0, r28     C 0 28
+       std     r0, 0(rp)       C 0
+       ADDSUBC r7, r7, r29     C 7 29
+       std     r7, 8(rp)       C 7
+       ADDSUBC r5, r5, r30     C 5 30
+       std     r5, 16(rp)      C 5
+       ADDSUBC r11, r11, r31   C 11 31
+       std     r11, 24(rp)     C 11
+       addi    up, up, 32
+SM(`   subfe   r11, r11, r11 ')
+       addi    rp, rp, 32
+L(bot):
+SM(`   addic   r11, r11, 1 ')
+       bdnz    L(top)          C decrement ctr and loop back
+
+L(end):        mulld   r0, r9, r6      C tail: the last two limbs
+       mulhdu  r5, r9, r6
+       mulld   r7, r27, r6
+       mulhdu  r8, r27, r6
+       ld      r28, 0(rp)
+       ld      r29, 8(rp)
+       adde    r0, r0, r12     C add in cy_limb with carry
+       adde    r7, r7, r5
+       addze   r8, r8
+       ADDSUB  r0, r0, r28     C combine with the rp limbs
+       std     r0, 0(rp)
+       ADDSUBC r7, r7, r29
+       std     r7, 8(rp)
+SM(`   subfe   r11, r11, r11 ')
+SM(`   addic   r11, r11, 1 ')
+       addze   r3, r8          C return high half plus carry
+       ld      r31, -8(r1)     C restore r27-r31
+       ld      r30, -16(r1)
+       ld      r29, -24(r1)
+       ld      r28, -32(r1)
+       ld      r27, -40(r1)
+       blr
+EPILOGUE()
diff --git a/mpn/powerpc64/mode64/bdiv_dbm1c.asm b/mpn/powerpc64/mode64/bdiv_dbm1c.asm

index 8c1e87e1eeaec7ef993e5da5eae090ed41601585..e88fc44402cfd443d80e9d014c74f2a46afe6b1e 100644 (file)
--- a/mpn/powerpc64/mode64/bdiv_dbm1c.asm
+++ b/mpn/powerpc64/mode64/bdiv_dbm1c.asm
@@ -1,6 +1,6 @@
  dnl  PPC64 mpn_bdiv_dbm1c.
  
-dnl  Copyright 2008 Free Software Foundation, Inc.
+dnl  Copyright 2008, 2010 Free Software Foundation, Inc.
  
  dnl  This file is part of the GNU MP Library.
  
@@ -19,10 +19,13 @@ dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
  
  include(`../config.m4')
  
-C              cycles/limb
-C POWER3/PPC630:    6-18
-C POWER4/PPC970:    8.5
-C POWER5:           ?
+C                 cycles/limb
+C POWER3/PPC630       6-18
+C POWER4/PPC970       8.5?
+C POWER5              8.5  fluctuating as function of n % 3
+C POWER6             15
+C POWER6             15
+C POWER7              4.75
  
  C TODO
  C  * Nothing to do...
@@ -48,6 +51,7 @@ PROLOGUE(mpn_bdiv_dbm1c)
         blt     cr6, L(b01)
         beq     cr6, L(b10)
  
+       ALIGN(16)
  L(b11):        mulld   r5, r0, r6
         mulhdu  r12, r0, r6
         ld      r0, 8(r4)
@@ -55,13 +59,14 @@ L(b11):     mulld   r5, r0, r6
         addi    r3, r3, -24
         b       L(3)
  
+       ALIGN(16)
  L(b00):        mulld   r9, r0, r6
         mulhdu  r8, r0, r6
-       ld      r0, 8(r4)
         addi    r4, r4, -16
         addi    r3, r3, -16
         b       L(0)
  
+       ALIGN(16)
  L(b01):        mulld   r5, r0, r6
         mulhdu  r12, r0, r6
         addi    r3, r3, -8
@@ -70,42 +75,43 @@ L(b01):     mulld   r5, r0, r6
         addi    r4, r4, -8
         b       L(1)
  
+       ALIGN(16)
  L(b10):        mulld   r9, r0, r6
         mulhdu  r8, r0, r6
-       ld      r0, 8(r4)
         ble     cr7, L(e2)
  
         ALIGN(16)
-L(top):        mulld   r5, r0, r6
-       mulhdu  r12, r0, r6
-       subfc   r11, r9, r7
+L(top):        subfc   r11, r9, r7
+       ld      r10, 8(r4)
         ld      r0, 16(r4)
         subfe   r7, r8, r11
         std     r11, 0(r3)
+       mulld   r5, r10, r6
+       mulhdu  r12, r10, r6
  L(1):  mulld   r9, r0, r6
         mulhdu  r8, r0, r6
         subfc   r11, r5, r7
-       ld      r0, 24(r4)
         subfe   r7, r12, r11
         std     r11, 8(r3)
-L(0):  mulld   r5, r0, r6
-       mulhdu  r12, r0, r6
-       subfc   r11, r9, r7
+L(0):  subfc   r11, r9, r7
+       ld      r10, 24(r4)
         ld      r0, 32(r4)
         subfe   r7, r8, r11
         std     r11, 16(r3)
+       mulld   r5, r10, r6
+       mulhdu  r12, r10, r6
  L(3):  mulld   r9, r0, r6
         mulhdu  r8, r0, r6
         subfc   r11, r5, r7
-       ld      r0, 40(r4)
         subfe   r7, r12, r11
         std     r11, 24(r3)
         addi    r4, r4, 32
         addi    r3, r3, 32
         bdnz    L(top)
  
-L(e2): mulld   r5, r0, r6
-       mulhdu  r12, r0, r6
+L(e2): ld      r10, 8(r4)
+       mulld   r5, r10, r6
+       mulhdu  r12, r10, r6
         subfc   r11, r9, r7
         subfe   r7, r8, r11
         std     r11, 0(r3)
diff --git a/mpn/powerpc64/mode64/dive_1.asm b/mpn/powerpc64/mode64/dive_1.asm

index 1f482bae58b6edbd3f0b24c20fdf2e7affe039f9..0f94154bf6f396d3014a39e4e8b0312645e662ba 100644 (file)
--- a/mpn/powerpc64/mode64/dive_1.asm
+++ b/mpn/powerpc64/mode64/dive_1.asm
@@ -1,6 +1,6 @@
  dnl  PowerPC-64 mpn_divexact_1 -- mpn by limb exact division.
  
-dnl  Copyright 2006 Free Software Foundation, Inc.
+dnl  Copyright 2006, 2010 Free Software Foundation, Inc.
  
  dnl  This file is part of the GNU MP Library.
  
@@ -19,14 +19,16 @@ dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
  
  include(`../config.m4')
  
-C              cycles/limb
-C POWER3/PPC630:    13-19
-C POWER4/PPC970:     16
-C POWER5:           16
+C                       cycles/limb
+C                       norm    unorm
+C POWER3/PPC630        13-19
+C POWER4/PPC970         16
+C POWER5                16      16
+C POWER6                37      46
+C POWER7                12      12
  
  C TODO
  C  * Check if n=1 code is really an improvement.  It probably isn't.
-C  * Perhaps remove L(norm) code, it is currently unreachable.
  C  * Make more similar to mode1o.asm.
  
  C INPUT PARAMETERS
@@ -61,7 +63,6 @@ L(7):
         mtctr   n
         LEA(    r5, binvert_limb_table)
         rldicl  r11, d, 63, 57
-C      cmpdi   cr7, r0, 0
         lbzx    r0, r5, r11
         mulld   r9, r0, r0
         sldi    r0, r0, 1
@@ -75,26 +76,27 @@ C   cmpdi   cr7, r0, 0
         sldi    r0, r0, 1
         mulld   r9, d, r9
         subf    r7, r9, r0              C r7 = 1/d mod 2^64
-C      beq     cr7, L(norm)
+       bne     cr0, L(norm)
         subfic  r8, r10, 64             C set carry as side effect
         li      r5, 0
+       srd     r11, r12, r10
  
         ALIGN(16)
  L(loop0):
-       srd     r11, r12, r10
         ld      r12, 8(up)
+       nop
         addi    up, up, 8
         sld     r0, r12, r8
         or      r11, r11, r0
         subfe   r9, r5, r11
+       srd     r11, r12, r10
         mulld   r0, r7, r9
+       mulhdu  r5, r0, d
         std     r0, 0(rp)
         addi    rp, rp, 8
-       mulhdu  r5, r0, d
         bdnz    L(loop0)
  
-       srd     r0, r12, r10
-       subfe   r0, r5, r0
+       subfe   r0, r5, r11
         mulld   r0, r7, r0
         std     r0, 0(rp)
         blr
@@ -102,14 +104,15 @@ L(loop0):
         ALIGN(16)
  L(norm):
         mulld   r11, r12, r7
+       mulhdu  r5, r11, d
         std     r11, 0(rp)
         ALIGN(16)
  L(loop1):
-       mulhdu  r5, r11, d
         ld      r9, 8(up)
         addi    up, up, 8
         subfe   r5, r5, r9
         mulld   r11, r7, r5
+       mulhdu  r5, r11, d      C result not used
         std     r11, 8(rp)
         addi    rp, rp, 8
         bdnz    L(loop1)
diff --git a/mpn/powerpc64/mode64/divrem_1.asm b/mpn/powerpc64/mode64/divrem_1.asm

index 895badfe61cb71c4a8f66fbf2d92403babf2895d..60f52904f0b9ac6ce3d0abf15cf3d5e9bcdd6d2f 100644 (file)
--- a/mpn/powerpc64/mode64/divrem_1.asm
+++ b/mpn/powerpc64/mode64/divrem_1.asm
@@ -1,6 +1,7 @@
  dnl  PowerPC-64 mpn_divrem_1 -- Divide an mpn number by an unnormalized limb.
  
-dnl  Copyright 2003, 2004, 2005, 2007, 2008 Free Software Foundation, Inc.
+dnl  Copyright 2003, 2004, 2005, 2007, 2008, 2010, 2012 Free Software
+dnl  Foundation, Inc.
  
  dnl  This file is part of the GNU MP Library.
  
@@ -19,11 +20,13 @@ dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
  
  include(`../config.m4')
  
-C                          cycles/limb
-C                      norm    unorm   frac
-C POWER3/PPC630                16-34   16-34   ~11
-C POWER4/PPC970                 29              19
-C POWER5                29      29     ~20
+C                           cycles/limb
+C                       norm    unorm   frac
+C POWER3/PPC630         16-34   16-34   ~11   outdated figures
+C POWER4/PPC970          28      28      19
+C POWER5                 29      29     ~19
+C POWER6                 49      59     ~42
+C POWER7                 24.5    23     ~14
  
  C INPUT PARAMETERS
  C qp  = r3
@@ -110,23 +113,23 @@ L(71):
         sldi    r6, r6, 3
         ALIGN(16)
  L(uloop):
-       addi    r11, r31, 1
         ldx     r8, r26, r6
+       nop
         mulld   r0, r31, r3
         mulhdu  r10, r31, r3
-       addi    r6, r6, -8
+       addi    r11, r31, 1
         srd     r9, r8, r5
+       addi    r6, r6, -8
         or      r9, r7, r9
         addc    r0, r0, r9
         adde    r10, r10, r11
         mulld   r31, r10, r30
         subf    r31, r31, r9
-       subfc   r0, r0, r31     C r >= ql
-       subfe   r0, r0, r0      C r0 = -(r >= ql)
-       not     r7, r0
-       add     r10, r7, r10    C qh -= (r >= ql)
-       andc    r0, r30, r0
-       add     r31, r31, r0
+       subfc   r0, r31, r0     C r <= ql
+       subfe   r0, r0, r0      C r0 = -(r <= ql)
+       and     r9, r30, r0
+       add     r31, r31, r9
+       add     r10, r0, r10    C qh -= (r >= ql)
         cmpld   cr7, r31, r30
         bge-    cr7, L(164)
  L(123):
@@ -163,19 +166,19 @@ L(110):
  L(ufloop):
         addi    r11, r31, 1
         nop
-       mulld   r7, r3, r31
+       mulld   r0, r3, r31
         mulhdu  r10, r3, r31
         add     r10, r10, r11
         mulld   r31, r9, r10
  ifelse(0,1,`
-       subfc   r0, r7, r31
+       subfc   r0, r0, r31
         subfe   r0, r0, r0      C r0 = -(r >= ql)
         not     r7, r0
         add     r10, r7, r10    C qh -= (r >= ql)
         andc    r0, r30, r0
         add     r31, r31, r0
  ',`
-       cmpld   cr7, r31, r7
+       cmpld   cr7, r31, r0
         blt     cr7, L(29)
         add     r31, r30, r31
         addi    r10, r10, -1
@@ -216,12 +219,11 @@ L(162):
         and     r0, r0, r7
         subf    r31, r0, r31
  L(8):
-L(10):
         mr      r3, r30
         CALL(   mpn_invert_limb)
-       nop
+       li      r27, 0
         addic.  r6, r28, -1
-       blt-    cr0, L(150)
+       blt-    cr0, L(110)
         mtctr   r28
         sldi    r6, r6, 3
         ALIGN(16)
@@ -229,70 +231,25 @@ L(nloop):
         addi    r11, r31, 1
         ldx     r8, r26, r6
         mulld   r0, r31, r3
-       addi    r6, r6, -8
         mulhdu  r10, r31, r3
-       addc    r7, r0, r8
+       addi    r6, r6, -8
+       addc    r0, r0, r8
         adde    r10, r10, r11
         mulld   r31, r10, r30
         subf    r31, r31, r8    C r = nl - qh * d
-       subfc   r0, r7, r31     C r >= ql
-       subfe   r0, r0, r0      C r0 = -(r >= ql)
-       not     r7, r0
-       add     r10, r7, r10    C qh -= (r >= ql)
-       andc    r0, r30, r0
-       add     r31, r31, r0
+       subfc   r0, r31, r0     C r <= ql
+       subfe   r0, r0, r0      C r0 = -(r <= ql)
+       and     r9, r30, r0
+       add     r31, r31, r9
+       add     r10, r0, r10    C qh -= (r >= ql)
         cmpld   cr7, r31, r30
         bge-    cr7, L(167)
  L(51):
         std     r10, 0(r29)
         addi    r29, r29, -8
         bdnz    L(nloop)
+       b       L(110)
  
-L(150):
-       addic.  r9, r25, -1
-       blt-    cr0, L(152)
-       mtctr   r25
-       neg     r9, r30
-       ALIGN(16)
-L(nfloop):
-       addi    r11, r31, 1
-       nop
-       mulld   r7, r3, r31
-       mulhdu  r10, r3, r31
-       add     r10, r10, r11
-       mulld   r31, r9, r10
-ifelse(0,1,`
-       subfc   r0, r7, r31
-       subfe   r0, r0, r0      C r0 = -(r >= ql)
-       not     r7, r0
-       add     r10, r7, r10    C qh -= (r >= ql)
-       andc    r0, r30, r0
-       add     r31, r31, r0
-',`
-       cmpld   cr7, r31, r7
-       blt     cr7, L(28)
-       add     r31, r30, r31
-       addi    r10, r10, -1
-L(28):
-')
-       std     r10, 0(r29)
-       addi    r29, r29, -8
-       bdnz    L(nfloop)
-L(152):
-       addi    r1, r1, 176
-       mr      r3, r31
-       ld      r0, 16(r1)
-       lwz     r12, 8(r1)
-       mtlr    r0
-       ld      r25, -56(r1)
-       ld      r26, -48(r1)
-       mtcrf   8, r12
-       ld      r27, -40(r1)
-       ld      r28, -32(r1)
-       ld      r29, -24(r1)
-       ld      r30, -16(r1)
-       ld      r31, -8(r1)
-       blr
  L(164):
         subf    r31, r30, r31
         addi    r10, r10, 1
diff --git a/mpn/powerpc64/mode64/divrem_2.asm b/mpn/powerpc64/mode64/divrem_2.asm

index 369b5c1f1dfd90aaa5f6830a1a8ab9028e066999..18f549357ca689100f270dcdf2292ead0796e42d 100644 (file)
--- a/mpn/powerpc64/mode64/divrem_2.asm
+++ b/mpn/powerpc64/mode64/divrem_2.asm
@@ -19,14 +19,13 @@ dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
  
  include(`../config.m4')
  
-C                      cycles/limb
-C                      norm    frac
+C                       cycles/limb
+C                       norm    frac
  C POWER3/PPC630
-C POWER4/PPC970                39*     39*
-C POWER5               39*     39*
-
-C STATUS
-C  * Performace fluctuates like crazy
+C POWER4/PPC970         ?       ?
+C POWER5                37      ?
+C POWER6                62      ?
+C POWER7                30.5    ?
  
  C INPUT PARAMETERS
  C qp  = r3
@@ -121,12 +120,12 @@ L(loop):
         mulld   r6, r29, r3
         addc    r6, r6, r31
         adde    r8, r8, r29
+       cmpd    cr7, r27, r25
         mulld   r0, r30, r8
-       subf    r31, r0, r31
         mulhdu  r11, r28, r8
         mulld   r10, r28, r8
+       subf    r31, r0, r31
         li      r7, 0
-       cmpd    cr7, r27, r25
         blt     cr7, L(60)
         ld      r7, 0(r26)
         addi    r26, r26, -8
diff --git a/mpn/powerpc64/mode64/invert_limb.asm b/mpn/powerpc64/mode64/invert_limb.asm

index 02a67a39793d19d74bbfec7f0b628ca4e0de42a4..31b2430010128845f2c9e8a564e246b267d514e5 100644 (file)
--- a/mpn/powerpc64/mode64/invert_limb.asm
+++ b/mpn/powerpc64/mode64/invert_limb.asm
@@ -1,6 +1,6 @@
  dnl  PowerPC-64 mpn_invert_limb -- Invert a normalized limb.
  
-dnl  Copyright 2004, 2005, 2006, 2008 Free Software Foundation, Inc.
+dnl  Copyright 2004, 2005, 2006, 2008, 2010 Free Software Foundation, Inc.
  
  dnl  This file is part of the GNU MP Library.
  
@@ -19,91 +19,88 @@ dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
  
  include(`../config.m4')
  
-C              cycles/limb
-C POWER3/PPC630:     ?
-C POWER4/PPC970:     75 (including call+ret)
-
-C TODO:
-C   * Pair multiply instructions.
+C                  cycles/limb (approximate)
+C POWER3/PPC630         80
+C POWER4/PPC970         86
+C POWER5                86
+C POWER6               170
+C POWER7                66
  
  ASM_START()
  PROLOGUE(mpn_invert_limb)
         LEAL(   r12, approx_tab)
-
-       srdi    r11, r3, 32             C r11 = d >> 32
-       rlwinm  r9, r11, 10, 23, 30     C r9 = ((d >> 55) & 0xff) << 1
-       lhzx    r0, r12, r9             C load initial approximation
-       rldic   r10, r0, 6, 42
-       mulld   r8, r10, r10
-       sldi    r9, r10, 17
-       mulld   r0, r8, r11
-       srdi    r0, r0, 31
-       subf    r10, r0, r9
-       mulld   r8, r10, r10
-       sldi    r11, r10, 33
-       mulhdu  r0, r8, r3
-       sldi    r9, r0, 1
-       subf    r10, r9, r11
-       sldi    r11, r10, 2
-       mulhdu  r0, r10, r10
-       mulld   r8, r10, r10
-       mulhdu  r10, r8, r3
-       mulld   r9, r0, r3
-       mulhdu  r0, r0, r3
-       addc    r8, r9, r10
-       addze   r10, r0
-       srdi    r0, r8, 62
-       rldimi  r0, r10, 2, 0
-       sldi    r9, r8, 2
-       subfic  r10, r9, 0
-       subfe   r8, r0, r11
-       mulhdu  r10, r3, r8
-       add     r10, r10, r3
-       mulld   r9, r3, r8
-       subf    r11, r10, r8
-       addi    r0, r10, 1
-       addi    r8, r11, -1
-       and     r0, r3, r0
-       addc    r11, r9, r0
-       addze   r10, r10
-       addc    r0, r11, r3
-       addze   r10, r10
-       subf    r3, r10, r8
+       srdi    r9, r3, 32
+       rlwinm  r9, r9, 10, 23, 30      C (d >> 55) & 0x1fe
+       srdi    r10, r3, 24             C d >> 24
+       lis     r11, 0x1000
+       rldicl  r8, r3, 0, 63           C d mod 2
+       addi    r10, r10, 1             C d40
+       sldi    r11, r11, 32            C 2^60
+       srdi    r7, r3, 1               C d/2
+       add     r7, r7, r8              C d63 = ceil(d/2)
+       neg     r8, r8                  C mask = -(d mod 2)
+       lhzx    r0, r9, r12
+       mullw   r9, r0, r0              C v0*v0
+       sldi    r6, r0, 11              C v0 << 11
+       addi    r0, r6, -1              C (v0 << 11) - 1
+       mulld   r9, r9, r10             C v0*v0*d40
+       srdi    r9, r9, 40              C v0*v0*d40 >> 40
+       subf    r9, r9, r0              C v1 = (v0 << 11) - (v0*v0*d40 >> 40) - 1
+       mulld   r0, r9, r10             C v1*d40
+       sldi    r6, r9, 13              C v1 << 13
+       subf    r0, r0, r11             C 2^60 - v1*d40
+       mulld   r0, r0, r9              C v1 * (2^60 - v1*d40)
+       srdi    r0, r0, 47              C v1 * (2^60 - v1*d40) >> 47
+       add     r0, r0, r6              C v2 = (v1 << 13) + (v1 * (2^60 - v1*d40) >> 47)
+       mulld   r11, r0, r7             C v2 * d63
+       srdi    r10, r0, 1              C v2 >> 1
+       sldi    r9, r0, 31              C v2 << 31
+       and     r8, r10, r8             C (v2 >> 1) & mask
+       subf    r8, r11, r8             C ((v2 >> 1) & mask) - v2 * d63
+       mulhdu  r0, r8, r0              C p1 = v2 * (((v2 >> 1) & mask) - v2 * d63)
+       srdi    r0, r0, 1               C p1 >> 1
+       add     r0, r0, r9              C v3 = (v2 << 31) + (p1 >> 1)
+       nop
+       mulhdu  r9, r0, r3
+       mulld   r11, r0, r3
+       addc    r10, r11, r3
+       adde    r3, r9, r3
+       subf    r3, r3, r0
         blr
  EPILOGUE()
  
  DEF_OBJECT(approx_tab)
-       .short  1023,1020,1016,1012,1008,1004,1000,996
-       .short  992,989,985,981,978,974,970,967
-       .short  963,960,956,953,949,946,942,939
-       .short  936,932,929,926,923,919,916,913
-       .short  910,907,903,900,897,894,891,888
-       .short  885,882,879,876,873,870,868,865
-       .short  862,859,856,853,851,848,845,842
-       .short  840,837,834,832,829,826,824,821
-       .short  819,816,814,811,809,806,804,801
-       .short  799,796,794,791,789,787,784,782
-       .short  780,777,775,773,771,768,766,764
-       .short  762,759,757,755,753,751,748,746
-       .short  744,742,740,738,736,734,732,730
-       .short  728,726,724,722,720,718,716,714
-       .short  712,710,708,706,704,702,700,699
-       .short  697,695,693,691,689,688,686,684
-       .short  682,680,679,677,675,673,672,670
-       .short  668,667,665,663,661,660,658,657
-       .short  655,653,652,650,648,647,645,644
-       .short  642,640,639,637,636,634,633,631
-       .short  630,628,627,625,624,622,621,619
-       .short  618,616,615,613,612,611,609,608
-       .short  606,605,604,602,601,599,598,597
-       .short  595,594,593,591,590,589,587,586
-       .short  585,583,582,581,579,578,577,576
-       .short  574,573,572,571,569,568,567,566
-       .short  564,563,562,561,560,558,557,556
-       .short  555,554,553,551,550,549,548,547
-       .short  546,544,543,542,541,540,539,538
-       .short  537,536,534,533,532,531,530,529
-       .short  528,527,526,525,524,523,522,521
-       .short  520,519,518,517,516,515,514,513
+        .short  0x7fd,0x7f5,0x7ed,0x7e5,0x7dd,0x7d5,0x7ce,0x7c6
+        .short  0x7bf,0x7b7,0x7b0,0x7a8,0x7a1,0x79a,0x792,0x78b
+        .short  0x784,0x77d,0x776,0x76f,0x768,0x761,0x75b,0x754
+        .short  0x74d,0x747,0x740,0x739,0x733,0x72c,0x726,0x720
+        .short  0x719,0x713,0x70d,0x707,0x700,0x6fa,0x6f4,0x6ee
+        .short  0x6e8,0x6e2,0x6dc,0x6d6,0x6d1,0x6cb,0x6c5,0x6bf
+        .short  0x6ba,0x6b4,0x6ae,0x6a9,0x6a3,0x69e,0x698,0x693
+        .short  0x68d,0x688,0x683,0x67d,0x678,0x673,0x66e,0x669
+        .short  0x664,0x65e,0x659,0x654,0x64f,0x64a,0x645,0x640
+        .short  0x63c,0x637,0x632,0x62d,0x628,0x624,0x61f,0x61a
+        .short  0x616,0x611,0x60c,0x608,0x603,0x5ff,0x5fa,0x5f6
+        .short  0x5f1,0x5ed,0x5e9,0x5e4,0x5e0,0x5dc,0x5d7,0x5d3
+        .short  0x5cf,0x5cb,0x5c6,0x5c2,0x5be,0x5ba,0x5b6,0x5b2
+        .short  0x5ae,0x5aa,0x5a6,0x5a2,0x59e,0x59a,0x596,0x592
+        .short  0x58e,0x58a,0x586,0x583,0x57f,0x57b,0x577,0x574
+        .short  0x570,0x56c,0x568,0x565,0x561,0x55e,0x55a,0x556
+        .short  0x553,0x54f,0x54c,0x548,0x545,0x541,0x53e,0x53a
+        .short  0x537,0x534,0x530,0x52d,0x52a,0x526,0x523,0x520
+        .short  0x51c,0x519,0x516,0x513,0x50f,0x50c,0x509,0x506
+        .short  0x503,0x500,0x4fc,0x4f9,0x4f6,0x4f3,0x4f0,0x4ed
+        .short  0x4ea,0x4e7,0x4e4,0x4e1,0x4de,0x4db,0x4d8,0x4d5
+        .short  0x4d2,0x4cf,0x4cc,0x4ca,0x4c7,0x4c4,0x4c1,0x4be
+        .short  0x4bb,0x4b9,0x4b6,0x4b3,0x4b0,0x4ad,0x4ab,0x4a8
+        .short  0x4a5,0x4a3,0x4a0,0x49d,0x49b,0x498,0x495,0x493
+        .short  0x490,0x48d,0x48b,0x488,0x486,0x483,0x481,0x47e
+        .short  0x47c,0x479,0x477,0x474,0x472,0x46f,0x46d,0x46a
+        .short  0x468,0x465,0x463,0x461,0x45e,0x45c,0x459,0x457
+        .short  0x455,0x452,0x450,0x44e,0x44b,0x449,0x447,0x444
+        .short  0x442,0x440,0x43e,0x43b,0x439,0x437,0x435,0x432
+        .short  0x430,0x42e,0x42c,0x42a,0x428,0x425,0x423,0x421
+        .short  0x41f,0x41d,0x41b,0x419,0x417,0x414,0x412,0x410
+        .short  0x40e,0x40c,0x40a,0x408,0x406,0x404,0x402,0x400
  END_OBJECT(approx_tab)
  ASM_END()
diff --git a/mpn/powerpc64/mode64/mod_1_1.asm b/mpn/powerpc64/mode64/mod_1_1.asm

new file mode 100644 (file)

index 0000000..f24ceb2
--- /dev/null
+++ b/mpn/powerpc64/mode64/mod_1_1.asm
@@ -0,0 +1,151 @@
+dnl  PowerPC-64 mpn_mod_1_1p
+
+dnl  Copyright 2010, 2011 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C                   cycles/limb
+C POWER3/PPC630          ?
+C POWER4/PPC970         17
+C POWER5                16
+C POWER6                30
+C POWER7                10.2
+
+C TODO
+C  * Optimise, in particular the cps function.  This was compiler-generated and
+C    then hand optimised.
+
+C INPUT PARAMETERS
+define(`ap',  `r3')
+define(`n',   `r4')
+define(`d',   `r5')
+define(`cps', `r6')
+
+ASM_START()
+
+EXTERN_FUNC(mpn_invert_limb)
+
+PROLOGUE(mpn_mod_1_1p)         C in: ap=r3 n=r4 d=r5 cps=r6; result returned in r3
+       sldi    r10, r4, 3              C r10 = 8*n, operand size in bytes
+       addi    r4, r4, -1              C r4 = n-1
+       add     r3, r3, r10             C r3 = ap + 8*n, just past the top limb
+       ld      r0, 16(r6)              C B1modb (doubleword at cps+16)
+       ld      r12, 24(r6)             C B2modb (doubleword at cps+24)
+       ld      r9, -8(r3)              C ap[n-1], the most significant limb
+       ld      r10, -16(r3)            C ap[n-2]
+       mtctr   r4                      C CTR = n-1
+       mulhdu  r8, r9, r0              C high half of ap[n-1] * B1modb
+       mulld   r7, r9, r0              C low half of ap[n-1] * B1modb
+       addc    r11, r7, r10            C rl = low half + ap[n-2], sets CA
+       addze   r9, r8                  C rh = high half + CA
+       bdz     L(end)                  C CTR becomes n-2; skip the loop if that is 0
+
+       ALIGN(16)
+L(top):        ld      r4, -24(r3)     C next lower limb of the operand
+       addi    r3, r3, -8              C step the pointer down one limb
+       nop
+       mulld   r10, r11, r0            C low half of rl * B1modb
+       mulld   r8, r9, r12             C low half of rh * B2modb
+       mulhdu  r11, r11, r0            C high half of rl * B1modb
+       mulhdu  r9, r9, r12             C high half of rh * B2modb
+       addc    r7, r10, r4             C low(rl*B1modb) + new limb, sets CA
+       addze   r10, r11                C high(rl*B1modb) + CA
+       addc    r11, r8, r7             C new rl, sets CA
+       adde    r9, r9, r10             C new rh = both high halves + CA
+       bdnz    L(top)                  C repeat until CTR reaches 0
+
+L(end):        lwz     r0, 12(r6)      C cnt = 32-bit word at cps+12
+       ld      r3, 0(r6)               C doubleword at cps+0, the multiplier used below
+       cmpdi   cr7, r0, 0
+       beq-    cr7, L(4)               C cnt = 0: skip the merge, rh stays as it is
+       subfic  r10, r0, 64             C 64 - cnt
+       sld     r9, r9, r0              C rh << cnt
+       srd     r10, r11, r10           C rl >> (64 - cnt)
+       or      r9, r10, r9             C rh = (rh << cnt) | (rl >> (64 - cnt))
+L(4):  subfc   r10, r5, r9             C rh - d; CA = 1 iff rh >= d (unsigned)
+       subfe   r10, r10, r10           C r10 = CA - 1: 0 if rh >= d, else -1
+       nand    r10, r10, r10           C invert: -1 if rh >= d, else 0
+       sld     r11, r11, r0            C rl <<= cnt
+       and     r10, r10, r5            C d if rh >= d, else 0
+       subf    r9, r10, r9             C rh -= d when rh >= d
+       mulhdu  r10, r9, r3             C high half of rh * multiplier
+       mulld   r3, r9, r3              C low half of rh * multiplier
+       addi    r9, r9, 1               C rh + 1
+       addc    r8, r3, r11             C low half + rl, sets CA; kept for the compare
+       adde    r3, r10, r9             C q = high half + rh + 1 + CA
+       mulld   r3, r3, r5              C low 64 bits of q * d
+       subf    r3, r3, r11             C r = rl - q*d (mod 2^64)
+       cmpld   cr7, r8, r3
+       bge     cr7, L(5)               C FIXME: Make branch-less
+       add     r3, r3, r5              C r += d when r8 < r (unsigned)
+L(5):  cmpld   cr7, r3, r5
+       bge-    cr7, L(10)              C r >= d: one more subtraction needed
+       srd     r3, r3, r0              C return r >> cnt
+       blr
+
+L(10): subf    r3, r5, r3              C r -= d
+       srd     r3, r3, r0              C return r >> cnt
+       blr
+EPILOGUE()
+
+PROLOGUE(mpn_mod_1_1p_cps)     C in: r3 = 4-doubleword table to fill, r4 = value to normalise and invert
+       mflr    r0                      C copy the return address out of LR
+       std     r29, -24(r1)            C save r29, r30, r31 below the stack pointer
+       std     r30, -16(r1)
+       std     r31, -8(r1)
+       cntlzd  r31, r4                 C cnt = number of leading zero bits in r4
+       std     r0, 16(r1)              C save the return address at 16(r1)
+       extsw   r31, r31
+       mr      r29, r3                 C keep the table pointer across the call
+       stdu    r1, -144(r1)            C push a 144-byte stack frame
+       sld     r30, r4, r31            C b = r4 << cnt
+       mr      r3, r30                 C b is the argument to mpn_invert_limb
+       CALL(   mpn_invert_limb)
+       nop
+       cmpdi   cr7, r31, 0
+       neg     r0, r30                 C B1modb = -b, the final value when cnt = 0
+       beq-    cr7, L(13)
+       subfic  r11, r31, 64            C 64 - cnt
+       li      r0, 1
+       neg     r9, r30                 C -b
+       srd     r11, r3, r11            C bi >> (64 - cnt), bi = mpn_invert_limb result
+       sld     r0, r0, r31             C 1 << cnt
+       or      r0, r11, r0             C (1 << cnt) | (bi >> (64 - cnt))
+       mulld   r0, r0, r9              C B1modb = -b * that value (low 64 bits)
+L(13): mulhdu  r9, r0, r3              C high half of B1modb * bi
+       mulld   r11, r0, r3             C low half of B1modb * bi
+       add     r9, r0, r9              C B1modb + high half
+       nor     r9, r9, r9              C bitwise complement of that sum
+       mulld   r9, r9, r30             C B2modb candidate = complement * b (low 64 bits)
+       cmpld   cr7, r11, r9
+       bge     cr7, L(14)
+       add     r9, r9, r30             C add b when low half < candidate (unsigned)
+L(14): addi    r1, r1, 144             C pop the stack frame
+       srd     r0, r0, r31             C B1modb >> cnt
+       std     r31, 8(r29)             C table+8 = cnt, stored as a doubleword
+       std     r3, 0(r29)              C table+0 = bi
+       std     r0, 16(r29)             C table+16 = B1modb >> cnt
+       ld      r0, 16(r1)              C reload the return address
+       srd     r9, r9, r31             C B2modb >> cnt
+       ld      r30, -16(r1)            C restore r30
+       ld      r31, -8(r1)             C restore r31
+       std     r9, 24(r29)             C table+24 = B2modb >> cnt
+       ld      r29, -24(r1)            C restore r29, table pointer no longer needed
+       mtlr    r0
+       blr
+EPILOGUE()
diff --git a/mpn/powerpc64/mode64/mod_1_4.asm b/mpn/powerpc64/mode64/mod_1_4.asm

new file mode 100644 (file)

index 0000000..b6163c5
--- /dev/null
+++ b/mpn/powerpc64/mode64/mod_1_4.asm
@@ -0,0 +1,257 @@
+dnl  PowerPC-64 mpn_mod_1s_4p
+
+dnl  Copyright 2010, 2011 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C                   cycles/limb
+C POWER3/PPC630          ?
+C POWER4/PPC970          9
+C POWER5                 9
+C POWER6                13
+C POWER7                3.5
+
+C TODO
+C  * Optimise, in particular the cps function.  This was compiler-generated and
+C    then hand optimised.
+
+C INPUT PARAMETERS
+define(`ap',  `r3')
+define(`n',   `r4')
+define(`d',   `r5')
+define(`cps', `r6')
+
+ASM_START()
+
+EXTERN_FUNC(mpn_invert_limb)
+
+PROLOGUE(mpn_mod_1s_4p)        C in: ap=r3 n=r4 d=r5 cps=r6; result returned in r3
+       std     r23, -72(r1)            C save r23-r31 below the stack pointer
+       ld      r23, 48(cps)            C B5modb
+       std     r24, -64(r1)
+       std     r25, -56(r1)
+       ld      r24, 32(cps)            C doubleword at cps+32, presumably B3modb
+       ld      r25, 24(cps)            C doubleword at cps+24, presumably B2modb
+       std     r26, -48(r1)
+       std     r27, -40(r1)
+       ld      r26, 16(cps)            C doubleword at cps+16, presumably B1modb
+       std     r28, -32(r1)
+       std     r29, -24(r1)
+       std     r30, -16(r1)
+       std     r31, -8(r1)
+       ld      r30, 40(cps)            C B4modb
+
+       rldicl. r0, n, 0,62             C r0 = n mod 4, result also recorded in cr0
+       sldi    r31, n, 3               C 8*n, operand size in bytes
+       add     ap, ap, r31             C make ap point at end of operand
+
+       cmpdi   cr7, r0, 2
+       beq     cr0, L(b00)             C n mod 4 = 0
+       blt     cr7, L(b01)             C n mod 4 = 1
+       beq     cr7, L(b10)             C n mod 4 = 2
+
+L(b11):        ld      r11, -16(ap)    C n mod 4 = 3: ap[n-2]
+       ld      r9, -8(ap)              C ap[n-1]
+       ld      r0, -24(ap)             C ap[n-3]
+       mulhdu  r27, r11, r26           C high half of ap[n-2] * r26
+       mulld   r8, r11, r26            C low half of ap[n-2] * r26
+       mulhdu  r11, r9, r25            C high half of ap[n-1] * r25
+       mulld   r9, r9, r25             C low half of ap[n-1] * r25
+       addc    r31, r8, r0             C ap[n-3] + low(ap[n-2]*r26), sets CA
+       addze   r10, r27                C high(ap[n-2]*r26) + CA
+       addc    r0, r9, r31             C rl = low limb of the sum
+       adde    r9, r11, r10            C rh = high limb of the sum
+       addi    ap, ap, -40             C position ap for the loop loads at -16, -8, 0, 8
+       b       L(6)
+
+       ALIGN(16)
+L(b00):        ld      r11, -24(ap)    C n mod 4 = 0: ap[n-3]
+       ld      r10, -16(ap)            C ap[n-2]
+       ld      r9, -8(ap)              C ap[n-1]
+       ld      r0, -32(ap)             C ap[n-4]
+       mulld   r8, r11, r26            C low half of ap[n-3] * r26
+       mulhdu  r7, r10, r25            C high half of ap[n-2] * r25
+       mulhdu  r27, r11, r26           C high half of ap[n-3] * r26
+       mulhdu  r11, r9, r24            C high half of ap[n-1] * r24
+       mulld   r10, r10, r25           C low half of ap[n-2] * r25
+       mulld   r9, r9, r24             C low half of ap[n-1] * r24
+       addc    r31, r8, r0             C ap[n-4] + low(ap[n-3]*r26), sets CA
+       addze   r0, r27                 C high(ap[n-3]*r26) + CA
+       addc    r8, r31, r10            C add ap[n-2]*r25, low limb
+       adde    r10, r0, r7             C add ap[n-2]*r25, high limb
+       addc    r0, r9, r8              C rl: add ap[n-1]*r24, low limb
+       adde    r9, r11, r10            C rh: add ap[n-1]*r24, high limb
+       addi    ap, ap, -48             C position ap for the loop loads at -16, -8, 0, 8
+       b       L(6)
+
+       ALIGN(16)
+L(b01):        li      r9, 0           C n mod 4 = 1: rh = 0
+       ld      r0, -8(ap)              C rl = ap[n-1]
+       addi    ap, ap, -24             C position ap for the loop loads at -16, -8, 0, 8
+       b       L(6)
+
+       ALIGN(16)
+L(b10):        ld      r9, -8(ap)      C n mod 4 = 2: rh = ap[n-1]
+       ld      r0, -16(ap)             C rl = ap[n-2]
+       addi    ap, ap, -32             C position ap for the loop loads at -16, -8, 0, 8
+
+       ALIGN(16)
+L(6):  addi    r10, n, 3
+       srdi    r7, r10, 2              C (n + 3) / 4
+       mtctr   r7                      C CTR = (n + 3) / 4
+       bdz     L(end)                  C decrement CTR; skip the loop if it reaches 0
+
+       ALIGN(16)
+L(top):        ld      r31, -16(ap)    C u0, lowest of the next four limbs
+       ld      r10, -8(ap)             C u1
+       ld      r11, 8(ap)              C u3, highest of the next four limbs
+       ld      r12, 0(ap)              C u2
+       mulld   r29, r0, r30            C low half of rl * B4modb
+       mulhdu  r0,  r0, r30            C high half of rl * B4modb
+       mulhdu  r27, r10, r26           C high half of u1 * r26
+       mulld   r10, r10, r26           C low half of u1 * r26
+       mulhdu  r7, r9, r23             C high half of rh * B5modb
+       mulld   r9, r9, r23             C low half of rh * B5modb
+       mulhdu  r28, r11, r24           C high half of u3 * r24
+       mulld   r11, r11, r24           C low half of u3 * r24
+       mulhdu  r4, r12, r25            C high half of u2 * r25; r4 is free, n is not read after L(6)
+       mulld   r12, r12, r25           C low half of u2 * r25
+       addc    r8, r10, r31            C u0 + low(u1*r26), sets CA
+       addze   r10, r27                C high(u1*r26) + CA
+       addi    ap, ap, -32             C step down four limbs
+       addc    r27, r8, r12            C add u2*r25, low limb
+       adde    r12, r10, r4            C add u2*r25, high limb
+       addc    r11, r27, r11           C add u3*r24, low limb
+       adde    r31, r12, r28           C add u3*r24, high limb
+       addc    r12, r11, r29           C add rl*B4modb, low limb
+       adde    r4, r31, r0             C add rl*B4modb, high limb
+       addc    r0, r9, r12             C new rl: add rh*B5modb, low limb
+       adde    r9, r7, r4              C new rh: add rh*B5modb, high limb
+       bdnz    L(top)                  C repeat until CTR reaches 0
+
+L(end):        lwz     r3, 12(cps)     C cnt = 32-bit word at cps+12; r3 is reused, ap is done
+       mulld   r10, r9, r26            C low half of rh * r26
+       mulhdu  r9, r9, r26             C high half of rh * r26
+       addc    r11, r0, r10            C rl = rl + low half, sets CA
+       addze   r9, r9                  C rh = high half + CA
+       ld      r10, 0(cps)             C doubleword at cps+0, the multiplier used below
+       subfic  r8, r3, 64              C 64 - cnt
+       sld     r9, r9, r3              C rh << cnt
+       srd     r8, r11, r8             C rl >> (64 - cnt); NOTE(review): no cnt = 0 branch here, presumably relies on srd by 64 giving 0 - confirm
+       sld     r11, r11, r3            C rl <<= cnt
+       or      r9, r8, r9              C rh = (rh << cnt) | (rl >> (64 - cnt))
+       mulld   r0, r9, r10             C low half of rh * multiplier
+       mulhdu  r10, r9, r10            C high half of rh * multiplier
+       addi    r9, r9, 1               C rh + 1
+       addc    r8, r0, r11             C low half + rl, sets CA; kept for the compare
+       adde    r0, r10, r9             C q = high half + rh + 1 + CA
+       mulld   r0, r0, d               C low 64 bits of q * d
+       subf    r0, r0, r11             C r = rl - q*d (mod 2^64)
+       cmpld   cr7, r8, r0
+       bge     cr7, L(9)
+       add     r0, r0, d               C r += d when r8 < r (unsigned)
+L(9):  cmpld   cr7, r0, d
+       bge-    cr7, L(16)              C r >= d: one more subtraction needed
+L(10): srd     r3, r0, r3              C return r >> cnt
+       ld      r23, -72(r1)            C restore r23-r31
+       ld      r24, -64(r1)
+       ld      r25, -56(r1)
+       ld      r26, -48(r1)
+       ld      r27, -40(r1)
+       ld      r28, -32(r1)
+       ld      r29, -24(r1)
+       ld      r30, -16(r1)
+       ld      r31, -8(r1)
+       blr
+
+L(16): subf    r0, d, r0               C r -= d
+       b       L(10)
+EPILOGUE()
+
+PROLOGUE(mpn_mod_1s_4p_cps)    C in: r3 = 7-doubleword table to fill, r4 = value to normalise and invert
+       mflr    r0                      C copy the return address out of LR
+       std     r29, -24(r1)            C save r29, r30, r31 below the stack pointer
+       std     r30, -16(r1)
+       mr      r29, r3                 C keep the table pointer across the call
+       std     r0, 16(r1)              C save the return address at 16(r1)
+       std     r31, -8(r1)
+       stdu    r1, -144(r1)            C push a 144-byte stack frame
+       cntlzd  r31, r4                 C cnt = number of leading zero bits in r4
+       sld     r30, r4, r31            C b = r4 << cnt
+       mr      r3, r30                 C b is the argument to mpn_invert_limb
+       CALL(   mpn_invert_limb)
+       nop
+       subfic  r9, r31, 64             C 64 - cnt
+       li      r10, 1
+       sld     r10, r10, r31           C 1 << cnt
+       srd     r9, r3, r9              C bi >> (64 - cnt), bi = mpn_invert_limb result
+       neg     r0, r30                 C -b
+       or      r10, r10, r9            C (1 << cnt) | (bi >> (64 - cnt))
+       mulld   r10, r10, r0            C v1 = -b * that value (low 64 bits); stored at table+16
+       mulhdu  r11, r10, r3            C high half of v1 * bi
+       nor     r11, r11, r11           C complement of the high half
+       subf    r11, r10, r11           C ~high - v1, equal to ~(v1 + high)
+       mulld   r11, r11, r30           C v2 candidate = that * b (low 64 bits)
+       mulld   r0, r10, r3             C low half of v1 * bi
+       cmpld   cr7, r0, r11
+       bge     cr7, L(18)
+       add     r11, r11, r30           C v2 += b when low half < candidate; stored at table+24
+L(18): mulhdu  r9, r11, r3             C same step again: high half of v2 * bi
+       add     r9, r11, r9             C v2 + high half
+       nor     r9, r9, r9              C complement
+       mulld   r9, r9, r30             C v3 candidate
+       mulld   r0, r11, r3             C low half of v2 * bi
+       cmpld   cr7, r0, r9
+       bge     cr7, L(19)
+       add     r9, r9, r30             C v3 += b when low half < candidate; stored at table+32
+L(19): mulhdu  r0, r9, r3              C same step again: high half of v3 * bi
+       add     r0, r9, r0              C v3 + high half
+       nor     r0, r0, r0              C complement
+       mulld   r0, r0, r30             C v4 candidate
+       mulld   r8, r9, r3              C low half of v3 * bi
+       cmpld   cr7, r8, r0
+       bge     cr7, L(20)
+       add     r0, r0, r30             C v4 += b when low half < candidate; stored at table+40
+L(20): mulhdu  r8, r0, r3              C same step again: high half of v4 * bi
+       add     r8, r0, r8              C v4 + high half
+       nor     r8, r8, r8              C complement
+       mulld   r8, r8, r30             C v5 candidate
+       mulld   r7, r0, r3              C low half of v4 * bi
+       cmpld   cr7, r7, r8
+       bge     cr7, L(21)
+       add     r8, r8, r30             C v5 += b when low half < candidate; stored at table+48
+L(21): srd     r0, r0, r31             C v4 >> cnt
+       addi    r1, r1, 144             C pop the stack frame
+       srd     r8, r8, r31             C v5 >> cnt
+       srd     r10, r10, r31           C v1 >> cnt
+       srd     r11, r11, r31           C v2 >> cnt
+       std     r0, 40(r29)             C table+40 = v4 >> cnt
+       std     r31, 8(r29)             C table+8 = cnt, stored as a doubleword
+       srd     r9, r9, r31             C v3 >> cnt
+       ld      r0, 16(r1)              C reload the return address
+       ld      r30, -16(r1)            C restore r30
+       std     r8, 48(r29)             C table+48 = v5 >> cnt
+       std     r3, 0(r29)              C table+0 = bi
+       mtlr    r0
+       ld      r31, -8(r1)             C restore r31
+       std     r10, 16(r29)            C table+16 = v1 >> cnt
+       std     r11, 24(r29)            C table+24 = v2 >> cnt
+       std     r9, 32(r29)             C table+32 = v3 >> cnt
+       ld      r29, -24(r1)            C restore r29, table pointer no longer needed
+       blr
+EPILOGUE()
diff --git a/mpn/powerpc64/mode64/mod_34lsub1.asm b/mpn/powerpc64/mode64/mod_34lsub1.asm

index ca46c3933b644a5fa154dd3d7248d0ff339f3ea4..30b9f98bee9dbfb450bbf476c98c19dd660f14cc 100644 (file)
--- a/mpn/powerpc64/mode64/mod_34lsub1.asm
+++ b/mpn/powerpc64/mode64/mod_34lsub1.asm
@@ -1,4 +1,4 @@
-dnl  PowerPC-64 mpn_mod_34lsub1 -- modulo 2^24-1.
+dnl  PowerPC-64 mpn_mod_34lsub1 -- modulo 2^48-1.
  
  dnl  Copyright 2005 Free Software Foundation, Inc.
  
@@ -19,10 +19,12 @@ dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
  
  include(`../config.m4')
  
-C              cycles/limb
-C POWER3/PPC630:     1.33
-C POWER4/PPC970:     1.5
-C POWER5:           1.57
+C                   cycles/limb
+C POWER3/PPC630          1.33
+C POWER4/PPC970          1.5
+C POWER5                 1.32
+C POWER6                 2.35
+C POWER7                 1
  
  C INPUT PARAMETERS
  define(`up',`r3')
diff --git a/mpn/powerpc64/mode64/mode1o.asm b/mpn/powerpc64/mode64/mode1o.asm

index 489ca855122c9f48a8a3c17114e083b50e6aba6c..37e4028d88bd2ac814c562ddfe3d41ad45bf0436 100644 (file)
--- a/mpn/powerpc64/mode64/mode1o.asm
+++ b/mpn/powerpc64/mode64/mode1o.asm
@@ -19,10 +19,12 @@ dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
  
  include(`../config.m4')
  
-C               cycles/limb
-C POWER3/PPC630:    13-19
-C POWER4/PPC970:     16
-C POWER5:            16
+C                  cycles/limb
+C POWER3/PPC630        13-19
+C POWER4/PPC970         16
+C POWER5                16
+C POWER6                 ?
+C POWER7                12
  
  C TODO
  C  * Check if n=1 code is really an improvement.  It probably isn't.
diff --git a/mpn/powerpc64/mode64/mul_1.asm b/mpn/powerpc64/mode64/mul_1.asm

index 8f644d871045f8af56b5a30b91181354fce9ad39..e911cf551ec0bf422acb443f9cfe0939a0dc3ff2 100644 (file)
--- a/mpn/powerpc64/mode64/mul_1.asm
+++ b/mpn/powerpc64/mode64/mul_1.asm
@@ -1,7 +1,7 @@
  dnl  PowerPC-64 mpn_mul_1 -- Multiply a limb vector with a limb and store
  dnl  the result in a second limb vector.
  
-dnl  Copyright 1999, 2000, 2001, 2003, 2004, 2005, 2006 Free Software
+dnl  Copyright 1999, 2000, 2001, 2003, 2004, 2005, 2006, 2010 Free Software
  dnl  Foundation, Inc.
  
  dnl  This file is part of the GNU MP Library.
@@ -21,10 +21,12 @@ dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
  
  include(`../config.m4')
  
-C              cycles/limb
-C POWER3/PPC630:     6-18
-C POWER4/PPC970:     7.25
-C POWER5:            7.75
+C               cycles/limb
+C POWER3/PPC630     6-18
+C POWER4/PPC970     7.25?  not updated for last file revision
+C POWER5            7.25
+C POWER6           14
+C POWER7            2.9
  
  C TODO
  C  * Try to reduce the number of needed live registers (at least r5 and r10
@@ -118,26 +120,18 @@ L(b10):   ld      r27, 8(up)
  
  L(top):        mulld   r0, r26, r6
         mulhdu  r5, r26, r6
-       ld      r26, 0(up)
-       nop
-
         mulld   r7, r27, r6
         mulhdu  r8, r27, r6
+       ld      r26, 0(up)
         ld      r27, 8(up)
-       nop
-
         adde    r0, r0, r12
         adde    r7, r7, r5
-
         mulld   r9, r26, r6
         mulhdu  r10, r26, r6
-       ld      r26, 16(up)
-       nop
-
         mulld   r11, r27, r6
         mulhdu  r12, r27, r6
+       ld      r26, 16(up)
         ld      r27, 24(up)
-
         std     r0, 0(rp)
         adde    r9, r9, r8
         std     r7, 8(rp)
@@ -151,13 +145,10 @@ L(top):   mulld   r0, r26, r6
  
  L(end):        mulld   r0, r26, r6
         mulhdu  r5, r26, r6
-
         mulld   r7, r27, r6
         mulhdu  r8, r27, r6
-
         adde    r0, r0, r12
         adde    r7, r7, r5
-
         std     r0, 0(rp)
         std     r7, 8(rp)
  L(ret):        addze   r3, r8
diff --git a/mpn/powerpc64/mode64/mul_basecase.asm b/mpn/powerpc64/mode64/mul_basecase.asm

index cea5417eb2fd92504ceb3a162e25f77820516768..9a3957f945ea278f0762747e63026af160fc2d49 100644 (file)
--- a/mpn/powerpc64/mode64/mul_basecase.asm
+++ b/mpn/powerpc64/mode64/mul_basecase.asm
@@ -1,4 +1,4 @@
-dnl  PowerPC-64 mpn_basecase.
+dnl  PowerPC-64 mpn_mul_basecase.
  
  dnl  Copyright 1999, 2000, 2001, 2003, 2004, 2005, 2006, 2008 Free Software
  dnl  Foundation, Inc.
@@ -20,11 +20,11 @@ dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
  
  include(`../config.m4')
  
-C              cycles/limb
-C POWER3/PPC630:    6-18
-C POWER4/PPC970:     8
-C POWER5:            8
-
+C                  cycles/limb
+C POWER3/PPC630         6-18
+C POWER4/PPC970          8
+C POWER5                 8
+C POWER6                24
  
  C INPUT PARAMETERS
  define(`rp', `r3')
diff --git a/mpn/powerpc64/mode64/p3/gmp-mparam.h b/mpn/powerpc64/mode64/p3/gmp-mparam.h

index ba195df128179067a1d0a797151a3acdb41b05a5..03c07e43d8f25324187d6db0264f1017406f66a5 100644 (file)
--- a/mpn/powerpc64/mode64/p3/gmp-mparam.h
+++ b/mpn/powerpc64/mode64/p3/gmp-mparam.h
@@ -23,12 +23,13 @@ along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
  
  #define MOD_1_NORM_THRESHOLD                 0  /* always */
  #define MOD_1_UNNORM_THRESHOLD               0  /* always */
-#define MOD_1N_TO_MOD_1_1_THRESHOLD      MP_SIZE_T_MAX  /* never */
-#define MOD_1U_TO_MOD_1_1_THRESHOLD      MP_SIZE_T_MAX
-#define MOD_1_1_TO_MOD_1_2_THRESHOLD     MP_SIZE_T_MAX
-#define MOD_1_2_TO_MOD_1_4_THRESHOLD     MP_SIZE_T_MAX
-#define PREINV_MOD_1_TO_MOD_1_THRESHOLD  MP_SIZE_T_MAX  /* never */
+#define MOD_1N_TO_MOD_1_1_THRESHOLD          7
+#define MOD_1U_TO_MOD_1_1_THRESHOLD          3
+#define MOD_1_1_TO_MOD_1_2_THRESHOLD        18
+#define MOD_1_2_TO_MOD_1_4_THRESHOLD         0  /* never mpn_mod_1s_2p */
+#define PREINV_MOD_1_TO_MOD_1_THRESHOLD     17
  #define USE_PREINV_DIVREM_1                  0
+#define DIV_QR_2_PI2_THRESHOLD           MP_SIZE_T_MAX  /* never */
  #define DIVEXACT_1_THRESHOLD                 0  /* always (native) */
  #define BMOD_1_TO_MOD_1_THRESHOLD        MP_SIZE_T_MAX  /* never */
  
@@ -36,22 +37,25 @@ along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
  #define MUL_TOOM33_THRESHOLD                33
  #define MUL_TOOM44_THRESHOLD                46
  #define MUL_TOOM6H_THRESHOLD                77
-#define MUL_TOOM8H_THRESHOLD               115
+#define MUL_TOOM8H_THRESHOLD               139
  
  #define MUL_TOOM32_TO_TOOM43_THRESHOLD      49
-#define MUL_TOOM32_TO_TOOM53_THRESHOLD      38
-#define MUL_TOOM42_TO_TOOM53_THRESHOLD      33
-#define MUL_TOOM42_TO_TOOM63_THRESHOLD      32
+#define MUL_TOOM32_TO_TOOM53_THRESHOLD      47
+#define MUL_TOOM42_TO_TOOM53_THRESHOLD      49
+#define MUL_TOOM42_TO_TOOM63_THRESHOLD      49
+#define MUL_TOOM43_TO_TOOM54_THRESHOLD      34
  
-#define SQR_BASECASE_THRESHOLD               0  /* always */
+#define SQR_BASECASE_THRESHOLD               0  /* always (native) */
  #define SQR_TOOM2_THRESHOLD                 14
-#define SQR_TOOM3_THRESHOLD                 49
+#define SQR_TOOM3_THRESHOLD                 45
  #define SQR_TOOM4_THRESHOLD                 64
-#define SQR_TOOM6_THRESHOLD                 84
-#define SQR_TOOM8_THRESHOLD                127
+#define SQR_TOOM6_THRESHOLD                 85
+#define SQR_TOOM8_THRESHOLD                139
+
+#define MULMID_TOOM42_THRESHOLD             22
  
  #define MULMOD_BNM1_THRESHOLD                8
-#define SQRMOD_BNM1_THRESHOLD                9
+#define SQRMOD_BNM1_THRESHOLD               10
  
  #define MUL_FFT_MODF_THRESHOLD             220  /* k = 5 */
  #define MUL_FFT_TABLE3                                      \
@@ -123,35 +127,42 @@ along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
  #define SQR_FFT_TABLE3_SIZE 118
  #define SQR_FFT_THRESHOLD                 1728
  
-#define MULLO_BASECASE_THRESHOLD             3
+#define MULLO_BASECASE_THRESHOLD             2
  #define MULLO_DC_THRESHOLD                  27
-#define MULLO_MUL_N_THRESHOLD             4940
+#define MULLO_MUL_N_THRESHOLD             2511
  
-#define DC_DIV_QR_THRESHOLD                 27
-#define DC_DIVAPPR_Q_THRESHOLD              95
-#define DC_BDIV_QR_THRESHOLD                28
-#define DC_BDIV_Q_THRESHOLD                 62
+#define DC_DIV_QR_THRESHOLD                 23
+#define DC_DIVAPPR_Q_THRESHOLD              87
+#define DC_BDIV_QR_THRESHOLD                27
+#define DC_BDIV_Q_THRESHOLD                 60
  
-#define INV_MULMOD_BNM1_THRESHOLD           35
-#define INV_NEWTON_THRESHOLD                97
-#define INV_APPR_THRESHOLD                  94
+#define INV_MULMOD_BNM1_THRESHOLD           27
+#define INV_NEWTON_THRESHOLD                91
+#define INV_APPR_THRESHOLD                  91
  
  #define BINV_NEWTON_THRESHOLD              115
-#define REDC_1_TO_REDC_N_THRESHOLD          30
+#define REDC_1_TO_REDC_N_THRESHOLD          31
  
  #define MU_DIV_QR_THRESHOLD                551
  #define MU_DIVAPPR_Q_THRESHOLD             551
-#define MUPI_DIV_QR_THRESHOLD               49
-#define MU_BDIV_QR_THRESHOLD               492
+#define MUPI_DIV_QR_THRESHOLD               42
+#define MU_BDIV_QR_THRESHOLD               483
  #define MU_BDIV_Q_THRESHOLD                492
  
-#define MATRIX22_STRASSEN_THRESHOLD          9
-#define HGCD_THRESHOLD                      55
-#define GCD_DC_THRESHOLD                   162
-#define GCDEXT_DC_THRESHOLD                124
+#define POWM_SEC_TABLE  2,23,140,556,713,746
+
+#define MATRIX22_STRASSEN_THRESHOLD          8
+#define HGCD_THRESHOLD                      56
+#define HGCD_APPR_THRESHOLD                 51
+#define HGCD_REDUCE_THRESHOLD              688
+#define GCD_DC_THRESHOLD                   333
+#define GCDEXT_DC_THRESHOLD                126
  #define JACOBI_BASE_METHOD                   1
  
  #define GET_STR_DC_THRESHOLD                17
-#define GET_STR_PRECOMPUTE_THRESHOLD        27
-#define SET_STR_DC_THRESHOLD               354
+#define GET_STR_PRECOMPUTE_THRESHOLD        28
+#define SET_STR_DC_THRESHOLD               375
  #define SET_STR_PRECOMPUTE_THRESHOLD       812
+
+#define FAC_DSC_THRESHOLD                  351
+#define FAC_ODD_THRESHOLD                    0  /* always */
diff --git a/mpn/powerpc64/mode64/p4/gmp-mparam.h b/mpn/powerpc64/mode64/p4/gmp-mparam.h

index 1606fab0e6e26ab841ad3b74c4d3b618a209615a..3b4911922e7dee560a334b994709df43c57f7444 100644 (file)
--- a/mpn/powerpc64/mode64/p4/gmp-mparam.h
+++ b/mpn/powerpc64/mode64/p4/gmp-mparam.h
@@ -23,180 +23,139 @@ along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
  
  #define MOD_1_NORM_THRESHOLD                 0  /* always */
  #define MOD_1_UNNORM_THRESHOLD               0  /* always */
-#define MOD_1N_TO_MOD_1_1_THRESHOLD         10
-#define MOD_1U_TO_MOD_1_1_THRESHOLD          6
-#define MOD_1_1_TO_MOD_1_2_THRESHOLD         8
-#define MOD_1_2_TO_MOD_1_4_THRESHOLD        23
+#define MOD_1N_TO_MOD_1_1_THRESHOLD          6
+#define MOD_1U_TO_MOD_1_1_THRESHOLD          5
+#define MOD_1_1_TO_MOD_1_2_THRESHOLD         9
+#define MOD_1_2_TO_MOD_1_4_THRESHOLD        20
  #define PREINV_MOD_1_TO_MOD_1_THRESHOLD     16
  #define USE_PREINV_DIVREM_1                  0
+#define DIV_QR_2_PI2_THRESHOLD           MP_SIZE_T_MAX  /* never */
  #define DIVEXACT_1_THRESHOLD                 0  /* always (native) */
-#define BMOD_1_TO_MOD_1_THRESHOLD           43
+#define BMOD_1_TO_MOD_1_THRESHOLD           37
  
-#define MUL_TOOM22_THRESHOLD                14
-#define MUL_TOOM33_THRESHOLD                54
-#define MUL_TOOM44_THRESHOLD               154
-#define MUL_TOOM6H_THRESHOLD               206
+#define MUL_TOOM22_THRESHOLD                18
+#define MUL_TOOM33_THRESHOLD                53
+#define MUL_TOOM44_THRESHOLD               106
+#define MUL_TOOM6H_THRESHOLD               180
  #define MUL_TOOM8H_THRESHOLD               309
  
-#define MUL_TOOM32_TO_TOOM43_THRESHOLD      89
-#define MUL_TOOM32_TO_TOOM53_THRESHOLD      99
-#define MUL_TOOM42_TO_TOOM53_THRESHOLD      97
-#define MUL_TOOM42_TO_TOOM63_THRESHOLD      97
+#define MUL_TOOM32_TO_TOOM43_THRESHOLD      61
+#define MUL_TOOM32_TO_TOOM53_THRESHOLD      91
+#define MUL_TOOM42_TO_TOOM53_THRESHOLD      92
+#define MUL_TOOM42_TO_TOOM63_THRESHOLD      51
+#define MUL_TOOM43_TO_TOOM54_THRESHOLD      79
  
-#define SQR_BASECASE_THRESHOLD               5
-#define SQR_TOOM2_THRESHOLD                 36
-#define SQR_TOOM3_THRESHOLD                 61
-#define SQR_TOOM4_THRESHOLD                154
-#define SQR_TOOM6_THRESHOLD                206
-#define SQR_TOOM8_THRESHOLD                309
+#define SQR_BASECASE_THRESHOLD               0  /* always (native) */
+#define SQR_TOOM2_THRESHOLD                 28
+#define SQR_TOOM3_THRESHOLD                 73
+#define SQR_TOOM4_THRESHOLD                136
+#define SQR_TOOM6_THRESHOLD                194
+#define SQR_TOOM8_THRESHOLD                272
  
-#define MULMOD_BNM1_THRESHOLD               12
-#define SQRMOD_BNM1_THRESHOLD               14
+#define MULMID_TOOM42_THRESHOLD             32
  
-#define MUL_FFT_MODF_THRESHOLD             380  /* k = 5 */
+#define MULMOD_BNM1_THRESHOLD               11
+#define SQRMOD_BNM1_THRESHOLD               16
+
+#define MUL_FFT_MODF_THRESHOLD             372  /* k = 5 */
  #define MUL_FFT_TABLE3                                      \
-  { {    380, 5}, {     17, 6}, {      9, 5}, {     19, 6}, \
-    {     10, 5}, {     21, 6}, {     11, 5}, {     23, 6}, \
-    {     23, 7}, {     12, 6}, {     25, 7}, {     25, 8}, \
-    {     13, 7}, {     30, 6}, {     61, 7}, {     32, 8}, \
-    {     17, 7}, {     35, 8}, {     29, 9}, {     15, 8}, \
-    {     35, 9}, {     19, 8}, {     41, 9}, {     23, 8}, \
-    {     47, 9}, {     27,10}, {     15, 9}, {     39,10}, \
-    {     23, 9}, {     55,11}, {     15,10}, {     31, 9}, \
-    {     71,10}, {     39, 9}, {     83,10}, {     47, 9}, \
-    {     99,10}, {     55,11}, {     31,10}, {     63, 9}, \
-    {    127,10}, {     79,11}, {     47,10}, {    103,12}, \
-    {     31,11}, {     63,10}, {    127, 9}, {    255,10}, \
-    {    135, 9}, {    271,11}, {     79,10}, {    159, 9}, \
-    {    319,10}, {    167,11}, {     95,10}, {    191, 9}, \
-    {    383, 8}, {    767,10}, {    207,11}, {    111,12}, \
-    {     63,11}, {    127,10}, {    255, 9}, {    511,10}, \
-    {    271,11}, {    143,10}, {    287, 9}, {    575,10}, \
-    {    303, 9}, {    607,11}, {    159,10}, {    319, 9}, \
-    {    639,10}, {    335, 9}, {    671,12}, {     95,11}, \
-    {    191,10}, {    383, 9}, {    767,11}, {    207,10}, \
-    {    415, 9}, {    831,13}, {     63,12}, {    127,11}, \
-    {    255,10}, {    511,11}, {    271,10}, {    543, 9}, \
-    {   1087,11}, {    287,10}, {    575,11}, {    303,10}, \
-    {    607,12}, {    159,11}, {    319,10}, {    639,11}, \
-    {    335,10}, {    671,11}, {    351,10}, {    703,11}, \
-    {    367,12}, {    191,11}, {    383,10}, {    767,11}, \
-    {    415,10}, {    831,12}, {    223,11}, {    447,10}, \
-    {    895,13}, {    127,12}, {    255,11}, {    511,10}, \
-    {   1023,11}, {    543,10}, {   1087,12}, {    287,11}, \
-    {    575,10}, {   1151,11}, {    607,10}, {   1215,12}, \
-    {    319,11}, {    639,10}, {   1279,11}, {    671,12}, \
-    {    351,11}, {    703,10}, {   1407,13}, {    191,12}, \
-    {    383,11}, {    767,12}, {    415,11}, {    831,10}, \
-    {   1663,12}, {    447,11}, {    895,12}, {    479,14}, \
-    {    127,13}, {    255,12}, {    511,11}, {   1023,12}, \
-    {    543,11}, {   1087,10}, {   2175,12}, {    575,11}, \
-    {   1151,12}, {    607,11}, {   1215,13}, {    319,12}, \
-    {    639,11}, {   1279,12}, {    671,11}, {   1343,10}, \
-    {   2687,12}, {    703,11}, {   1407,12}, {    735,13}, \
-    {    383,12}, {    767,11}, {   1535,12}, {    799,11}, \
-    {   1599,12}, {    831,11}, {   1663,13}, {    447,12}, \
-    {    959,14}, {    255,13}, {    511,12}, {   1087,11}, \
-    {   2175,13}, {    575,12}, {   1215,11}, {   2431,13}, \
-    {    639,12}, {   1343,11}, {   2687,13}, {    703,12}, \
-    {   1407,14}, {    383,13}, {    767,12}, {   1599,13}, \
-    {    831,12}, {   1663,13}, {    959,15}, {    255,14}, \
-    {    511,13}, {   1087,12}, {   2175,13}, {   1215,12}, \
-    {   2431,14}, {    639,13}, {   1343,12}, {   2687,13}, \
-    {   1471,12}, {   2943,14}, {    767,13}, {   1599,12}, \
-    {   3199,13}, {   1663,14}, {    895,13}, {   1855,15}, \
-    {  32768,16}, {  65536,17}, { 131072,18}, { 262144,19}, \
-    { 524288,20}, {1048576,21}, {2097152,22}, {4194304,23}, \
-    {8388608,24} }
-#define MUL_FFT_TABLE3_SIZE 209
-#define MUL_FFT_THRESHOLD                 7296
-
-#define SQR_FFT_MODF_THRESHOLD             308  /* k = 5 */
+  { {    372, 5}, {     13, 6}, {      7, 5}, {     15, 6}, \
+    {      8, 5}, {     17, 6}, {      9, 5}, {     19, 6}, \
+    {     10, 5}, {     21, 6}, {     19, 7}, {     10, 6}, \
+    {     21, 7}, {     11, 6}, {     23, 7}, {     21, 8}, \
+    {     11, 7}, {     25, 8}, {     13, 7}, {     31, 8}, \
+    {     17, 7}, {     35, 8}, {     21, 9}, {     11, 8}, \
+    {     27, 9}, {     15, 8}, {     35, 9}, {     19, 8}, \
+    {     41, 9}, {     23, 8}, {     49, 9}, {     27,10}, \
+    {     15, 9}, {     39,10}, {     23, 9}, {     55,11}, \
+    {     15,10}, {     31, 9}, {     71,10}, {     39, 9}, \
+    {     83,10}, {     47, 9}, {     99,10}, {     55,11}, \
+    {     31,10}, {     63, 9}, {    127,10}, {     79,11}, \
+    {     47,10}, {     95, 9}, {    191,10}, {    103, 9}, \
+    {    207,12}, {     31,11}, {     63,10}, {    127, 9}, \
+    {    255,10}, {    135, 9}, {    271,11}, {     79,10}, \
+    {    159, 9}, {    319,10}, {    167,11}, {     95,10}, \
+    {    191, 9}, {    383, 8}, {    767,10}, {    207, 9}, \
+    {    415,11}, {    111,10}, {    223,12}, {     63,11}, \
+    {    127,10}, {    255, 9}, {    511,10}, {    271, 9}, \
+    {    543,11}, {    143,10}, {    287, 9}, {    575, 8}, \
+    {   1151,10}, {    303, 9}, {    607,10}, {    319, 9}, \
+    {    639,10}, {    335,12}, {     95,11}, {    191,10}, \
+    {    383, 9}, {    767,11}, {    207,10}, {    415, 9}, \
+    {    831,11}, {    223,10}, {    447,13}, {   8192,14}, \
+    {  16384,15}, {  32768,16}, {  65536,17}, { 131072,18}, \
+    { 262144,19}, { 524288,20}, {1048576,21}, {2097152,22}, \
+    {4194304,23}, {8388608,24} }
+#define MUL_FFT_TABLE3_SIZE 106
+#define MUL_FFT_THRESHOLD                 3264
+
+#define SQR_FFT_MODF_THRESHOLD             284  /* k = 5 */
  #define SQR_FFT_TABLE3                                      \
-  { {    308, 5}, {     17, 6}, {      9, 5}, {     19, 6}, \
-    {     19, 7}, {     10, 6}, {     21, 7}, {     11, 6}, \
-    {     23, 7}, {     21, 8}, {     11, 7}, {     24, 8}, \
+  { {    280, 5}, {     13, 6}, {      7, 5}, {     15, 6}, \
+    {      8, 5}, {     17, 6}, {     19, 7}, {     10, 6}, \
+    {     21, 7}, {     21, 8}, {     11, 7}, {     24, 8}, \
      {     13, 7}, {     29, 8}, {     15, 7}, {     31, 8}, \
-    {     21, 9}, {     11, 8}, {     27, 9}, {     15, 8}, \
-    {     33, 9}, {     19, 8}, {     39, 9}, {     23, 8}, \
-    {     47, 9}, {     27,10}, {     15, 9}, {     39,10}, \
-    {     23, 9}, {     51,11}, {     15,10}, {     31, 9}, \
-    {     67,10}, {     39, 9}, {     83,10}, {     47, 9}, \
-    {     95,10}, {     55,11}, {     31,10}, {     79,11}, \
-    {     47,10}, {     95, 9}, {    191, 8}, {    383,12}, \
-    {     31,11}, {     63,10}, {    127, 9}, {    255, 8}, \
-    {    511,10}, {    135, 9}, {    271,11}, {     79,10}, \
-    {    159, 9}, {    319,10}, {    175, 9}, {    351,11}, \
-    {     95,10}, {    191, 9}, {    383,10}, {    207, 9}, \
-    {    415,11}, {    111,12}, {     63,11}, {    127,10}, \
-    {    255, 9}, {    511, 8}, {   1023,10}, {    271, 9}, \
-    {    543,10}, {    287, 9}, {    575, 8}, {   1151,10}, \
-    {    303,11}, {    159,10}, {    319, 9}, {    639,11}, \
-    {    175,10}, {    351,12}, {     95,11}, {    191,10}, \
-    {    383, 9}, {    767,11}, {    207,10}, {    415, 9}, \
-    {    831,11}, {    223,13}, {     63,12}, {    127,11}, \
-    {    255,10}, {    511, 9}, {   1023,11}, {    271,10}, \
-    {    543,11}, {    287,10}, {    575, 9}, {   1151,11}, \
-    {    303,12}, {    159,11}, {    319,10}, {    639,11}, \
-    {    351,10}, {    703,12}, {    191,11}, {    383,10}, \
-    {    767,11}, {    415,10}, {    831,12}, {    223,11}, \
-    {    447,10}, {    895,11}, {    479,10}, {    959,13}, \
-    {    127,12}, {    255,11}, {    511,10}, {   1023,11}, \
-    {    543,12}, {    287,11}, {    575,10}, {   1151,11}, \
-    {    607,12}, {    319,11}, {    639,10}, {   1279,12}, \
-    {    351,11}, {    703,13}, {    191,12}, {    383,11}, \
-    {    767,12}, {    415,11}, {    831,10}, {   1663,12}, \
-    {    447,11}, {    895,12}, {    479,11}, {    959,14}, \
-    {    127,13}, {    255,12}, {    511,11}, {   1023,12}, \
-    {    543,11}, {   1087,10}, {   2175,12}, {    575,11}, \
-    {   1151,12}, {    607,13}, {    319,12}, {    639,11}, \
-    {   1279,12}, {    671,11}, {   1343,12}, {    703,11}, \
-    {   1407,13}, {    383,12}, {    767,11}, {   1535,12}, \
-    {    831,11}, {   1663,13}, {    447,12}, {    959,11}, \
-    {   1919,14}, {    255,13}, {    511,12}, {   1087,11}, \
-    {   2175,13}, {    575,12}, {   1215,11}, {   2431,13}, \
-    {    639,12}, {   1343,13}, {    703,12}, {   1407,14}, \
-    {    383,13}, {    767,12}, {   1535,13}, {    831,12}, \
-    {   1663,13}, {    959,12}, {   1919,15}, {    255,14}, \
-    {    511,13}, {   1087,12}, {   2175,13}, {   1215,12}, \
-    {   2431,14}, {    639,13}, {   1343,12}, {   2687,13}, \
-    {   1407,12}, {   2815,13}, {   1471,14}, {    767,13}, \
-    {   1535,12}, {   3071,13}, {   1663,14}, {    895,13}, \
-    {   1791,12}, {   3839,15}, {  32768,16}, {  65536,17}, \
+    {     17, 7}, {     35, 8}, {     21, 9}, {     11, 8}, \
+    {     27, 9}, {     15, 8}, {     33, 9}, {     19, 8}, \
+    {     41, 9}, {     23, 8}, {     47, 9}, {     27,10}, \
+    {     15, 9}, {     39,10}, {     23, 9}, {     51,11}, \
+    {     15,10}, {     31, 9}, {     67,10}, {     39, 9}, \
+    {     83,10}, {     47, 9}, {     95,10}, {     55,11}, \
+    {     31,10}, {     71, 9}, {    143,10}, {     79,11}, \
+    {     47,10}, {     95, 9}, {    191, 8}, {    383,10}, \
+    {    103,12}, {     31,11}, {     63,10}, {    127, 9}, \
+    {    255, 8}, {    511,10}, {    135, 9}, {    271,10}, \
+    {    143, 9}, {    287,11}, {     79,10}, {    159, 9}, \
+    {    319, 8}, {    639,10}, {    175, 9}, {    351,11}, \
+    {     95,10}, {    191, 9}, {    383, 8}, {    767,10}, \
+    {    207, 9}, {    415,12}, {     63,11}, {    127,10}, \
+    {    255, 9}, {    511,10}, {    271,11}, {    143,10}, \
+    {    287, 9}, {    575,11}, {    159,10}, {    319, 9}, \
+    {    639,11}, {    175,10}, {    351,12}, {     95,11}, \
+    {    191,10}, {    383, 9}, {    767,11}, {    207,10}, \
+    {    415, 9}, {    831,11}, {    223,10}, {    447,13}, \
+    {   8192,14}, {  16384,15}, {  32768,16}, {  65536,17}, \
      { 131072,18}, { 262144,19}, { 524288,20}, {1048576,21}, \
      {2097152,22}, {4194304,23}, {8388608,24} }
-#define SQR_FFT_TABLE3_SIZE 207
+#define SQR_FFT_TABLE3_SIZE 103
  #define SQR_FFT_THRESHOLD                 2752
  
-#define MULLO_BASECASE_THRESHOLD             5
-#define MULLO_DC_THRESHOLD                  34
-#define MULLO_MUL_N_THRESHOLD            10950
+#define MULLO_BASECASE_THRESHOLD             3
+#define MULLO_DC_THRESHOLD                  40
+#define MULLO_MUL_N_THRESHOLD             6440
  
-#define DC_DIV_QR_THRESHOLD                 30
-#define DC_DIVAPPR_Q_THRESHOLD             103
-#define DC_BDIV_QR_THRESHOLD                48
-#define DC_BDIV_Q_THRESHOLD                120
+#define DC_DIV_QR_THRESHOLD                 43
+#define DC_DIVAPPR_Q_THRESHOLD             166
+#define DC_BDIV_QR_THRESHOLD                47
+#define DC_BDIV_Q_THRESHOLD                112
  
  #define INV_MULMOD_BNM1_THRESHOLD           50
-#define INV_NEWTON_THRESHOLD               131
-#define INV_APPR_THRESHOLD                 115
+#define INV_NEWTON_THRESHOLD               181
+#define INV_APPR_THRESHOLD                 165
  
-#define BINV_NEWTON_THRESHOLD              204
+#define BINV_NEWTON_THRESHOLD              214
  #define REDC_1_TO_REDC_N_THRESHOLD          55
  
  #define MU_DIV_QR_THRESHOLD                998
-#define MU_DIVAPPR_Q_THRESHOLD             998
-#define MUPI_DIV_QR_THRESHOLD               61
-#define MU_BDIV_QR_THRESHOLD               889
-#define MU_BDIV_Q_THRESHOLD               1078
+#define MU_DIVAPPR_Q_THRESHOLD            1017
+#define MUPI_DIV_QR_THRESHOLD               84
+#define MU_BDIV_QR_THRESHOLD               855
+#define MU_BDIV_Q_THRESHOLD               1017
+
+#define POWM_SEC_TABLE  4,32,327,1100,2826
  
-#define MATRIX22_STRASSEN_THRESHOLD         11
-#define HGCD_THRESHOLD                      96
-#define GCD_DC_THRESHOLD                   249
-#define GCDEXT_DC_THRESHOLD                209
-#define JACOBI_BASE_METHOD                   1
+#define MATRIX22_STRASSEN_THRESHOLD         12
+#define HGCD_THRESHOLD                     109
+#define HGCD_APPR_THRESHOLD                107
+#define HGCD_REDUCE_THRESHOLD             2121
+#define GCD_DC_THRESHOLD                   348
+#define GCDEXT_DC_THRESHOLD                246
+#define JACOBI_BASE_METHOD                   4
  
  #define GET_STR_DC_THRESHOLD                11
  #define GET_STR_PRECOMPUTE_THRESHOLD        23
-#define SET_STR_DC_THRESHOLD               532
-#define SET_STR_PRECOMPUTE_THRESHOLD      1781
+#define SET_STR_DC_THRESHOLD               650
+#define SET_STR_PRECOMPUTE_THRESHOLD      1713
+
+#define FAC_DSC_THRESHOLD                  562
+#define FAC_ODD_THRESHOLD                   23
diff --git a/mpn/powerpc64/mode64/p5/gmp-mparam.h b/mpn/powerpc64/mode64/p5/gmp-mparam.h

index 89e153461cf8ca979a0a58472a6b16b7e7e6aa81..c1670d7c8e47651530fd631ea1bd9fbef8cd4141 100644 (file)
--- a/mpn/powerpc64/mode64/p5/gmp-mparam.h
+++ b/mpn/powerpc64/mode64/p5/gmp-mparam.h
@@ -1,4 +1,4 @@
-/* gmp-mparam.h -- Compiler/machine parameter header file.
+/* POWER5 gmp-mparam.h -- Compiler/machine parameter header file.
  
  Copyright 1991, 1993, 1994, 1999, 2000, 2001, 2002, 2003, 2009, 2010 Free
  Software Foundation, Inc.
@@ -25,37 +25,41 @@ along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
  
  #define MOD_1_NORM_THRESHOLD                 0  /* always */
  #define MOD_1_UNNORM_THRESHOLD               0  /* always */
-#define MOD_1N_TO_MOD_1_1_THRESHOLD          9
-#define MOD_1U_TO_MOD_1_1_THRESHOLD         10
-#define MOD_1_1_TO_MOD_1_2_THRESHOLD         0  /* never mpn_mod_1_1p */
-#define MOD_1_2_TO_MOD_1_4_THRESHOLD        22
-#define PREINV_MOD_1_TO_MOD_1_THRESHOLD     16
+#define MOD_1N_TO_MOD_1_1_THRESHOLD          6
+#define MOD_1U_TO_MOD_1_1_THRESHOLD          5
+#define MOD_1_1_TO_MOD_1_2_THRESHOLD        15
+#define MOD_1_2_TO_MOD_1_4_THRESHOLD         0  /* never mpn_mod_1s_2p */
+#define PREINV_MOD_1_TO_MOD_1_THRESHOLD     11
  #define USE_PREINV_DIVREM_1                  0
+#define DIV_QR_2_PI2_THRESHOLD           MP_SIZE_T_MAX  /* never */
  #define DIVEXACT_1_THRESHOLD                 0  /* always (native) */
-#define BMOD_1_TO_MOD_1_THRESHOLD           59
-
-#define MUL_TOOM22_THRESHOLD                16
-#define MUL_TOOM33_THRESHOLD                56
-#define MUL_TOOM44_THRESHOLD               118
-#define MUL_TOOM6H_THRESHOLD               206
-#define MUL_TOOM8H_THRESHOLD               309
-
-#define MUL_TOOM32_TO_TOOM43_THRESHOLD      82
-#define MUL_TOOM32_TO_TOOM53_THRESHOLD      91
-#define MUL_TOOM42_TO_TOOM53_THRESHOLD      81
-#define MUL_TOOM42_TO_TOOM63_THRESHOLD      88
-
-#define SQR_BASECASE_THRESHOLD              10
-#define SQR_TOOM2_THRESHOLD                 51
-#define SQR_TOOM3_THRESHOLD                 78
-#define SQR_TOOM4_THRESHOLD                100
-#define SQR_TOOM6_THRESHOLD                150
-#define SQR_TOOM8_THRESHOLD                309
-
-#define MULMOD_BNM1_THRESHOLD                5
-#define SQRMOD_BNM1_THRESHOLD                7
-
-#define MUL_FFT_MODF_THRESHOLD             348  /* k = 5 */
+#define BMOD_1_TO_MOD_1_THRESHOLD           40
+
+#define MUL_TOOM22_THRESHOLD                21
+#define MUL_TOOM33_THRESHOLD                24
+#define MUL_TOOM44_THRESHOLD                70
+#define MUL_TOOM6H_THRESHOLD               262
+#define MUL_TOOM8H_THRESHOLD               393
+
+#define MUL_TOOM32_TO_TOOM43_THRESHOLD      49
+#define MUL_TOOM32_TO_TOOM53_THRESHOLD     126
+#define MUL_TOOM42_TO_TOOM53_THRESHOLD      85
+#define MUL_TOOM42_TO_TOOM63_THRESHOLD      94
+#define MUL_TOOM43_TO_TOOM54_THRESHOLD      70
+
+#define SQR_BASECASE_THRESHOLD               0  /* always (native) */
+#define SQR_TOOM2_THRESHOLD                 24
+#define SQR_TOOM3_THRESHOLD                 81
+#define SQR_TOOM4_THRESHOLD                142
+#define SQR_TOOM6_THRESHOLD                189
+#define SQR_TOOM8_THRESHOLD                284
+
+#define MULMID_TOOM42_THRESHOLD             36
+
+#define MULMOD_BNM1_THRESHOLD               12
+#define SQRMOD_BNM1_THRESHOLD               15
+
+#define MUL_FFT_MODF_THRESHOLD             304  /* k = 5 */
  #define MUL_FFT_TABLE3                                      \
    { {    348, 5}, {     17, 6}, {      9, 5}, {     19, 6}, \
      {     10, 5}, {     21, 6}, {     21, 7}, {     11, 6}, \
@@ -110,9 +114,9 @@ along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
      {  65536,17}, { 131072,18}, { 262144,19}, { 524288,20}, \
      {1048576,21}, {2097152,22}, {4194304,23}, {8388608,24} }
  #define MUL_FFT_TABLE3_SIZE 208
-#define MUL_FFT_THRESHOLD                 3712
+#define MUL_FFT_THRESHOLD                 4224
  
-#define SQR_FFT_MODF_THRESHOLD             272  /* k = 5 */
+#define SQR_FFT_MODF_THRESHOLD             284  /* k = 5 */
  #define SQR_FFT_TABLE3                                      \
    { {    272, 5}, {     15, 6}, {      8, 5}, {     17, 6}, \
      {     19, 7}, {     17, 8}, {      9, 7}, {     21, 8}, \
@@ -163,37 +167,42 @@ along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
      { 262144,19}, { 524288,20}, {1048576,21}, {2097152,22}, \
      {4194304,23}, {8388608,24} }
  #define SQR_FFT_TABLE3_SIZE 190
-#define SQR_FFT_THRESHOLD                 2752
+#define SQR_FFT_THRESHOLD                 3264
  
-#define MULLO_BASECASE_THRESHOLD             5
-#define MULLO_DC_THRESHOLD                  25
-#define MULLO_MUL_N_THRESHOLD             6633
+#define MULLO_BASECASE_THRESHOLD             6
+#define MULLO_DC_THRESHOLD                  60
+#define MULLO_MUL_N_THRESHOLD             7463
  
-#define DC_DIV_QR_THRESHOLD                 29
-#define DC_DIVAPPR_Q_THRESHOLD             102
-#define DC_BDIV_QR_THRESHOLD                47
-#define DC_BDIV_Q_THRESHOLD                112
+#define DC_DIV_QR_THRESHOLD                 58
+#define DC_DIVAPPR_Q_THRESHOLD             232
+#define DC_BDIV_QR_THRESHOLD                78
+#define DC_BDIV_Q_THRESHOLD                238
  
-#define INV_MULMOD_BNM1_THRESHOLD           76
-#define INV_NEWTON_THRESHOLD               129
-#define INV_APPR_THRESHOLD                 109
+#define INV_MULMOD_BNM1_THRESHOLD           92
+#define INV_NEWTON_THRESHOLD               155
+#define INV_APPR_THRESHOLD                 157
  
-#define BINV_NEWTON_THRESHOLD              197
-#define REDC_1_TO_REDC_N_THRESHOLD          54
+#define BINV_NEWTON_THRESHOLD              155
+#define REDC_1_TO_REDC_N_THRESHOLD          61
  
-#define MU_DIV_QR_THRESHOLD                872
-#define MU_DIVAPPR_Q_THRESHOLD             855
-#define MUPI_DIV_QR_THRESHOLD               53
-#define MU_BDIV_QR_THRESHOLD               792
+#define MU_DIV_QR_THRESHOLD                998
+#define MU_DIVAPPR_Q_THRESHOLD             979
+#define MUPI_DIV_QR_THRESHOLD               79
+#define MU_BDIV_QR_THRESHOLD               823
  #define MU_BDIV_Q_THRESHOLD                942
  
-#define MATRIX22_STRASSEN_THRESHOLD         15
-#define HGCD_THRESHOLD                      86
-#define GCD_DC_THRESHOLD                   241
-#define GCDEXT_DC_THRESHOLD                229
-#define JACOBI_BASE_METHOD                   1
+#define MATRIX22_STRASSEN_THRESHOLD         14
+#define HGCD_THRESHOLD                      74
+#define HGCD_APPR_THRESHOLD                155
+#define HGCD_REDUCE_THRESHOLD             2479
+#define GCD_DC_THRESHOLD                   351
+#define GCDEXT_DC_THRESHOLD                288
+#define JACOBI_BASE_METHOD                   4
  
  #define GET_STR_DC_THRESHOLD                12
  #define GET_STR_PRECOMPUTE_THRESHOLD        21
-#define SET_STR_DC_THRESHOLD               532
-#define SET_STR_PRECOMPUTE_THRESHOLD      1655
+#define SET_STR_DC_THRESHOLD               650
+#define SET_STR_PRECOMPUTE_THRESHOLD      1585
+
+#define FAC_DSC_THRESHOLD                  662
+#define FAC_ODD_THRESHOLD                   28
diff --git a/mpn/powerpc64/mode64/p6/aorsmul_1.asm b/mpn/powerpc64/mode64/p6/aorsmul_1.asm

new file mode 100644 (file)

index 0000000..4bd5084
--- /dev/null
+++ b/mpn/powerpc64/mode64/p6/aorsmul_1.asm
@@ -0,0 +1,172 @@
+dnl  PowerPC-64 mpn_addmul_1 and mpn_submul_1 optimised for power6.
+
+dnl  Copyright 1999, 2000, 2001, 2003, 2004, 2005, 2006, 2008, 2010, 2011
+dnl  Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C               mpn_addmul_1    mpn_submul_1
+C               cycles/limb     cycles/limb
+C POWER3/PPC630     ?               ?
+C POWER4/PPC970     ?               ?
+C POWER5            ?               ?
+C POWER6           12.25           12.8
+C POWER7            ?               ?
+
+C TODO
+C  * Reduce register usage.
+C  * Schedule function entry code.
+C  * Unroll more.  8-way unrolling would bring us to 10 c/l, 16-way unrolling
+C    would bring us to 9 c/l.
+C  * Handle n = 1 and perhaps n = 2 separately, without saving any registers.
+
+C INPUT PARAMETERS
+define(`rp',  `r3')
+define(`up',  `r4')
+define(`n',   `r5')
+define(`v0',  `r6')
+
+ifdef(`OPERATION_addmul_1',`
+  define(ADDSUBC,      adde)
+  define(ADDSUB,       addc)
+  define(func,         mpn_addmul_1)
+  define(func_nc,      mpn_addmul_1c)  C FIXME: not really supported
+  define(AM,           `$1')
+  define(SM,           `')
+  define(CLRRSC,       `addic  $1, r0, 0')
+')
+ifdef(`OPERATION_submul_1',`
+  define(ADDSUBC,      subfe)
+  define(ADDSUB,       subfc)
+  define(func,         mpn_submul_1)
+  define(func_nc,      mpn_submul_1c)  C FIXME: not really supported
+  define(AM,           `')
+  define(SM,           `$1')
+  define(CLRRSC,       `subfc  $1, r0, r0')
+')
+
+ASM_START()
+PROLOGUE(func)
+       std     r31, -8(r1)     C save r27-r31 at negative offsets from r1...
+       std     r30, -16(r1)    C ...they are reloaded just before blr
+       std     r29, -24(r1)
+       std     r28, -32(r1)
+       std     r27, -40(r1)
+
+       rldicl. r0, n, 0,62     C r0 = n & 3, set cr0
+       cmpdi   cr6, r0, 2      C cr6 = compare (n & 3) with 2
+       addi    n, n, 3         C compute count...
+       srdi    n, n, 2         C ...for ctr
+       mtctr   n               C copy loop count into ctr
+       beq     cr0, L(b0)      C n & 3 = 0
+       blt     cr6, L(b1)      C n & 3 = 1
+       beq     cr6, L(b2)      C n & 3 = 2, else fall through with n & 3 = 3
+
+L(b3): ld      r8, 0(up)       C 3 leading limbs, then join the loop at l3
+       ld      r7, 8(up)
+       ld      r27, 16(up)
+       addi    up, up, 16
+       addi    rp, rp, 16
+       mulld   r5,  r8, v0
+       mulhdu  r8,  r8, v0
+       mulld   r9,  r7, v0
+       mulhdu  r7,  r7, v0
+       mulld   r11, r27, v0
+       mulhdu  r27, r27, v0
+       ld      r29, -16(rp)
+       ld      r30, -8(rp)
+       ld      r31, 0(rp)
+       addc    r9, r9, r8      C chain each high half into the next low half
+       adde    r11, r11, r7
+       addze   r12, r27        C r12 = top high half + carry
+       ADDSUB  r5, r5, r29     C first add/sub against rp starts a new carry chain
+       b       L(l3)
+
+L(b2): ld      r7, 0(up)       C 2 leading limbs, then join the loop at l2
+       ld      r27, 8(up)
+       addi    up, up, 8
+       addi    rp, rp, 8
+       mulld   r9,  r7, v0
+       mulhdu  r7,  r7, v0
+       mulld   r11, r27, v0
+       mulhdu  r27, r27, v0
+       ld      r30, -8(rp)
+       ld      r31, 0(rp)
+       addc    r11, r11, r7
+       addze   r12, r27
+       ADDSUB  r9, r9, r30
+       b       L(l2)
+
+L(b1): ld      r27, 0(up)      C 1 leading limb, then join the loop at l1
+       ld      r31, 0(rp)
+       mulld   r11, r27, v0
+       mulhdu  r12, r27, v0
+       ADDSUB  r11, r11, r31
+       b       L(l1)
+
+L(b0): addi    up, up, -8      C bias up and rp by -8 to match the loop offsets
+       addi    rp, rp, -8
+       CLRRSC( r12)            C clear r12 and clr/set cy
+
+       ALIGN(32)
+L(top):
+SM(`   subfe   r11, r0, r0')   C complement...
+SM(`   addic   r11, r11, 1')   C ...carry flag
+       ld      r10, 8(up)      C load 4 source limbs
+       ld      r8, 16(up)
+       ld      r7, 24(up)
+       ld      r27, 32(up)
+       addi    up, up, 32      C advance both pointers by 4 limbs
+       addi    rp, rp, 32
+       mulld   r0,  r10, v0    C low halves go to r0, r5, r9, r11...
+       mulhdu  r10, r10, v0    C ...high halves replace the loaded limbs
+       mulld   r5,  r8, v0
+       mulhdu  r8,  r8, v0
+       mulld   r9,  r7, v0
+       mulhdu  r7,  r7, v0
+       mulld   r11, r27, v0
+       mulhdu  r27, r27, v0
+       ld      r28, -24(rp)
+       adde    r0, r0, r12     C add in r12 left by the previous pass or entry code
+       ld      r29, -16(rp)
+       adde    r5, r5, r10
+       ld      r30, -8(rp)
+       ld      r31, 0(rp)
+       adde    r9, r9, r8
+       adde    r11, r11, r7
+       addze   r12, r27        C r12 = top high half + carry, used by the next pass
+       ADDSUB  r0, r0, r28     C combine the products with the rp limbs and store
+       std     r0, -24(rp)
+       ADDSUBC r5, r5, r29
+L(l3): std     r5, -16(rp)
+       ADDSUBC r9, r9, r30
+L(l2): std     r9, -8(rp)
+       ADDSUBC r11, r11, r31
+L(l1): std     r11, 0(rp)
+       bdnz    L(top)          C decrement ctr, loop while it is nonzero
+
+AM(`   addze   r3, r12')       C addmul: r3 = r12 + carry
+SM(`   subfe   r11, r0, r0')           C complement...
+       ld      r31, -8(r1)     C reload the registers saved on entry
+SM(`   subf    r3, r11, r12')  C submul: r3 = r12 - r11
+       ld      r30, -16(r1)
+       ld      r29, -24(r1)
+       ld      r28, -32(r1)
+       ld      r27, -40(r1)
+       blr
+EPILOGUE()
diff --git a/mpn/powerpc64/mode64/p6/gmp-mparam.h b/mpn/powerpc64/mode64/p6/gmp-mparam.h

index bedb2706517a869b72e03a0cf98f463cb500441d..b6e10df2fb8c7423dba34cb2d9c12b6c2b64b63c 100644 (file)
--- a/mpn/powerpc64/mode64/p6/gmp-mparam.h
+++ b/mpn/powerpc64/mode64/p6/gmp-mparam.h
@@ -1,7 +1,7 @@
-/* gmp-mparam.h -- Compiler/machine parameter header file.
+/* POWER6 gmp-mparam.h -- Compiler/machine parameter header file.
  
-Copyright 1991, 1993, 1994, 1999, 2000, 2001, 2002, 2003, 2009, 2010, 2012 Free
-Software Foundation, Inc.
+Copyright 1991, 1993, 1994, 1999, 2000, 2001, 2002, 2003, 2009, 2010, 2011
+Free Software Foundation, Inc.
  
  This file is part of the GNU MP Library.
  
@@ -25,165 +25,126 @@ along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
  
  #define MOD_1_NORM_THRESHOLD                 0  /* always */
  #define MOD_1_UNNORM_THRESHOLD               0  /* always */
-#define MOD_1N_TO_MOD_1_1_THRESHOLD      MP_SIZE_T_MAX  /* never */
-#define MOD_1U_TO_MOD_1_1_THRESHOLD      MP_SIZE_T_MAX
-#define MOD_1_1_TO_MOD_1_2_THRESHOLD     MP_SIZE_T_MAX
-#define MOD_1_2_TO_MOD_1_4_THRESHOLD     MP_SIZE_T_MAX
-#define PREINV_MOD_1_TO_MOD_1_THRESHOLD     55
+#define MOD_1N_TO_MOD_1_1_THRESHOLD          3
+#define MOD_1U_TO_MOD_1_1_THRESHOLD          3
+#define MOD_1_1_TO_MOD_1_2_THRESHOLD        12
+#define MOD_1_2_TO_MOD_1_4_THRESHOLD         0  /* never mpn_mod_1s_2p */
+#define PREINV_MOD_1_TO_MOD_1_THRESHOLD      6
  #define USE_PREINV_DIVREM_1                  0
+#define DIV_QR_2_PI2_THRESHOLD           MP_SIZE_T_MAX  /* never */
  #define DIVEXACT_1_THRESHOLD                 0  /* always (native) */
-#define BMOD_1_TO_MOD_1_THRESHOLD        MP_SIZE_T_MAX  /* never */
-
-#define MUL_TOOM22_THRESHOLD                14
-#define MUL_TOOM33_THRESHOLD                37
-#define MUL_TOOM44_THRESHOLD               160
-#define MUL_TOOM6H_THRESHOLD               177
-#define MUL_TOOM8H_THRESHOLD               321
-
-#define MUL_TOOM32_TO_TOOM43_THRESHOLD      73
-#define MUL_TOOM32_TO_TOOM53_THRESHOLD      86
-#define MUL_TOOM42_TO_TOOM53_THRESHOLD     103
-#define MUL_TOOM42_TO_TOOM63_THRESHOLD      90
-
-#define SQR_BASECASE_THRESHOLD               5
-#define SQR_TOOM2_THRESHOLD                 22
-#define SQR_TOOM3_THRESHOLD                 43
-#define SQR_TOOM4_THRESHOLD                296
-#define SQR_TOOM6_THRESHOLD                309
-#define SQR_TOOM8_THRESHOLD                562
-
-#define MULMOD_BNM1_THRESHOLD               12
+#define BMOD_1_TO_MOD_1_THRESHOLD           21
+
+#define MUL_TOOM22_THRESHOLD                20
+#define MUL_TOOM33_THRESHOLD                50
+#define MUL_TOOM44_THRESHOLD               106
+#define MUL_TOOM6H_THRESHOLD               274
+#define MUL_TOOM8H_THRESHOLD               339
+
+#define MUL_TOOM32_TO_TOOM43_THRESHOLD      62
+#define MUL_TOOM32_TO_TOOM53_THRESHOLD      76
+#define MUL_TOOM42_TO_TOOM53_THRESHOLD      73
+#define MUL_TOOM42_TO_TOOM63_THRESHOLD      66
+#define MUL_TOOM43_TO_TOOM54_THRESHOLD      88
+
+#define SQR_BASECASE_THRESHOLD               0  /* always (native) */
+#define SQR_TOOM2_THRESHOLD                 24
+#define SQR_TOOM3_THRESHOLD                 49
+#define SQR_TOOM4_THRESHOLD                130
+#define SQR_TOOM6_THRESHOLD                226
+#define SQR_TOOM8_THRESHOLD                272
+
+#define MULMID_TOOM42_THRESHOLD             36
+
+#define MULMOD_BNM1_THRESHOLD               14
  #define SQRMOD_BNM1_THRESHOLD               14
  
-#define MUL_FFT_MODF_THRESHOLD             272  /* k = 5 */
+#define MUL_FFT_MODF_THRESHOLD             380  /* k = 5 */
  #define MUL_FFT_TABLE3                                      \
-  { {    272, 5}, {     13, 6}, {      7, 5}, {     15, 6}, \
-    {      8, 5}, {     17, 6}, {      9, 5}, {     19, 6}, \
-    {     13, 7}, {      7, 6}, {     17, 7}, {      9, 6}, \
-    {     19, 7}, {     17, 8}, {      9, 7}, {     20, 8}, \
-    {     11, 7}, {     24, 8}, {     21, 9}, {     11, 8}, \
-    {     25, 9}, {     15, 8}, {     33, 9}, {     19, 8}, \
-    {     39, 9}, {     23, 8}, {     47,10}, {     15, 9}, \
-    {     39,10}, {     23, 9}, {     47,11}, {     15,10}, \
-    {     31, 9}, {     63,10}, {     47,11}, {     31,10}, \
-    {     71,11}, {     47,12}, {     31,11}, {     63,10}, \
-    {    127, 9}, {    255, 8}, {    511,10}, {    143,11}, \
-    {     79,10}, {    159, 9}, {    319, 8}, {    639,10}, \
-    {    175, 9}, {    351,11}, {     95,10}, {    191, 9}, \
-    {    383,12}, {     63,11}, {    127,10}, {    255, 9}, \
-    {    511,11}, {    143,10}, {    287, 9}, {    575,11}, \
-    {    159,10}, {    319, 9}, {    639,11}, {    175,10}, \
-    {    351,12}, {     95,11}, {    191,10}, {    383, 9}, \
-    {    767,13}, {     63,12}, {    127,11}, {    255,10}, \
-    {    511, 9}, {   1023,11}, {    271,10}, {    543,11}, \
-    {    287,10}, {    575,12}, {    159,11}, {    319,10}, \
-    {    639,11}, {    351,10}, {    703, 9}, {   1407,12}, \
-    {    191,11}, {    383,10}, {    767,11}, {    415,10}, \
-    {    831, 9}, {   1663,12}, {    223,11}, {    447,10}, \
-    {    959, 9}, {   1919,13}, {    127,12}, {    255,11}, \
-    {    511,10}, {   1023,12}, {    287,11}, {    575,10}, \
-    {   1151,12}, {    319,11}, {    639,12}, {    351,11}, \
-    {    703,10}, {   1407,13}, {    191,12}, {    383,11}, \
-    {    767,12}, {    415,11}, {    831,10}, {   1663,12}, \
-    {    447,11}, {    959,10}, {   1919, 9}, {   3839,13}, \
-    {    255,12}, {    511,11}, {   1023,12}, {    543,11}, \
-    {   1087,10}, {   2175,12}, {    575,11}, {   1151,13}, \
-    {    319,12}, {    639,11}, {   1279,12}, {    703,11}, \
-    {   1407,10}, {   2815,13}, {    383,12}, {    831,11}, \
-    {   1663,13}, {    447,12}, {    959,11}, {   1919,10}, \
-    {   3839,14}, {    255,13}, {    511,12}, {   1087,11}, \
-    {   2175,13}, {    575,12}, {   1151,13}, {    639,12}, \
-    {   1279,13}, {    703,12}, {   1407,11}, {   2815,14}, \
-    {    383,13}, {    831,12}, {   1663,13}, {    959,12}, \
-    {   1919,11}, {   3839,15}, {    255,14}, {    511,13}, \
-    {   1087,12}, {   2175,13}, {   1151,14}, {    639,13}, \
-    {   1407,12}, {   2815,13}, {   1471,14}, {    767,13}, \
-    {   1663,14}, {    895,13}, {   1919,12}, {   3839,11}, \
-    {   7679,15}, {  32768,16}, {  65536,17}, { 131072,18}, \
-    { 262144,19}, { 524288,20}, {1048576,21}, {2097152,22}, \
-    {4194304,23}, {8388608,24} }
-#define MUL_FFT_TABLE3_SIZE 178
-#define MUL_FFT_THRESHOLD                 1856
-
-#define SQR_FFT_MODF_THRESHOLD             208  /* k = 5 */
+  { {    340, 5}, {     19, 6}, {     10, 5}, {     21, 6}, \
+    {     11, 5}, {     23, 6}, {     21, 7}, {     11, 6}, \
+    {     23, 7}, {     12, 6}, {     25, 7}, {     21, 8}, \
+    {     11, 7}, {     24, 8}, {     13, 7}, {     27, 8}, \
+    {     21, 9}, {     11, 8}, {     25, 9}, {     15, 8}, \
+    {     33, 9}, {     23, 8}, {     47, 9}, {     27,10}, \
+    {     15, 9}, {     39,10}, {     23, 9}, {     47,11}, \
+    {     15,10}, {     31, 9}, {     63,10}, {     47,11}, \
+    {     31,10}, {     71,11}, {     47,12}, {     31,11}, \
+    {     63,10}, {    127, 9}, {    255, 8}, {    511,10}, \
+    {    135, 9}, {    271,11}, {     79, 9}, {    319, 8}, \
+    {    639,10}, {    175,11}, {     95,10}, {    191, 9}, \
+    {    383,10}, {    207,12}, {     63,10}, {    255, 9}, \
+    {    511,10}, {    271, 9}, {    543,11}, {    143,10}, \
+    {    287, 9}, {    575,10}, {    303, 9}, {    607,10}, \
+    {    319, 9}, {    639,11}, {    175,12}, {     95,11}, \
+    {    191,10}, {    383,11}, {    207,10}, {    415,13}, \
+    {   8192,14}, {  16384,15}, {  32768,16}, {  65536,17}, \
+    { 131072,18}, { 262144,19}, { 524288,20}, {1048576,21}, \
+    {2097152,22}, {4194304,23}, {8388608,24} }
+#define MUL_FFT_TABLE3_SIZE 79
+#define MUL_FFT_THRESHOLD                 3520
+
+#define SQR_FFT_MODF_THRESHOLD             308  /* k = 5 */
  #define SQR_FFT_TABLE3                                      \
-  { {    208, 5}, {      7, 4}, {     15, 5}, {     13, 6}, \
-    {      7, 5}, {     15, 6}, {      8, 5}, {     17, 6}, \
-    {     17, 7}, {      9, 6}, {     19, 7}, {     17, 8}, \
-    {      9, 7}, {     20, 8}, {     11, 7}, {     23, 8}, \
-    {     19, 9}, {     11, 8}, {     25, 9}, {     15, 8}, \
+  { {    280, 5}, {     17, 6}, {      9, 5}, {     19, 6}, \
+    {     21, 7}, {     11, 6}, {     23, 7}, {     21, 8}, \
+    {     11, 7}, {     24, 8}, {     13, 7}, {     27, 8}, \
+    {     21, 9}, {     11, 8}, {     25, 9}, {     15, 8}, \
      {     33, 9}, {     19, 8}, {     39, 9}, {     23, 8}, \
-    {     47,10}, {     15, 9}, {     39,10}, {     23, 9}, \
-    {     47,11}, {     15,10}, {     31, 9}, {     63,10}, \
-    {     47,11}, {     31,10}, {     63, 9}, {    127, 8}, \
-    {    255,10}, {     71, 9}, {    143, 8}, {    287,11}, \
-    {     47,12}, {     31,11}, {     63,10}, {    127, 9}, \
-    {    255, 8}, {    511,10}, {    143, 9}, {    287,11}, \
-    {     79,10}, {    159, 9}, {    319, 8}, {    639,10}, \
-    {    175, 9}, {    351, 8}, {    703,11}, {     95,10}, \
-    {    191, 9}, {    383, 8}, {    767,10}, {    207, 9}, \
-    {    415,12}, {     63,11}, {    127,10}, {    255, 9}, \
-    {    511, 8}, {   1023,11}, {    143,10}, {    287, 9}, \
-    {    575,11}, {    159,10}, {    319, 9}, {    639,11}, \
-    {    175,10}, {    351, 9}, {    703,12}, {     95,11}, \
-    {    191,10}, {    383, 9}, {    767,11}, {    207,10}, \
-    {    415,13}, {     63,12}, {    127,11}, {    255,10}, \
-    {    511, 9}, {   1023,11}, {    287,10}, {    575,12}, \
-    {    159,11}, {    319,10}, {    639,11}, {    351,10}, \
-    {    703,12}, {    191,11}, {    383,10}, {    767,11}, \
-    {    415,10}, {    831, 9}, {   1663,12}, {    223,11}, \
-    {    447,10}, {    895,13}, {    127,12}, {    255,11}, \
-    {    511,10}, {   1023,12}, {    287,11}, {    575,10}, \
-    {   1151,12}, {    319,11}, {    639,12}, {    351,11}, \
-    {    703,10}, {   1407,13}, {    191,12}, {    383,11}, \
-    {    767,12}, {    415,11}, {    831,10}, {   1663,12}, \
-    {    447,11}, {    959,14}, {    127,13}, {    255,12}, \
-    {    511,11}, {   1023,12}, {    543,11}, {   1087,10}, \
-    {   2175,12}, {    575,11}, {   1151,13}, {    319,12}, \
-    {    639,11}, {   1279,12}, {    703,11}, {   1407,13}, \
-    {    383,12}, {    831,11}, {   1663,13}, {    447,12}, \
-    {    959,14}, {    255,13}, {    511,12}, {   1087,11}, \
-    {   2175,13}, {    575,12}, {   1215,13}, {    639,12}, \
-    {   1279,13}, {    703,12}, {   1407,14}, {    383,13}, \
-    {    831,12}, {   1663,13}, {    959,15}, {    255,14}, \
-    {    511,13}, {   1087,12}, {   2303,13}, {   1215,14}, \
-    {    639,13}, {   1407,12}, {   2815,14}, {    767,13}, \
-    {   1663,14}, {    895,13}, {   1919,12}, {   3839,15}, \
-    {  32768,16}, {  65536,17}, { 131072,18}, { 262144,19}, \
-    { 524288,20}, {1048576,21}, {2097152,22}, {4194304,23}, \
-    {8388608,24} }
-#define SQR_FFT_TABLE3_SIZE 177
-#define SQR_FFT_THRESHOLD                 1856
-
-#define MULLO_BASECASE_THRESHOLD             3
-#define MULLO_DC_THRESHOLD                  37
-#define MULLO_MUL_N_THRESHOLD             3574
-
-#define DC_DIV_QR_THRESHOLD                 23
-#define DC_DIVAPPR_Q_THRESHOLD              95
-#define DC_BDIV_QR_THRESHOLD                41
-#define DC_BDIV_Q_THRESHOLD                 90
-
-#define INV_MULMOD_BNM1_THRESHOLD           45
-#define INV_NEWTON_THRESHOLD                85
-#define INV_APPR_THRESHOLD                  85
-
-#define BINV_NEWTON_THRESHOLD              151
-#define REDC_1_TO_REDC_N_THRESHOLD          43
-
-#define MU_DIV_QR_THRESHOLD                748
-#define MU_DIVAPPR_Q_THRESHOLD            1210
-#define MUPI_DIV_QR_THRESHOLD               42
-#define MU_BDIV_QR_THRESHOLD               618
-#define MU_BDIV_Q_THRESHOLD                807
-
-#define MATRIX22_STRASSEN_THRESHOLD         10
-#define HGCD_THRESHOLD                      77
-#define GCD_DC_THRESHOLD                   358
-#define GCDEXT_DC_THRESHOLD                241
-#define JACOBI_BASE_METHOD                   3
-
-#define GET_STR_DC_THRESHOLD                12
-#define GET_STR_PRECOMPUTE_THRESHOLD        25
-#define SET_STR_DC_THRESHOLD               552
-#define SET_STR_PRECOMPUTE_THRESHOLD      1416
+    {     47, 9}, {     27,10}, {     15, 9}, {     39,10}, \
+    {     23, 9}, {     47,11}, {     15,10}, {     31, 9}, \
+    {     63,10}, {     47,11}, {     31,10}, {     71, 9}, \
+    {    143,11}, {     47,12}, {     31,11}, {     63, 9}, \
+    {    255, 8}, {    511, 9}, {    271,10}, {    143,11}, \
+    {     79,10}, {    159, 9}, {    319,10}, {    175, 9}, \
+    {    351,11}, {     95,10}, {    191, 9}, {    383,10}, \
+    {    207,12}, {     63,11}, {    127,10}, {    255, 9}, \
+    {    511, 8}, {   1023,10}, {    271, 9}, {    543,11}, \
+    {    143,10}, {    287, 9}, {    575,11}, {    159,10}, \
+    {    319, 9}, {    639,11}, {    175,10}, {    351,12}, \
+    {     95,11}, {    191,10}, {    383,11}, {    207,10}, \
+    {    415,13}, {   8192,14}, {  16384,15}, {  32768,16}, \
+    {  65536,17}, { 131072,18}, { 262144,19}, { 524288,20}, \
+    {1048576,21}, {2097152,22}, {4194304,23}, {8388608,24} }
+#define SQR_FFT_TABLE3_SIZE 80
+#define SQR_FFT_THRESHOLD                 2752
+
+#define MULLO_BASECASE_THRESHOLD             5
+#define MULLO_DC_THRESHOLD                  62
+#define MULLO_MUL_N_THRESHOLD             2995
+
+#define DC_DIV_QR_THRESHOLD                 59
+#define DC_DIVAPPR_Q_THRESHOLD             200
+#define DC_BDIV_QR_THRESHOLD                70
+#define DC_BDIV_Q_THRESHOLD                168
+
+#define INV_MULMOD_BNM1_THRESHOLD           53
+#define INV_NEWTON_THRESHOLD               170
+#define INV_APPR_THRESHOLD                 166
+
+#define BINV_NEWTON_THRESHOLD              220
+#define REDC_1_TO_REDC_N_THRESHOLD          67
+
+#define MU_DIV_QR_THRESHOLD                998
+#define MU_DIVAPPR_Q_THRESHOLD             942
+#define MUPI_DIV_QR_THRESHOLD               57
+#define MU_BDIV_QR_THRESHOLD               889
+#define MU_BDIV_Q_THRESHOLD               1078
+
+#define POWM_SEC_TABLE  4,26,216,804,1731
+
+#define MATRIX22_STRASSEN_THRESHOLD         13
+#define HGCD_THRESHOLD                     106
+#define HGCD_APPR_THRESHOLD                109
+#define HGCD_REDUCE_THRESHOLD             2205
+#define GCD_DC_THRESHOLD                   492
+#define GCDEXT_DC_THRESHOLD                327
+#define JACOBI_BASE_METHOD                   4
+
+#define GET_STR_DC_THRESHOLD                16
+#define GET_STR_PRECOMPUTE_THRESHOLD        28
+#define SET_STR_DC_THRESHOLD               537
+#define SET_STR_PRECOMPUTE_THRESHOLD      1576
+
+#define FAC_DSC_THRESHOLD                  426
+#define FAC_ODD_THRESHOLD                    0  /* always */
diff --git a/mpn/powerpc64/mode64/p6/mul_basecase.asm b/mpn/powerpc64/mode64/p6/mul_basecase.asm

new file mode 100644 (file)

index 0000000..52c5af8
--- /dev/null
+++ b/mpn/powerpc64/mode64/p6/mul_basecase.asm
@@ -0,0 +1,579 @@
+dnl  PowerPC-64 mpn_mul_basecase.
+
+dnl  Copyright 1999, 2000, 2001, 2003, 2004, 2005, 2006, 2008, 2010 Free
+dnl  Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C                  cycles/limb
+C POWER3/PPC630                 ?
+C POWER4/PPC970                 ?
+C POWER5                ?
+C POWER6               12.25
+
+C TODO
+C  * Reduce register usage.  At least 4 fewer registers could be used.
+C  * Unroll more.  8-way unrolling would bring us to 10 c/l, 16-way unrolling
+C    would bring us to 9 c/l.
+C  * The bdz insns for b1 and b2 will never branch.
+C  * Align things better, perhaps by moving things like pointer updates from
+C    before to after loops.
+
+C INPUT PARAMETERS
+define(`rp', `r3')
+define(`up', `r4')
+define(`un', `r5')
+define(`vp', `r6')
+define(`vn', `r7')
+
+define(`v0',      `r25')
+define(`outer_rp', `r22')
+define(`outer_up', `r23')
+
+ASM_START()
+PROLOGUE(mpn_mul_basecase)
+
+C Special code for un <= 2, for efficiency of these important cases,
+C and since it simplifies the default code.
+       cmpdi   cr0, un, 2
+       bgt     cr0, L(un_gt2)
+       cmpdi   cr6, vn, 1
+       ld      r7, 0(vp)
+       ld      r5, 0(up)
+       mulld   r8, r5, r7      C weight 0
+       mulhdu  r9, r5, r7      C weight 1
+       std     r8, 0(rp)
+       beq     cr0, L(2x)
+       std     r9, 8(rp)
+       blr
+       ALIGN(16)
+L(2x): ld      r0, 8(up)
+       mulld   r8, r0, r7      C weight 1
+       mulhdu  r10, r0, r7     C weight 2
+       addc    r9, r9, r8
+       addze   r10, r10
+       bne     cr6, L(2x2)
+       std     r9, 8(rp)
+       std     r10, 16(rp)
+       blr
+       ALIGN(16)
+L(2x2):        ld      r6, 8(vp)
+       nop
+       mulld   r8, r5, r6      C weight 1
+       mulhdu  r11, r5, r6     C weight 2
+       mulld   r12, r0, r6     C weight 2
+       mulhdu  r0, r0, r6      C weight 3
+       addc    r9, r9, r8
+       std     r9, 8(rp)
+       adde    r11, r11, r10
+       addze   r0, r0
+       addc    r11, r11, r12
+       addze   r0, r0
+       std     r11, 16(rp)
+       std     r0, 24(rp)
+       blr
+
+L(un_gt2):
+       std     r31, -8(r1)
+       std     r30, -16(r1)
+       std     r29, -24(r1)
+       std     r28, -32(r1)
+       std     r27, -40(r1)
+       std     r26, -48(r1)
+       std     r25, -56(r1)
+       std     r24, -64(r1)
+       std     r23, -72(r1)
+       std     r22, -80(r1)
+       std     r21, -88(r1)
+       std     r20, -96(r1)
+
+       mr      outer_rp, rp
+       mr      outer_up, up
+
+       ld      v0, 0(vp)       C new v limb
+       addi    vp, vp, 8
+       ld      r26, 0(up)
+
+       rldicl. r0, un, 0,62    C r0 = n & 3, set cr0
+       cmpdi   cr6, r0, 2
+       addi    un, un, 4       C compute count...
+       srdi    un, un, 2       C ...for ctr
+       mtctr   un              C copy inner loop count into ctr
+       beq     cr0, L(b0)
+       blt     cr6, L(b1)
+       beq     cr6, L(b2)
+
+
+       ALIGN(16)
+L(b3):
+       ld      r27, 8(up)
+       ld      r20, 16(up)
+       mulld   r0, r26, v0
+       mulhdu  r31, r26, v0
+       mulld   r24, r27, v0
+       mulhdu  r8, r27, v0
+       mulld   r9, r20, v0
+       mulhdu  r10, r20, v0
+       addc    r24, r24, r31
+       adde    r9, r9, r8
+       addze   r12, r10
+       std     r0, 0(rp)
+       std     r24, 8(rp)
+       std     r9, 16(rp)
+       addi    up, up, 16
+       addi    rp, rp, 16
+       bdz     L(end_m_3)
+
+       ALIGN(32)
+L(lo_m_3):
+       ld      r26, 8(up)
+       ld      r27, 16(up)
+       ld      r20, 24(up)
+       ld      r21, 32(up)
+       mulld   r0, r26, v0
+       mulhdu  r31, r26, v0
+       mulld   r24, r27, v0
+       mulhdu  r8, r27, v0
+       mulld   r9, r20, v0
+       mulhdu  r27, r20, v0
+       mulld   r11, r21, v0
+       mulhdu  r26, r21, v0
+       adde    r0, r0, r12
+       adde    r24, r24, r31
+       std     r0, 8(rp)
+       adde    r9, r9, r8
+       std     r24, 16(rp)
+       adde    r11, r11, r27
+       std     r9, 24(rp)
+       addi    up, up, 32
+       std     r11, 32(rp)
+       addi    rp, rp, 32
+       mr      r12, r26
+       bdnz    L(lo_m_3)
+
+       ALIGN(16)
+L(end_m_3):
+       addze   r12, r12
+       addic.  vn, vn, -1
+       std     r12, 8(rp)
+       beq     L(ret)
+
+       ALIGN(16)
+L(outer_lo_3):
+       mtctr   un              C copy inner loop count into ctr
+       addi    rp, outer_rp, 24
+       addi    up, outer_up, 16
+       addi    outer_rp, outer_rp, 8
+       ld      v0, 0(vp)       C new v limb
+       addi    vp, vp, 8
+       ld      r26, -16(up)
+       ld      r27, -8(up)
+       ld      r20, 0(up)
+       mulld   r0, r26, v0
+       mulhdu  r31, r26, v0
+       mulld   r24, r27, v0
+       mulhdu  r8, r27, v0
+       mulld   r9, r20, v0
+       mulhdu  r10, r20, v0
+       ld      r28, -16(rp)
+       ld      r29, -8(rp)
+       ld      r30, 0(rp)
+       addc    r24, r24, r31
+       adde    r9, r9, r8
+       addze   r12, r10
+       addc    r0, r0, r28
+       std     r0, -16(rp)
+       adde    r24, r24, r29
+       std     r24, -8(rp)
+       adde    r9, r9, r30
+       std     r9, 0(rp)
+       bdz     L(end_3)
+
+       ALIGN(32)               C registers dying
+L(lo_3):
+       ld      r26, 8(up)
+       ld      r27, 16(up)
+       ld      r20, 24(up)     C
+       ld      r21, 32(up)     C
+       addi    up, up, 32      C
+       addi    rp, rp, 32      C
+       mulld   r0, r26, v0     C
+       mulhdu  r10, r26, v0    C 26
+       mulld   r24, r27, v0    C
+       mulhdu  r8, r27, v0     C 27
+       mulld   r9, r20, v0     C
+       mulhdu  r27, r20, v0    C 26
+       mulld   r11, r21, v0    C
+       mulhdu  r26, r21, v0    C 27
+       ld      r28, -24(rp)    C
+       adde    r0, r0, r12     C 0 12
+       ld      r29, -16(rp)    C
+       adde    r24, r24, r10   C 24 10
+       ld      r30, -8(rp)     C
+       ld      r31, 0(rp)      C
+       adde    r9, r9, r8      C 8 9
+       adde    r11, r11, r27   C 27 11
+       addze   r12, r26        C 26
+       addc    r0, r0, r28     C 0 28
+       std     r0, -24(rp)     C 0
+       adde    r24, r24, r29   C 24 29
+       std     r24, -16(rp)    C 24
+       adde    r9, r9, r30     C 9 30
+       std     r9, -8(rp)      C 9
+       adde    r11, r11, r31   C 11 31
+       std     r11, 0(rp)      C 11
+       bdnz    L(lo_3)         C
+
+       ALIGN(16)
+L(end_3):
+       addze   r12, r12
+       addic.  vn, vn, -1
+       std     r12, 8(rp)
+       bne     L(outer_lo_3)
+       b       L(ret)
+
+
+       ALIGN(16)
+L(b1):
+       mulld   r0, r26, v0
+       mulhdu  r12, r26, v0
+       addic   r0, r0, 0
+       std     r0, 0(rp)
+       bdz     L(end_m_1)
+
+       ALIGN(16)
+L(lo_m_1):
+       ld      r26, 8(up)
+       ld      r27, 16(up)
+       ld      r20, 24(up)
+       ld      r21, 32(up)
+       mulld   r0, r26, v0
+       mulhdu  r31, r26, v0
+       mulld   r24, r27, v0
+       mulhdu  r8, r27, v0
+       mulld   r9, r20, v0
+       mulhdu  r27, r20, v0
+       mulld   r11, r21, v0
+       mulhdu  r26, r21, v0
+       adde    r0, r0, r12
+       adde    r24, r24, r31
+       std     r0, 8(rp)
+       adde    r9, r9, r8
+       std     r24, 16(rp)
+       adde    r11, r11, r27
+       std     r9, 24(rp)
+       addi    up, up, 32
+       std     r11, 32(rp)
+       addi    rp, rp, 32
+       mr      r12, r26
+       bdnz    L(lo_m_1)
+
+       ALIGN(16)
+L(end_m_1):
+       addze   r12, r12
+       addic.  vn, vn, -1
+       std     r12, 8(rp)
+       beq     L(ret)
+
+       ALIGN(16)
+L(outer_lo_1):
+       mtctr   un              C copy inner loop count into ctr
+       addi    rp, outer_rp, 8
+       mr      up, outer_up
+       addi    outer_rp, outer_rp, 8
+       ld      v0, 0(vp)       C new v limb
+       addi    vp, vp, 8
+       ld      r26, 0(up)
+       ld      r28, 0(rp)
+       mulld   r0, r26, v0
+       mulhdu  r12, r26, v0
+       addc    r0, r0, r28
+       std     r0, 0(rp)
+       bdz     L(end_1)
+
+       ALIGN(32)               C registers dying
+L(lo_1):
+       ld      r26, 8(up)
+       ld      r27, 16(up)
+       ld      r20, 24(up)     C
+       ld      r21, 32(up)     C
+       addi    up, up, 32      C
+       addi    rp, rp, 32      C
+       mulld   r0, r26, v0     C
+       mulhdu  r10, r26, v0    C 26
+       mulld   r24, r27, v0    C
+       mulhdu  r8, r27, v0     C 27
+       mulld   r9, r20, v0     C
+       mulhdu  r27, r20, v0    C 26
+       mulld   r11, r21, v0    C
+       mulhdu  r26, r21, v0    C 27
+       ld      r28, -24(rp)    C
+       adde    r0, r0, r12     C 0 12
+       ld      r29, -16(rp)    C
+       adde    r24, r24, r10   C 24 10
+       ld      r30, -8(rp)     C
+       ld      r31, 0(rp)      C
+       adde    r9, r9, r8      C 8 9
+       adde    r11, r11, r27   C 27 11
+       addze   r12, r26        C 26
+       addc    r0, r0, r28     C 0 28
+       std     r0, -24(rp)     C 0
+       adde    r24, r24, r29   C 24 29
+       std     r24, -16(rp)    C 24
+       adde    r9, r9, r30     C 9 30
+       std     r9, -8(rp)      C 9
+       adde    r11, r11, r31   C 11 31
+       std     r11, 0(rp)      C 11
+       bdnz    L(lo_1)         C
+
+       ALIGN(16)
+L(end_1):
+       addze   r12, r12
+       addic.  vn, vn, -1
+       std     r12, 8(rp)
+       bne     L(outer_lo_1)
+       b       L(ret)
+
+
+       ALIGN(16)
+L(b0):
+       addi    up, up, -8
+       addi    rp, rp, -8
+       li      r12, 0
+       addic   r12, r12, 0
+       bdz     L(end_m_0)
+
+       ALIGN(16)
+L(lo_m_0):
+       ld      r26, 8(up)
+       ld      r27, 16(up)
+       ld      r20, 24(up)
+       ld      r21, 32(up)
+       mulld   r0, r26, v0
+       mulhdu  r31, r26, v0
+       mulld   r24, r27, v0
+       mulhdu  r8, r27, v0
+       mulld   r9, r20, v0
+       mulhdu  r27, r20, v0
+       mulld   r11, r21, v0
+       mulhdu  r26, r21, v0
+       adde    r0, r0, r12
+       adde    r24, r24, r31
+       std     r0, 8(rp)
+       adde    r9, r9, r8
+       std     r24, 16(rp)
+       adde    r11, r11, r27
+       std     r9, 24(rp)
+       addi    up, up, 32
+       std     r11, 32(rp)
+       addi    rp, rp, 32
+       mr      r12, r26
+       bdnz    L(lo_m_0)
+
+       ALIGN(16)
+L(end_m_0):
+       addze   r12, r12
+       addic.  vn, vn, -1
+       std     r12, 8(rp)
+       beq     L(ret)
+
+       ALIGN(16)
+L(outer_lo_0):
+       mtctr   un              C copy inner loop count into ctr
+       addi    rp, outer_rp, 0
+       addi    up, outer_up, -8
+       addi    outer_rp, outer_rp, 8
+       ld      v0, 0(vp)       C new v limb
+       addi    vp, vp, 8
+       li      r12, 0
+       addic   r12, r12, 0
+       bdz     L(end_0)
+
+       ALIGN(32)               C registers dying
+L(lo_0):
+       ld      r26, 8(up)
+       ld      r27, 16(up)
+       ld      r20, 24(up)     C
+       ld      r21, 32(up)     C
+       addi    up, up, 32      C
+       addi    rp, rp, 32      C
+       mulld   r0, r26, v0     C
+       mulhdu  r10, r26, v0    C 26
+       mulld   r24, r27, v0    C
+       mulhdu  r8, r27, v0     C 27
+       mulld   r9, r20, v0     C
+       mulhdu  r27, r20, v0    C 26
+       mulld   r11, r21, v0    C
+       mulhdu  r26, r21, v0    C 27
+       ld      r28, -24(rp)    C
+       adde    r0, r0, r12     C 0 12
+       ld      r29, -16(rp)    C
+       adde    r24, r24, r10   C 24 10
+       ld      r30, -8(rp)     C
+       ld      r31, 0(rp)      C
+       adde    r9, r9, r8      C 8 9
+       adde    r11, r11, r27   C 27 11
+       addze   r12, r26        C 26
+       addc    r0, r0, r28     C 0 28
+       std     r0, -24(rp)     C 0
+       adde    r24, r24, r29   C 24 29
+       std     r24, -16(rp)    C 24
+       adde    r9, r9, r30     C 9 30
+       std     r9, -8(rp)      C 9
+       adde    r11, r11, r31   C 11 31
+       std     r11, 0(rp)      C 11
+       bdnz    L(lo_0)         C
+
+       ALIGN(16)
+L(end_0):
+       addze   r12, r12
+       addic.  vn, vn, -1
+       std     r12, 8(rp)
+       bne     L(outer_lo_0)
+       b       L(ret)
+
+
+       ALIGN(16)
+L(b2): ld      r27, 8(up)
+       addi    up, up, 8
+       mulld   r0, r26, v0
+       mulhdu  r10, r26, v0
+       mulld   r24, r27, v0
+       mulhdu  r8, r27, v0
+       addc    r24, r24, r10
+       addze   r12, r8
+       std     r0, 0(rp)
+       std     r24, 8(rp)
+       addi    rp, rp, 8
+       bdz     L(end_m_2)
+
+       ALIGN(16)
+L(lo_m_2):
+       ld      r26, 8(up)
+       ld      r27, 16(up)
+       ld      r20, 24(up)
+       ld      r21, 32(up)
+       mulld   r0, r26, v0
+       mulhdu  r31, r26, v0
+       mulld   r24, r27, v0
+       mulhdu  r8, r27, v0
+       mulld   r9, r20, v0
+       mulhdu  r27, r20, v0
+       mulld   r11, r21, v0
+       mulhdu  r26, r21, v0
+       adde    r0, r0, r12
+       adde    r24, r24, r31
+       std     r0, 8(rp)
+       adde    r9, r9, r8
+       std     r24, 16(rp)
+       adde    r11, r11, r27
+       std     r9, 24(rp)
+       addi    up, up, 32
+       std     r11, 32(rp)
+       addi    rp, rp, 32
+       mr      r12, r26
+       bdnz    L(lo_m_2)
+
+       ALIGN(16)
+L(end_m_2):
+       addze   r12, r12
+       addic.  vn, vn, -1
+       std     r12, 8(rp)
+       beq     L(ret)
+
+       ALIGN(16)
+L(outer_lo_2):
+       mtctr   un              C copy inner loop count into ctr
+       addi    rp, outer_rp, 16
+       addi    up, outer_up, 8
+       addi    outer_rp, outer_rp, 8
+       ld      v0, 0(vp)       C new v limb
+       addi    vp, vp, 8
+       ld      r26, -8(up)
+       ld      r27, 0(up)
+       ld      r28, -8(rp)
+       ld      r29, 0(rp)
+       mulld   r0, r26, v0
+       mulhdu  r10, r26, v0
+       mulld   r24, r27, v0
+       mulhdu  r8, r27, v0
+       addc    r24, r24, r10
+       addze   r12, r8
+       addc    r0, r0, r28
+       std     r0, -8(rp)
+       adde    r24, r24, r29
+       std     r24, 0(rp)
+       bdz     L(end_2)
+
+       ALIGN(16)               C registers dying
+L(lo_2):
+       ld      r26, 8(up)
+       ld      r27, 16(up)
+       ld      r20, 24(up)     C
+       ld      r21, 32(up)     C
+       addi    up, up, 32      C
+       addi    rp, rp, 32      C
+       mulld   r0, r26, v0     C
+       mulhdu  r10, r26, v0    C 26
+       mulld   r24, r27, v0    C
+       mulhdu  r8, r27, v0     C 27
+       mulld   r9, r20, v0     C
+       mulhdu  r27, r20, v0    C 26
+       mulld   r11, r21, v0    C
+       mulhdu  r26, r21, v0    C 27
+       ld      r28, -24(rp)    C
+       adde    r0, r0, r12     C 0 12
+       ld      r29, -16(rp)    C
+       adde    r24, r24, r10   C 24 10
+       ld      r30, -8(rp)     C
+       ld      r31, 0(rp)      C
+       adde    r9, r9, r8      C 8 9
+       adde    r11, r11, r27   C 27 11
+       addze   r12, r26        C 26
+       addc    r0, r0, r28     C 0 28
+       std     r0, -24(rp)     C 0
+       adde    r24, r24, r29   C 24 29
+       std     r24, -16(rp)    C 24
+       adde    r9, r9, r30     C 9 30
+       std     r9, -8(rp)      C 9
+       adde    r11, r11, r31   C 11 31
+       std     r11, 0(rp)      C 11
+       bdnz    L(lo_2)         C
+
+       ALIGN(16)
+L(end_2):
+       addze   r12, r12
+       addic.  vn, vn, -1
+       std     r12, 8(rp)
+       bne     L(outer_lo_2)
+C      b       L(ret)
+
+L(ret):        ld      r31, -8(r1)
+       ld      r30, -16(r1)
+       ld      r29, -24(r1)
+       ld      r28, -32(r1)
+       ld      r27, -40(r1)
+       ld      r26, -48(r1)
+       ld      r25, -56(r1)
+       ld      r24, -64(r1)
+       ld      r23, -72(r1)
+       ld      r22, -80(r1)
+       ld      r21, -88(r1)
+       ld      r20, -96(r1)
+       blr
+EPILOGUE()
diff --git a/mpn/powerpc64/mode64/p7/gmp-mparam.h b/mpn/powerpc64/mode64/p7/gmp-mparam.h

index 884bf130f3a674da5a38abfe53585b5353b6bc9f..a2a8f7317506e4c3daa5d7229a6b13e2ae3b9bbd 100644 (file)
--- a/mpn/powerpc64/mode64/p7/gmp-mparam.h
+++ b/mpn/powerpc64/mode64/p7/gmp-mparam.h
@@ -1,7 +1,7 @@
-/* gmp-mparam.h -- Compiler/machine parameter header file.
+/* POWER7 gmp-mparam.h -- Compiler/machine parameter header file.
  
-Copyright 1991, 1993, 1994, 1999, 2000, 2001, 2002, 2003, 2009, 2010, 2012 Free
-Software Foundation, Inc.
+Copyright 1991, 1993, 1994, 1999, 2000, 2001, 2002, 2003, 2009, 2010, 2011
+Free Software Foundation, Inc.
  
  This file is part of the GNU MP Library.
  
@@ -25,178 +25,139 @@ along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
  
  #define MOD_1_NORM_THRESHOLD                 0  /* always */
  #define MOD_1_UNNORM_THRESHOLD               0  /* always */
-#define MOD_1N_TO_MOD_1_1_THRESHOLD         12
-#define MOD_1U_TO_MOD_1_1_THRESHOLD          6
-#define MOD_1_1_TO_MOD_1_2_THRESHOLD         7
-#define MOD_1_2_TO_MOD_1_4_THRESHOLD        34
-#define PREINV_MOD_1_TO_MOD_1_THRESHOLD     16
-#define USE_PREINV_DIVREM_1                  1
+#define MOD_1N_TO_MOD_1_1_THRESHOLD          8
+#define MOD_1U_TO_MOD_1_1_THRESHOLD          5
+#define MOD_1_1_TO_MOD_1_2_THRESHOLD        12
+#define MOD_1_2_TO_MOD_1_4_THRESHOLD        33
+#define PREINV_MOD_1_TO_MOD_1_THRESHOLD     17
+#define USE_PREINV_DIVREM_1                  0
+#define DIV_QR_2_PI2_THRESHOLD           MP_SIZE_T_MAX  /* never */
  #define DIVEXACT_1_THRESHOLD                 0  /* always (native) */
-#define BMOD_1_TO_MOD_1_THRESHOLD           28
+#define BMOD_1_TO_MOD_1_THRESHOLD           38
  
  #define MUL_TOOM22_THRESHOLD                22
  #define MUL_TOOM33_THRESHOLD                73
-#define MUL_TOOM44_THRESHOLD               202
-#define MUL_TOOM6H_THRESHOLD               393
-#define MUL_TOOM8H_THRESHOLD               592
-
-#define MUL_TOOM32_TO_TOOM43_THRESHOLD     137
-#define MUL_TOOM32_TO_TOOM53_THRESHOLD     149
-#define MUL_TOOM42_TO_TOOM53_THRESHOLD     137
-#define MUL_TOOM42_TO_TOOM63_THRESHOLD     149
-
-#define SQR_BASECASE_THRESHOLD              18
-#define SQR_TOOM2_THRESHOLD                 64
-#define SQR_TOOM3_THRESHOLD                 89
-#define SQR_TOOM4_THRESHOLD                184
-#define SQR_TOOM6_THRESHOLD                294
-#define SQR_TOOM8_THRESHOLD                430
-
-#define MULMOD_BNM1_THRESHOLD               17
-#define SQRMOD_BNM1_THRESHOLD               13
-
-#define MUL_FFT_MODF_THRESHOLD             408  /* k = 5 */
+#define MUL_TOOM44_THRESHOLD               154
+#define MUL_TOOM6H_THRESHOLD               270
+#define MUL_TOOM8H_THRESHOLD               369
+
+#define MUL_TOOM32_TO_TOOM43_THRESHOLD      81
+#define MUL_TOOM32_TO_TOOM53_THRESHOLD     122
+#define MUL_TOOM42_TO_TOOM53_THRESHOLD     105
+#define MUL_TOOM42_TO_TOOM63_THRESHOLD     105
+#define MUL_TOOM43_TO_TOOM54_THRESHOLD     112
+
+#define SQR_BASECASE_THRESHOLD               0  /* always (native) */
+#define SQR_TOOM2_THRESHOLD                 30
+#define SQR_TOOM3_THRESHOLD                109
+#define SQR_TOOM4_THRESHOLD                178
+#define SQR_TOOM6_THRESHOLD                303
+#define SQR_TOOM8_THRESHOLD                357
+
+#define MULMID_TOOM42_THRESHOLD             62
+
+#define MULMOD_BNM1_THRESHOLD               16
+#define SQRMOD_BNM1_THRESHOLD               18
+
+#define MUL_FFT_MODF_THRESHOLD             444  /* k = 5 */
  #define MUL_FFT_TABLE3                                      \
-  { {    408, 5}, {     21, 6}, {     11, 5}, {     23, 6}, \
-    {     12, 5}, {     25, 6}, {     21, 7}, {     11, 6}, \
-    {     23, 7}, {     12, 6}, {     25, 7}, {     13, 6}, \
-    {     27, 7}, {     25, 8}, {     13, 7}, {     28, 8}, \
-    {     15, 7}, {     31, 8}, {     17, 7}, {     35, 8}, \
-    {     19, 7}, {     39, 8}, {     21, 9}, {     11, 8}, \
-    {     27, 9}, {     15, 8}, {     35, 9}, {     19, 8}, \
-    {     43, 9}, {     23, 8}, {     49, 9}, {     27,10}, \
-    {     15, 9}, {     31, 8}, {     63, 9}, {     43,10}, \
-    {     23, 9}, {     51,11}, {     15,10}, {     31, 9}, \
-    {     67,10}, {     39, 9}, {     79,10}, {     55,11}, \
+  { {    436, 5}, {     19, 6}, {     10, 5}, {     21, 6}, \
+    {     21, 7}, {     11, 6}, {     23, 7}, {     12, 6}, \
+    {     25, 7}, {     13, 6}, {     27, 7}, {     15, 6}, \
+    {     31, 7}, {     21, 8}, {     11, 7}, {     25, 8}, \
+    {     13, 7}, {     28, 8}, {     15, 7}, {     32, 8}, \
+    {     17, 7}, {     35, 8}, {     19, 7}, {     39, 8}, \
+    {     21, 9}, {     11, 8}, {     29, 9}, {     15, 8}, \
+    {     35, 9}, {     19, 8}, {     41, 9}, {     23, 8}, \
+    {     47, 9}, {     27,10}, {     15, 9}, {     31, 8}, \
+    {     63, 9}, {     43,10}, {     23, 9}, {     51,11}, \
+    {     15,10}, {     31, 9}, {     67,10}, {     39, 9}, \
+    {     79,10}, {     47, 9}, {     95,10}, {     55,11}, \
      {     31,10}, {     79,11}, {     47,10}, {     95,12}, \
      {     31,11}, {     63,10}, {    135,11}, {     79,10}, \
-    {    167,11}, {     95,10}, {    191,11}, {    111,12}, \
-    {     63,11}, {    127,10}, {    255, 9}, {    511,11}, \
-    {    143, 7}, {   2303,10}, {    303,11}, {    159,10}, \
-    {    319, 9}, {    639,12}, {     95,11}, {    191,10}, \
-    {    383,13}, {     63,12}, {    127,11}, {    255,10}, \
-    {    511,11}, {    271,10}, {    543,11}, {    287,10}, \
-    {    575,12}, {    159,11}, {    319,10}, {    639,11}, \
-    {    335,10}, {    671, 9}, {   1343,11}, {    351,10}, \
-    {    703,12}, {    191,11}, {    383,10}, {    799,11}, \
-    {    415,10}, {    831,12}, {    223,11}, {    447,13}, \
-    {    127,12}, {    255,11}, {    511,10}, {   1023,11}, \
-    {    543,10}, {   1087,12}, {    287,11}, {    575,10}, \
-    {   1151,11}, {    607,10}, {   1215,12}, {    319,11}, \
-    {    639,10}, {   1279,11}, {    671,10}, {   1343,12}, \
-    {    351,11}, {    703,13}, {    191,12}, {    383,11}, \
-    {    799,10}, {   1599,12}, {    415,11}, {    831,10}, \
-    {   1663,12}, {    447,11}, {    895,14}, {    127,13}, \
-    {    255,12}, {    543,13}, {    319,12}, {    671,11}, \
-    {   1343,12}, {    703,11}, {   1407,12}, {    735,13}, \
-    {    447,12}, {    959,11}, {   1919,14}, {    255,12}, \
-    {   1087,13}, {    575,12}, {   1215,13}, {    639,12}, \
-    {   1343,11}, {   2687,12}, {   1471,14}, {    383,13}, \
-    {    767,12}, {   1599,13}, {    831,10}, {   6655,12}, \
-    {   1727,13}, {    959,12}, {   1919,11}, {   3839,14}, \
-    {    511,11}, {   4095,13}, {   1087,12}, {   2303,13}, \
-    {   1215,12}, {   2431,14}, {    639,13}, {   1343,12}, \
-    {   2687,13}, {   1471,12}, {   2943,14}, {    767,13}, \
-    {   1599,12}, {   3199,13}, {   1663,14}, {    895,13}, \
-    {   1919,12}, {   3839,15}, {    511,14}, {   1023,13}, \
-    {   2175,14}, {   1151,13}, {   2431,12}, {   4863,14}, \
-    {   1407,13}, {   2943,15}, {    767,14}, {   1663,13}, \
-    {   3327,12}, {   6655,14}, {   1919,13}, {   3839,16}, \
-    {    511,15}, {   1023,14}, {   2175,13}, {   4351,14}, \
-    {   2303,12}, {   9215,13}, {   4863,15}, {   1279,13}, \
-    {   5119,14}, {   2815,13}, {   5887,15}, {   1535,14}, \
+    {    159,11}, {     95,10}, {    191,11}, {    111,12}, \
+    {     63,11}, {    127,10}, {    255,11}, {    143,10}, \
+    {    287, 9}, {    575,10}, {    303,11}, {    159,12}, \
+    {     95,11}, {    191,10}, {    383,13}, {     63,12}, \
+    {    127,11}, {    255,10}, {    511,11}, {    271,10}, \
+    {    543, 9}, {   1087,11}, {    287,10}, {    575,11}, \
+    {    303,12}, {    159,11}, {    319,10}, {    639,11}, \
+    {    335,10}, {    671,11}, {    351,10}, {    703,12}, \
+    {    191,11}, {    383,10}, {    767,11}, {    415,10}, \
+    {    831,12}, {    223,11}, {    447,13}, {   8192,14}, \
      {  16384,15}, {  32768,16}, {  65536,17}, { 131072,18}, \
      { 262144,19}, { 524288,20}, {1048576,21}, {2097152,22}, \
      {4194304,23}, {8388608,24} }
-#define MUL_FFT_TABLE3_SIZE 202
-#define MUL_FFT_THRESHOLD                 3712
+#define MUL_FFT_TABLE3_SIZE 106
+#define MUL_FFT_THRESHOLD                 5248
  
-#define SQR_FFT_MODF_THRESHOLD             332  /* k = 5 */
+#define SQR_FFT_MODF_THRESHOLD             380  /* k = 5 */
  #define SQR_FFT_TABLE3                                      \
-  { {    332, 5}, {     19, 6}, {     10, 5}, {     21, 6}, \
+  { {    308, 5}, {     17, 6}, {      9, 5}, {     19, 6}, \
      {     21, 7}, {     11, 6}, {     23, 7}, {     21, 8}, \
      {     11, 7}, {     24, 8}, {     13, 7}, {     27, 8}, \
      {     15, 7}, {     31, 8}, {     21, 9}, {     11, 8}, \
      {     27, 9}, {     15, 8}, {     33, 9}, {     19, 8}, \
-    {     39, 9}, {     23, 8}, {     47, 9}, {     27,10}, \
+    {     41, 9}, {     23, 8}, {     47, 9}, {     27,10}, \
      {     15, 9}, {     39,10}, {     23, 9}, {     47,11}, \
      {     15,10}, {     31, 9}, {     67,10}, {     39, 9}, \
-    {     79,10}, {     55,11}, {     31,10}, {     79,11}, \
+    {     79,10}, {     47,11}, {     31,10}, {     79,11}, \
      {     47,10}, {     95,12}, {     31,11}, {     63,10}, \
-    {    127, 9}, {    255,11}, {     79,10}, {    159, 9}, \
-    {    319,11}, {     95,10}, {    191, 9}, {    383,12}, \
-    {     63,11}, {    127,10}, {    255, 9}, {    511,10}, \
-    {    271, 9}, {    543,11}, {    143,10}, {    287, 9}, \
-    {    575,10}, {    303, 9}, {    607,10}, {    319, 9}, \
-    {    639,12}, {     95,11}, {    191,10}, {    383,13}, \
-    {     63,12}, {    127,11}, {    255,10}, {    511,11}, \
-    {    271,10}, {    543, 9}, {   1087,11}, {    287,10}, \
-    {    575,11}, {    303,10}, {    607, 9}, {   1215,11}, \
-    {    319,10}, {    639,11}, {    335,10}, {    671,11}, \
-    {    351,10}, {    703,12}, {    191,11}, {    383,10}, \
-    {    767,11}, {    415,10}, {    831,12}, {    223,11}, \
-    {    447,10}, {    895,11}, {    479,10}, {    959,12}, \
-    {    255,11}, {    511,10}, {   1023,11}, {    543,10}, \
-    {   1087,12}, {    287,11}, {    575,10}, {   1151,11}, \
-    {    607,10}, {   1215,12}, {    319,11}, {    639,10}, \
-    {   1279,11}, {    671,12}, {    351,11}, {    703,13}, \
-    {    191,12}, {    383,11}, {    767,10}, {   1535,11}, \
-    {    831,10}, {   1663,12}, {    447,11}, {    895,12}, \
-    {    479,11}, {    959,14}, {    127,13}, {    255,12}, \
-    {    511,11}, {   1023,12}, {    543,11}, {   1087,12}, \
-    {    575,11}, {   1151,12}, {    607,11}, {   1215,13}, \
-    {    319,12}, {    639,11}, {   1279,12}, {    671,11}, \
-    {   1343,12}, {    703,11}, {   1407,13}, {    383,12}, \
-    {    767,11}, {   1535,12}, {    831,11}, {   1663,13}, \
-    {    447,12}, {    959,11}, {   1919,14}, {    255,13}, \
-    {    511,12}, {   1087,13}, {    639,12}, {   1343,13}, \
-    {    703,12}, {   1407,14}, {    383,13}, {    767,12}, \
-    {   1535,13}, {    831,12}, {   1663,13}, {    959,12}, \
-    {   1919,15}, {    255,13}, {   1151,12}, {   2303,13}, \
-    {   1215,12}, {   2431,14}, {    639,13}, {   1343,12}, \
-    {   2687,13}, {   1407,12}, {   2815,13}, {   1471,11}, \
-    {   5887,13}, {   1663,14}, {    895,13}, {   1919,15}, \
-    {    511,14}, {   1023,13}, {   2175,14}, {   1151,13}, \
-    {   2431,12}, {   4863,13}, {   2687,14}, {   1407,13}, \
-    {   2815,15}, {    767,14}, {   1663,13}, {   3455,14}, \
-    {   1791,13}, {   3583,14}, {   1919,13}, {   3839,16}, \
-    {    511,15}, {   1023,14}, {   2175,13}, {   4351,15}, \
-    {   1279,14}, {   2943,13}, {   5887,15}, {   1535,14}, \
-    {  16384,15}, {  32768,16}, {  65536,17}, { 131072,18}, \
-    { 262144,19}, { 524288,20}, {1048576,21}, {2097152,22}, \
-    {4194304,23}, {8388608,24} }
-#define SQR_FFT_TABLE3_SIZE 206
-#define SQR_FFT_THRESHOLD                 2752
+    {    127, 9}, {    255,10}, {    135,11}, {     79,10}, \
+    {    159, 9}, {    319,11}, {     95,10}, {    191, 9}, \
+    {    383,12}, {     63,11}, {    127,10}, {    255, 9}, \
+    {    511,10}, {    271, 9}, {    543,11}, {    143,10}, \
+    {    287, 9}, {    575,11}, {    159,10}, {    319, 9}, \
+    {    639,11}, {    175,12}, {     95,11}, {    191,10}, \
+    {    383, 9}, {    767,11}, {    207,13}, {     63,12}, \
+    {    127,11}, {    255,10}, {    511,11}, {    271,10}, \
+    {    543,11}, {    287,10}, {    575,11}, {    303,12}, \
+    {    159,11}, {    319,10}, {    639, 9}, {   1279,10}, \
+    {    671,11}, {    351,10}, {    703,12}, {    191,11}, \
+    {    383,10}, {    767,11}, {    415,10}, {    831,12}, \
+    {    223,11}, {    447,10}, {    895,11}, {    479,13}, \
+    {   8192,14}, {  16384,15}, {  32768,16}, {  65536,17}, \
+    { 131072,18}, { 262144,19}, { 524288,20}, {1048576,21}, \
+    {2097152,22}, {4194304,23}, {8388608,24} }
+#define SQR_FFT_TABLE3_SIZE 103
+#define SQR_FFT_THRESHOLD                 3712
  
  #define MULLO_BASECASE_THRESHOLD             5
-#define MULLO_DC_THRESHOLD                  23
-#define MULLO_MUL_N_THRESHOLD             7246
+#define MULLO_DC_THRESHOLD                  33
+#define MULLO_MUL_N_THRESHOLD            10323
+
+#define DC_DIV_QR_THRESHOLD                 57
+#define DC_DIVAPPR_Q_THRESHOLD             185
+#define DC_BDIV_QR_THRESHOLD                63
+#define DC_BDIV_Q_THRESHOLD                158
  
-#define DC_DIV_QR_THRESHOLD                 16
-#define DC_DIVAPPR_Q_THRESHOLD              64
-#define DC_BDIV_QR_THRESHOLD                62
-#define DC_BDIV_Q_THRESHOLD                156
+#define INV_MULMOD_BNM1_THRESHOLD           58
+#define INV_NEWTON_THRESHOLD               212
+#define INV_APPR_THRESHOLD                 187
  
-#define INV_MULMOD_BNM1_THRESHOLD           62
-#define INV_NEWTON_THRESHOLD                93
-#define INV_APPR_THRESHOLD                  66
+#define BINV_NEWTON_THRESHOLD              276
+#define REDC_1_TO_REDC_N_THRESHOLD          63
  
-#define BINV_NEWTON_THRESHOLD              294
-#define REDC_1_TO_REDC_N_THRESHOLD          74
+#define MU_DIV_QR_THRESHOLD               1442
+#define MU_DIVAPPR_Q_THRESHOLD            1442
+#define MUPI_DIV_QR_THRESHOLD               91
+#define MU_BDIV_QR_THRESHOLD              1142
+#define MU_BDIV_Q_THRESHOLD               1442
  
-#define MU_DIV_QR_THRESHOLD               1387
-#define MU_DIVAPPR_Q_THRESHOLD            1414
-#define MUPI_DIV_QR_THRESHOLD               31
-#define MU_BDIV_QR_THRESHOLD              1210
-#define MU_BDIV_Q_THRESHOLD               1558
+#define POWM_SEC_TABLE  3,38,270,1487
  
  #define MATRIX22_STRASSEN_THRESHOLD         14
-#define HGCD_THRESHOLD                     108
-#define GCD_DC_THRESHOLD                   333
-#define GCDEXT_DC_THRESHOLD                333
-#define JACOBI_BASE_METHOD                   1
-
-#define GET_STR_DC_THRESHOLD                10
-#define GET_STR_PRECOMPUTE_THRESHOLD        22
-#define SET_STR_DC_THRESHOLD              1532
-#define SET_STR_PRECOMPUTE_THRESHOLD      3850
+#define HGCD_THRESHOLD                     138
+#define HGCD_APPR_THRESHOLD                157
+#define HGCD_REDUCE_THRESHOLD             2578
+#define GCD_DC_THRESHOLD                   573
+#define GCDEXT_DC_THRESHOLD                440
+#define JACOBI_BASE_METHOD                   4
+
+#define GET_STR_DC_THRESHOLD                14
+#define GET_STR_PRECOMPUTE_THRESHOLD        32
+#define SET_STR_DC_THRESHOLD              1517
+#define SET_STR_PRECOMPUTE_THRESHOLD      3007
+
+#define FAC_DSC_THRESHOLD                  680
+#define FAC_ODD_THRESHOLD                   24
diff --git a/mpn/powerpc64/mode64/rsh1add_n.asm b/mpn/powerpc64/mode64/rsh1add_n.asm

index 0cd6cf4e8c69eb21399030e6f60c09f8bd8af1ef..2a5ef30605ea9ba173654094445a0c9df1944404 100644 (file)
--- a/mpn/powerpc64/mode64/rsh1add_n.asm
+++ b/mpn/powerpc64/mode64/rsh1add_n.asm
@@ -1,6 +1,6 @@
  dnl  PowerPC-64 mpn_rsh1add_n -- rp[] = (up[] + vp[]) >> 1
  
-dnl  Copyright 2003, 2005 Free Software Foundation, Inc.
+dnl  Copyright 2003, 2005, 2010 Free Software Foundation, Inc.
  
  dnl  This file is part of the GNU MP Library.
  
@@ -19,15 +19,12 @@ dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
  
  include(`../config.m4')
  
-C              cycles/limb
-C POWER3/PPC630:     2         (1.5 c/l should be possible)
-C POWER4/PPC970:     4         (2.0 c/l should be possible)
-
-C INPUT PARAMETERS
-C rp   r3
-C up   r4
-C vp   r5
-C n    r6
+C                  cycles/limb
+C POWER3/PPC630          2              (1.5 c/l should be possible)
+C POWER4/PPC970          4              (2.0 c/l should be possible)
+C POWER5                 3.5            (2.0 c/l should be possible)
+C POWER6                 4.5
+C POWER7                 3.5
  
  define(`rp',`r3')
  define(`up',`r4')
@@ -60,14 +57,15 @@ PROLOGUE(mpn_rsh1add_n)
  
         bdz     L(end)
  
-L(oop):        ldu     u1, 16(up)
+       ALIGN(32)
+L(top):        ldu     u1, 16(up)
         ldu     v1, 16(vp)
         adde    x, v0, u0
         srdi    s0, x, 1
         rldimi  s1, x, 63, 0
         std     s1, 8(rp)
  
-       bdz     L(exit)
+       bdz     L(exi)
  
         ld      u0, 8(up)
         ld      v0, 8(vp)
@@ -76,7 +74,7 @@ L(oop):       ldu     u1, 16(up)
         rldimi  s0, x, 63, 0
         stdu    s0, 16(rp)
  
-       bdnz    L(oop)
+       bdnz    L(top)
  
  L(end):        adde    x, v0, u0
         srdi    s0, x, 1
@@ -90,7 +88,7 @@ L(end):       adde    x, v0, u0
         mr      r3, r12
         blr
  
-L(exit):       adde    x, v1, u1
+L(exi):        adde    x, v1, u1
         srdi    s1, x, 1
         rldimi  s0, x, 63, 0
         stdu    s0, 16(rp)
diff --git a/mpn/powerpc64/mode64/rsh1sub_n.asm b/mpn/powerpc64/mode64/rsh1sub_n.asm

index e4c78ff2b56437d463dd96ef2b8fc7059a5fb04b..b10eb8ab7056c36d2cf104e38f164037739760ee 100644 (file)
--- a/mpn/powerpc64/mode64/rsh1sub_n.asm
+++ b/mpn/powerpc64/mode64/rsh1sub_n.asm
@@ -1,6 +1,6 @@
  dnl  PowerPC-64 mpn_rsh1sub_n -- rp[] = (up[] - vp[]) >> 1
  
-dnl  Copyright 2003, 2005 Free Software Foundation, Inc.
+dnl  Copyright 2003, 2005, 2010 Free Software Foundation, Inc.
  
  dnl  This file is part of the GNU MP Library.
  
@@ -19,15 +19,12 @@ dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
  
  include(`../config.m4')
  
-C              cycles/limb
-C POWER3/PPC630:     2         (1.5 c/l should be possible)
-C POWER4/PPC970:     4         (2.0 c/l should be possible)
-
-C INPUT PARAMETERS
-C rp   r3
-C up   r4
-C vp   r5
-C n    r6
+C                  cycles/limb
+C POWER3/PPC630          2              (1.5 c/l should be possible)
+C POWER4/PPC970          4              (2.0 c/l should be possible)
+C POWER5                 3.5            (2.0 c/l should be possible)
+C POWER6                 4.5
+C POWER7                 3.5
  
  define(`rp',`r3')
  define(`up',`r4')
@@ -60,14 +57,15 @@ PROLOGUE(mpn_rsh1sub_n)
  
         bdz     L(end)
  
-L(oop):        ldu     u1, 16(up)
+       ALIGN(32)
+L(top):        ldu     u1, 16(up)
         ldu     v1, 16(vp)
         subfe   x, v0, u0
         srdi    s0, x, 1
         rldimi  s1, x, 63, 0
         std     s1, 8(rp)
  
-       bdz     L(exit)
+       bdz     L(exi)
  
         ld      u0, 8(up)
         ld      v0, 8(vp)
@@ -76,7 +74,7 @@ L(oop):       ldu     u1, 16(up)
         rldimi  s0, x, 63, 0
         stdu    s0, 16(rp)
  
-       bdnz    L(oop)
+       bdnz    L(top)
  
  L(end):        subfe   x, v0, u0
         srdi    s0, x, 1
@@ -89,7 +87,7 @@ L(end):       subfe   x, v0, u0
         mr      r3, r12
         blr
  
-L(exit):       subfe   x, v1, u1
+L(exi):        subfe   x, v1, u1
         srdi    s1, x, 1
         rldimi  s0, x, 63, 0
         stdu    s0, 16(rp)
diff --git a/mpn/powerpc64/mode64/sqr_basecase.asm b/mpn/powerpc64/mode64/sqr_basecase.asm

new file mode 100644 (file)

index 0000000..d32ef7e
--- /dev/null
+++ b/mpn/powerpc64/mode64/sqr_basecase.asm
@@ -0,0 +1,852 @@
+dnl  PowerPC-64 mpn_sqr_basecase.
+
+dnl  Contributed to the GNU project by Torbjorn Granlund.
+
+dnl  Copyright 1999, 2000, 2001, 2003, 2004, 2005, 2006, 2008, 2010, 2011 Free
+dnl  Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C                  cycles/limb
+C POWER3/PPC630         6-18
+C POWER4/PPC970          8
+C POWER5                 8
+C POWER6                16.25
+C POWER7                 3.77
+
+C NOTES
+C  * This is very crude; clean it up!
+C  * Try to reduce the number of needed live registers.
+C  * Rewrite for POWER6 to use 8 consecutive muls, not 2 groups of 4.  The
+C    cost will be more live registers.
+C  * Rewrite for POWER7 to use addmul_2 building blocks; this will reduce code
+C    size a lot and speed things up perhaps 25%.
+C  * Use computed goto in order to compress the code.
+C  * Implement a larger final corner.
+C  * Schedule callee-saves register saves into other insns.  This could save
+C    about 5 cycles/call.  (We cannot analogously optimise the restores, since
+C    the sqr_diag_addlsh1 loop has no wind-down code as currently written.)
+C  * Should the alternating std/adde sequences be split?  Some pipelines handle
+C    adde poorly, and might sequentialise all these instructions.
+C  * The sqr_diag_addlsh1 loop was written for POWER6 and its preferences for
+C    adjacent integer multiply insns.  Except for the multiply insns, the code
+C    was not carefully optimised for POWER6 or any other CPU.
+C  * Perform cross-jumping in sqr_diag_addlsh1's feed-in code, into the loop.
+
+C INPUT PARAMETERS
+define(`rp', `r3')
+define(`up', `r4')
+define(`n',  `r5')
+
+define(`rp_outer', `r25')
+define(`up_outer', `r21')
+define(`rp_saved', `r22')
+define(`up_saved', `r23')
+define(`n_saved',  `r24')
+
+ASM_START()
+PROLOGUE(mpn_sqr_basecase)
+       cmpdi   cr0, n, 2
+       bge     cr0, L(ge2)
+       ld      r5, 0(up)       C n = 1
+       nop
+       mulld   r8, r5, r5      C weight 0
+       mulhdu  r9, r5, r5      C weight 1
+       std     r8, 0(rp)
+       std     r9, 8(rp)
+       blr
+       ALIGN(16)
+L(ge2):        bgt     cr0, L(gt2)
+       ld      r0, 0(up)       C n = 2
+       nop
+       mulld   r8, r0, r0      C u0 * u0
+       mulhdu  r9, r0, r0      C u0 * u0
+       ld      r6, 8(up)
+       mulld   r10, r6, r6     C u1 * u1
+       mulhdu  r11, r6, r6     C u1 * u1
+       mulld   r4, r6, r0      C u1 * u0
+       mulhdu  r5, r6, r0      C u1 * u0
+       addc    r4, r4, r4
+       adde    r5, r5, r5
+       addze   r11, r11
+       addc    r9, r9, r4
+       adde    r10, r10, r5
+       addze   r11, r11
+       std     r8, 0(rp)
+       std     r9, 8(rp)
+       std     r10, 16(rp)
+       std     r11, 24(rp)
+       blr
+
+       ALIGN(16)
+L(gt2):        std     r31,  -8(r1)
+       std     r30, -16(r1)
+       std     r29, -24(r1)
+       std     r28, -32(r1)
+       std     r27, -40(r1)
+       std     r26, -48(r1)
+       std     r25, -56(r1)
+       std     r24, -64(r1)
+       std     r23, -72(r1)
+       std     r22, -80(r1)
+       std     r21, -88(r1)
+
+       mr      rp_saved, rp
+       mr      up_saved, up
+       mr      n_saved, n
+       mr      rp_outer, rp
+       mr      up_outer, up
+
+       rldicl. r0, n, 0,62     C r0 = n & 3, set cr0
+       cmpdi   cr6, r0, 2
+       addic   r7, n, 2        C compute count...
+       srdi    r7, r7, 2       C ...for ctr
+       mtctr   r7              C copy count into ctr
+       beq-    cr0, L(b0)
+       blt-    cr6, L(b1)
+       beq-    cr6, L(b2)
+
+L(b3): ld      r6, 0(up)
+       ld      r9, 8(up)
+       ld      r27, 16(up)
+       addi    up, up, 24
+       li      r12, 0          C carry limb
+       bdz     L(em3)
+
+       ALIGN(16)
+L(tm3):        mulld   r0, r9, r6
+       mulhdu  r26, r9, r6
+       mulld   r7, r27, r6
+       mulhdu  r8, r27, r6
+       ld      r9, 0(up)
+       ld      r27, 8(up)
+       adde    r0, r0, r12
+       adde    r7, r7, r26
+       mulld   r26, r9, r6
+       mulhdu  r10, r9, r6
+       mulld   r11, r27, r6
+       mulhdu  r12, r27, r6
+       ld      r9, 16(up)
+       ld      r27, 24(up)
+       std     r0, 8(rp)
+       adde    r26, r26, r8
+       std     r7, 16(rp)
+       adde    r11, r11, r10
+       std     r26, 24(rp)
+       addi    up, up, 32
+       std     r11, 32(rp)
+       addi    rp, rp, 32
+       bdnz    L(tm3)
+
+L(em3):        mulld   r0, r9, r6
+       mulhdu  r26, r9, r6
+       mulld   r7, r27, r6
+       mulhdu  r8, r27, r6
+       adde    r0, r0, r12
+       adde    r7, r7, r26
+       std     r0, 8(rp)
+       std     r7, 16(rp)
+       addze   r8, r8
+       std     r8, 24(rp)
+       addi    n, n, 2
+       b       L(outer_loop)
+
+L(b0): ld      r6, 0(up)
+       ld      r27, 8(up)
+       mulld   r7, r27, r6
+       mulhdu  r12, r27, r6
+       std     r7, 8(rp)
+       addi    rp, rp, 8
+       ld      r9, 16(up)
+       ld      r27, 24(up)
+       addi    up, up, 32
+       bdz     L(em0)
+
+       ALIGN(16)
+L(tm0):        mulld   r0, r9, r6
+       mulhdu  r26, r9, r6
+       mulld   r7, r27, r6
+       mulhdu  r8, r27, r6
+       ld      r9, 0(up)
+       ld      r27, 8(up)
+       adde    r0, r0, r12
+       adde    r7, r7, r26
+       mulld   r26, r9, r6
+       mulhdu  r10, r9, r6
+       mulld   r11, r27, r6
+       mulhdu  r12, r27, r6
+       ld      r9, 16(up)
+       ld      r27, 24(up)
+       std     r0, 8(rp)
+       adde    r26, r26, r8
+       std     r7, 16(rp)
+       adde    r11, r11, r10
+       std     r26, 24(rp)
+       addi    up, up, 32
+       std     r11, 32(rp)
+       addi    rp, rp, 32
+       bdnz    L(tm0)
+
+L(em0):        mulld   r0, r9, r6
+       mulhdu  r26, r9, r6
+       mulld   r7, r27, r6
+       mulhdu  r8, r27, r6
+       adde    r0, r0, r12
+       adde    r7, r7, r26
+       std     r0, 8(rp)
+       std     r7, 16(rp)
+       addze   r8, r8
+       std     r8, 24(rp)
+       addi    n, n, 2
+       b       L(outer_loop_ent_2)
+
+L(b1): ld      r6, 0(up)
+       ld      r9, 8(up)
+       ld      r27, 16(up)
+       mulld   r0, r9, r6
+       mulhdu  r26, r9, r6
+       mulld   r7, r27, r6
+       mulhdu  r12, r27, r6
+       addc    r7, r7, r26
+       std     r0, 8(rp)
+       std     r7, 16(rp)
+       addi    rp, rp, 16
+       ld      r9, 24(up)
+       ld      r27, 32(up)
+       addi    up, up, 40
+       bdz     L(em1)
+
+       ALIGN(16)
+L(tm1):        mulld   r0, r9, r6
+       mulhdu  r26, r9, r6
+       mulld   r7, r27, r6
+       mulhdu  r8, r27, r6
+       ld      r9, 0(up)
+       ld      r27, 8(up)
+       adde    r0, r0, r12
+       adde    r7, r7, r26
+       mulld   r26, r9, r6
+       mulhdu  r10, r9, r6
+       mulld   r11, r27, r6
+       mulhdu  r12, r27, r6
+       ld      r9, 16(up)
+       ld      r27, 24(up)
+       std     r0, 8(rp)
+       adde    r26, r26, r8
+       std     r7, 16(rp)
+       adde    r11, r11, r10
+       std     r26, 24(rp)
+       addi    up, up, 32
+       std     r11, 32(rp)
+       addi    rp, rp, 32
+       bdnz    L(tm1)
+
+L(em1):        mulld   r0, r9, r6
+       mulhdu  r26, r9, r6
+       mulld   r7, r27, r6
+       mulhdu  r8, r27, r6
+       adde    r0, r0, r12
+       adde    r7, r7, r26
+       std     r0, 8(rp)
+       std     r7, 16(rp)
+       addze   r8, r8
+       std     r8, 24(rp)
+       addi    n, n, 2
+       b       L(outer_loop_ent_3)
+
+L(b2): addi    r7, r7, -1      C FIXME
+       mtctr   r7              C FIXME
+       ld      r6, 0(up)
+       ld      r9, 8(up)
+       ld      r27, 16(up)
+       mulld   r0, r9, r6
+       mulhdu  r26, r9, r6
+       mulld   r7, r27, r6
+       mulhdu  r8, r27, r6
+       ld      r9, 24(up)
+       mulld   r11, r9, r6
+       mulhdu  r10, r9, r6
+       addc    r7, r7, r26
+       adde    r11, r11, r8
+       addze   r12, r10
+       std     r0, 8(rp)
+       std     r7, 16(rp)
+       std     r11, 24(rp)
+       addi    rp, rp, 24
+       ld      r9, 32(up)
+       ld      r27, 40(up)
+       addi    up, up, 48
+       bdz     L(em2)
+
+       ALIGN(16)
+L(tm2):        mulld   r0, r9, r6
+       mulhdu  r26, r9, r6
+       mulld   r7, r27, r6
+       mulhdu  r8, r27, r6
+       ld      r9, 0(up)
+       ld      r27, 8(up)
+       adde    r0, r0, r12
+       adde    r7, r7, r26
+       mulld   r26, r9, r6
+       mulhdu  r10, r9, r6
+       mulld   r11, r27, r6
+       mulhdu  r12, r27, r6
+       ld      r9, 16(up)
+       ld      r27, 24(up)
+       std     r0, 8(rp)
+       adde    r26, r26, r8
+       std     r7, 16(rp)
+       adde    r11, r11, r10
+       std     r26, 24(rp)
+       addi    up, up, 32
+       std     r11, 32(rp)
+       addi    rp, rp, 32
+       bdnz    L(tm2)
+
+L(em2):        mulld   r0, r9, r6
+       mulhdu  r26, r9, r6
+       mulld   r7, r27, r6
+       mulhdu  r8, r27, r6
+       adde    r0, r0, r12
+       adde    r7, r7, r26
+       std     r0, 8(rp)
+       std     r7, 16(rp)
+       addze   r8, r8
+       std     r8, 24(rp)
+       addi    n, n, 2
+       b       L(outer_loop_ent_0)
+
+
+L(outer_loop):
+       addi    n, n, -1
+       addi    up_outer, up_outer, 8
+       addi    rp_outer, rp_outer, 16
+
+       mr      up, up_outer
+       addi    rp, rp_outer, 8
+
+       srdi    r0, n, 2
+       mtctr   r0
+
+       bdz     L(outer_end)
+
+       ld      r6, 0(up)
+       ld      r9, 8(up)
+       ld      r27, 16(up)
+       mulld   r0, r9, r6
+       mulhdu  r26, r9, r6
+       mulld   r7, r27, r6
+       mulhdu  r8, r27, r6
+       ld      r9, 24(up)
+       ld      r28, 0(rp)
+       ld      r29, 8(rp)
+       ld      r30, 16(rp)
+       mulld   r11, r9, r6
+       mulhdu  r10, r9, r6
+       addc    r7, r7, r26
+       adde    r11, r11, r8
+       addze   r12, r10
+       addc    r0, r0, r28
+       std     r0, 0(rp)
+       adde    r7, r7, r29
+       std     r7, 8(rp)
+       adde    r11, r11, r30
+       std     r11, 16(rp)
+       addi    rp, rp, 24
+       ld      r9, 32(up)
+       ld      r27, 40(up)
+       addi    up, up, 48
+       bdz     L(ea1)
+
+       ALIGN(16)
+L(ta1):        mulld   r0, r9, r6
+       mulhdu  r26, r9, r6     C 9
+       mulld   r7, r27, r6
+       mulhdu  r8, r27, r6     C 27
+       ld      r9, 0(up)
+       ld      r28, 0(rp)
+       ld      r27, 8(up)
+       ld      r29, 8(rp)
+       adde    r0, r0, r12     C 0 12
+       adde    r7, r7, r26     C 5 7
+       mulld   r26, r9, r6
+       mulhdu  r10, r9, r6     C 9
+       mulld   r11, r27, r6
+       mulhdu  r12, r27, r6    C 27
+       ld      r9, 16(up)
+       ld      r30, 16(rp)
+       ld      r27, 24(up)
+       ld      r31, 24(rp)
+       adde    r26, r26, r8    C 8 5
+       adde    r11, r11, r10   C 10 11
+       addze   r12, r12        C 12
+       addc    r0, r0, r28     C 0 28
+       std     r0, 0(rp)       C 0
+       adde    r7, r7, r29     C 7 29
+       std     r7, 8(rp)       C 7
+       adde    r26, r26, r30   C 5 30
+       std     r26, 16(rp)     C 5
+       adde    r11, r11, r31   C 11 31
+       std     r11, 24(rp)     C 11
+       addi    up, up, 32
+       addi    rp, rp, 32
+       bdnz    L(ta1)
+
+L(ea1):        mulld   r0, r9, r6
+       mulhdu  r26, r9, r6
+       mulld   r7, r27, r6
+       mulhdu  r8, r27, r6
+       ld      r28, 0(rp)
+       ld      r29, 8(rp)
+       adde    r0, r0, r12
+       adde    r7, r7, r26
+       addze   r8, r8
+       addc    r0, r0, r28
+       std     r0, 0(rp)
+       adde    r7, r7, r29
+       std     r7, 8(rp)
+       addze   r8, r8
+       std     r8, 16(rp)
+
+L(outer_loop_ent_0):
+       addi    n, n, -1
+       addi    up_outer, up_outer, 8
+       addi    rp_outer, rp_outer, 16
+
+       mr      up, up_outer
+       addi    rp, rp_outer, 8
+
+       srdi    r0, n, 2
+       mtctr   r0
+
+       ld      r6, 0(up)
+       ld      r9, 8(up)
+       ld      r27, 16(up)
+       ld      r28, 0(rp)
+       ld      r29, 8(rp)
+       mulld   r0, r9, r6
+       mulhdu  r26, r9, r6
+       mulld   r7, r27, r6
+       mulhdu  r8, r27, r6
+       addc    r0, r0, r28
+       adde    r7, r7, r26
+       addze   r12, r8
+       std     r0, 0(rp)
+       adde    r7, r7, r29
+       std     r7, 8(rp)
+       addi    rp, rp, 16
+       ld      r9, 24(up)
+       ld      r27, 32(up)
+       addi    up, up, 40
+       bdz     L(ea0)
+
+       ALIGN(16)
+L(ta0):        mulld   r0, r9, r6
+       mulhdu  r26, r9, r6     C 9
+       mulld   r7, r27, r6
+       mulhdu  r8, r27, r6     C 27
+       ld      r9, 0(up)
+       ld      r28, 0(rp)
+       ld      r27, 8(up)
+       ld      r29, 8(rp)
+       adde    r0, r0, r12     C 0 12
+       adde    r7, r7, r26     C 5 7
+       mulld   r26, r9, r6
+       mulhdu  r10, r9, r6     C 9
+       mulld   r11, r27, r6
+       mulhdu  r12, r27, r6    C 27
+       ld      r9, 16(up)
+       ld      r30, 16(rp)
+       ld      r27, 24(up)
+       ld      r31, 24(rp)
+       adde    r26, r26, r8    C 8 5
+       adde    r11, r11, r10   C 10 11
+       addze   r12, r12        C 12
+       addc    r0, r0, r28     C 0 28
+       std     r0, 0(rp)       C 0
+       adde    r7, r7, r29     C 7 29
+       std     r7, 8(rp)       C 7
+       adde    r26, r26, r30   C 5 30
+       std     r26, 16(rp)     C 5
+       adde    r11, r11, r31   C 11 31
+       std     r11, 24(rp)     C 11
+       addi    up, up, 32
+       addi    rp, rp, 32
+       bdnz    L(ta0)
+
+L(ea0):        mulld   r0, r9, r6
+       mulhdu  r26, r9, r6
+       mulld   r7, r27, r6
+       mulhdu  r8, r27, r6
+       ld      r28, 0(rp)
+       ld      r29, 8(rp)
+       adde    r0, r0, r12
+       adde    r7, r7, r26
+       addze   r8, r8
+       addc    r0, r0, r28
+       std     r0, 0(rp)
+       adde    r7, r7, r29
+       std     r7, 8(rp)
+       addze   r8, r8
+       std     r8, 16(rp)
+
+L(outer_loop_ent_3):
+       addi    n, n, -1
+       addi    up_outer, up_outer, 8
+       addi    rp_outer, rp_outer, 16
+
+       mr      up, up_outer
+       addi    rp, rp_outer, 8
+
+       srdi    r0, n, 2
+       mtctr   r0
+
+       ld      r6, 0(up)
+       ld      r9, 8(up)
+       ld      r28, 0(rp)
+       mulld   r0, r9, r6
+       mulhdu  r12, r9, r6
+       addc    r0, r0, r28
+       std     r0, 0(rp)
+       addi    rp, rp, 8
+       ld      r9, 16(up)
+       ld      r27, 24(up)
+       addi    up, up, 32
+       bdz     L(ea3)
+
+       ALIGN(16)
+L(ta3):        mulld   r0, r9, r6
+       mulhdu  r26, r9, r6     C 9
+       mulld   r7, r27, r6
+       mulhdu  r8, r27, r6     C 27
+       ld      r9, 0(up)
+       ld      r28, 0(rp)
+       ld      r27, 8(up)
+       ld      r29, 8(rp)
+       adde    r0, r0, r12     C 0 12
+       adde    r7, r7, r26     C 5 7
+       mulld   r26, r9, r6
+       mulhdu  r10, r9, r6     C 9
+       mulld   r11, r27, r6
+       mulhdu  r12, r27, r6    C 27
+       ld      r9, 16(up)
+       ld      r30, 16(rp)
+       ld      r27, 24(up)
+       ld      r31, 24(rp)
+       adde    r26, r26, r8    C 8 5
+       adde    r11, r11, r10   C 10 11
+       addze   r12, r12        C 12
+       addc    r0, r0, r28     C 0 28
+       std     r0, 0(rp)       C 0
+       adde    r7, r7, r29     C 7 29
+       std     r7, 8(rp)       C 7
+       adde    r26, r26, r30   C 5 30
+       std     r26, 16(rp)     C 5
+       adde    r11, r11, r31   C 11 31
+       std     r11, 24(rp)     C 11
+       addi    up, up, 32
+       addi    rp, rp, 32
+       bdnz    L(ta3)
+
+L(ea3):        mulld   r0, r9, r6
+       mulhdu  r26, r9, r6
+       mulld   r7, r27, r6
+       mulhdu  r8, r27, r6
+       ld      r28, 0(rp)
+       ld      r29, 8(rp)
+       adde    r0, r0, r12
+       adde    r7, r7, r26
+       addze   r8, r8
+       addc    r0, r0, r28
+       std     r0, 0(rp)
+       adde    r7, r7, r29
+       std     r7, 8(rp)
+       addze   r8, r8
+       std     r8, 16(rp)
+
+
+L(outer_loop_ent_2):
+       addi    n, n, -1
+       addi    up_outer, up_outer, 8
+       addi    rp_outer, rp_outer, 16
+
+       mr      up, up_outer
+       addi    rp, rp_outer, 8
+
+       srdi    r0, n, 2
+       mtctr   r0
+
+       addic   r0, r0, 0
+       li      r12, 0          C cy_limb = 0
+       ld      r6, 0(up)
+       ld      r9, 8(up)
+       ld      r27, 16(up)
+       bdz     L(ea2)
+       addi    up, up, 24
+
+       ALIGN(16)
+L(ta2):        mulld   r0, r9, r6
+       mulhdu  r26, r9, r6     C 9
+       mulld   r7, r27, r6
+       mulhdu  r8, r27, r6     C 27
+       ld      r9, 0(up)
+       ld      r28, 0(rp)
+       ld      r27, 8(up)
+       ld      r29, 8(rp)
+       adde    r0, r0, r12     C 0 12
+       adde    r7, r7, r26     C 5 7
+       mulld   r26, r9, r6
+       mulhdu  r10, r9, r6     C 9
+       mulld   r11, r27, r6
+       mulhdu  r12, r27, r6    C 27
+       ld      r9, 16(up)
+       ld      r30, 16(rp)
+       ld      r27, 24(up)
+       ld      r31, 24(rp)
+       adde    r26, r26, r8    C 8 5
+       adde    r11, r11, r10   C 10 11
+       addze   r12, r12        C 12
+       addc    r0, r0, r28     C 0 28
+       std     r0, 0(rp)       C 0
+       adde    r7, r7, r29     C 7 29
+       std     r7, 8(rp)       C 7
+       adde    r26, r26, r30   C 5 30
+       std     r26, 16(rp)     C 5
+       adde    r11, r11, r31   C 11 31
+       std     r11, 24(rp)     C 11
+       addi    up, up, 32
+       addi    rp, rp, 32
+       bdnz    L(ta2)
+
+L(ea2):        mulld   r0, r9, r6
+       mulhdu  r26, r9, r6
+       mulld   r7, r27, r6
+       mulhdu  r8, r27, r6
+       ld      r28, 0(rp)
+       ld      r29, 8(rp)
+       adde    r0, r0, r12
+       adde    r7, r7, r26
+       addze   r8, r8
+       addc    r0, r0, r28
+       std     r0, 0(rp)
+       adde    r7, r7, r29
+       std     r7, 8(rp)
+       addze   r8, r8
+       std     r8, 16(rp)
+
+       b       L(outer_loop)
+
+L(outer_end):
+       ld      r6, 0(up)
+       ld      r9, 8(up)
+       ld      r11, 0(rp)
+       mulld   r0, r9, r6
+       mulhdu  r8, r9, r6
+       addc    r0, r0, r11
+       std     r0, 0(rp)
+       addze   r8, r8
+       std     r8, 8(rp)
+
+define(`rp',  `rp_saved')
+define(`up',  `r5')
+define(`n',   `r6')
+define(`climb',        `r0')
+
+       addi    r4, rp_saved, 8
+       mr      r5, up_saved
+       mr      r6, n_saved
+
+       rldicl. r0, n, 0,62             C r0 = n & 3, set cr0
+       cmpdi   cr6, r0, 2
+       addi    n, n, 2                 C compute count...
+       srdi    n, n, 2                 C ...for ctr
+       mtctr   n                       C put loop count into ctr
+       beq     cr0, L(xb0)
+       blt     cr6, L(xb1)
+       beq     cr6, L(xb2)
+
+L(xb3):        ld      r6,   0(up)
+       ld      r7,   8(up)
+       ld      r12, 16(up)
+       addi    up, up, 24
+       mulld   r24, r6, r6
+       mulhdu  r25, r6, r6
+       mulld   r26, r7, r7
+       mulhdu  r27, r7, r7
+       mulld   r28, r12, r12
+       mulhdu  r29, r12, r12
+       ld      r10,  8(rp)
+       ld      r11, 16(rp)
+       ld      r6,  24(rp)
+       ld      r7,  32(rp)
+       addc    r10, r10, r10
+       adde    r11, r11, r11
+       adde    r6, r6, r6
+       adde    r7, r7, r7
+       addze   climb, r29
+       addc    r10, r10, r25
+       adde    r11, r11, r26
+       adde    r6, r6, r27
+       adde    r7, r7, r28
+       std     r24,  0(rp)
+       std     r10,  8(rp)
+       std     r11, 16(rp)
+       std     r6,  24(rp)
+       std     r7,  32(rp)
+       addi    rp, rp, 40
+       bdnz    L(top)
+       b       L(end)
+
+L(xb2):        ld      r6,  0(up)
+       ld      r7,  8(up)
+       addi    up, up, 16
+       mulld   r24, r6, r6
+       mulhdu  r25, r6, r6
+       mulld   r26, r7, r7
+       mulhdu  r27, r7, r7
+       ld      r10,  8(rp)
+       ld      r11, 16(rp)
+       addc    r10, r10, r10
+       adde    r11, r11, r11
+       addze   climb, r27
+       addc    r10, r10, r25
+       adde    r11, r11, r26
+       std     r24,  0(rp)
+       std     r10,  8(rp)
+       std     r11, 16(rp)
+       addi    rp, rp, 24
+       bdnz    L(top)
+       b       L(end)
+
+L(xb0):        ld      r6,   0(up)
+       ld      r7,   8(up)
+       ld      r12, 16(up)
+       ld      r23, 24(up)
+       addi    up, up, 32
+       mulld   r24, r6, r6
+       mulhdu  r25, r6, r6
+       mulld   r26, r7, r7
+       mulhdu  r27, r7, r7
+       mulld   r28, r12, r12
+       mulhdu  r29, r12, r12
+       mulld   r30, r23, r23
+       mulhdu  r31, r23, r23
+       ld      r10,  8(rp)
+       ld      r11, 16(rp)
+       ld      r6,  24(rp)
+       ld      r7,  32(rp)
+       ld      r12, 40(rp)
+       ld      r23, 48(rp)
+       addc    r10, r10, r10
+       adde    r11, r11, r11
+       adde    r6, r6, r6
+       adde    r7, r7, r7
+       adde    r12, r12, r12
+       adde    r23, r23, r23
+       addze   climb, r31
+       std     r24,  0(rp)
+       addc    r10, r10, r25
+       std     r10,  8(rp)
+       adde    r11, r11, r26
+       std     r11, 16(rp)
+       adde    r6, r6, r27
+       std     r6,  24(rp)
+       adde    r7, r7, r28
+       std     r7,  32(rp)
+       adde    r12, r12, r29
+       std     r12, 40(rp)
+       adde    r23, r23, r30
+       std     r23, 48(rp)
+       addi    rp, rp, 56
+       bdnz    L(top)
+       b       L(end)
+
+L(xb1):        ld      r6,  0(up)
+       addi    up, up, 8
+       mulld   r24, r6, r6
+       mulhdu  climb, r6, r6
+       std     r24, 0(rp)
+       addic   rp, rp, 8               C clear carry as side-effect
+
+       ALIGN(32)
+L(top):        ld      r6,   0(up)
+       ld      r7,   8(up)
+       ld      r12, 16(up)
+       ld      r23, 24(up)
+       addi    up, up, 32
+       mulld   r24, r6, r6
+       mulhdu  r25, r6, r6
+       mulld   r26, r7, r7
+       mulhdu  r27, r7, r7
+       mulld   r28, r12, r12
+       mulhdu  r29, r12, r12
+       mulld   r30, r23, r23
+       mulhdu  r31, r23, r23
+       ld      r8,   0(rp)
+       ld      r9,   8(rp)
+       adde    r8, r8, r8
+       adde    r9, r9, r9
+       ld      r10, 16(rp)
+       ld      r11, 24(rp)
+       adde    r10, r10, r10
+       adde    r11, r11, r11
+       ld      r6,  32(rp)
+       ld      r7,  40(rp)
+       adde    r6, r6, r6
+       adde    r7, r7, r7
+       ld      r12, 48(rp)
+       ld      r23, 56(rp)
+       adde    r12, r12, r12
+       adde    r23, r23, r23
+       addze   r31, r31
+       addc    r8, r8, climb
+       std     r8,   0(rp)
+       adde    r9, r9, r24
+       std     r9,   8(rp)
+       adde    r10, r10, r25
+       std     r10, 16(rp)
+       adde    r11, r11, r26
+       std     r11, 24(rp)
+       adde    r6, r6, r27
+       std     r6,  32(rp)
+       adde    r7, r7, r28
+       std     r7,  40(rp)
+       adde    r12, r12, r29
+       std     r12, 48(rp)
+       adde    r23, r23, r30
+       std     r23, 56(rp)
+       mr      climb, r31
+       addi    rp, rp, 64
+       bdnz    L(top)
+
+L(end):        addze   climb, climb
+       std     climb,  0(rp)
+
+       ld      r31,  -8(r1)
+       ld      r30, -16(r1)
+       ld      r29, -24(r1)
+       ld      r28, -32(r1)
+       ld      r27, -40(r1)
+       ld      r26, -48(r1)
+       ld      r25, -56(r1)
+       ld      r24, -64(r1)
+       ld      r23, -72(r1)
+       ld      r22, -80(r1)
+       ld      r21, -88(r1)
+       blr
+EPILOGUE()
diff --git a/mpn/powerpc64/mode64/sublsh1_n.asm b/mpn/powerpc64/mode64/sublsh1_n.asm

deleted file mode 100644 (file)

index 69e0dfa..0000000
--- a/mpn/powerpc64/mode64/sublsh1_n.asm
+++ /dev/null
@@ -1,83 +0,0 @@
-dnl  PowerPC-64 mpn_sublsh1_n -- rp[] = up[] - (vp[] << 1)
-
-dnl  Copyright 2003, 2005 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of the GNU Lesser General Public License as published
-dnl  by the Free Software Foundation; either version 3 of the License, or (at
-dnl  your option) any later version.
-
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-dnl  License for more details.
-
-dnl  You should have received a copy of the GNU Lesser General Public License
-dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C              cycles/limb
-C POWER3/PPC630:     2         (1.5 c/l should be possible)
-C POWER4/PPC970:     4         (2.0 c/l should be possible)
-
-C INPUT PARAMETERS
-C rp   r3
-C up   r4
-C vp   r5
-C n    r6
-
-define(`rp',`r3')
-define(`up',`r4')
-define(`vp',`r5')
-
-define(`s0',`r6')
-define(`s1',`r7')
-define(`u0',`r8')
-define(`v0',`r10')
-define(`v1',`r11')
-
-ASM_START()
-PROLOGUE(mpn_sublsh1_n)
-       mtctr   r6              C put n in ctr
-
-       ld      v0, 0(vp)       C load v limb
-       ld      u0, 0(up)       C load u limb
-       addic   up, up, -8      C update up; set cy
-       addi    rp, rp, -8      C update rp
-       sldi    s1, v0, 1
-       bdz     L(end)          C If done, skip loop
-
-L(oop):        ld      v1, 8(vp)       C load v limb
-       subfe   s1, s1, u0      C add limbs with cy, set cy
-       std     s1, 8(rp)       C store result limb
-       srdi    s0, v0, 63      C shift down previous v limb
-       ldu     u0, 16(up)      C load u limb and update up
-       rldimi  s0, v1, 1, 0    C left shift v limb and merge with prev v limb
-
-       bdz     L(exit)         C decrement ctr and exit if done
-
-       ldu     v0, 16(vp)      C load v limb and update vp
-       subfe   s0, s0, u0      C add limbs with cy, set cy
-       stdu    s0, 16(rp)      C store result limb and update rp
-       srdi    s1, v1, 63      C shift down previous v limb
-       ld      u0, 8(up)       C load u limb
-       rldimi  s1, v0, 1, 0    C left shift v limb and merge with prev v limb
-
-       bdnz    L(oop)          C decrement ctr and loop back
-
-L(end):        subfe   r7, s1, u0
-       std     r7, 8(rp)       C store last result limb
-       srdi    r3, v0, 63
-       subfze  r3, r3
-       neg     r3, r3
-       blr
-L(exit):       subfe   r7, s0, u0
-       std     r7, 16(rp)      C store last result limb
-       srdi    r3, v1, 63
-       subfze  r3, r3
-       neg     r3, r3
-       blr
-EPILOGUE()
diff --git a/mpn/powerpc64/mode64/submul_1.asm b/mpn/powerpc64/mode64/submul_1.asm

deleted file mode 100644 (file)

index 3c1e8a5..0000000
--- a/mpn/powerpc64/mode64/submul_1.asm
+++ /dev/null
@@ -1,62 +0,0 @@
-dnl  PowerPC-64 mpn_submul_1 -- Multiply a limb vector with a limb and subtract
-dnl  the result from a second limb vector.
-
-dnl  Copyright 1999, 2000, 2001, 2003, 2004, 2005, 2006 Free Software
-dnl  Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of the GNU Lesser General Public License as published
-dnl  by the Free Software Foundation; either version 3 of the License, or (at
-dnl  your option) any later version.
-
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-dnl  License for more details.
-
-dnl  You should have received a copy of the GNU Lesser General Public License
-dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C              cycles/limb
-C POWER3/PPC630:    6-18
-C POWER4/PPC970:    10
-C POWER5:           10.5
-
-C INPUT PARAMETERS
-define(`rp', `r3')
-define(`up', `r4')
-define(`n', `r5')
-define(`vl', `r6')
-define(`cy', `r7')
-
-ASM_START()
-PROLOGUE(mpn_submul_1)
-       li      cy, 0                   C cy_limb = 0
-
-PROLOGUE(mpn_submul_1c)
-       mtctr   n
-       addic   r0, r0, 0
-       addi    rp, rp, -8
-       ALIGN(16)
-L(top):
-       ld      r0, 0(up)
-       ld      r10, 8(rp)
-       mulld   r9, r0, vl
-       mulhdu  r5, r0, vl
-       adde    r9, r9, cy
-       addi    up, up, 8
-       addze   cy, r5
-       subf    r12, r9, r10
-       not     r0, r10
-       addc    r11, r9, r0             C inverted carry from subf
-       stdu    r12, 8(rp)
-       bdnz    L(top)
-
-       addze   r3, cy
-       blr
-EPILOGUE(mpn_submul_1)
-EPILOGUE(mpn_submul_1c)
diff --git a/mpn/powerpc64/p6/lshift.asm b/mpn/powerpc64/p6/lshift.asm

new file mode 100644 (file)

index 0000000..15283d0
--- /dev/null
+++ b/mpn/powerpc64/p6/lshift.asm
@@ -0,0 +1,121 @@
+dnl  PowerPC-64 mpn_lshift -- rp[] = up[] << cnt
+
+dnl  Copyright 2003, 2005, 2010, 2013 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C                  cycles/limb
+C POWER3/PPC630                 ?
+C POWER4/PPC970                 ?
+C POWER5                2.25
+C POWER6                4
+
+C TODO
+C  * Micro-optimise header code
+C  * Perhaps do 4-way unrolling, for 2.5 c/l on POWER6.  The code is 4236
+C    bytes, 4-way code would become about 50% larger.
+
+C INPUT PARAMETERS
+define(`rp_param',  `r3')       C destination pointer on entry
+define(`up',  `r4')             C source pointer
+define(`n',   `r5')             C operand size in limbs
+define(`cnt', `r6')             C shift count, selects one of SHIFT(1)..SHIFT(63)
+
+define(`tnc',`r0')              C 64 - cnt
+define(`retval',`r3')           C bits shifted out of the most significant limb
+define(`rp',  `r7')             C working destination pointer
+C mpn_lshift: store up[] shifted left by cnt bits into rp[], return the bits shifted out.
+ASM_START()
+PROLOGUE(mpn_lshift)
+
+ifdef(`HAVE_ABI_mode32',`
+       rldicl  n, n, 0,32              C FIXME: avoid this zero extend
+')
+       mflr    r12                     C save lr, it is borrowed for the computed branch
+       sldi    r8, n, 3                C r8 = operand size in bytes
+       sldi    r10, cnt, 6             C multiply cnt by size of a SHIFT block
+       LEAL(   r11, L(e1))             C address of L(e1) label in SHIFT(1)
+       add     up, up, r8              C make up point at end of up[]
+       add     r11, r11, r10           C L(e1) + 64*cnt, rebased to L(oN) or L(eN) below
+       srdi    r10, n, 1               C loop count = n/2, two limbs per pass
+       add     rp, rp_param, r8        C make rp point at end of rp[]
+       subfic  tnc, cnt, 64            C tnc = 64 - cnt
+       rlwinm. r8, n, 0,31,31          C extract bit 0
+       mtctr   r10
+       beq     L(evn)                  C bit 0 clear: n is even
+
+L(odd):        ld      r9, -8(up)              C most significant limb
+       cmpdi   cr0, n, 1               C n = 1?
+       beq     L(1)
+       ld      r8, -16(up)             C second most significant limb
+       addi    r11, r11, -84           C L(o1) - L(e1) - 64
+       mtlr    r11
+       srd     r3, r9, tnc             C retval
+       addi    up, up, 8               C bias up for the offsets used from L(oN) on
+       addi    rp, rp, -8              C bias rp likewise
+       blr                             C branch to L(oN)
+
+L(evn):        ld      r8, -8(up)              C most significant limb
+       ld      r9, -16(up)             C second most significant limb
+       addi    r11, r11, -64           C undo one block: r11 = L(eN)
+       mtlr    r11
+       srd     r3, r8, tnc             C retval
+       blr                             C branch to L(eN)
+
+L(1):  srd     r3, r9, tnc             C retval
+       sld     r8, r9, cnt             C the single result limb
+       std     r8, -8(rp)
+       mtlr    r12                     C put back the saved return address
+ifdef(`HAVE_ABI_mode32',
+`      mr      r4, r3
+       srdi    r3, r3, 32
+')
+       blr                             C return (mode32: r3 = high half, r4 = low half)
+
+C SHIFT(N) is the loop specialised for cnt = N.  Each expansion is 16 instructions
+C (64 bytes), so block N sits 64*(N-1) bytes after block 1; entries are L(oN), L(eN).
+define(SHIFT,`
+L(lo$1):ld     r8, -24(up)
+       std     r11, -8(rp)
+       addi    rp, rp, -16
+L(o$1):        srdi    r10, r8, eval(64-$1)
+       rldimi  r10, r9, $1, 0
+       ld      r9, -32(up)
+       addi    up, up, -16
+       std     r10, 0(rp)
+L(e$1):        srdi    r11, r9, eval(64-$1)
+       rldimi  r11, r8, $1, 0
+       bdnz    L(lo$1)
+       std     r11, -8(rp)
+       sldi    r10, r9, $1
+       b       L(com)
+       nop
+       nop
+')
+
+       ALIGN(64)
+forloop(`i',1,63,`SHIFT(i)')
+
+L(com):        std     r10, -16(rp)            C least significant result limb
+       mtlr    r12                     C put back the saved return address
+ifdef(`HAVE_ABI_mode32',
+`      mr      r4, r3
+       srdi    r3, r3, 32
+')
+       blr                             C return (mode32: r3 = high half, r4 = low half)
+EPILOGUE()
+ASM_END()
diff --git a/mpn/powerpc64/p6/lshiftc.asm b/mpn/powerpc64/p6/lshiftc.asm

new file mode 100644 (file)

index 0000000..146579a
--- /dev/null
+++ b/mpn/powerpc64/p6/lshiftc.asm
@@ -0,0 +1,125 @@
+dnl  PowerPC-64 mpn_lshiftc -- rp[] = ~up[] << cnt
+
+dnl  Copyright 2003, 2005, 2010, 2013 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C                  cycles/limb
+C POWER3/PPC630                 ?
+C POWER4/PPC970                 ?
+C POWER5                2.25
+C POWER6                4
+
+C TODO
+C  * Micro-optimise header code
+C  * Perhaps do 4-way unrolling, for 2.5 c/l on POWER6.  The code is 4236
+C    bytes, 4-way code would become about 50% larger.
+
+C INPUT PARAMETERS
+define(`rp_param',  `r3')       C destination pointer on entry
+define(`up',  `r4')             C source pointer
+define(`n',   `r5')             C operand size in limbs
+define(`cnt', `r6')             C shift count, selects one of SHIFT(1)..SHIFT(63)
+
+define(`tnc',`r0')              C 64 - cnt
+define(`retval',`r3')           C bits shifted out of the most significant limb
+define(`rp',  `r7')             C working destination pointer
+C mpn_lshiftc: store the complement of (up[] << cnt) into rp[]; the return value is not complemented.
+ASM_START()
+PROLOGUE(mpn_lshiftc)
+
+ifdef(`HAVE_ABI_mode32',`
+       rldicl  n, n, 0,32              C FIXME: avoid this zero extend
+')
+       mflr    r12                     C save lr, it is borrowed for the computed branch
+       sldi    r8, n, 3                C r8 = operand size in bytes
+       sldi    r10, cnt, 6             C multiply cnt by size of a SHIFT block
+       LEAL(   r11, L(e1))             C address of L(e1) label in SHIFT(1)
+       add     up, up, r8              C make up point at end of up[]
+       add     r11, r11, r10           C L(e1) + 64*cnt, rebased to L(oN) or L(eN) below
+       srdi    r10, n, 1               C loop count = n/2, two limbs per pass
+       add     rp, rp_param, r8        C make rp point at end of rp[]
+       subfic  tnc, cnt, 64            C tnc = 64 - cnt
+       rlwinm. r8, n, 0,31,31          C extract bit 0
+       mtctr   r10
+       beq     L(evn)                  C bit 0 clear: n is even
+
+L(odd):        ld      r9, -8(up)              C most significant limb
+       cmpdi   cr0, n, 1               C n = 1?
+       beq     L(1)
+       ld      r8, -16(up)             C second most significant limb
+       addi    r11, r11, -88           C L(o1) - L(e1) - 64
+       mtlr    r11
+       srd     r3, r9, tnc             C retval
+       addi    up, up, 8               C bias up for the offsets used from L(oN) on
+       addi    rp, rp, -8              C bias rp likewise
+       blr                             C branch to L(oN)
+
+L(evn):        ld      r8, -8(up)              C most significant limb
+       ld      r9, -16(up)             C second most significant limb
+       addi    r11, r11, -64           C undo one block: r11 = L(eN)
+       mtlr    r11
+       srd     r3, r8, tnc             C retval
+       blr                             C branch to L(eN)
+
+L(1):  srd     r3, r9, tnc             C retval
+       sld     r8, r9, cnt             C the single shifted limb
+       nor     r8, r8, r8              C complement after shifting
+       std     r8, -8(rp)
+       mtlr    r12                     C put back the saved return address
+ifdef(`HAVE_ABI_mode32',
+`      mr      r4, r3
+       srdi    r3, r3, 32
+')
+       blr                             C return (mode32: r3 = high half, r4 = low half)
+
+C SHIFT(N) is the loop specialised for cnt = N.  Each expansion is 16 instructions
+C (64 bytes), so block N sits 64*(N-1) bytes after block 1; entries are L(oN), L(eN).
+define(SHIFT,`
+L(lo$1):ld     r8, -24(up)
+       nor     r11, r11, r11
+       std     r11, -8(rp)
+       addi    rp, rp, -16
+L(o$1):        srdi    r10, r8, eval(64-$1)
+       rldimi  r10, r9, $1, 0
+       ld      r9, -32(up)
+       addi    up, up, -16
+       nor     r10, r10, r10
+       std     r10, 0(rp)
+L(e$1):        srdi    r11, r9, eval(64-$1)
+       rldimi  r11, r8, $1, 0
+       bdnz    L(lo$1)
+       sldi    r10, r9, $1
+       b       L(com)
+       nop
+')
+
+       ALIGN(64)
+forloop(`i',1,63,`SHIFT(i)')
+
+L(com):        nor     r11, r11, r11           C complement the limb left pending by L(eN)
+       nor     r10, r10, r10           C complement the least significant limb
+       std     r11, -8(rp)
+       std     r10, -16(rp)            C least significant result limb
+       mtlr    r12                     C put back the saved return address
+ifdef(`HAVE_ABI_mode32',
+`      mr      r4, r3
+       srdi    r3, r3, 32
+')
+       blr                             C return (mode32: r3 = high half, r4 = low half)
+EPILOGUE()
+ASM_END()
diff --git a/mpn/powerpc64/p6/rshift.asm b/mpn/powerpc64/p6/rshift.asm

new file mode 100644 (file)

index 0000000..6a1e0c2
--- /dev/null
+++ b/mpn/powerpc64/p6/rshift.asm
@@ -0,0 +1,120 @@
+dnl  PowerPC-64 mpn_rshift -- rp[] = up[] >> cnt
+
+dnl  Copyright 2003, 2005, 2010, 2013 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C                  cycles/limb
+C POWER3/PPC630                 ?
+C POWER4/PPC970                 ?
+C POWER5                2
+C POWER6                3.5  (mysteriously 3.0 for cnt=1)
+
+C TODO
+C  * Micro-optimise header code
+C  * Perhaps do 4-way unrolling, for 2.5 c/l on POWER6.  The code is 4248
+C    bytes, 4-way code would become about 50% larger.
+
+C INPUT PARAMETERS
+define(`rp_param',  `r3')       C destination pointer on entry
+define(`up',  `r4')             C source pointer
+define(`n',   `r5')             C operand size in limbs
+define(`cnt', `r6')             C shift count, selects one of SHIFT(1)..SHIFT(63)
+
+define(`tnc',`r0')              C 64 - cnt
+define(`retval',`r3')           C bits shifted out of the least significant limb
+define(`rp',  `r7')             C working destination pointer
+C mpn_rshift: store up[] shifted right by cnt bits into rp[], return the bits shifted out.
+ASM_START()
+PROLOGUE(mpn_rshift)
+
+ifdef(`HAVE_ABI_mode32',`
+       rldicl  n, n, 0,32              C FIXME: avoid this zero extend
+')
+       mflr    r12                     C save lr, it is borrowed for the computed branch
+       LEAL(   r11, L(e1))             C address of L(e1) label in SHIFT(1)
+       sldi    r10, cnt, 6             C multiply cnt by size of a SHIFT block
+       add     r11, r11, r10           C L(e1) + 64*cnt, rebased to L(oN) or L(eN) below
+       srdi    r10, n, 1               C loop count = n/2, two limbs per pass
+       mr      rp, rp_param            C free r3 for the return value
+       subfic  tnc, cnt, 64            C tnc = 64 - cnt
+       rlwinm. r8, n, 0,31,31          C extract bit 0
+       mtctr   r10
+       beq     L(evn)                  C bit 0 clear: n is even
+
+L(odd):        ld      r9, 0(up)               C least significant limb
+       cmpdi   cr0, n, 1               C n = 1?
+       beq     L(1)
+       ld      r8, 8(up)               C second least significant limb
+       addi    r11, r11, -84           C L(o1) - L(e1) - 64
+       mtlr    r11
+       sld     r3, r9, tnc             C retval
+       addi    up, up, 8               C bias up for the offsets used from L(oN) on
+       addi    rp, rp, 8               C bias rp likewise
+       blr                             C branch to L(oN)
+
+L(evn):        ld      r8, 0(up)               C least significant limb
+       ld      r9, 8(up)               C second least significant limb
+       addi    r11, r11, -64           C undo one block: r11 = L(eN)
+       mtlr    r11
+       sld     r3, r8, tnc             C retval
+       addi    up, up, 16              C step past the two limbs just loaded
+       blr                             C branch to L(eN)
+
+L(1):  sld     r3, r9, tnc             C retval
+       srd     r8, r9, cnt             C the single result limb
+       std     r8, 0(rp)
+       mtlr    r12                     C put back the saved return address
+ifdef(`HAVE_ABI_mode32',
+`      mr      r4, r3
+       srdi    r3, r3, 32
+')
+       blr                             C return (mode32: r3 = high half, r4 = low half)
+
+C SHIFT(N) is the loop specialised for cnt = N.  Each expansion is 16 instructions
+C (64 bytes), so block N sits 64*(N-1) bytes after block 1; entries are L(oN), L(eN).
+define(SHIFT,`
+L(lo$1):ld     r8, 0(up)
+       std     r11, 0(rp)
+       addi    rp, rp, 16
+L(o$1):        srdi    r10, r9, $1
+       rldimi  r10, r8, eval(64-$1), 0
+       ld      r9, 8(up)
+       addi    up, up, 16
+       std     r10, -8(rp)
+L(e$1):        srdi    r11, r8, $1
+       rldimi  r11, r9, eval(64-$1), 0
+       bdnz    L(lo$1)
+       std     r11, 0(rp)
+       srdi    r10, r9, $1
+       b       L(com)
+       nop
+       nop
+')
+
+       ALIGN(64)
+forloop(`i',1,63,`SHIFT(i)')
+
+L(com):        std     r10, 8(rp)              C most significant result limb
+       mtlr    r12                     C put back the saved return address
+ifdef(`HAVE_ABI_mode32',
+`      mr      r4, r3
+       srdi    r3, r3, 32
+')
+       blr                             C return (mode32: r3 = high half, r4 = low half)
+EPILOGUE()
+ASM_END()
diff --git a/mpn/powerpc64/p7/hamdist.asm b/mpn/powerpc64/p7/hamdist.asm

new file mode 100644 (file)

index 0000000..07fe41f
--- /dev/null
+++ b/mpn/powerpc64/p7/hamdist.asm
@@ -0,0 +1,99 @@
+dnl  PowerPC-64 mpn_hamdist.
+
+dnl  Copyright 2012 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C                   cycles/limb
+C POWER3/PPC630          -
+C POWER4/PPC970          -
+C POWER5                 -
+C POWER6                 -
+C POWER7                 2.87
+
+define(`up', r3)                C first operand pointer; r3 also carries the result
+define(`vp', r4)                C second operand pointer
+define(`n',  r5)                C operand size in limbs
+C mpn_hamdist: return in r3 the number of bit positions in which up[] and vp[] differ.
+ASM_START()
+PROLOGUE(mpn_hamdist)
+       std     r30, -16(r1)            C save r30 and r31 below the stack pointer;
+       std     r31, -8(r1)             C both are reloaded before returning
+
+       addi    r0, n, 1                C compute (n + 1) / 2 as the...
+ifdef(`HAVE_ABI_mode32',
+`      rldicl  r0, r0, 63,33', C ...branch count
+`      srdi    r0, r0, 1')     C ...for ctr
+       mtctr   r0
+
+       andi.   r0, n, 1                C test bit 0 of n; cr0 is consumed by beq below
+
+       li      r0, 0                   C first running sum
+       li      r12, 0                  C second running sum
+
+       beq     L(evn)                  C even n: no leading single limb
+
+L(odd):        ld      r6, 0(up)               C odd n: handle one limb pair up front
+       addi    up, up, 8
+       ld      r8, 0(vp)
+       addi    vp, vp, 8
+       xor     r10, r6, r8             C differing bits of this limb pair
+       popcntd r0, r10
+       bdz     L(e1)                   C n = 1: nothing else to do
+
+L(evn):        ld      r6, 0(up)               C preload two limbs from each operand
+       ld      r8, 0(vp)
+       ld      r7, 8(up)
+       ld      r9, 8(vp)
+       xor     r10, r6, r8             C first xor, counted one step later
+       addi    up, up, 16
+       addi    vp, vp, 16
+       li      r30, 0                  C no counts pending yet
+       li      r31, 0
+       bdz     L(end)                  C only these two limbs remain
+
+       nop
+       nop
+C      ALIGN(16)
+L(top):        add     r0, r0, r30             C fold in the count from the previous pass
+       ld      r6, 0(up)
+       ld      r8, 0(vp)
+       xor     r11, r7, r9
+       popcntd r30, r10
+       add     r12, r12, r31           C fold in the other pending count
+       ld      r7, 8(up)
+       ld      r9, 8(vp)
+       xor     r10, r6, r8             C xor for the next pass
+       popcntd r31, r11
+       addi    up, up, 16
+       addi    vp, vp, 16
+       bdnz    L(top)
+
+L(end):        add     r0, r0, r30             C drain: pending counts, then the last two limbs
+       xor     r11, r7, r9
+       popcntd r30, r10
+       add     r12, r12, r31
+       popcntd r31, r11
+
+       add     r0, r0, r30
+       add     r12, r12, r31
+L(e1): add     r3, r0, r12             C result = sum of both running sums
+       ld      r30, -16(r1)
+       ld      r31, -8(r1)
+       blr
+EPILOGUE()
diff --git a/mpn/powerpc64/p7/popcount.asm b/mpn/powerpc64/p7/popcount.asm

new file mode 100644 (file)

index 0000000..9a11309
--- /dev/null
+++ b/mpn/powerpc64/p7/popcount.asm
@@ -0,0 +1,79 @@
+dnl  PowerPC-64 mpn_popcount.
+
+dnl  Copyright 2012 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C                   cycles/limb
+C POWER3/PPC630          -
+C POWER4/PPC970          -
+C POWER5                 -
+C POWER6                 -
+C POWER7                 2
+
+define(`up', r3)
+define(`n',  r4)
+
+ASM_START()
+PROLOGUE(mpn_popcount)                C r3 = total popcount of the n limbs at up
+       addi    r0, n, 1
+ifdef(`HAVE_ABI_mode32',
+`      rldicl  r0, r0, 63,33', C ...branch count
+`      srdi    r0, r0, 1')     C ...for ctr
+       mtctr   r0              C ctr = (n+1)/2
+
+       andi.   r0, n, 1        C cr0 eq iff n is even
+
+       li      r0, 0           C r0 and r12 are the two running totals
+       li      r12, 0
+       beq     L(evn)
+
+L(odd):        ld      r4, 0(up)       C n odd: count one limb up front, r4 is reused as data since n is dead
+       addi    up, up, 8
+       popcntd r0, r4
+       bdz     L(e1)           C n = 1: r0 already holds the result
+
+L(evn):        ld      r4, 0(up)       C load and count the first limb pair
+       ld      r5, 8(up)
+       popcntd r8, r4          C r8 and r9 hold popcounts not yet added to the totals
+       popcntd r9, r5
+       bdz     L(e2)           C only one pair: just add r8 and r9
+
+       ld      r4, 16(up)      C preload the second limb pair
+       ld      r5, 24(up)
+       bdz     L(e4)           C only two pairs: skip the loop
+       addi    up, up, 32
+
+L(top):        add     r0, r0, r8      C add pending count, count loaded limb, load next limb
+       popcntd r8, r4
+       ld      r4, 0(up)
+       add     r12, r12, r9
+       popcntd r9, r5
+       ld      r5, 8(up)
+       addi    up, up, 16
+       bdnz    L(top)
+
+L(e4): add     r0, r0, r8      C wind down: count the last loaded limb pair
+       popcntd r8, r4
+       add     r12, r12, r9
+       popcntd r9, r5
+L(e2): add     r0, r0, r8      C add the final pending counts
+       add     r12, r12, r9
+L(e1): add     r3, r0, r12     C return value = sum of both totals
+       blr
+EPILOGUE()
diff --git a/mpn/powerpc64/rshift.asm b/mpn/powerpc64/rshift.asm

index e73640d08c0dbdccbf4a51143adb78ad91a3dcc3..18406c57e92f9bff9222f27faabd2743ecef707c 100644 (file)
--- a/mpn/powerpc64/rshift.asm
+++ b/mpn/powerpc64/rshift.asm
@@ -1,6 +1,6 @@
  dnl  PowerPC-64 mpn_rshift -- rp[] = up[] >> cnt
  
-dnl  Copyright 2003, 2005 Free Software Foundation, Inc.
+dnl  Copyright 2003, 2005, 2010, 2011 Free Software Foundation, Inc.
  
  dnl  This file is part of the GNU MP Library.
  
@@ -19,89 +19,178 @@ dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
  
  include(`../config.m4')
  
-C              cycles/limb
-C POWER3/PPC630:     1.5
-C POWER4/PPC970:     3.0
+C                   cycles/limb
+C POWER3/PPC630          ?
+C POWER4/PPC970          ?
+C POWER5                 2.25
+C POWER6                 9.75
+C POWER7                 2.15
  
-C INPUT PARAMETERS
-define(`rp',`r3')
-define(`up',`r4')
-define(`n',`r5')
-define(`cnt',`r6')
+C TODO
+C  * Try to reduce the number of needed live registers
+C  * Micro-optimise header code
+C  * Keep in synch with lshift.asm and lshiftc.asm
  
-define(`tnc',`r5')
-define(`v0',`r0')
-define(`v1',`r7')
-define(`u0',`r8')
-define(`u1',`r9')
-define(`h0',`r10')
-define(`h1',`r11')
+C INPUT PARAMETERS
+define(`rp',  `r3')
+define(`up',  `r4')
+define(`n',   `r5')
+define(`cnt', `r6')
  
+define(`tnc',`r0')
+define(`u0',`r30')
+define(`u1',`r31')
+define(`retval',`r5')
  
  ASM_START()
  PROLOGUE(mpn_rshift)
+       std     r31, -8(r1)     C save r31 and r30 below r1, reloaded at L(ret)
+       std     r30, -16(r1)
+       subfic  tnc, cnt, 64    C tnc = 64 - cnt
+C      sldi    r30, n, 3       C byte count corresponding to n
+C      add     rp, rp, r30     C rp = rp + n
+C      add     up, up, r30     C up = up + n
+       rldicl. r30, n, 0,62    C r30 = n & 3, set cr0
+       cmpdi   cr6, r30, 2     C cr6 = n & 3 compared with 2
+       addi    r31, n, 3       C compute count...
+       ld      r10, 0(up)      C load 1st limb for b00...b11
+       sld     retval, r10, tnc        C retval = 1st limb << tnc, retval shares r5 with n which is dead here
  ifdef(`HAVE_ABI_mode32',
-`      rldicl  n, n, 0, 32')   C zero extend n
-       mtctr   n               C copy n to count register
+`      rldicl  r31, r31, 62,34',       C ...branch count
+`      srdi    r31, r31, 2')   C ...for ctr
+       mtctr   r31             C copy count into ctr
+       beq     cr0, L(b00)     C n & 3 = 0
+       blt     cr6, L(b01)     C n & 3 = 1
+       ld      r11, 8(up)      C load 2nd limb for b10 and b11
+       beq     cr6, L(b10)     C n & 3 = 2, otherwise fall through with n & 3 = 3
+
+       ALIGN(16)
+L(b11):        srd     r8, r10, cnt
+       sld     r9, r11, tnc
+       ld      u1, 16(up)
+       addi    up, up, 24
+       srd     r12, r11, cnt
+       sld     r7, u1, tnc
         addi    rp, rp, -16
-       subfic  tnc, cnt, 64    C reverse shift count
+       bdnz    L(gt3)          C more than 3 limbs: join the main loop
  
-       ld      u0, 0(up)
-       srd     h0, u0, cnt
-       sld     r12, u0, tnc    C return value
-       bdz     L(1)            C jump for n = 1
+       or      r11, r8, r9
+       srd     r8, u1, cnt
+       b       L(cj3)
  
+       ALIGN(16)
+L(gt3):        ld      u0, 0(up)
+       or      r11, r8, r9
+       srd     r8, u1, cnt
+       sld     r9, u0, tnc
         ld      u1, 8(up)
-       bdz     L(2)            C jump for n = 2
-
-       ldu     u0, 16(up)
-       bdz     L(end)          C jump for n = 3
-
-L(oop):        sld     v1, u1, tnc
-       srd     h1, u1, cnt
+       or      r10, r12, r7
+       b       L(L11)
+
+       ALIGN(32)
+L(b10):        srd     r12, r10, cnt
+       addi    rp, rp, -24
+       sld     r7, r11, tnc
+       bdnz    L(gt2)          C more than 2 limbs: join the main loop
+
+       srd     r8, r11, cnt
+       or      r10, r12, r7
+       b       L(cj2)
+
+L(gt2):        ld      u0, 16(up)
+       srd     r8, r11, cnt
+       sld     r9, u0, tnc
+       ld      u1, 24(up)
+       or      r10, r12, r7
+       srd     r12, u0, cnt
+       sld     r7, u1, tnc
+       ld      u0, 32(up)
+       or      r11, r8, r9
+       addi    up, up, 16
+       b       L(L10)
+
+       ALIGN(16)
+L(b00):        ld      u1, 8(up)
+       srd     r12, r10, cnt
+       sld     r7, u1, tnc
+       ld      u0, 16(up)
+       srd     r8, u1, cnt
+       sld     r9, u0, tnc
+       ld      u1, 24(up)
+       or      r10, r12, r7
+       srd     r12, u0, cnt
+       sld     r7, u1, tnc
+       addi    rp, rp, -8
+       bdz     L(cj4)          C exactly 4 limbs: go straight to the wind down code
+
+L(gt4):        addi    up, up, 32
+       ld      u0, 0(up)
+       or      r11, r8, r9
+       b       L(L00)
+
+       ALIGN(16)
+L(b01):        bdnz    L(gt1)          C more than 1 limb: set up for the main loop
+       srd     r8, r10, cnt    C n = 1: the result is the single limb shifted right
+       std     r8, 0(rp)
+       b       L(ret)
+
+L(gt1):        ld      u0, 8(up)
+       srd     r8, r10, cnt
+       sld     r9, u0, tnc
+       ld      u1, 16(up)
+       srd     r12, u0, cnt
+       sld     r7, u1, tnc
+       ld      u0, 24(up)
+       or      r11, r8, r9
+       srd     r8, u1, cnt
+       sld     r9, u0, tnc
+       ld      u1, 32(up)
+       addi    up, up, 40
+       or      r10, r12, r7
+       bdz     L(end)
+
+       ALIGN(32)
+L(top):        srd     r12, u0, cnt    C main loop: stores 4 result limbs per iteration
+       sld     r7, u1, tnc
+       ld      u0, 0(up)
+       std     r11, 0(rp)
+       or      r11, r8, r9
+L(L00):        srd     r8, u1, cnt
+       sld     r9, u0, tnc
         ld      u1, 8(up)
-       or      h0, v1, h0
-       stdu    h0, 16(rp)
-
-       bdz     L(exit)
-
-       sld     v0, u0, tnc
-       srd     h0, u0, cnt
-       ldu     u0, 16(up)
-       or      h1, v0, h1
-       std     h1, 8(rp)
-
-       bdnz    L(oop)
-
-L(end):        sld     v1, u1, tnc
-       srd     h1, u1, cnt
-       or      h0, v1, h0
-       stdu    h0, 16(rp)
-       sld     v0, u0, tnc
-       srd     h0, u0, cnt
-       or      h1, v0, h1
-       std     h1, 8(rp)
-L(1):  std     h0, 16(rp)
-ifdef(`HAVE_ABI_mode32',
-`      srdi    r3, r12, 32
-       mr      r4, r12
-',`    mr      r3, r12
-')
-       blr
-
-L(exit):       sld     v0, u0, tnc
-       srd     h0, u0, cnt
-       or      h1, v0, h1
-       std     h1, 8(rp)
-L(2):  sld     v1, u1, tnc
-       srd     h1, u1, cnt
-       or      h0, v1, h0
-       stdu    h0, 16(rp)
-       std     h1, 8(rp)
+       std     r10, 8(rp)
+       or      r10, r12, r7
+L(L11):        srd     r12, u0, cnt
+       sld     r7, u1, tnc
+       ld      u0, 16(up)
+       std     r11, 16(rp)
+       or      r11, r8, r9
+L(L10):        srd     r8, u1, cnt
+       sld     r9, u0, tnc
+       ld      u1, 24(up)
+       addi    up, up, 32
+       std     r10, 24(rp)
+       addi    rp, rp, 32
+       or      r10, r12, r7
+       bdnz    L(top)
+
+       ALIGN(32)
+L(end):        srd     r12, u0, cnt    C wind down: combine and store the limbs still in registers
+       sld     r7, u1, tnc
+       std     r11, 0(rp)
+L(cj4):        or      r11, r8, r9
+       srd     r8, u1, cnt
+       std     r10, 8(rp)
+L(cj3):        or      r10, r12, r7
+       std     r11, 16(rp)
+L(cj2):        std     r10, 24(rp)
+       std     r8, 32(rp)      C last stored limb is a plain right shift with nothing ored in
+
+L(ret):        ld      r31, -8(r1)     C restore r31 and r30
+       ld      r30, -16(r1)
  ifdef(`HAVE_ABI_mode32',
-`      srdi    r3, r12, 32
-       mr      r4, r12
-',`    mr      r3, r12
-')
+`      srdi    r3, retval, 32
+       mr      r4, retval
+',`    mr      r3, retval')
         blr
  EPILOGUE()
diff --git a/mpn/powerpc64/sqr_diagonal.asm b/mpn/powerpc64/sqr_diagonal.asm

deleted file mode 100644 (file)

index 07f60e0..0000000
--- a/mpn/powerpc64/sqr_diagonal.asm
+++ /dev/null
@@ -1,55 +0,0 @@
-dnl  PowerPC-64 mpn_sqr_diagonal.
-
-dnl  Copyright 2001, 2002, 2003, 2005, 2006 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of the GNU Lesser General Public License as published
-dnl  by the Free Software Foundation; either version 3 of the License, or (at
-dnl  your option) any later version.
-
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-dnl  License for more details.
-
-dnl  You should have received a copy of the GNU Lesser General Public License
-dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C              cycles/limb
-C POWER3/PPC630:    18
-C POWER4/PPC970:     8
-
-C INPUT PARAMETERS
-C rp   r3
-C up   r4
-C n    r5
-
-ASM_START()
-PROLOGUE(mpn_sqr_diagonal)
-ifdef(`HAVE_ABI_mode32',
-`      rldicl  r5, r5, 0, 32')         C zero extend n
-       mtctr   r5
-       ld      r0, 0(r4)
-       bdz     L(end)
-       ALIGN(16)
-
-L(top):        mulld   r5, r0, r0
-       mulhdu  r6, r0, r0
-       ld      r0, 8(r4)
-       addi    r4, r4, 8
-       std     r5, 0(r3)
-       std     r6, 8(r3)
-       addi    r3, r3, 16
-       bdnz    L(top)
-
-L(end):        mulld   r5, r0, r0
-       mulhdu  r6, r0, r0
-       std     r5, 0(r3)
-       std     r6, 8(r3)
-
-       blr
-EPILOGUE()
diff --git a/mpn/powerpc64/tabselect.asm b/mpn/powerpc64/tabselect.asm

new file mode 100644 (file)

index 0000000..7d18938
--- /dev/null
+++ b/mpn/powerpc64/tabselect.asm
@@ -0,0 +1,96 @@
+dnl  PowerPC-64 mpn_tabselect.
+
+dnl  Copyright 2011 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C                  cycles/limb
+C POWER3/PPC630          ?
+C POWER4/PPC970          3.3
+C POWER5                 ?
+C POWER6                 ?
+C POWER7                 2.5
+
+C NOTES
+C  * This has not been tuned for any specific processor.  Its speed should not
+C    be too bad, though.
+C  * Using VMX could result in significant speedup for certain CPUs.
+
+C mpn_tabselect (mp_limb_t *rp, mp_limb_t *tp, mp_size_t n, mp_size_t nents, mp_size_t which)
+define(`rp',     `r3')
+define(`tp',     `r4')
+define(`n',      `r5')
+define(`nents',  `r6')
+define(`which',  `r7')
+
+define(`mask',   `r8')
+
+ASM_START()
+       TEXT
+       ALIGN(16)
+PROLOGUE(mpn_tabselect)       C copy entry number which of the nents consecutive n-limb entries at tp to rp
+       addi    r0, n, 1
+       srdi    r0, r0, 1               C inner loop count
+       andi.   r9, n, 1                C set cr0 for use in inner loop
+       subf    which, nents, which     C bias which so that which + nents is 0 at the selected entry
+       sldi    n, n, 3                 C n = byte size of one entry, used to rewind rp
+
+L(outer):
+       mtctr   r0                      C put inner loop count in ctr
+
+       add     r9, which, nents        C are we at the selected table entry?
+       addic   r9, r9, -1              C set CF iff not selected entry
+       subfe   mask, r0, r0            C mask = CF - 1: all ones for the selected entry, else zero
+
+       beq     cr0, L(top)             C branch to loop entry if n even
+
+       ld      r9, 0(tp)               C n odd: merge one limb before the 2-limb loop
+       addi    tp, tp, 8
+       and     r9, r9, mask            C table limb if selected
+       ld      r11, 0(rp)
+       andc    r11, r11, mask          C current rp limb if not selected
+       or      r9, r9, r11
+       std     r9, 0(rp)
+       addi    rp, rp, 8
+       bdz     L(end)
+
+       ALIGN(16)
+L(top):        ld      r9, 0(tp)               C merge two limbs per iteration
+       ld      r10, 8(tp)
+       addi    tp, tp, 16
+       nop
+       and     r9, r9, mask
+       and     r10, r10, mask
+       ld      r11, 0(rp)
+       ld      r12, 8(rp)
+       andc    r11, r11, mask
+       andc    r12, r12, mask
+       or      r9, r9, r11
+       or      r10, r10, r12
+       std     r9, 0(rp)
+       std     r10, 8(rp)
+       addi    rp, rp, 16
+       bdnz    L(top)
+
+L(end):        subf    rp, n, rp               C move rp back to beginning
+       cmpdi   cr6, nents, 1           C was this the last table entry?
+       addi    nents, nents, -1
+       bne     cr6, L(outer)           C tp is not rewound so it now points at the next entry
+
+       blr
+EPILOGUE()
diff --git a/mpn/powerpc64/vmx/popcount.asm b/mpn/powerpc64/vmx/popcount.asm

index b9f5896fb7683965495a1ba7a0e2fc91f0a1200c..9734f8c48943c2873bc6051ab04c3b4a20d27e5e 100644 (file)
--- a/mpn/powerpc64/vmx/popcount.asm
+++ b/mpn/powerpc64/vmx/popcount.asm
@@ -1,6 +1,6 @@
  dnl  PowerPC-32/VMX and PowerPC-64/VMX mpn_popcount.
  
-dnl  Copyright 2006 Free Software Foundation, Inc.
+dnl  Copyright 2006, 2010 Free Software Foundation, Inc.
  
  dnl  This file is part of the GNU MP Library.
  
@@ -20,17 +20,13 @@ dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
  include(`../config.m4')
  
  C                   cycles/limb
-C 7400,7410 (G4):       2.75
-C 744x,745x (G4+):      2.25
-C 970 (G5):             5.3
-
-C STATUS
-C  * Works for all sizes and alignments.
+C 7400,7410 (G4):       ?
+C 744x,745x (G4+):      1.125
+C 970 (G5):             2.25
  
  C TODO
-C  * Tune the awkward huge n outer loop code.
+C  * Rewrite the awkward huge n outer loop code.
  C  * Two lvx, two vperm, and two vxor could make us a similar hamdist.
-C  * For the 970, a combined VMX+intop approach might be best.
  C  * Compress cnsts table in 64-bit mode, only half the values are needed.
  
  define(`GMP_LIMB_BYTES', eval(GMP_LIMB_BITS/8))
@@ -39,26 +35,11 @@ define(`LIMBS_PER_2VR', eval(32/GMP_LIMB_BYTES))
  
  define(`OPERATION_popcount')
  
-ifdef(`OPERATION_popcount',`
-  define(`func',`mpn_popcount')
-  define(`up',         `r3')
-  define(`n',          `r4')
-  define(`HAM',                `dnl')
-')
-ifdef(`OPERATION_hamdist',`
-  define(`func',`mpn_hamdist')
-  define(`up',         `r3')
-  define(`vp',         `r4')
-  define(`n',          `r5')
-  define(`HAM',                `$1')
-')
+define(`ap',   `r3')
+define(`n',    `r4')
  
-define(`x01010101',`v2')
-define(`x00110011',`v7')
-define(`x00001111',`v10')
-define(`cnt1',`v11')
-define(`cnt2',`v12')
-define(`cnt4',`v13')
+define(`rtab', `v10')
+define(`cnt4', `v11')
  
  ifelse(GMP_LIMB_BITS,32,`
         define(`LIMB32',`       $1')
@@ -85,30 +66,29 @@ ifdef(`HAVE_ABI_mode32',
  C Load various constants into vector registers
         LEAL(   r11, cnsts)
         li      r12, 16
-       vspltisb cnt1, 1                C 0x0101...01 used as shift count
-       vspltisb cnt2, 2                C 0x0202...02 used as shift count
         vspltisb cnt4, 4                C 0x0404...04 used as shift count
-       lvx     x01010101, 0, r11       C 0x3333...33
-       lvx     x00110011, r12, r11     C 0x5555...55
-       vspltisb x00001111, 15          C 0x0f0f...0f
+
+       li      r7, 160
+       lvx     rtab, 0, r11
  
  LIMB64(`lis    r0, LIMBS_CHUNK_THRES   ')
  LIMB64(`cmpd   cr7, n, r0              ')
  
-       lvx     v0, 0, up
-       addi    r7, r11, 96
-       rlwinm  r6, up, 2,26,29
+       lvx     v0, 0, ap
+       addi    r7, r11, 80
+       rlwinm  r6, ap, 2,26,29
         lvx     v8, r7, r6
         vand    v0, v0, v8
  
-LIMB32(`rlwinm r8, up, 30,30,31        ')
-LIMB64(`rlwinm r8, up, 29,31,31        ')
-       add     n, n, r8                C compensate n for rounded down `up'
+LIMB32(`rlwinm r8, ap, 30,30,31        ')
+LIMB64(`rlwinm r8, ap, 29,31,31        ')
+       add     n, n, r8                C compensate n for rounded down `ap'
  
         vxor    v1, v1, v1
         li      r8, 0                   C grand total count
  
-       vxor    v3, v3, v3              C zero total count
+       vxor    v12, v12, v12           C zero total count
+       vxor    v13, v13, v13           C zero total count
  
         addic.  n, n, -LIMBS_PER_VR
         ble     L(sum)
@@ -120,82 +100,61 @@ C For 64-bit machines, handle huge n that would overflow vsum4ubs
  LIMB64(`ble    cr7, L(small)           ')
  LIMB64(`addis  r9, n, -LIMBS_PER_CHUNK ') C remaining n
  LIMB64(`lis    n, LIMBS_PER_CHUNK      ')
-L(small):
-
  
+       ALIGN(16)
+L(small):
  LIMB32(`srwi   r7, n, 3        ')      C loop count corresponding to n
  LIMB64(`srdi   r7, n, 2        ')      C loop count corresponding to n
         addi    r7, r7, 1
         mtctr   r7                      C copy n to count register
         b       L(ent)
  
-       ALIGN(8)
-L(top):        lvx     v0, 0, up
-       li      r7, 128                 C prefetch distance
-L(ent):        lvx     v1, r12, up
-       addi    up, up, 32
-       vsr     v4, v0, cnt1
-       vsr     v5, v1, cnt1
-       dcbt    up, r7                  C prefetch
-       vand    v8, v4, x01010101
-       vand    v9, v5, x01010101
-       vsububm v0, v0, v8              C 64 2-bit accumulators (0..2)
-       vsububm v1, v1, v9              C 64 2-bit accumulators (0..2)
-       vsr     v4, v0, cnt2
-       vsr     v5, v1, cnt2
-       vand    v8, v0, x00110011
-       vand    v9, v1, x00110011
-       vand    v4, v4, x00110011
-       vand    v5, v5, x00110011
-       vaddubm v0, v4, v8              C 32 4-bit accumulators (0..4)
-       vaddubm v1, v5, v9              C 32 4-bit accumulators (0..4)
-       vaddubm v8, v0, v1              C 32 4-bit accumulators (0..8)
-       vsr     v9, v8, cnt4
-       vand    v6, v8, x00001111
-       vand    v9, v9, x00001111
-       vaddubm v6, v9, v6              C 16 8-bit accumulators (0..16)
-       vsum4ubs v3, v6, v3             C sum 4 x 4 bytes into 4 32-bit fields
+       ALIGN(16)
+L(top):
+       lvx     v0, 0, ap
+L(ent):        lvx     v1, r12, ap
+       addi    ap, ap, 32
+       vsrb    v8, v0, cnt4
+       vsrb    v9, v1, cnt4
+       vperm   v2, rtab, rtab, v0
+       vperm   v3, rtab, rtab, v8
+       vperm   v4, rtab, rtab, v1
+       vperm   v5, rtab, rtab, v9
+       vaddubm v6, v2, v3
+       vaddubm v7, v4, v5
+       vsum4ubs v12, v6, v12
+       vsum4ubs v13, v7, v13
         bdnz    L(top)
  
         andi.   n, n, eval(LIMBS_PER_2VR-1)
         beq     L(rt)
  
-       lvx     v0, 0, up
+       lvx     v0, 0, ap
         vxor    v1, v1, v1
         cmpwi   n, LIMBS_PER_VR
         ble     L(sum)
  L(lsum):
         vor     v1, v0, v0
-       lvx     v0, r12, up
+       lvx     v0, r12, ap
  L(sum):
  LIMB32(`rlwinm r6, n, 4,26,27  ')
  LIMB64(`rlwinm r6, n, 5,26,26  ')
-       addi    r7, r11, 32
+       addi    r7, r11, 16
         lvx     v8, r7, r6
         vand    v0, v0, v8
-
-       vsr     v4, v0, cnt1
-       vsr     v5, v1, cnt1
-       vand    v8, v4, x01010101
-       vand    v9, v5, x01010101
-       vsububm v0, v0, v8              C 64 2-bit accumulators (0..2)
-       vsububm v1, v1, v9              C 64 2-bit accumulators (0..2)
-       vsr     v4, v0, cnt2
-       vsr     v5, v1, cnt2
-       vand    v8, v0, x00110011
-       vand    v9, v1, x00110011
-       vand    v4, v4, x00110011
-       vand    v5, v5, x00110011
-       vaddubm v0, v4, v8              C 32 4-bit accumulators (0..4)
-       vaddubm v1, v5, v9              C 32 4-bit accumulators (0..4)
-       vaddubm v8, v0, v1              C 32 4-bit accumulators (0..8)
-       vsr     v9, v8, cnt4
-       vand    v6, v8, x00001111
-       vand    v9, v9, x00001111
-       vaddubm v6, v9, v6              C 16 8-bit accumulators (0..16)
-       vsum4ubs v3, v6, v3             C sum 4 x 4 bytes into 4 32-bit fields
-
-L(rt):
+       vsrb    v8, v0, cnt4
+       vsrb    v9, v1, cnt4
+       vperm   v2, rtab, rtab, v0
+       vperm   v3, rtab, rtab, v8
+       vperm   v4, rtab, rtab, v1
+       vperm   v5, rtab, rtab, v9
+       vaddubm v6, v2, v3
+       vaddubm v7, v4, v5
+       vsum4ubs v12, v6, v12
+       vsum4ubs v13, v7, v13
+
+       ALIGN(16)
+L(rt): vadduwm v3, v12, v13
         li      r7, -16                 C FIXME: does all ppc32 and ppc64 ABIs
         stvx    v3, r7, r1              C FIXME: ...support storing below sp?
  
@@ -210,7 +169,8 @@ L(rt):
  
  C Handle outer loop for huge n.  We inherit cr7 and r0 from above.
  LIMB64(`ble    cr7, L(ret)
-       vxor    v3, v3, v3              C zero total count
+       vxor    v12, v12, v12           C zero total count
+       vxor    v13, v13, v13           C zero total count
         mr      n, r9
         cmpd    cr7, n, r0
         ble     cr7, L(2)
@@ -221,17 +181,16 @@ L(2):     srdi    r7, n, 2                C loop count corresponding to n
         b       L(top)
  ')
  
+       ALIGN(16)
  L(ret):        mr      r3, r8
         mtspr   256, r10
         blr
  EPILOGUE()
  
  DEF_OBJECT(cnsts,16)
-       .byte   0x55,0x55,0x55,0x55,0x55,0x55,0x55,0x55
-       .byte   0x55,0x55,0x55,0x55,0x55,0x55,0x55,0x55
-
-       .byte   0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33
-       .byte   0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33
+C Counts for vperm
+       .byte   0x00,0x01,0x01,0x02,0x01,0x02,0x02,0x03
+       .byte   0x01,0x02,0x02,0x03,0x02,0x03,0x03,0x04
  C Masks for high end of number
         .byte   0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
         .byte   0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
diff --git a/mpn/s390_32/copyd.asm b/mpn/s390_32/copyd.asm

new file mode 100644 (file)

index 0000000..dbfde35
--- /dev/null
+++ b/mpn/s390_32/copyd.asm
@@ -0,0 +1,134 @@
+dnl  S/390-32 mpn_copyd
+
+dnl  Copyright 2011 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+
+include(`../config.m4')
+
+C            cycles/limb
+C            cycles/limb
+C z900          1.65
+C z990           1.125
+C z9            ?
+C z10           ?
+C z196          ?
+
+C FIXME:
+C  * Avoid saving/restoring callee-saves registers for n < 3.  This could be
+C    done by setting rp=r1, up=r2, i=r0 and r3,r4,r5 for clock regs.
+C    We could then use r3...r10 in main loop.
+
+C INPUT PARAMETERS
+define(`rp_param',     `%r2')
+define(`up_param',     `%r3')
+define(`n',            `%r4')
+
+define(`rp',   `%r8')
+define(`up',   `%r9')
+
+ASM_START()
+PROLOGUE(mpn_copyd)           C copy n limbs from up_param to rp_param starting at the high end
+       stm     %r6, %r11, 24(%r15)     C save r6 to r11, reloaded at L(end)
+
+       lr      %r1, n
+       sll     %r1, 2                  C r1 = 4n, the byte size of the operand
+       la      %r10, 8(n)
+       ahi     %r1, -32                C r1 = 4n - 32
+       srl     %r10, 3                 C r10 = (n + 8) / 8, the brct count
+       lhi     %r11, -32               C pointer step used in L(top)
+
+       la      rp, 0(%r1,rp_param)     C FIXME use lay on z990 and later
+       la      up, 0(%r1,up_param)     C FIXME use lay on z990 and later
+
+       lhi     %r7, 7
+       nr      %r7, n                  C n mod 8
+       chi     %r7, 2
+       jh      L(b34567)
+       chi     %r7, 1
+       je      L(b1)
+       jh      L(b2)
+
+L(b0): brct    %r10, L(top)            C n mod 8 = 0: enter the loop if any 8-limb block exists
+       j       L(end)
+
+L(b1): l       %r0, 28(up)             C n mod 8 = 1: copy the highest limb
+       ahi     up, -4
+       st      %r0, 28(rp)
+       ahi     rp, -4
+       brct    %r10, L(top)
+       j       L(end)
+
+L(b2): lm      %r0, %r1, 24(up)        C n mod 8 = 2: copy the 2 highest limbs
+       ahi     up, -8
+       stm     %r0, %r1, 24(rp)
+       ahi     rp, -8
+       brct    %r10, L(top)
+       j       L(end)
+
+L(b34567):
+       chi     %r7, 4
+       jl      L(b3)
+       je      L(b4)
+       chi     %r7, 6
+       je      L(b6)
+       jh      L(b7)
+
+L(b5): lm      %r0, %r4, 12(up)        C n mod 8 = 5: copy the 5 highest limbs, this overwrites n
+       ahi     up, -20
+       stm     %r0, %r4, 12(rp)
+       ahi     rp, -20
+       brct    %r10, L(top)
+       j       L(end)
+
+L(b3): lm      %r0, %r2, 20(up)        C n mod 8 = 3: copy the 3 highest limbs
+       ahi     up, -12
+       stm     %r0, %r2, 20(rp)
+       ahi     rp, -12
+       brct    %r10, L(top)
+       j       L(end)
+
+L(b4): lm      %r0, %r3, 16(up)        C n mod 8 = 4: copy the 4 highest limbs
+       ahi     up, -16
+       stm     %r0, %r3, 16(rp)
+       ahi     rp, -16
+       brct    %r10, L(top)
+       j       L(end)
+
+L(b6): lm      %r0, %r5, 8(up)         C n mod 8 = 6: copy the 6 highest limbs
+       ahi     up, -24
+       stm     %r0, %r5, 8(rp)
+       ahi     rp, -24
+       brct    %r10, L(top)
+       j       L(end)
+
+L(b7): lm      %r0, %r6, 4(up)         C n mod 8 = 7: copy the 7 highest limbs
+       ahi     up, -28
+       stm     %r0, %r6, 4(rp)
+       ahi     rp, -28
+       brct    %r10, L(top)
+       j       L(end)
+
+L(top):        lm      %r0, %r7, 0(up)         C copy 8 limbs per iteration, moving down 32 bytes
+       la      up, 0(%r11,up)
+       stm     %r0, %r7, 0(rp)
+       la      rp, 0(%r11,rp)
+       brct    %r10, L(top)
+
+L(end):        lm      %r6, %r11, 24(%r15)     C restore r6 to r11
+       br      %r14
+EPILOGUE()
diff --git a/mpn/s390_32/copyi.asm b/mpn/s390_32/copyi.asm

new file mode 100644 (file)

index 0000000..f3f2399
--- /dev/null
+++ b/mpn/s390_32/copyi.asm
@@ -0,0 +1,58 @@
+dnl  S/390-32 mpn_copyi
+
+dnl  Copyright 2011 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+
+include(`../config.m4')
+
+C            cycles/limb
+C z900          0.75
+C z990           0.375
+C z9            ?
+C z10           ?
+C z196          ?
+
+C NOTE
+C  * This is based on GNU libc memcpy which was written by Martin Schwidefsky.
+
+C INPUT PARAMETERS
+define(`rp',   `%r2')
+define(`up',   `%r3')
+define(`n',    `%r4')
+
+ASM_START()
+PROLOGUE(mpn_copyi)           C copy n limbs from up to rp using mvc on ascending addresses
+       ltr     %r4, %r4                C set the condition code from n
+       sll     %r4, 2                  C r4 = 4n, the byte count
+       je      L(rtn)                  C n = 0: nothing to copy
+       ahi     %r4, -1                 C r4 = byte count - 1
+       lr      %r5, %r4
+       srl     %r5, 8                  C r5 = number of 256-byte blocks before the final piece
+       ltr     %r5, %r5                C < 256 bytes to copy?
+       je      L(1)
+
+L(top):        mvc     0(256, rp), 0(up)       C copy one 256-byte block and advance both pointers
+       la      rp, 256(rp)
+       la      up, 256(up)
+       brct    %r5, L(top)
+
+L(1):  bras    %r5, L(2)               C make r5 point to mvc insn
+       mvc     0(1, rp), 0(up)
+L(2):  ex      %r4, 0(%r5)             C execute mvc with length ((n-1) mod 256)+1
+L(rtn):        br      %r14
+EPILOGUE()
diff --git a/mpn/s390_32/esame/addmul_1.asm b/mpn/s390_32/esame/addmul_1.asm

new file mode 100644 (file)

index 0000000..b9a57ac
--- /dev/null
+++ b/mpn/s390_32/esame/addmul_1.asm
@@ -0,0 +1,61 @@
+dnl  S/390-32 mpn_addmul_1 for systems with MLR instruction
+
+dnl  Copyright 2011 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C            cycles/limb
+C z900         18.5
+C z990         10
+C z9            ?
+C z10           ?
+C z196          ?
+
+C INPUT PARAMETERS
+define(`rp',   `%r2')
+define(`up',   `%r3')
+define(`n',    `%r4')
+define(`v0',   `%r5')
+
+define(`z',    `%r9')
+
+ASM_START()
+PROLOGUE(mpn_addmul_1)                C rp[] += up[] * v0 over n limbs, carry-out limb returned in r2
+       stm     %r9, %r12, 36(%r15)     C save r9 to r12, reloaded before return
+       lhi     %r12, 0                 C zero index register
+       ahi     %r12, 0                 C clear carry flag
+       lhi     %r11, 0                 C clear carry limb
+       lhi     z, 0                    C z = 0, added with carry to the high product limb
+
+L(top):        l       %r1, 0(%r12,up)         C r1 = next up limb
+       l       %r10, 0(%r12,rp)        C r10 = next rp limb
+       mlr     %r0, v0                 C r0:r1 = r1 * v0
+       alcr    %r1, %r10               C low limb += rp limb + carry left by the previous alr
+       alcr    %r0, z                  C high limb += carry
+       alr     %r1, %r11               C low limb += previous high limb, carry is consumed next time round
+       lr      %r11, %r0               C keep the high limb as the next carry limb
+       st      %r1, 0(%r12,rp)
+       la      %r12, 4(%r12)           C advance the index by one limb
+       brct    n, L(top)
+
+       lhi     %r2, 0
+       alcr    %r2, %r11               C return value = last high limb + pending carry
+
+       lm      %r9, %r12, 36(%r15)     C restore r9 to r12
+       br      %r14
+EPILOGUE()
diff --git a/mpn/s390_32/esame/aors_n.asm b/mpn/s390_32/esame/aors_n.asm

new file mode 100644 (file)

index 0000000..2e09ed4
--- /dev/null
+++ b/mpn/s390_32/esame/aors_n.asm
@@ -0,0 +1,126 @@
+dnl  S/390-32 mpn_add_n and mpn_sub_n.
+
+dnl  Copyright 2011 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C            cycles/limb
+C z900          ?
+C z990       2.75-3            (fast for even n, slow for odd n)
+C z9            ?
+C z10           ?
+C z196          ?
+
+C TODO
+C  * Optimise for small n
+C  * Use r0 and save/restore one less register
+C  * Using logops_n's v1 inner loop operand order makes the loop about 20%
+C    faster, at the expense of highly alignment-dependent performance.
+
+C INPUT PARAMETERS
+define(`rp',   `%r2')
+define(`up',   `%r3')
+define(`vp',   `%r4')
+define(`n',    `%r5')
+
+ifdef(`OPERATION_add_n', `
+  define(ADSB,         al)
+  define(ADSBCR,       alcr)
+  define(ADSBC,                alc)
+  define(RETVAL,`dnl
+       lhi     %r2, 0
+       alcr    %r2, %r2')
+  define(func,         mpn_add_n)
+  define(func_nc,      mpn_add_nc)')
+ifdef(`OPERATION_sub_n', `
+  define(ADSB,         sl)
+  define(ADSBCR,       slbr)
+  define(ADSBC,                slb)
+  define(RETVAL,`dnl
+       slbr    %r2, %r2
+       lcr     %r2, %r2')
+  define(func,         mpn_sub_n)
+  define(func_nc,      mpn_sub_nc)')
+
+MULFUNC_PROLOGUE(mpn_add_n mpn_sub_n)
+
+ASM_START()
+PROLOGUE(func)
+       stm     %r6, %r8, 24(%r15)      C save r6-r8, restored at the end
+
+       ahi     n, 3                    C bias n by 3 for the rounding below
+       lhi     %r7, 3
+       lr      %r1, n
+       srl     %r1, 2                  C r1 = (n + 3) / 4, the block count
+       nr      %r7, n                  C (n + 3) mod 4, since n was biased above
+       je      L(b1)                   C 0: n = 1 (mod 4)
+       chi     %r7, 2
+       jl      L(b2)                   C 1: n = 2 (mod 4)
+       jne     L(b0)                   C 3: n = 0 (mod 4)
+
+L(b3): lm      %r5, %r7, 0(up)         C 2: n = 3 (mod 4); this overwrites n
+       la      up, 12(up)
+       ADSB    %r5, 0(vp)              C first limb, no incoming carry
+       ADSBC   %r6, 4(vp)
+       ADSBC   %r7, 8(vp)
+       la      vp, 12(vp)
+       stm     %r5, %r7, 0(rp)
+       la      rp, 12(rp)
+       brct    %r1, L(top)             C enter main loop if blocks remain
+       j       L(end)
+
+L(b0): lm      %r5, %r8, 0(up)         C These redundant insns are no mistake;
+       la      up, 16(up)              C they are needed to make main loop run
+       ADSB    %r5, 0(vp)              C fast for n = 0 (mod 4).
+       ADSBC   %r6, 4(vp)
+       j       L(m0)                   C finish this block inside the main loop
+
+L(b1): l       %r5, 0(up)              C one leading limb
+       la      up, 4(up)
+       ADSB    %r5, 0(vp)
+       la      vp, 4(vp)
+       st      %r5, 0(rp)
+       la      rp, 4(rp)
+       brct    %r1, L(top)             C enter main loop if blocks remain
+       j       L(end)
+
+L(b2): lm      %r5, %r6, 0(up)         C two leading limbs
+       la      up, 8(up)
+       ADSB    %r5, 0(vp)
+       ADSBC   %r6, 4(vp)
+       la      vp, 8(vp)
+       stm     %r5, %r6, 0(rp)
+       la      rp, 8(rp)
+       brct    %r1, L(top)             C enter main loop if blocks remain
+       j       L(end)
+
+L(top):        lm      %r5, %r8, 0(up)         C main loop, four limbs per pass
+       la      up, 16(up)
+       ADSBC   %r5, 0(vp)              C continues the carry chain
+       ADSBC   %r6, 4(vp)
+L(m0): ADSBC   %r7, 8(vp)              C entry point used by L(b0)
+       ADSBC   %r8, 12(vp)
+       la      vp, 16(vp)
+       stm     %r5, %r8, 0(rp)
+       la      rp, 16(rp)
+       brct    %r1, L(top)
+
+L(end):        RETVAL
+       lm      %r6, %r8, 24(%r15)
+       br      %r14
+EPILOGUE()
diff --git a/mpn/s390_32/esame/aorslsh1_n.asm b/mpn/s390_32/esame/aorslsh1_n.asm

new file mode 100644 (file)

index 0000000..c152433
--- /dev/null
+++ b/mpn/s390_32/esame/aorslsh1_n.asm
@@ -0,0 +1,162 @@
+dnl  S/390-32 mpn_addlsh1_n and mpn_sublsh1_n.
+
+dnl  Copyright 2011 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C            cycles/limb
+C z900          9.25
+C z990          5
+C z9            ?
+C z10           ?
+C z196          ?
+
+C TODO
+C  * Optimise for small n
+C  * Compute RETVAL for sublsh1_n less stupidly
+
+C INPUT PARAMETERS
+define(`rp',   `%r2')
+define(`up',   `%r3')
+define(`vp',   `%r4')
+define(`n',    `%r5')
+
+ifdef(`OPERATION_addlsh1_n',`
+  define(ADDSUBC,       alr)
+  define(ADDSUBE,       alcr)
+  define(INITCY,        `lhi   %r13, -1')
+  define(RETVAL,        `alr   %r1, %r13
+                       lhi     %r2, 2
+                       alr     %r2, %r1')
+  define(func, mpn_addlsh1_n)
+')
+ifdef(`OPERATION_sublsh1_n',`
+  define(ADDSUBC,       slr)
+  define(ADDSUBE,       slbr)
+  define(INITCY,        `lhi   %r13, 0')
+  define(RETVAL,        `slr   %r1, %r13
+                       lhi     %r2, 1
+                       alr     %r2, %r1')
+  define(func, mpn_sublsh1_n)
+')
+
+MULFUNC_PROLOGUE(mpn_addlsh1_n mpn_sublsh1_n)
+
+ASM_START()
+PROLOGUE(func)
+       stm     %r6, %r13, 24(%r15)     C save r6-r13, restored at the end
+
+       la      %r0, 3(n)               C r0 = n + 3
+       lhi     %r7, 3
+       srl     %r0, 2                  C r0 = (n + 3) / 4, the block count
+       nr      %r7, n                  C n mod 4
+       je      L(b0)                   C 0: no leading limbs
+       chi     %r7, 2
+       jl      L(b1)                   C 1: one leading limb
+       je      L(b2)                   C 2: two leading limbs
+
+L(b3): lm      %r5, %r7, 0(up)         C 3: three leading limbs; overwrites n
+       la      up, 12(up)
+       lm      %r9, %r11, 0(vp)
+       la      vp, 12(vp)
+
+       alr     %r9, %r9                C double the v limbs (shift left by 1)
+       alcr    %r10, %r10
+       alcr    %r11, %r11
+       slbr    %r1, %r1                C save carry of the shift in r1
+
+       ADDSUBC %r5, %r9                C combine u limbs with shifted v limbs
+       ADDSUBE %r6, %r10
+       ADDSUBE %r7, %r11
+       slbr    %r13, %r13              C save carry of the add/sub in r13
+
+       stm     %r5, %r7, 0(rp)
+       la      rp, 12(rp)
+       brct    %r0, L(top)             C enter main loop if blocks remain
+       j       L(end)
+
+L(b0): lhi     %r1, -1                 C initial saved shift carry
+       INITCY
+       j       L(top)
+
+L(b1): l       %r5, 0(up)
+       la      up, 4(up)
+       l       %r9, 0(vp)
+       la      vp, 4(vp)
+
+       alr     %r9, %r9                C double the v limb
+       slbr    %r1, %r1                C save carry of the shift in r1
+       ADDSUBC %r5, %r9
+       slbr    %r13, %r13              C save carry of the add/sub in r13
+
+       st      %r5, 0(rp)
+       la      rp, 4(rp)
+       brct    %r0, L(top)             C enter main loop if blocks remain
+       j       L(end)
+
+L(b2): lm      %r5, %r6, 0(up)
+       la      up, 8(up)
+       lm      %r9, %r10, 0(vp)
+       la      vp, 8(vp)
+
+       alr     %r9, %r9                C double the v limbs
+       alcr    %r10, %r10
+       slbr    %r1, %r1                C save carry of the shift in r1
+
+       ADDSUBC %r5, %r9
+       ADDSUBE %r6, %r10
+       slbr    %r13, %r13              C save carry of the add/sub in r13
+
+       stm     %r5, %r6, 0(rp)
+       la      rp, 8(rp)
+       brct    %r0, L(top)             C enter main loop if blocks remain
+       j       L(end)
+
+L(top):        lm      %r9, %r12, 0(vp)        C main loop, four limbs per pass
+       la      vp, 16(vp)
+
+       ahi     %r1, 1                  C restore carry
+
+       alcr    %r9, %r9                C double the v limbs with carry in
+       alcr    %r10, %r10
+       alcr    %r11, %r11
+       alcr    %r12, %r12
+
+       slbr    %r1, %r1                C save carry
+
+       lm      %r5, %r8, 0(up)
+       la      up, 16(up)
+
+       ahi     %r13, 1                 C restore carry
+
+       ADDSUBE %r5, %r9
+       ADDSUBE %r6, %r10
+       ADDSUBE %r7, %r11
+       ADDSUBE %r8, %r12
+
+       slbr    %r13, %r13              C save carry
+
+       stm     %r5, %r8, 0(rp)
+       la      rp, 16(rp)
+       brct    %r0, L(top)
+
+L(end):
+       RETVAL
+       lm      %r6, %r13, 24(%r15)
+       br      %r14
+EPILOGUE()
diff --git a/mpn/s390_32/esame/bdiv_dbm1c.asm b/mpn/s390_32/esame/bdiv_dbm1c.asm

new file mode 100644 (file)

index 0000000..c4e2837
--- /dev/null
+++ b/mpn/s390_32/esame/bdiv_dbm1c.asm
@@ -0,0 +1,54 @@
+dnl  S/390-32 mpn_bdiv_dbm1c for systems with MLR instruction.
+
+dnl  Copyright 2011 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C            cycles/limb
+C z900         14
+C z990         10
+C z9            ?
+C z10           ?
+C z196          ?
+
+C INPUT PARAMETERS
+define(`qp',     `%r2')
+define(`up',     `%r3')
+define(`n',      `%r4')
+define(`bd',     `%r5')
+define(`cy',     `%r6')
+
+ASM_START()
+       TEXT
+       ALIGN(16)
+PROLOGUE(mpn_bdiv_dbm1c)
+       stm     %r6, %r7, 24(%r15)      C save r6-r7; r6 holds cy on entry
+       lhi     %r7, 0                  C zero index register
+
+L(top):        l       %r1, 0(%r7,up)          C load next up limb
+       mlr     %r0, bd                 C r0:r1 = up limb * bd
+       slr     %r6, %r1                C cy -= low product part
+       st      %r6, 0(%r7,qp)          C store that value as the qp limb
+       slbr    %r6, %r0                C cy -= high product part + borrow
+       la      %r7, 4(%r7)             C step index by one 4-byte limb
+       brct    n, L(top)               C decrement n, loop while nonzero
+
+       lr      %r2, %r6                C return the final cy value
+       lm      %r6, %r7, 24(%r15)
+       br      %r14
+EPILOGUE()
diff --git a/mpn/s390_32/esame/gmp-mparam.h b/mpn/s390_32/esame/gmp-mparam.h

new file mode 100644 (file)

index 0000000..a6508be
--- /dev/null
+++ b/mpn/s390_32/esame/gmp-mparam.h
@@ -0,0 +1,129 @@
+/* S/390-32 gmp-mparam.h -- Compiler/machine parameter header file.
+
+Copyright 1991, 1993, 1994, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007,
+2008, 2009, 2010, 2011 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#define GMP_LIMB_BITS 32
+#define BYTES_PER_MP_LIMB 4
+
+/* 1200 MHz IBM z990 running in 32-bit mode */
+
+#define DIVREM_1_NORM_THRESHOLD              0  /* always */
+#define DIVREM_1_UNNORM_THRESHOLD            4
+#define MOD_1_1P_METHOD                      2
+#define MOD_1_NORM_THRESHOLD                 0  /* always */
+#define MOD_1_UNNORM_THRESHOLD               3
+#define MOD_1N_TO_MOD_1_1_THRESHOLD         17
+#define MOD_1U_TO_MOD_1_1_THRESHOLD          8
+#define MOD_1_1_TO_MOD_1_2_THRESHOLD         0  /* never mpn_mod_1_1p */
+#define MOD_1_2_TO_MOD_1_4_THRESHOLD        34
+#define PREINV_MOD_1_TO_MOD_1_THRESHOLD     42
+#define USE_PREINV_DIVREM_1                  1
+#define DIV_QR_2_PI2_THRESHOLD           MP_SIZE_T_MAX  /* never */
+#define DIVEXACT_1_THRESHOLD                 0  /* always */
+#define BMOD_1_TO_MOD_1_THRESHOLD           30
+
+#define MUL_TOOM22_THRESHOLD                16
+#define MUL_TOOM33_THRESHOLD                57
+#define MUL_TOOM44_THRESHOLD               147
+#define MUL_TOOM6H_THRESHOLD               226
+#define MUL_TOOM8H_THRESHOLD               333
+
+#define MUL_TOOM32_TO_TOOM43_THRESHOLD      65
+#define MUL_TOOM32_TO_TOOM53_THRESHOLD     100
+#define MUL_TOOM42_TO_TOOM53_THRESHOLD      97
+#define MUL_TOOM42_TO_TOOM63_THRESHOLD     102
+
+#define SQR_BASECASE_THRESHOLD               0  /* always (native) */
+#define SQR_TOOM2_THRESHOLD                 26
+#define SQR_TOOM3_THRESHOLD                 81
+#define SQR_TOOM4_THRESHOLD                154
+#define SQR_TOOM6_THRESHOLD                318
+#define SQR_TOOM8_THRESHOLD                478
+
+#define MULMID_TOOM42_THRESHOLD             38
+
+#define MULMOD_BNM1_THRESHOLD               13
+#define SQRMOD_BNM1_THRESHOLD               15
+
+#define POWM_SEC_TABLE  4,23,262,892,2500
+
+#define MUL_FFT_MODF_THRESHOLD             336  /* k = 5 */
+#define MUL_FFT_TABLE3                                      \
+  { {    336, 5}, {     19, 6}, {     11, 5}, {     23, 6}, \
+    {     17, 7}, {      9, 6}, {     21, 7}, {     11, 6}, \
+    {     24, 7}, {     13, 6}, {     27, 7}, {     15, 6}, \
+    {     31, 7}, {     27, 8}, {     15, 7}, {     33, 8}, \
+    {     19, 7}, {     39, 8}, {     23, 7}, {     47, 8}, \
+    {     27, 9}, {     15, 8}, {     31, 7}, {     63, 8}, \
+    {     39, 9}, {     23, 8}, {     51,10}, {     15, 9}, \
+    {     31, 8}, {     63, 9}, {     39, 8}, {     79, 9}, \
+    {     47,10}, {     31, 9}, {     79,10}, {     47,11}, \
+    {   2048,12}, {   4096,13}, {   8192,14}, {  16384,15}, \
+    {  32768,16} }
+#define MUL_FFT_TABLE3_SIZE 41
+#define MUL_FFT_THRESHOLD                 2752
+
+#define SQR_FFT_MODF_THRESHOLD             308  /* k = 5 */
+#define SQR_FFT_TABLE3                                      \
+  { {    308, 5}, {     20, 6}, {     11, 5}, {     23, 6}, \
+    {     21, 7}, {     11, 6}, {     24, 7}, {     15, 6}, \
+    {     31, 7}, {     21, 8}, {     11, 7}, {     27, 8}, \
+    {     15, 7}, {     33, 8}, {     19, 7}, {     39, 8}, \
+    {     23, 7}, {     47, 8}, {     27, 9}, {     15, 8}, \
+    {     39, 9}, {     23, 8}, {     47,10}, {     15, 9}, \
+    {     31, 8}, {     63, 9}, {     47,10}, {     31, 9}, \
+    {     79,10}, {     47,11}, {   2048,12}, {   4096,13}, \
+    {   8192,14}, {  16384,15}, {  32768,16} }
+#define SQR_FFT_TABLE3_SIZE 35
+#define SQR_FFT_THRESHOLD                 2368
+
+#define MULLO_BASECASE_THRESHOLD             5
+#define MULLO_DC_THRESHOLD                  49
+#define MULLO_MUL_N_THRESHOLD             5397
+
+#define DC_DIV_QR_THRESHOLD                 42
+#define DC_DIVAPPR_Q_THRESHOLD             146
+#define DC_BDIV_QR_THRESHOLD                51
+#define DC_BDIV_Q_THRESHOLD                124
+
+#define INV_MULMOD_BNM1_THRESHOLD           46
+#define INV_NEWTON_THRESHOLD               179
+#define INV_APPR_THRESHOLD                 153
+
+#define BINV_NEWTON_THRESHOLD              214
+#define REDC_1_TO_REDC_N_THRESHOLD          55
+
+#define MU_DIV_QR_THRESHOLD               1078
+#define MU_DIVAPPR_Q_THRESHOLD            1078
+#define MUPI_DIV_QR_THRESHOLD               74
+#define MU_BDIV_QR_THRESHOLD               872
+#define MU_BDIV_Q_THRESHOLD               1078
+
+#define MATRIX22_STRASSEN_THRESHOLD         14
+#define HGCD_THRESHOLD                      90
+#define HGCD_APPR_THRESHOLD                111
+#define HGCD_REDUCE_THRESHOLD             1962
+#define GCD_DC_THRESHOLD                   225
+#define GCDEXT_DC_THRESHOLD                217
+#define JACOBI_BASE_METHOD                   2
+
+#define GET_STR_DC_THRESHOLD                13
+#define GET_STR_PRECOMPUTE_THRESHOLD        27
+#define SET_STR_DC_THRESHOLD               274
+#define SET_STR_PRECOMPUTE_THRESHOLD       824
diff --git a/mpn/s390_32/esame/mul_1.asm b/mpn/s390_32/esame/mul_1.asm

new file mode 100644 (file)

index 0000000..91e5d3b
--- /dev/null
+++ b/mpn/s390_32/esame/mul_1.asm
@@ -0,0 +1,55 @@
+dnl  S/390-32 mpn_mul_1 for systems with MLR instruction
+
+dnl  Copyright 2011 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C            cycles/limb
+C z900         14
+C z990          9
+C z9            ?
+C z10           ?
+C z196          ?
+
+C INPUT PARAMETERS
+define(`rp',   `%r2')
+define(`up',   `%r3')
+define(`n',    `%r4')
+define(`v0',   `%r5')
+
+ASM_START()
+PROLOGUE(mpn_mul_1)
+       stm     %r11, %r12, 44(%r15)    C save r11-r12, restored before return
+       lhi     %r12, 0                 C zero index register
+       ahi     %r12, 0                 C clear carry flag
+       lhi     %r11, 0                 C clear carry limb
+
+L(top):        l       %r1, 0(%r12,up)         C load next up limb
+       mlr     %r0, v0                 C r0:r1 = up limb * v0
+       alcr    %r1, %r11               C low part += carry limb + carry flag
+       lr      %r11, %r0               C copy high part to carry limb
+       st      %r1, 0(%r12,rp)         C store result limb
+       la      %r12, 4(%r12)           C step index by one 4-byte limb
+       brct    n, L(top)               C decrement n, loop while nonzero
+
+       lhi     %r2, 0
+       alcr    %r2, %r11               C return carry limb + pending carry flag
+
+       lm      %r11, %r12, 44(%r15)
+       br      %r14
+EPILOGUE()
diff --git a/mpn/s390_32/esame/mul_basecase.asm b/mpn/s390_32/esame/mul_basecase.asm

new file mode 100644 (file)

index 0000000..c8ef70a
--- /dev/null
+++ b/mpn/s390_32/esame/mul_basecase.asm
@@ -0,0 +1,119 @@
+dnl  S/390-32/esame mpn_mul_basecase.
+
+dnl  Copyright 2011 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C            cycles/limb
+C z900          ?
+C z990          ?
+C z9            ?
+C z10           ?
+C z196          ?
+
+C TODO
+C  * Perhaps add special case for un <= 2.
+C  * Replace loops by faster code.  The mul_1 and addmul_1 loops could be sped
+C    up by about 10%.
+
+C INPUT PARAMETERS
+define(`rp',   `%r2')
+define(`up',   `%r3')
+define(`un',   `%r4')
+define(`vp',   `%r5')
+define(`vn',   `%r6')
+
+define(`zero', `%r8')
+
+ASM_START()
+PROLOGUE(mpn_mul_basecase)
+       chi     un, 2
+       jhe     L(ge2)                  C un >= 2 takes the general path
+
+C un = vn = 1
+       l       %r1, 0(vp)
+       ml      %r0, 0(up)              C r0:r1 = vp limb * up limb
+       st      %r1, 0(rp)              C low product limb
+       st      %r0, 4(rp)              C high product limb
+       br      %r14
+
+L(ge2):        C jne   L(gen)
+
+
+L(gen):
+C mul_1 =======================================================================
+
+       stm     %r6, %r12, 24(%r15)     C save r6-r12, restored before return
+       lhi     zero, 0
+       ahi     un, -1                  C un - 1 limbs remain after the first
+
+       l       %r7, 0(vp)              C r7 = first v limb
+       l       %r11, 0(up)
+       lhi     %r12, 4                 C init index register
+       mlr     %r10, %r7               C r10:r11 = first up limb * v limb
+       lr      %r9, un                 C inner loop counter
+       st      %r11, 0(rp)
+       cr      %r15, %r15              C clear carry flag
+
+L(tm): l       %r1, 0(%r12,up)
+       mlr     %r0, %r7
+       alcr    %r1, %r10               C low part += carry limb + carry flag
+       lr      %r10, %r0               C copy high part to carry limb
+       st      %r1, 0(%r12,rp)
+       la      %r12, 4(%r12)
+       brct    %r9, L(tm)
+
+       alcr    %r0, zero               C fold last carry flag into high limb
+       st      %r0, 0(%r12,rp)         C store most significant limb
+
+C addmul_1 loop ===============================================================
+
+       ahi     vn, -1
+       je      L(outer_end)            C vn was 1, nothing more to add
+L(outer_loop):
+
+       la      rp, 4(rp)               C rp += 1
+       la      vp, 4(vp)               C vp += 1
+       l       %r7, 0(vp)              C r7 = next v limb
+       l       %r11, 0(up)
+       lhi     %r12, 4                 C init index register
+       mlr     %r10, %r7               C r10:r11 = first up limb * v limb
+       lr      %r9, un                 C inner loop counter
+       al      %r11, 0(rp)             C add existing rp limb, sets carry flag
+       st      %r11, 0(rp)
+
+L(tam):        l       %r1, 0(%r12,up)
+       l       %r11, 0(%r12,rp)
+       mlr     %r0, %r7
+       alcr    %r1, %r11               C low part += rp limb + carry flag
+       alcr    %r0, zero               C fold carry into high part
+       alr     %r1, %r10               C low part += carry limb
+       lr      %r10, %r0               C copy high part to carry limb
+       st      %r1, 0(%r12,rp)
+       la      %r12, 4(%r12)
+       brct    %r9, L(tam)
+
+       alcr    %r0, zero               C fold last carry flag into high limb
+       st      %r0, 0(%r12,rp)         C store new most significant limb
+
+       brct    vn, L(outer_loop)
+L(outer_end):
+
+       lm      %r6, %r12, 24(%r15)
+       br      %r14
+EPILOGUE()
diff --git a/mpn/s390_32/esame/sqr_basecase.asm b/mpn/s390_32/esame/sqr_basecase.asm

new file mode 100644 (file)

index 0000000..01a81b1
--- /dev/null
+++ b/mpn/s390_32/esame/sqr_basecase.asm
@@ -0,0 +1,192 @@
+dnl  S/390-32 mpn_sqr_basecase.
+
+dnl  Copyright 2011 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C            cycles/limb
+C z900          ?
+C z990         23
+C z9            ?
+C z10           ?
+C z196          ?
+
+C TODO
+C  * Clean up.
+C  * Stop iterating addmul_1 loop at latest for n = 2, implement longer tail.
+C    This will ask for basecase handling of n = 3.
+C  * Update counters and pointers more straightforwardly, possibly lowering
+C    register usage.
+C  * Should we use this allocation-free style for more sqr_basecase asm
+C    implementations?  The only disadvantage is that it requires R != U.
+C  * Replace loops by faster code.  The mul_1 and addmul_1 loops could be sped
+C    up by about 10%.  The sqr_diag_addlsh1 loop could probably be sped up even
+C    more.
+
+C INPUT PARAMETERS
+define(`rp',   `%r2')
+define(`up',   `%r3')
+define(`n',    `%r4')
+
+define(`zero', `%r8')
+define(`rp_saved',     `%r9')
+define(`up_saved',     `%r13')
+define(`n_saved',      `%r14')
+
+ASM_START()
+PROLOGUE(mpn_sqr_basecase)
+       ahi     n, -2                   C n now holds the original n minus 2
+       jhe     L(ge2)
+
+C n = 1
+       l       %r5, 0(up)
+       mlr     %r4, %r5                C r4:r5 = u0 * u0
+       st      %r5, 0(rp)
+       st      %r4, 4(rp)
+       br      %r14
+
+L(ge2):        jne     L(gen)          C original n > 2 takes the general path
+
+C n = 2
+       stm     %r6, %r8, 24(%r15)
+       lhi     zero, 0
+
+       l       %r5, 0(up)
+       mlr     %r4, %r5                C u0 * u0
+       l       %r1, 4(up)
+       mlr     %r0, %r1                C u1 * u1
+       st      %r5, 0(rp)
+
+       l       %r7, 0(up)
+       ml      %r6, 4(up)              C u0 * u1
+       alr     %r7, %r7                C double the cross product
+       alcr    %r6, %r6
+       alcr    %r0, zero               C carry out of doubling goes to top limb
+
+       alr     %r4, %r7                C add doubled cross product to squares
+       alcr    %r1, %r6
+       alcr    %r0, zero
+       st      %r4, 4(rp)
+       st      %r1, 8(rp)
+       st      %r0, 12(rp)
+
+       lm      %r6, %r8, 24(%r15)
+       br      %r14
+
+L(gen):
+C mul_1 =======================================================================
+
+       stm     %r6, %r14, 24(%r15)     C save r6-r14, restored before return
+       lhi     zero, 0
+       lr      up_saved, up            C keep originals for the final pass
+       lr      rp_saved, rp
+       lr      n_saved, n
+
+       l       %r6, 0(up)              C r6 = multiplier limb u0
+       l       %r11, 4(up)
+       lhi     %r12, 8         C init index register
+       mlr     %r10, %r6               C r10:r11 = u1 * u0
+       lr      %r5, n                  C inner loop counter
+       st      %r11, 4(rp)
+       cr      %r15, %r15              C clear carry flag
+
+L(tm): l       %r1, 0(%r12,up)
+       mlr     %r0, %r6
+       alcr    %r1, %r10               C low part += carry limb + carry flag
+       lr      %r10, %r0               C copy high part to carry limb
+       st      %r1, 0(%r12,rp)
+       la      %r12, 4(%r12)
+       brct    %r5, L(tm)
+
+       alcr    %r0, zero               C fold last carry flag into high limb
+       st      %r0, 0(%r12,rp)
+
+C addmul_1 loop ===============================================================
+
+       ahi     n, -1
+       je      L(outer_end)
+L(outer_loop):
+
+       la      rp, 8(rp)               C rp += 2
+       la      up, 4(up)               C up += 1
+       l       %r6, 0(up)              C r6 = next multiplier limb
+       l       %r11, 4(up)
+       lhi     %r12, 8         C init index register
+       mlr     %r10, %r6
+       lr      %r5, n                  C inner loop counter
+       al      %r11, 4(rp)             C add existing rp limb, sets carry flag
+       st      %r11, 4(rp)
+
+L(tam):        l       %r1, 0(%r12,up)
+       l       %r7, 0(%r12,rp)
+       mlr     %r0, %r6
+       alcr    %r1, %r7                C low part += rp limb + carry flag
+       alcr    %r0, zero               C fold carry into high part
+       alr     %r1, %r10               C low part += carry limb
+       lr      %r10, %r0               C copy high part to carry limb
+       st      %r1, 0(%r12,rp)
+       la      %r12, 4(%r12)
+       brct    %r5, L(tam)
+
+       alcr    %r0, zero               C fold last carry flag into high limb
+       st      %r0, 0(%r12,rp)
+
+       brct    n, L(outer_loop)
+L(outer_end):
+
+       l       %r6, 4(up)              C last cross product, done outside loop
+       l       %r1, 8(up)
+       lr      %r7, %r0                C Same as: l %r7, 12(,rp)
+       mlr     %r0, %r6
+       alr     %r1, %r7
+       alcr    %r0, zero
+       st      %r1, 12(rp)
+       st      %r0, 16(rp)
+
+C sqr_diag_addlsh1 ===========================================================
+
+define(`up', `up_saved')
+define(`rp', `rp_saved')
+       la      n, 1(n_saved)           C loop count = n_saved + 1
+
+       l       %r1, 0(up)
+       mlr     %r0, %r1                C r0:r1 = u0 * u0
+       st      %r1, 0(rp)
+C      clr     %r15, %r15              C clear carry (already clear per above)
+
+L(top):        l       %r11, 4(up)
+       la      up, 4(up)
+       l       %r6, 4(rp)              C two limbs of the cross-product sum
+       l       %r7, 8(rp)
+       mlr     %r10, %r11              C r10:r11 = square of the next u limb
+       alcr    %r6, %r6                C double the cross-product limbs
+       alcr    %r7, %r7
+       alcr    %r10, zero              C propagate carry to high product limb
+       alr     %r6, %r0                C add high limb of the previous square
+       alcr    %r7, %r11               C add low limb of this square
+       stm     %r6, %r7, 4(rp)
+       la      rp, 8(rp)
+       lr      %r0, %r10               C copy carry limb
+       brct    n, L(top)
+
+       alcr    %r0, zero               C fold last carry flag into top limb
+       st      %r0, 4(rp)
+
+       lm      %r6, %r14, 24(%r15)
+       br      %r14
+EPILOGUE()
diff --git a/mpn/s390_32/esame/submul_1.asm b/mpn/s390_32/esame/submul_1.asm

new file mode 100644 (file)

index 0000000..3fd6d56
--- /dev/null
+++ b/mpn/s390_32/esame/submul_1.asm
@@ -0,0 +1,59 @@
+dnl  S/390-32 mpn_submul_1 for systems with MLR instruction.
+
+dnl  Copyright 2011 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C            cycles/limb
+C z900         20
+C z990         11
+C z9            ?
+C z10           ?
+C z196          ?
+
+C INPUT PARAMETERS
+define(`rp',   `%r2')
+define(`up',   `%r3')
+define(`n',    `%r4')
+define(`v0',   `%r5')
+
+ASM_START()
+PROLOGUE(mpn_submul_1)
+       stm     %r9, %r12, 36(%r15)     C save r9-r12, restored before return
+       lhi     %r12, 0                 C zero index register
+       slr     %r11, %r11              C clear carry limb
+
+L(top):        l       %r1, 0(%r12, up)        C load next up limb
+       l       %r10, 0(%r12, rp)       C load matching rp limb
+       mlr     %r0, v0                 C r0:r1 = up limb * v0
+       slbr    %r10, %r1               C rp limb -= low part + borrow
+       slbr    %r9, %r9                C r9 = 0 or -1, from the borrow
+       slr     %r0, %r9                C conditional incr
+       slr     %r10, %r11              C rp limb -= carry limb
+       lr      %r11, %r0               C copy high part to carry limb
+       st      %r10, 0(%r12, rp)       C store result limb
+       la      %r12, 4(%r12)           C step index by one 4-byte limb
+       brct    %r4,  L(top)            C r4 is n; loop while nonzero
+
+       lr      %r2, %r11               C return value starts as carry limb
+       slbr    %r9, %r9                C r9 = 0 or -1, from the last borrow
+       slr     %r2, %r9                C conditional incr of the return value
+
+       lm      %r9, %r12, 36(%r15)
+       br      %r14
+EPILOGUE()
diff --git a/mpn/s390_32/gmp-mparam.h b/mpn/s390_32/gmp-mparam.h

index 858d9401994bd613fa52499205576ee0393eb637..795ae9e4b996e020bd8a68c8f4c23163cbd6491e 100644 (file)
--- a/mpn/s390_32/gmp-mparam.h
+++ b/mpn/s390_32/gmp-mparam.h
@@ -18,104 +18,111 @@ License for more details.
  You should have received a copy of the GNU Lesser General Public License
  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
  
-
  #define GMP_LIMB_BITS 32
  #define BYTES_PER_MP_LIMB 4
  
+/* 770 MHz IBM z900 running in 32-bit mode, using just traditional insns */
+
  #define DIVREM_1_NORM_THRESHOLD              0  /* always */
  #define DIVREM_1_UNNORM_THRESHOLD            5
+#define MOD_1_1P_METHOD                      2
  #define MOD_1_NORM_THRESHOLD                 0  /* always */
-#define MOD_1_UNNORM_THRESHOLD               3
-#define MOD_1N_TO_MOD_1_1_THRESHOLD         13
-#define MOD_1U_TO_MOD_1_1_THRESHOLD          6
+#define MOD_1_UNNORM_THRESHOLD               5
+#define MOD_1N_TO_MOD_1_1_THRESHOLD      MP_SIZE_T_MAX  /* never */
+#define MOD_1U_TO_MOD_1_1_THRESHOLD         15
  #define MOD_1_1_TO_MOD_1_2_THRESHOLD         0  /* never mpn_mod_1_1p */
-#define MOD_1_2_TO_MOD_1_4_THRESHOLD        35
-#define PREINV_MOD_1_TO_MOD_1_THRESHOLD     21
+#define MOD_1_2_TO_MOD_1_4_THRESHOLD        30
+#define PREINV_MOD_1_TO_MOD_1_THRESHOLD  MP_SIZE_T_MAX  /* never */
  #define USE_PREINV_DIVREM_1                  1
-#define DIVREM_2_THRESHOLD                   0  /* always */
+#define DIV_QR_2_PI2_THRESHOLD           MP_SIZE_T_MAX  /* never */
  #define DIVEXACT_1_THRESHOLD                 0  /* always */
-#define BMOD_1_TO_MOD_1_THRESHOLD           30
+#define BMOD_1_TO_MOD_1_THRESHOLD        MP_SIZE_T_MAX  /* never */
  
-#define MUL_TOOM22_THRESHOLD                22
-#define MUL_TOOM33_THRESHOLD                89
-#define MUL_TOOM44_THRESHOLD               202
-#define MUL_TOOM6H_THRESHOLD               270
-#define MUL_TOOM8H_THRESHOLD               406
+#define MUL_TOOM22_THRESHOLD                19
+#define MUL_TOOM33_THRESHOLD               114
+#define MUL_TOOM44_THRESHOLD               166
+#define MUL_TOOM6H_THRESHOLD               226
+#define MUL_TOOM8H_THRESHOLD               333
  
-#define MUL_TOOM32_TO_TOOM43_THRESHOLD     129
-#define MUL_TOOM32_TO_TOOM53_THRESHOLD     139
-#define MUL_TOOM42_TO_TOOM53_THRESHOLD     127
+#define MUL_TOOM32_TO_TOOM43_THRESHOLD     106
+#define MUL_TOOM32_TO_TOOM53_THRESHOLD     122
+#define MUL_TOOM42_TO_TOOM53_THRESHOLD     105
  #define MUL_TOOM42_TO_TOOM63_THRESHOLD     113
  
-#define SQR_BASECASE_THRESHOLD               8
-#define SQR_TOOM2_THRESHOLD                 52
-#define SQR_TOOM3_THRESHOLD                125
-#define SQR_TOOM4_THRESHOLD                226
-#define SQR_TOOM6_THRESHOLD                306
-#define SQR_TOOM8_THRESHOLD                430
+#define SQR_BASECASE_THRESHOLD               7
+#define SQR_TOOM2_THRESHOLD                 40
+#define SQR_TOOM3_THRESHOLD                126
+#define SQR_TOOM4_THRESHOLD                192
+#define SQR_TOOM6_THRESHOLD                246
+#define SQR_TOOM8_THRESHOLD                357
+
+#define MULMID_TOOM42_THRESHOLD             28
  
-#define MULMOD_BNM1_THRESHOLD               13
-#define SQRMOD_BNM1_THRESHOLD               17
+#define MULMOD_BNM1_THRESHOLD               12
+#define SQRMOD_BNM1_THRESHOLD               18
  
-#define MUL_FFT_MODF_THRESHOLD             308  /* k = 5 */
+#define MUL_FFT_MODF_THRESHOLD             244  /* k = 5 */
  #define MUL_FFT_TABLE3                                      \
-  { {    308, 5}, {     17, 6}, {      9, 5}, {     19, 6}, \
-    {     17, 7}, {      9, 6}, {     20, 7}, {     11, 6}, \
-    {     23, 7}, {     13, 8}, {      7, 7}, {     15, 6}, \
-    {     31, 7}, {     19, 8}, {     11, 7}, {     27, 9}, \
-    {      7, 8}, {     15, 7}, {     33, 8}, {     19, 7}, \
-    {     39, 8}, {     23, 7}, {     47, 8}, {     27, 9}, \
-    {     15, 8}, {     39, 9}, {     23, 8}, {     47,10}, \
-    {     15, 9}, {     31, 8}, {     63, 9}, {     39, 8}, \
-    {     83, 9}, {     47,10}, {     31, 9}, {     79,10}, \
-    {     47,11}, {   2048,12}, {   4096,13}, {   8192,14}, \
-    {  16384,15}, {  32768,16} }
-#define MUL_FFT_TABLE3_SIZE 42
-#define MUL_FFT_THRESHOLD                 3520
-
-#define SQR_FFT_MODF_THRESHOLD             276  /* k = 5 */
+  { {    244, 5}, {     13, 6}, {      7, 5}, {     15, 6}, \
+    {      8, 5}, {     17, 6}, {     13, 7}, {      7, 6}, \
+    {     16, 7}, {      9, 6}, {     19, 7}, {     11, 6}, \
+    {     23, 7}, {     13, 8}, {      7, 7}, {     19, 8}, \
+    {     11, 7}, {     25, 9}, {      7, 8}, {     15, 7}, \
+    {     33, 8}, {     19, 7}, {     39, 8}, {     23, 7}, \
+    {     47, 8}, {     27, 9}, {     15, 8}, {     39, 9}, \
+    {     23, 8}, {     47,10}, {     15, 9}, {     31, 8}, \
+    {     63, 9}, {     39, 8}, {     79, 9}, {     47,10}, \
+    {     31, 9}, {     63, 8}, {    127, 9}, {     71, 8}, \
+    {    143, 9}, {     79,10}, {     47,11}, {   2048,12}, \
+    {   4096,13}, {   8192,14}, {  16384,15}, {  32768,16} }
+#define MUL_FFT_TABLE3_SIZE 48
+#define MUL_FFT_THRESHOLD                 2688
+
+#define SQR_FFT_MODF_THRESHOLD             216  /* k = 5 */
  #define SQR_FFT_TABLE3                                      \
-  { {    276, 5}, {     19, 6}, {     17, 7}, {      9, 6}, \
-    {     20, 7}, {     11, 6}, {     23, 7}, {     19, 8}, \
-    {     11, 7}, {     27, 8}, {     15, 7}, {     33, 8}, \
-    {     19, 7}, {     39, 8}, {     23, 7}, {     47, 8}, \
-    {     27, 9}, {     15, 8}, {     39, 9}, {     23, 8}, \
-    {     47,10}, {     15, 9}, {     31, 8}, {     63, 9}, \
-    {     39, 8}, {     79, 9}, {     47,10}, {     31, 9}, \
-    {     79,10}, {     47,11}, {   2048,12}, {   4096,13}, \
-    {   8192,14}, {  16384,15}, {  32768,16} }
-#define SQR_FFT_TABLE3_SIZE 35
-#define SQR_FFT_THRESHOLD                 2688
+  { {    216, 5}, {      7, 4}, {     15, 5}, {     17, 6}, \
+    {     13, 7}, {      7, 6}, {     17, 7}, {      9, 6}, \
+    {     20, 7}, {     11, 6}, {     23, 7}, {     13, 8}, \
+    {      7, 7}, {     19, 8}, {     11, 7}, {     25, 9}, \
+    {      7, 8}, {     15, 7}, {     33, 8}, {     19, 7}, \
+    {     39, 8}, {     23, 9}, {     15, 8}, {     39, 9}, \
+    {     23, 8}, {     47,10}, {     15, 9}, {     31, 8}, \
+    {     63, 9}, {     39, 8}, {     79, 9}, {     47,10}, \
+    {     31, 9}, {     63, 8}, {    127, 9}, {     71, 8}, \
+    {    143, 9}, {     79,10}, {     47,11}, {   2048,12}, \
+    {   4096,13}, {   8192,14}, {  16384,15}, {  32768,16} }
+#define SQR_FFT_TABLE3_SIZE 44
+#define SQR_FFT_THRESHOLD                 1856
  
  #define MULLO_BASECASE_THRESHOLD             0  /* always */
-#define MULLO_DC_THRESHOLD                  54
-#define MULLO_MUL_N_THRESHOLD             6633
-
-#define DC_DIV_QR_THRESHOLD                 52
-#define DC_DIVAPPR_Q_THRESHOLD             185
-#define DC_BDIV_QR_THRESHOLD                53
-#define DC_BDIV_Q_THRESHOLD                122
-
-#define INV_MULMOD_BNM1_THRESHOLD           29
-#define INV_NEWTON_THRESHOLD               260
-#define INV_APPR_THRESHOLD                 220
-
-#define BINV_NEWTON_THRESHOLD              230
-#define REDC_1_TO_REDC_N_THRESHOLD          56
-
-#define MU_DIV_QR_THRESHOLD               1142
-#define MU_DIVAPPR_Q_THRESHOLD            1234
-#define MUPI_DIV_QR_THRESHOLD              114
-#define MU_BDIV_QR_THRESHOLD               792
-#define MU_BDIV_Q_THRESHOLD               1099
-
-#define MATRIX22_STRASSEN_THRESHOLD         15
-#define HGCD_THRESHOLD                     151
-#define GCD_DC_THRESHOLD                   599
-#define GCDEXT_DC_THRESHOLD                460
+#define MULLO_DC_THRESHOLD                  61
+#define MULLO_MUL_N_THRESHOLD             5240
+
+#define DC_DIV_QR_THRESHOLD                 70
+#define DC_DIVAPPR_Q_THRESHOLD             234
+#define DC_BDIV_QR_THRESHOLD                59
+#define DC_BDIV_Q_THRESHOLD                137
+
+#define INV_MULMOD_BNM1_THRESHOLD           36
+#define INV_NEWTON_THRESHOLD               327
+#define INV_APPR_THRESHOLD                 268
+
+#define BINV_NEWTON_THRESHOLD              324
+#define REDC_1_TO_REDC_N_THRESHOLD          63
+
+#define MU_DIV_QR_THRESHOLD               1099
+#define MU_DIVAPPR_Q_THRESHOLD            1360
+#define MUPI_DIV_QR_THRESHOLD              138
+#define MU_BDIV_QR_THRESHOLD               889
+#define MU_BDIV_Q_THRESHOLD               1234
+
+#define MATRIX22_STRASSEN_THRESHOLD         18
+#define HGCD_THRESHOLD                     167
+#define GCD_DC_THRESHOLD                   518
+#define GCDEXT_DC_THRESHOLD                378
  #define JACOBI_BASE_METHOD                   2
  
-#define GET_STR_DC_THRESHOLD                15
-#define GET_STR_PRECOMPUTE_THRESHOLD        35
-#define SET_STR_DC_THRESHOLD               915
-#define SET_STR_PRECOMPUTE_THRESHOLD      1670
+#define GET_STR_DC_THRESHOLD                14
+#define GET_STR_PRECOMPUTE_THRESHOLD        25
+#define SET_STR_DC_THRESHOLD               577
+#define SET_STR_PRECOMPUTE_THRESHOLD      1217
diff --git a/mpn/s390_32/logops_n.asm b/mpn/s390_32/logops_n.asm

new file mode 100644 (file)

index 0000000..61472ac
--- /dev/null
+++ b/mpn/s390_32/logops_n.asm
@@ -0,0 +1,284 @@
+dnl  S/390-32 logops.
+
+dnl  Copyright 2011 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb     variant 1           variant 2       variant 3
+C              rp!=up  rp=up
+C z900          ?       ?               ?               ?
+C z990          2.5     1               2.75            2.75
+C z9            ?                       ?               ?
+C z10           ?                       ?               ?
+C z196          ?                       ?               ?
+
+C INPUT PARAMETERS
+define(`rp',   `%r2')
+define(`up',   `%r3')
+define(`vp',   `%r4')
+define(`nn',   `%r5')
+
+ifdef(`OPERATION_and_n',`
+  define(`func',`mpn_and_n')
+  define(`VARIANT_1')
+  define(`LOGOPC',`nc')
+  define(`LOGOP',`n')')
+ifdef(`OPERATION_andn_n',`
+  define(`func',`mpn_andn_n')
+  define(`VARIANT_2')
+  define(`LOGOP',`n')')
+ifdef(`OPERATION_nand_n',`
+  define(`func',`mpn_nand_n')
+  define(`VARIANT_3')
+  define(`LOGOP',`n')')
+ifdef(`OPERATION_ior_n',`
+  define(`func',`mpn_ior_n')
+  define(`VARIANT_1')
+  define(`LOGOPC',`oc')
+  define(`LOGOP',`o')')
+ifdef(`OPERATION_iorn_n',`
+  define(`func',`mpn_iorn_n')
+  define(`VARIANT_2')
+  define(`LOGOP',`o')')
+ifdef(`OPERATION_nior_n',`
+  define(`func',`mpn_nior_n')
+  define(`VARIANT_3')
+  define(`LOGOP',`o')')
+ifdef(`OPERATION_xor_n',`
+  define(`func',`mpn_xor_n')
+  define(`VARIANT_1')
+  define(`LOGOPC',`xc')
+  define(`LOGOP',`x')')
+ifdef(`OPERATION_xnor_n',`
+  define(`func',`mpn_xnor_n')
+  define(`VARIANT_2')
+  define(`LOGOP',`x')')
+
+MULFUNC_PROLOGUE(mpn_and_n mpn_andn_n mpn_nand_n mpn_ior_n mpn_iorn_n mpn_nior_n mpn_xor_n mpn_xnor_n)
+
+ASM_START()
+PROLOGUE(func)
+ifdef(`VARIANT_1',`
+       cr      rp, up                  C operating in place (rp == up)?
+       jne     L(normal)               C no: use the load/op/store code at L(normal)
+
+       sll     nn, 2                   C nn = size in bytes (4 bytes per limb)
+       ahi     nn, -1                  C nn = bytes - 1
+       lr      %r1, nn
+       srl     %r1, 8                  C r1 = (bytes - 1) / 256 = passes of the 256-byte loop
+       ltr     %r1, %r1                C at most 256 bytes to process?
+       je      L(1)
+
+L(tp): LOGOPC  0(256, rp), 0(vp)       C storage-to-storage op: 256 bytes at rp combined with vp
+       la      rp, 256(rp)
+       la      vp, 256(vp)
+       brct    %r1, L(tp)
+
+L(1):  bras    %r1, L(2)               C make r1 point to the LOGOPC insn below
+       LOGOPC  0(1, rp), 0(vp)         C template only; reached solely through the ex
+L(2):  ex      nn, 0(%r1)              C execute LOGOPC with length ((bytes-1) mod 256)+1
+L(rtn):        br      %r14
+
+
+L(normal):
+       stm     %r6, %r8, 12(%r15)      C save r6-r8; restored by the lm before return
+       ahi     nn, 3                   C nn = n + 3
+       lhi     %r7, 3
+       lr      %r0, nn
+       srl     %r0, 2                  C r0 = (n + 3) / 4 = brct count, feed-in block included
+       nr      %r7, nn                 C (n + 3) mod 4, nn having been bumped by 3 above
+       je      L(b1)                   C n mod 4 = 1
+       chi     %r7, 2
+       jl      L(b2)                   C n mod 4 = 2
+       jne     L(top)                  C n mod 4 = 0; fall through when n mod 4 = 3
+
+L(b3): lm      %r5, %r7, 0(up)         C 3 limbs of up (overwrites nn; the count lives in r0)
+       la      up, 12(up)
+       LOGOP   %r5, 0(vp)              C combine each with the matching limb of vp
+       LOGOP   %r6, 4(vp)
+       LOGOP   %r7, 8(vp)
+       stm     %r5, %r7, 0(rp)         C store 3 result limbs
+       la      rp, 12(rp)
+       la      vp, 12(vp)
+       j       L(mid)
+
+L(b1): l       %r5, 0(up)              C 1 limb of up (overwrites nn; the count lives in r0)
+       la      up, 4(up)
+       LOGOP   %r5, 0(vp)
+       st      %r5, 0(rp)
+       la      rp, 4(rp)
+       la      vp, 4(vp)
+       j       L(mid)
+
+L(b2): lm      %r5, %r6, 0(up)         C 2 limbs of up (overwrites nn; the count lives in r0)
+       la      up, 8(up)
+       LOGOP   %r5, 0(vp)
+       LOGOP   %r6, 4(vp)
+       stm     %r5, %r6, 0(rp)
+       la      rp, 8(rp)
+       la      vp, 8(vp)
+       j       L(mid)
+
+L(top):        lm      %r5, %r8, 0(up)         C main loop: 4 limbs of up per pass
+       la      up, 16(up)
+       LOGOP   %r5, 0(vp)
+       LOGOP   %r6, 4(vp)
+       LOGOP   %r7, 8(vp)
+       LOGOP   %r8, 12(vp)
+       stm     %r5, %r8, 0(rp)         C store 4 result limbs
+       la      rp, 16(rp)
+       la      vp, 16(vp)
+L(mid):        brct    %r0, L(top)             C decrement r0 and loop while it is non-zero
+
+       lm      %r6, %r8, 12(%r15)      C restore r6-r8
+       br      %r14
+')
+
+ifdef(`VARIANT_2',`
+       stm     %r6, %r8, 12(%r15)      C save r6-r8; restored by the lm before return
+       lhi     %r1, -1                 C r1 = all ones, xor mask used to complement the vp limbs
+
+       ahi     nn, 3                   C nn = n + 3
+       lhi     %r7, 3
+       lr      %r0, nn
+       srl     %r0, 2                  C r0 = (n + 3) / 4 = brct count, feed-in block included
+       nr      %r7, nn                 C (n + 3) mod 4, nn having been bumped by 3 above
+       je      L(b1)                   C n mod 4 = 1
+       chi     %r7, 2
+       jl      L(b2)                   C n mod 4 = 2
+       jne     L(top)                  C n mod 4 = 0; fall through when n mod 4 = 3
+
+L(b3): lm      %r5, %r7, 0(vp)         C 3 limbs of vp (overwrites nn; the count lives in r0)
+       la      vp, 12(vp)
+       xr      %r5, %r1                C complement the vp limbs first
+       xr      %r6, %r1
+       xr      %r7, %r1
+       LOGOP   %r5, 0(up)              C then combine with the matching limbs of up
+       LOGOP   %r6, 4(up)
+       LOGOP   %r7, 8(up)
+       stm     %r5, %r7, 0(rp)         C store 3 result limbs
+       la      rp, 12(rp)
+       la      up, 12(up)
+       j       L(mid)
+
+L(b1): l       %r5, 0(vp)              C 1 limb of vp (overwrites nn; the count lives in r0)
+       la      vp, 4(vp)
+       xr      %r5, %r1
+       LOGOP   %r5, 0(up)
+       st      %r5, 0(rp)
+       la      rp, 4(rp)
+       la      up, 4(up)
+       j       L(mid)
+
+L(b2): lm      %r5, %r6, 0(vp)         C 2 limbs of vp (overwrites nn; the count lives in r0)
+       la      vp, 8(vp)
+       xr      %r5, %r1
+       xr      %r6, %r1
+       LOGOP   %r5, 0(up)
+       LOGOP   %r6, 4(up)
+       stm     %r5, %r6, 0(rp)
+       la      rp, 8(rp)
+       la      up, 8(up)
+       j       L(mid)
+
+L(top):        lm      %r5, %r8, 0(vp)         C main loop: 4 limbs of vp per pass
+       la      vp, 16(vp)
+       xr      %r5, %r1                C complement the vp limbs
+       xr      %r6, %r1
+       xr      %r7, %r1
+       xr      %r8, %r1
+       LOGOP   %r5, 0(up)              C combine with the matching limbs of up
+       LOGOP   %r6, 4(up)
+       LOGOP   %r7, 8(up)
+       LOGOP   %r8, 12(up)
+       la      up, 16(up)
+       stm     %r5, %r8, 0(rp)         C store 4 result limbs
+       la      rp, 16(rp)
+L(mid):        brct    %r0, L(top)             C decrement r0 and loop while it is non-zero
+
+       lm      %r6, %r8, 12(%r15)      C restore r6-r8
+       br      %r14
+')
+
+ifdef(`VARIANT_3',`
+       stm     %r6, %r8, 12(%r15)      C save r6-r8; restored by the lm before return
+       lhi     %r1, -1                 C r1 = all ones, xor mask used to complement the results
+
+       ahi     nn, 3                   C nn = n + 3
+       lhi     %r7, 3
+       lr      %r0, nn
+       srl     %r0, 2                  C r0 = (n + 3) / 4 = brct count, feed-in block included
+       nr      %r7, nn                 C (n + 3) mod 4, nn having been bumped by 3 above
+       je      L(b1)                   C n mod 4 = 1
+       chi     %r7, 2
+       jl      L(b2)                   C n mod 4 = 2
+       jne     L(top)                  C n mod 4 = 0; fall through when n mod 4 = 3
+
+L(b3): lm      %r5, %r7, 0(vp)         C 3 limbs of vp (overwrites nn; the count lives in r0)
+       la      vp, 12(vp)
+       LOGOP   %r5, 0(up)              C combine with the matching limbs of up first
+       LOGOP   %r6, 4(up)
+       xr      %r5, %r1                C then complement the result
+       xr      %r6, %r1
+       LOGOP   %r7, 8(up)
+       xr      %r7, %r1
+       stm     %r5, %r7, 0(rp)         C store 3 result limbs
+       la      rp, 12(rp)
+       la      up, 12(up)
+       j       L(mid)
+
+L(b1): l       %r5, 0(vp)              C 1 limb of vp (overwrites nn; the count lives in r0)
+       la      vp, 4(vp)
+       LOGOP   %r5, 0(up)
+       xr      %r5, %r1
+       st      %r5, 0(rp)
+       la      rp, 4(rp)
+       la      up, 4(up)
+       j       L(mid)
+
+L(b2): lm      %r5, %r6, 0(vp)         C 2 limbs of vp (overwrites nn; the count lives in r0)
+       la      vp, 8(vp)
+       LOGOP   %r5, 0(up)
+       LOGOP   %r6, 4(up)
+       xr      %r5, %r1
+       xr      %r6, %r1
+       stm     %r5, %r6, 0(rp)
+       la      rp, 8(rp)
+       la      up, 8(up)
+       j       L(mid)
+
+L(top):        lm      %r5, %r8, 0(vp)         C main loop: 4 limbs of vp per pass
+       la      vp, 16(vp)
+       LOGOP   %r5, 0(up)              C combine with the matching limbs of up
+       LOGOP   %r6, 4(up)
+       xr      %r5, %r1                C complement the result
+       xr      %r6, %r1
+       LOGOP   %r7, 8(up)
+       LOGOP   %r8, 12(up)
+       xr      %r7, %r1
+       xr      %r8, %r1
+       stm     %r5, %r8, 0(rp)         C store 4 result limbs
+       la      up, 16(up)
+       la      rp, 16(rp)
+L(mid):        brct    %r0, L(top)             C decrement r0 and loop while it is non-zero
+
+       lm      %r6, %r8, 12(%r15)      C restore r6-r8
+       br      %r14
+')
+
+EPILOGUE()
diff --git a/mpn/s390_32/lshift.asm b/mpn/s390_32/lshift.asm

new file mode 100644 (file)

index 0000000..17e5265
--- /dev/null
+++ b/mpn/s390_32/lshift.asm
@@ -0,0 +1,133 @@
+dnl  S/390-32 mpn_lshift.
+
+dnl  Copyright 2011 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C            cycles/limb
+C z900          6
+C z990          3
+C z9            ?
+C z10           ?
+C z196          ?
+
+C TODO
+C  *
+
+C INPUT PARAMETERS
+define(`rp',   `%r2')
+define(`up',   `%r3')
+define(`n',    `%r4')
+define(`cnt',  `%r5')
+
+ASM_START()
+PROLOGUE(mpn_lshift)
+       lr      %r1, n
+       sll     %r1, 2                  C r1 = 4*n, the operand size in bytes
+       stm     %r6, %r12, 24(%r15)     C save r6-r12; restored by the lm before return
+       la      up, 0(%r1,up)           C put up near end of U
+       la      rp, 0(%r1,rp)           C put rp near end of R
+       ahi     up, -20                 C now 16(up) addresses the last limb of U, up[n-1]
+       ahi     rp, -16                 C now 12(rp) addresses the last limb of R, rp[n-1]
+       lhi     %r8, 32
+       sr      %r8, cnt                C r8 = 32 - cnt
+       l       %r12, 16(up)            C r12 = up[n-1]
+       srl     %r12, 0(%r8)            C return value: up[n-1] >> (32 - cnt)
+       lhi     %r7, 3
+       nr      %r7, n                  C r7 = n mod 4
+       srl     n, 2                    C n = n / 4; NOTE(review): the je below presumably still tests the nr result
+       je      L(b0)                   C n mod 4 = 0
+       chi     %r7, 2
+       jl      L(b1)                   C n mod 4 = 1
+       je      L(b2)                   C n mod 4 = 2; fall through when n mod 4 = 3
+
+L(b3): l       %r10, 16(up)            C feed-in producing 2 result limbs
+       l       %r11, 12(up)
+       l       %r9,   8(up)
+       ahi     up, -8
+       lr      %r8, %r11               C each limb is the high half of one pair and the low half of the next
+       sldl    %r10, 0(cnt)            C shift pair r10:r11 left by cnt; r10 is a result limb
+       sldl    %r8,  0(cnt)            C shift pair r8:r9 left by cnt; r8 is a result limb
+       st      %r10, 12(rp)
+       st      %r8,   8(rp)
+       ahi     rp, -8
+       ltr     n, n                    C any 4-limb groups to do?
+       je      L(end)
+       j       L(top)
+
+L(b2): l       %r10, 16(up)            C feed-in producing 1 result limb
+       l       %r11, 12(up)
+       ahi     up, -4
+       sldl    %r10, 0(cnt)            C shift pair r10:r11 left by cnt; r10 is a result limb
+       st      %r10, 12(rp)
+       ahi     rp, -4
+       ltr     n, n                    C any 4-limb groups to do?
+       je      L(end)
+       j       L(top)
+
+L(b1): ltr     n, n                    C no feed-in limbs; any 4-limb groups to do?
+       je      L(end)
+       j       L(top)
+
+L(b0): l       %r10,16(up)             C feed-in producing 3 result limbs
+       l       %r8, 12(up)
+       l       %r6,  8(up)
+       l       %r0,  4(up)
+       ahi     up, -12
+       lr      %r11, %r8               C copy each limb into the low half of the pair above it
+       lr      %r9,  %r6
+       lr      %r7,  %r0
+       sldl    %r10,0(cnt)             C shift pairs r10:r11, r8:r9, r6:r7 left by cnt
+       sldl    %r8, 0(cnt)
+       sldl    %r6, 0(cnt)
+       st      %r10, 12(rp)
+       st      %r8,   8(rp)
+       st      %r6,   4(rp)
+       ahi     rp, -12
+       ahi     n, -1                   C this feed-in plus L(end) cover one whole group of 4
+       je      L(end)
+
+       ALIGN(8)
+L(top):        l       %r10, 16(up)            C main loop: 5 limbs loaded, 4 result limbs stored per pass
+       l       %r8,  12(up)
+       l       %r6,   8(up)
+       l       %r0,   4(up)
+       l       %r1,   0(up)
+       lr      %r11, %r8               C copy each limb into the low half of the pair above it
+       lr      %r9,  %r6
+       lr      %r7,  %r0
+       ahi     up, -16
+       sldl    %r10, 0(cnt)            C shift pairs r10:r11, r8:r9, r6:r7, r0:r1 left by cnt
+       sldl    %r8,  0(cnt)
+       sldl    %r6,  0(cnt)
+       sldl    %r0,  0(cnt)
+       st      %r10, 12(rp)
+       st      %r8,   8(rp)
+       st      %r6,   4(rp)
+       st      %r0,   0(rp)
+       ahi     rp, -16
+       brct    n, L(top)               C decrement n and loop while it is non-zero
+
+L(end):        l       %r10, 16(up)            C remaining limb has no lower neighbour
+       sll     %r10, 0(cnt)            C single-register shift left by cnt
+       st      %r10, 12(rp)
+
+       lr      %r2, %r12               C return the bits computed at entry
+       lm      %r6, %r12, 24(%r15)     C restore r6-r12
+       br      %r14
+EPILOGUE()
diff --git a/mpn/s390_32/lshiftc.asm b/mpn/s390_32/lshiftc.asm

new file mode 100644 (file)

index 0000000..9bdd0d7
--- /dev/null
+++ b/mpn/s390_32/lshiftc.asm
@@ -0,0 +1,145 @@
+dnl  S/390-32 mpn_lshiftc.
+
+dnl  Copyright 2011 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C            cycles/limb
+C z900          7
+C z990          3.375
+C z9            ?
+C z10           ?
+C z196          ?
+
+C TODO
+C  *
+
+C INPUT PARAMETERS
+define(`rp',   `%r2')
+define(`up',   `%r3')
+define(`n',    `%r4')
+define(`cnt',  `%r5')
+
+ASM_START()
+PROLOGUE(mpn_lshiftc)
+       lr      %r1, n
+       sll     %r1, 2                  C r1 = 4*n, the operand size in bytes
+       stm     %r6, %r13, 24(%r15)     C save r6-r13; restored by the lm before return
+       la      up, 0(%r1,up)           C put up near end of U
+       la      rp, 0(%r1,rp)           C put rp near end of R
+       ahi     up, -20                 C now 16(up) addresses the last limb of U, up[n-1]
+       ahi     rp, -16                 C now 12(rp) addresses the last limb of R, rp[n-1]
+       lhi     %r8, 32
+       sr      %r8, cnt                C r8 = 32 - cnt
+       l       %r12, 16(up)            C r12 = up[n-1]
+       srl     %r12, 0(%r8)            C return value: up[n-1] >> (32 - cnt), not complemented
+       lhi     %r13, -1                C r13 = all ones, xor mask used to complement every stored limb
+       lhi     %r7, 3
+       nr      %r7, n                  C r7 = n mod 4
+       srl     n, 2                    C n = n / 4; NOTE(review): the je below presumably still tests the nr result
+       je      L(b0)                   C n mod 4 = 0
+       chi     %r7, 2
+       jl      L(b1)                   C n mod 4 = 1
+       je      L(b2)                   C n mod 4 = 2; fall through when n mod 4 = 3
+
+L(b3): l       %r10, 16(up)            C feed-in producing 2 result limbs
+       l       %r11, 12(up)
+       l       %r9,   8(up)
+       ahi     up, -8
+       lr      %r8, %r11               C each limb is the high half of one pair and the low half of the next
+       sldl    %r10, 0(cnt)            C shift pair r10:r11 left by cnt; r10 is a result limb
+       sldl    %r8,  0(cnt)            C shift pair r8:r9 left by cnt; r8 is a result limb
+       xr      %r10, %r13              C complement before storing
+       xr      %r8, %r13
+       st      %r10, 12(rp)
+       st      %r8,   8(rp)
+       ahi     rp, -8
+       ltr     n, n                    C any 4-limb groups to do?
+       je      L(end)
+       j       L(top)
+
+L(b2): l       %r10, 16(up)            C feed-in producing 1 result limb
+       l       %r11, 12(up)
+       ahi     up, -4
+       sldl    %r10, 0(cnt)            C shift pair r10:r11 left by cnt; r10 is a result limb
+       xr      %r10, %r13              C complement before storing
+       st      %r10, 12(rp)
+       ahi     rp, -4
+       ltr     n, n                    C any 4-limb groups to do?
+       je      L(end)
+       j       L(top)
+
+L(b1): ltr     n, n                    C no feed-in limbs; any 4-limb groups to do?
+       je      L(end)
+       j       L(top)
+
+L(b0): l       %r10,16(up)             C feed-in producing 3 result limbs
+       l       %r8, 12(up)
+       l       %r6,  8(up)
+       l       %r0,  4(up)
+       ahi     up, -12
+       lr      %r11, %r8               C copy each limb into the low half of the pair above it
+       lr      %r9,  %r6
+       lr      %r7,  %r0
+       sldl    %r10,0(cnt)             C shift pairs r10:r11, r8:r9, r6:r7 left by cnt
+       sldl    %r8, 0(cnt)
+       sldl    %r6, 0(cnt)
+       xr      %r10, %r13              C complement before storing
+       xr      %r8, %r13
+       xr      %r6, %r13
+       st      %r10, 12(rp)
+       st      %r8,   8(rp)
+       st      %r6,   4(rp)
+       ahi     rp, -12
+       ahi     n, -1                   C this feed-in plus L(end) cover one whole group of 4
+       je      L(end)
+
+       ALIGN(8)
+L(top):        l       %r10, 16(up)            C main loop: 5 limbs loaded, 4 result limbs stored per pass
+       l       %r8,  12(up)
+       l       %r6,   8(up)
+       l       %r0,   4(up)
+       l       %r1,   0(up)
+       lr      %r11, %r8               C copy each limb into the low half of the pair above it
+       lr      %r9,  %r6
+       lr      %r7,  %r0
+       ahi     up, -16
+       sldl    %r10, 0(cnt)            C shift pairs r10:r11, r8:r9, r6:r7, r0:r1 left by cnt
+       sldl    %r8,  0(cnt)
+       sldl    %r6,  0(cnt)
+       sldl    %r0,  0(cnt)
+       xr      %r10, %r13              C complement before storing
+       xr      %r8, %r13
+       xr      %r6, %r13
+       xr      %r0, %r13
+       st      %r10, 12(rp)
+       st      %r8,   8(rp)
+       st      %r6,   4(rp)
+       st      %r0,   0(rp)
+       ahi     rp, -16
+       brct    n, L(top)               C decrement n and loop while it is non-zero
+
+L(end):        l       %r10, 16(up)            C remaining limb has no lower neighbour
+       sll     %r10, 0(cnt)            C single-register shift left by cnt
+       xr      %r10, %r13              C complement; the vacated low bits therefore become ones
+       st      %r10, 12(rp)
+
+       lr      %r2, %r12               C return the bits computed at entry
+       lm      %r6, %r13, 24(%r15)     C restore r6-r13
+       br      %r14
+EPILOGUE()
diff --git a/mpn/s390_32/rshift.asm b/mpn/s390_32/rshift.asm

new file mode 100644 (file)

index 0000000..becbe18
--- /dev/null
+++ b/mpn/s390_32/rshift.asm
@@ -0,0 +1,127 @@
+dnl  S/390-32 mpn_rshift.
+
+dnl  Copyright 2011 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C            cycles/limb
+C z900          6
+C z990          3
+C z9            ?
+C z10           ?
+C z196          ?
+
+C TODO
+C  *
+
+C INPUT PARAMETERS
+define(`rp',   `%r2')
+define(`up',   `%r3')
+define(`n',    `%r4')
+define(`cnt',  `%r5')
+
+ASM_START()
+PROLOGUE(mpn_rshift)
+       stm     %r6, %r12, 24(%r15)     C save r6-r12; restored by the lm before return
+       lhi     %r8, 32
+       sr      %r8, cnt                C r8 = 32 - cnt
+       l       %r12, 0(up)             C r12 = first limb of U, up[0]
+       sll     %r12, 0(%r8)            C return value: up[0] << (32 - cnt)
+       lhi     %r7, 3
+       nr      %r7, n                  C r7 = n mod 4
+       srl     n, 2                    C n = n / 4; NOTE(review): the je below presumably still tests the nr result
+       je      L(b0)                   C n mod 4 = 0
+       chi     %r7, 2
+       jl      L(b1)                   C n mod 4 = 1
+       je      L(b2)                   C n mod 4 = 2; fall through when n mod 4 = 3
+
+L(b3): l       %r11, 0(up)             C feed-in producing 2 result limbs
+       l       %r10, 4(up)
+       l       %r8,  8(up)
+       ahi     up, 8
+       lr      %r9, %r10               C each limb is the low half of one pair and the high half of the next
+       srdl    %r10, 0(cnt)            C shift pair r10:r11 right by cnt; r11 is a result limb
+       srdl    %r8,  0(cnt)            C shift pair r8:r9 right by cnt; r9 is a result limb
+       st      %r11, 0(rp)
+       st      %r9,  4(rp)
+       ahi     rp, 8
+       ltr     n, n                    C any 4-limb groups to do?
+       je      L(end)
+       j       L(top)
+
+L(b2): l       %r11, 0(up)             C feed-in producing 1 result limb
+       l       %r10, 4(up)
+       ahi     up, 4
+       srdl    %r10, 0(cnt)            C shift pair r10:r11 right by cnt; r11 is a result limb
+       st      %r11, 0(rp)
+       ahi     rp, 4
+       ltr     n, n                    C any 4-limb groups to do?
+       je      L(end)
+       j       L(top)
+
+L(b1): ltr     n, n                    C no feed-in limbs; any 4-limb groups to do?
+       je      L(end)
+       j       L(top)
+
+L(b0): l       %r11, 0(up)             C feed-in producing 3 result limbs
+       l       %r9,  4(up)
+       l       %r7,  8(up)
+       l       %r1, 12(up)
+       ahi     up, 12
+       lr      %r10, %r9               C copy each limb into the high half of the pair below it
+       lr      %r8,  %r7
+       lr      %r6,  %r1
+       srdl    %r10, 0(cnt)            C shift pairs r10:r11, r8:r9, r6:r7 right by cnt
+       srdl    %r8,  0(cnt)
+       srdl    %r6,  0(cnt)
+       st      %r11, 0(rp)
+       st      %r9,  4(rp)
+       st      %r7,  8(rp)
+       ahi     rp, 12
+       ahi     n, -1                   C this feed-in plus L(end) cover one whole group of 4
+       je      L(end)
+
+       ALIGN(8)
+L(top):        l       %r11, 0(up)             C main loop: 5 limbs loaded, 4 result limbs stored per pass
+       l       %r9,  4(up)
+       l       %r7,  8(up)
+       l       %r1, 12(up)
+       l       %r0, 16(up)
+       lr      %r10, %r9               C copy each limb into the high half of the pair below it
+       lr      %r8,  %r7
+       lr      %r6,  %r1
+       ahi     up, 16
+       srdl    %r10, 0(cnt)            C shift pairs r10:r11, r8:r9, r6:r7, r0:r1 right by cnt
+       srdl    %r8,  0(cnt)
+       srdl    %r6,  0(cnt)
+       srdl    %r0,  0(cnt)
+       st      %r11, 0(rp)
+       st      %r9,  4(rp)
+       st      %r7,  8(rp)
+       st      %r1, 12(rp)
+       ahi     rp, 16
+       brct    n, L(top)               C decrement n and loop while it is non-zero
+
+L(end):        l       %r11, 0(up)             C remaining limb has no upper neighbour
+       srl     %r11, 0(cnt)            C single-register shift right by cnt
+       st      %r11, 0(rp)
+
+       lr      %r2, %r12               C return the bits computed at entry
+       lm      %r6, %r12, 24(%r15)     C restore r6-r12
+       br      %r14
+EPILOGUE()
diff --git a/mpn/s390_64/README b/mpn/s390_64/README

new file mode 100644 (file)

index 0000000..82b68a0
--- /dev/null
+++ b/mpn/s390_64/README
@@ -0,0 +1,77 @@
+Copyright 2011 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+
+
+There are 5 generations of 64-bit s390 processors, z900, z990, z9,
+z10, and z196.  The current GMP code was optimised for the two oldest,
+z900 and z990.
+
+
+mpn_copyi
+
+This code makes use of a loop around MVC.  It almost surely runs very
+close to optimally.  A small improvement could be done by using one
+MVC for size 256 bytes, now we use two (we use an extra MVC when
+copying any multiple of 256 bytes).
+
+
+mpn_copyd
+
+We have tried several feed-in variants here, branch tree, jump table
+and computed goto.  The fastest (on z990) turned out to be computed
+goto.
+
+An approach not tried is EX of LMG and STMG, modifying the register set
+on-the-fly.  Using that trick, we could completely avoid using
+separate feed-in paths.
+
+
+mpn_lshift, mpn_rshift
+
+The current code runs at pipeline decode bandwidth on z990.
+
+
+mpn_add_n, mpn_sub_n
+
+The current code is 4-way unrolled.  It should be unrolled more, at
+least 8x, in order to reach 2.5 c/l.
+
+
+mpn_mul_1, mpn_addmul_1, mpn_submul_1
+
+The current code is very naive, but due to the non-pipelined nature of
+MLGR on z900 and z990, more sophisticated code would not gain much.
+
+On z10 one would need to cluster at least 4 MLGR together, in order to
+reduce stalling.
+
+On z196, one surely wants to use unrolling and pipelining, to perhaps
+reach around 12 c/l.  A major issue here and on z10 is ALCGR's 3 cycle
+stalling.
+
+
+mpn_mul_2, mpn_addmul_2
+
+At least for older machines (z900, z990) with very slow MLGR, we
+should use Karatsuba's algorithm on 2-limb units, making mul_2 and
+addmul_2 the main multiplication primitives.  The newer machines might
+benefit less from this approach, perhaps in particular z10, where MLGR
+clustering is more important.
+
+With Karatsuba, one could hope for around 16 cycles per accumulated
+128-bit cross product, on z990.
diff --git a/mpn/s390_64/addmul_1.asm b/mpn/s390_64/addmul_1.asm

new file mode 100644 (file)

index 0000000..ad4c74c
--- /dev/null
+++ b/mpn/s390_64/addmul_1.asm
@@ -0,0 +1,61 @@
+dnl  S/390-64 mpn_addmul_1
+
+dnl  Copyright 2011 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C            cycles/limb
+C z900         34
+C z990         23
+C z9            ?
+C z10           ?
+C z196          ?
+
+C INPUT PARAMETERS
+define(`rp',   `%r2')
+define(`up',   `%r3')
+define(`n',    `%r4')
+define(`v0',   `%r5')
+
+define(`z',    `%r9')
+
+ASM_START()
+PROLOGUE(mpn_addmul_1)
+       stmg    %r9, %r12, 72(%r15)     C save r9-r12; the lmg before return restores them
+       lghi    %r12, 0                 C zero index register
+       aghi    %r12, 0                 C clear carry flag
+       lghi    %r11, 0                 C clear carry limb
+       lghi    z, 0                    C keep register zero
+
+L(top):        lg      %r1, 0(%r12,up)         C r1 = next limb of up
+       lg      %r10, 0(%r12,rp)        C r10 = next limb of rp
+       mlgr    %r0, v0                 C r0:r1 = r1 * v0, high word in r0, low word in r1
+       alcgr   %r1, %r10               C low product word + rp limb + pending carry
+       alcgr   %r0, z                  C propagate the carry into the high product word
+       algr    %r1, %r11               C add the carry limb of the previous iteration
+       lgr     %r11, %r0               C r11 = carry limb for the next iteration
+       stg     %r1, 0(%r12,rp)         C store the result limb back to rp
+       la      %r12, 8(%r12)           C advance the index by one 8-byte limb
+       brctg   n, L(top)               C decrement n and loop while it is non-zero
+
+       lghi    %r2, 0
+       alcgr   %r2, %r11               C return carry limb; NOTE(review): presumably plus the carry left by the last algr
+
+       lmg     %r9, %r12, 72(%r15)     C restore r9-r12
+       br      %r14                    C return to caller
+EPILOGUE()
diff --git a/mpn/s390_64/aorrlsh1_n.asm b/mpn/s390_64/aorrlsh1_n.asm

new file mode 100644 (file)

index 0000000..3bd9b11
--- /dev/null
+++ b/mpn/s390_64/aorrlsh1_n.asm
@@ -0,0 +1,157 @@
+dnl  S/390-64 mpn_addlsh1_n and mpn_rsblsh1_n.
+
+dnl  Copyright 2011 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C            cycles/limb
+C z900          9
+C z990          4.75
+C z9            ?
+C z10           ?
+C z196          ?
+
+C TODO
+C  * Optimise for small n, avoid 'la' like in aors_n.asm.
+C  * Tune to reach 3.5 c/l.  For addlsh1, we could let the main alcgr propagate
+C    carry to the lsh1 alcgr.
+C  * Compute RETVAL for rsblsh1_n less stupidly.
+
+C INPUT PARAMETERS
+define(`rp',   `%r2')                  C destination limb pointer
+define(`up',   `%r3')                  C operand combined unshifted through ADSB and ADSBC
+define(`vp',   `%r4')                  C operand whose limbs are doubled before being combined
+define(`n',    `%r5')                  C limb count; r5 is reused for data once r0 holds the pass count
+
+ifdef(`OPERATION_addlsh1_n',`
+  define(ADSB,         alg)             C first limb of a chain: add without carry-in
+  define(ADSBC,                alcg)    C following limbs: add with carry-in
+  define(INITCY,       `lghi   %r9, -1')        C r9 = -1 means no pending carry
+  define(RETVAL,       `la     %r2, 2(%r1,%r9)')        C r2 = 2 + r1 + r9 with r1 and r9 each 0 or -1
+  define(func, mpn_addlsh1_n)
+')
+ifdef(`OPERATION_rsblsh1_n',`
+  define(ADSB,         slg)             C first limb of a chain: subtract without borrow-in
+  define(ADSBC,                slbg)    C following limbs: subtract with borrow-in
+  define(INITCY,       `lghi   %r9, 0') C r9 = 0 means no pending borrow
+  define(RETVAL,`dnl
+       algr    %r1, %r9
+       lghi    %r2, 1
+       algr    %r2, %r1')              C r2 = 1 + r1 + r9 with r1 and r9 each 0 or -1
+  define(func, mpn_rsblsh1_n)
+')
+
+MULFUNC_PROLOGUE(mpn_addlsh1_n mpn_rsblsh1_n)
+
+ASM_START()
+PROLOGUE(func)                         C n-limb rp = 2*vp + up (addlsh1_n) or 2*vp - up (rsblsh1_n); r2 returns the RETVAL result
+       stmg    %r6, %r9, 48(%r15)      C save r6-r9, reloaded at L(end)
+
+       aghi    n, 3
+       lghi    %r7, 3
+       srlg    %r0, n, 2               C r0 = (n+3)/4, the pass counter used by brctg
+       ngr     %r7, n                  C (n+3) mod 4; n was biased by 3 above
+       je      L(b1)                   C 0: n = 1 mod 4
+       cghi    %r7, 2
+       jl      L(b2)                   C 1: n = 2 mod 4
+       jne     L(b0)                   C 3: n = 0 mod 4; the value 2 falls through with n = 3 mod 4
+
+L(b3): lmg     %r5, %r7, 0(vp)         C three leading vp limbs
+       la      vp, 24(vp)
+
+       algr    %r5, %r5                C double the limbs, chaining the shifted-out bits
+       alcgr   %r6, %r6
+       alcgr   %r7, %r7
+       slbgr   %r1, %r1                C save shift carry: r1 = 0 if set, -1 if clear
+
+       ADSB    %r5, 0(up)
+       ADSBC   %r6, 8(up)
+       ADSBC   %r7, 16(up)
+       la      up, 24(up)
+       slbgr   %r9, %r9                C save the add/subtract carry flag the same way
+
+       stmg    %r5, %r7, 0(rp)
+       la      rp, 24(rp)
+       brctg   %r0, L(top)             C more passes left: enter the 4-limb loop
+       j       L(end)
+
+L(b0): lghi    %r1, -1                 C no shift carry yet
+       INITCY                          C no add/subtract carry yet
+       j       L(top)
+
+L(b1): lg      %r5, 0(vp)              C one leading vp limb
+       la      vp, 8(vp)
+
+       algr    %r5, %r5
+       slbgr   %r1, %r1                C save shift carry
+       ADSB    %r5, 0(up)
+       la      up, 8(up)
+       slbgr   %r9, %r9                C save add/subtract carry
+
+       stg     %r5, 0(rp)
+       la      rp, 8(rp)
+       brctg   %r0, L(top)
+       j       L(end)
+
+L(b2): lmg     %r5, %r6, 0(vp)         C two leading vp limbs
+       la      vp, 16(vp)
+
+       algr    %r5, %r5
+       alcgr   %r6, %r6
+       slbgr   %r1, %r1                C save shift carry
+
+       ADSB    %r5, 0(up)
+       ADSBC   %r6, 8(up)
+       la      up, 16(up)
+       slbgr   %r9, %r9                C save add/subtract carry
+
+       stmg    %r5, %r6, 0(rp)
+       la      rp, 16(rp)
+       brctg   %r0, L(top)
+       j       L(end)
+
+L(top):        lmg     %r5, %r8, 0(vp)  C main loop, four limbs per pass
+       la      vp, 32(vp)
+
+       aghi    %r1, 1                  C restore carry
+
+       alcgr   %r5, %r5
+       alcgr   %r6, %r6
+       alcgr   %r7, %r7
+       alcgr   %r8, %r8
+
+       slbgr   %r1, %r1                C save carry
+
+       aghi    %r9, 1                  C restore carry
+
+       ADSBC   %r5, 0(up)
+       ADSBC   %r6, 8(up)
+       ADSBC   %r7, 16(up)
+       ADSBC   %r8, 24(up)
+       la      up, 32(up)
+
+       slbgr   %r9, %r9                C save carry
+
+       stmg    %r5, %r8, 0(rp)
+       la      rp, 32(rp)
+       brctg   %r0, L(top)
+
+L(end):        RETVAL                   C r2 = combination of the saved shift carry in r1 and the saved add/subtract carry in r9
+       lmg     %r6, %r9, 48(%r15)
+       br      %r14
+EPILOGUE()
diff --git a/mpn/s390_64/aors_n.asm b/mpn/s390_64/aors_n.asm

new file mode 100644 (file)

index 0000000..6d22ff9
--- /dev/null
+++ b/mpn/s390_64/aors_n.asm
@@ -0,0 +1,125 @@
+dnl  S/390-64 mpn_add_n and mpn_sub_n.
+
+dnl  Copyright 2011 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C            cycles/limb
+C z900          5.5
+C z990          3
+C z9            ?
+C z10           ?
+C z196          ?
+
+C TODO
+C  * Optimise for small n
+C  * Use r0 and save/restore one less register
+C  * Using logops_n's v1 inner loop operand order makes the loop about 20%
+C    faster, at the expense of highly alignment-dependent performance.
+
+C INPUT PARAMETERS
+define(`rp',   `%r2')                  C destination limb pointer
+define(`up',   `%r3')                  C first operand, loaded into registers
+define(`vp',   `%r4')                  C second operand, applied from memory through ADSB and ADSBC
+define(`n',    `%r5')                  C limb count; r5 is reused for data once r1 holds the pass count
+
+ifdef(`OPERATION_add_n', `
+  define(ADSB,         alg)             C first limb of a chain: add without carry-in
+  define(ADSBCR,       alcgr)           C register form of add with carry; not referenced in this file
+  define(ADSBC,                alcg)    C following limbs: add with carry-in
+  define(RETVAL,`dnl
+       lghi    %r2, 0
+       alcgr   %r2, %r2')              C r2 = 0 + 0 + carry flag
+  define(func,         mpn_add_n)
+  define(func_nc,      mpn_add_nc)')   C name only; this file has no PROLOGUE that uses it
+ifdef(`OPERATION_sub_n', `
+  define(ADSB,         slg)             C first limb of a chain: subtract without borrow-in
+  define(ADSBCR,       slbgr)           C register form of subtract with borrow; not referenced in this file
+  define(ADSBC,                slbg)    C following limbs: subtract with borrow-in
+  define(RETVAL,`dnl
+       slbgr   %r2, %r2
+       lcgr    %r2, %r2')              C slbgr gives 0 or -1, lcgr negates it to 0 or 1
+  define(func,         mpn_sub_n)
+  define(func_nc,      mpn_sub_nc)')   C name only; this file has no PROLOGUE that uses it
+
+MULFUNC_PROLOGUE(mpn_add_n mpn_sub_n)
+
+ASM_START()
+PROLOGUE(func)                         C n-limb rp = up + vp (add_n) or up - vp (sub_n); r2 returns the carry or borrow
+       stmg    %r6, %r8, 48(%r15)      C save r6-r8, reloaded at L(end)
+
+       aghi    n, 3
+       lghi    %r7, 3
+       srlg    %r1, n, 2               C r1 = (n+3)/4, the pass counter used by brctg
+       ngr     %r7, n                  C (n+3) mod 4; n was biased by 3 above
+       je      L(b1)                   C 0: n = 1 mod 4
+       cghi    %r7, 2
+       jl      L(b2)                   C 1: n = 2 mod 4
+       jne     L(b0)                   C 3: n = 0 mod 4; the value 2 falls through with n = 3 mod 4
+
+L(b3): lmg     %r5, %r7, 0(up)         C three leading limbs
+       la      up, 24(up)
+       ADSB    %r5, 0(vp)
+       ADSBC   %r6, 8(vp)
+       ADSBC   %r7, 16(vp)
+       la      vp, 24(vp)
+       stmg    %r5, %r7, 0(rp)
+       la      rp, 24(rp)
+       brctg   %r1, L(top)             C more passes left: enter the 4-limb loop
+       j       L(end)
+
+L(b0): lmg     %r5, %r8, 0(up)         C These redundant insns are no mistake,
+       la      up, 32(up)              C they are needed to make main loop run
+       ADSB    %r5, 0(vp)              C fast for n = 0 (mod 4).
+       ADSBC   %r6, 8(vp)
+       j       L(m0)                   C join the loop body halfway through
+
+L(b1): lg      %r5, 0(up)              C one leading limb
+       la      up, 8(up)
+       ADSB    %r5, 0(vp)
+       la      vp, 8(vp)
+       stg     %r5, 0(rp)
+       la      rp, 8(rp)
+       brctg   %r1, L(top)
+       j       L(end)
+
+L(b2): lmg     %r5, %r6, 0(up)         C two leading limbs
+       la      up, 16(up)
+       ADSB    %r5, 0(vp)
+       ADSBC   %r6, 8(vp)
+       la      vp, 16(vp)
+       stmg    %r5, %r6, 0(rp)
+       la      rp, 16(rp)
+       brctg   %r1, L(top)
+       j       L(end)
+
+L(top):        lmg     %r5, %r8, 0(up)  C four limbs per pass; the carry flag stays live from pass to pass
+       la      up, 32(up)
+       ADSBC   %r5, 0(vp)
+       ADSBC   %r6, 8(vp)
+L(m0): ADSBC   %r7, 16(vp)             C entry point used by L(b0)
+       ADSBC   %r8, 24(vp)
+       la      vp, 32(vp)
+       stmg    %r5, %r8, 0(rp)
+       la      rp, 32(rp)
+       brctg   %r1, L(top)
+
+L(end):        RETVAL                   C turn the final carry flag into the 0 or 1 return value
+       lmg     %r6, %r8, 48(%r15)
+       br      %r14
+EPILOGUE()
diff --git a/mpn/s390_64/bdiv_dbm1c.asm b/mpn/s390_64/bdiv_dbm1c.asm

new file mode 100644 (file)

index 0000000..21b0a0d
--- /dev/null
+++ b/mpn/s390_64/bdiv_dbm1c.asm
@@ -0,0 +1,54 @@
+dnl  S/390-64 mpn_bdiv_dbm1c
+
+dnl  Copyright 2011 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C            cycles/limb
+C z900         29
+C z990         22
+C z9            ?
+C z10           ?
+C z196          ?
+
+C INPUT PARAMETERS
+define(`qp',     `%r2')                C output limb pointer
+define(`up',     `%r3')                C input limb pointer
+define(`n',      `%r4')                C loop trip count, counted down by brctg
+define(`bd',     `%r5')                C multiplier applied to every up limb
+define(`cy',     `%r6')                C running value; the code below names r6 directly
+
+ASM_START()
+       TEXT
+       ALIGN(16)
+PROLOGUE(mpn_bdiv_dbm1c)               C per limb: r6 -= low half of up[i]*bd, qp[i] = r6, r6 -= high half + borrow; returns r6
+       stmg    %r6, %r7, 48(%r15)      C save r6-r7, reloaded before return
+       lghi    %r7, 0                  C zero index register
+
+L(top):        lg      %r1, 0(%r7,up)
+       mlgr    %r0, bd                 C r0:r1 = up limb * bd, high half in r0 and low half in r1
+       slgr    %r6, %r1                C r6 -= low half
+       stg     %r6, 0(%r7,qp)          C the difference is the next qp limb
+       la      %r7, 8(%r7)
+       slbgr   %r6, %r0                C r6 -= high half + borrow from the slgr above
+       brctg   n, L(top)
+
+       lgr     %r2, %r6                C return the final running value
+       lmg     %r6, %r7, 48(%r15)
+       br      %r14
+EPILOGUE()
diff --git a/mpn/s390_64/copyd.asm b/mpn/s390_64/copyd.asm

new file mode 100644 (file)

index 0000000..4873a44
--- /dev/null
+++ b/mpn/s390_64/copyd.asm
@@ -0,0 +1,133 @@
+dnl  S/390-64 mpn_copyd
+
+dnl  Copyright 2011 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+
+include(`../config.m4')
+
+C            cycles/limb
+C z900          2.67
+C z990           1.5
+C z9            ?
+C z10           ?
+C z196          ?
+
+C FIXME:
+C  * Avoid saving/restoring callee-saves registers for n < 3.  This could be
+C    done by setting rp=r1, up=r2, i=r0 and r3,r4,r5 for clock regs.
+C    We could then use r3...r10 in main loop.
+C  * Could we use some EX trick, modifying lmg/stmg, for the feed-in code?
+
+C INPUT PARAMETERS
+define(`rp_param',     `%r2')          C destination base as passed in
+define(`up_param',     `%r3')          C source base as passed in
+define(`n',            `%r4')          C limb count
+
+define(`rp',   `%r8')                  C working destination pointer, set to rp_param + 8n - 64
+define(`up',   `%r9')                  C working source pointer, set to up_param + 8n - 64
+
+ASM_START()
+PROLOGUE(mpn_copyd)                    C copy n limbs from up_param to rp_param, starting at the highest address and moving down
+       stmg    %r6, %r11, 48(%r15)     C save r6-r11, reloaded at L(end)
+
+       sllg    %r1, n, 3               C r1 = 8n bytes
+       la      %r10, 8(n)
+       aghi    %r1, -64                C r1 = 8n - 64, offset of the topmost 8-limb group
+       srlg    %r10, %r10, 3           C r10 = (n+8)/8 = number of full 8-limb groups + 1, for brctg
+       lghi    %r11, -64               C per-pass pointer step: 8 limbs downwards
+
+       la      rp, 0(%r1,rp_param)     C FIXME use lay on z990 and later
+       la      up, 0(%r1,up_param)     C FIXME use lay on z990 and later
+
+       lghi    %r7, 7
+       ngr     %r7, n                  C n mod 8
+       cghi    %r7, 2
+       jh      L(b34567)
+       cghi    %r7, 1
+       je      L(b1)
+       jh      L(b2)
+
+L(b0): brctg   %r10, L(top)            C no odd limbs; loop only if a full group exists
+       j       L(end)
+
+L(b1): lg      %r0, 56(up)             C the single topmost limb
+       aghi    up, -8
+       stg     %r0, 56(rp)
+       aghi    rp, -8
+       brctg   %r10, L(top)
+       j       L(end)
+
+L(b2): lmg     %r0, %r1, 48(up)        C the topmost 2 limbs
+       aghi    up, -16
+       stmg    %r0, %r1, 48(rp)
+       aghi    rp, -16
+       brctg   %r10, L(top)
+       j       L(end)
+
+L(b34567):
+       cghi    %r7, 4
+       jl      L(b3)
+       je      L(b4)
+       cghi    %r7, 6
+       je      L(b6)
+       jh      L(b7)                   C otherwise fall through with n mod 8 = 5
+
+L(b5): lmg     %r0, %r4, 24(up)        C the topmost 5 limbs; r4 is overwritten, n is no longer read
+       aghi    up, -40
+       stmg    %r0, %r4, 24(rp)
+       aghi    rp, -40
+       brctg   %r10, L(top)
+       j       L(end)
+
+L(b3): lmg     %r0, %r2, 40(up)        C the topmost 3 limbs
+       aghi    up, -24
+       stmg    %r0, %r2, 40(rp)
+       aghi    rp, -24
+       brctg   %r10, L(top)
+       j       L(end)
+
+L(b4): lmg     %r0, %r3, 32(up)        C the topmost 4 limbs
+       aghi    up, -32
+       stmg    %r0, %r3, 32(rp)
+       aghi    rp, -32
+       brctg   %r10, L(top)
+       j       L(end)
+
+L(b6): lmg     %r0, %r5, 16(up)        C the topmost 6 limbs
+       aghi    up, -48
+       stmg    %r0, %r5, 16(rp)
+       aghi    rp, -48
+       brctg   %r10, L(top)
+       j       L(end)
+
+L(b7): lmg     %r0, %r6, 8(up)         C the topmost 7 limbs; r6 was saved in the prologue
+       aghi    up, -56
+       stmg    %r0, %r6, 8(rp)
+       aghi    rp, -56
+       brctg   %r10, L(top)
+       j       L(end)
+
+L(top):        lmg     %r0, %r7, 0(up)  C eight limbs per pass
+       la      up, 0(%r11,up)          C up -= 64
+       stmg    %r0, %r7, 0(rp)
+       la      rp, 0(%r11,rp)          C rp -= 64
+       brctg   %r10, L(top)
+
+L(end):        lmg     %r6, %r11, 48(%r15)
+       br      %r14
+EPILOGUE()
diff --git a/mpn/s390_64/copyi.asm b/mpn/s390_64/copyi.asm

new file mode 100644 (file)

index 0000000..a566968
--- /dev/null
+++ b/mpn/s390_64/copyi.asm
@@ -0,0 +1,57 @@
+dnl  S/390-64 mpn_copyi
+
+dnl  Copyright 2011 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+
+include(`../config.m4')
+
+C            cycles/limb
+C z900          1.25
+C z990           0.75
+C z9            ?
+C z10           ?
+C z196          ?
+
+C NOTE
+C  * This is based on GNU libc memcpy which was written by Martin Schwidefsky.
+
+C INPUT PARAMETERS
+define(`rp',   `%r2')                  C destination pointer
+define(`up',   `%r3')                  C source pointer
+define(`n',    `%r4')                  C limb count; the code below names r4 directly
+
+ASM_START()
+PROLOGUE(mpn_copyi)                    C copy n limbs from up to rp with mvc, advancing both pointers upwards
+       ltgr    %r4, %r4                C set the flags from n
+       sllg    %r4, %r4, 3             C r4 = byte count; the flags from ltgr are still in effect
+       je      L(rtn)                  C n was zero: nothing to copy
+       aghi    %r4, -1                 C r4 = bytes - 1
+       srlg    %r5, %r4, 8             C r5 = number of 256-byte mvc passes before the tail
+       ltgr    %r5, %r5                C < 256 bytes to copy?
+       je      L(1)
+
+L(top):        mvc     0(256, rp), 0(up)
+       la      rp, 256(rp)
+       la      up, 256(up)
+       brctg   %r5, L(top)
+
+L(1):  bras    %r5, L(2)               C make r5 point to mvc insn
+       mvc     0(1, rp), 0(up)         C template for ex only; the bras above jumps over it
+L(2):  ex      %r4, 0(%r5)             C execute mvc with length ((bytes-1) mod 256)+1
+L(rtn):        br      %r14
+EPILOGUE()
diff --git a/mpn/s390_64/gmp-mparam.h b/mpn/s390_64/gmp-mparam.h

index 237dc286dea7daeceeddaeeb26e6b496858c5a59..c0ade71c270b8fdbf3037de1cf82e6c61221fbd5 100644 (file)
--- a/mpn/s390_64/gmp-mparam.h
+++ b/mpn/s390_64/gmp-mparam.h
@@ -18,108 +18,150 @@ License for more details.
  You should have received a copy of the GNU Lesser General Public License
  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
  
-
  #define GMP_LIMB_BITS 64
  #define BYTES_PER_MP_LIMB 8
  
+/* 1200 MHz z990 */
+
  #define DIVREM_1_NORM_THRESHOLD              0  /* always */
  #define DIVREM_1_UNNORM_THRESHOLD            0  /* always */
+#define MOD_1_1P_METHOD                      2
  #define MOD_1_NORM_THRESHOLD                 0  /* always */
  #define MOD_1_UNNORM_THRESHOLD               0  /* always */
-#define MOD_1N_TO_MOD_1_1_THRESHOLD      MP_SIZE_T_MAX  /* never */
-#define MOD_1U_TO_MOD_1_1_THRESHOLD          9
-#define MOD_1_1_TO_MOD_1_2_THRESHOLD         0  /* never mpn_mod_1_1p */
-#define MOD_1_2_TO_MOD_1_4_THRESHOLD        19
-#define PREINV_MOD_1_TO_MOD_1_THRESHOLD  MP_SIZE_T_MAX  /* never */
+#define MOD_1N_TO_MOD_1_1_THRESHOLD          8
+#define MOD_1U_TO_MOD_1_1_THRESHOLD          5
+#define MOD_1_1_TO_MOD_1_2_THRESHOLD        38
+#define MOD_1_2_TO_MOD_1_4_THRESHOLD         0
+#define PREINV_MOD_1_TO_MOD_1_THRESHOLD     19
  #define USE_PREINV_DIVREM_1                  1
-#define DIVREM_2_THRESHOLD                   0  /* always */
+#define DIV_QR_2_PI2_THRESHOLD           MP_SIZE_T_MAX  /* never */
  #define DIVEXACT_1_THRESHOLD                 0  /* always */
-#define BMOD_1_TO_MOD_1_THRESHOLD          101
-
-#define MUL_TOOM22_THRESHOLD                14
-#define MUL_TOOM33_THRESHOLD                74
-#define MUL_TOOM44_THRESHOLD               118
-#define MUL_TOOM6H_THRESHOLD               157
-#define MUL_TOOM8H_THRESHOLD               236
-
-#define MUL_TOOM32_TO_TOOM43_THRESHOLD      73
-#define MUL_TOOM32_TO_TOOM53_THRESHOLD      84
-#define MUL_TOOM42_TO_TOOM53_THRESHOLD      81
-#define MUL_TOOM42_TO_TOOM63_THRESHOLD      72
-
-#define SQR_BASECASE_THRESHOLD               4
-#define SQR_TOOM2_THRESHOLD                 26
-#define SQR_TOOM3_THRESHOLD                 87
-#define SQR_TOOM4_THRESHOLD                136
-#define SQR_TOOM6_THRESHOLD                171
-#define SQR_TOOM8_THRESHOLD                246
+#define BMOD_1_TO_MOD_1_THRESHOLD           88
+
+#define MUL_TOOM22_THRESHOLD                10
+#define MUL_TOOM33_THRESHOLD                41
+#define MUL_TOOM44_THRESHOLD               104
+#define MUL_TOOM6H_THRESHOLD               149
+#define MUL_TOOM8H_THRESHOLD               212
+
+#define MUL_TOOM32_TO_TOOM43_THRESHOLD      65
+#define MUL_TOOM32_TO_TOOM53_THRESHOLD      69
+#define MUL_TOOM42_TO_TOOM53_THRESHOLD      73
+#define MUL_TOOM42_TO_TOOM63_THRESHOLD      66
+
+#define SQR_BASECASE_THRESHOLD               0
+#define SQR_TOOM2_THRESHOLD                 16
+#define SQR_TOOM3_THRESHOLD                 57
+#define SQR_TOOM4_THRESHOLD                154
+#define SQR_TOOM6_THRESHOLD                206
+#define SQR_TOOM8_THRESHOLD                309
+
+#define MULMID_TOOM42_THRESHOLD             20
  
  #define MULMOD_BNM1_THRESHOLD                9
  #define SQRMOD_BNM1_THRESHOLD               11
  
-#define MUL_FFT_MODF_THRESHOLD             212  /* k = 5 */
+#define POWM_SEC_TABLE  4,23,128,598
+
+#define MUL_FFT_MODF_THRESHOLD             220  /* k = 5 */
  #define MUL_FFT_TABLE3                                      \
-  { {    212, 5}, {      9, 6}, {      5, 5}, {     11, 6}, \
-    {      6, 5}, {     13, 6}, {     13, 7}, {      7, 6}, \
-    {     17, 7}, {     13, 8}, {      7, 7}, {     17, 8}, \
+  { {    220, 5}, {      7, 4}, {     15, 5}, {      8, 4}, \
+    {     17, 5}, {     11, 6}, {      6, 5}, {     13, 6}, \
+    {      7, 5}, {     15, 6}, {     13, 7}, {      7, 6}, \
+    {     15, 7}, {      8, 6}, {     17, 7}, {      9, 6}, \
+    {     19, 7}, {     13, 8}, {      7, 7}, {     17, 8}, \
      {      9, 7}, {     19, 8}, {     11, 7}, {     23, 8}, \
      {     13, 9}, {      7, 8}, {     19, 9}, {     11, 8}, \
-    {     23,10}, {      7, 9}, {     15, 8}, {     31, 9}, \
-    {     19, 8}, {     41, 9}, {     23,10}, {     15, 9}, \
-    {     39,10}, {     23, 9}, {     47,11}, {     15,10}, \
-    {     31, 9}, {     67,10}, {     39, 9}, {     79,10}, \
-    {     47,11}, {   2048,12}, {   4096,13}, {   8192,14}, \
-    {  16384,15}, {  32768,16}, {  65536,17}, { 131072,18}, \
-    { 262144,19}, { 524288,20}, {1048576,21}, {2097152,22}, \
-    {4194304,23}, {8388608,24} }
-#define MUL_FFT_TABLE3_SIZE 50
+    {     25,10}, {      7, 9}, {     15, 8}, {     33, 9}, \
+    {     19, 8}, {     39, 9}, {     23,10}, {     15, 9}, \
+    {     39,10}, {     23,11}, {     15,10}, {     31, 9}, \
+    {     63,10}, {     39, 9}, {     79,10}, {     47,11}, \
+    {     31,10}, {     63, 9}, {    127, 8}, {    255,10}, \
+    {     71, 9}, {    143, 8}, {    287,10}, {     79,11}, \
+    {     47,12}, {     31,11}, {     63,10}, {    127, 9}, \
+    {    255, 8}, {    511,10}, {    143,11}, {     79,10}, \
+    {    159, 9}, {    319,10}, {    175, 9}, {    351, 8}, \
+    {    703,11}, {     95,10}, {    191, 9}, {    383,10}, \
+    {    207,11}, {    111,10}, {    223,12}, {     63,11}, \
+    {    127,10}, {    255, 9}, {    511,11}, {    143,10}, \
+    {    287, 9}, {    575, 8}, {   1151,10}, {    319,11}, \
+    {    175,10}, {    351, 9}, {    703,12}, {     95,11}, \
+    {    191,10}, {    383, 9}, {    767,11}, {    207,10}, \
+    {    415, 9}, {    831,11}, {    223,13}, {     63,12}, \
+    {    127,11}, {    255,10}, {    511,11}, {    287,10}, \
+    {    575, 9}, {   1151,12}, {    159,11}, {    319,10}, \
+    {    639,11}, {    351,10}, {    703, 9}, {   1407, 8}, \
+    {   2815,11}, {    383,10}, {    767,11}, {    415,10}, \
+    {    831,12}, {    223,11}, {    447, 9}, {   1791,11}, \
+    {    479,13}, {   8192,14}, {  16384,15}, {  32768,16}, \
+    {  65536,17}, { 131072,18}, { 262144,19}, { 524288,20}, \
+    {1048576,21}, {2097152,22}, {4194304,23}, {8388608,24} }
+#define MUL_FFT_TABLE3_SIZE 124
  #define MUL_FFT_THRESHOLD                 2240
  
  #define SQR_FFT_MODF_THRESHOLD             184  /* k = 5 */
  #define SQR_FFT_TABLE3                                      \
-  { {    184, 5}, {     11, 6}, {     13, 7}, {      7, 6}, \
-    {     15, 7}, {     13, 8}, {      7, 7}, {     16, 8}, \
-    {      9, 7}, {     19, 8}, {     11, 7}, {     23, 8}, \
-    {     13, 9}, {      7, 8}, {     19, 9}, {     11, 8}, \
-    {     23,10}, {      7, 9}, {     15, 8}, {     31, 9}, \
-    {     23,10}, {     15, 9}, {     39,10}, {     23,11}, \
-    {     15,10}, {     31, 9}, {     63, 8}, {    127,10}, \
-    {     47,11}, {   2048,12}, {   4096,13}, {   8192,14}, \
+  { {    184, 5}, {      6, 4}, {     13, 5}, {     13, 6}, \
+    {      7, 5}, {     15, 6}, {     15, 7}, {      8, 6}, \
+    {     17, 7}, {     16, 8}, {      9, 7}, {     19, 8}, \
+    {     11, 7}, {     23, 8}, {     13, 9}, {      7, 8}, \
+    {     19, 9}, {     11, 8}, {     25,10}, {      7, 9}, \
+    {     15, 8}, {     31, 9}, {     23,10}, {     15, 9}, \
+    {     39,10}, {     23,11}, {     15,10}, {     31, 9}, \
+    {     63,10}, {     47,11}, {     31,10}, {     63, 9}, \
+    {    127, 8}, {    255,10}, {     71, 9}, {    143, 8}, \
+    {    287, 7}, {    575,10}, {     79,11}, {     47,12}, \
+    {     31,11}, {     63,10}, {    127, 9}, {    255,10}, \
+    {    143, 9}, {    287,11}, {     79,10}, {    159, 9}, \
+    {    319, 8}, {    639,10}, {    175, 9}, {    351,11}, \
+    {     95,10}, {    191, 9}, {    383, 8}, {    767,11}, \
+    {    111,10}, {    223,12}, {     63,11}, {    127,10}, \
+    {    255, 9}, {    511,11}, {    143,10}, {    287, 9}, \
+    {    575,11}, {    159,10}, {    319, 9}, {    639,11}, \
+    {    175,10}, {    351,12}, {     95,11}, {    191,10}, \
+    {    383, 9}, {    767,11}, {    207,10}, {    415, 9}, \
+    {    831,11}, {    223,13}, {     63,12}, {    127,11}, \
+    {    255,10}, {    511,11}, {    287,10}, {    575,12}, \
+    {    159,11}, {    319,10}, {    639,11}, {    351,10}, \
+    {    703,12}, {    191,11}, {    383,10}, {    767,11}, \
+    {    415,12}, {    223,11}, {    447,13}, {   8192,14}, \
      {  16384,15}, {  32768,16}, {  65536,17}, { 131072,18}, \
      { 262144,19}, { 524288,20}, {1048576,21}, {2097152,22}, \
      {4194304,23}, {8388608,24} }
-#define SQR_FFT_TABLE3_SIZE 42
-#define SQR_FFT_THRESHOLD                 1728
-
-#define MULLO_BASECASE_THRESHOLD             2
-#define MULLO_DC_THRESHOLD                  45
-#define MULLO_MUL_N_THRESHOLD             4392
-
-#define DC_DIV_QR_THRESHOLD                 40
-#define DC_DIVAPPR_Q_THRESHOLD             154
-#define DC_BDIV_QR_THRESHOLD                42
-#define DC_BDIV_Q_THRESHOLD                102
-
-#define INV_MULMOD_BNM1_THRESHOLD           26
-#define INV_NEWTON_THRESHOLD               226
-#define INV_APPR_THRESHOLD                 171
-
-#define BINV_NEWTON_THRESHOLD              222
-#define REDC_1_TO_REDC_N_THRESHOLD          46
-
-#define MU_DIV_QR_THRESHOLD                855
-#define MU_DIVAPPR_Q_THRESHOLD             942
-#define MUPI_DIV_QR_THRESHOLD               99
-#define MU_BDIV_QR_THRESHOLD               680
-#define MU_BDIV_Q_THRESHOLD                855
-
-#define MATRIX22_STRASSEN_THRESHOLD         15
-#define HGCD_THRESHOLD                      89
-#define GCD_DC_THRESHOLD                   273
-#define GCDEXT_DC_THRESHOLD                209
-#define JACOBI_BASE_METHOD                   2
-
-#define GET_STR_DC_THRESHOLD                32
-#define GET_STR_PRECOMPUTE_THRESHOLD        47
-#define SET_STR_DC_THRESHOLD               532
-#define SET_STR_PRECOMPUTE_THRESHOLD      1336
+#define SQR_FFT_TABLE3_SIZE 106
+#define SQR_FFT_THRESHOLD                 1600
+
+#define MULLO_BASECASE_THRESHOLD             3
+#define MULLO_DC_THRESHOLD                  33
+#define MULLO_MUL_N_THRESHOLD             5240
+
+#define DC_DIV_QR_THRESHOLD                 28
+#define DC_DIVAPPR_Q_THRESHOLD             106
+#define DC_BDIV_QR_THRESHOLD                31
+#define DC_BDIV_Q_THRESHOLD                 78
+
+#define INV_MULMOD_BNM1_THRESHOLD           43
+#define INV_NEWTON_THRESHOLD               130
+#define INV_APPR_THRESHOLD                 117
+
+#define BINV_NEWTON_THRESHOLD              149
+#define REDC_1_TO_REDC_N_THRESHOLD          38
+
+#define MU_DIV_QR_THRESHOLD                680
+#define MU_DIVAPPR_Q_THRESHOLD             748
+#define MUPI_DIV_QR_THRESHOLD               66
+#define MU_BDIV_QR_THRESHOLD               562
+#define MU_BDIV_Q_THRESHOLD                680
+
+#define MATRIX22_STRASSEN_THRESHOLD         11
+#define HGCD_THRESHOLD                      75
+#define HGCD_APPR_THRESHOLD                 59
+#define HGCD_REDUCE_THRESHOLD              901
+#define GCD_DC_THRESHOLD                   186
+#define GCDEXT_DC_THRESHOLD                150
+#define JACOBI_BASE_METHOD                   3
+
+#define GET_STR_DC_THRESHOLD                27
+#define GET_STR_PRECOMPUTE_THRESHOLD        40
+#define SET_STR_DC_THRESHOLD               418
+#define SET_STR_PRECOMPUTE_THRESHOLD      1111
diff --git a/mpn/s390_64/invert_limb.asm b/mpn/s390_64/invert_limb.asm

new file mode 100644 (file)

index 0000000..4d858d1
--- /dev/null
+++ b/mpn/s390_64/invert_limb.asm
@@ -0,0 +1,112 @@
+dnl  S/390-64 mpn_invert_limb
+
+dnl  Contributed to the GNU project by Torbjorn Granlund.
+
+dnl  Copyright 2011 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C            cycles/limb
+C z900        142
+C z990          86
+C z9            ?
+C z10           ?
+C z196          ?
+
+ASM_START()
+       TEXT
+       ALIGN(16)
+PROLOGUE(mpn_invert_limb)              C in: r2 = limb d, out: r2 = value built below. NOTE(review): presumably the reciprocal used by the preinv division code - confirm against gmp-impl.h
+       stg     %r9, 72(%r15)           C save r9, reloaded before return
+       srlg    %r9, %r2, 55            C r9 = top 9 bits of d
+       agr     %r9, %r9                C doubled: byte offset of a halfword entry
+       larl    %r4, approx_tab-512     C base biased by 512 bytes, so in-table offsets need the top bit of d set
+       srlg    %r3, %r2, 24            C r3 = top 40 bits of d
+       aghi    %r3, 1                  C ... plus one
+       lghi    %r5, 1
+       llgh    %r4, 0(%r9, %r4)        C r4 = 16-bit seed v0 from the table
+       sllg    %r9, %r4, 11            C r9 = v0 * 2^11
+       msgr    %r4, %r4                C v0^2
+       msgr    %r4, %r3
+       srlg    %r4, %r4, 40
+       aghi    %r9, -1
+       sgr     %r9, %r4                C r9 = v1 = v0*2^11 - 1 - ((v0^2 * r3) >> 40)
+       sllg    %r0, %r9, 60
+       sllg    %r1, %r9, 13            C r1 = v1 * 2^13
+       msgr    %r9, %r9
+       msgr    %r9, %r3
+       sgr     %r0, %r9                C r0 = v1*2^60 - v1^2 * r3
+       ngr     %r5, %r2                C r5 = low bit of d
+       srlg    %r4, %r2, 1
+       srlg    %r3, %r0, 47
+       agr     %r3, %r1                C r3 = v2 = v1*2^13 + (r0 >> 47)
+       agr     %r4, %r5                C r4 = (d >> 1) + (d & 1)
+       msgr    %r4, %r3
+       srlg    %r1, %r3, 1
+       lcgr    %r5, %r5                C r5 = 0 or all ones, a mask made from the low bit of d
+       ngr     %r1, %r5
+       sgr     %r1, %r4                C r1 = ((v2 >> 1) & mask) - v2 * r4
+       mlgr    %r0, %r3                C r0:r1 = r1 * v2
+       srlg    %r9, %r0, 1
+       sllg    %r4, %r3, 31
+       agr     %r4, %r9                C r4 = v3 = v2*2^31 + (high half >> 1)
+       lgr     %r1, %r4
+       mlgr    %r0, %r2                C r0:r1 = v3 * d
+       algr    %r1, %r2
+       alcgr   %r0, %r2                C r0 = high half of (v3*d + d), plus d
+       lgr     %r2, %r4
+       sgr     %r2, %r0                C result = v3 - r0
+       lg      %r9, 72(%r15)
+       br      %r14
+EPILOGUE()
+       RODATA
+       ALIGN(2)
+approx_tab:                            C 256 seed values, 32 rows of 8; mpn_invert_limb reads one with llgh at twice the top 9 bits of its input, less 512
+       .word   0x7fd,0x7f5,0x7ed,0x7e5,0x7dd,0x7d5,0x7ce,0x7c6
+       .word   0x7bf,0x7b7,0x7b0,0x7a8,0x7a1,0x79a,0x792,0x78b
+       .word   0x784,0x77d,0x776,0x76f,0x768,0x761,0x75b,0x754
+       .word   0x74d,0x747,0x740,0x739,0x733,0x72c,0x726,0x720
+       .word   0x719,0x713,0x70d,0x707,0x700,0x6fa,0x6f4,0x6ee
+       .word   0x6e8,0x6e2,0x6dc,0x6d6,0x6d1,0x6cb,0x6c5,0x6bf
+       .word   0x6ba,0x6b4,0x6ae,0x6a9,0x6a3,0x69e,0x698,0x693
+       .word   0x68d,0x688,0x683,0x67d,0x678,0x673,0x66e,0x669
+       .word   0x664,0x65e,0x659,0x654,0x64f,0x64a,0x645,0x640
+       .word   0x63c,0x637,0x632,0x62d,0x628,0x624,0x61f,0x61a
+       .word   0x616,0x611,0x60c,0x608,0x603,0x5ff,0x5fa,0x5f6
+       .word   0x5f1,0x5ed,0x5e9,0x5e4,0x5e0,0x5dc,0x5d7,0x5d3
+       .word   0x5cf,0x5cb,0x5c6,0x5c2,0x5be,0x5ba,0x5b6,0x5b2
+       .word   0x5ae,0x5aa,0x5a6,0x5a2,0x59e,0x59a,0x596,0x592
+       .word   0x58e,0x58a,0x586,0x583,0x57f,0x57b,0x577,0x574
+       .word   0x570,0x56c,0x568,0x565,0x561,0x55e,0x55a,0x556
+       .word   0x553,0x54f,0x54c,0x548,0x545,0x541,0x53e,0x53a
+       .word   0x537,0x534,0x530,0x52d,0x52a,0x526,0x523,0x520
+       .word   0x51c,0x519,0x516,0x513,0x50f,0x50c,0x509,0x506
+       .word   0x503,0x500,0x4fc,0x4f9,0x4f6,0x4f3,0x4f0,0x4ed
+       .word   0x4ea,0x4e7,0x4e4,0x4e1,0x4de,0x4db,0x4d8,0x4d5
+       .word   0x4d2,0x4cf,0x4cc,0x4ca,0x4c7,0x4c4,0x4c1,0x4be
+       .word   0x4bb,0x4b9,0x4b6,0x4b3,0x4b0,0x4ad,0x4ab,0x4a8
+       .word   0x4a5,0x4a3,0x4a0,0x49d,0x49b,0x498,0x495,0x493
+       .word   0x490,0x48d,0x48b,0x488,0x486,0x483,0x481,0x47e
+       .word   0x47c,0x479,0x477,0x474,0x472,0x46f,0x46d,0x46a
+       .word   0x468,0x465,0x463,0x461,0x45e,0x45c,0x459,0x457
+       .word   0x455,0x452,0x450,0x44e,0x44b,0x449,0x447,0x444
+       .word   0x442,0x440,0x43e,0x43b,0x439,0x437,0x435,0x432
+       .word   0x430,0x42e,0x42c,0x42a,0x428,0x425,0x423,0x421
+       .word   0x41f,0x41d,0x41b,0x419,0x417,0x414,0x412,0x410
+       .word   0x40e,0x40c,0x40a,0x408,0x406,0x404,0x402,0x400
+ASM_END()
diff --git a/mpn/s390_64/logops_n.asm b/mpn/s390_64/logops_n.asm

new file mode 100644 (file)

index 0000000..ae1a1ab
--- /dev/null
+++ b/mpn/s390_64/logops_n.asm
@@ -0,0 +1,280 @@
+dnl  S/390-64 logops.
+
+dnl  Copyright 2011 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb     variant 1           variant 2       variant 3
+C              rp!=up  rp=up
+C z900          4.5     2.25            5.5             5.5
+C z990          2.75    2               3.25            3.25
+C z9            ?                       ?               ?
+C z10           ?                       ?               ?
+C z196          ?                       ?               ?
+
+C INPUT PARAMETERS
+define(`rp',   `%r2')
+define(`up',   `%r3')
+define(`vp',   `%r4')
+define(`n',    `%r5')
+
+ifdef(`OPERATION_and_n',`
+  define(`func',`mpn_and_n')
+  define(`VARIANT_1')
+  define(`LOGOPC',`nc')
+  define(`LOGOP',`ng')')
+ifdef(`OPERATION_andn_n',`
+  define(`func',`mpn_andn_n')
+  define(`VARIANT_2')
+  define(`LOGOP',`ng')')
+ifdef(`OPERATION_nand_n',`
+  define(`func',`mpn_nand_n')
+  define(`VARIANT_3')
+  define(`LOGOP',`ng')')
+ifdef(`OPERATION_ior_n',`
+  define(`func',`mpn_ior_n')
+  define(`VARIANT_1')
+  define(`LOGOPC',`oc')
+  define(`LOGOP',`og')')
+ifdef(`OPERATION_iorn_n',`
+  define(`func',`mpn_iorn_n')
+  define(`VARIANT_2')
+  define(`LOGOP',`og')')
+ifdef(`OPERATION_nior_n',`
+  define(`func',`mpn_nior_n')
+  define(`VARIANT_3')
+  define(`LOGOP',`og')')
+ifdef(`OPERATION_xor_n',`
+  define(`func',`mpn_xor_n')
+  define(`VARIANT_1')
+  define(`LOGOPC',`xc')
+  define(`LOGOP',`xg')')
+ifdef(`OPERATION_xnor_n',`
+  define(`func',`mpn_xnor_n')
+  define(`VARIANT_2')
+  define(`LOGOP',`xg')')
+
+MULFUNC_PROLOGUE(mpn_and_n mpn_andn_n mpn_nand_n mpn_ior_n mpn_iorn_n mpn_nior_n mpn_xor_n mpn_xnor_n)
+
+ASM_START()
+PROLOGUE(func)
+ifdef(`VARIANT_1',`
+       cgr     rp, up                  C in-place operation (rp = up)?
+       jne     L(normal)               C no, use the register-based loop
+
+       sllg    n, n, 3                 C n = byte count
+       aghi    n, -1                   C n = byte count - 1
+       srlg    %r1, n, 8               C r1 = (byte count - 1) / 256
+       ltgr    %r1, %r1                C at most 256 bytes to process?
+       je      L(1)
+
+L(tp): LOGOPC  0(256, rp), 0(vp)       C storage-to-storage op on 256 bytes
+       la      rp, 256(rp)
+       la      vp, 256(vp)
+       brctg   %r1, L(tp)              C decrement r1, loop while nonzero
+
+L(1):  bras    %r1, L(2)               C make r1 point to the LOGOPC insn below
+       LOGOPC  0(1, rp), 0(vp)
+L(2):  ex      n, 0(%r1)               C execute LOGOPC with length ((bytes-1) mod 256)+1
+L(rtn):        br      %r14
+
+
+L(normal):
+       stmg    %r6, %r8, 48(%r15)      C save r6-r8 (restored before return)
+       aghi    n, 3
+       lghi    %r7, 3
+       srlg    %r0, n, 2               C r0 = (n+3)/4 = loop count
+       ngr     %r7, n                  C n mod 4 (n was incremented by 3 above)
+       je      L(b1)                   C r7 = 0: handle 1 limb first
+       cghi    %r7, 2
+       jl      L(b2)                   C r7 = 1: handle 2 limbs first
+       jne     L(top)                  C r7 = 3: count is a multiple of 4
+
+L(b3): lmg     %r5, %r7, 0(up)         C r7 = 2: handle 3 limbs first (overwrites n = r5)
+       la      up, 24(up)
+       LOGOP   %r5, 0(vp)
+       LOGOP   %r6, 8(vp)
+       LOGOP   %r7, 16(vp)
+       stmg    %r5, %r7, 0(rp)
+       la      rp, 24(rp)
+       la      vp, 24(vp)
+       j       L(mid)
+
+L(b1): lg      %r5, 0(up)
+       la      up, 8(up)
+       LOGOP   %r5, 0(vp)
+       stg     %r5, 0(rp)
+       la      rp, 8(rp)
+       la      vp, 8(vp)
+       j       L(mid)
+
+L(b2): lmg     %r5, %r6, 0(up)
+       la      up, 16(up)
+       LOGOP   %r5, 0(vp)
+       LOGOP   %r6, 8(vp)
+       stmg    %r5, %r6, 0(rp)
+       la      rp, 16(rp)
+       la      vp, 16(vp)
+       j       L(mid)
+
+L(top):        lmg     %r5, %r8, 0(up)         C load 4 limbs from up
+       la      up, 32(up)
+       LOGOP   %r5, 0(vp)              C combine each with the matching vp limb
+       LOGOP   %r6, 8(vp)
+       LOGOP   %r7, 16(vp)
+       LOGOP   %r8, 24(vp)
+       stmg    %r5, %r8, 0(rp)         C store 4 result limbs
+       la      rp, 32(rp)
+       la      vp, 32(vp)
+L(mid):        brctg   %r0, L(top)             C decrement loop count, loop while nonzero
+
+       lmg     %r6, %r8, 48(%r15)      C restore r6-r8
+       br      %r14
+')
+
+ifdef(`VARIANT_2',`
+       stmg    %r6, %r8, 48(%r15)      C save r6-r8 (restored before return)
+       lghi    %r1, -1                 C all-ones mask used to complement limbs
+
+       aghi    n, 3
+       lghi    %r7, 3
+       srlg    %r0, n, 2               C r0 = (n+3)/4 = loop count
+       ngr     %r7, n                  C n mod 4 (n was incremented by 3 above)
+       je      L(b1)                   C r7 = 0: handle 1 limb first
+       cghi    %r7, 2
+       jl      L(b2)                   C r7 = 1: handle 2 limbs first
+       jne     L(top)                  C r7 = 3: count is a multiple of 4
+
+L(b3): lmg     %r5, %r7, 0(vp)         C r7 = 2: handle 3 limbs first (overwrites n = r5)
+       la      vp, 24(vp)
+       xgr     %r5, %r1
+       xgr     %r6, %r1
+       xgr     %r7, %r1
+       LOGOP   %r5, 0(up)
+       LOGOP   %r6, 8(up)
+       LOGOP   %r7, 16(up)
+       stmg    %r5, %r7, 0(rp)
+       la      rp, 24(rp)
+       la      up, 24(up)
+       j       L(mid)
+
+L(b1): lg      %r5, 0(vp)
+       la      vp, 8(vp)
+       xgr     %r5, %r1
+       LOGOP   %r5, 0(up)
+       stg     %r5, 0(rp)
+       la      rp, 8(rp)
+       la      up, 8(up)
+       j       L(mid)
+
+L(b2): lmg     %r5, %r6, 0(vp)
+       la      vp, 16(vp)
+       xgr     %r5, %r1
+       xgr     %r6, %r1
+       LOGOP   %r5, 0(up)
+       LOGOP   %r6, 8(up)
+       stmg    %r5, %r6, 0(rp)
+       la      rp, 16(rp)
+       la      up, 16(up)
+       j       L(mid)
+
+L(top):        lmg     %r5, %r8, 0(vp)         C load 4 limbs from vp
+       la      vp, 32(vp)
+       xgr     %r5, %r1                C complement the vp limbs first
+       xgr     %r6, %r1
+       xgr     %r7, %r1
+       xgr     %r8, %r1
+       LOGOP   %r5, 0(up)              C then combine with the up limbs
+       LOGOP   %r6, 8(up)
+       LOGOP   %r7, 16(up)
+       LOGOP   %r8, 24(up)
+       la      up, 32(up)
+       stmg    %r5, %r8, 0(rp)         C store 4 result limbs
+       la      rp, 32(rp)
+L(mid):        brctg   %r0, L(top)             C decrement loop count, loop while nonzero
+
+       lmg     %r6, %r8, 48(%r15)      C restore r6-r8
+       br      %r14
+')
+
+ifdef(`VARIANT_3',`
+       stmg    %r6, %r8, 48(%r15)      C save r6-r8 (restored before return)
+       lghi    %r1, -1                 C all-ones mask used to complement results
+
+       aghi    n, 3
+       lghi    %r7, 3
+       srlg    %r0, n, 2               C r0 = (n+3)/4 = loop count
+       ngr     %r7, n                  C n mod 4 (n was incremented by 3 above)
+       je      L(b1)                   C r7 = 0: handle 1 limb first
+       cghi    %r7, 2
+       jl      L(b2)                   C r7 = 1: handle 2 limbs first
+       jne     L(top)                  C r7 = 3: count is a multiple of 4
+
+L(b3): lmg     %r5, %r7, 0(vp)         C r7 = 2: handle 3 limbs first (overwrites n = r5)
+       la      vp, 24(vp)
+       LOGOP   %r5, 0(up)
+       LOGOP   %r6, 8(up)
+       xgr     %r5, %r1
+       xgr     %r6, %r1
+       LOGOP   %r7, 16(up)
+       xgr     %r7, %r1
+       stmg    %r5, %r7, 0(rp)
+       la      rp, 24(rp)
+       la      up, 24(up)
+       j       L(mid)
+
+L(b1): lg      %r5, 0(vp)
+       la      vp, 8(vp)
+       LOGOP   %r5, 0(up)
+       xgr     %r5, %r1
+       stg     %r5, 0(rp)
+       la      rp, 8(rp)
+       la      up, 8(up)
+       j       L(mid)
+
+L(b2): lmg     %r5, %r6, 0(vp)
+       la      vp, 16(vp)
+       LOGOP   %r5, 0(up)
+       LOGOP   %r6, 8(up)
+       xgr     %r5, %r1
+       xgr     %r6, %r1
+       stmg    %r5, %r6, 0(rp)
+       la      rp, 16(rp)
+       la      up, 16(up)
+       j       L(mid)
+
+L(top):        lmg     %r5, %r8, 0(vp)         C load 4 limbs from vp
+       la      vp, 32(vp)
+       LOGOP   %r5, 0(up)              C combine with the up limbs first
+       LOGOP   %r6, 8(up)
+       xgr     %r5, %r1                C then complement the results
+       xgr     %r6, %r1
+       LOGOP   %r7, 16(up)
+       LOGOP   %r8, 24(up)
+       xgr     %r7, %r1
+       xgr     %r8, %r1
+       stmg    %r5, %r8, 0(rp)         C store 4 result limbs
+       la      up, 32(up)
+       la      rp, 32(rp)
+L(mid):        brctg   %r0, L(top)             C decrement loop count, loop while nonzero
+
+       lmg     %r6, %r8, 48(%r15)      C restore r6-r8
+       br      %r14
+')
+
+EPILOGUE()
diff --git a/mpn/s390_64/lshift.asm b/mpn/s390_64/lshift.asm

new file mode 100644 (file)

index 0000000..210a964
--- /dev/null
+++ b/mpn/s390_64/lshift.asm
@@ -0,0 +1,185 @@
+dnl  S/390-64 mpn_lshift.
+
+dnl  Copyright 2011, 2012 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C            cycles/limb
+C z900          7
+C z990           3
+C z9            ?
+C z10           ?
+C z196          ?
+
+C NOTES
+C  * This uses discrete loads and stores in a software pipeline.  Using lmg and
+C    stmg is not faster.
+C  * One could assume more pipelining could approach 2.5 c/l, but we have not
+C    found any 8-way loop that runs better than the current 4-way loop.
+C  * Consider using the same feed-in code for 1 <= n <= 3 as for n mod 4,
+C    similarly to the x86_64 sqr_basecase feed-in.
+
+C INPUT PARAMETERS
+define(`rp',   `%r2')
+define(`up',   `%r3')
+define(`n',    `%r4')
+define(`cnt',  `%r5')
+
+define(`tnc',  `%r6')
+
+ASM_START()
+PROLOGUE(mpn_lshift)
+       cghi    n, 3
+       jh      L(gt1)                  C n > 3: use the 4-way unrolled loop
+
+       stmg    %r6, %r7, 48(%r15)      C save r6-r7
+       larl    %r1, L(tab)-4
+       lcgr    tnc, cnt                C tnc = -cnt
+       sllg    n, n, 2                 C scale n by the 4-byte size of a table entry
+       b       0(n,%r1)                C dispatch to L(tab) + 4*(n-1)
+L(tab):        j       L(n1)
+       j       L(n2)
+       j       L(n3)
+
+L(n1): lg      %r1, 0(up)
+       sllg    %r0, %r1, 0(cnt)
+       stg     %r0, 0(rp)
+       srlg    %r2, %r1, 0(tnc)        C return value: bits shifted out of the limb
+       lg      %r6, 48(%r15)           C restoring r7 not needed
+       br      %r14
+
+L(n2): lg      %r1, 8(up)
+       srlg    %r4, %r1, 0(tnc)        C r4 = bits shifted out of the top limb
+       sllg    %r0, %r1, 0(cnt)
+       j       L(cj)
+
+L(n3): lg      %r1, 16(up)
+       srlg    %r4, %r1, 0(tnc)        C r4 = bits shifted out of the top limb
+       sllg    %r0, %r1, 0(cnt)
+       lg      %r1, 8(up)
+       srlg    %r7, %r1, 0(tnc)
+       ogr     %r7, %r0
+       sllg    %r0, %r1, 0(cnt)
+       stg     %r7, 16(rp)
+L(cj): lg      %r1, 0(up)              C common tail for n = 2 and n = 3
+       srlg    %r7, %r1, 0(tnc)
+       ogr     %r7, %r0
+       sllg    %r0, %r1, 0(cnt)
+       stg     %r7, 8(rp)
+       stg     %r0, 0(rp)
+       lgr     %r2, %r4                C return value
+       lmg     %r6, %r7, 48(%r15)
+       br      %r14
+
+L(gt1):        stmg    %r6, %r13, 48(%r15)     C save r6-r13
+       lcgr    tnc, cnt                C tnc = -cnt
+
+       sllg    %r1, n, 3               C r1 = n * 8 (size of operand in bytes)
+       srlg    %r0, n, 2               C loop count
+
+       agr     up, %r1                 C point up at end of U
+       agr     rp, %r1                 C point rp at end of R
+       aghi    up, -56
+       aghi    rp, -40
+
+       lghi    %r7, 3
+       ngr     %r7, n                  C r7 = n mod 4, selects the feed-in block
+       je      L(b0)
+       cghi    %r7, 2
+       jl      L(b1)
+       je      L(b2)
+
+L(b3): lg      %r7, 48(up)
+       srlg    %r9, %r7, 0(tnc)        C r9 = bits shifted out of the top limb
+       sllg    %r11, %r7, 0(cnt)
+       lg      %r8, 40(up)
+       lg      %r7, 32(up)
+       srlg    %r4, %r8, 0(tnc)
+       sllg    %r13, %r8, 0(cnt)
+       ogr     %r11, %r4
+       la      rp, 16(rp)
+       j       L(lm3)
+
+L(b2): lg      %r8, 48(up)
+       lg      %r7, 40(up)
+       srlg    %r9, %r8, 0(tnc)        C r9 = bits shifted out of the top limb
+       sllg    %r13, %r8, 0(cnt)
+       la      rp, 24(rp)
+       la      up, 8(up)
+       j       L(lm2)
+
+L(b1): lg      %r7, 48(up)
+       srlg    %r9, %r7, 0(tnc)        C r9 = bits shifted out of the top limb
+       sllg    %r11, %r7, 0(cnt)
+       lg      %r8, 40(up)
+       lg      %r7, 32(up)
+       srlg    %r4, %r8, 0(tnc)
+       sllg    %r10, %r8, 0(cnt)
+       ogr     %r11, %r4
+       la      rp, 32(rp)
+       la      up, 16(up)
+       j       L(lm1)
+
+L(b0): lg      %r8, 48(up)
+       lg      %r7, 40(up)
+       srlg    %r9, %r8, 0(tnc)        C r9 = bits shifted out of the top limb
+       sllg    %r10, %r8, 0(cnt)
+       la      rp, 40(rp)
+       la      up, 24(up)
+       j       L(lm0)
+
+C      ALIGN(16)
+L(top):        srlg    %r4, %r8, 0(tnc)
+       sllg    %r13, %r8, 0(cnt)
+       ogr     %r11, %r4
+       stg     %r10, 24(rp)
+L(lm3):        stg     %r11, 16(rp)
+L(lm2):        srlg    %r12, %r7, 0(tnc)
+       sllg    %r11, %r7, 0(cnt)
+       lg      %r8, 24(up)
+       lg      %r7, 16(up)
+       ogr     %r13, %r12
+       srlg    %r4, %r8, 0(tnc)
+       sllg    %r10, %r8, 0(cnt)
+       ogr     %r11, %r4
+       stg     %r13, 8(rp)
+L(lm1):        stg     %r11, 0(rp)
+L(lm0):        srlg    %r12, %r7, 0(tnc)
+       aghi    rp, -32                 C both pointers walk downwards, 4 limbs per pass
+       sllg    %r11, %r7, 0(cnt)
+       lg      %r8, 8(up)
+       lg      %r7, 0(up)
+       aghi    up, -32
+       ogr     %r10, %r12
+       brctg   %r0, L(top)             C decrement loop count, loop while nonzero
+
+L(end):        srlg    %r4, %r8, 0(tnc)        C wind down: store the last 4 limbs
+       sllg    %r13, %r8, 0(cnt)
+       ogr     %r11, %r4
+       stg     %r10, 24(rp)
+       stg     %r11, 16(rp)
+       srlg    %r12, %r7, 0(tnc)
+       sllg    %r11, %r7, 0(cnt)
+       ogr     %r13, %r12
+       stg     %r13, 8(rp)
+       stg     %r11, 0(rp)
+       lgr     %r2, %r9                C return the bits shifted out of the top limb
+
+       lmg     %r6, %r13, 48(%r15)     C restore r6-r13
+       br      %r14
+EPILOGUE()
diff --git a/mpn/s390_64/lshiftc.asm b/mpn/s390_64/lshiftc.asm

new file mode 100644 (file)

index 0000000..1fbc7fc
--- /dev/null
+++ b/mpn/s390_64/lshiftc.asm
@@ -0,0 +1,196 @@
+dnl  S/390-64 mpn_lshiftc.
+
+dnl  Copyright 2011 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C            cycles/limb
+C z900          9
+C z990           3.5
+C z9            ?
+C z10           ?
+C z196          ?
+
+C NOTES
+C  * See notes in lshift.asm.
+
+C INPUT PARAMETERS
+define(`rp',   `%r2')
+define(`up',   `%r3')
+define(`n',    `%r4')
+define(`cnt',  `%r5')
+
+define(`tnc',  `%r6')
+
+ASM_START()
+PROLOGUE(mpn_lshiftc)
+       cghi    n, 3
+       jh      L(gt1)                  C n > 3: use the 4-way unrolled loop
+
+       stmg    %r6, %r8, 48(%r15)      C save r6-r8
+       larl    %r1, L(tab)-4
+       lcgr    tnc, cnt                C tnc = -cnt
+       sllg    n, n, 2                 C scale n by the 4-byte size of a table entry
+       lghi    %r8, -1                 C all-ones mask used to complement results
+       b       0(n,%r1)                C dispatch to L(tab) + 4*(n-1)
+L(tab):        j       L(n1)
+       j       L(n2)
+       j       L(n3)
+
+L(n1): lg      %r1, 0(up)
+       sllg    %r0, %r1, 0(cnt)
+       xgr     %r0, %r8                C complement the shifted limb
+       stg     %r0, 0(rp)
+       srlg    %r2, %r1, 0(tnc)        C return value: shifted-out bits, not complemented
+       lmg     %r6, %r8, 48(%r15)
+       br      %r14
+
+L(n2): lg      %r1, 8(up)
+       srlg    %r4, %r1, 0(tnc)        C r4 = bits shifted out of the top limb
+       sllg    %r0, %r1, 0(cnt)
+       j       L(cj)
+
+L(n3): lg      %r1, 16(up)
+       srlg    %r4, %r1, 0(tnc)        C r4 = bits shifted out of the top limb
+       sllg    %r0, %r1, 0(cnt)
+       lg      %r1, 8(up)
+       srlg    %r7, %r1, 0(tnc)
+       ogr     %r7, %r0
+       sllg    %r0, %r1, 0(cnt)
+       xgr     %r7, %r8
+       stg     %r7, 16(rp)
+L(cj): lg      %r1, 0(up)              C common tail for n = 2 and n = 3
+       srlg    %r7, %r1, 0(tnc)
+       ogr     %r7, %r0
+       sllg    %r0, %r1, 0(cnt)
+       xgr     %r7, %r8
+       xgr     %r0, %r8
+       stg     %r7, 8(rp)
+       stg     %r0, 0(rp)
+       lgr     %r2, %r4                C return value
+       lmg     %r6, %r8, 48(%r15)
+       br      %r14
+
+L(gt1):        stmg    %r6, %r14, 48(%r15)     C save r6-r14; r14 is reused as a mask below
+       lcgr    tnc, cnt                C tnc = -cnt
+
+       sllg    %r1, n, 3               C r1 = n * 8 (size of operand in bytes)
+       srlg    %r0, n, 2               C loop count
+
+       agr     up, %r1                 C point up at end of U
+       agr     rp, %r1                 C point rp at end of R
+       aghi    up, -56
+       aghi    rp, -40
+
+       lghi    %r7, 3
+       lghi    %r14, -1                C all-ones mask; r14 is reloaded by lmg before return
+       ngr     %r7, n                  C r7 = n mod 4, selects the feed-in block
+       je      L(b0)
+       cghi    %r7, 2
+       jl      L(b1)
+       je      L(b2)
+
+L(b3): lg      %r7, 48(up)
+       srlg    %r9, %r7, 0(tnc)        C r9 = bits shifted out of the top limb
+       sllg    %r11, %r7, 0(cnt)
+       lg      %r8, 40(up)
+       lg      %r7, 32(up)
+       srlg    %r4, %r8, 0(tnc)
+       sllg    %r13, %r8, 0(cnt)
+       ogr     %r11, %r4
+       la      rp, 16(rp)
+       xgr     %r11, %r14
+       j       L(lm3)
+
+L(b2): lg      %r8, 48(up)
+       lg      %r7, 40(up)
+       srlg    %r9, %r8, 0(tnc)        C r9 = bits shifted out of the top limb
+       sllg    %r13, %r8, 0(cnt)
+       la      rp, 24(rp)
+       la      up, 8(up)
+       j       L(lm2)
+
+L(b1): lg      %r7, 48(up)
+       srlg    %r9, %r7, 0(tnc)        C r9 = bits shifted out of the top limb
+       sllg    %r11, %r7, 0(cnt)
+       lg      %r8, 40(up)
+       lg      %r7, 32(up)
+       srlg    %r4, %r8, 0(tnc)
+       sllg    %r10, %r8, 0(cnt)
+       ogr     %r11, %r4
+       la      rp, 32(rp)
+       la      up, 16(up)
+       xgr     %r11, %r14
+       j       L(lm1)
+
+L(b0): lg      %r8, 48(up)
+       lg      %r7, 40(up)
+       srlg    %r9, %r8, 0(tnc)        C r9 = bits shifted out of the top limb
+       sllg    %r10, %r8, 0(cnt)
+       la      rp, 40(rp)
+       la      up, 24(up)
+       j       L(lm0)
+
+C      ALIGN(16)
+L(top):        srlg    %r4, %r8, 0(tnc)
+       sllg    %r13, %r8, 0(cnt)
+       ogr     %r11, %r4
+       xgr     %r10, %r14              C complement each limb just before storing it
+       xgr     %r11, %r14
+       stg     %r10, 24(rp)
+L(lm3):        stg     %r11, 16(rp)
+L(lm2):        srlg    %r12, %r7, 0(tnc)
+       sllg    %r11, %r7, 0(cnt)
+       lg      %r8, 24(up)
+       lg      %r7, 16(up)
+       ogr     %r13, %r12
+       srlg    %r4, %r8, 0(tnc)
+       sllg    %r10, %r8, 0(cnt)
+       ogr     %r11, %r4
+       xgr     %r13, %r14
+       xgr     %r11, %r14
+       stg     %r13, 8(rp)
+L(lm1):        stg     %r11, 0(rp)
+L(lm0):        srlg    %r12, %r7, 0(tnc)
+       aghi    rp, -32                 C both pointers walk downwards, 4 limbs per pass
+       sllg    %r11, %r7, 0(cnt)
+       lg      %r8, 8(up)
+       lg      %r7, 0(up)
+       aghi    up, -32
+       ogr     %r10, %r12
+       brctg   %r0, L(top)             C decrement loop count, loop while nonzero
+
+L(end):        srlg    %r4, %r8, 0(tnc)        C wind down: store the last 4 limbs
+       sllg    %r13, %r8, 0(cnt)
+       ogr     %r11, %r4
+       xgr     %r10, %r14
+       xgr     %r11, %r14
+       stg     %r10, 24(rp)
+       stg     %r11, 16(rp)
+       srlg    %r12, %r7, 0(tnc)
+       sllg    %r11, %r7, 0(cnt)
+       ogr     %r13, %r12
+       xgr     %r13, %r14
+       xgr     %r11, %r14
+       stg     %r13, 8(rp)
+       stg     %r11, 0(rp)
+       lgr     %r2, %r9                C return the bits shifted out, not complemented
+
+       lmg     %r6, %r14, 48(%r15)     C restore r6-r14, including the return address
+       br      %r14
+EPILOGUE()
diff --git a/mpn/s390_64/mod_34lsub1.asm b/mpn/s390_64/mod_34lsub1.asm

new file mode 100644 (file)

index 0000000..b95c300
--- /dev/null
+++ b/mpn/s390_64/mod_34lsub1.asm
@@ -0,0 +1,98 @@
+dnl  S/390-64 mpn_mod_34lsub1
+
+dnl  Copyright 2011 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C            cycles/limb
+C z900          5.8
+C z990           2
+C z9            ?
+C z10           ?
+C z196          ?
+
+C TODO
+C  * Optimise summation code, see x86_64.
+
+C INPUT PARAMETERS
+define(`rp',   `%r2')
+define(`n',    `%r3')
+
+ASM_START()
+PROLOGUE(mpn_mod_34lsub1)
+       stmg    %r7, %r12, 56(%r15)     C save r7-r12 (restored before return)
+       lghi    %r11, 0                 C clear the accumulators and carry counters
+       lghi    %r12, 0
+       lghi    %r0, 0
+       lghi    %r8, 0
+       lghi    %r9, 0
+       lghi    %r10, 0
+       lghi    %r7, 0
+       aghi    %r3, -3
+       jl      .L3                     C fewer than 3 limbs: skip the main loop
+
+L(top):        alg     %r0, 0(%r2)             C add 3 limbs into r0, r12, r11 with carry chain
+       alcg    %r12, 8(%r2)
+       alcg    %r11, 16(%r2)
+       alcgr   %r8, %r7                C r7 is 0 here, so r8 collects the carry-outs
+       la      %r2, 24(%r2)
+       aghi    %r3, -3
+       jnl     L(top)                  C loop while at least 3 more limbs remain
+
+       lgr     %r7, %r8                C fold r8 and r11 into r7 in 48-bit pieces
+       srlg    %r1, %r11, 16
+       nihh    %r7, 0                  C 0xffffffffffff
+       agr     %r7, %r1
+       srlg    %r8, %r8, 48
+       agr     %r7, %r8
+       sllg    %r11, %r11, 32
+       nihh    %r11, 0
+       agr     %r7, %r11
+.L3:
+       cghi    %r3, -3                 C no leftover limbs?
+       je      .L6
+       alg     %r0, 0(%r2)             C add the first leftover limb
+       alcgr   %r10, %r10              C r10 was 0, so r10 = carry out
+       cghi    %r3, -2                 C exactly one leftover limb?
+       je      .L6
+       alg     %r12, 8(%r2)            C add the second leftover limb
+       alcgr   %r9, %r9                C r9 was 0, so r9 = carry out
+.L6:
+       srlg    %r1, %r0, 48            C fold r0, r7, r12, r10, r9 into one sum
+       nihh    %r0, 0                  C 0xffffffffffff
+       agr     %r0, %r1
+       agr     %r0, %r7
+       srlg    %r1, %r12, 32
+       agr     %r0, %r1
+       srlg    %r1, %r10, 32
+       agr     %r0, %r1
+       llgfr   %r12, %r12
+       srlg    %r1, %r9, 16
+       sllg    %r12, %r12, 16
+       llgfr   %r10, %r10
+       agr     %r0, %r1
+       llill   %r2, 65535
+       agr     %r0, %r12
+       sllg    %r10, %r10, 16
+       ngr     %r2, %r9
+       agr     %r0, %r10
+       sllg    %r2, %r2, 32
+       agr     %r2, %r0                C r2 = return value
+       lmg     %r7, %r12, 56(%r15)     C restore r7-r12
+       br      %r14
+EPILOGUE()
diff --git a/mpn/s390_64/mul_1.asm b/mpn/s390_64/mul_1.asm

new file mode 100644 (file)

index 0000000..03df8cb
--- /dev/null
+++ b/mpn/s390_64/mul_1.asm
@@ -0,0 +1,55 @@
+dnl  S/390-64 mpn_mul_1
+
+dnl  Copyright 2011 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C            cycles/limb
+C z900         29
+C z990         22
+C z9            ?
+C z10           ?
+C z196          ?
+
+C INPUT PARAMETERS
+define(`rp',   `%r2')
+define(`up',   `%r3')
+define(`n',    `%r4')
+define(`v0',   `%r5')
+
+ASM_START()
+PROLOGUE(mpn_mul_1)
+       stmg    %r11, %r12, 88(%r15)    C save r11-r12 (restored before return)
+       lghi    %r12, 0                 C zero index register
+       aghi    %r12, 0                 C clear carry flag
+       lghi    %r11, 0                 C clear carry limb
+
+L(top):        lg      %r1, 0(%r12,up)         C load next limb of U
+       mlgr    %r0, v0                 C r0:r1 = r1 * v0 (128-bit product)
+       alcgr   %r1, %r11               C add carry limb and carry flag to low part
+       lgr     %r11, %r0               C copy high part to carry limb
+       stg     %r1, 0(%r12,rp)
+       la      %r12, 8(%r12)           C advance index by one limb
+       brctg   n, L(top)               C decrement n, loop while nonzero
+
+       lghi    %r2, 0
+       alcgr   %r2, %r11               C return carry limb plus final carry flag
+
+       lmg     %r11, %r12, 88(%r15)
+       br      %r14
+EPILOGUE()
diff --git a/mpn/s390_64/mul_basecase.asm b/mpn/s390_64/mul_basecase.asm

new file mode 100644 (file)

index 0000000..cd7a3ee
--- /dev/null
+++ b/mpn/s390_64/mul_basecase.asm
@@ -0,0 +1,119 @@
+dnl  S/390-64 mpn_mul_basecase.
+
+dnl  Copyright 2011 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C            cycles/limb
+C z900          ?
+C z990         23
+C z9            ?
+C z10           ?
+C z196          ?
+
+C TODO
+C  * Perhaps add special case for un <= 2.
+C  * Replace loops by faster code.  The mul_1 and addmul_1 loops could be sped
+C    up by about 10%.
+
+C INPUT PARAMETERS
+define(`rp',   `%r2')
+define(`up',   `%r3')
+define(`un',   `%r4')
+define(`vp',   `%r5')
+define(`vn',   `%r6')
+
+define(`zero', `%r8')
+
+ASM_START()
+PROLOGUE(mpn_mul_basecase)
+       cghi    un, 2
+       jhe     L(ge2)                  C un >= 2: use the general code
+
+C un = vn = 1
+       lg      %r1, 0(vp)
+       mlg     %r0, 0(up)              C r0:r1 = vp[0] * up[0]
+       stg     %r1, 0(rp)              C low limb of the product
+       stg     %r0, 8(rp)              C high limb of the product
+       br      %r14
+
+L(ge2):        C jne   L(gen)
+
+
+L(gen):
+C mul_1 =======================================================================
+
+       stmg    %r6, %r12, 48(%r15)     C save r6-r12 (restored before return)
+       lghi    zero, 0
+       aghi    un, -1                  C un-1 limbs remain after the first one
+
+       lg      %r7, 0(vp)              C r7 = vp[0], the multiplier for this row
+       lg      %r11, 0(up)
+       lghi    %r12, 8                 C init index register
+       mlgr    %r10, %r7               C r10:r11 = up[0] * vp[0]
+       lgr     %r9, un                 C inner loop count
+       stg     %r11, 0(rp)
+       cr      %r15, %r15              C clear carry flag
+
+L(tm): lg      %r1, 0(%r12,up)
+       mlgr    %r0, %r7                C r0:r1 = up limb * vp[0]
+       alcgr   %r1, %r10               C add carry limb and carry flag to low part
+       lgr     %r10, %r0               C copy high part to carry limb
+       stg     %r1, 0(%r12,rp)
+       la      %r12, 8(%r12)
+       brctg   %r9, L(tm)
+
+       alcgr   %r0, zero               C add the final carry flag to the high limb
+       stg     %r0, 0(%r12,rp)
+
+C addmul_1 loop ===============================================================
+
+       aghi    vn, -1
+       je      L(outer_end)            C vn = 1: nothing more to accumulate
+L(outer_loop):
+
+       la      rp, 8(rp)               C rp += 1
+       la      vp, 8(vp)               C vp += 1
+       lg      %r7, 0(vp)              C r7 = next multiplier limb
+       lg      %r11, 0(up)
+       lghi    %r12, 8                 C init index register
+       mlgr    %r10, %r7               C r10:r11 = up[0] * r7
+       lgr     %r9, un                 C inner loop count
+       alg     %r11, 0(rp)             C accumulate into the existing rp limb
+       stg     %r11, 0(rp)
+
+L(tam):        lg      %r1, 0(%r12,up)
+       lg      %r11, 0(%r12,rp)
+       mlgr    %r0, %r7                C r0:r1 = up limb * r7
+       alcgr   %r1, %r11               C add rp limb and pending carry flag
+       alcgr   %r0, zero               C propagate that carry into the high part
+       algr    %r1, %r10               C add carry limb; carry flag feeds next pass
+       lgr     %r10, %r0
+       stg     %r1, 0(%r12,rp)
+       la      %r12, 8(%r12)
+       brctg   %r9, L(tam)
+
+       alcgr   %r0, zero               C add the final carry flag to the high limb
+       stg     %r0, 0(%r12,rp)
+
+       brctg   vn, L(outer_loop)       C one pass per remaining limb of V
+L(outer_end):
+
+       lmg     %r6, %r12, 48(%r15)     C restore r6-r12
+       br      %r14
+EPILOGUE()
diff --git a/mpn/s390_64/rshift.asm b/mpn/s390_64/rshift.asm

new file mode 100644 (file)

index 0000000..736c485
--- /dev/null
+++ b/mpn/s390_64/rshift.asm
@@ -0,0 +1,184 @@
+dnl  S/390-64 mpn_rshift.
+
+dnl  Copyright 2011 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C            cycles/limb
+C z900          7
+C z990           3
+C z9            ?
+C z10           ?
+C z196          ?
+
+C NOTES
+C  * See notes in lshift.asm.
+
+C INPUT PARAMETERS
+define(`rp',   `%r2')
+define(`up',   `%r3')
+define(`n',    `%r4')
+define(`cnt',  `%r5')
+
+define(`tnc',  `%r6')
+
+ASM_START()
+PROLOGUE(mpn_rshift)
+       cghi    n, 3
+       jh      L(gt1)                  C n > 3: use the 4-way unrolled loop
+
+       stmg    %r6, %r7, 48(%r15)      C save r6-r7
+       larl    %r1, L(tab)-4
+       lcgr    tnc, cnt                C tnc = -cnt
+       sllg    n, n, 2                 C scale n by the 4-byte size of a table entry
+       b       0(n,%r1)                C dispatch to L(tab) + 4*(n-1)
+L(tab):        j       L(n1)
+       j       L(n2)
+       j       L(n3)
+
+L(n1): lg      %r1, 0(up)
+       srlg    %r0, %r1, 0(cnt)
+       stg     %r0, 0(rp)
+       sllg    %r2, %r1, 0(tnc)        C return value: bits shifted out of the limb
+       lg      %r6, 48(%r15)           C restoring r7 not needed
+       br      %r14
+
+L(n2): lg      %r1, 0(up)
+       sllg    %r4, %r1, 0(tnc)        C r4 = bits shifted out of the low limb
+       srlg    %r0, %r1, 0(cnt)
+       lg      %r1, 8(up)
+       sllg    %r7, %r1, 0(tnc)
+       ogr     %r7, %r0
+       srlg    %r0, %r1, 0(cnt)
+       stg     %r7, 0(rp)
+       stg     %r0, 8(rp)
+       lgr     %r2, %r4                C return value
+       lmg     %r6, %r7, 48(%r15)
+       br      %r14
+
+
+L(n3): lg      %r1, 0(up)
+       sllg    %r4, %r1, 0(tnc)        C r4 = bits shifted out of the low limb
+       srlg    %r0, %r1, 0(cnt)
+       lg      %r1, 8(up)
+       sllg    %r7, %r1, 0(tnc)
+       ogr     %r7, %r0
+       srlg    %r0, %r1, 0(cnt)
+       stg     %r7, 0(rp)
+       lg      %r1, 16(up)
+       sllg    %r7, %r1, 0(tnc)
+       ogr     %r7, %r0
+       srlg    %r0, %r1, 0(cnt)
+       stg     %r7, 8(rp)
+       stg     %r0, 16(rp)
+       lgr     %r2, %r4                C return value
+       lmg     %r6, %r7, 48(%r15)
+       br      %r14
+
+L(gt1):        stmg    %r6, %r13, 48(%r15)     C save r6-r13
+       lcgr    tnc, cnt                C tnc = -cnt
+
+       sllg    %r1, n, 3               C NOTE(review): r1 is not read again in this function, looks like a leftover from lshift
+       srlg    %r0, n, 2               C loop count
+
+       lghi    %r7, 3
+       ngr     %r7, n                  C r7 = n mod 4, selects the feed-in block
+       je      L(b0)
+       cghi    %r7, 2
+       jl      L(b1)
+       je      L(b2)
+
+L(b3): aghi    rp, -8
+       lg      %r7, 0(up)
+       sllg    %r9, %r7, 0(tnc)        C r9 = bits shifted out of the low limb
+       srlg    %r11, %r7, 0(cnt)
+       lg      %r8, 8(up)
+       lg      %r7, 16(up)
+       sllg    %r4, %r8, 0(tnc)
+       srlg    %r13, %r8, 0(cnt)
+       ogr     %r11, %r4
+       la      up, 24(up)
+       j       L(lm3)
+
+L(b2): aghi    rp, -16
+       lg      %r8, 0(up)
+       lg      %r7, 8(up)
+       sllg    %r9, %r8, 0(tnc)        C r9 = bits shifted out of the low limb
+       srlg    %r13, %r8, 0(cnt)
+       la      up, 16(up)
+       j       L(lm2)
+
+L(b1): aghi    rp, -24
+       lg      %r7, 0(up)
+       sllg    %r9, %r7, 0(tnc)        C r9 = bits shifted out of the low limb
+       srlg    %r11, %r7, 0(cnt)
+       lg      %r8, 8(up)
+       lg      %r7, 16(up)
+       sllg    %r4, %r8, 0(tnc)
+       srlg    %r10, %r8, 0(cnt)
+       ogr     %r11, %r4
+       la      up, 8(up)
+       j       L(lm1)
+
+L(b0): aghi    rp, -32
+       lg      %r8, 0(up)
+       lg      %r7, 8(up)
+       sllg    %r9, %r8, 0(tnc)        C r9 = bits shifted out of the low limb
+       srlg    %r10, %r8, 0(cnt)
+       j       L(lm0)
+
+C      ALIGN(16)
+L(top):        sllg    %r4, %r8, 0(tnc)
+       srlg    %r13, %r8, 0(cnt)
+       ogr     %r11, %r4
+       stg     %r10, 0(rp)
+L(lm3):        stg     %r11, 8(rp)
+L(lm2):        sllg    %r12, %r7, 0(tnc)
+       srlg    %r11, %r7, 0(cnt)
+       lg      %r8, 0(up)
+       lg      %r7, 8(up)
+       ogr     %r13, %r12
+       sllg    %r4, %r8, 0(tnc)
+       srlg    %r10, %r8, 0(cnt)
+       ogr     %r11, %r4
+       stg     %r13, 16(rp)
+L(lm1):        stg     %r11, 24(rp)
+L(lm0):        sllg    %r12, %r7, 0(tnc)
+       aghi    rp, 32                  C both pointers walk upwards, 4 limbs per pass
+       srlg    %r11, %r7, 0(cnt)
+       lg      %r8, 16(up)
+       lg      %r7, 24(up)
+       aghi    up, 32
+       ogr     %r10, %r12
+       brctg   %r0, L(top)             C decrement loop count, loop while nonzero
+
+L(end):        sllg    %r4, %r8, 0(tnc)        C wind down: store the last 4 limbs
+       srlg    %r13, %r8, 0(cnt)
+       ogr     %r11, %r4
+       stg     %r10, 0(rp)
+       stg     %r11, 8(rp)
+       sllg    %r12, %r7, 0(tnc)
+       srlg    %r11, %r7, 0(cnt)
+       ogr     %r13, %r12
+       stg     %r13, 16(rp)
+       stg     %r11, 24(rp)
+       lgr     %r2, %r9                C return the bits shifted out of the low limb
+
+       lmg     %r6, %r13, 48(%r15)     C restore r6-r13
+       br      %r14
+EPILOGUE()
diff --git a/mpn/s390_64/sqr_basecase.asm b/mpn/s390_64/sqr_basecase.asm

new file mode 100644 (file)

index 0000000..7df0ab1
--- /dev/null
+++ b/mpn/s390_64/sqr_basecase.asm
@@ -0,0 +1,192 @@
+dnl  S/390-64 mpn_sqr_basecase.
+
+dnl  Copyright 2011 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C            cycles/limb
+C z900          ?
+C z990         23
+C z9            ?
+C z10           ?
+C z196          ?
+
+C TODO
+C  * Clean up.
+C  * Stop iterating addmul_1 loop at latest for n = 2, implement longer tail.
+C    This will ask for basecase handling of n = 3.
+C  * Update counters and pointers more straightforwardly, possibly lowering
+C    register usage.
+C  * Should we use this allocation-free style for more sqr_basecase asm
+C    implementations?  The only disadvantage is that it requires R != U.
+C  * Replace loops by faster code.  The mul_1 and addmul_1 loops could be sped
+C    up by about 10%.  The sqr_diag_addlsh1 loop could probably be sped up even
+C    more.
+
+C INPUT PARAMETERS
+define(`rp',   `%r2')
+define(`up',   `%r3')
+define(`n',    `%r4')
+
+define(`zero', `%r8')
+define(`rp_saved',     `%r9')
+define(`up_saved',     `%r13')
+define(`n_saved',      `%r14')
+
+ASM_START()
+C mpn_sqr_basecase: store the 2n-limb square of the n-limb number at up into
+C rp.  n = 1 and n = 2 use straight-line code.  For n >= 3 the off-diagonal
+C products are accumulated with one mul_1 pass followed by addmul_1 passes,
+C then a last pass doubles that sum and adds the diagonal squares.
+PROLOGUE(mpn_sqr_basecase)
+       aghi    n, -2                   C n -= 2, setting the condition code
+       jhe     L(ge2)                  C taken when the original n >= 2
+
+C n = 1
+       lg      %r5, 0(up)
+       mlgr    %r4, %r5                C %r4:%r5 = u0 * u0 (high:low)
+       stg     %r5, 0(rp)
+       stg     %r4, 8(rp)
+       br      %r14
+
+L(ge2):        jne     L(gen)          C condition code still from aghi: n > 2
+
+C n = 2
+       stmg    %r6, %r8, 48(%r15)      C save %r6-%r8, restored before returning
+       lghi    zero, 0
+
+       lg      %r5, 0(up)
+       mlgr    %r4, %r5                C u0 * u0
+       lg      %r1, 8(up)
+       mlgr    %r0, %r1                C u1 * u1
+       stg     %r5, 0(rp)
+
+       lg      %r7, 0(up)
+       mlg     %r6, 8(up)              C u0 * u1
+       algr    %r7, %r7                C double the cross product ...
+       alcgr   %r6, %r6
+       alcgr   %r0, zero               C ... carrying into the top limb of u1 * u1
+
+       algr    %r4, %r7                C add the doubled cross product in the middle
+       alcgr   %r1, %r6
+       alcgr   %r0, zero
+       stg     %r4, 8(rp)
+       stg     %r1, 16(rp)
+       stg     %r0, 24(rp)
+
+       lmg     %r6, %r8, 48(%r15)
+       br      %r14
+
+L(gen):
+C mul_1 =======================================================================
+
+       stmg    %r6, %r14, 48(%r15)     C %r14 is saved too: it doubles as n_saved
+       lghi    zero, 0
+       lgr     up_saved, up            C keep the entry pointers for the final pass
+       lgr     rp_saved, rp
+       lgr     n_saved, n              C n_saved = original n - 2
+
+       lg      %r6, 0(up)              C multiplier u0
+       lg      %r11, 8(up)
+       lghi    %r12, 16                C init index register
+       mlgr    %r10, %r6               C %r10:%r11 = u1 * u0
+       lgr     %r5, n                  C inner loop count
+       stg     %r11, 8(rp)             C low limb goes to rp[1]
+       cr      %r15, %r15              C clear carry flag
+
+L(tm): lg      %r1, 0(%r12,up)
+       mlgr    %r0, %r6                C %r0:%r1 = u[i] * u0
+       alcgr   %r1, %r10               C add previous high limb and pending carry
+       lgr     %r10, %r0               C copy high part to carry limb
+       stg     %r1, 0(%r12,rp)
+       la      %r12, 8(%r12)
+       brctg   %r5, L(tm)
+
+       alcgr   %r0, zero               C fold the last carry into the top limb
+       stg     %r0, 0(%r12,rp)
+
+C addmul_1 loop ===============================================================
+
+       aghi    n, -1
+       je      L(outer_end)            C n = 3: skip the addmul_1 passes
+L(outer_loop):
+
+       la      rp, 16(rp)              C rp += 2
+       la      up, 8(up)               C up += 1
+       lg      %r6, 0(up)              C multiplier for this pass
+       lg      %r11, 8(up)
+       lghi    %r12, 16                C init index register
+       mlgr    %r10, %r6
+       lgr     %r5, n
+       alg     %r11, 8(rp)             C first limb; its carry feeds the loop
+       stg     %r11, 8(rp)
+
+L(tam):        lg      %r1, 0(%r12,up)
+       lg      %r7, 0(%r12,rp)
+       mlgr    %r0, %r6
+       alcgr   %r1, %r7                C low product + rp limb + pending carry
+       alcgr   %r0, zero
+       algr    %r1, %r10               C add carry limb; carry is consumed next pass
+       lgr     %r10, %r0
+       stg     %r1, 0(%r12,rp)
+       la      %r12, 8(%r12)
+       brctg   %r5, L(tam)
+
+       alcgr   %r0, zero
+       stg     %r0, 0(%r12,rp)
+
+       brctg   n, L(outer_loop)
+L(outer_end):
+
+       lg      %r6, 8(up)              C last cross product: the two top limbs
+       lg      %r1, 16(up)
+       lgr     %r7, %r0                C Same as: lg %r7, 24(,rp)
+       mlgr    %r0, %r6
+       algr    %r1, %r7
+       alcgr   %r0, zero
+       stg     %r1, 24(rp)
+       stg     %r0, 32(rp)
+
+C sqr_diag_addlsh1 ============================================================
+
+define(`up', `up_saved')
+define(`rp', `rp_saved')
+       la      n, 1(n_saved)           C loop count = original n - 1
+
+       lg      %r1, 0(up)
+       mlgr    %r0, %r1                C %r0:%r1 = u0 * u0
+       stg     %r1, 0(rp)
+C      clr     %r15, %r15              C clear carry (already clear per above)
+
+L(top):        lg      %r11, 8(up)
+       la      up, 8(up)
+       lg      %r6, 8(rp)
+       lg      %r7, 16(rp)
+       mlgr    %r10, %r11              C %r10:%r11 = u[i] * u[i]
+       alcgr   %r6, %r6                C double two limbs, with carry-in
+       alcgr   %r7, %r7
+       alcgr   %r10, zero              C propagate carry to high product limb
+       algr    %r6, %r0                C add high limb of the previous square
+       alcgr   %r7, %r11               C add low limb of this square
+       stmg    %r6, %r7, 8(rp)
+       la      rp, 16(rp)
+       lgr     %r0, %r10               C copy carry limb
+       brctg   n, L(top)
+
+       alcgr   %r0, zero
+       stg     %r0, 8(rp)
+
+       lmg     %r6, %r14, 48(%r15)     C also restores the return address in %r14
+       br      %r14
+EPILOGUE()
diff --git a/mpn/s390_64/sublsh1_n.asm b/mpn/s390_64/sublsh1_n.asm

new file mode 100644 (file)

index 0000000..d33d9bf
--- /dev/null
+++ b/mpn/s390_64/sublsh1_n.asm
@@ -0,0 +1,158 @@
+dnl  S/390-64 mpn_sublsh1_n
+
+dnl  Copyright 2011 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C            cycles/limb
+C z900         10
+C z990          5
+C z9            ?
+C z10           ?
+C z196          ?
+
+C TODO
+C  * Optimise for small n
+C  * Compute RETVAL for sublsh1_n less stupidly
+
+C INPUT PARAMETERS
+define(`rp',   `%r2')
+define(`up',   `%r3')
+define(`vp',   `%r4')
+define(`n',    `%r5')
+
+ifdef(`OPERATION_addlsh1_n',`
+  define(ADSBR,                algr)
+  define(ADSBCR,       alcgr)
+  define(INITCY,       `lghi   %r13, -1')
+  define(RETVAL,       `la     %r2, 2(%r1,%r13)')
+  define(func, mpn_addlsh1_n)
+')
+ifdef(`OPERATION_sublsh1_n',`
+  define(ADSBR,                slgr)
+  define(ADSBCR,       slbgr)
+  define(INITCY,       `lghi   %r13, 0')
+  define(RETVAL,`dnl
+       slgr    %r1, %r13
+       lghi    %r2, 1
+       algr    %r2, %r1')
+  define(func, mpn_sublsh1_n)
+')
+
+ASM_START()
+C Compute {rp,n} = {up,n} -/+ 2*{vp,n}, four limbs per loop iteration, with
+C ADSBR/ADSBCR selecting subtract or add.  %r1 keeps the carry of the shift
+C chain and %r13 the carry of the add/subtract chain, each parked as 0 or -1
+C between uses.  RETVAL combines the two into the return value in %r2.
+PROLOGUE(mpn_sublsh1_n)
+       stmg    %r6, %r13, 48(%r15)
+
+       aghi    n, 3
+       lghi    %r7, 3
+       srlg    %r0, n, 2               C %r0 = (n + 3) / 4, block count for brctg
+       ngr     %r7, n                  C (n + 3) mod 4, n was bumped by 3 above
+       je      L(b1)                   C 0: original n = 1 mod 4
+       cghi    %r7, 2
+       jl      L(b2)                   C 1: original n = 2 mod 4
+       jne     L(b0)                   C 3: original n = 0 mod 4; 2 falls into b3
+
+L(b3): lmg     %r5, %r7, 0(up)         C three up limbs; n in %r5 is dead by now
+       la      up, 24(up)
+       lmg     %r9, %r11, 0(vp)
+       la      vp, 24(vp)
+
+       algr    %r9, %r9                C shift the vp limbs left by one bit
+       alcgr   %r10, %r10
+       alcgr   %r11, %r11
+       slbgr   %r1, %r1                C park shift carry: 0 if set, -1 if clear
+
+       ADSBR   %r5, %r9
+       ADSBCR  %r6, %r10
+       ADSBCR  %r7, %r11
+       slbgr   %r13, %r13              C park add/sub carry the same way
+
+       stmg    %r5, %r7, 0(rp)
+       la      rp, 24(rp)
+       brctg   %r0, L(top)
+       j       L(end)
+
+L(b0): lghi    %r1, -1                 C no shift carry pending
+       INITCY
+       j       L(top)
+
+L(b1): lg      %r5, 0(up)
+       la      up, 8(up)
+       lg      %r9, 0(vp)
+       la      vp, 8(vp)
+
+       algr    %r9, %r9
+       slbgr   %r1, %r1                C park shift carry
+       ADSBR   %r5, %r9
+       slbgr   %r13, %r13              C park add/sub carry
+
+       stg     %r5, 0(rp)
+       la      rp, 8(rp)
+       brctg   %r0, L(top)
+       j       L(end)
+
+L(b2): lmg     %r5, %r6, 0(up)
+       la      up, 16(up)
+       lmg     %r9, %r10, 0(vp)
+       la      vp, 16(vp)
+
+       algr    %r9, %r9
+       alcgr   %r10, %r10
+       slbgr   %r1, %r1                C park shift carry
+
+       ADSBR   %r5, %r9
+       ADSBCR  %r6, %r10
+       slbgr   %r13, %r13              C park add/sub carry
+
+       stmg    %r5, %r6, 0(rp)
+       la      rp, 16(rp)
+       brctg   %r0, L(top)
+       j       L(end)
+
+L(top):        lmg     %r9, %r12, 0(vp)
+       la      vp, 32(vp)
+
+       aghi    %r1, 1                  C restore carry (0 gives cc 2, -1 gives cc 0)
+
+       alcgr   %r9, %r9
+       alcgr   %r10, %r10
+       alcgr   %r11, %r11
+       alcgr   %r12, %r12
+
+       slbgr   %r1, %r1                C save carry
+
+       lmg     %r5, %r8, 0(up)
+       la      up, 32(up)
+
+       aghi    %r13, 1                 C restore carry
+
+       ADSBCR  %r5, %r9
+       ADSBCR  %r6, %r10
+       ADSBCR  %r7, %r11
+       ADSBCR  %r8, %r12
+
+       slbgr   %r13, %r13              C save carry
+
+       stmg    %r5, %r8, 0(rp)
+       la      rp, 32(rp)
+       brctg   %r0, L(top)
+
+L(end):        RETVAL
+       lmg     %r6, %r13, 48(%r15)
+       br      %r14
+EPILOGUE()
diff --git a/mpn/s390_64/submul_1.asm b/mpn/s390_64/submul_1.asm

new file mode 100644 (file)

index 0000000..b78f266
--- /dev/null
+++ b/mpn/s390_64/submul_1.asm
@@ -0,0 +1,59 @@
+dnl  S/390-64 mpn_submul_1
+
+dnl  Copyright 2011 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C            cycles/limb
+C z900         35
+C z990         24
+C z9            ?
+C z10           ?
+C z196          ?
+
+C INPUT PARAMETERS
+define(`rp',   `%r2')
+define(`up',   `%r3')
+define(`n',    `%r4')
+define(`v0',   `%r5')
+
+ASM_START()
+C mpn_submul_1: subtract {up,n} * v0 from {rp,n} in place and return the
+C limb borrowed out of the top in %r2.
+PROLOGUE(mpn_submul_1)
+       stmg    %r9, %r12, 72(%r15)     C save %r9-%r12, restored before returning
+       lghi    %r12, 0                 C byte index into up and rp
+       slgr    %r11, %r11              C carry limb = 0; leaves no borrow pending
+
+L(top):        lg      %r1, 0(%r12, up)
+       lg      %r10, 0(%r12, rp)
+       mlgr    %r0, v0                 C %r0:%r1 = up[i] * v0
+       slbgr   %r10, %r1               C rp[i] - low limb - pending borrow
+       slbgr   %r9, %r9                C %r9 = -1 if that borrowed, else 0
+       slgr    %r0, %r9                C conditional incr
+       slgr    %r10, %r11              C subtract carry limb; borrow is used next pass
+       lgr     %r11, %r0               C high limb becomes the next carry limb
+       stg     %r10, 0(%r12, rp)
+       la      %r12, 8(%r12)
+       brctg   %r4,  L(top)
+
+       lgr     %r2, %r11
+       slbgr   %r9, %r9                C pick up the borrow still pending
+       slgr    %r2, %r9                C return carry limb + that borrow
+
+       lmg     %r9, %r12, 72(%r15)
+       br      %r14
+EPILOGUE()
diff --git a/mpn/sparc32/ultrasparct1/add_n.asm b/mpn/sparc32/ultrasparct1/add_n.asm

new file mode 100644 (file)

index 0000000..7fd21a6
--- /dev/null
+++ b/mpn/sparc32/ultrasparct1/add_n.asm
@@ -0,0 +1,59 @@
+dnl  SPARC T1 32-bit mpn_add_n.
+
+dnl  Copyright 2010 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C INPUT PARAMETERS
+define(`rp',  %o0)
+define(`ap',  %o1)
+define(`bp',  %o2)
+define(`n',   %o3)
+define(`cy',  %o4)
+
+define(`i',   %o3)
+
+MULFUNC_PROLOGUE(mpn_add_n mpn_add_nc)
+
+ASM_START()
+C mpn_add_nc enters with a carry-in in cy; mpn_add_n clears cy and shares the
+C same loop.  {rp,n} = {ap,n} + {bp,n} + cy, with the carry-out returned in %o0.
+PROLOGUE(mpn_add_nc)
+       b       L(ent)
+       srl     cy, 0, cy       C strip any bogus high bits
+EPILOGUE()
+
+PROLOGUE(mpn_add_n)
+       mov     0, cy
+L(ent):        srl     n, 0, n         C strip any bogus high bits
+       sll     n, 2, n         C byte count = 4 * n
+       add     ap, n, ap       C point all three operands past their ends
+       add     bp, n, bp
+       add     rp, n, rp
+       neg     n, i            C negative byte index, counts up to zero
+
+L(top):        lduw    [ap+i], %g1
+       lduw    [bp+i], %g2
+       add     %g1, %g2, %g3   C 64-bit sum of two zero-extended limbs
+       add     %g3, cy, %g3
+       stw     %g3, [rp+i]
+       add     i, 4, i
+       brnz    i, L(top)
+       srlx    %g3, 32, cy     C delay slot: bit 32 of the sum is the carry
+
+       retl
+       mov     cy, %o0         C return value
+EPILOGUE()
diff --git a/mpn/sparc32/ultrasparct1/addmul_1.asm b/mpn/sparc32/ultrasparct1/addmul_1.asm

new file mode 100644 (file)

index 0000000..5001726
--- /dev/null
+++ b/mpn/sparc32/ultrasparct1/addmul_1.asm
@@ -0,0 +1,52 @@
+dnl  SPARC T1 32-bit mpn_addmul_1.
+
+dnl  Copyright 2010 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C                 cycles/limb
+C UltraSPARC T1:       27
+
+C INPUT PARAMETERS
+define(`rp',   `%o0')
+define(`up',   `%o1')
+define(`n',    `%o2')
+define(`v0',   `%o3')
+
+ASM_START()
+C mpn_addmul_1: add {up,n} * v0 to {rp,n} in place, 32-bit limbs, and return
+C the carry-out limb in %o0.
+PROLOGUE(mpn_addmul_1)
+       mov     0, %g4                  C carry limb
+       srl     v0, 0, v0               C clear the upper 32 bits
+       srl     n, 0, n
+       dec     n                       C n--
+
+L(top):        lduw    [up+0], %g1
+       add     up, 4, up               C up++
+       mulx    %g1, v0, %g3            C 64-bit product of two 32-bit values
+       lduw    [rp+0], %g2
+       add     %g2, %g3, %g3
+       add     %g4, %g3, %g3           C product + rp limb + carry limb
+       stw     %g3, [rp+0]
+       add     rp, 4, rp               C rp++
+       srlx    %g3, 32, %g4            C upper half is the next carry limb
+       brnz    n, L(top)
+       dec     n                       C n-- (delay slot, runs on every pass)
+
+       retl
+       mov     %g4, %o0                C return value
+EPILOGUE()
diff --git a/mpn/sparc32/ultrasparct1/gmp-mparam.h b/mpn/sparc32/ultrasparct1/gmp-mparam.h

new file mode 100644 (file)

index 0000000..2342e14
--- /dev/null
+++ b/mpn/sparc32/ultrasparct1/gmp-mparam.h
@@ -0,0 +1,143 @@
+/* UltraSPARC T 32-bit gmp-mparam.h -- Compiler/machine parameter header file.
+
+Copyright 1991, 1993, 1994, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007,
+2008, 2009, 2010, 2011 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#define DIVREM_1_NORM_THRESHOLD              0  /* always */
+#define DIVREM_1_UNNORM_THRESHOLD            3
+#define MOD_1_1P_METHOD                      2
+#define MOD_1_NORM_THRESHOLD                 0  /* always */
+#define MOD_1_UNNORM_THRESHOLD               0  /* always */
+#define MOD_1N_TO_MOD_1_1_THRESHOLD          9
+#define MOD_1U_TO_MOD_1_1_THRESHOLD         10
+#define MOD_1_1_TO_MOD_1_2_THRESHOLD         0  /* never mpn_mod_1_1p */
+#define MOD_1_2_TO_MOD_1_4_THRESHOLD        21
+#define PREINV_MOD_1_TO_MOD_1_THRESHOLD     22
+#define USE_PREINV_DIVREM_1                  1
+#define DIV_QR_2_PI2_THRESHOLD           MP_SIZE_T_MAX  /* never */
+#define DIVEXACT_1_THRESHOLD                 0  /* always */
+#define BMOD_1_TO_MOD_1_THRESHOLD           35
+
+#define MUL_TOOM22_THRESHOLD                14
+#define MUL_TOOM33_THRESHOLD                98
+#define MUL_TOOM44_THRESHOLD               166
+#define MUL_TOOM6H_THRESHOLD               226
+#define MUL_TOOM8H_THRESHOLD               333
+
+#define MUL_TOOM32_TO_TOOM43_THRESHOLD      97
+#define MUL_TOOM32_TO_TOOM53_THRESHOLD     139
+#define MUL_TOOM42_TO_TOOM53_THRESHOLD      97
+#define MUL_TOOM42_TO_TOOM63_THRESHOLD      98
+#define MUL_TOOM43_TO_TOOM54_THRESHOLD     120
+
+#define SQR_BASECASE_THRESHOLD               6
+#define SQR_TOOM2_THRESHOLD                 34
+#define SQR_TOOM3_THRESHOLD                110
+#define SQR_TOOM4_THRESHOLD                178
+#define SQR_TOOM6_THRESHOLD                240
+#define SQR_TOOM8_THRESHOLD                333
+
+#define MULMID_TOOM42_THRESHOLD             22
+
+#define MULMOD_BNM1_THRESHOLD                9
+#define SQRMOD_BNM1_THRESHOLD               13
+
+#define MUL_FFT_MODF_THRESHOLD             280  /* k = 5 */
+#define MUL_FFT_TABLE3                                      \
+  { {    280, 5}, {     15, 6}, {      8, 5}, {     17, 6}, \
+    {      9, 5}, {     19, 6}, {     13, 7}, {      7, 6}, \
+    {     17, 7}, {      9, 6}, {     20, 7}, {     11, 6}, \
+    {     23, 7}, {     13, 8}, {      7, 7}, {     21, 8}, \
+    {     11, 7}, {     25, 9}, {      7, 8}, {     15, 7}, \
+    {     33, 8}, {     19, 7}, {     41, 8}, {     23, 7}, \
+    {     49, 8}, {     27, 9}, {     15, 8}, {     31, 7}, \
+    {     63, 8}, {     39, 9}, {     23, 8}, {     47,10}, \
+    {     15, 9}, {     31, 8}, {     67, 9}, {     39, 8}, \
+    {     79, 9}, {     47,10}, {     31, 9}, {     79,10}, \
+    {     47,11}, {     31,10}, {     63, 9}, {    127, 8}, \
+    {    255,10}, {     79, 9}, {    159, 8}, {    319,10}, \
+    {     95, 9}, {    191, 8}, {    383,11}, {     63,10}, \
+    {    127, 9}, {    255,10}, {    143, 9}, {    287,10}, \
+    {    159, 9}, {    319,10}, {    175,11}, {     95,10}, \
+    {    191, 9}, {    383,12}, {   4096,13}, {   8192,14}, \
+    {  16384,15}, {  32768,16} }
+#define MUL_FFT_TABLE3_SIZE 66
+#define MUL_FFT_THRESHOLD                 3712
+
+#define SQR_FFT_MODF_THRESHOLD             240  /* k = 5 */
+#define SQR_FFT_TABLE3                                      \
+  { {    240, 5}, {     15, 6}, {      8, 5}, {     17, 6}, \
+    {     13, 7}, {      7, 6}, {     17, 7}, {      9, 6}, \
+    {     20, 7}, {     11, 6}, {     23, 7}, {     13, 8}, \
+    {      7, 7}, {     19, 8}, {     11, 7}, {     25, 9}, \
+    {      7, 8}, {     15, 7}, {     33, 8}, {     19, 7}, \
+    {     39, 8}, {     23, 7}, {     47, 8}, {     27, 9}, \
+    {     15, 8}, {     39, 9}, {     23, 8}, {     47,10}, \
+    {     15, 9}, {     31, 8}, {     63, 9}, {     39, 8}, \
+    {     79, 9}, {     47,10}, {     31, 9}, {     63, 8}, \
+    {    127, 9}, {     71, 8}, {    143, 9}, {     79,10}, \
+    {     47,11}, {     31,10}, {     63, 9}, {    127, 8}, \
+    {    255, 9}, {    143,10}, {     79, 9}, {    159, 8}, \
+    {    319, 9}, {    175,10}, {     95, 9}, {    191, 8}, \
+    {    383, 9}, {    207,11}, {     63,10}, {    127, 9}, \
+    {    255,10}, {    143, 9}, {    287,10}, {    159, 9}, \
+    {    319,10}, {    175,11}, {     95,10}, {    191, 9}, \
+    {    383,10}, {    207,12}, {   4096,13}, {   8192,14}, \
+    {  16384,15}, {  32768,16} }
+#define SQR_FFT_TABLE3_SIZE 70
+#define SQR_FFT_THRESHOLD                 2624
+
+#define MULLO_BASECASE_THRESHOLD             0  /* always */
+#define MULLO_DC_THRESHOLD                  51
+#define MULLO_MUL_N_THRESHOLD             6633
+
+#define DC_DIV_QR_THRESHOLD                 51
+#define DC_DIVAPPR_Q_THRESHOLD             202
+#define DC_BDIV_QR_THRESHOLD                47
+#define DC_BDIV_Q_THRESHOLD                124
+
+#define INV_MULMOD_BNM1_THRESHOLD           26
+#define INV_NEWTON_THRESHOLD               266
+#define INV_APPR_THRESHOLD                 222
+
+#define BINV_NEWTON_THRESHOLD              296
+#define REDC_1_TO_REDC_N_THRESHOLD          59
+
+#define MU_DIV_QR_THRESHOLD               1334
+#define MU_DIVAPPR_Q_THRESHOLD            1499
+#define MUPI_DIV_QR_THRESHOLD              116
+#define MU_BDIV_QR_THRESHOLD              1057
+#define MU_BDIV_Q_THRESHOLD               1334
+
+#define POWM_SEC_TABLE  6,35,213,724,2618
+
+#define MATRIX22_STRASSEN_THRESHOLD         15
+#define HGCD_THRESHOLD                      84
+#define HGCD_APPR_THRESHOLD                101
+#define HGCD_REDUCE_THRESHOLD             1437
+#define GCD_DC_THRESHOLD                   372
+#define GCDEXT_DC_THRESHOLD                253
+#define JACOBI_BASE_METHOD                   2
+
+#define GET_STR_DC_THRESHOLD                12
+#define GET_STR_PRECOMPUTE_THRESHOLD        27
+#define SET_STR_DC_THRESHOLD               399
+#define SET_STR_PRECOMPUTE_THRESHOLD       885
+
+#define FAC_DSC_THRESHOLD                  179
+#define FAC_ODD_THRESHOLD                   29
diff --git a/mpn/sparc32/ultrasparct1/mul_1.asm b/mpn/sparc32/ultrasparct1/mul_1.asm

new file mode 100644 (file)

index 0000000..fcde0c7
--- /dev/null
+++ b/mpn/sparc32/ultrasparct1/mul_1.asm
@@ -0,0 +1,50 @@
+dnl  SPARC T1 32-bit mpn_mul_1.
+
+dnl  Copyright 2010 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C                 cycles/limb
+C UltraSPARC T1:       23
+
+C INPUT PARAMETERS
+define(`rp',   `%o0')
+define(`up',   `%o1')
+define(`n',    `%o2')
+define(`v0',   `%o3')
+
+ASM_START()
+C mpn_mul_1: store {up,n} * v0 into {rp,n}, 32-bit limbs, and return the
+C carry-out limb in %o0.
+PROLOGUE(mpn_mul_1)
+       mov     0, %g4                  C carry limb
+       srl     v0, 0, v0               C clear the upper 32 bits
+       srl     n, 0, n
+       dec     n                       C n--
+
+L(top):        lduw    [up+0], %g1
+       add     up, 4, up               C up++
+       mulx    %g1, v0, %g3            C 64-bit product of two 32-bit values
+       add     %g4, %g3, %g3           C add the carry limb
+       stw     %g3, [rp+0]
+       add     rp, 4, rp               C rp++
+       srlx    %g3, 32, %g4            C upper half is the next carry limb
+       brnz    n, L(top)
+       dec     n                       C n-- (delay slot, runs on every pass)
+
+       retl
+       mov     %g4, %o0                C return value
+EPILOGUE()
diff --git a/mpn/sparc32/ultrasparct1/sqr_diagonal.asm b/mpn/sparc32/ultrasparct1/sqr_diagonal.asm

new file mode 100644 (file)

index 0000000..c7aa966
--- /dev/null
+++ b/mpn/sparc32/ultrasparct1/sqr_diagonal.asm
@@ -0,0 +1,44 @@
+dnl  SPARC T1 32-bit mpn_sqr_diagonal.
+
+dnl  Copyright 2010 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C INPUT PARAMETERS
+define(`rp',   `%o0')
+define(`up',   `%o1')
+define(`n',    `%o2')
+
+ASM_START()
+C mpn_sqr_diagonal: for each of the n 32-bit limbs at up, store its 64-bit
+C square as two limbs (low first) at rp.
+PROLOGUE(mpn_sqr_diagonal)
+       deccc   n                       C n--
+       nop
+
+L(top):        lduw    [up+0], %g1
+       add     up, 4, up               C up++
+       mulx    %g1, %g1, %g3           C 64-bit square of the limb
+       stw     %g3, [rp+0]
+       srlx    %g3, 32, %g4
+       stw     %g4, [rp+4]
+       add     rp, 8, rp               C rp += 2
+       bnz     %icc, L(top)            C tests the flags of the previous deccc
+       deccc   n                       C n--
+
+       retl
+       nop
+EPILOGUE()
diff --git a/mpn/sparc32/ultrasparct1/sub_n.asm b/mpn/sparc32/ultrasparct1/sub_n.asm

new file mode 100644 (file)

index 0000000..a2293a9
--- /dev/null
+++ b/mpn/sparc32/ultrasparct1/sub_n.asm
@@ -0,0 +1,59 @@
+dnl  SPARC T1 32-bit mpn_sub_n.
+
+dnl  Copyright 2010 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C INPUT PARAMETERS
+define(`rp',  %o0)
+define(`ap',  %o1)
+define(`bp',  %o2)
+define(`n',   %o3)
+define(`cy',  %o4)
+
+define(`i',   %o3)
+
+MULFUNC_PROLOGUE(mpn_sub_n mpn_sub_nc)
+
+ASM_START()
+C mpn_sub_nc enters with a borrow-in in cy; mpn_sub_n clears cy and shares the
+C same loop.  {rp,n} = {ap,n} - {bp,n} - cy, with the borrow-out returned in %o0.
+PROLOGUE(mpn_sub_nc)
+       b       L(ent)
+       srl     cy, 0, cy       C strip any bogus high bits
+EPILOGUE()
+
+PROLOGUE(mpn_sub_n)
+       mov     0, cy
+L(ent):        srl     n, 0, n         C strip any bogus high bits
+       sll     n, 2, n         C byte count = 4 * n
+       add     ap, n, ap       C point all three operands past their ends
+       add     bp, n, bp
+       add     rp, n, rp
+       neg     n, i            C negative byte index, counts up to zero
+
+L(top):        lduw    [ap+i], %g1
+       lduw    [bp+i], %g2
+       sub     %g1, %g2, %g3   C 64-bit difference of two zero-extended limbs
+       sub     %g3, cy, %g3
+       stw     %g3, [rp+i]
+       add     i, 4, i
+       brnz    i, L(top)
+       srlx    %g3, 63, cy     C delay slot: sign bit set means a borrow
+
+       retl
+       mov     cy, %o0         C return value
+EPILOGUE()
diff --git a/mpn/sparc32/ultrasparct1/submul_1.asm b/mpn/sparc32/ultrasparct1/submul_1.asm

new file mode 100644 (file)

index 0000000..605a882
--- /dev/null
+++ b/mpn/sparc32/ultrasparct1/submul_1.asm
@@ -0,0 +1,52 @@
+dnl  SPARC T1 32-bit mpn_submul_1.
+
+dnl  Copyright 2010 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C                 cycles/limb
+C UltraSPARC T1:       27
+
+C INPUT PARAMETERS
+define(`rp',   `%o0')
+define(`up',   `%o1')
+define(`n',    `%o2')
+define(`v0',   `%o3')
+
+ASM_START()
+C mpn_submul_1: subtract {up,n} * v0 from {rp,n} in place, 32-bit limbs, and
+C return the borrow-out limb in %o0.  The 32-bit borrow of each subcc stays in
+C the icc carry flag and is folded into the next product by addx.
+PROLOGUE(mpn_submul_1)
+       subcc   %g0, %g0, %g4           C clear CF and g4
+       srl     v0, 0, v0               C clear the upper 32 bits
+       srl     n, 0, n
+       dec     n                       C n--
+
+L(top):        lduw    [up+0], %g1
+       add     up, 4, up               C up++
+       mulx    %g1, v0, %g3            C 64-bit product of two 32-bit values
+       lduw    [rp+0], %g2
+       addx    %g4, %g3, %g3           C product + carry limb + pending borrow
+       srlx    %g3, 32, %g4            C upper half is the next carry limb
+       subcc   %g2, %g3, %g3           C rp limb - low half, borrow goes to CF
+       stw     %g3, [rp+0]
+       add     rp, 4, rp               C rp++
+       brnz    n, L(top)
+       dec     n                       C n-- (delay slot, runs on every pass)
+
+       retl
+       addx    %g4, 0, %o0             C return value
+EPILOGUE()
diff --git a/mpn/sparc32/v9/gmp-mparam.h b/mpn/sparc32/v9/gmp-mparam.h

index ab401ffedbc2cacc440c305b5b040aef1f75135b..40da333cf2b5dc6d340ba12132ddc24616f130ff 100644 (file)
--- a/mpn/sparc32/v9/gmp-mparam.h
+++ b/mpn/sparc32/v9/gmp-mparam.h
@@ -1,7 +1,7 @@
  /* SPARC v9 32-bit gmp-mparam.h -- Compiler/machine parameter header file.
  
-Copyright 1991, 1993, 1994, 1999, 2000, 2001, 2002, 2004, 2009, 2010 Free
-Software Foundation, Inc.
+Copyright 1991, 1993, 1994, 1999, 2000, 2001, 2002, 2004, 2009, 2010, 2011
+Free Software Foundation, Inc.
  
  This file is part of the GNU MP Library.
  
@@ -22,77 +22,136 @@ along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
  #define BYTES_PER_MP_LIMB 4
  
  #define DIVREM_1_NORM_THRESHOLD              3
-#define DIVREM_1_UNNORM_THRESHOLD            5
-#define MOD_1_NORM_THRESHOLD                 4
-#define MOD_1_UNNORM_THRESHOLD               7
-#define MOD_1N_TO_MOD_1_1_THRESHOLD      MP_SIZE_T_MAX  /* never */
-#define MOD_1U_TO_MOD_1_1_THRESHOLD      MP_SIZE_T_MAX
-#define MOD_1_1_TO_MOD_1_2_THRESHOLD     MP_SIZE_T_MAX
-#define MOD_1_2_TO_MOD_1_4_THRESHOLD     MP_SIZE_T_MAX
-#define PREINV_MOD_1_TO_MOD_1_THRESHOLD  MP_SIZE_T_MAX  /* never */
+#define DIVREM_1_UNNORM_THRESHOLD            4
+#define MOD_1_1P_METHOD                      2
+#define MOD_1_NORM_THRESHOLD                 3
+#define MOD_1_UNNORM_THRESHOLD               4
+#define MOD_1N_TO_MOD_1_1_THRESHOLD         11
+#define MOD_1U_TO_MOD_1_1_THRESHOLD         11
+#define MOD_1_1_TO_MOD_1_2_THRESHOLD         0  /* never mpn_mod_1_1p */
+#define MOD_1_2_TO_MOD_1_4_THRESHOLD        22
+#define PREINV_MOD_1_TO_MOD_1_THRESHOLD     61
  #define USE_PREINV_DIVREM_1                  1
-#define DIVREM_2_THRESHOLD                   0  /* always */
+#define DIV_QR_2_PI2_THRESHOLD           MP_SIZE_T_MAX  /* never */
  #define DIVEXACT_1_THRESHOLD                 0  /* always */
  #define BMOD_1_TO_MOD_1_THRESHOLD        MP_SIZE_T_MAX  /* never */
  
-#define MUL_TOOM22_THRESHOLD                32
-#define MUL_TOOM33_THRESHOLD                96
-#define MUL_TOOM44_THRESHOLD               143
-#define MUL_TOOM6H_THRESHOLD               216
-#define MUL_TOOM8H_THRESHOLD               494
-
-#define MUL_TOOM32_TO_TOOM43_THRESHOLD      96
-#define MUL_TOOM32_TO_TOOM53_THRESHOLD     145
-#define MUL_TOOM42_TO_TOOM53_THRESHOLD      97
-#define MUL_TOOM42_TO_TOOM63_THRESHOLD      92
-
-#define SQR_BASECASE_THRESHOLD              12
-#define SQR_TOOM2_THRESHOLD                 62
-#define SQR_TOOM3_THRESHOLD                103
-#define SQR_TOOM4_THRESHOLD                274
-#define SQR_TOOM6_THRESHOLD                274
-#define SQR_TOOM8_THRESHOLD                542
-
-#define MULMOD_BNM1_THRESHOLD               14
-#define SQRMOD_BNM1_THRESHOLD               21
-
-#define MUL_FFT_TABLE  { 272, 736, 1152, 3584, 10240, 24576, 98304, 917504, 0 }
-#define MUL_FFT_MODF_THRESHOLD             248
-#define MUL_FFT_THRESHOLD                 2112
-
-#define SQR_FFT_TABLE  { 336, 800, 1408, 3584, 10240, 24576, 98304, 393216, 0 }
-#define SQR_FFT_MODF_THRESHOLD             248
-#define SQR_FFT_THRESHOLD                 2112
+#define MUL_TOOM22_THRESHOLD                27
+#define MUL_TOOM33_THRESHOLD               112
+#define MUL_TOOM44_THRESHOLD               124
+#define MUL_TOOM6H_THRESHOLD               160
+#define MUL_TOOM8H_THRESHOLD               242
+
+#define MUL_TOOM32_TO_TOOM43_THRESHOLD      69
+#define MUL_TOOM32_TO_TOOM53_THRESHOLD      93
+#define MUL_TOOM42_TO_TOOM53_THRESHOLD      71
+#define MUL_TOOM42_TO_TOOM63_THRESHOLD      53
+#define MUL_TOOM43_TO_TOOM54_THRESHOLD      70
+
+#define SQR_BASECASE_THRESHOLD               5
+#define SQR_TOOM2_THRESHOLD                 64
+#define SQR_TOOM3_THRESHOLD                 85
+#define SQR_TOOM4_THRESHOLD                158
+#define SQR_TOOM6_THRESHOLD                185
+#define SQR_TOOM8_THRESHOLD                224
+
+#define MULMID_TOOM42_THRESHOLD             64
+
+#define MULMOD_BNM1_THRESHOLD               11
+#define SQRMOD_BNM1_THRESHOLD               16
+
+#define MUL_FFT_MODF_THRESHOLD             212  /* k = 5 */
+#define MUL_FFT_TABLE3                                      \
+  { {    212, 5}, {     11, 6}, {      7, 5}, {     17, 6}, \
+    {      9, 5}, {     20, 6}, {     13, 7}, {      7, 6}, \
+    {     16, 7}, {      9, 6}, {     20, 7}, {     13, 8}, \
+    {      7, 7}, {     19, 8}, {     11, 7}, {     25, 9}, \
+    {      7, 8}, {     15, 7}, {     31, 8}, {     19, 7}, \
+    {     39, 8}, {     27, 9}, {     15, 8}, {     39, 9}, \
+    {     23,10}, {     15, 9}, {     31, 8}, {     67, 9}, \
+    {     39, 8}, {     79, 7}, {    159, 8}, {     83, 7}, \
+    {    175, 8}, {     91, 9}, {     47, 8}, {     95,10}, \
+    {     31, 9}, {     63, 8}, {    127, 9}, {     71, 8}, \
+    {    143, 9}, {     79, 8}, {    159, 9}, {     87,10}, \
+    {     47, 9}, {     95,11}, {     31,10}, {     63, 9}, \
+    {    127, 8}, {    255, 9}, {    143,10}, {     79, 9}, \
+    {    175,10}, {     95, 9}, {    191, 8}, {    415,10}, \
+    {    111,11}, {     63,10}, {    127, 9}, {    271,10}, \
+    {    143, 9}, {    287, 8}, {    575,10}, {    175,11}, \
+    {     95,10}, {    191, 9}, {    415, 8}, {    831,10}, \
+    {    223,12}, {     63,11}, {    127,10}, {    287, 9}, \
+    {    575,11}, {    159,10}, {    319, 9}, {    639, 8}, \
+    {   1407,11}, {    191,10}, {    415, 9}, {    831,11}, \
+    {    223,10}, {    447,12}, {   4096,13}, {   8192,14}, \
+    {  16384,15}, {  32768,16} }
+#define MUL_FFT_TABLE3_SIZE 86
+#define MUL_FFT_THRESHOLD                 2688
+
+#define SQR_FFT_MODF_THRESHOLD             180  /* k = 5 */
+#define SQR_FFT_TABLE3                                      \
+  { {    180, 5}, {      6, 4}, {     13, 5}, {     13, 6}, \
+    {      7, 5}, {     15, 6}, {     13, 7}, {      7, 6}, \
+    {     17, 7}, {      9, 6}, {     20, 7}, {     11, 6}, \
+    {     23, 7}, {     13, 8}, {      7, 7}, {     19, 8}, \
+    {     11, 7}, {     25, 9}, {      7, 8}, {     15, 7}, \
+    {     31, 8}, {     23, 9}, {     15, 8}, {     39, 9}, \
+    {     23,10}, {     15, 9}, {     31, 8}, {     63, 7}, \
+    {    127, 9}, {     47,10}, {     31, 9}, {     63, 8}, \
+    {    127, 9}, {     71, 8}, {    143, 7}, {    287, 6}, \
+    {    575,10}, {     47, 9}, {     95,11}, {     31,10}, \
+    {     63, 9}, {    127, 8}, {    255, 9}, {    143,10}, \
+    {     79, 9}, {    159, 8}, {    319, 9}, {    175, 8}, \
+    {    351, 7}, {    703,10}, {     95, 9}, {    191, 8}, \
+    {    383, 9}, {    207,10}, {    111,11}, {     63,10}, \
+    {    127, 9}, {    255,10}, {    143, 9}, {    287, 8}, \
+    {    575,10}, {    159, 9}, {    319,10}, {    175, 9}, \
+    {    351, 8}, {    703,11}, {     95,10}, {    191, 9}, \
+    {    383,10}, {    207, 9}, {    415, 8}, {    831,12}, \
+    {     63,11}, {    127,10}, {    255, 9}, {    543,10}, \
+    {    287, 9}, {    575,11}, {    159,10}, {    319, 9}, \
+    {    639,10}, {    351, 9}, {    703, 8}, {   1407,11}, \
+    {    191,10}, {    415, 9}, {    831,11}, {    223,10}, \
+    {    447, 9}, {    895,12}, {   4096,13}, {   8192,14}, \
+    {  16384,15}, {  32768,16} }
+#define SQR_FFT_TABLE3_SIZE 94
+#define SQR_FFT_THRESHOLD                 1856
  
  #define MULLO_BASECASE_THRESHOLD             0  /* always */
-#define MULLO_DC_THRESHOLD                 106
-#define MULLO_MUL_N_THRESHOLD             3493
+#define MULLO_DC_THRESHOLD                 145
+#define MULLO_MUL_N_THRESHOLD             5333
  
-#define DC_DIV_QR_THRESHOLD                123
-#define DC_DIVAPPR_Q_THRESHOLD             396
-#define DC_BDIV_QR_THRESHOLD               121
-#define DC_BDIV_Q_THRESHOLD                280
+#define DC_DIV_QR_THRESHOLD                 78
+#define DC_DIVAPPR_Q_THRESHOLD             414
+#define DC_BDIV_QR_THRESHOLD                75
+#define DC_BDIV_Q_THRESHOLD                360
  
-#define INV_MULMOD_BNM1_THRESHOLD           62
+#define INV_MULMOD_BNM1_THRESHOLD           52
  #define INV_NEWTON_THRESHOLD               351
-#define INV_APPR_THRESHOLD                 357
-
-#define BINV_NEWTON_THRESHOLD              324
-#define REDC_1_TO_REDC_N_THRESHOLD          78
-
-#define MU_DIV_QR_THRESHOLD               1895
-#define MU_DIVAPPR_Q_THRESHOLD            1895
-#define MUPI_DIV_QR_THRESHOLD              122
-#define MU_BDIV_QR_THRESHOLD               872
-#define MU_BDIV_Q_THRESHOLD               2801
-
-#define MATRIX22_STRASSEN_THRESHOLD         13
-#define HGCD_THRESHOLD                     144
-#define GCD_DC_THRESHOLD                   630
-#define GCDEXT_DC_THRESHOLD                416
-#define JACOBI_BASE_METHOD                   2
-
-#define GET_STR_DC_THRESHOLD                 9
-#define GET_STR_PRECOMPUTE_THRESHOLD        17
-#define SET_STR_DC_THRESHOLD               537
-#define SET_STR_PRECOMPUTE_THRESHOLD      1576
+#define INV_APPR_THRESHOLD                 354
+
+#define BINV_NEWTON_THRESHOLD              234
+#define REDC_1_TO_REDC_N_THRESHOLD          60
+
+#define MU_DIV_QR_THRESHOLD                855
+#define MU_DIVAPPR_Q_THRESHOLD            1099
+#define MUPI_DIV_QR_THRESHOLD              112
+#define MU_BDIV_QR_THRESHOLD               839
+#define MU_BDIV_Q_THRESHOLD                979
+
+#define POWM_SEC_TABLE  4,23,127,453,1679,2870
+
+#define MATRIX22_STRASSEN_THRESHOLD          9
+#define HGCD_THRESHOLD                      87
+#define HGCD_APPR_THRESHOLD                126
+#define HGCD_REDUCE_THRESHOLD             1679
+#define GCD_DC_THRESHOLD                   283
+#define GCDEXT_DC_THRESHOLD                189
+#define JACOBI_BASE_METHOD                   4
+
+#define GET_STR_DC_THRESHOLD                14
+#define GET_STR_PRECOMPUTE_THRESHOLD        28
+#define SET_STR_DC_THRESHOLD               262
+#define SET_STR_PRECOMPUTE_THRESHOLD       548
+
+#define FAC_DSC_THRESHOLD                  156
+#define FAC_ODD_THRESHOLD                   28
diff --git a/mpn/sparc64/README b/mpn/sparc64/README

index 19072996de48b13fc6b706fa6102c93e203bb481..fada1926c054873711a2c7305960c09a1b9416c5 100644 (file)
--- a/mpn/sparc64/README
+++ b/mpn/sparc64/README
@@ -65,7 +65,7 @@ Integer conditional move instructions cannot dual-issue with other integer
  instructions.  No conditional move can issue 1-5 cycles after a load.  (This
  might have been fixed for UltraSPARC-3.)
  
-The UltraSPARC-3 pipeline is very simular to he one of UltraSPARC-1/2 , but is
+The UltraSPARC-3 pipeline is very similar to the one of UltraSPARC-1/2, but is
  somewhat slower.  Branches execute slower, and there may be other new stalls.
  But integer multiply doesn't stall the entire CPU and also has a much lower
  latency.  But it's still not pipelined, and thus useless for our needs.
diff --git a/mpn/sparc64/add_n.asm b/mpn/sparc64/add_n.asm

deleted file mode 100644 (file)

index c3e5b46..0000000
--- a/mpn/sparc64/add_n.asm
+++ /dev/null
@@ -1,220 +0,0 @@
-dnl  SPARC v9 mpn_add_n -- Add two limb vectors of the same length > 0 and
-dnl  store sum in a third limb vector.
-
-dnl  Copyright 2001, 2002, 2003 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of the GNU Lesser General Public License as published
-dnl  by the Free Software Foundation; either version 3 of the License, or (at
-dnl  your option) any later version.
-
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-dnl  License for more details.
-
-dnl  You should have received a copy of the GNU Lesser General Public License
-dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C                 cycles/limb
-C UltraSPARC 1&2:     4
-C UltraSPARC 3:              4.5
-
-C Compute carry-out from the most significant bits of u,v, and r, where
-C r=u+v+carry_in, using logic operations.
-
-C This code runs at 4 cycles/limb on UltraSPARC 1 and 2.  It has a 4 insn
-C recurrency, and the UltraSPARC 1 and 2 the IE units are 100% saturated.
-C Therefore, it seems futile to try to optimize this any further...
-
-C INPUT PARAMETERS
-define(`rp',`%i0')
-define(`up',`%i1')
-define(`vp',`%i2')
-define(`n',`%i3')
-
-define(`u0',`%l0')
-define(`u1',`%l2')
-define(`u2',`%l4')
-define(`u3',`%l6')
-define(`v0',`%l1')
-define(`v1',`%l3')
-define(`v2',`%l5')
-define(`v3',`%l7')
-
-define(`cy',`%i4')
-
-define(`fanop',`fitod %f0,%f2')                dnl  A quasi nop running in the FA pipe
-define(`fmnop',`fmuld %f0,%f0,%f4')    dnl  A quasi nop running in the FM pipe
-
-ASM_START()
-       REGISTER(%g2,#scratch)
-       REGISTER(%g3,#scratch)
-PROLOGUE(mpn_add_n)
-       save    %sp,-160,%sp
-
-       fitod   %f0,%f0         C make sure f0 contains small, quiet number
-       subcc   n,4,%g0
-       bl,pn   %icc,.Loop0
-       mov     0,cy
-
-       ldx     [up+0],u0
-       ldx     [vp+0],v0
-       add     up,32,up
-       ldx     [up-24],u1
-       ldx     [vp+8],v1
-       add     vp,32,vp
-       ldx     [up-16],u2
-       ldx     [vp-16],v2
-       ldx     [up-8],u3
-       ldx     [vp-8],v3
-       subcc   n,8,n
-       add     u0,v0,%g1       C main add
-       add     %g1,cy,%g4      C carry add
-       or      u0,v0,%g2
-       bl,pn   %icc,.Lend4567
-       fanop
-       b,a     .Loop
-
-       .align  16
-C START MAIN LOOP
-.Loop: andn    %g2,%g4,%g2
-       and     u0,v0,%g3
-       ldx     [up+0],u0
-       fanop
-C --
-       or      %g3,%g2,%g2
-       ldx     [vp+0],v0
-       add     up,32,up
-       fanop
-C --
-       srlx    %g2,63,cy
-       add     u1,v1,%g1
-       stx     %g4,[rp+0]
-       fanop
-C --
-       add     %g1,cy,%g4
-       or      u1,v1,%g2
-       fmnop
-       fanop
-C --
-       andn    %g2,%g4,%g2
-       and     u1,v1,%g3
-       ldx     [up-24],u1
-       fanop
-C --
-       or      %g3,%g2,%g2
-       ldx     [vp+8],v1
-       add     vp,32,vp
-       fanop
-C --
-       srlx    %g2,63,cy
-       add     u2,v2,%g1
-       stx     %g4,[rp+8]
-       fanop
-C --
-       add     %g1,cy,%g4
-       or      u2,v2,%g2
-       fmnop
-       fanop
-C --
-       andn    %g2,%g4,%g2
-       and     u2,v2,%g3
-       ldx     [up-16],u2
-       fanop
-C --
-       or      %g3,%g2,%g2
-       ldx     [vp-16],v2
-       add     rp,32,rp
-       fanop
-C --
-       srlx    %g2,63,cy
-       add     u3,v3,%g1
-       stx     %g4,[rp-16]
-       fanop
-C --
-       add     %g1,cy,%g4
-       or      u3,v3,%g2
-       fmnop
-       fanop
-C --
-       andn    %g2,%g4,%g2
-       and     u3,v3,%g3
-       ldx     [up-8],u3
-       fanop
-C --
-       or      %g3,%g2,%g2
-       subcc   n,4,n
-       ldx     [vp-8],v3
-       fanop
-C --
-       srlx    %g2,63,cy
-       add     u0,v0,%g1
-       stx     %g4,[rp-8]
-       fanop
-C --
-       add     %g1,cy,%g4
-       or      u0,v0,%g2
-       bge,pt  %icc,.Loop
-       fanop
-C END MAIN LOOP
-.Lend4567:
-       andn    %g2,%g4,%g2
-       and     u0,v0,%g3
-       or      %g3,%g2,%g2
-       srlx    %g2,63,cy
-       add     u1,v1,%g1
-       stx     %g4,[rp+0]
-       add     %g1,cy,%g4
-       or      u1,v1,%g2
-       andn    %g2,%g4,%g2
-       and     u1,v1,%g3
-       or      %g3,%g2,%g2
-       srlx    %g2,63,cy
-       add     u2,v2,%g1
-       stx     %g4,[rp+8]
-       add     %g1,cy,%g4
-       or      u2,v2,%g2
-       andn    %g2,%g4,%g2
-       and     u2,v2,%g3
-       or      %g3,%g2,%g2
-       add     rp,32,rp
-       srlx    %g2,63,cy
-       add     u3,v3,%g1
-       stx     %g4,[rp-16]
-       add     %g1,cy,%g4
-       or      u3,v3,%g2
-       andn    %g2,%g4,%g2
-       and     u3,v3,%g3
-       or      %g3,%g2,%g2
-       srlx    %g2,63,cy
-       stx     %g4,[rp-8]
-
-       addcc   n,4,n
-       bz,pn   %icc,.Lret
-       fanop
-
-.Loop0:        ldx     [up],u0
-       add     up,8,up
-       ldx     [vp],v0
-       add     vp,8,vp
-       add     rp,8,rp
-       subcc   n,1,n
-       add     u0,v0,%g1
-       or      u0,v0,%g2
-       add     %g1,cy,%g4
-       and     u0,v0,%g3
-       andn    %g2,%g4,%g2
-       stx     %g4,[rp-8]
-       or      %g3,%g2,%g2
-       bnz,pt  %icc,.Loop0
-       srlx    %g2,63,cy
-
-.Lret: mov     cy,%i0
-       ret
-       restore
-EPILOGUE(mpn_add_n)
diff --git a/mpn/sparc64/addmul_1.asm b/mpn/sparc64/addmul_1.asm

deleted file mode 100644 (file)

index bd83c65..0000000
--- a/mpn/sparc64/addmul_1.asm
+++ /dev/null
@@ -1,596 +0,0 @@
-dnl  SPARC v9 64-bit mpn_addmul_1 -- Multiply a limb vector with a limb and add
-dnl  the result to a second limb vector.
-
-dnl  Copyright 1998, 2000, 2001, 2002, 2003, 2004 Free Software Foundation,
-dnl  Inc.
-
-dnl  This file is part of the GNU MP Library.
-
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of the GNU Lesser General Public License as published
-dnl  by the Free Software Foundation; either version 3 of the License, or (at
-dnl  your option) any later version.
-
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-dnl  License for more details.
-
-dnl  You should have received a copy of the GNU Lesser General Public License
-dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C                 cycles/limb
-C UltraSPARC 1&2:     14
-C UltraSPARC 3:              17.5
-
-C Algorithm: We use eight floating-point multiplies per limb product, with the
-C invariant v operand split into four 16-bit pieces, and the up operand split
-C into 32-bit pieces.  We sum pairs of 48-bit partial products using
-C floating-point add, then convert the four 49-bit product-sums and transfer
-C them to the integer unit.
-
-C Possible optimizations:
-C   0. Rewrite to use algorithm of mpn_addmul_2.
-C   1. Align the stack area where we transfer the four 49-bit product-sums
-C      to a 32-byte boundary.  That would minimize the cache collision.
-C      (UltraSPARC-1/2 use a direct-mapped cache.)  (Perhaps even better would
-C      be to align the area to map to the area immediately before up?)
-C   2. Sum the 4 49-bit quantities using 32-bit operations, as in the
-C      develop mpn_addmul_2.  This would save many integer instructions.
-C   3. Unrolling.  Questionable if it is worth the code expansion, given that
-C      it could only save 1 cycle/limb.
-C   4. Specialize for particular v values.  If its upper 32 bits are zero, we
-C      could save many operations, in the FPU (fmuld), but more so in the IEU
-C      since we'll be summing 48-bit quantities, which might be simpler.
-C   5. Ideally, we should schedule the f2/f3 and f4/f5 RAW further apart, and
-C      the i00,i16,i32,i48 RAW less apart.  The latter apart-scheduling should
-C      not be greater than needed for L2 cache latency, and also not so great
-C      that i16 needs to be copied.
-C   6. Avoid performing mem+fa+fm in the same cycle, at least not when we want
-C      to get high IEU bandwidth.  (12 of the 14 cycles will be free for 2 IEU
-C      ops.)
-
-C Instruction classification (as per UltraSPARC-1/2 functional units):
-C    8 FM
-C   10 FA
-C   12 MEM
-C   10 ISHIFT + 14 IADDLOG
-C    1 BRANCH
-C   55 insns totally (plus one mov insn that should be optimized out)
-
-C The loop executes 56 instructions in 14 cycles on UltraSPARC-1/2, i.e we
-C sustain the peak execution rate of 4 instructions/cycle.
-
-C INPUT PARAMETERS
-C rp   i0
-C up   i1
-C n    i2
-C v    i3
-
-ASM_START()
-       REGISTER(%g2,#scratch)
-       REGISTER(%g3,#scratch)
-
-define(`p00', `%f8') define(`p16',`%f10') define(`p32',`%f12') define(`p48',`%f14')
-define(`r32',`%f16') define(`r48',`%f18') define(`r64',`%f20') define(`r80',`%f22')
-define(`v00',`%f24') define(`v16',`%f26') define(`v32',`%f28') define(`v48',`%f30')
-define(`u00',`%f32') define(`u32', `%f34')
-define(`a00',`%f36') define(`a16',`%f38') define(`a32',`%f40') define(`a48',`%f42')
-define(`cy',`%g1')
-define(`rlimb',`%g3')
-define(`i00',`%l0') define(`i16',`%l1') define(`i32',`%l2') define(`i48',`%l3')
-define(`xffffffff',`%l7')
-define(`xffff',`%o0')
-
-PROLOGUE(mpn_addmul_1)
-
-C Initialization.  (1) Split v operand into four 16-bit chunks and store them
-C as IEEE double in fp registers.  (2) Clear upper 32 bits of fp register pairs
-C f2 and f4.  (3) Store masks in registers aliased to `xffff' and `xffffffff'.
-
-       save    %sp, -256, %sp
-       mov     -1, %g4
-       srlx    %g4, 48, xffff          C store mask in register `xffff'
-       and     %i3, xffff, %g2
-       stx     %g2, [%sp+2223+0]
-       srlx    %i3, 16, %g3
-       and     %g3, xffff, %g3
-       stx     %g3, [%sp+2223+8]
-       srlx    %i3, 32, %g2
-       and     %g2, xffff, %g2
-       stx     %g2, [%sp+2223+16]
-       srlx    %i3, 48, %g3
-       stx     %g3, [%sp+2223+24]
-       srlx    %g4, 32, xffffffff      C store mask in register `xffffffff'
-
-       sllx    %i2, 3, %i2
-       mov     0, cy                   C clear cy
-       add     %i0, %i2, %i0
-       add     %i1, %i2, %i1
-       neg     %i2
-       add     %i1, 4, %i5
-       add     %i0, -32, %i4
-       add     %i0, -16, %i0
-
-       ldd     [%sp+2223+0], v00
-       ldd     [%sp+2223+8], v16
-       ldd     [%sp+2223+16], v32
-       ldd     [%sp+2223+24], v48
-       ld      [%sp+2223+0],%f2        C zero f2
-       ld      [%sp+2223+0],%f4        C zero f4
-       ld      [%i5+%i2], %f3          C read low 32 bits of up[i]
-       ld      [%i1+%i2], %f5          C read high 32 bits of up[i]
-       fxtod   v00, v00
-       fxtod   v16, v16
-       fxtod   v32, v32
-       fxtod   v48, v48
-
-C Start real work.  (We sneakingly read f3 and f5 above...)
-C The software pipeline is very deep, requiring 4 feed-in stages.
-
-       fxtod   %f2, u00
-       fxtod   %f4, u32
-       fmuld   u00, v00, a00
-       fmuld   u00, v16, a16
-       fmuld   u00, v32, p32
-       fmuld   u32, v00, r32
-       fmuld   u00, v48, p48
-       addcc   %i2, 8, %i2
-       bnz,pt  %icc, .L_two_or_more
-       fmuld   u32, v16, r48
-
-.L_one:
-       fmuld   u32, v32, r64   C FIXME not urgent
-       faddd   p32, r32, a32
-       fdtox   a00, a00
-       faddd   p48, r48, a48
-       fmuld   u32, v48, r80   C FIXME not urgent
-       fdtox   a16, a16
-       fdtox   a32, a32
-       fdtox   a48, a48
-       std     a00, [%sp+2223+0]
-       std     a16, [%sp+2223+8]
-       std     a32, [%sp+2223+16]
-       std     a48, [%sp+2223+24]
-       add     %i2, 8, %i2
-
-       fdtox   r64, a00
-       ldx     [%i0+%i2], rlimb        C read rp[i]
-       fdtox   r80, a16
-       ldx     [%sp+2223+0], i00
-       ldx     [%sp+2223+8], i16
-       ldx     [%sp+2223+16], i32
-       ldx     [%sp+2223+24], i48
-       std     a00, [%sp+2223+0]
-       std     a16, [%sp+2223+8]
-       add     %i2, 8, %i2
-
-       srlx    rlimb, 32, %g4          C HI(rlimb)
-       and     rlimb, xffffffff, %g5   C LO(rlimb)
-       add     i00, %g5, %g5           C i00+ now in g5
-       ldx     [%sp+2223+0], i00
-       srlx    i16, 48, %l4            C (i16 >> 48)
-       mov     i16, %g2
-       ldx     [%sp+2223+8], i16
-       srlx    i48, 16, %l5            C (i48 >> 16)
-       add     i32, %g4, %g4           C i32+ now in g4
-       sllx    i48, 32, %l6            C (i48 << 32)
-       srlx    %g4, 32, %o3            C (i32 >> 32)
-       add     %l5, %l4, %o1           C hi64- in %o1
-       std     a00, [%sp+2223+0]
-       sllx    %g4, 16, %o2            C (i32 << 16)
-       add     %o3, %o1, %o1           C hi64 in %o1   1st ASSIGNMENT
-       std     a16, [%sp+2223+8]
-       sllx    %o1, 48, %o3            C (hi64 << 48)
-       add     %g2, %o2, %o2           C mi64- in %o2
-       add     %l6, %o2, %o2           C mi64- in %o2
-       sub     %o2, %o3, %o2           C mi64 in %o2   1st ASSIGNMENT
-       add     cy, %g5, %o4            C x = prev(i00) + cy
-       b       .L_out_1
-       add     %i2, 8, %i2
-
-.L_two_or_more:
-       ld      [%i5+%i2], %f3          C read low 32 bits of up[i]
-       fmuld   u32, v32, r64   C FIXME not urgent
-       faddd   p32, r32, a32
-       ld      [%i1+%i2], %f5          C read high 32 bits of up[i]
-       fdtox   a00, a00
-       faddd   p48, r48, a48
-       fmuld   u32, v48, r80   C FIXME not urgent
-       fdtox   a16, a16
-       fdtox   a32, a32
-       fxtod   %f2, u00
-       fxtod   %f4, u32
-       fdtox   a48, a48
-       std     a00, [%sp+2223+0]
-       fmuld   u00, v00, p00
-       std     a16, [%sp+2223+8]
-       fmuld   u00, v16, p16
-       std     a32, [%sp+2223+16]
-       fmuld   u00, v32, p32
-       std     a48, [%sp+2223+24]
-       faddd   p00, r64, a00
-       fmuld   u32, v00, r32
-       faddd   p16, r80, a16
-       fmuld   u00, v48, p48
-       addcc   %i2, 8, %i2
-       bnz,pt  %icc, .L_three_or_more
-       fmuld   u32, v16, r48
-
-.L_two:
-       fmuld   u32, v32, r64   C FIXME not urgent
-       faddd   p32, r32, a32
-       fdtox   a00, a00
-       ldx     [%i0+%i2], rlimb        C read rp[i]
-       faddd   p48, r48, a48
-       fmuld   u32, v48, r80   C FIXME not urgent
-       fdtox   a16, a16
-       ldx     [%sp+2223+0], i00
-       fdtox   a32, a32
-       ldx     [%sp+2223+8], i16
-       ldx     [%sp+2223+16], i32
-       ldx     [%sp+2223+24], i48
-       fdtox   a48, a48
-       std     a00, [%sp+2223+0]
-       std     a16, [%sp+2223+8]
-       std     a32, [%sp+2223+16]
-       std     a48, [%sp+2223+24]
-       add     %i2, 8, %i2
-
-       fdtox   r64, a00
-       srlx    rlimb, 32, %g4          C HI(rlimb)
-       and     rlimb, xffffffff, %g5   C LO(rlimb)
-       ldx     [%i0+%i2], rlimb        C read rp[i]
-       add     i00, %g5, %g5           C i00+ now in g5
-       fdtox   r80, a16
-       ldx     [%sp+2223+0], i00
-       srlx    i16, 48, %l4            C (i16 >> 48)
-       mov     i16, %g2
-       ldx     [%sp+2223+8], i16
-       srlx    i48, 16, %l5            C (i48 >> 16)
-       add     i32, %g4, %g4           C i32+ now in g4
-       ldx     [%sp+2223+16], i32
-       sllx    i48, 32, %l6            C (i48 << 32)
-       ldx     [%sp+2223+24], i48
-       srlx    %g4, 32, %o3            C (i32 >> 32)
-       add     %l5, %l4, %o1           C hi64- in %o1
-       std     a00, [%sp+2223+0]
-       sllx    %g4, 16, %o2            C (i32 << 16)
-       add     %o3, %o1, %o1           C hi64 in %o1   1st ASSIGNMENT
-       std     a16, [%sp+2223+8]
-       sllx    %o1, 48, %o3            C (hi64 << 48)
-       add     %g2, %o2, %o2           C mi64- in %o2
-       add     %l6, %o2, %o2           C mi64- in %o2
-       sub     %o2, %o3, %o2           C mi64 in %o2   1st ASSIGNMENT
-       add     cy, %g5, %o4            C x = prev(i00) + cy
-       b       .L_out_2
-       add     %i2, 8, %i2
-
-.L_three_or_more:
-       ld      [%i5+%i2], %f3          C read low 32 bits of up[i]
-       fmuld   u32, v32, r64   C FIXME not urgent
-       faddd   p32, r32, a32
-       ld      [%i1+%i2], %f5          C read high 32 bits of up[i]
-       fdtox   a00, a00
-       ldx     [%i0+%i2], rlimb        C read rp[i]
-       faddd   p48, r48, a48
-       fmuld   u32, v48, r80   C FIXME not urgent
-       fdtox   a16, a16
-       ldx     [%sp+2223+0], i00
-       fdtox   a32, a32
-       ldx     [%sp+2223+8], i16
-       fxtod   %f2, u00
-       ldx     [%sp+2223+16], i32
-       fxtod   %f4, u32
-       ldx     [%sp+2223+24], i48
-       fdtox   a48, a48
-       std     a00, [%sp+2223+0]
-       fmuld   u00, v00, p00
-       std     a16, [%sp+2223+8]
-       fmuld   u00, v16, p16
-       std     a32, [%sp+2223+16]
-       fmuld   u00, v32, p32
-       std     a48, [%sp+2223+24]
-       faddd   p00, r64, a00
-       fmuld   u32, v00, r32
-       faddd   p16, r80, a16
-       fmuld   u00, v48, p48
-       addcc   %i2, 8, %i2
-       bnz,pt  %icc, .L_four_or_more
-       fmuld   u32, v16, r48
-
-.L_three:
-       fmuld   u32, v32, r64   C FIXME not urgent
-       faddd   p32, r32, a32
-       fdtox   a00, a00
-       srlx    rlimb, 32, %g4          C HI(rlimb)
-       and     rlimb, xffffffff, %g5   C LO(rlimb)
-       ldx     [%i0+%i2], rlimb        C read rp[i]
-       faddd   p48, r48, a48
-       add     i00, %g5, %g5           C i00+ now in g5
-       fmuld   u32, v48, r80   C FIXME not urgent
-       fdtox   a16, a16
-       ldx     [%sp+2223+0], i00
-       fdtox   a32, a32
-       srlx    i16, 48, %l4            C (i16 >> 48)
-       mov     i16, %g2
-       ldx     [%sp+2223+8], i16
-       srlx    i48, 16, %l5            C (i48 >> 16)
-       add     i32, %g4, %g4           C i32+ now in g4
-       ldx     [%sp+2223+16], i32
-       sllx    i48, 32, %l6            C (i48 << 32)
-       ldx     [%sp+2223+24], i48
-       fdtox   a48, a48
-       srlx    %g4, 32, %o3            C (i32 >> 32)
-       add     %l5, %l4, %o1           C hi64- in %o1
-       std     a00, [%sp+2223+0]
-       sllx    %g4, 16, %o2            C (i32 << 16)
-       add     %o3, %o1, %o1           C hi64 in %o1   1st ASSIGNMENT
-       std     a16, [%sp+2223+8]
-       sllx    %o1, 48, %o3            C (hi64 << 48)
-       add     %g2, %o2, %o2           C mi64- in %o2
-       std     a32, [%sp+2223+16]
-       add     %l6, %o2, %o2           C mi64- in %o2
-       std     a48, [%sp+2223+24]
-       sub     %o2, %o3, %o2           C mi64 in %o2   1st ASSIGNMENT
-       add     cy, %g5, %o4            C x = prev(i00) + cy
-       b       .L_out_3
-       add     %i2, 8, %i2
-
-.L_four_or_more:
-       ld      [%i5+%i2], %f3          C read low 32 bits of up[i]
-       fmuld   u32, v32, r64   C FIXME not urgent
-       faddd   p32, r32, a32
-       ld      [%i1+%i2], %f5          C read high 32 bits of up[i]
-       fdtox   a00, a00
-       srlx    rlimb, 32, %g4          C HI(rlimb)
-       and     rlimb, xffffffff, %g5   C LO(rlimb)
-       ldx     [%i0+%i2], rlimb        C read rp[i]
-       faddd   p48, r48, a48
-       add     i00, %g5, %g5           C i00+ now in g5
-       fmuld   u32, v48, r80   C FIXME not urgent
-       fdtox   a16, a16
-       ldx     [%sp+2223+0], i00
-       fdtox   a32, a32
-       srlx    i16, 48, %l4            C (i16 >> 48)
-       mov     i16, %g2
-       ldx     [%sp+2223+8], i16
-       fxtod   %f2, u00
-       srlx    i48, 16, %l5            C (i48 >> 16)
-       add     i32, %g4, %g4           C i32+ now in g4
-       ldx     [%sp+2223+16], i32
-       fxtod   %f4, u32
-       sllx    i48, 32, %l6            C (i48 << 32)
-       ldx     [%sp+2223+24], i48
-       fdtox   a48, a48
-       srlx    %g4, 32, %o3            C (i32 >> 32)
-       add     %l5, %l4, %o1           C hi64- in %o1
-       std     a00, [%sp+2223+0]
-       fmuld   u00, v00, p00
-       sllx    %g4, 16, %o2            C (i32 << 16)
-       add     %o3, %o1, %o1           C hi64 in %o1   1st ASSIGNMENT
-       std     a16, [%sp+2223+8]
-       fmuld   u00, v16, p16
-       sllx    %o1, 48, %o3            C (hi64 << 48)
-       add     %g2, %o2, %o2           C mi64- in %o2
-       std     a32, [%sp+2223+16]
-       fmuld   u00, v32, p32
-       add     %l6, %o2, %o2           C mi64- in %o2
-       std     a48, [%sp+2223+24]
-       faddd   p00, r64, a00
-       fmuld   u32, v00, r32
-       sub     %o2, %o3, %o2           C mi64 in %o2   1st ASSIGNMENT
-       faddd   p16, r80, a16
-       fmuld   u00, v48, p48
-       add     cy, %g5, %o4            C x = prev(i00) + cy
-       addcc   %i2, 8, %i2
-       bnz,pt  %icc, .Loop
-       fmuld   u32, v16, r48
-
-.L_four:
-       b,a     .L_out_4
-
-C BEGIN MAIN LOOP
-       .align  16
-.Loop:
-C 00
-       srlx    %o4, 16, %o5            C (x >> 16)
-       ld      [%i5+%i2], %f3          C read low 32 bits of up[i]
-       fmuld   u32, v32, r64   C FIXME not urgent
-       faddd   p32, r32, a32
-C 01
-       add     %o5, %o2, %o2           C mi64 in %o2   2nd ASSIGNMENT
-       and     %o4, xffff, %o5         C (x & 0xffff)
-       ld      [%i1+%i2], %f5          C read high 32 bits of up[i]
-       fdtox   a00, a00
-C 02
-       srlx    rlimb, 32, %g4          C HI(rlimb)
-       and     rlimb, xffffffff, %g5   C LO(rlimb)
-       ldx     [%i0+%i2], rlimb        C read rp[i]
-       faddd   p48, r48, a48
-C 03
-       srlx    %o2, 48, %o7            C (mi64 >> 48)
-       add     i00, %g5, %g5           C i00+ now in g5
-       fmuld   u32, v48, r80   C FIXME not urgent
-       fdtox   a16, a16
-C 04
-       sllx    %o2, 16, %i3            C (mi64 << 16)
-       add     %o7, %o1, cy            C new cy
-       ldx     [%sp+2223+0], i00
-       fdtox   a32, a32
-C 05
-       srlx    i16, 48, %l4            C (i16 >> 48)
-       mov     i16, %g2
-       ldx     [%sp+2223+8], i16
-       fxtod   %f2, u00
-C 06
-       srlx    i48, 16, %l5            C (i48 >> 16)
-       add     i32, %g4, %g4           C i32+ now in g4
-       ldx     [%sp+2223+16], i32
-       fxtod   %f4, u32
-C 07
-       sllx    i48, 32, %l6            C (i48 << 32)
-       or      %i3, %o5, %o5
-       ldx     [%sp+2223+24], i48
-       fdtox   a48, a48
-C 08
-       srlx    %g4, 32, %o3            C (i32 >> 32)
-       add     %l5, %l4, %o1           C hi64- in %o1
-       std     a00, [%sp+2223+0]
-       fmuld   u00, v00, p00
-C 09
-       sllx    %g4, 16, %o2            C (i32 << 16)
-       add     %o3, %o1, %o1           C hi64 in %o1   1st ASSIGNMENT
-       std     a16, [%sp+2223+8]
-       fmuld   u00, v16, p16
-C 10
-       sllx    %o1, 48, %o3            C (hi64 << 48)
-       add     %g2, %o2, %o2           C mi64- in %o2
-       std     a32, [%sp+2223+16]
-       fmuld   u00, v32, p32
-C 11
-       add     %l6, %o2, %o2           C mi64- in %o2
-       std     a48, [%sp+2223+24]
-       faddd   p00, r64, a00
-       fmuld   u32, v00, r32
-C 12
-       sub     %o2, %o3, %o2           C mi64 in %o2   1st ASSIGNMENT
-       stx     %o5, [%i4+%i2]
-       faddd   p16, r80, a16
-       fmuld   u00, v48, p48
-C 13
-       add     cy, %g5, %o4            C x = prev(i00) + cy
-       addcc   %i2, 8, %i2
-       bnz,pt  %icc, .Loop
-       fmuld   u32, v16, r48
-C END MAIN LOOP
-
-.L_out_4:
-       srlx    %o4, 16, %o5            C (x >> 16)
-       fmuld   u32, v32, r64   C FIXME not urgent
-       faddd   p32, r32, a32
-       add     %o5, %o2, %o2           C mi64 in %o2   2nd ASSIGNMENT
-       and     %o4, xffff, %o5         C (x & 0xffff)
-       fdtox   a00, a00
-       srlx    rlimb, 32, %g4          C HI(rlimb)
-       and     rlimb, xffffffff, %g5   C LO(rlimb)
-       ldx     [%i0+%i2], rlimb        C read rp[i]
-       faddd   p48, r48, a48
-       srlx    %o2, 48, %o7            C (mi64 >> 48)
-       add     i00, %g5, %g5           C i00+ now in g5
-       fmuld   u32, v48, r80   C FIXME not urgent
-       fdtox   a16, a16
-       sllx    %o2, 16, %i3            C (mi64 << 16)
-       add     %o7, %o1, cy            C new cy
-       ldx     [%sp+2223+0], i00
-       fdtox   a32, a32
-       srlx    i16, 48, %l4            C (i16 >> 48)
-       mov     i16, %g2
-       ldx     [%sp+2223+8], i16
-       srlx    i48, 16, %l5            C (i48 >> 16)
-       add     i32, %g4, %g4           C i32+ now in g4
-       ldx     [%sp+2223+16], i32
-       sllx    i48, 32, %l6            C (i48 << 32)
-       or      %i3, %o5, %o5
-       ldx     [%sp+2223+24], i48
-       fdtox   a48, a48
-       srlx    %g4, 32, %o3            C (i32 >> 32)
-       add     %l5, %l4, %o1           C hi64- in %o1
-       std     a00, [%sp+2223+0]
-       sllx    %g4, 16, %o2            C (i32 << 16)
-       add     %o3, %o1, %o1           C hi64 in %o1   1st ASSIGNMENT
-       std     a16, [%sp+2223+8]
-       sllx    %o1, 48, %o3            C (hi64 << 48)
-       add     %g2, %o2, %o2           C mi64- in %o2
-       std     a32, [%sp+2223+16]
-       add     %l6, %o2, %o2           C mi64- in %o2
-       std     a48, [%sp+2223+24]
-       sub     %o2, %o3, %o2           C mi64 in %o2   1st ASSIGNMENT
-       stx     %o5, [%i4+%i2]
-       add     cy, %g5, %o4            C x = prev(i00) + cy
-       add     %i2, 8, %i2
-.L_out_3:
-       srlx    %o4, 16, %o5            C (x >> 16)
-       add     %o5, %o2, %o2           C mi64 in %o2   2nd ASSIGNMENT
-       and     %o4, xffff, %o5         C (x & 0xffff)
-       fdtox   r64, a00
-       srlx    rlimb, 32, %g4          C HI(rlimb)
-       and     rlimb, xffffffff, %g5   C LO(rlimb)
-       ldx     [%i0+%i2], rlimb        C read rp[i]
-       srlx    %o2, 48, %o7            C (mi64 >> 48)
-       add     i00, %g5, %g5           C i00+ now in g5
-       fdtox   r80, a16
-       sllx    %o2, 16, %i3            C (mi64 << 16)
-       add     %o7, %o1, cy            C new cy
-       ldx     [%sp+2223+0], i00
-       srlx    i16, 48, %l4            C (i16 >> 48)
-       mov     i16, %g2
-       ldx     [%sp+2223+8], i16
-       srlx    i48, 16, %l5            C (i48 >> 16)
-       add     i32, %g4, %g4           C i32+ now in g4
-       ldx     [%sp+2223+16], i32
-       sllx    i48, 32, %l6            C (i48 << 32)
-       or      %i3, %o5, %o5
-       ldx     [%sp+2223+24], i48
-       srlx    %g4, 32, %o3            C (i32 >> 32)
-       add     %l5, %l4, %o1           C hi64- in %o1
-       std     a00, [%sp+2223+0]
-       sllx    %g4, 16, %o2            C (i32 << 16)
-       add     %o3, %o1, %o1           C hi64 in %o1   1st ASSIGNMENT
-       std     a16, [%sp+2223+8]
-       sllx    %o1, 48, %o3            C (hi64 << 48)
-       add     %g2, %o2, %o2           C mi64- in %o2
-       add     %l6, %o2, %o2           C mi64- in %o2
-       sub     %o2, %o3, %o2           C mi64 in %o2   1st ASSIGNMENT
-       stx     %o5, [%i4+%i2]
-       add     cy, %g5, %o4            C x = prev(i00) + cy
-       add     %i2, 8, %i2
-.L_out_2:
-       srlx    %o4, 16, %o5            C (x >> 16)
-       add     %o5, %o2, %o2           C mi64 in %o2   2nd ASSIGNMENT
-       and     %o4, xffff, %o5         C (x & 0xffff)
-       srlx    rlimb, 32, %g4          C HI(rlimb)
-       and     rlimb, xffffffff, %g5   C LO(rlimb)
-       srlx    %o2, 48, %o7            C (mi64 >> 48)
-       add     i00, %g5, %g5           C i00+ now in g5
-       sllx    %o2, 16, %i3            C (mi64 << 16)
-       add     %o7, %o1, cy            C new cy
-       ldx     [%sp+2223+0], i00
-       srlx    i16, 48, %l4            C (i16 >> 48)
-       mov     i16, %g2
-       ldx     [%sp+2223+8], i16
-       srlx    i48, 16, %l5            C (i48 >> 16)
-       add     i32, %g4, %g4           C i32+ now in g4
-       sllx    i48, 32, %l6            C (i48 << 32)
-       or      %i3, %o5, %o5
-       srlx    %g4, 32, %o3            C (i32 >> 32)
-       add     %l5, %l4, %o1           C hi64- in %o1
-       sllx    %g4, 16, %o2            C (i32 << 16)
-       add     %o3, %o1, %o1           C hi64 in %o1   1st ASSIGNMENT
-       sllx    %o1, 48, %o3            C (hi64 << 48)
-       add     %g2, %o2, %o2           C mi64- in %o2
-       add     %l6, %o2, %o2           C mi64- in %o2
-       sub     %o2, %o3, %o2           C mi64 in %o2   1st ASSIGNMENT
-       stx     %o5, [%i4+%i2]
-       add     cy, %g5, %o4            C x = prev(i00) + cy
-       add     %i2, 8, %i2
-.L_out_1:
-       srlx    %o4, 16, %o5            C (x >> 16)
-       add     %o5, %o2, %o2           C mi64 in %o2   2nd ASSIGNMENT
-       and     %o4, xffff, %o5         C (x & 0xffff)
-       srlx    %o2, 48, %o7            C (mi64 >> 48)
-       sllx    %o2, 16, %i3            C (mi64 << 16)
-       add     %o7, %o1, cy            C new cy
-       or      %i3, %o5, %o5
-       stx     %o5, [%i4+%i2]
-
-       sllx    i00, 0, %g2
-       add     %g2, cy, cy
-       sllx    i16, 16, %g3
-       add     %g3, cy, cy
-
-       return  %i7+8
-       mov     cy, %o0
-EPILOGUE(mpn_addmul_1)
diff --git a/mpn/sparc64/addmul_2.asm b/mpn/sparc64/addmul_2.asm

deleted file mode 100644 (file)

index 65efb51..0000000
--- a/mpn/sparc64/addmul_2.asm
+++ /dev/null
@@ -1,540 +0,0 @@
-dnl  SPARC v9 64-bit mpn_addmul_2 -- Multiply an n limb number with 2-limb
-dnl  number and add the result to a n limb vector.
-
-dnl  Copyright 2002, 2003 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of the GNU Lesser General Public License as published
-dnl  by the Free Software Foundation; either version 3 of the License, or (at
-dnl  your option) any later version.
-
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-dnl  License for more details.
-
-dnl  You should have received a copy of the GNU Lesser General Public License
-dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C                  cycles/limb
-C UltraSPARC 1&2:      9
-C UltraSPARC 3:       10
-
-C Algorithm: We use 16 floating-point multiplies per limb product, with the
-C 2-limb v operand split into eight 16-bit pieces, and the n-limb u operand
-C split into 32-bit pieces.  We sum four 48-bit partial products using
-C floating-point add, then convert the resulting four 50-bit quantities and
-C transfer them to the integer unit.
-
-C Possible optimizations:
-C   1. Align the stack area where we transfer the four 50-bit product-sums
-C      to a 32-byte boundary.  That would minimize the cache collision.
-C      (UltraSPARC-1/2 use a direct-mapped cache.)  (Perhaps even better would
-C      be to align the area to map to the area immediately before up?)
-C   2. Perform two of the fp->int conversions with integer instructions.  We
-C      can get almost ten free IEU slots, if we clean up bookkeeping and the
-C      silly carry-limb code.
-C   3. For an mpn_addmul_1 based on this, we need to fix the silly carry-limb
-C      code.
-
-C OSP (Overlapping software pipeline) version of mpn_mul_basecase:
-C Operand swap will require 8 LDDA and 8 FXTOD, which will mean 8 cycles.
-C FI   = 20
-C L    =  9 x un * vn
-C WDFI = 10 x vn / 2
-C WD   = 4
-
-C Instruction classification (as per UltraSPARC functional units).
-C Assuming silly carry code is fixed.  Includes bookkeeping.
-C
-C               mpn_addmul_X     mpn_mul_X
-C                1       2       1       2
-C               ==========      ==========
-C      FM        8      16       8      16
-C      FA       10      18      10      18
-C     MEM       12      12      10      10
-C  ISHIFT        6       6       6       6
-C IADDLOG       11      11      10      10
-C  BRANCH        1       1       1       1
-C
-C TOTAL IEU     17      17      16      16
-C TOTAL         48      64      45      61
-C
-C IEU cycles     8.5     8.5     8       8
-C MEM cycles    12      12      10      10
-C ISSUE cycles  12      16      11.25   15.25
-C FPU cycles    10      18      10      18
-C cycles/loop   12      18      12      18
-C cycles/limb   12       9      12       9
-
-
-C INPUT PARAMETERS
-C rp[n + 1]    i0
-C up[n]                i1
-C n            i2
-C vp[2]                i3
-
-
-ASM_START()
-       REGISTER(%g2,#scratch)
-       REGISTER(%g3,#scratch)
-
-C Combine registers:
-C u00_hi= u32_hi
-C u00_lo= u32_lo
-C a000  = out000
-C a016  = out016
-C Free: f52 f54
-
-
-define(`p000', `%f8')  define(`p016',`%f10')
-define(`p032',`%f12')  define(`p048',`%f14')
-define(`p064',`%f16')  define(`p080',`%f18')
-define(`p096a',`%f20') define(`p112a',`%f22')
-define(`p096b',`%f56') define(`p112b',`%f58')
-
-define(`out000',`%f0') define(`out016',`%f6')
-
-define(`v000',`%f24')  define(`v016',`%f26')
-define(`v032',`%f28')  define(`v048',`%f30')
-define(`v064',`%f44')  define(`v080',`%f46')
-define(`v096',`%f48')  define(`v112',`%f50')
-
-define(`u00',`%f32')   define(`u32', `%f34')
-
-define(`a000',`%f36')  define(`a016',`%f38')
-define(`a032',`%f40')  define(`a048',`%f42')
-define(`a064',`%f60')  define(`a080',`%f62')
-
-define(`u00_hi',`%f2') define(`u32_hi',`%f4')
-define(`u00_lo',`%f3') define(`u32_lo',`%f5')
-
-define(`cy',`%g1')
-define(`rlimb',`%g3')
-define(`i00',`%l0')    define(`i16',`%l1')
-define(`r00',`%l2')    define(`r32',`%l3')
-define(`xffffffff',`%l7')
-define(`xffff',`%o0')
-
-
-PROLOGUE(mpn_addmul_2)
-
-C Initialization.  (1) Split v operand into eight 16-bit chunks and store them
-C as IEEE double in fp registers.  (2) Clear upper 32 bits of fp register pairs
-C f2 and f4.  (3) Store masks in registers aliased to `xffff' and `xffffffff'.
-C This code could be better scheduled.
-
-       save    %sp, -256, %sp
-
-ifdef(`HAVE_VIS',
-`      mov     -1, %g4
-       wr      %g0, 0xD2, %asi
-       srlx    %g4, 32, xffffffff      C store mask in register `xffffffff'
-       ldda    [%i3+6] %asi, v000
-       ldda    [%i3+4] %asi, v016
-       ldda    [%i3+2] %asi, v032
-       ldda    [%i3+0] %asi, v048
-       fxtod   v000, v000
-       ldda    [%i3+14] %asi, v064
-       fxtod   v016, v016
-       ldda    [%i3+12] %asi, v080
-       fxtod   v032, v032
-       ldda    [%i3+10] %asi, v096
-       fxtod   v048, v048
-       ldda    [%i3+8] %asi, v112
-       fxtod   v064, v064
-       fxtod   v080, v080
-       fxtod   v096, v096
-       fxtod   v112, v112
-       fzero   u00_hi
-       fzero   u32_hi
-',
-`      mov     -1, %g4
-       ldx     [%i3+0], %l0            C vp[0]
-       srlx    %g4, 48, xffff          C store mask in register `xffff'
-       ldx     [%i3+8], %l1            C vp[1]
-
-       and     %l0, xffff, %g2
-       stx     %g2, [%sp+2223+0]
-       srlx    %l0, 16, %g3
-       and     %g3, xffff, %g3
-       stx     %g3, [%sp+2223+8]
-       srlx    %l0, 32, %g2
-       and     %g2, xffff, %g2
-       stx     %g2, [%sp+2223+16]
-       srlx    %l0, 48, %g3
-       stx     %g3, [%sp+2223+24]
-       and     %l1, xffff, %g2
-       stx     %g2, [%sp+2223+32]
-       srlx    %l1, 16, %g3
-       and     %g3, xffff, %g3
-       stx     %g3, [%sp+2223+40]
-       srlx    %l1, 32, %g2
-       and     %g2, xffff, %g2
-       stx     %g2, [%sp+2223+48]
-       srlx    %l1, 48, %g3
-       stx     %g3, [%sp+2223+56]
-
-       srlx    %g4, 32, xffffffff      C store mask in register `xffffffff'
-
-       ldd     [%sp+2223+0], v000
-       ldd     [%sp+2223+8], v016
-       ldd     [%sp+2223+16], v032
-       ldd     [%sp+2223+24], v048
-       fxtod   v000, v000
-       ldd     [%sp+2223+32], v064
-       fxtod   v016, v016
-       ldd     [%sp+2223+40], v080
-       fxtod   v032, v032
-       ldd     [%sp+2223+48], v096
-       fxtod   v048, v048
-       ldd     [%sp+2223+56], v112
-       fxtod   v064, v064
-       ld      [%sp+2223+0], u00_hi    C zero u00_hi
-       fxtod   v080, v080
-       ld      [%sp+2223+0], u32_hi    C zero u32_hi
-       fxtod   v096, v096
-       fxtod   v112, v112
-')
-C Initialization done.
-       mov     0, %g2
-       mov     0, rlimb
-       mov     0, %g4
-       add     %i0, -8, %i0            C BOOKKEEPING
-
-C Start software pipeline.
-
-       ld      [%i1+4], u00_lo         C read low 32 bits of up[i]
-       fxtod   u00_hi, u00
-C mid
-       ld      [%i1+0], u32_lo         C read high 32 bits of up[i]
-       fmuld   u00, v000, a000
-       fmuld   u00, v016, a016
-       fmuld   u00, v032, a032
-       fmuld   u00, v048, a048
-       add     %i2, -1, %i2            C BOOKKEEPING
-       fmuld   u00, v064, p064
-       add     %i1, 8, %i1             C BOOKKEEPING
-       fxtod   u32_hi, u32
-       fmuld   u00, v080, p080
-       fmuld   u00, v096, p096a
-       brnz,pt %i2, .L_2_or_more
-        fmuld  u00, v112, p112a
-
-.L1:   fdtox   a000, out000
-       fmuld   u32, v000, p000
-       fdtox   a016, out016
-       fmuld   u32, v016, p016
-       fmovd   p064, a064
-       fmuld   u32, v032, p032
-       fmovd   p080, a080
-       fmuld   u32, v048, p048
-       std     out000, [%sp+2223+16]
-       faddd   p000, a032, a000
-       fmuld   u32, v064, p064
-       std     out016, [%sp+2223+24]
-       fxtod   u00_hi, u00
-       faddd   p016, a048, a016
-       fmuld   u32, v080, p080
-       faddd   p032, a064, a032
-       fmuld   u32, v096, p096b
-       faddd   p048, a080, a048
-       fmuld   u32, v112, p112b
-C mid
-       fdtox   a000, out000
-       fdtox   a016, out016
-       faddd   p064, p096a, a064
-       faddd   p080, p112a, a080
-       std     out000, [%sp+2223+0]
-       b       .L_wd2
-        std    out016, [%sp+2223+8]
-
-.L_2_or_more:
-       ld      [%i1+4], u00_lo         C read low 32 bits of up[i]
-       fdtox   a000, out000
-       fmuld   u32, v000, p000
-       fdtox   a016, out016
-       fmuld   u32, v016, p016
-       fmovd   p064, a064
-       fmuld   u32, v032, p032
-       fmovd   p080, a080
-       fmuld   u32, v048, p048
-       std     out000, [%sp+2223+16]
-       faddd   p000, a032, a000
-       fmuld   u32, v064, p064
-       std     out016, [%sp+2223+24]
-       fxtod   u00_hi, u00
-       faddd   p016, a048, a016
-       fmuld   u32, v080, p080
-       faddd   p032, a064, a032
-       fmuld   u32, v096, p096b
-       faddd   p048, a080, a048
-       fmuld   u32, v112, p112b
-C mid
-       ld      [%i1+0], u32_lo         C read high 32 bits of up[i]
-       fdtox   a000, out000
-       fmuld   u00, v000, p000
-       fdtox   a016, out016
-       fmuld   u00, v016, p016
-       faddd   p064, p096a, a064
-       fmuld   u00, v032, p032
-       faddd   p080, p112a, a080
-       fmuld   u00, v048, p048
-       add     %i2, -1, %i2            C BOOKKEEPING
-       std     out000, [%sp+2223+0]
-       faddd   p000, a032, a000
-       fmuld   u00, v064, p064
-       add     %i1, 8, %i1             C BOOKKEEPING
-       std     out016, [%sp+2223+8]
-       fxtod   u32_hi, u32
-       faddd   p016, a048, a016
-       fmuld   u00, v080, p080
-       faddd   p032, a064, a032
-       fmuld   u00, v096, p096a
-       faddd   p048, a080, a048
-       brnz,pt %i2, .L_3_or_more
-        fmuld  u00, v112, p112a
-
-       b       .Lend
-        nop
-
-C  64      32       0
-C   .       .       .
-C   .       |__rXXX_|  32
-C   .      |___cy___|  34
-C   .  |_______i00__|  50
-C  |_______i16__|   .  50
-
-
-C BEGIN MAIN LOOP
-       .align  16
-.L_3_or_more:
-.Loop: ld      [%i1+4], u00_lo         C read low 32 bits of up[i]
-       and     %g2, xffffffff, %g2
-       fdtox   a000, out000
-       fmuld   u32, v000, p000
-C
-       lduw    [%i0+4+8], r00          C read low 32 bits of rp[i]
-       add     %g2, rlimb, %l5
-       fdtox   a016, out016
-       fmuld   u32, v016, p016
-C
-       srlx    %l5, 32, cy
-       ldx     [%sp+2223+16], i00
-       faddd   p064, p096b, a064
-       fmuld   u32, v032, p032
-C
-       add     %g4, cy, cy             C new cy
-       ldx     [%sp+2223+24], i16
-       faddd   p080, p112b, a080
-       fmuld   u32, v048, p048
-C
-       nop
-       std     out000, [%sp+2223+16]
-       faddd   p000, a032, a000
-       fmuld   u32, v064, p064
-C
-       add     i00, r00, rlimb
-       add     %i0, 8, %i0             C BOOKKEEPING
-       std     out016, [%sp+2223+24]
-       fxtod   u00_hi, u00
-C
-       sllx    i16, 16, %g2
-       add     cy, rlimb, rlimb
-       faddd   p016, a048, a016
-       fmuld   u32, v080, p080
-C
-       srlx    i16, 16, %g4
-       add     %g2, rlimb, %l5
-       faddd   p032, a064, a032
-       fmuld   u32, v096, p096b
-C
-       stw     %l5, [%i0+4]
-       nop
-       faddd   p048, a080, a048
-       fmuld   u32, v112, p112b
-C midloop
-       ld      [%i1+0], u32_lo         C read high 32 bits of up[i]
-       and     %g2, xffffffff, %g2
-       fdtox   a000, out000
-       fmuld   u00, v000, p000
-C
-       lduw    [%i0+0], r32            C read high 32 bits of rp[i]
-       add     %g2, rlimb, %l5
-       fdtox   a016, out016
-       fmuld   u00, v016, p016
-C
-       srlx    %l5, 32, cy
-       ldx     [%sp+2223+0], i00
-       faddd   p064, p096a, a064
-       fmuld   u00, v032, p032
-C
-       add     %g4, cy, cy             C new cy
-       ldx     [%sp+2223+8], i16
-       faddd   p080, p112a, a080
-       fmuld   u00, v048, p048
-C
-       add     %i2, -1, %i2            C BOOKKEEPING
-       std     out000, [%sp+2223+0]
-       faddd   p000, a032, a000
-       fmuld   u00, v064, p064
-C
-       add     i00, r32, rlimb
-       add     %i1, 8, %i1             C BOOKKEEPING
-       std     out016, [%sp+2223+8]
-       fxtod   u32_hi, u32
-C
-       sllx    i16, 16, %g2
-       add     cy, rlimb, rlimb
-       faddd   p016, a048, a016
-       fmuld   u00, v080, p080
-C
-       srlx    i16, 16, %g4
-       add     %g2, rlimb, %l5
-       faddd   p032, a064, a032
-       fmuld   u00, v096, p096a
-C
-       stw     %l5, [%i0+0]
-       faddd   p048, a080, a048
-       brnz,pt %i2, .Loop
-        fmuld  u00, v112, p112a
-C END MAIN LOOP
-
-C WIND-DOWN PHASE 1
-.Lend: and     %g2, xffffffff, %g2
-       fdtox   a000, out000
-       fmuld   u32, v000, p000
-       lduw    [%i0+4+8], r00          C read low 32 bits of rp[i]
-       add     %g2, rlimb, %l5
-       fdtox   a016, out016
-       fmuld   u32, v016, p016
-       srlx    %l5, 32, cy
-       ldx     [%sp+2223+16], i00
-       faddd   p064, p096b, a064
-       fmuld   u32, v032, p032
-       add     %g4, cy, cy             C new cy
-       ldx     [%sp+2223+24], i16
-       faddd   p080, p112b, a080
-       fmuld   u32, v048, p048
-       std     out000, [%sp+2223+16]
-       faddd   p000, a032, a000
-       fmuld   u32, v064, p064
-       add     i00, r00, rlimb
-       add     %i0, 8, %i0             C BOOKKEEPING
-       std     out016, [%sp+2223+24]
-       sllx    i16, 16, %g2
-       add     cy, rlimb, rlimb
-       faddd   p016, a048, a016
-       fmuld   u32, v080, p080
-       srlx    i16, 16, %g4
-       add     %g2, rlimb, %l5
-       faddd   p032, a064, a032
-       fmuld   u32, v096, p096b
-       stw     %l5, [%i0+4]
-       faddd   p048, a080, a048
-       fmuld   u32, v112, p112b
-C mid
-       and     %g2, xffffffff, %g2
-       fdtox   a000, out000
-       lduw    [%i0+0], r32            C read high 32 bits of rp[i]
-       add     %g2, rlimb, %l5
-       fdtox   a016, out016
-       srlx    %l5, 32, cy
-       ldx     [%sp+2223+0], i00
-       faddd   p064, p096a, a064
-       add     %g4, cy, cy             C new cy
-       ldx     [%sp+2223+8], i16
-       faddd   p080, p112a, a080
-       std     out000, [%sp+2223+0]
-       add     i00, r32, rlimb
-       std     out016, [%sp+2223+8]
-       sllx    i16, 16, %g2
-       add     cy, rlimb, rlimb
-       srlx    i16, 16, %g4
-       add     %g2, rlimb, %l5
-       stw     %l5, [%i0+0]
-
-C WIND-DOWN PHASE 2
-.L_wd2:        and     %g2, xffffffff, %g2
-       fdtox   a032, out000
-       lduw    [%i0+4+8], r00          C read low 32 bits of rp[i]
-       add     %g2, rlimb, %l5
-       fdtox   a048, out016
-       srlx    %l5, 32, cy
-       ldx     [%sp+2223+16], i00
-       add     %g4, cy, cy             C new cy
-       ldx     [%sp+2223+24], i16
-       std     out000, [%sp+2223+16]
-       add     i00, r00, rlimb
-       add     %i0, 8, %i0             C BOOKKEEPING
-       std     out016, [%sp+2223+24]
-       sllx    i16, 16, %g2
-       add     cy, rlimb, rlimb
-       srlx    i16, 16, %g4
-       add     %g2, rlimb, %l5
-       stw     %l5, [%i0+4]
-C mid
-       and     %g2, xffffffff, %g2
-       fdtox   a064, out000
-       lduw    [%i0+0], r32            C read high 32 bits of rp[i]
-       add     %g2, rlimb, %l5
-       fdtox   a080, out016
-       srlx    %l5, 32, cy
-       ldx     [%sp+2223+0], i00
-       add     %g4, cy, cy             C new cy
-       ldx     [%sp+2223+8], i16
-       std     out000, [%sp+2223+0]
-       add     i00, r32, rlimb
-       std     out016, [%sp+2223+8]
-       sllx    i16, 16, %g2
-       add     cy, rlimb, rlimb
-       srlx    i16, 16, %g4
-       add     %g2, rlimb, %l5
-       stw     %l5, [%i0+0]
-
-C WIND-DOWN PHASE 3
-.L_wd3:        and     %g2, xffffffff, %g2
-       fdtox   p096b, out000
-       add     %g2, rlimb, %l5
-       fdtox   p112b, out016
-       srlx    %l5, 32, cy
-       ldx     [%sp+2223+16], rlimb
-       add     %g4, cy, cy             C new cy
-       ldx     [%sp+2223+24], i16
-       std     out000, [%sp+2223+16]
-       add     %i0, 8, %i0             C BOOKKEEPING
-       std     out016, [%sp+2223+24]
-       sllx    i16, 16, %g2
-       add     cy, rlimb, rlimb
-       srlx    i16, 16, %g4
-       add     %g2, rlimb, %l5
-       stw     %l5, [%i0+4]
-C mid
-       and     %g2, xffffffff, %g2
-       add     %g2, rlimb, %l5
-       srlx    %l5, 32, cy
-       ldx     [%sp+2223+0], rlimb
-       add     %g4, cy, cy             C new cy
-       ldx     [%sp+2223+8], i16
-       sllx    i16, 16, %g2
-       add     cy, rlimb, rlimb
-       srlx    i16, 16, %g4
-       add     %g2, rlimb, %l5
-       stw     %l5, [%i0+0]
-
-       and     %g2, xffffffff, %g2
-       add     %g2, rlimb, %l5
-       srlx    %l5, 32, cy
-       ldx     [%sp+2223+16], i00
-       add     %g4, cy, cy             C new cy
-       ldx     [%sp+2223+24], i16
-
-       sllx    i16, 16, %g2
-       add     i00, cy, cy
-       return  %i7+8
-       add     %g2, cy, %o0
-EPILOGUE(mpn_addmul_2)
diff --git a/mpn/sparc64/copyd.asm b/mpn/sparc64/copyd.asm

index 8a73dba8f0cae33eb975ab7e03bc51d1908334c4..409425fe028abf40ce0ca6817ef0ef3228c7d3ed 100644 (file)
--- a/mpn/sparc64/copyd.asm
+++ b/mpn/sparc64/copyd.asm
@@ -36,7 +36,7 @@ PROLOGUE(mpn_copyd)
         add     %g1,%o0,%o0
         add     %g1,%o1,%o1
         addcc   %o2,-8,%o2
-       bl,pt   %icc,L(end01234567)
+       bl,pt   %xcc,L(end01234567)
         nop
  L(loop1):
         ldx     [%o1-8],%g1
@@ -57,18 +57,18 @@ L(loop1):
         stx     %o4,[%o0-56]
         stx     %o5,[%o0-64]
         addcc   %o2,-8,%o2
-       bge,pt  %icc,L(loop1)
+       bge,pt  %xcc,L(loop1)
         add     %o0,-64,%o0
  L(end01234567):
         addcc   %o2,8,%o2
-       bz,pn   %icc,L(end)
+       bz,pn   %xcc,L(end)
         nop
  L(loop2):
         ldx     [%o1-8],%g1
         add     %o1,-8,%o1
         addcc   %o2,-1,%o2
         stx     %g1,[%o0-8]
-       bg,pt   %icc,L(loop2)
+       bg,pt   %xcc,L(loop2)
         add     %o0,-8,%o0
  L(end):        retl
         nop
diff --git a/mpn/sparc64/copyi.asm b/mpn/sparc64/copyi.asm

index 3158357c0b862fbefbd1db6e89603e6e3fe2d20f..bc81797ce806363c88b44072dd7015755ff9ab52 100644 (file)
--- a/mpn/sparc64/copyi.asm
+++ b/mpn/sparc64/copyi.asm
@@ -33,7 +33,7 @@ ASM_START()
         REGISTER(%g3,#scratch)
  PROLOGUE(mpn_copyi)
         addcc   %o2,-8,%o2
-       bl,pt   %icc,L(end01234567)
+       bl,pt   %xcc,L(end01234567)
         nop
  L(loop1):
         ldx     [%o1+0],%g1
@@ -54,18 +54,18 @@ L(loop1):
         stx     %o4,[%o0+48]
         stx     %o5,[%o0+56]
         addcc   %o2,-8,%o2
-       bge,pt  %icc,L(loop1)
+       bge,pt  %xcc,L(loop1)
         add     %o0,64,%o0
  L(end01234567):
         addcc   %o2,8,%o2
-       bz,pn   %icc,L(end)
+       bz,pn   %xcc,L(end)
         nop
  L(loop2):
         ldx     [%o1+0],%g1
         add     %o1,8,%o1
         addcc   %o2,-1,%o2
         stx     %g1,[%o0+0]
-       bg,pt   %icc,L(loop2)
+       bg,pt   %xcc,L(loop2)
         add     %o0,8,%o0
  L(end):        retl
         nop
diff --git a/mpn/sparc64/gcd_1.asm b/mpn/sparc64/gcd_1.asm

new file mode 100644 (file)

index 0000000..3014777
--- /dev/null
+++ b/mpn/sparc64/gcd_1.asm
@@ -0,0 +1,144 @@
+dnl  SPARC64 mpn_gcd_1.
+
+dnl  Based on the K7 gcd_1.asm, by Kevin Ryde.  Rehacked for SPARC by Torbjorn
+dnl  Granlund.
+
+dnl  Copyright 2000, 2001, 2002, 2005, 2009, 2011, 2012 Free Software
+dnl  Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+C                cycles/bit (approx)
+C UltraSPARC 1&2:      5.1
+C UltraSPARC 3:        5.0
+C UltraSPARC T1:      12.8
+C Numbers measured with: speed -CD -s32-64 -t32 mpn_gcd_1
+
+C ctz_table[n] is the number of trailing zeros on n, or MAXSHIFT if n==0.
+
+deflit(MAXSHIFT, 7)
+deflit(MASK, eval((m4_lshift(1,MAXSHIFT))-1))
+
+       .section        ".rodata"
+ctz_table:
+       .byte   MAXSHIFT
+forloop(i,1,MASK,
+`      .byte   m4_count_trailing_zeros(i)
+')
+
+
+C Threshold of when to call bmod when U is one limb.  Should be about
+C (time_in_cycles(bmod_1,1) + call_overhead) / (cycles/bit).
+define(`BMOD_THRES_LOG2', 14)
+
+C INPUT PARAMETERS
+define(`up',    `%i0')
+define(`n',     `%i1')
+define(`v0',    `%i2')
+
+
+ASM_START()
+       REGISTER(%g2,#scratch)
+       REGISTER(%g3,#scratch)
+PROLOGUE(mpn_gcd_1)
+       save    %sp, -192, %sp
+       ldx     [up+0], %g1             C U low limb
+       mov     -1, %i4                 C i4 = count of low zero bits in x | y
+       or      v0, %g1, %g2            C x | y
+
+L(twos):
+       inc     %i4
+       andcc   %g2, 1, %g0             C is the low bit of (x | y) clear?
+       bz,a    %xcc, L(twos)
+        srlx   %g2, 1, %g2             C executed only when the branch is taken
+
+L(divide_strip_y):
+       andcc   v0, 1, %g0              C is v0 still even?
+       bz,a    %xcc, L(divide_strip_y)
+        srlx   v0, 1, v0               C executed only when the branch is taken
+
+       cmp     n, 1                    C if n > 1 we need
+       bnz     %xcc, L(bmod)           C to call bmod_1
+        nop
+
+C Both U and V are single limbs, reduce with bmod if u0 >> v0.
+       srlx    %g1, BMOD_THRES_LOG2, %g2
+       cmp     %g2, v0
+       bleu    %xcc, L(noreduce)       C skip bmod if (u0 >> BMOD_THRES_LOG2) <= v0
+        mov    %g1, %o0                C delay slot: u0 into o0 for L(noreduce)
+
+L(bmod):
+       mov     up, %o0
+       mov     n, %o1
+       mov     v0, %o2
+       call    mpn_modexact_1c_odd     C o0 = mpn_modexact_1c_odd (up, n, v0, 0)
+        mov    0, %o3
+
+L(noreduce):                           C put the address of ctz_table in i5
+
+ifdef(`PIC',`
+       sethi   %hi(_GLOBAL_OFFSET_TABLE_-4), %l7
+       call    L(LGETPC0)
+       add     %l7, %lo(_GLOBAL_OFFSET_TABLE_+4), %l7
+       sethi   %hi(ctz_table), %g1
+       or      %g1, %lo(ctz_table), %g1
+       ldx     [%l7+%g1], %i5
+',`
+       sethi   %hh(ctz_table), %l7
+       or      %l7, %hm(ctz_table), %l7
+       sllx    %l7, 32, %l7
+       sethi   %lm(ctz_table), %g1
+       add     %l7, %g1, %l7
+       or      %l7, %lo(ctz_table), %i5
+')
+
+       cmp     %o0, 0                  C is u (in o0) zero?
+       bnz     %xcc, L(mid)
+        andcc  %o0, MASK, %g3          C g3 = low MAXSHIFT bits of u, sets Z for L(mid)
+
+       return  %i7+8                   C u == 0: result is v0 << (twos count)
+        sllx   %o2, %o4, %o0           C CAUTION: v0 alias for o2, i4 for o4
+
+       ALIGN(16)
+L(top):        movcc   %xcc, %l4, v0           C v = min(u,v)
+       movcc   %xcc, %l2, %o0          C u = |v - u|
+       cmp     %g3, 0                  C are all MAXSHIFT low bits zero?
+L(mid):        ldub    [%i5+%g3], %g3          C g3 = ctz_table[g3]
+       bz,a    %xcc, L(shift_alot)     C yes: shift u by MAXSHIFT and look again
+        srlx   %o0, MAXSHIFT, %o0
+       srlx    %o0, %g3, %l4           C new u, odd
+       nop                             C force parallel exec of sub insns
+       subcc   v0, %l4, %l2            C v - u, set flags for branch and movcc
+       sub     %l4, v0, %o0            C u - v
+       bnz     %xcc, L(top)            C loop while v != u
+        and    %l2, MASK, %g3          C extract low MAXSHIFT bits from (v-u)
+
+       return  %i7+8                   C v == u: result is v0 << (twos count)
+        sllx   %o2, %o4, %o0           C CAUTION: v0 alias for o2, i4 for o4
+
+L(shift_alot):
+       b       L(mid)
+        andcc  %o0, MASK, %g3          C redo low bits and Z flag on the shifted u
+
+ifdef(`PIC',`
+L(LGETPC0):
+       retl
+       add     %o7, %l7, %l7
+')
+EPILOGUE()
diff --git a/mpn/sparc64/gmp-mparam.h b/mpn/sparc64/gmp-mparam.h

index 91bed9e0feed90510f08a5408719d2203175113d..8539d1b1fd2727177f3749995198885f793521a2 100644 (file)
--- a/mpn/sparc64/gmp-mparam.h
+++ b/mpn/sparc64/gmp-mparam.h
@@ -28,12 +28,11 @@ along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
  #define MOD_1_NORM_THRESHOLD                 3
  #define MOD_1_UNNORM_THRESHOLD               3
  #define MOD_1N_TO_MOD_1_1_THRESHOLD      MP_SIZE_T_MAX  /* never */
-#define MOD_1U_TO_MOD_1_1_THRESHOLD      MP_SIZE_T_MAX
-#define MOD_1_1_TO_MOD_1_2_THRESHOLD     MP_SIZE_T_MAX
-#define MOD_1_2_TO_MOD_1_4_THRESHOLD     MP_SIZE_T_MAX
+#define MOD_1U_TO_MOD_1_1_THRESHOLD         22
+#define MOD_1_1_TO_MOD_1_2_THRESHOLD         0  /* never mpn_mod_1_1p */
+#define MOD_1_2_TO_MOD_1_4_THRESHOLD        27
  #define PREINV_MOD_1_TO_MOD_1_THRESHOLD  MP_SIZE_T_MAX  /* never */
  #define USE_PREINV_DIVREM_1                  1
-#define DIVREM_2_THRESHOLD                   7
  #define DIVEXACT_1_THRESHOLD                 0  /* always */
  #define BMOD_1_TO_MOD_1_THRESHOLD        MP_SIZE_T_MAX  /* never */
  
diff --git a/mpn/sparc64/lshift.asm b/mpn/sparc64/lshift.asm

deleted file mode 100644 (file)

index b3bbd9d..0000000
--- a/mpn/sparc64/lshift.asm
+++ /dev/null
@@ -1,152 +0,0 @@
-dnl  SPARC v9 mpn_lshift
-
-dnl  Copyright 1996, 2000, 2001, 2002, 2003 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of the GNU Lesser General Public License as published
-dnl  by the Free Software Foundation; either version 3 of the License, or (at
-dnl  your option) any later version.
-
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-dnl  License for more details.
-
-dnl  You should have received a copy of the GNU Lesser General Public License
-dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
-
-
-include(`../config.m4')
-
-C                 cycles/limb
-C UltraSPARC 1&2:     2
-C UltraSPARC 3:              3.25
-
-C INPUT PARAMETERS
-define(`rp',`%i0')
-define(`up',`%i1')
-define(`n',`%i2')
-define(`cnt',`%i3')
-
-define(`u0',`%l0')
-define(`u1',`%l2')
-define(`u2',`%l4')
-define(`u3',`%l6')
-
-define(`tnc',`%i4')
-
-define(`fanop',`fitod %f0,%f2')                dnl  A quasi nop running in the FA pipe
-define(`fmnop',`fmuld %f0,%f0,%f4')    dnl  A quasi nop running in the FM pipe
-
-ASM_START()
-       REGISTER(%g2,#scratch)
-       REGISTER(%g3,#scratch)
-PROLOGUE(mpn_lshift)
-       save    %sp,-160,%sp
-
-       sllx    n,3,%g1
-       sub     %g0,cnt,tnc             C negate shift count
-       add     up,%g1,up               C make %o1 point at end of src
-       add     rp,%g1,rp               C make %o0 point at end of res
-       ldx     [up-8],u3               C load first limb
-       subcc   n,5,n
-       srlx    u3,tnc,%i5              C compute function result
-       sllx    u3,cnt,%g3
-       bl,pn   %icc,.Lend1234
-       fanop
-
-       subcc   n,4,n
-       ldx     [up-16],u0
-       ldx     [up-24],u1
-       add     up,-32,up
-       ldx     [up-0],u2
-       ldx     [up-8],u3
-       srlx    u0,tnc,%g2
-
-       bl,pn   %icc,.Lend5678
-       fanop
-
-       b,a     .Loop
-       .align  16
-.Loop:
-       sllx    u0,cnt,%g1
-       or      %g3,%g2,%g3
-       ldx     [up-16],u0
-       fanop
-C --
-       srlx    u1,tnc,%g2
-       subcc   n,4,n
-       stx     %g3,[rp-8]
-       fanop
-C --
-       sllx    u1,cnt,%g3
-       or      %g1,%g2,%g1
-       ldx     [up-24],u1
-       fanop
-C --
-       srlx    u2,tnc,%g2
-       stx     %g1,[rp-16]
-       add     up,-32,up
-       fanop
-C --
-       sllx    u2,cnt,%g1
-       or      %g3,%g2,%g3
-       ldx     [up-0],u2
-       fanop
-C --
-       srlx    u3,tnc,%g2
-       stx     %g3,[rp-24]
-       add     rp,-32,rp
-       fanop
-C --
-       sllx    u3,cnt,%g3
-       or      %g1,%g2,%g1
-       ldx     [up-8],u3
-       fanop
-C --
-       srlx    u0,tnc,%g2
-       stx     %g1,[rp-0]
-       bge,pt  %icc,.Loop
-       fanop
-C --
-.Lend5678:
-       sllx    u0,cnt,%g1
-       or      %g3,%g2,%g3
-       srlx    u1,tnc,%g2
-       stx     %g3,[rp-8]
-       sllx    u1,cnt,%g3
-       or      %g1,%g2,%g1
-       srlx    u2,tnc,%g2
-       stx     %g1,[rp-16]
-       sllx    u2,cnt,%g1
-       or      %g3,%g2,%g3
-       srlx    u3,tnc,%g2
-       stx     %g3,[rp-24]
-       add     rp,-32,rp
-       sllx    u3,cnt,%g3              C carry...
-       or      %g1,%g2,%g1
-       stx     %g1,[rp-0]
-
-.Lend1234:
-       addcc   n,4,n
-       bz,pn   %icc,.Lret
-       fanop
-.Loop0:
-       add     rp,-8,rp
-       subcc   n,1,n
-       ldx     [up-16],u3
-       add     up,-8,up
-       srlx    u3,tnc,%g2
-       or      %g3,%g2,%g3
-       stx     %g3,[rp]
-       sllx    u3,cnt,%g3
-       bnz,pt  %icc,.Loop0
-       fanop
-.Lret:
-       stx     %g3,[rp-8]
-       mov     %i5,%i0
-       ret
-       restore
-EPILOGUE(mpn_lshift)
diff --git a/mpn/sparc64/mod_1.c b/mpn/sparc64/mod_1.c

index 757ae01b95acb0c93287cedf686541cf76ececc5..a1fef62046e9c8ed7e0c73a40cdb6c3ccd68f621 100644 (file)
--- a/mpn/sparc64/mod_1.c
+++ b/mpn/sparc64/mod_1.c
@@ -1,7 +1,7 @@
  /* UltraSPARC 64 mpn_mod_1 -- mpn by limb remainder.
  
-Copyright 1991, 1993, 1994, 1999, 2000, 2001, 2003 Free Software Foundation,
-Inc.
+Copyright 1991, 1993, 1994, 1999, 2000, 2001, 2003, 2010 Free Software
+Foundation, Inc.
  
  This file is part of the GNU MP Library.
  
@@ -46,8 +46,8 @@ along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
     sizes, but at size==2 it was only about the same speed and at size==3 was
     slower.  */
  
-mp_limb_t
-mpn_mod_1 (mp_srcptr src_limbptr, mp_size_t size_limbs, mp_limb_t d_limb)
+static mp_limb_t
+mpn_mod_1_anynorm (mp_srcptr src_limbptr, mp_size_t size_limbs, mp_limb_t d_limb)
  {
    int        norm, norm_rshift;
    mp_limb_t  src_high_limb;
@@ -175,3 +175,54 @@ mpn_mod_1 (mp_srcptr src_limbptr, mp_size_t size_limbs, mp_limb_t d_limb)
        return r >> norm;
      }
  }
+
+mp_limb_t  /* Return the remainder of {ap,n} divided by the limb b; the method is chosen from n and the high bit of b.  */
+mpn_mod_1 (mp_srcptr ap, mp_size_t n, mp_limb_t b)
+{
+  ASSERT (n >= 0);
+  ASSERT (b != 0);
+
+  /* Should this be handled at all?  Rely on callers?  Note n==0 is currently
+     required by mpz/fdiv_r_ui.c and possibly other places.  */
+  if (n == 0)
+    return 0;
+
+  if (UNLIKELY ((b & GMP_NUMB_HIGHBIT) != 0))  /* high bit of b set: b is passed on unshifted below */
+    {
+      if (BELOW_THRESHOLD (n, MOD_1N_TO_MOD_1_1_THRESHOLD))
+       {
+         return mpn_mod_1_anynorm (ap, n, b);
+       }
+      else
+       {
+         mp_limb_t pre[4];
+         mpn_mod_1_1p_cps (pre, b);
+         return mpn_mod_1_1p (ap, n, b, pre);
+       }
+    }
+  else  /* high bit of b clear: the precomputed variants below receive b << pre[1] */
+    {
+      if (BELOW_THRESHOLD (n, MOD_1U_TO_MOD_1_1_THRESHOLD))
+       {
+         return mpn_mod_1_anynorm (ap, n, b);
+       }
+      else if (BELOW_THRESHOLD (n, MOD_1_1_TO_MOD_1_2_THRESHOLD))
+       {
+         mp_limb_t pre[4];
+         mpn_mod_1_1p_cps (pre, b);
+         return mpn_mod_1_1p (ap, n, b << pre[1], pre);  /* presumably pre[1] is a shift count, as in mpn_mod_1s_4p_cps -- TODO confirm */
+       }
+      else if (BELOW_THRESHOLD (n, MOD_1_2_TO_MOD_1_4_THRESHOLD) || UNLIKELY (b > GMP_NUMB_MASK / 4))  /* b above a quarter of the limb range never reaches the 4p variant */
+       {
+         mp_limb_t pre[5];
+         mpn_mod_1s_2p_cps (pre, b);
+         return mpn_mod_1s_2p (ap, n, b << pre[1], pre);  /* presumably pre[1] is a shift count here too -- TODO confirm */
+       }
+      else
+       {
+         mp_limb_t pre[7];
+         mpn_mod_1s_4p_cps (pre, b);  /* stores the leading-zero count of b in pre[1] */
+         return mpn_mod_1s_4p (ap, n, b << pre[1], pre);
+       }
+    }
+}
diff --git a/mpn/sparc64/mod_1_4.c b/mpn/sparc64/mod_1_4.c

new file mode 100644 (file)

index 0000000..d2202c8
--- /dev/null
+++ b/mpn/sparc64/mod_1_4.c
@@ -0,0 +1,221 @@
+/* mpn_mod_1s_4p (ap, n, b, cps)
+   Divide (ap,,n) by b.  Return the single-limb remainder.
+   Requires that b < B / 4.
+
+   Contributed to the GNU project by Torbjorn Granlund.
+   Based on a suggestion by Peter L. Montgomery.
+
+   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
+   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2008, 2009, 2010 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+#include "mpn/sparc64/sparc64.h"
+
+void  /* Fill cps[0..6] with the per-divisor constants that mpn_mod_1s_4p reads for divisor b.  */
+mpn_mod_1s_4p_cps (mp_limb_t cps[7], mp_limb_t b)
+{
+  mp_limb_t bi;
+  mp_limb_t B1modb, B2modb, B3modb, B4modb, B5modb;
+  int cnt;
+
+  ASSERT (b <= (~(mp_limb_t) 0) / 4);  /* matches the "b < B / 4" requirement in the file header */
+
+  count_leading_zeros (cnt, b);
+
+  b <<= cnt;  /* shift out the leading zeros; b stays shifted for the rest of the computation */
+  invert_limb (bi, b);
+
+  B1modb = -b * ((bi >> (GMP_LIMB_BITS-cnt)) | (CNST_LIMB(1) << cnt));
+  ASSERT (B1modb <= b);                /* NB: not fully reduced mod b */
+  udiv_rnnd_preinv (B2modb, B1modb, 0, b, bi);  /* each BkModb is derived from the previous one */
+  udiv_rnnd_preinv (B3modb, B2modb, 0, b, bi);
+  udiv_rnnd_preinv (B4modb, B3modb, 0, b, bi);
+  udiv_rnnd_preinv (B5modb, B4modb, 0, b, bi);
+
+  cps[0] = bi;  /* result of invert_limb on the shifted b */
+  cps[1] = cnt;  /* leading-zero count of the original b */
+  cps[2] = B1modb >> cnt;  /* the five residues are stored shifted back down by cnt */
+  cps[3] = B2modb >> cnt;
+  cps[4] = B3modb >> cnt;
+  cps[5] = B4modb >> cnt;
+  cps[6] = B5modb >> cnt;
+
+#if WANT_ASSERT
+  {
+    int i;
+    b = cps[2];  /* b is reused here as a running sum of cps[2..6] */
+    for (i = 3; i <= 6; i++)
+      {
+       b += cps[i];
+       ASSERT (b >= cps[i]);  /* fails if the running sum wrapped around */
+      }
+  }
+#endif
+}
+
+mp_limb_t  /* Divide {ap,n} by b and return the single-limb remainder, using the constants in cps (see mpn_mod_1s_4p_cps).  */
+mpn_mod_1s_4p (mp_srcptr ap, mp_size_t n, mp_limb_t b, mp_limb_t cps[7])
+{
+  mp_limb_t rh, rl, bi, ph, pl, ch, cl, r;
+  mp_limb_t B1modb, B2modb, B3modb, B4modb, B5modb;
+  mp_size_t i;
+  int cnt;
+
+  ASSERT (n >= 1);
+
+  B1modb = cps[2];
+  B2modb = cps[3];
+  B3modb = cps[4];
+  B4modb = cps[5];
+  B5modb = cps[6];
+
+  if ((b >> 32) == 0)  /* same steps as the else branch, with umul_ppmm_s in place of umul_ppmm.  NOTE(review): mpn_mod_1 in mpn/sparc64/mod_1.c passes b << pre[1]; confirm this branch is reachable.  */
+    {
+      switch (n & 3)  /* fold the top 1 to 4 limbs into rh:rl so that the remaining n is a multiple of 4 */
+       {
+       case 0:
+         umul_ppmm_s (ph, pl, ap[n - 3], B1modb);
+         add_ssaaaa (ph, pl, ph, pl, 0, ap[n - 4]);
+         umul_ppmm_s (ch, cl, ap[n - 2], B2modb);
+         add_ssaaaa (ph, pl, ph, pl, ch, cl);
+         umul_ppmm_s (rh, rl, ap[n - 1], B3modb);
+         add_ssaaaa (rh, rl, rh, rl, ph, pl);
+         n -= 4;
+         break;
+       case 1:
+         rh = 0;
+         rl = ap[n - 1];
+         n -= 1;
+         break;
+       case 2:
+         rh = ap[n - 1];
+         rl = ap[n - 2];
+         n -= 2;
+         break;
+       case 3:
+         umul_ppmm_s (ph, pl, ap[n - 2], B1modb);
+         add_ssaaaa (ph, pl, ph, pl, 0, ap[n - 3]);
+         umul_ppmm_s (rh, rl, ap[n - 1], B2modb);
+         add_ssaaaa (rh, rl, rh, rl, ph, pl);
+         n -= 3;
+         break;
+       }
+
+      for (i = n - 4; i >= 0; i -= 4)  /* four limbs per iteration, from the high end down to ap[0] */
+       {
+         /* rr = ap[i]                         < B
+               + ap[i+1] * (B mod b)           <= (B-1)(b-1)
+               + ap[i+2] * (B^2 mod b)         <= (B-1)(b-1)
+               + ap[i+3] * (B^3 mod b)         <= (B-1)(b-1)
+               + LO(rr)  * (B^4 mod b)         <= (B-1)(b-1)
+               + HI(rr)  * (B^5 mod b)         <= (B-1)(b-1)
+         */
+         umul_ppmm_s (ph, pl, ap[i + 1], B1modb);
+         add_ssaaaa (ph, pl, ph, pl, 0, ap[i + 0]);
+
+         umul_ppmm_s (ch, cl, ap[i + 2], B2modb);
+         add_ssaaaa (ph, pl, ph, pl, ch, cl);
+
+         umul_ppmm_s (ch, cl, ap[i + 3], B3modb);
+         add_ssaaaa (ph, pl, ph, pl, ch, cl);
+
+         umul_ppmm_s (ch, cl, rl, B4modb);
+         add_ssaaaa (ph, pl, ph, pl, ch, cl);
+
+         umul_ppmm_s (rh, rl, rh, B5modb);
+         add_ssaaaa (rh, rl, rh, rl, ph, pl);
+       }
+
+      umul_ppmm_s (rh, cl, rh, B1modb);  /* final fold of the high word: rh:rl = rl + rh * B1modb */
+      add_ssaaaa (rh, rl, rh, rl, 0, cl);
+    }
+  else
+    {
+      switch (n & 3)  /* fold the top 1 to 4 limbs into rh:rl so that the remaining n is a multiple of 4 */
+       {
+       case 0:
+         umul_ppmm (ph, pl, ap[n - 3], B1modb);
+         add_ssaaaa (ph, pl, ph, pl, 0, ap[n - 4]);
+         umul_ppmm (ch, cl, ap[n - 2], B2modb);
+         add_ssaaaa (ph, pl, ph, pl, ch, cl);
+         umul_ppmm (rh, rl, ap[n - 1], B3modb);
+         add_ssaaaa (rh, rl, rh, rl, ph, pl);
+         n -= 4;
+         break;
+       case 1:
+         rh = 0;
+         rl = ap[n - 1];
+         n -= 1;
+         break;
+       case 2:
+         rh = ap[n - 1];
+         rl = ap[n - 2];
+         n -= 2;
+         break;
+       case 3:
+         umul_ppmm (ph, pl, ap[n - 2], B1modb);
+         add_ssaaaa (ph, pl, ph, pl, 0, ap[n - 3]);
+         umul_ppmm (rh, rl, ap[n - 1], B2modb);
+         add_ssaaaa (rh, rl, rh, rl, ph, pl);
+         n -= 3;
+         break;
+       }
+
+      for (i = n - 4; i >= 0; i -= 4)  /* four limbs per iteration, from the high end down to ap[0] */
+       {
+         /* rr = ap[i]                         < B
+               + ap[i+1] * (B mod b)           <= (B-1)(b-1)
+               + ap[i+2] * (B^2 mod b)         <= (B-1)(b-1)
+               + ap[i+3] * (B^3 mod b)         <= (B-1)(b-1)
+               + LO(rr)  * (B^4 mod b)         <= (B-1)(b-1)
+               + HI(rr)  * (B^5 mod b)         <= (B-1)(b-1)
+         */
+         umul_ppmm (ph, pl, ap[i + 1], B1modb);
+         add_ssaaaa (ph, pl, ph, pl, 0, ap[i + 0]);
+
+         umul_ppmm (ch, cl, ap[i + 2], B2modb);
+         add_ssaaaa (ph, pl, ph, pl, ch, cl);
+
+         umul_ppmm (ch, cl, ap[i + 3], B3modb);
+         add_ssaaaa (ph, pl, ph, pl, ch, cl);
+
+         umul_ppmm (ch, cl, rl, B4modb);
+         add_ssaaaa (ph, pl, ph, pl, ch, cl);
+
+         umul_ppmm (rh, rl, rh, B5modb);
+         add_ssaaaa (rh, rl, rh, rl, ph, pl);
+       }
+
+      umul_ppmm (rh, cl, rh, B1modb);  /* final fold of the high word: rh:rl = rl + rh * B1modb */
+      add_ssaaaa (rh, rl, rh, rl, 0, cl);
+    }
+
+  bi = cps[0];
+  cnt = cps[1];
+
+  r = (rh << cnt) | (rl >> (GMP_LIMB_BITS - cnt));  /* shift rh:rl left by cnt; cnt >= 2 if cps came from mpn_mod_1s_4p_cps (it asserts b <= ~0/4), keeping the right shift count below GMP_LIMB_BITS */
+  udiv_rnnd_preinv (r, r, rl << cnt, b, bi);
+
+  return r >> cnt;  /* shift the remainder back down by cnt */
+}
diff --git a/mpn/sparc64/mul_1.asm b/mpn/sparc64/mul_1.asm

deleted file mode 100644 (file)

index e57e822..0000000
--- a/mpn/sparc64/mul_1.asm
+++ /dev/null
@@ -1,569 +0,0 @@
-dnl  SPARC v9 64-bit mpn_mul_1 -- Multiply a limb vector with a limb and store
-dnl  the result in a second limb vector.
-
-dnl  Copyright 1998, 2000, 2001, 2002, 2003 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of the GNU Lesser General Public License as published
-dnl  by the Free Software Foundation; either version 3 of the License, or (at
-dnl  your option) any later version.
-
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-dnl  License for more details.
-
-dnl  You should have received a copy of the GNU Lesser General Public License
-dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C                 cycles/limb
-C UltraSPARC 1&2:     14
-C UltraSPARC 3:              18.5
-
-C Algorithm: We use eight floating-point multiplies per limb product, with the
-C invariant v operand split into four 16-bit pieces, and the s1 operand split
-C into 32-bit pieces.  We sum pairs of 48-bit partial products using
-C floating-point add, then convert the four 49-bit product-sums and transfer
-C them to the integer unit.
-
-C Possible optimizations:
-C   1. Align the stack area where we transfer the four 49-bit product-sums
-C      to a 32-byte boundary.  That would minimize the cache collision.
-C      (UltraSPARC-1/2 use a direct-mapped cache.)  (Perhaps even better would
-C      be to align the area to map to the area immediately before s1?)
-C   2. Sum the 4 49-bit quantities using 32-bit operations, as in the
-C      develop mpn_addmul_2.  This would save many integer instructions.
-C   3. Unrolling.  Questionable if it is worth the code expansion, given that
-C      it could only save 1 cycle/limb.
-C   4. Specialize for particular v values.  If its upper 32 bits are zero, we
-C      could save many operations, in the FPU (fmuld), but more so in the IEU
-C      since we'll be summing 48-bit quantities, which might be simpler.
-C   5. Ideally, we should schedule the f2/f3 and f4/f5 RAW further apart, and
-C      the i00,i16,i32,i48 RAW less apart.  The latter apart-scheduling should
-C      not be greater than needed for L2 cache latency, and also not so great
-C      that i16 needs to be copied.
-C   6. Avoid performing mem+fa+fm in the same cycle, at least not when we want
-C      to get high IEU bandwidth.  (12 of the 14 cycles will be free for 2 IEU
-C      ops.)
-
-C Instruction classification (as per UltraSPARC-1/2 functional units):
-C    8 FM
-C   10 FA
-C   11 MEM
-C   9 ISHIFT + 10? IADDLOG
-C    1 BRANCH
-C   49 insns totally (plus three mov insns that should be optimized out)
-
-C The loop executes 53 instructions in 14 cycles on UltraSPARC-1/2, i.e we
-C sustain 3.79 instructions/cycle.
-
-C INPUT PARAMETERS
-C rp   i0
-C up   i1
-C n    i2
-C v    i3
-
-ASM_START()
-       REGISTER(%g2,#scratch)
-       REGISTER(%g3,#scratch)
-
-define(`p00', `%f8') define(`p16',`%f10') define(`p32',`%f12') define(`p48',`%f14')
-define(`r32',`%f16') define(`r48',`%f18') define(`r64',`%f20') define(`r80',`%f22')
-define(`v00',`%f24') define(`v16',`%f26') define(`v32',`%f28') define(`v48',`%f30')
-define(`u00',`%f32') define(`u32', `%f34')
-define(`a00',`%f36') define(`a16',`%f38') define(`a32',`%f40') define(`a48',`%f42')
-define(`cy',`%g1')
-define(`rlimb',`%g3')
-define(`i00',`%l0') define(`i16',`%l1') define(`i32',`%l2') define(`i48',`%l3')
-define(`xffffffff',`%l7')
-define(`xffff',`%o0')
-
-PROLOGUE(mpn_mul_1)
-
-C Initialization.  (1) Split v operand into four 16-bit chunks and store them
-C as IEEE double in fp registers.  (2) Clear upper 32 bits of fp register pairs
-C f2 and f4.  (3) Store masks in registers aliased to `xffff' and `xffffffff'.
-
-       save    %sp, -256, %sp
-       mov     -1, %g4
-       srlx    %g4, 48, xffff          C store mask in register `xffff'
-       and     %i3, xffff, %g2
-       stx     %g2, [%sp+2223+0]
-       srlx    %i3, 16, %g3
-       and     %g3, xffff, %g3
-       stx     %g3, [%sp+2223+8]
-       srlx    %i3, 32, %g2
-       and     %g2, xffff, %g2
-       stx     %g2, [%sp+2223+16]
-       srlx    %i3, 48, %g3
-       stx     %g3, [%sp+2223+24]
-       srlx    %g4, 32, xffffffff      C store mask in register `xffffffff'
-
-       sllx    %i2, 3, %i2
-       mov     0, cy                   C clear cy
-       add     %i0, %i2, %i0
-       add     %i1, %i2, %i1
-       neg     %i2
-       add     %i1, 4, %i5
-       add     %i0, -32, %i4
-       add     %i0, -16, %i0
-
-       ldd     [%sp+2223+0], v00
-       ldd     [%sp+2223+8], v16
-       ldd     [%sp+2223+16], v32
-       ldd     [%sp+2223+24], v48
-       ld      [%sp+2223+0],%f2        C zero f2
-       ld      [%sp+2223+0],%f4        C zero f4
-       ld      [%i5+%i2], %f3          C read low 32 bits of up[i]
-       ld      [%i1+%i2], %f5          C read high 32 bits of up[i]
-       fxtod   v00, v00
-       fxtod   v16, v16
-       fxtod   v32, v32
-       fxtod   v48, v48
-
-C Start real work.  (We sneakingly read f3 and f5 above...)
-C The software pipeline is very deep, requiring 4 feed-in stages.
-
-       fxtod   %f2, u00
-       fxtod   %f4, u32
-       fmuld   u00, v00, a00
-       fmuld   u00, v16, a16
-       fmuld   u00, v32, p32
-       fmuld   u32, v00, r32
-       fmuld   u00, v48, p48
-       addcc   %i2, 8, %i2
-       bnz,pt  %icc, .L_two_or_more
-       fmuld   u32, v16, r48
-
-.L_one:
-       fmuld   u32, v32, r64   C FIXME not urgent
-       faddd   p32, r32, a32
-       fdtox   a00, a00
-       faddd   p48, r48, a48
-       fmuld   u32, v48, r80   C FIXME not urgent
-       fdtox   a16, a16
-       fdtox   a32, a32
-       fdtox   a48, a48
-       std     a00, [%sp+2223+0]
-       std     a16, [%sp+2223+8]
-       std     a32, [%sp+2223+16]
-       std     a48, [%sp+2223+24]
-       add     %i2, 8, %i2
-
-       fdtox   r64, a00
-       fdtox   r80, a16
-       ldx     [%sp+2223+0], i00
-       ldx     [%sp+2223+8], i16
-       ldx     [%sp+2223+16], i32
-       ldx     [%sp+2223+24], i48
-       std     a00, [%sp+2223+0]
-       std     a16, [%sp+2223+8]
-       add     %i2, 8, %i2
-
-       mov     i00, %g5                C i00+ now in g5
-       ldx     [%sp+2223+0], i00
-       srlx    i16, 48, %l4            C (i16 >> 48)
-       mov     i16, %g2
-       ldx     [%sp+2223+8], i16
-       srlx    i48, 16, %l5            C (i48 >> 16)
-       mov     i32, %g4                C i32+ now in g4
-       sllx    i48, 32, %l6            C (i48 << 32)
-       srlx    %g4, 32, %o3            C (i32 >> 32)
-       add     %l5, %l4, %o1           C hi64- in %o1
-       std     a00, [%sp+2223+0]
-       sllx    %g4, 16, %o2            C (i32 << 16)
-       add     %o3, %o1, %o1           C hi64 in %o1   1st ASSIGNMENT
-       std     a16, [%sp+2223+8]
-       sllx    %o1, 48, %o3            C (hi64 << 48)
-       add     %g2, %o2, %o2           C mi64- in %o2
-       add     %l6, %o2, %o2           C mi64- in %o2
-       sub     %o2, %o3, %o2           C mi64 in %o2   1st ASSIGNMENT
-       add     cy, %g5, %o4            C x = prev(i00) + cy
-       b       .L_out_1
-       add     %i2, 8, %i2
-
-.L_two_or_more:
-       ld      [%i5+%i2], %f3          C read low 32 bits of up[i]
-       fmuld   u32, v32, r64   C FIXME not urgent
-       faddd   p32, r32, a32
-       ld      [%i1+%i2], %f5          C read high 32 bits of up[i]
-       fdtox   a00, a00
-       faddd   p48, r48, a48
-       fmuld   u32, v48, r80   C FIXME not urgent
-       fdtox   a16, a16
-       fdtox   a32, a32
-       fxtod   %f2, u00
-       fxtod   %f4, u32
-       fdtox   a48, a48
-       std     a00, [%sp+2223+0]
-       fmuld   u00, v00, p00
-       std     a16, [%sp+2223+8]
-       fmuld   u00, v16, p16
-       std     a32, [%sp+2223+16]
-       fmuld   u00, v32, p32
-       std     a48, [%sp+2223+24]
-       faddd   p00, r64, a00
-       fmuld   u32, v00, r32
-       faddd   p16, r80, a16
-       fmuld   u00, v48, p48
-       addcc   %i2, 8, %i2
-       bnz,pt  %icc, .L_three_or_more
-       fmuld   u32, v16, r48
-
-.L_two:
-       fmuld   u32, v32, r64   C FIXME not urgent
-       faddd   p32, r32, a32
-       fdtox   a00, a00
-       faddd   p48, r48, a48
-       fmuld   u32, v48, r80   C FIXME not urgent
-       fdtox   a16, a16
-       ldx     [%sp+2223+0], i00
-       fdtox   a32, a32
-       ldx     [%sp+2223+8], i16
-       ldx     [%sp+2223+16], i32
-       ldx     [%sp+2223+24], i48
-       fdtox   a48, a48
-       std     a00, [%sp+2223+0]
-       std     a16, [%sp+2223+8]
-       std     a32, [%sp+2223+16]
-       std     a48, [%sp+2223+24]
-       add     %i2, 8, %i2
-
-       fdtox   r64, a00
-       mov     i00, %g5                C i00+ now in g5
-       fdtox   r80, a16
-       ldx     [%sp+2223+0], i00
-       srlx    i16, 48, %l4            C (i16 >> 48)
-       mov     i16, %g2
-       ldx     [%sp+2223+8], i16
-       srlx    i48, 16, %l5            C (i48 >> 16)
-       mov     i32, %g4                C i32+ now in g4
-       ldx     [%sp+2223+16], i32
-       sllx    i48, 32, %l6            C (i48 << 32)
-       ldx     [%sp+2223+24], i48
-       srlx    %g4, 32, %o3            C (i32 >> 32)
-       add     %l5, %l4, %o1           C hi64- in %o1
-       std     a00, [%sp+2223+0]
-       sllx    %g4, 16, %o2            C (i32 << 16)
-       add     %o3, %o1, %o1           C hi64 in %o1   1st ASSIGNMENT
-       std     a16, [%sp+2223+8]
-       sllx    %o1, 48, %o3            C (hi64 << 48)
-       add     %g2, %o2, %o2           C mi64- in %o2
-       add     %l6, %o2, %o2           C mi64- in %o2
-       sub     %o2, %o3, %o2           C mi64 in %o2   1st ASSIGNMENT
-       add     cy, %g5, %o4            C x = prev(i00) + cy
-       b       .L_out_2
-       add     %i2, 8, %i2
-
-.L_three_or_more:
-       ld      [%i5+%i2], %f3          C read low 32 bits of up[i]
-       fmuld   u32, v32, r64   C FIXME not urgent
-       faddd   p32, r32, a32
-       ld      [%i1+%i2], %f5          C read high 32 bits of up[i]
-       fdtox   a00, a00
-       faddd   p48, r48, a48
-       fmuld   u32, v48, r80   C FIXME not urgent
-       fdtox   a16, a16
-       ldx     [%sp+2223+0], i00
-       fdtox   a32, a32
-       ldx     [%sp+2223+8], i16
-       fxtod   %f2, u00
-       ldx     [%sp+2223+16], i32
-       fxtod   %f4, u32
-       ldx     [%sp+2223+24], i48
-       fdtox   a48, a48
-       std     a00, [%sp+2223+0]
-       fmuld   u00, v00, p00
-       std     a16, [%sp+2223+8]
-       fmuld   u00, v16, p16
-       std     a32, [%sp+2223+16]
-       fmuld   u00, v32, p32
-       std     a48, [%sp+2223+24]
-       faddd   p00, r64, a00
-       fmuld   u32, v00, r32
-       faddd   p16, r80, a16
-       fmuld   u00, v48, p48
-       addcc   %i2, 8, %i2
-       bnz,pt  %icc, .L_four_or_more
-       fmuld   u32, v16, r48
-
-.L_three:
-       fmuld   u32, v32, r64   C FIXME not urgent
-       faddd   p32, r32, a32
-       fdtox   a00, a00
-       faddd   p48, r48, a48
-       mov     i00, %g5                C i00+ now in g5
-       fmuld   u32, v48, r80   C FIXME not urgent
-       fdtox   a16, a16
-       ldx     [%sp+2223+0], i00
-       fdtox   a32, a32
-       srlx    i16, 48, %l4            C (i16 >> 48)
-       mov     i16, %g2
-       ldx     [%sp+2223+8], i16
-       srlx    i48, 16, %l5            C (i48 >> 16)
-       mov     i32, %g4                C i32+ now in g4
-       ldx     [%sp+2223+16], i32
-       sllx    i48, 32, %l6            C (i48 << 32)
-       ldx     [%sp+2223+24], i48
-       fdtox   a48, a48
-       srlx    %g4, 32, %o3            C (i32 >> 32)
-       add     %l5, %l4, %o1           C hi64- in %o1
-       std     a00, [%sp+2223+0]
-       sllx    %g4, 16, %o2            C (i32 << 16)
-       add     %o3, %o1, %o1           C hi64 in %o1   1st ASSIGNMENT
-       std     a16, [%sp+2223+8]
-       sllx    %o1, 48, %o3            C (hi64 << 48)
-       add     %g2, %o2, %o2           C mi64- in %o2
-       std     a32, [%sp+2223+16]
-       add     %l6, %o2, %o2           C mi64- in %o2
-       std     a48, [%sp+2223+24]
-       sub     %o2, %o3, %o2           C mi64 in %o2   1st ASSIGNMENT
-       add     cy, %g5, %o4            C x = prev(i00) + cy
-       b       .L_out_3
-       add     %i2, 8, %i2
-
-.L_four_or_more:
-       ld      [%i5+%i2], %f3          C read low 32 bits of up[i]
-       fmuld   u32, v32, r64   C FIXME not urgent
-       faddd   p32, r32, a32
-       ld      [%i1+%i2], %f5          C read high 32 bits of up[i]
-       fdtox   a00, a00
-       faddd   p48, r48, a48
-       mov     i00, %g5                C i00+ now in g5
-       fmuld   u32, v48, r80   C FIXME not urgent
-       fdtox   a16, a16
-       ldx     [%sp+2223+0], i00
-       fdtox   a32, a32
-       srlx    i16, 48, %l4            C (i16 >> 48)
-       mov     i16, %g2
-       ldx     [%sp+2223+8], i16
-       fxtod   %f2, u00
-       srlx    i48, 16, %l5            C (i48 >> 16)
-       mov     i32, %g4                C i32+ now in g4
-       ldx     [%sp+2223+16], i32
-       fxtod   %f4, u32
-       sllx    i48, 32, %l6            C (i48 << 32)
-       ldx     [%sp+2223+24], i48
-       fdtox   a48, a48
-       srlx    %g4, 32, %o3            C (i32 >> 32)
-       add     %l5, %l4, %o1           C hi64- in %o1
-       std     a00, [%sp+2223+0]
-       fmuld   u00, v00, p00
-       sllx    %g4, 16, %o2            C (i32 << 16)
-       add     %o3, %o1, %o1           C hi64 in %o1   1st ASSIGNMENT
-       std     a16, [%sp+2223+8]
-       fmuld   u00, v16, p16
-       sllx    %o1, 48, %o3            C (hi64 << 48)
-       add     %g2, %o2, %o2           C mi64- in %o2
-       std     a32, [%sp+2223+16]
-       fmuld   u00, v32, p32
-       add     %l6, %o2, %o2           C mi64- in %o2
-       std     a48, [%sp+2223+24]
-       faddd   p00, r64, a00
-       fmuld   u32, v00, r32
-       sub     %o2, %o3, %o2           C mi64 in %o2   1st ASSIGNMENT
-       faddd   p16, r80, a16
-       fmuld   u00, v48, p48
-       add     cy, %g5, %o4            C x = prev(i00) + cy
-       addcc   %i2, 8, %i2
-       bnz,pt  %icc, .Loop
-       fmuld   u32, v16, r48
-
-.L_four:
-       b,a     .L_out_4
-
-C BEGIN MAIN LOOP
-       .align  16
-.Loop:
-C 00
-       srlx    %o4, 16, %o5            C (x >> 16)
-       ld      [%i5+%i2], %f3          C read low 32 bits of up[i]
-       fmuld   u32, v32, r64   C FIXME not urgent
-       faddd   p32, r32, a32
-C 01
-       add     %o5, %o2, %o2           C mi64 in %o2   2nd ASSIGNMENT
-       and     %o4, xffff, %o5         C (x & 0xffff)
-       ld      [%i1+%i2], %f5          C read high 32 bits of up[i]
-       fdtox   a00, a00
-C 02
-       faddd   p48, r48, a48
-C 03
-       srlx    %o2, 48, %o7            C (mi64 >> 48)
-       mov     i00, %g5                C i00+ now in g5
-       fmuld   u32, v48, r80   C FIXME not urgent
-       fdtox   a16, a16
-C 04
-       sllx    %o2, 16, %i3            C (mi64 << 16)
-       add     %o7, %o1, cy            C new cy
-       ldx     [%sp+2223+0], i00
-       fdtox   a32, a32
-C 05
-       srlx    i16, 48, %l4            C (i16 >> 48)
-       mov     i16, %g2
-       ldx     [%sp+2223+8], i16
-       fxtod   %f2, u00
-C 06
-       srlx    i48, 16, %l5            C (i48 >> 16)
-       mov     i32, %g4                C i32+ now in g4
-       ldx     [%sp+2223+16], i32
-       fxtod   %f4, u32
-C 07
-       sllx    i48, 32, %l6            C (i48 << 32)
-       or      %i3, %o5, %o5
-       ldx     [%sp+2223+24], i48
-       fdtox   a48, a48
-C 08
-       srlx    %g4, 32, %o3            C (i32 >> 32)
-       add     %l5, %l4, %o1           C hi64- in %o1
-       std     a00, [%sp+2223+0]
-       fmuld   u00, v00, p00
-C 09
-       sllx    %g4, 16, %o2            C (i32 << 16)
-       add     %o3, %o1, %o1           C hi64 in %o1   1st ASSIGNMENT
-       std     a16, [%sp+2223+8]
-       fmuld   u00, v16, p16
-C 10
-       sllx    %o1, 48, %o3            C (hi64 << 48)
-       add     %g2, %o2, %o2           C mi64- in %o2
-       std     a32, [%sp+2223+16]
-       fmuld   u00, v32, p32
-C 11
-       add     %l6, %o2, %o2           C mi64- in %o2
-       std     a48, [%sp+2223+24]
-       faddd   p00, r64, a00
-       fmuld   u32, v00, r32
-C 12
-       sub     %o2, %o3, %o2           C mi64 in %o2   1st ASSIGNMENT
-       stx     %o5, [%i4+%i2]
-       faddd   p16, r80, a16
-       fmuld   u00, v48, p48
-C 13
-       add     cy, %g5, %o4            C x = prev(i00) + cy
-       addcc   %i2, 8, %i2
-       bnz,pt  %icc, .Loop
-       fmuld   u32, v16, r48
-C END MAIN LOOP
-
-.L_out_4:
-       srlx    %o4, 16, %o5            C (x >> 16)
-       fmuld   u32, v32, r64   C FIXME not urgent
-       faddd   p32, r32, a32
-       add     %o5, %o2, %o2           C mi64 in %o2   2nd ASSIGNMENT
-       and     %o4, xffff, %o5         C (x & 0xffff)
-       fdtox   a00, a00
-       faddd   p48, r48, a48
-       srlx    %o2, 48, %o7            C (mi64 >> 48)
-       mov     i00, %g5                C i00+ now in g5
-       fmuld   u32, v48, r80   C FIXME not urgent
-       fdtox   a16, a16
-       sllx    %o2, 16, %i3            C (mi64 << 16)
-       add     %o7, %o1, cy            C new cy
-       ldx     [%sp+2223+0], i00
-       fdtox   a32, a32
-       srlx    i16, 48, %l4            C (i16 >> 48)
-       mov     i16, %g2
-       ldx     [%sp+2223+8], i16
-       srlx    i48, 16, %l5            C (i48 >> 16)
-       mov     i32, %g4                C i32+ now in g4
-       ldx     [%sp+2223+16], i32
-       sllx    i48, 32, %l6            C (i48 << 32)
-       or      %i3, %o5, %o5
-       ldx     [%sp+2223+24], i48
-       fdtox   a48, a48
-       srlx    %g4, 32, %o3            C (i32 >> 32)
-       add     %l5, %l4, %o1           C hi64- in %o1
-       std     a00, [%sp+2223+0]
-       sllx    %g4, 16, %o2            C (i32 << 16)
-       add     %o3, %o1, %o1           C hi64 in %o1   1st ASSIGNMENT
-       std     a16, [%sp+2223+8]
-       sllx    %o1, 48, %o3            C (hi64 << 48)
-       add     %g2, %o2, %o2           C mi64- in %o2
-       std     a32, [%sp+2223+16]
-       add     %l6, %o2, %o2           C mi64- in %o2
-       std     a48, [%sp+2223+24]
-       sub     %o2, %o3, %o2           C mi64 in %o2   1st ASSIGNMENT
-       stx     %o5, [%i4+%i2]
-       add     cy, %g5, %o4            C x = prev(i00) + cy
-       add     %i2, 8, %i2
-.L_out_3:
-       srlx    %o4, 16, %o5            C (x >> 16)
-       add     %o5, %o2, %o2           C mi64 in %o2   2nd ASSIGNMENT
-       and     %o4, xffff, %o5         C (x & 0xffff)
-       fdtox   r64, a00
-       srlx    %o2, 48, %o7            C (mi64 >> 48)
-       mov     i00, %g5                C i00+ now in g5
-       fdtox   r80, a16
-       sllx    %o2, 16, %i3            C (mi64 << 16)
-       add     %o7, %o1, cy            C new cy
-       ldx     [%sp+2223+0], i00
-       srlx    i16, 48, %l4            C (i16 >> 48)
-       mov     i16, %g2
-       ldx     [%sp+2223+8], i16
-       srlx    i48, 16, %l5            C (i48 >> 16)
-       mov     i32, %g4                C i32+ now in g4
-       ldx     [%sp+2223+16], i32
-       sllx    i48, 32, %l6            C (i48 << 32)
-       or      %i3, %o5, %o5
-       ldx     [%sp+2223+24], i48
-       srlx    %g4, 32, %o3            C (i32 >> 32)
-       add     %l5, %l4, %o1           C hi64- in %o1
-       std     a00, [%sp+2223+0]
-       sllx    %g4, 16, %o2            C (i32 << 16)
-       add     %o3, %o1, %o1           C hi64 in %o1   1st ASSIGNMENT
-       std     a16, [%sp+2223+8]
-       sllx    %o1, 48, %o3            C (hi64 << 48)
-       add     %g2, %o2, %o2           C mi64- in %o2
-       add     %l6, %o2, %o2           C mi64- in %o2
-       sub     %o2, %o3, %o2           C mi64 in %o2   1st ASSIGNMENT
-       stx     %o5, [%i4+%i2]
-       add     cy, %g5, %o4            C x = prev(i00) + cy
-       add     %i2, 8, %i2
-.L_out_2:
-       srlx    %o4, 16, %o5            C (x >> 16)
-       add     %o5, %o2, %o2           C mi64 in %o2   2nd ASSIGNMENT
-       and     %o4, xffff, %o5         C (x & 0xffff)
-       srlx    %o2, 48, %o7            C (mi64 >> 48)
-       mov     i00, %g5                C i00+ now in g5
-       sllx    %o2, 16, %i3            C (mi64 << 16)
-       add     %o7, %o1, cy            C new cy
-       ldx     [%sp+2223+0], i00
-       srlx    i16, 48, %l4            C (i16 >> 48)
-       mov     i16, %g2
-       ldx     [%sp+2223+8], i16
-       srlx    i48, 16, %l5            C (i48 >> 16)
-       mov     i32, %g4                C i32+ now in g4
-       sllx    i48, 32, %l6            C (i48 << 32)
-       or      %i3, %o5, %o5
-       srlx    %g4, 32, %o3            C (i32 >> 32)
-       add     %l5, %l4, %o1           C hi64- in %o1
-       sllx    %g4, 16, %o2            C (i32 << 16)
-       add     %o3, %o1, %o1           C hi64 in %o1   1st ASSIGNMENT
-       sllx    %o1, 48, %o3            C (hi64 << 48)
-       add     %g2, %o2, %o2           C mi64- in %o2
-       add     %l6, %o2, %o2           C mi64- in %o2
-       sub     %o2, %o3, %o2           C mi64 in %o2   1st ASSIGNMENT
-       stx     %o5, [%i4+%i2]
-       add     cy, %g5, %o4            C x = prev(i00) + cy
-       add     %i2, 8, %i2
-.L_out_1:
-       srlx    %o4, 16, %o5            C (x >> 16)
-       add     %o5, %o2, %o2           C mi64 in %o2   2nd ASSIGNMENT
-       and     %o4, xffff, %o5         C (x & 0xffff)
-       srlx    %o2, 48, %o7            C (mi64 >> 48)
-       sllx    %o2, 16, %i3            C (mi64 << 16)
-       add     %o7, %o1, cy            C new cy
-       or      %i3, %o5, %o5
-       stx     %o5, [%i4+%i2]
-
-       sllx    i00, 0, %g2
-       add     %g2, cy, cy
-       sllx    i16, 16, %g3
-       add     %g3, cy, cy
-
-       return  %i7+8
-       mov     cy, %o0
-EPILOGUE(mpn_mul_1)
diff --git a/mpn/sparc64/rshift.asm b/mpn/sparc64/rshift.asm

deleted file mode 100644 (file)

index 691fe01..0000000
--- a/mpn/sparc64/rshift.asm
+++ /dev/null
@@ -1,149 +0,0 @@
-dnl  SPARC v9 mpn_rshift
-
-dnl  Copyright 1996, 2000, 2001, 2002, 2003 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of the GNU Lesser General Public License as published
-dnl  by the Free Software Foundation; either version 3 of the License, or (at
-dnl  your option) any later version.
-
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-dnl  License for more details.
-
-dnl  You should have received a copy of the GNU Lesser General Public License
-dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
-
-
-include(`../config.m4')
-
-C                 cycles/limb
-C UltraSPARC 1&2:     2
-C UltraSPARC 3:              3.25
-
-C INPUT PARAMETERS
-define(`rp',`%i0')
-define(`up',`%i1')
-define(`n',`%i2')
-define(`cnt',`%i3')
-
-define(`u0',`%l0')
-define(`u1',`%l2')
-define(`u2',`%l4')
-define(`u3',`%l6')
-
-define(`tnc',`%i4')
-
-define(`fanop',`fitod %f0,%f2')                dnl  A quasi nop running in the FA pipe
-define(`fmnop',`fmuld %f0,%f0,%f4')    dnl  A quasi nop running in the FM pipe
-
-ASM_START()
-       REGISTER(%g2,#scratch)
-       REGISTER(%g3,#scratch)
-PROLOGUE(mpn_rshift)
-       save    %sp,-160,%sp
-
-       sub     %g0,cnt,tnc             C negate shift count
-       ldx     [up],u3                 C load first limb
-       subcc   n,5,n
-       sllx    u3,tnc,%i5              C compute function result
-       srlx    u3,cnt,%g3
-       bl,pn   %icc,.Lend1234
-       fanop
-
-       subcc   n,4,n
-       ldx     [up+8],u0
-       ldx     [up+16],u1
-       add     up,32,up
-       ldx     [up-8],u2
-       ldx     [up+0],u3
-       sllx    u0,tnc,%g2
-
-       bl,pn   %icc,.Lend5678
-       fanop
-
-       b,a     .Loop
-       .align  16
-.Loop:
-       srlx    u0,cnt,%g1
-       or      %g3,%g2,%g3
-       ldx     [up+8],u0
-       fanop
-C --
-       sllx    u1,tnc,%g2
-       subcc   n,4,n
-       stx     %g3,[rp+0]
-       fanop
-C --
-       srlx    u1,cnt,%g3
-       or      %g1,%g2,%g1
-       ldx     [up+16],u1
-       fanop
-C --
-       sllx    u2,tnc,%g2
-       stx     %g1,[rp+8]
-       add     up,32,up
-       fanop
-C --
-       srlx    u2,cnt,%g1
-       or      %g3,%g2,%g3
-       ldx     [up-8],u2
-       fanop
-C --
-       sllx    u3,tnc,%g2
-       stx     %g3,[rp+16]
-       add     rp,32,rp
-       fanop
-C --
-       srlx    u3,cnt,%g3
-       or      %g1,%g2,%g1
-       ldx     [up+0],u3
-       fanop
-C --
-       sllx    u0,tnc,%g2
-       stx     %g1,[rp-8]
-       bge,pt  %icc,.Loop
-       fanop
-C --
-.Lend5678:
-       srlx    u0,cnt,%g1
-       or      %g3,%g2,%g3
-       sllx    u1,tnc,%g2
-       stx     %g3,[rp+0]
-       srlx    u1,cnt,%g3
-       or      %g1,%g2,%g1
-       sllx    u2,tnc,%g2
-       stx     %g1,[rp+8]
-       srlx    u2,cnt,%g1
-       or      %g3,%g2,%g3
-       sllx    u3,tnc,%g2
-       stx     %g3,[rp+16]
-       add     rp,32,rp
-       srlx    u3,cnt,%g3              C carry...
-       or      %g1,%g2,%g1
-       stx     %g1,[rp-8]
-
-.Lend1234:
-       addcc   n,4,n
-       bz,pn   %icc,.Lret
-       fanop
-.Loop0:
-       add     rp,8,rp
-       subcc   n,1,n
-       ldx     [up+8],u3
-       add     up,8,up
-       sllx    u3,tnc,%g2
-       or      %g3,%g2,%g3
-       stx     %g3,[rp-8]
-       srlx    u3,cnt,%g3
-       bnz,pt  %icc,.Loop0
-       fanop
-.Lret:
-       stx     %g3,[rp+0]
-       mov     %i5,%i0
-       ret
-       restore
-EPILOGUE(mpn_rshift)
diff --git a/mpn/sparc64/sparc64.h b/mpn/sparc64/sparc64.h

index 945e422f5a73a87764bca5c06a530d8e993ce214..8f77ddd161b8fcf445521f1b0521076a40669017 100644 (file)
--- a/mpn/sparc64/sparc64.h
+++ b/mpn/sparc64/sparc64.h
@@ -129,6 +129,24 @@ Error, error, unknown limb endianness;
  #endif
  
  
+/* Multiply u and v, where v < 2^32; w1 gets the high word, w0 the low word.  */
+#define umul_ppmm_s(w1, w0, u, v)                                      \
+  do {                                                                 \
+    UWtype __x0, __x2;                                                 \
+    UWtype __ul, __vl, __uh;                                           \
+    UWtype __u = (u), __v = (v);                                       \
+                                                                       \
+    __ul = __ll_lowpart (__u);                                         \
+    __uh = __ll_highpart (__u);                                                \
+    __vl = __ll_lowpart (__v);                                         \
+                                                                       \
+    __x0 = (UWtype) __ul * __vl;                                       \
+    __x2 = (UWtype) __uh * __vl;                                       \
+                                                                       \
+    (w1) = (__x2 + (__x0 >> W_TYPE_SIZE/2)) >> W_TYPE_SIZE/2;          \
+    (w0) = (__x2 << W_TYPE_SIZE/2) + __x0;                             \
+  } while (0)
+
  /* Count the leading zeros on a limb, but assuming it fits in 32 bits.
     The count returned will be in the range 32 to 63.
     This is the 32-bit generic C count_leading_zeros from longlong.h. */
diff --git a/mpn/sparc64/sqr_diagonal.asm b/mpn/sparc64/sqr_diagonal.asm

deleted file mode 100644 (file)

index fbbb4ff..0000000
--- a/mpn/sparc64/sqr_diagonal.asm
+++ /dev/null
@@ -1,331 +0,0 @@
-dnl  SPARC v9 64-bit mpn_sqr_diagonal.
-
-dnl  Copyright 2001, 2002 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of the GNU Lesser General Public License as published
-dnl  by the Free Software Foundation; either version 3 of the License, or (at
-dnl  your option) any later version.
-
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-dnl  License for more details.
-
-dnl  You should have received a copy of the GNU Lesser General Public License
-dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C                 cycles/limb
-C UltraSPARC 1&2:     22
-C UltraSPARC 3:              36
-
-C This was generated by the Sun C compiler.  It runs at 22 cycles/limb on the
-C UltraSPARC-1/2, three cycles slower than theoretically possible for optimal
-C code using the same algorithm.  For 1-3 limbs, a special loop was generated,
-C which causes performance problems in particular for 2 and 3 limbs.
-C Ultimately, this should be replaced by hand-written code in the same software
-C pipeline style as e.g., addmul_1.asm.
-
-ASM_START()
-       REGISTER(%g2,#scratch)
-       REGISTER(%g3,#scratch)
-PROLOGUE(mpn_sqr_diagonal)
-       save    %sp, -240, %sp
-
-       sethi   %hi(0x1ffc00), %o0
-       sethi   %hi(0x3ffc00), %o1
-       add     %o0, 1023, %o7
-       cmp     %i2, 4
-       add     %o1, 1023, %o4
-       or      %g0, %i1, %g1
-       or      %g0, %i0, %o0
-       bl,pn   %xcc, .Lsmall
-       or      %g0, 0, %g2
-
-       ldx     [%i1], %o1
-       add     %i1, 24, %g1
-       or      %g0, 3, %g2
-       srlx    %o1, 42, %g3
-       stx     %g3, [%sp+2279]
-       and     %o1, %o7, %o2
-       stx     %o2, [%sp+2263]
-       srlx    %o1, 21, %o1
-       ldd     [%sp+2279], %f0
-       and     %o1, %o7, %o1
-       stx     %o1, [%sp+2271]
-       ldx     [%i1+8], %o2
-       fxtod   %f0, %f12
-       srlx    %o2, 21, %o1
-       and     %o2, %o7, %g3
-       ldd     [%sp+2263], %f2
-       fmuld   %f12, %f12, %f10
-       srlx    %o2, 42, %o2
-       ldd     [%sp+2271], %f0
-       and     %o1, %o7, %o1
-       fxtod   %f2, %f8
-       stx     %o2, [%sp+2279]
-       stx     %o1, [%sp+2271]
-       fxtod   %f0, %f0
-       stx     %g3, [%sp+2263]
-       fdtox   %f10, %f14
-       fmuld   %f12, %f8, %f6
-       ldx     [%i1+16], %o2
-       std     %f14, [%sp+2255]
-       fmuld   %f0, %f0, %f2
-       fmuld   %f8, %f8, %f10
-       srlx    %o2, 42, %o1
-       faddd   %f6, %f6, %f6
-       fmuld   %f12, %f0, %f12
-       fmuld   %f0, %f8, %f8
-       ldd     [%sp+2279], %f0
-       ldd     [%sp+2263], %f4
-       fdtox   %f10, %f10
-       std     %f10, [%sp+2239]
-       faddd   %f2, %f6, %f6
-       ldd     [%sp+2271], %f2
-       fdtox   %f12, %f12
-       std     %f12, [%sp+2247]
-       fdtox   %f8, %f8
-       std     %f8, [%sp+2231]
-       fdtox   %f6, %f6
-       std     %f6, [%sp+2223]
-
-.Loop: srlx    %o2, 21, %g3
-       stx     %o1, [%sp+2279]
-       add     %g2, 1, %g2
-       and     %g3, %o7, %o1
-       ldx     [%sp+2255], %g4
-       cmp     %g2, %i2
-       stx     %o1, [%sp+2271]
-       add     %g1, 8, %g1
-       add     %o0, 16, %o0
-       ldx     [%sp+2239], %o1
-       fxtod   %f0, %f10
-       fxtod   %f4, %f14
-       ldx     [%sp+2231], %i0
-       ldx     [%sp+2223], %g5
-       ldx     [%sp+2247], %g3
-       and     %o2, %o7, %o2
-       fxtod   %f2, %f8
-       fmuld   %f10, %f10, %f0
-       stx     %o2, [%sp+2263]
-       fmuld   %f10, %f14, %f6
-       ldx     [%g1-8], %o2
-       fmuld   %f10, %f8, %f12
-       fdtox   %f0, %f2
-       ldd     [%sp+2279], %f0
-       fmuld   %f8, %f8, %f4
-       faddd   %f6, %f6, %f6
-       fmuld   %f14, %f14, %f10
-       std     %f2, [%sp+2255]
-       sllx    %g4, 20, %g4
-       ldd     [%sp+2271], %f2
-       fmuld   %f8, %f14, %f8
-       sllx    %i0, 22, %i1
-       fdtox   %f12, %f12
-       std     %f12, [%sp+2247]
-       sllx    %g5, 42, %i0
-       add     %o1, %i1, %o1
-       faddd   %f4, %f6, %f6
-       ldd     [%sp+2263], %f4
-       add     %o1, %i0, %o1
-       add     %g3, %g4, %g3
-       fdtox   %f10, %f10
-       std     %f10, [%sp+2239]
-       srlx    %o1, 42, %g4
-       and     %g5, %o4, %i0
-       fdtox   %f8, %f8
-       std     %f8, [%sp+2231]
-       srlx    %g5, 22, %g5
-       sub     %g4, %i0, %g4
-       fdtox   %f6, %f6
-       std     %f6, [%sp+2223]
-       srlx    %g4, 63, %g4
-       add     %g3, %g5, %g3
-       add     %g3, %g4, %g3
-       stx     %o1, [%o0-16]
-       srlx    %o2, 42, %o1
-       bl,pt   %xcc, .Loop
-       stx     %g3, [%o0-8]
-
-       stx     %o1, [%sp+2279]
-       srlx    %o2, 21, %o1
-       fxtod   %f0, %f16
-       ldx     [%sp+2223], %g3
-       fxtod   %f4, %f6
-       and     %o2, %o7, %o3
-       stx     %o3, [%sp+2263]
-       fxtod   %f2, %f4
-       and     %o1, %o7, %o1
-       ldx     [%sp+2231], %o2
-       sllx    %g3, 42, %g4
-       fmuld   %f16, %f16, %f14
-       stx     %o1, [%sp+2271]
-       fmuld   %f16, %f6, %f8
-       add     %o0, 48, %o0
-       ldx     [%sp+2239], %o1
-       sllx    %o2, 22, %o2
-       fmuld   %f4, %f4, %f10
-       ldx     [%sp+2255], %o3
-       fdtox   %f14, %f14
-       fmuld   %f4, %f6, %f2
-       std     %f14, [%sp+2255]
-       faddd   %f8, %f8, %f12
-       add     %o1, %o2, %o2
-       fmuld   %f16, %f4, %f4
-       ldd     [%sp+2279], %f0
-       sllx    %o3, 20, %g5
-       add     %o2, %g4, %o2
-       fmuld   %f6, %f6, %f6
-       srlx    %o2, 42, %o3
-       and     %g3, %o4, %g4
-       srlx    %g3, 22, %g3
-       faddd   %f10, %f12, %f16
-       ldd     [%sp+2271], %f12
-       ldd     [%sp+2263], %f8
-       fxtod   %f0, %f0
-       sub     %o3, %g4, %o3
-       ldx     [%sp+2247], %o1
-       srlx    %o3, 63, %o3
-       fdtox   %f2, %f10
-       fxtod   %f8, %f8
-       std     %f10, [%sp+2231]
-       fdtox   %f6, %f6
-       std     %f6, [%sp+2239]
-       add     %o1, %g5, %o1
-       fmuld   %f0, %f0, %f2
-       fdtox   %f16, %f16
-       std     %f16, [%sp+2223]
-       add     %o1, %g3, %o1
-       fdtox   %f4, %f4
-       std     %f4, [%sp+2247]
-       fmuld   %f0, %f8, %f10
-       fxtod   %f12, %f12
-       add     %o1, %o3, %o1
-       stx     %o2, [%o0-48]
-       fmuld   %f8, %f8, %f6
-       stx     %o1, [%o0-40]
-       fdtox   %f2, %f2
-       ldx     [%sp+2231], %o2
-       faddd   %f10, %f10, %f10
-       ldx     [%sp+2223], %g3
-       fmuld   %f12, %f12, %f4
-       fdtox   %f6, %f6
-       ldx     [%sp+2239], %o1
-       sllx    %o2, 22, %o2
-       fmuld   %f12, %f8, %f8
-       sllx    %g3, 42, %g5
-       ldx     [%sp+2255], %o3
-       fmuld   %f0, %f12, %f0
-       add     %o1, %o2, %o2
-       faddd   %f4, %f10, %f4
-       ldx     [%sp+2247], %o1
-       add     %o2, %g5, %o2
-       and     %g3, %o4, %g4
-       fdtox   %f8, %f8
-       sllx    %o3, 20, %g5
-       std     %f8, [%sp+2231]
-       fdtox   %f0, %f0
-       srlx    %o2, 42, %o3
-       add     %o1, %g5, %o1
-       fdtox   %f4, %f4
-       srlx    %g3, 22, %g3
-       sub     %o3, %g4, %o3
-       std     %f6, [%sp+2239]
-       std     %f4, [%sp+2223]
-       srlx    %o3, 63, %o3
-       add     %o1, %g3, %o1
-       std     %f2, [%sp+2255]
-       add     %o1, %o3, %o1
-       std     %f0, [%sp+2247]
-       stx     %o2, [%o0-32]
-       stx     %o1, [%o0-24]
-       ldx     [%sp+2231], %o2
-       ldx     [%sp+2223], %o3
-       ldx     [%sp+2239], %o1
-       sllx    %o2, 22, %o2
-       sllx    %o3, 42, %g5
-       ldx     [%sp+2255], %g4
-       and     %o3, %o4, %g3
-       add     %o1, %o2, %o2
-       ldx     [%sp+2247], %o1
-       add     %o2, %g5, %o2
-       stx     %o2, [%o0-16]
-       sllx    %g4, 20, %g4
-       srlx    %o2, 42, %o2
-       add     %o1, %g4, %o1
-       srlx    %o3, 22, %o3
-       sub     %o2, %g3, %o2
-       srlx    %o2, 63, %o2
-       add     %o1, %o3, %o1
-       add     %o1, %o2, %o1
-       stx     %o1, [%o0-8]
-       ret
-       restore %g0, %g0, %g0
-.Lsmall:
-       ldx     [%g1], %o2
-.Loop0:
-       and     %o2, %o7, %o1
-       stx     %o1, [%sp+2263]
-       add     %g2, 1, %g2
-       srlx    %o2, 21, %o1
-       add     %g1, 8, %g1
-       srlx    %o2, 42, %o2
-       stx     %o2, [%sp+2279]
-       and     %o1, %o7, %o1
-       ldd     [%sp+2263], %f0
-       cmp     %g2, %i2
-       stx     %o1, [%sp+2271]
-       fxtod   %f0, %f6
-       ldd     [%sp+2279], %f0
-       ldd     [%sp+2271], %f4
-       fxtod   %f0, %f2
-       fmuld   %f6, %f6, %f0
-       fxtod   %f4, %f10
-       fmuld   %f2, %f6, %f4
-       fdtox   %f0, %f0
-       std     %f0, [%sp+2239]
-       fmuld   %f10, %f6, %f8
-       fmuld   %f10, %f10, %f0
-       faddd   %f4, %f4, %f6
-       fmuld   %f2, %f2, %f4
-       fdtox   %f8, %f8
-       std     %f8, [%sp+2231]
-       fmuld   %f2, %f10, %f2
-       faddd   %f0, %f6, %f0
-       fdtox   %f4, %f4
-       std     %f4, [%sp+2255]
-       fdtox   %f2, %f2
-       std     %f2, [%sp+2247]
-       fdtox   %f0, %f0
-       std     %f0, [%sp+2223]
-       ldx     [%sp+2239], %o1
-       ldx     [%sp+2255], %g4
-       ldx     [%sp+2231], %o2
-       sllx    %g4, 20, %g4
-       ldx     [%sp+2223], %o3
-       sllx    %o2, 22, %o2
-       sllx    %o3, 42, %g5
-       add     %o1, %o2, %o2
-       ldx     [%sp+2247], %o1
-       add     %o2, %g5, %o2
-       stx     %o2, [%o0]
-       and     %o3, %o4, %g3
-       srlx    %o2, 42, %o2
-       add     %o1, %g4, %o1
-       srlx    %o3, 22, %o3
-       sub     %o2, %g3, %o2
-       srlx    %o2, 63, %o2
-       add     %o1, %o3, %o1
-       add     %o1, %o2, %o1
-       stx     %o1, [%o0+8]
-       add     %o0, 16, %o0
-       bl,a,pt %xcc, .Loop0
-       ldx     [%g1], %o2
-       ret
-       restore %g0, %g0, %g0
-EPILOGUE(mpn_sqr_diagonal)
diff --git a/mpn/sparc64/sub_n.asm b/mpn/sparc64/sub_n.asm

deleted file mode 100644 (file)

index e6fe9ee..0000000
--- a/mpn/sparc64/sub_n.asm
+++ /dev/null
@@ -1,220 +0,0 @@
-dnl  SPARC v9 mpn_sub_n -- Subtract two limb vectors of the same length > 0 and
-dnl  store difference in a third limb vector.
-
-dnl  Copyright 2001, 2002, 2003 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of the GNU Lesser General Public License as published
-dnl  by the Free Software Foundation; either version 3 of the License, or (at
-dnl  your option) any later version.
-
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-dnl  License for more details.
-
-dnl  You should have received a copy of the GNU Lesser General Public License
-dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C                 cycles/limb
-C UltraSPARC 1&2:     4
-C UltraSPARC 3:              4.5
-
-C Compute carry-out from the most significant bits of u,v, and r, where
-C r=u-v-carry_in, using logic operations.
-
-C This code runs at 4 cycles/limb on UltraSPARC 1 and 2.  It has a 4 insn
-C recurrency, and the UltraSPARC 1 and 2 the IE units are 100% saturated.
-C Therefore, it seems futile to try to optimize this any further...
-
-C INPUT PARAMETERS
-define(`rp',`%i0')
-define(`up',`%i1')
-define(`vp',`%i2')
-define(`n',`%i3')
-
-define(`u0',`%l0')
-define(`u1',`%l2')
-define(`u2',`%l4')
-define(`u3',`%l6')
-define(`v0',`%l1')
-define(`v1',`%l3')
-define(`v2',`%l5')
-define(`v3',`%l7')
-
-define(`cy',`%i4')
-
-define(`fanop',`fitod %f0,%f2')                dnl  A quasi nop running in the FA pipe
-define(`fmnop',`fmuld %f0,%f0,%f4')    dnl  A quasi nop running in the FM pipe
-
-ASM_START()
-       REGISTER(%g2,#scratch)
-       REGISTER(%g3,#scratch)
-PROLOGUE(mpn_sub_n)
-       save    %sp,-160,%sp
-
-       fitod   %f0,%f0         C make sure f0 contains small, quiet number
-       subcc   n,4,%g0
-       bl,pn   %icc,.Loop0
-       mov     0,cy
-
-       ldx     [up+0],u0
-       ldx     [vp+0],v0
-       add     up,32,up
-       ldx     [up-24],u1
-       ldx     [vp+8],v1
-       add     vp,32,vp
-       ldx     [up-16],u2
-       ldx     [vp-16],v2
-       ldx     [up-8],u3
-       ldx     [vp-8],v3
-       subcc   n,8,n
-       sub     u0,v0,%g1       C main sub
-       sub     %g1,cy,%g4      C carry sub
-       orn     u0,v0,%g2
-       bl,pn   %icc,.Lend4567
-       fanop
-       b,a     .Loop
-
-       .align  16
-C START MAIN LOOP
-.Loop: orn     %g4,%g2,%g2
-       andn    u0,v0,%g3
-       ldx     [up+0],u0
-       fanop
-C --
-       andn    %g2,%g3,%g2
-       ldx     [vp+0],v0
-       add     up,32,up
-       fanop
-C --
-       srlx    %g2,63,cy
-       sub     u1,v1,%g1
-       stx     %g4,[rp+0]
-       fanop
-C --
-       sub     %g1,cy,%g4
-       orn     u1,v1,%g2
-       fmnop
-       fanop
-C --
-       orn     %g4,%g2,%g2
-       andn    u1,v1,%g3
-       ldx     [up-24],u1
-       fanop
-C --
-       andn    %g2,%g3,%g2
-       ldx     [vp+8],v1
-       add     vp,32,vp
-       fanop
-C --
-       srlx    %g2,63,cy
-       sub     u2,v2,%g1
-       stx     %g4,[rp+8]
-       fanop
-C --
-       sub     %g1,cy,%g4
-       orn     u2,v2,%g2
-       fmnop
-       fanop
-C --
-       orn     %g4,%g2,%g2
-       andn    u2,v2,%g3
-       ldx     [up-16],u2
-       fanop
-C --
-       andn    %g2,%g3,%g2
-       ldx     [vp-16],v2
-       add     rp,32,rp
-       fanop
-C --
-       srlx    %g2,63,cy
-       sub     u3,v3,%g1
-       stx     %g4,[rp-16]
-       fanop
-C --
-       sub     %g1,cy,%g4
-       orn     u3,v3,%g2
-       fmnop
-       fanop
-C --
-       orn     %g4,%g2,%g2
-       andn    u3,v3,%g3
-       ldx     [up-8],u3
-       fanop
-C --
-       andn    %g2,%g3,%g2
-       subcc   n,4,n
-       ldx     [vp-8],v3
-       fanop
-C --
-       srlx    %g2,63,cy
-       sub     u0,v0,%g1
-       stx     %g4,[rp-8]
-       fanop
-C --
-       sub     %g1,cy,%g4
-       orn     u0,v0,%g2
-       bge,pt  %icc,.Loop
-       fanop
-C END MAIN LOOP
-.Lend4567:
-       orn     %g4,%g2,%g2
-       andn    u0,v0,%g3
-       andn    %g2,%g3,%g2
-       srlx    %g2,63,cy
-       sub     u1,v1,%g1
-       stx     %g4,[rp+0]
-       sub     %g1,cy,%g4
-       orn     u1,v1,%g2
-       orn     %g4,%g2,%g2
-       andn    u1,v1,%g3
-       andn    %g2,%g3,%g2
-       srlx    %g2,63,cy
-       sub     u2,v2,%g1
-       stx     %g4,[rp+8]
-       sub     %g1,cy,%g4
-       orn     u2,v2,%g2
-       orn     %g4,%g2,%g2
-       andn    u2,v2,%g3
-       andn    %g2,%g3,%g2
-       add     rp,32,rp
-       srlx    %g2,63,cy
-       sub     u3,v3,%g1
-       stx     %g4,[rp-16]
-       sub     %g1,cy,%g4
-       orn     u3,v3,%g2
-       orn     %g4,%g2,%g2
-       andn    u3,v3,%g3
-       andn    %g2,%g3,%g2
-       srlx    %g2,63,cy
-       stx     %g4,[rp-8]
-
-       addcc   n,4,n
-       bz,pn   %icc,.Lret
-       fanop
-
-.Loop0:        ldx     [up],u0
-       add     up,8,up
-       ldx     [vp],v0
-       add     vp,8,vp
-       add     rp,8,rp
-       subcc   n,1,n
-       sub     u0,v0,%g1
-       orn     u0,v0,%g2
-       sub     %g1,cy,%g4
-       andn    u0,v0,%g3
-       orn     %g4,%g2,%g2
-       stx     %g4,[rp-8]
-       andn    %g2,%g3,%g2
-       bnz,pt  %icc,.Loop0
-       srlx    %g2,63,cy
-
-.Lret: mov     cy,%i0
-       ret
-       restore
-EPILOGUE(mpn_sub_n)
diff --git a/mpn/sparc64/submul_1.asm b/mpn/sparc64/submul_1.asm

deleted file mode 100644 (file)

index ba91200..0000000
--- a/mpn/sparc64/submul_1.asm
+++ /dev/null
@@ -1,57 +0,0 @@
-dnl  SPARC v9 64-bit mpn_submul_1 -- Multiply a limb vector with a limb and
-dnl  subtract the result from a second limb vector.
-
-dnl  Copyright 2001, 2002, 2003 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of the GNU Lesser General Public License as published
-dnl  by the Free Software Foundation; either version 3 of the License, or (at
-dnl  your option) any later version.
-
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-dnl  License for more details.
-
-dnl  You should have received a copy of the GNU Lesser General Public License
-dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C                 cycles/limb
-C UltraSPARC 1&2:     18
-C UltraSPARC 3:              23
-
-C INPUT PARAMETERS
-C rp   i0
-C up   i1
-C n    i2
-C v    i3
-
-ASM_START()
-       REGISTER(%g2,#scratch)
-
-PROLOGUE(mpn_submul_1)
-       save    %sp,-176,%sp
-
-       sllx    %i2, 3, %g2
-       or      %g0, %i1, %o1
-       add     %g2, 15, %o0
-       or      %g0, %i2, %o2
-       and     %o0, -16, %o0
-       sub     %sp, %o0, %sp
-       add     %sp, 2223, %o0
-       or      %g0, %o0, %l0
-       call    mpn_mul_1
-       or      %g0, %i3, %o3
-       or      %g0, %o0, %l1           C preserve carry value from mpn_mul_1
-       or      %g0, %i0, %o0
-       or      %g0, %i0, %o1
-       or      %g0, %l0, %o2
-       call    mpn_sub_n
-       or      %g0, %i2, %o3
-       ret
-       restore %l1, %o0, %o0           C sum carry values
-EPILOGUE(mpn_submul_1)
diff --git a/mpn/sparc64/ultrasparc1234/add_n.asm b/mpn/sparc64/ultrasparc1234/add_n.asm

new file mode 100644 (file)

index 0000000..2dbac31
--- /dev/null
+++ b/mpn/sparc64/ultrasparc1234/add_n.asm
@@ -0,0 +1,230 @@
+dnl  SPARC v9 mpn_add_n -- Add two limb vectors of the same length > 0 and
+dnl  store sum in a third limb vector.
+
+dnl  Copyright 2001, 2002, 2003, 2011 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C                 cycles/limb
+C UltraSPARC 1&2:     4
+C UltraSPARC 3:              4.5
+
+C Compute carry-out from the most significant bits of u,v, and r, where
+C r=u+v+carry_in, using logic operations.
+
+C This code runs at 4 cycles/limb on UltraSPARC 1 and 2.  It has a 4 insn
+C recurrence, and on UltraSPARC 1 and 2 the IE units are 100% saturated.
+C Therefore, it seems futile to try to optimize this any further...
+
+C INPUT PARAMETERS
+define(`rp',`%i0')
+define(`up',`%i1')
+define(`vp',`%i2')
+define(`n',`%i3')
+
+define(`u0',`%l0')
+define(`u1',`%l2')
+define(`u2',`%l4')
+define(`u3',`%l6')
+define(`v0',`%l1')
+define(`v1',`%l3')
+define(`v2',`%l5')
+define(`v3',`%l7')
+
+define(`cy',`%i4')
+
+define(`fanop',`fitod %f0,%f2')                dnl  A quasi nop running in the FA pipe
+define(`fmnop',`fmuld %f0,%f0,%f4')    dnl  A quasi nop running in the FM pipe
+
+ASM_START()
+       REGISTER(%g2,#scratch)
+       REGISTER(%g3,#scratch)
+PROLOGUE(mpn_add_nc)
+       save    %sp,-160,%sp
+
+       fitod   %f0,%f0         C make sure f0 contains small, quiet number
+       subcc   n,4,%g0
+       bl,pn   %xcc,.Loop0
+       nop
+       b,a     L(com)
+EPILOGUE()
+
+PROLOGUE(mpn_add_n)
+       save    %sp,-160,%sp
+
+       fitod   %f0,%f0         C make sure f0 contains small, quiet number
+       subcc   n,4,%g0
+       bl,pn   %xcc,.Loop0
+       mov     0,cy
+L(com):
+       ldx     [up+0],u0
+       ldx     [vp+0],v0
+       add     up,32,up
+       ldx     [up-24],u1
+       ldx     [vp+8],v1
+       add     vp,32,vp
+       ldx     [up-16],u2
+       ldx     [vp-16],v2
+       ldx     [up-8],u3
+       ldx     [vp-8],v3
+       subcc   n,8,n
+       add     u0,v0,%g1       C main add
+       add     %g1,cy,%g4      C carry add
+       or      u0,v0,%g2
+       bl,pn   %xcc,.Lend4567
+       fanop
+       b,a     .Loop
+
+       .align  16
+C START MAIN LOOP
+.Loop: andn    %g2,%g4,%g2
+       and     u0,v0,%g3
+       ldx     [up+0],u0
+       fanop
+C --
+       or      %g3,%g2,%g2
+       ldx     [vp+0],v0
+       add     up,32,up
+       fanop
+C --
+       srlx    %g2,63,cy
+       add     u1,v1,%g1
+       stx     %g4,[rp+0]
+       fanop
+C --
+       add     %g1,cy,%g4
+       or      u1,v1,%g2
+       fmnop
+       fanop
+C --
+       andn    %g2,%g4,%g2
+       and     u1,v1,%g3
+       ldx     [up-24],u1
+       fanop
+C --
+       or      %g3,%g2,%g2
+       ldx     [vp+8],v1
+       add     vp,32,vp
+       fanop
+C --
+       srlx    %g2,63,cy
+       add     u2,v2,%g1
+       stx     %g4,[rp+8]
+       fanop
+C --
+       add     %g1,cy,%g4
+       or      u2,v2,%g2
+       fmnop
+       fanop
+C --
+       andn    %g2,%g4,%g2
+       and     u2,v2,%g3
+       ldx     [up-16],u2
+       fanop
+C --
+       or      %g3,%g2,%g2
+       ldx     [vp-16],v2
+       add     rp,32,rp
+       fanop
+C --
+       srlx    %g2,63,cy
+       add     u3,v3,%g1
+       stx     %g4,[rp-16]
+       fanop
+C --
+       add     %g1,cy,%g4
+       or      u3,v3,%g2
+       fmnop
+       fanop
+C --
+       andn    %g2,%g4,%g2
+       and     u3,v3,%g3
+       ldx     [up-8],u3
+       fanop
+C --
+       or      %g3,%g2,%g2
+       subcc   n,4,n
+       ldx     [vp-8],v3
+       fanop
+C --
+       srlx    %g2,63,cy
+       add     u0,v0,%g1
+       stx     %g4,[rp-8]
+       fanop
+C --
+       add     %g1,cy,%g4
+       or      u0,v0,%g2
+       bge,pt  %xcc,.Loop
+       fanop
+C END MAIN LOOP
+.Lend4567:
+       andn    %g2,%g4,%g2
+       and     u0,v0,%g3
+       or      %g3,%g2,%g2
+       srlx    %g2,63,cy
+       add     u1,v1,%g1
+       stx     %g4,[rp+0]
+       add     %g1,cy,%g4
+       or      u1,v1,%g2
+       andn    %g2,%g4,%g2
+       and     u1,v1,%g3
+       or      %g3,%g2,%g2
+       srlx    %g2,63,cy
+       add     u2,v2,%g1
+       stx     %g4,[rp+8]
+       add     %g1,cy,%g4
+       or      u2,v2,%g2
+       andn    %g2,%g4,%g2
+       and     u2,v2,%g3
+       or      %g3,%g2,%g2
+       add     rp,32,rp
+       srlx    %g2,63,cy
+       add     u3,v3,%g1
+       stx     %g4,[rp-16]
+       add     %g1,cy,%g4
+       or      u3,v3,%g2
+       andn    %g2,%g4,%g2
+       and     u3,v3,%g3
+       or      %g3,%g2,%g2
+       srlx    %g2,63,cy
+       stx     %g4,[rp-8]
+
+       addcc   n,4,n
+       bz,pn   %xcc,.Lret
+       fanop
+
+.Loop0:        ldx     [up],u0
+       add     up,8,up
+       ldx     [vp],v0
+       add     vp,8,vp
+       add     rp,8,rp
+       subcc   n,1,n
+       add     u0,v0,%g1
+       or      u0,v0,%g2
+       add     %g1,cy,%g4
+       and     u0,v0,%g3
+       andn    %g2,%g4,%g2
+       stx     %g4,[rp-8]
+       or      %g3,%g2,%g2
+       bnz,pt  %xcc,.Loop0
+       srlx    %g2,63,cy
+
+.Lret: mov     cy,%i0
+       ret
+       restore
+EPILOGUE()
diff --git a/mpn/sparc64/ultrasparc1234/addmul_1.asm b/mpn/sparc64/ultrasparc1234/addmul_1.asm

new file mode 100644 (file)

index 0000000..afffaf8
--- /dev/null
+++ b/mpn/sparc64/ultrasparc1234/addmul_1.asm
@@ -0,0 +1,596 @@
+dnl  SPARC v9 64-bit mpn_addmul_1 -- Multiply a limb vector with a limb and add
+dnl  the result to a second limb vector.
+
+dnl  Copyright 1998, 2000, 2001, 2002, 2003, 2004 Free Software Foundation,
+dnl  Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C                 cycles/limb
+C UltraSPARC 1&2:     14
+C UltraSPARC 3:              17.5
+
+C Algorithm: We use eight floating-point multiplies per limb product, with the
+C invariant v operand split into four 16-bit pieces, and the up operand split
+C into 32-bit pieces.  We sum pairs of 48-bit partial products using
+C floating-point add, then convert the four 49-bit product-sums and transfer
+C them to the integer unit.
+
+C Possible optimizations:
+C   0. Rewrite to use algorithm of mpn_addmul_2.
+C   1. Align the stack area where we transfer the four 49-bit product-sums
+C      to a 32-byte boundary.  That would minimize the cache collision.
+C      (UltraSPARC-1/2 use a direct-mapped cache.)  (Perhaps even better would
+C      be to align the area to map to the area immediately before up?)
+C   2. Sum the 4 49-bit quantities using 32-bit operations, as in the
+C      develop mpn_addmul_2.  This would save many integer instructions.
+C   3. Unrolling.  Questionable if it is worth the code expansion, given that
+C      it could only save 1 cycle/limb.
+C   4. Specialize for particular v values.  If its upper 32 bits are zero, we
+C      could save many operations, in the FPU (fmuld), but more so in the IEU
+C      since we'll be summing 48-bit quantities, which might be simpler.
+C   5. Ideally, we should schedule the f2/f3 and f4/f5 RAW dependencies
+C      further apart, and the i00,i16,i32,i48 RAW dependencies closer
+C      together.  The latter distance should be no greater than needed for L2
+C      cache latency, and also not so great that i16 needs to be copied.
+C   6. Avoid performing mem+fa+fm in the same cycle, at least not when we want
+C      to get high IEU bandwidth.  (12 of the 14 cycles will be free for 2 IEU
+C      ops.)
+
+C Instruction classification (as per UltraSPARC-1/2 functional units):
+C    8 FM
+C   10 FA
+C   12 MEM
+C   10 ISHIFT + 14 IADDLOG
+C    1 BRANCH
+C   55 insns in total (plus one mov insn that should be optimized out)
+
+C The loop executes 56 instructions in 14 cycles on UltraSPARC-1/2, i.e., we
+C sustain the peak execution rate of 4 instructions/cycle.
+
+C INPUT PARAMETERS
+C rp   i0
+C up   i1
+C n    i2
+C v    i3
+
+ASM_START()
+       REGISTER(%g2,#scratch)          C %g2 is used as a scratch register below
+       REGISTER(%g3,#scratch)          C %g3 is used as scratch and as rlimb
+
+define(`p00', `%f8') define(`p16',`%f10') define(`p32',`%f12') define(`p48',`%f14')  C u00 * (v00,v16,v32,v48)
+define(`r32',`%f16') define(`r48',`%f18') define(`r64',`%f20') define(`r80',`%f22')  C u32 * (v00,v16,v32,v48)
+define(`v00',`%f24') define(`v16',`%f26') define(`v32',`%f28') define(`v48',`%f30')  C 16-bit pieces of v, as doubles
+define(`u00',`%f32') define(`u32', `%f34')  C low and high 32 bits of up[i], as doubles
+define(`a00',`%f36') define(`a16',`%f38') define(`a32',`%f40') define(`a48',`%f42')  C product sums, converted in place by fdtox
+define(`cy',`%g1')  C running carry
+define(`rlimb',`%g3')  C limb read from rp
+define(`i00',`%l0') define(`i16',`%l1') define(`i32',`%l2') define(`i48',`%l3')  C integer copies of a00..a48 read back from the stack
+define(`xffffffff',`%l7')  C mask with the low 32 bits set
+define(`xffff',`%o0')  C mask with the low 16 bits set
+
+PROLOGUE(mpn_addmul_1)
+
+C Initialization.  (1) Split v operand into four 16-bit chunks and store them
+C as IEEE double in fp registers.  (2) Clear upper 32 bits of fp register pairs
+C f2 and f4.  (3) Store masks in registers aliased to `xffff' and `xffffffff'.
+
+       save    %sp, -256, %sp          C new register window, 256-byte frame
+       mov     -1, %g4                 C all ones, shifted below to form the masks
+       srlx    %g4, 48, xffff          C store mask in register `xffff'
+       and     %i3, xffff, %g2         C v bits 0-15
+       stx     %g2, [%sp+2223+0]       C NOTE(review): 2223 is presumably stack bias 2047 + 176, confirm against the ABI
+       srlx    %i3, 16, %g3
+       and     %g3, xffff, %g3         C v bits 16-31
+       stx     %g3, [%sp+2223+8]
+       srlx    %i3, 32, %g2
+       and     %g2, xffff, %g2         C v bits 32-47
+       stx     %g2, [%sp+2223+16]
+       srlx    %i3, 48, %g3            C v bits 48-63
+       stx     %g3, [%sp+2223+24]
+       srlx    %g4, 32, xffffffff      C store mask in register `xffffffff'
+
+       sllx    %i2, 3, %i2             C n limbs -> n*8 bytes
+       mov     0, cy                   C clear cy
+       add     %i0, %i2, %i0           C rp + n*8
+       add     %i1, %i2, %i1           C up + n*8
+       neg     %i2                     C negative byte index, stepped by 8 up to zero
+       add     %i1, 4, %i5             C base for the second 32-bit word of each up limb
+       add     %i0, -32, %i4           C base used for result stores ([%i4+%i2])
+       add     %i0, -16, %i0           C base used for rp loads ([%i0+%i2])
+
+       ldd     [%sp+2223+0], v00       C reload the four v pieces into fp registers
+       ldd     [%sp+2223+8], v16
+       ldd     [%sp+2223+16], v32
+       ldd     [%sp+2223+24], v48
+       ld      [%sp+2223+0],%f2        C zero f2
+       ld      [%sp+2223+0],%f4        C zero f4
+       ld      [%i5+%i2], %f3          C read low 32 bits of up[i]
+       ld      [%i1+%i2], %f5          C read high 32 bits of up[i]
+       fxtod   v00, v00                C convert each v piece from integer to double
+       fxtod   v16, v16
+       fxtod   v32, v32
+       fxtod   v48, v48
+
+C Start real work.  (We sneakily read f3 and f5 above...)
+C The software pipeline is very deep, requiring 4 feed-in stages.
+
+       fxtod   %f2, u00                C low half of the first up limb as double
+       fxtod   %f4, u32                C high half of the first up limb as double
+       fmuld   u00, v00, a00           C first limb: product goes straight to a00, no r64 to add yet
+       fmuld   u00, v16, a16           C first limb: product goes straight to a16, no r80 to add yet
+       fmuld   u00, v32, p32
+       fmuld   u32, v00, r32
+       fmuld   u00, v48, p48
+       addcc   %i2, 8, %i2             C advance index, sets Z when it reaches zero
+       bnz,pt  %xcc, .L_two_or_more    C index nonzero: at least one more limb
+       fmuld   u32, v16, r48           C (delay slot)
+
+.L_one:                                C reached when the index hit zero after one limb (n = 1)
+       fmuld   u32, v32, r64   C FIXME not urgent
+       faddd   p32, r32, a32           C a32 = u00*v32 + u32*v00
+       fdtox   a00, a00                C convert the sums to 64-bit integers
+       faddd   p48, r48, a48           C a48 = u00*v48 + u32*v16
+       fmuld   u32, v48, r80   C FIXME not urgent
+       fdtox   a16, a16
+       fdtox   a32, a32
+       fdtox   a48, a48
+       std     a00, [%sp+2223+0]       C hand the four sums to the integer unit via the stack
+       std     a16, [%sp+2223+8]
+       std     a32, [%sp+2223+16]
+       std     a48, [%sp+2223+24]
+       add     %i2, 8, %i2
+
+       fdtox   r64, a00                C no further limb: r64 alone forms the next a00
+       ldx     [%i0+%i2], rlimb        C read rp[i]
+       fdtox   r80, a16                C no further limb: r80 alone forms the next a16
+       ldx     [%sp+2223+0], i00
+       ldx     [%sp+2223+8], i16
+       ldx     [%sp+2223+16], i32
+       ldx     [%sp+2223+24], i48
+       std     a00, [%sp+2223+0]
+       std     a16, [%sp+2223+8]
+       add     %i2, 8, %i2
+
+       srlx    rlimb, 32, %g4          C HI(rlimb)
+       and     rlimb, xffffffff, %g5   C LO(rlimb)
+       add     i00, %g5, %g5           C i00+ now in g5
+       ldx     [%sp+2223+0], i00       C converted r64, consumed at the end of .L_out_1
+       srlx    i16, 48, %l4            C (i16 >> 48)
+       mov     i16, %g2                C keep the current i16, it is reloaded next
+       ldx     [%sp+2223+8], i16       C converted r80, consumed at the end of .L_out_1
+       srlx    i48, 16, %l5            C (i48 >> 16)
+       add     i32, %g4, %g4           C i32+ now in g4
+       sllx    i48, 32, %l6            C (i48 << 32)
+       srlx    %g4, 32, %o3            C (i32 >> 32)
+       add     %l5, %l4, %o1           C hi64- in %o1
+       std     a00, [%sp+2223+0]
+       sllx    %g4, 16, %o2            C (i32 << 16)
+       add     %o3, %o1, %o1           C hi64 in %o1   1st ASSIGNMENT
+       std     a16, [%sp+2223+8]
+       sllx    %o1, 48, %o3            C (hi64 << 48)
+       add     %g2, %o2, %o2           C mi64- in %o2
+       add     %l6, %o2, %o2           C mi64- in %o2
+       sub     %o2, %o3, %o2           C mi64 in %o2   1st ASSIGNMENT
+       add     cy, %g5, %o4            C x = prev(i00) + cy
+       b       .L_out_1
+       add     %i2, 8, %i2             C (delay slot) advance index
+
+.L_two_or_more:                        C index still nonzero after one limb: feed in the next up limb
+       ld      [%i5+%i2], %f3          C read low 32 bits of up[i]
+       fmuld   u32, v32, r64   C FIXME not urgent
+       faddd   p32, r32, a32           C a32 = u00*v32 + u32*v00
+       ld      [%i1+%i2], %f5          C read high 32 bits of up[i]
+       fdtox   a00, a00                C convert the sums to 64-bit integers
+       faddd   p48, r48, a48           C a48 = u00*v48 + u32*v16
+       fmuld   u32, v48, r80   C FIXME not urgent
+       fdtox   a16, a16
+       fdtox   a32, a32
+       fxtod   %f2, u00                C newly loaded low half as double
+       fxtod   %f4, u32                C newly loaded high half as double
+       fdtox   a48, a48
+       std     a00, [%sp+2223+0]       C hand the four sums to the integer unit via the stack
+       fmuld   u00, v00, p00
+       std     a16, [%sp+2223+8]
+       fmuld   u00, v16, p16
+       std     a32, [%sp+2223+16]
+       fmuld   u00, v32, p32
+       std     a48, [%sp+2223+24]
+       faddd   p00, r64, a00           C a00 = new u00*v00 + previous limb u32*v32
+       fmuld   u32, v00, r32
+       faddd   p16, r80, a16           C a16 = new u00*v16 + previous limb u32*v48
+       fmuld   u00, v48, p48
+       addcc   %i2, 8, %i2             C advance index, sets Z when it reaches zero
+       bnz,pt  %xcc, .L_three_or_more  C index nonzero: at least one more limb
+       fmuld   u32, v16, r48           C (delay slot)
+
+.L_two:                                C reached when the index hit zero after two limbs (n = 2)
+       fmuld   u32, v32, r64   C FIXME not urgent
+       faddd   p32, r32, a32           C a32 = u00*v32 + u32*v00
+       fdtox   a00, a00                C convert the sums to 64-bit integers
+       ldx     [%i0+%i2], rlimb        C read rp[i]
+       faddd   p48, r48, a48           C a48 = u00*v48 + u32*v16
+       fmuld   u32, v48, r80   C FIXME not urgent
+       fdtox   a16, a16
+       ldx     [%sp+2223+0], i00       C read back the sums stored by the previous stage
+       fdtox   a32, a32
+       ldx     [%sp+2223+8], i16
+       ldx     [%sp+2223+16], i32
+       ldx     [%sp+2223+24], i48
+       fdtox   a48, a48
+       std     a00, [%sp+2223+0]       C store this limb's four sums in the same slots
+       std     a16, [%sp+2223+8]
+       std     a32, [%sp+2223+16]
+       std     a48, [%sp+2223+24]
+       add     %i2, 8, %i2
+
+       fdtox   r64, a00                C no further limb: r64 alone forms the next a00
+       srlx    rlimb, 32, %g4          C HI(rlimb)
+       and     rlimb, xffffffff, %g5   C LO(rlimb)
+       ldx     [%i0+%i2], rlimb        C read rp[i]
+       add     i00, %g5, %g5           C i00+ now in g5
+       fdtox   r80, a16                C no further limb: r80 alone forms the next a16
+       ldx     [%sp+2223+0], i00
+       srlx    i16, 48, %l4            C (i16 >> 48)
+       mov     i16, %g2                C keep the current i16, it is reloaded next
+       ldx     [%sp+2223+8], i16
+       srlx    i48, 16, %l5            C (i48 >> 16)
+       add     i32, %g4, %g4           C i32+ now in g4
+       ldx     [%sp+2223+16], i32
+       sllx    i48, 32, %l6            C (i48 << 32)
+       ldx     [%sp+2223+24], i48
+       srlx    %g4, 32, %o3            C (i32 >> 32)
+       add     %l5, %l4, %o1           C hi64- in %o1
+       std     a00, [%sp+2223+0]       C converted r64, read back in .L_out_2
+       sllx    %g4, 16, %o2            C (i32 << 16)
+       add     %o3, %o1, %o1           C hi64 in %o1   1st ASSIGNMENT
+       std     a16, [%sp+2223+8]       C converted r80, read back in .L_out_2
+       sllx    %o1, 48, %o3            C (hi64 << 48)
+       add     %g2, %o2, %o2           C mi64- in %o2
+       add     %l6, %o2, %o2           C mi64- in %o2
+       sub     %o2, %o3, %o2           C mi64 in %o2   1st ASSIGNMENT
+       add     cy, %g5, %o4            C x = prev(i00) + cy
+       b       .L_out_2
+       add     %i2, 8, %i2             C (delay slot) advance index
+
+.L_three_or_more:                      C index still nonzero after two limbs: feed in the next up limb
+       ld      [%i5+%i2], %f3          C read low 32 bits of up[i]
+       fmuld   u32, v32, r64   C FIXME not urgent
+       faddd   p32, r32, a32           C a32 = u00*v32 + u32*v00
+       ld      [%i1+%i2], %f5          C read high 32 bits of up[i]
+       fdtox   a00, a00                C convert the sums to 64-bit integers
+       ldx     [%i0+%i2], rlimb        C read rp[i]
+       faddd   p48, r48, a48           C a48 = u00*v48 + u32*v16
+       fmuld   u32, v48, r80   C FIXME not urgent
+       fdtox   a16, a16
+       ldx     [%sp+2223+0], i00       C read back the sums stored by the previous stage
+       fdtox   a32, a32
+       ldx     [%sp+2223+8], i16
+       fxtod   %f2, u00                C newly loaded low half as double
+       ldx     [%sp+2223+16], i32
+       fxtod   %f4, u32                C newly loaded high half as double
+       ldx     [%sp+2223+24], i48
+       fdtox   a48, a48
+       std     a00, [%sp+2223+0]       C store this limb's four sums in the same slots
+       fmuld   u00, v00, p00
+       std     a16, [%sp+2223+8]
+       fmuld   u00, v16, p16
+       std     a32, [%sp+2223+16]
+       fmuld   u00, v32, p32
+       std     a48, [%sp+2223+24]
+       faddd   p00, r64, a00           C a00 = new u00*v00 + previous limb u32*v32
+       fmuld   u32, v00, r32
+       faddd   p16, r80, a16           C a16 = new u00*v16 + previous limb u32*v48
+       fmuld   u00, v48, p48
+       addcc   %i2, 8, %i2             C advance index, sets Z when it reaches zero
+       bnz,pt  %xcc, .L_four_or_more   C index nonzero: at least one more limb
+       fmuld   u32, v16, r48           C (delay slot)
+
+.L_three:                              C reached when the index hit zero after three limbs (n = 3)
+       fmuld   u32, v32, r64   C FIXME not urgent
+       faddd   p32, r32, a32           C a32 = u00*v32 + u32*v00
+       fdtox   a00, a00                C convert the sums to 64-bit integers
+       srlx    rlimb, 32, %g4          C HI(rlimb)
+       and     rlimb, xffffffff, %g5   C LO(rlimb)
+       ldx     [%i0+%i2], rlimb        C read rp[i]
+       faddd   p48, r48, a48           C a48 = u00*v48 + u32*v16
+       add     i00, %g5, %g5           C i00+ now in g5
+       fmuld   u32, v48, r80   C FIXME not urgent
+       fdtox   a16, a16
+       ldx     [%sp+2223+0], i00
+       fdtox   a32, a32
+       srlx    i16, 48, %l4            C (i16 >> 48)
+       mov     i16, %g2                C keep the current i16, it is reloaded next
+       ldx     [%sp+2223+8], i16
+       srlx    i48, 16, %l5            C (i48 >> 16)
+       add     i32, %g4, %g4           C i32+ now in g4
+       ldx     [%sp+2223+16], i32
+       sllx    i48, 32, %l6            C (i48 << 32)
+       ldx     [%sp+2223+24], i48
+       fdtox   a48, a48
+       srlx    %g4, 32, %o3            C (i32 >> 32)
+       add     %l5, %l4, %o1           C hi64- in %o1
+       std     a00, [%sp+2223+0]       C store this limb's four sums for the integer unit
+       sllx    %g4, 16, %o2            C (i32 << 16)
+       add     %o3, %o1, %o1           C hi64 in %o1   1st ASSIGNMENT
+       std     a16, [%sp+2223+8]
+       sllx    %o1, 48, %o3            C (hi64 << 48)
+       add     %g2, %o2, %o2           C mi64- in %o2
+       std     a32, [%sp+2223+16]
+       add     %l6, %o2, %o2           C mi64- in %o2
+       std     a48, [%sp+2223+24]
+       sub     %o2, %o3, %o2           C mi64 in %o2   1st ASSIGNMENT
+       add     cy, %g5, %o4            C x = prev(i00) + cy
+       b       .L_out_3
+       add     %i2, 8, %i2             C (delay slot) advance index
+
+.L_four_or_more:                       C index still nonzero after three limbs: last feed-in stage
+       ld      [%i5+%i2], %f3          C read low 32 bits of up[i]
+       fmuld   u32, v32, r64   C FIXME not urgent
+       faddd   p32, r32, a32           C a32 = u00*v32 + u32*v00
+       ld      [%i1+%i2], %f5          C read high 32 bits of up[i]
+       fdtox   a00, a00                C convert the sums to 64-bit integers
+       srlx    rlimb, 32, %g4          C HI(rlimb)
+       and     rlimb, xffffffff, %g5   C LO(rlimb)
+       ldx     [%i0+%i2], rlimb        C read rp[i]
+       faddd   p48, r48, a48           C a48 = u00*v48 + u32*v16
+       add     i00, %g5, %g5           C i00+ now in g5
+       fmuld   u32, v48, r80   C FIXME not urgent
+       fdtox   a16, a16
+       ldx     [%sp+2223+0], i00
+       fdtox   a32, a32
+       srlx    i16, 48, %l4            C (i16 >> 48)
+       mov     i16, %g2                C keep the current i16, it is reloaded next
+       ldx     [%sp+2223+8], i16
+       fxtod   %f2, u00                C newly loaded low half as double
+       srlx    i48, 16, %l5            C (i48 >> 16)
+       add     i32, %g4, %g4           C i32+ now in g4
+       ldx     [%sp+2223+16], i32
+       fxtod   %f4, u32                C newly loaded high half as double
+       sllx    i48, 32, %l6            C (i48 << 32)
+       ldx     [%sp+2223+24], i48
+       fdtox   a48, a48
+       srlx    %g4, 32, %o3            C (i32 >> 32)
+       add     %l5, %l4, %o1           C hi64- in %o1
+       std     a00, [%sp+2223+0]       C store this limb's four sums for the integer unit
+       fmuld   u00, v00, p00
+       sllx    %g4, 16, %o2            C (i32 << 16)
+       add     %o3, %o1, %o1           C hi64 in %o1   1st ASSIGNMENT
+       std     a16, [%sp+2223+8]
+       fmuld   u00, v16, p16
+       sllx    %o1, 48, %o3            C (hi64 << 48)
+       add     %g2, %o2, %o2           C mi64- in %o2
+       std     a32, [%sp+2223+16]
+       fmuld   u00, v32, p32
+       add     %l6, %o2, %o2           C mi64- in %o2
+       std     a48, [%sp+2223+24]
+       faddd   p00, r64, a00           C a00 = new u00*v00 + previous limb u32*v32
+       fmuld   u32, v00, r32
+       sub     %o2, %o3, %o2           C mi64 in %o2   1st ASSIGNMENT
+       faddd   p16, r80, a16           C a16 = new u00*v16 + previous limb u32*v48
+       fmuld   u00, v48, p48
+       add     cy, %g5, %o4            C x = prev(i00) + cy
+       addcc   %i2, 8, %i2             C advance index, sets Z when it reaches zero
+       bnz,pt  %xcc, .Loop             C index nonzero: enter the main loop
+       fmuld   u32, v16, r48           C (delay slot)
+
+.L_four:                               C reached when the index hit zero after four limbs (n = 4)
+       b,a     .L_out_4                C annul bit set: the following instruction is not executed
+
+C BEGIN MAIN LOOP
+       .align  16
+.Loop:
+C 00
+       srlx    %o4, 16, %o5            C (x >> 16)
+       ld      [%i5+%i2], %f3          C read low 32 bits of up[i]
+       fmuld   u32, v32, r64   C FIXME not urgent
+       faddd   p32, r32, a32           C a32 = u00*v32 + u32*v00
+C 01
+       add     %o5, %o2, %o2           C mi64 in %o2   2nd ASSIGNMENT
+       and     %o4, xffff, %o5         C (x & 0xffff)
+       ld      [%i1+%i2], %f5          C read high 32 bits of up[i]
+       fdtox   a00, a00
+C 02
+       srlx    rlimb, 32, %g4          C HI(rlimb)
+       and     rlimb, xffffffff, %g5   C LO(rlimb)
+       ldx     [%i0+%i2], rlimb        C read rp[i]
+       faddd   p48, r48, a48           C a48 = u00*v48 + u32*v16
+C 03
+       srlx    %o2, 48, %o7            C (mi64 >> 48)
+       add     i00, %g5, %g5           C i00+ now in g5
+       fmuld   u32, v48, r80   C FIXME not urgent
+       fdtox   a16, a16
+C 04
+       sllx    %o2, 16, %i3            C (mi64 << 16)
+       add     %o7, %o1, cy            C new cy
+       ldx     [%sp+2223+0], i00
+       fdtox   a32, a32
+C 05
+       srlx    i16, 48, %l4            C (i16 >> 48)
+       mov     i16, %g2                C keep the current i16, it is reloaded next
+       ldx     [%sp+2223+8], i16
+       fxtod   %f2, u00                C newly loaded low half as double
+C 06
+       srlx    i48, 16, %l5            C (i48 >> 16)
+       add     i32, %g4, %g4           C i32+ now in g4
+       ldx     [%sp+2223+16], i32
+       fxtod   %f4, u32                C newly loaded high half as double
+C 07
+       sllx    i48, 32, %l6            C (i48 << 32)
+       or      %i3, %o5, %o5           C result limb = (mi64 << 16) | (x & 0xffff)
+       ldx     [%sp+2223+24], i48
+       fdtox   a48, a48
+C 08
+       srlx    %g4, 32, %o3            C (i32 >> 32)
+       add     %l5, %l4, %o1           C hi64- in %o1
+       std     a00, [%sp+2223+0]
+       fmuld   u00, v00, p00
+C 09
+       sllx    %g4, 16, %o2            C (i32 << 16)
+       add     %o3, %o1, %o1           C hi64 in %o1   1st ASSIGNMENT
+       std     a16, [%sp+2223+8]
+       fmuld   u00, v16, p16
+C 10
+       sllx    %o1, 48, %o3            C (hi64 << 48)
+       add     %g2, %o2, %o2           C mi64- in %o2
+       std     a32, [%sp+2223+16]
+       fmuld   u00, v32, p32
+C 11
+       add     %l6, %o2, %o2           C mi64- in %o2
+       std     a48, [%sp+2223+24]
+       faddd   p00, r64, a00           C a00 = new u00*v00 + previous limb u32*v32
+       fmuld   u32, v00, r32
+C 12
+       sub     %o2, %o3, %o2           C mi64 in %o2   1st ASSIGNMENT
+       stx     %o5, [%i4+%i2]          C store the finished result limb
+       faddd   p16, r80, a16           C a16 = new u00*v16 + previous limb u32*v48
+       fmuld   u00, v48, p48
+C 13
+       add     cy, %g5, %o4            C x = prev(i00) + cy
+       addcc   %i2, 8, %i2             C advance index, sets Z when it reaches zero
+       bnz,pt  %xcc, .Loop             C repeat while the index is nonzero
+       fmuld   u32, v16, r48           C (delay slot)
+C END MAIN LOOP
+
+.L_out_4:                              C wind-down: four result limbs remain to be stored from here on
+       srlx    %o4, 16, %o5            C (x >> 16)
+       fmuld   u32, v32, r64   C FIXME not urgent
+       faddd   p32, r32, a32           C a32 = u00*v32 + u32*v00
+       add     %o5, %o2, %o2           C mi64 in %o2   2nd ASSIGNMENT
+       and     %o4, xffff, %o5         C (x & 0xffff)
+       fdtox   a00, a00
+       srlx    rlimb, 32, %g4          C HI(rlimb)
+       and     rlimb, xffffffff, %g5   C LO(rlimb)
+       ldx     [%i0+%i2], rlimb        C read rp[i]
+       faddd   p48, r48, a48           C a48 = u00*v48 + u32*v16
+       srlx    %o2, 48, %o7            C (mi64 >> 48)
+       add     i00, %g5, %g5           C i00+ now in g5
+       fmuld   u32, v48, r80   C FIXME not urgent
+       fdtox   a16, a16
+       sllx    %o2, 16, %i3            C (mi64 << 16)
+       add     %o7, %o1, cy            C new cy
+       ldx     [%sp+2223+0], i00
+       fdtox   a32, a32
+       srlx    i16, 48, %l4            C (i16 >> 48)
+       mov     i16, %g2                C keep the current i16, it is reloaded next
+       ldx     [%sp+2223+8], i16
+       srlx    i48, 16, %l5            C (i48 >> 16)
+       add     i32, %g4, %g4           C i32+ now in g4
+       ldx     [%sp+2223+16], i32
+       sllx    i48, 32, %l6            C (i48 << 32)
+       or      %i3, %o5, %o5           C result limb = (mi64 << 16) | (x & 0xffff)
+       ldx     [%sp+2223+24], i48
+       fdtox   a48, a48
+       srlx    %g4, 32, %o3            C (i32 >> 32)
+       add     %l5, %l4, %o1           C hi64- in %o1
+       std     a00, [%sp+2223+0]       C store the last limb's four sums for the integer unit
+       sllx    %g4, 16, %o2            C (i32 << 16)
+       add     %o3, %o1, %o1           C hi64 in %o1   1st ASSIGNMENT
+       std     a16, [%sp+2223+8]
+       sllx    %o1, 48, %o3            C (hi64 << 48)
+       add     %g2, %o2, %o2           C mi64- in %o2
+       std     a32, [%sp+2223+16]
+       add     %l6, %o2, %o2           C mi64- in %o2
+       std     a48, [%sp+2223+24]
+       sub     %o2, %o3, %o2           C mi64 in %o2   1st ASSIGNMENT
+       stx     %o5, [%i4+%i2]          C store the finished result limb
+       add     cy, %g5, %o4            C x = prev(i00) + cy
+       add     %i2, 8, %i2             C advance index, then fall through to .L_out_3
+.L_out_3:                              C wind-down: three result limbs remain to be stored from here on
+       srlx    %o4, 16, %o5            C (x >> 16)
+       add     %o5, %o2, %o2           C mi64 in %o2   2nd ASSIGNMENT
+       and     %o4, xffff, %o5         C (x & 0xffff)
+       fdtox   r64, a00                C no further limb: r64 alone forms the next a00
+       srlx    rlimb, 32, %g4          C HI(rlimb)
+       and     rlimb, xffffffff, %g5   C LO(rlimb)
+       ldx     [%i0+%i2], rlimb        C read rp[i]
+       srlx    %o2, 48, %o7            C (mi64 >> 48)
+       add     i00, %g5, %g5           C i00+ now in g5
+       fdtox   r80, a16                C no further limb: r80 alone forms the next a16
+       sllx    %o2, 16, %i3            C (mi64 << 16)
+       add     %o7, %o1, cy            C new cy
+       ldx     [%sp+2223+0], i00
+       srlx    i16, 48, %l4            C (i16 >> 48)
+       mov     i16, %g2                C keep the current i16, it is reloaded next
+       ldx     [%sp+2223+8], i16
+       srlx    i48, 16, %l5            C (i48 >> 16)
+       add     i32, %g4, %g4           C i32+ now in g4
+       ldx     [%sp+2223+16], i32
+       sllx    i48, 32, %l6            C (i48 << 32)
+       or      %i3, %o5, %o5           C result limb = (mi64 << 16) | (x & 0xffff)
+       ldx     [%sp+2223+24], i48
+       srlx    %g4, 32, %o3            C (i32 >> 32)
+       add     %l5, %l4, %o1           C hi64- in %o1
+       std     a00, [%sp+2223+0]       C converted r64, read back in .L_out_2
+       sllx    %g4, 16, %o2            C (i32 << 16)
+       add     %o3, %o1, %o1           C hi64 in %o1   1st ASSIGNMENT
+       std     a16, [%sp+2223+8]       C converted r80, read back in .L_out_2
+       sllx    %o1, 48, %o3            C (hi64 << 48)
+       add     %g2, %o2, %o2           C mi64- in %o2
+       add     %l6, %o2, %o2           C mi64- in %o2
+       sub     %o2, %o3, %o2           C mi64 in %o2   1st ASSIGNMENT
+       stx     %o5, [%i4+%i2]          C store the finished result limb
+       add     cy, %g5, %o4            C x = prev(i00) + cy
+       add     %i2, 8, %i2             C advance index, then fall through to .L_out_2
+.L_out_2:                              C wind-down: two result limbs remain to be stored from here on
+       srlx    %o4, 16, %o5            C (x >> 16)
+       add     %o5, %o2, %o2           C mi64 in %o2   2nd ASSIGNMENT
+       and     %o4, xffff, %o5         C (x & 0xffff)
+       srlx    rlimb, 32, %g4          C HI(rlimb)
+       and     rlimb, xffffffff, %g5   C LO(rlimb)
+       srlx    %o2, 48, %o7            C (mi64 >> 48)
+       add     i00, %g5, %g5           C i00+ now in g5
+       sllx    %o2, 16, %i3            C (mi64 << 16)
+       add     %o7, %o1, cy            C new cy
+       ldx     [%sp+2223+0], i00       C converted r64, consumed at the end of .L_out_1
+       srlx    i16, 48, %l4            C (i16 >> 48)
+       mov     i16, %g2                C keep the current i16, it is reloaded next
+       ldx     [%sp+2223+8], i16       C converted r80, consumed at the end of .L_out_1
+       srlx    i48, 16, %l5            C (i48 >> 16)
+       add     i32, %g4, %g4           C i32+ now in g4
+       sllx    i48, 32, %l6            C (i48 << 32)
+       or      %i3, %o5, %o5           C result limb = (mi64 << 16) | (x & 0xffff)
+       srlx    %g4, 32, %o3            C (i32 >> 32)
+       add     %l5, %l4, %o1           C hi64- in %o1
+       sllx    %g4, 16, %o2            C (i32 << 16)
+       add     %o3, %o1, %o1           C hi64 in %o1   1st ASSIGNMENT
+       sllx    %o1, 48, %o3            C (hi64 << 48)
+       add     %g2, %o2, %o2           C mi64- in %o2
+       add     %l6, %o2, %o2           C mi64- in %o2
+       sub     %o2, %o3, %o2           C mi64 in %o2   1st ASSIGNMENT
+       stx     %o5, [%i4+%i2]          C store the finished result limb
+       add     cy, %g5, %o4            C x = prev(i00) + cy
+       add     %i2, 8, %i2             C advance index, then fall through to .L_out_1
+.L_out_1:                              C wind-down: store the last result limb and form the return value
+       srlx    %o4, 16, %o5            C (x >> 16)
+       add     %o5, %o2, %o2           C mi64 in %o2   2nd ASSIGNMENT
+       and     %o4, xffff, %o5         C (x & 0xffff)
+       srlx    %o2, 48, %o7            C (mi64 >> 48)
+       sllx    %o2, 16, %i3            C (mi64 << 16)
+       add     %o7, %o1, cy            C new cy
+       or      %i3, %o5, %o5           C result limb = (mi64 << 16) | (x & 0xffff)
+       stx     %o5, [%i4+%i2]          C store the last result limb
+
+       sllx    i00, 0, %g2             C shift by 0: plain copy of i00
+       add     %g2, cy, cy             C cy += i00
+       sllx    i16, 16, %g3            C (i16 << 16)
+       add     %g3, cy, cy             C cy += (i16 << 16)
+
+       return  %i7+8                   C return to the caller
+       mov     cy, %o0                 C (delay slot) hand back cy as the function result
+EPILOGUE(mpn_addmul_1)
diff --git a/mpn/sparc64/ultrasparc1234/addmul_2.asm b/mpn/sparc64/ultrasparc1234/addmul_2.asm

new file mode 100644 (file)

index 0000000..65efb51
--- /dev/null
+++ b/mpn/sparc64/ultrasparc1234/addmul_2.asm
@@ -0,0 +1,540 @@
+dnl  SPARC v9 64-bit mpn_addmul_2 -- Multiply an n limb number with 2-limb
+dnl  number and add the result to a n limb vector.
+
+dnl  Copyright 2002, 2003 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C                  cycles/limb
+C UltraSPARC 1&2:      9
+C UltraSPARC 3:       10
+
+C Algorithm: We use 16 floating-point multiplies per limb product, with the
+C 2-limb v operand split into eight 16-bit pieces, and the n-limb u operand
+C split into 32-bit pieces.  We sum four 48-bit partial products using
+C floating-point add, then convert the resulting four 50-bit quantities and
+C transfer them to the integer unit.
+
+C Possible optimizations:
+C   1. Align the stack area where we transfer the four 50-bit product-sums
+C      to a 32-byte boundary.  That would minimize the cache collision.
+C      (UltraSPARC-1/2 use a direct-mapped cache.)  (Perhaps even better would
+C      be to align the area to map to the area immediately before up?)
+C   2. Perform two of the fp->int conversions with integer instructions.  We
+C      can get almost ten free IEU slots, if we clean up bookkeeping and the
+C      silly carry-limb code.
+C   3. For an mpn_addmul_1 based on this, we need to fix the silly carry-limb
+C      code.
+
+C OSP (Overlapping software pipeline) version of mpn_mul_basecase:
+C Operand swap will require 8 LDDA and 8 FXTOD, which will mean 8 cycles.
+C FI   = 20
+C L    =  9 x un * vn
+C WDFI = 10 x vn / 2
+C WD   = 4
+
+C Instruction classification (as per UltraSPARC functional units).
+C Assuming silly carry code is fixed.  Includes bookkeeping.
+C
+C               mpn_addmul_X     mpn_mul_X
+C                1       2       1       2
+C               ==========      ==========
+C      FM        8      16       8      16
+C      FA       10      18      10      18
+C     MEM       12      12      10      10
+C  ISHIFT        6       6       6       6
+C IADDLOG       11      11      10      10
+C  BRANCH        1       1       1       1
+C
+C TOTAL IEU     17      17      16      16
+C TOTAL         48      64      45      61
+C
+C IEU cycles     8.5     8.5     8       8
+C MEM cycles    12      12      10      10
+C ISSUE cycles  12      16      11.25   15.25
+C FPU cycles    10      18      10      18
+C cycles/loop   12      18      12      18
+C cycles/limb   12       9      12       9
+
+
+C INPUT PARAMETERS
+C rp[n + 1]    i0
+C up[n]                i1
+C n            i2
+C vp[2]                i3
+
+
+ASM_START()
+       REGISTER(%g2,#scratch)
+       REGISTER(%g3,#scratch)
+
+C Combine registers:
+C u00_hi= u32_hi
+C u00_lo= u32_lo
+C a000  = out000
+C a016  = out016
+C Free: f52 f54
+
+
+define(`p000', `%f8')  define(`p016',`%f10')
+define(`p032',`%f12')  define(`p048',`%f14')
+define(`p064',`%f16')  define(`p080',`%f18')
+define(`p096a',`%f20') define(`p112a',`%f22')
+define(`p096b',`%f56') define(`p112b',`%f58')
+
+define(`out000',`%f0') define(`out016',`%f6')
+
+define(`v000',`%f24')  define(`v016',`%f26')
+define(`v032',`%f28')  define(`v048',`%f30')
+define(`v064',`%f44')  define(`v080',`%f46')
+define(`v096',`%f48')  define(`v112',`%f50')
+
+define(`u00',`%f32')   define(`u32', `%f34')
+
+define(`a000',`%f36')  define(`a016',`%f38')
+define(`a032',`%f40')  define(`a048',`%f42')
+define(`a064',`%f60')  define(`a080',`%f62')
+
+define(`u00_hi',`%f2') define(`u32_hi',`%f4')
+define(`u00_lo',`%f3') define(`u32_lo',`%f5')
+
+define(`cy',`%g1')
+define(`rlimb',`%g3')
+define(`i00',`%l0')    define(`i16',`%l1')
+define(`r00',`%l2')    define(`r32',`%l3')
+define(`xffffffff',`%l7')
+define(`xffff',`%o0')
+
+
+PROLOGUE(mpn_addmul_2)
+
+C Initialization.  (1) Split v operand into eight 16-bit chunks and store them
+C as IEEE double in fp registers.  (2) Clear upper 32 bits of fp register pairs
+C f2 and f4.  (3) Store masks in registers aliased to `xffff' and `xffffffff'.
+C This code could be better scheduled.
+
+       save    %sp, -256, %sp
+
+ifdef(`HAVE_VIS',
+`      mov     -1, %g4
+       wr      %g0, 0xD2, %asi
+       srlx    %g4, 32, xffffffff      C store mask in register `xffffffff'
+       ldda    [%i3+6] %asi, v000
+       ldda    [%i3+4] %asi, v016
+       ldda    [%i3+2] %asi, v032
+       ldda    [%i3+0] %asi, v048
+       fxtod   v000, v000
+       ldda    [%i3+14] %asi, v064
+       fxtod   v016, v016
+       ldda    [%i3+12] %asi, v080
+       fxtod   v032, v032
+       ldda    [%i3+10] %asi, v096
+       fxtod   v048, v048
+       ldda    [%i3+8] %asi, v112
+       fxtod   v064, v064
+       fxtod   v080, v080
+       fxtod   v096, v096
+       fxtod   v112, v112
+       fzero   u00_hi
+       fzero   u32_hi
+',
+`      mov     -1, %g4
+       ldx     [%i3+0], %l0            C vp[0]
+       srlx    %g4, 48, xffff          C store mask in register `xffff'
+       ldx     [%i3+8], %l1            C vp[1]
+
+       and     %l0, xffff, %g2
+       stx     %g2, [%sp+2223+0]
+       srlx    %l0, 16, %g3
+       and     %g3, xffff, %g3
+       stx     %g3, [%sp+2223+8]
+       srlx    %l0, 32, %g2
+       and     %g2, xffff, %g2
+       stx     %g2, [%sp+2223+16]
+       srlx    %l0, 48, %g3
+       stx     %g3, [%sp+2223+24]
+       and     %l1, xffff, %g2
+       stx     %g2, [%sp+2223+32]
+       srlx    %l1, 16, %g3
+       and     %g3, xffff, %g3
+       stx     %g3, [%sp+2223+40]
+       srlx    %l1, 32, %g2
+       and     %g2, xffff, %g2
+       stx     %g2, [%sp+2223+48]
+       srlx    %l1, 48, %g3
+       stx     %g3, [%sp+2223+56]
+
+       srlx    %g4, 32, xffffffff      C store mask in register `xffffffff'
+
+       ldd     [%sp+2223+0], v000
+       ldd     [%sp+2223+8], v016
+       ldd     [%sp+2223+16], v032
+       ldd     [%sp+2223+24], v048
+       fxtod   v000, v000
+       ldd     [%sp+2223+32], v064
+       fxtod   v016, v016
+       ldd     [%sp+2223+40], v080
+       fxtod   v032, v032
+       ldd     [%sp+2223+48], v096
+       fxtod   v048, v048
+       ldd     [%sp+2223+56], v112
+       fxtod   v064, v064
+       ld      [%sp+2223+0], u00_hi    C zero u00_hi
+       fxtod   v080, v080
+       ld      [%sp+2223+0], u32_hi    C zero u32_hi
+       fxtod   v096, v096
+       fxtod   v112, v112
+')
+C Initialization done.
+       mov     0, %g2
+       mov     0, rlimb
+       mov     0, %g4
+       add     %i0, -8, %i0            C BOOKKEEPING
+
+C Start software pipeline.
+
+       ld      [%i1+4], u00_lo         C read low 32 bits of up[i]
+       fxtod   u00_hi, u00
+C mid
+       ld      [%i1+0], u32_lo         C read high 32 bits of up[i]
+       fmuld   u00, v000, a000
+       fmuld   u00, v016, a016
+       fmuld   u00, v032, a032
+       fmuld   u00, v048, a048
+       add     %i2, -1, %i2            C BOOKKEEPING
+       fmuld   u00, v064, p064
+       add     %i1, 8, %i1             C BOOKKEEPING
+       fxtod   u32_hi, u32
+       fmuld   u00, v080, p080
+       fmuld   u00, v096, p096a
+       brnz,pt %i2, .L_2_or_more
+        fmuld  u00, v112, p112a
+
+.L1:   fdtox   a000, out000
+       fmuld   u32, v000, p000
+       fdtox   a016, out016
+       fmuld   u32, v016, p016
+       fmovd   p064, a064
+       fmuld   u32, v032, p032
+       fmovd   p080, a080
+       fmuld   u32, v048, p048
+       std     out000, [%sp+2223+16]
+       faddd   p000, a032, a000
+       fmuld   u32, v064, p064
+       std     out016, [%sp+2223+24]
+       fxtod   u00_hi, u00
+       faddd   p016, a048, a016
+       fmuld   u32, v080, p080
+       faddd   p032, a064, a032
+       fmuld   u32, v096, p096b
+       faddd   p048, a080, a048
+       fmuld   u32, v112, p112b
+C mid
+       fdtox   a000, out000
+       fdtox   a016, out016
+       faddd   p064, p096a, a064
+       faddd   p080, p112a, a080
+       std     out000, [%sp+2223+0]
+       b       .L_wd2
+        std    out016, [%sp+2223+8]
+
+.L_2_or_more:
+       ld      [%i1+4], u00_lo         C read low 32 bits of up[i]
+       fdtox   a000, out000
+       fmuld   u32, v000, p000
+       fdtox   a016, out016
+       fmuld   u32, v016, p016
+       fmovd   p064, a064
+       fmuld   u32, v032, p032
+       fmovd   p080, a080
+       fmuld   u32, v048, p048
+       std     out000, [%sp+2223+16]
+       faddd   p000, a032, a000
+       fmuld   u32, v064, p064
+       std     out016, [%sp+2223+24]
+       fxtod   u00_hi, u00
+       faddd   p016, a048, a016
+       fmuld   u32, v080, p080
+       faddd   p032, a064, a032
+       fmuld   u32, v096, p096b
+       faddd   p048, a080, a048
+       fmuld   u32, v112, p112b
+C mid
+       ld      [%i1+0], u32_lo         C read high 32 bits of up[i]
+       fdtox   a000, out000
+       fmuld   u00, v000, p000
+       fdtox   a016, out016
+       fmuld   u00, v016, p016
+       faddd   p064, p096a, a064
+       fmuld   u00, v032, p032
+       faddd   p080, p112a, a080
+       fmuld   u00, v048, p048
+       add     %i2, -1, %i2            C BOOKKEEPING
+       std     out000, [%sp+2223+0]
+       faddd   p000, a032, a000
+       fmuld   u00, v064, p064
+       add     %i1, 8, %i1             C BOOKKEEPING
+       std     out016, [%sp+2223+8]
+       fxtod   u32_hi, u32
+       faddd   p016, a048, a016
+       fmuld   u00, v080, p080
+       faddd   p032, a064, a032
+       fmuld   u00, v096, p096a
+       faddd   p048, a080, a048
+       brnz,pt %i2, .L_3_or_more
+        fmuld  u00, v112, p112a
+
+       b       .Lend
+        nop
+
+C  64      32       0
+C   .       .       .
+C   .       |__rXXX_|  32
+C   .      |___cy___|  34
+C   .  |_______i00__|  50
+C  |_______i16__|   .  50
+
+
+C BEGIN MAIN LOOP
+       .align  16
+.L_3_or_more:
+.Loop: ld      [%i1+4], u00_lo         C read low 32 bits of up[i]
+       and     %g2, xffffffff, %g2
+       fdtox   a000, out000
+       fmuld   u32, v000, p000
+C
+       lduw    [%i0+4+8], r00          C read low 32 bits of rp[i]
+       add     %g2, rlimb, %l5
+       fdtox   a016, out016
+       fmuld   u32, v016, p016
+C
+       srlx    %l5, 32, cy
+       ldx     [%sp+2223+16], i00
+       faddd   p064, p096b, a064
+       fmuld   u32, v032, p032
+C
+       add     %g4, cy, cy             C new cy
+       ldx     [%sp+2223+24], i16
+       faddd   p080, p112b, a080
+       fmuld   u32, v048, p048
+C
+       nop
+       std     out000, [%sp+2223+16]
+       faddd   p000, a032, a000
+       fmuld   u32, v064, p064
+C
+       add     i00, r00, rlimb
+       add     %i0, 8, %i0             C BOOKKEEPING
+       std     out016, [%sp+2223+24]
+       fxtod   u00_hi, u00
+C
+       sllx    i16, 16, %g2
+       add     cy, rlimb, rlimb
+       faddd   p016, a048, a016
+       fmuld   u32, v080, p080
+C
+       srlx    i16, 16, %g4
+       add     %g2, rlimb, %l5
+       faddd   p032, a064, a032
+       fmuld   u32, v096, p096b
+C
+       stw     %l5, [%i0+4]
+       nop
+       faddd   p048, a080, a048
+       fmuld   u32, v112, p112b
+C midloop
+       ld      [%i1+0], u32_lo         C read high 32 bits of up[i]
+       and     %g2, xffffffff, %g2
+       fdtox   a000, out000
+       fmuld   u00, v000, p000
+C
+       lduw    [%i0+0], r32            C read high 32 bits of rp[i]
+       add     %g2, rlimb, %l5
+       fdtox   a016, out016
+       fmuld   u00, v016, p016
+C
+       srlx    %l5, 32, cy
+       ldx     [%sp+2223+0], i00
+       faddd   p064, p096a, a064
+       fmuld   u00, v032, p032
+C
+       add     %g4, cy, cy             C new cy
+       ldx     [%sp+2223+8], i16
+       faddd   p080, p112a, a080
+       fmuld   u00, v048, p048
+C
+       add     %i2, -1, %i2            C BOOKKEEPING
+       std     out000, [%sp+2223+0]
+       faddd   p000, a032, a000
+       fmuld   u00, v064, p064
+C
+       add     i00, r32, rlimb
+       add     %i1, 8, %i1             C BOOKKEEPING
+       std     out016, [%sp+2223+8]
+       fxtod   u32_hi, u32
+C
+       sllx    i16, 16, %g2
+       add     cy, rlimb, rlimb
+       faddd   p016, a048, a016
+       fmuld   u00, v080, p080
+C
+       srlx    i16, 16, %g4
+       add     %g2, rlimb, %l5
+       faddd   p032, a064, a032
+       fmuld   u00, v096, p096a
+C
+       stw     %l5, [%i0+0]
+       faddd   p048, a080, a048
+       brnz,pt %i2, .Loop
+        fmuld  u00, v112, p112a
+C END MAIN LOOP
+
+C WIND-DOWN PHASE 1
+.Lend: and     %g2, xffffffff, %g2
+       fdtox   a000, out000
+       fmuld   u32, v000, p000
+       lduw    [%i0+4+8], r00          C read low 32 bits of rp[i]
+       add     %g2, rlimb, %l5
+       fdtox   a016, out016
+       fmuld   u32, v016, p016
+       srlx    %l5, 32, cy
+       ldx     [%sp+2223+16], i00
+       faddd   p064, p096b, a064
+       fmuld   u32, v032, p032
+       add     %g4, cy, cy             C new cy
+       ldx     [%sp+2223+24], i16
+       faddd   p080, p112b, a080
+       fmuld   u32, v048, p048
+       std     out000, [%sp+2223+16]
+       faddd   p000, a032, a000
+       fmuld   u32, v064, p064
+       add     i00, r00, rlimb
+       add     %i0, 8, %i0             C BOOKKEEPING
+       std     out016, [%sp+2223+24]
+       sllx    i16, 16, %g2
+       add     cy, rlimb, rlimb
+       faddd   p016, a048, a016
+       fmuld   u32, v080, p080
+       srlx    i16, 16, %g4
+       add     %g2, rlimb, %l5
+       faddd   p032, a064, a032
+       fmuld   u32, v096, p096b
+       stw     %l5, [%i0+4]
+       faddd   p048, a080, a048
+       fmuld   u32, v112, p112b
+C mid
+       and     %g2, xffffffff, %g2
+       fdtox   a000, out000
+       lduw    [%i0+0], r32            C read high 32 bits of rp[i]
+       add     %g2, rlimb, %l5
+       fdtox   a016, out016
+       srlx    %l5, 32, cy
+       ldx     [%sp+2223+0], i00
+       faddd   p064, p096a, a064
+       add     %g4, cy, cy             C new cy
+       ldx     [%sp+2223+8], i16
+       faddd   p080, p112a, a080
+       std     out000, [%sp+2223+0]
+       add     i00, r32, rlimb
+       std     out016, [%sp+2223+8]
+       sllx    i16, 16, %g2
+       add     cy, rlimb, rlimb
+       srlx    i16, 16, %g4
+       add     %g2, rlimb, %l5
+       stw     %l5, [%i0+0]
+
+C WIND-DOWN PHASE 2
+.L_wd2:        and     %g2, xffffffff, %g2
+       fdtox   a032, out000
+       lduw    [%i0+4+8], r00          C read low 32 bits of rp[i]
+       add     %g2, rlimb, %l5
+       fdtox   a048, out016
+       srlx    %l5, 32, cy
+       ldx     [%sp+2223+16], i00
+       add     %g4, cy, cy             C new cy
+       ldx     [%sp+2223+24], i16
+       std     out000, [%sp+2223+16]
+       add     i00, r00, rlimb
+       add     %i0, 8, %i0             C BOOKKEEPING
+       std     out016, [%sp+2223+24]
+       sllx    i16, 16, %g2
+       add     cy, rlimb, rlimb
+       srlx    i16, 16, %g4
+       add     %g2, rlimb, %l5
+       stw     %l5, [%i0+4]
+C mid
+       and     %g2, xffffffff, %g2
+       fdtox   a064, out000
+       lduw    [%i0+0], r32            C read high 32 bits of rp[i]
+       add     %g2, rlimb, %l5
+       fdtox   a080, out016
+       srlx    %l5, 32, cy
+       ldx     [%sp+2223+0], i00
+       add     %g4, cy, cy             C new cy
+       ldx     [%sp+2223+8], i16
+       std     out000, [%sp+2223+0]
+       add     i00, r32, rlimb
+       std     out016, [%sp+2223+8]
+       sllx    i16, 16, %g2
+       add     cy, rlimb, rlimb
+       srlx    i16, 16, %g4
+       add     %g2, rlimb, %l5
+       stw     %l5, [%i0+0]
+
+C WIND-DOWN PHASE 3
+.L_wd3:        and     %g2, xffffffff, %g2
+       fdtox   p096b, out000
+       add     %g2, rlimb, %l5
+       fdtox   p112b, out016
+       srlx    %l5, 32, cy
+       ldx     [%sp+2223+16], rlimb
+       add     %g4, cy, cy             C new cy
+       ldx     [%sp+2223+24], i16
+       std     out000, [%sp+2223+16]
+       add     %i0, 8, %i0             C BOOKKEEPING
+       std     out016, [%sp+2223+24]
+       sllx    i16, 16, %g2
+       add     cy, rlimb, rlimb
+       srlx    i16, 16, %g4
+       add     %g2, rlimb, %l5
+       stw     %l5, [%i0+4]
+C mid
+       and     %g2, xffffffff, %g2
+       add     %g2, rlimb, %l5
+       srlx    %l5, 32, cy
+       ldx     [%sp+2223+0], rlimb
+       add     %g4, cy, cy             C new cy
+       ldx     [%sp+2223+8], i16
+       sllx    i16, 16, %g2
+       add     cy, rlimb, rlimb
+       srlx    i16, 16, %g4
+       add     %g2, rlimb, %l5
+       stw     %l5, [%i0+0]
+
+       and     %g2, xffffffff, %g2
+       add     %g2, rlimb, %l5
+       srlx    %l5, 32, cy
+       ldx     [%sp+2223+16], i00
+       add     %g4, cy, cy             C new cy
+       ldx     [%sp+2223+24], i16
+
+       sllx    i16, 16, %g2
+       add     i00, cy, cy
+       return  %i7+8
+       add     %g2, cy, %o0
+EPILOGUE(mpn_addmul_2)
diff --git a/mpn/sparc64/ultrasparc1234/lshift.asm b/mpn/sparc64/ultrasparc1234/lshift.asm

new file mode 100644 (file)

index 0000000..5fa7025
--- /dev/null
+++ b/mpn/sparc64/ultrasparc1234/lshift.asm
@@ -0,0 +1,150 @@
+dnl  SPARC v9 mpn_lshift
+
+dnl  Copyright 1996, 2000, 2001, 2002, 2003, 2010 Free Software Foundation,
+dnl  Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+
+include(`../config.m4')
+
+C                 cycles/limb
+C UltraSPARC 1&2:     2
+C UltraSPARC 3:              2.5
+
+C INPUT PARAMETERS
+define(`rp', `%i0')          C destination pointer (written with stx)
+define(`up', `%i1')          C source pointer (read with ldx)
+define(`n',  `%i2')          C limb count, 8 bytes per limb
+define(`cnt',`%i3')          C left shift count in bits
+
+define(`u0', `%l0')          C u0-u3 hold the source limbs currently in flight
+define(`u1', `%l2')
+define(`u2', `%l4')
+define(`u3', `%l6')
+
+define(`tnc',`%i4')          C negated cnt, used as the complementary right shift count
+
+define(`fanop',`fitod %f0,%f2')                dnl  A quasi nop running in the FA pipe
+
+ASM_START()
+       REGISTER(%g2,#scratch)
+       REGISTER(%g3,#scratch)
+PROLOGUE(mpn_lshift)
+       save    %sp,-160,%sp            C new register window with a 160-byte frame
+
+       sllx    n,3,%g1                 C %g1 = n * 8, operand size in bytes
+       sub     %g0,cnt,tnc             C tnc = -cnt; srlx uses only the low 6 bits, i.e. 64-cnt
+       add     up,%g1,up               C make up point at end of src
+       add     rp,%g1,rp               C make rp point at end of res
+       ldx     [up-8],u3               C load first limb (the highest-addressed one)
+       subcc   n,5,n                   C n -= 5 and set the condition codes
+       srlx    u3,tnc,%i5              C compute function result: bits shifted out of that limb
+       bl,pn   %xcc,.Lend1234          C fewer than 5 limbs: one-limb-at-a-time code only
+       sllx    u3,cnt,%g3              C delay slot, executed on both paths
+
+       subcc   n,4,n                   C sets the flags tested by the bl below
+       ldx     [up-16],u0
+       ldx     [up-24],u1
+       add     up,-32,up
+       ldx     [up-0],u2
+       ldx     [up-8],u3
+
+       bl,pn   %xcc,.Lend5678          C fewer than 9 limbs: skip the unrolled loop
+       srlx    u0,tnc,%g2              C delay slot
+
+       b,a     .Loop                   C annulled branch over the alignment padding
+       ALIGN(16)
+.Loop:
+       sllx    u0,cnt,%g1              C main loop: 4 limbs per iteration, fanop fills each group
+       or      %g3,%g2,%g3             C merge high part with the bits shifted in from the next lower limb
+       ldx     [up-16],u0
+       fanop
+C --
+       srlx    u1,tnc,%g2
+       subcc   n,4,n                   C loop counter; flags are consumed by the bge at the bottom
+       stx     %g3,[rp-8]
+       fanop
+C --
+       sllx    u1,cnt,%g3
+       or      %g1,%g2,%g1
+       ldx     [up-24],u1
+       fanop
+C --
+       srlx    u2,tnc,%g2
+       stx     %g1,[rp-16]
+       add     up,-32,up
+       fanop
+C --
+       sllx    u2,cnt,%g1
+       or      %g3,%g2,%g3
+       ldx     [up-0],u2
+       fanop
+C --
+       srlx    u3,tnc,%g2
+       stx     %g3,[rp-24]
+       add     rp,-32,rp
+       fanop
+C --
+       sllx    u3,cnt,%g3
+       or      %g1,%g2,%g1
+       ldx     [up-8],u3
+       fanop
+C --
+       srlx    u0,tnc,%g2
+       stx     %g1,[rp-0]
+       bge,pt  %xcc,.Loop              C continue while n >= 0
+       fanop
+C --
+.Lend5678:
+       sllx    u0,cnt,%g1              C drain the four limbs already loaded; no further loads here
+       or      %g3,%g2,%g3
+       srlx    u1,tnc,%g2
+       stx     %g3,[rp-8]
+       sllx    u1,cnt,%g3
+       or      %g1,%g2,%g1
+       srlx    u2,tnc,%g2
+       stx     %g1,[rp-16]
+       sllx    u2,cnt,%g1
+       or      %g3,%g2,%g3
+       srlx    u3,tnc,%g2
+       stx     %g3,[rp-24]
+       add     rp,-32,rp
+       sllx    u3,cnt,%g3              C carry into the code below: high part of the next result limb
+       or      %g1,%g2,%g1
+       stx     %g1,[rp-0]
+
+.Lend1234:
+       addcc   n,4,n                   C n = number of limbs still to load (0 to 3)
+       bz,pn   %xcc,.Lret              C none left: only the final store remains
+       fanop
+.Loop0:
+       add     rp,-8,rp                C tail loop: one limb per iteration
+       subcc   n,1,n
+       ldx     [up-16],u3
+       add     up,-8,up
+       srlx    u3,tnc,%g2
+       or      %g3,%g2,%g3
+       stx     %g3,[rp]
+       sllx    u3,cnt,%g3
+       bnz,pt  %xcc,.Loop0
+       fanop
+.Lret:
+       stx     %g3,[rp-8]              C last result limb; its low cnt bits are zero from sllx
+       mov     %i5,%i0                 C return value; %i0 is the caller's %o0 after restore
+       ret
+       restore                         C delay slot of ret
+EPILOGUE(mpn_lshift)
diff --git a/mpn/sparc64/ultrasparc1234/lshiftc.asm b/mpn/sparc64/ultrasparc1234/lshiftc.asm

new file mode 100644 (file)

index 0000000..09fe652
--- /dev/null
+++ b/mpn/sparc64/ultrasparc1234/lshiftc.asm
@@ -0,0 +1,155 @@
+dnl  SPARC v9 mpn_lshiftc
+
+dnl  Copyright 1996, 2000, 2001, 2002, 2003, 2010 Free Software Foundation,
+dnl  Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+
+include(`../config.m4')
+
+C                 cycles/limb
+C UltraSPARC 1&2:     ?
+C UltraSPARC 3:              2.67
+
+C INPUT PARAMETERS
+define(`rp', `%i0')          C destination pointer (written with stx)
+define(`up', `%i1')          C source pointer (read with ldx)
+define(`n',  `%i2')          C limb count, 8 bytes per limb
+define(`cnt',`%i3')          C left shift count in bits
+
+define(`u0', `%l0')          C u0-u3 hold the source limbs currently in flight
+define(`u1', `%l2')
+define(`u2', `%l4')
+define(`u3', `%l6')
+
+define(`tnc',`%i4')          C negated cnt, used as the complementary right shift count
+
+define(`fanop',`fitod %f0,%f2')                dnl  A quasi nop running in the FA pipe
+
+ASM_START()
+       REGISTER(%g2,#scratch)
+       REGISTER(%g3,#scratch)
+PROLOGUE(mpn_lshiftc)
+       save    %sp,-160,%sp            C new register window with a 160-byte frame
+
+       sllx    n,3,%g1                 C %g1 = n * 8, operand size in bytes
+       sub     %g0,cnt,tnc             C tnc = -cnt; srlx uses only the low 6 bits, i.e. 64-cnt
+       add     up,%g1,up               C make up point at end of src
+       add     rp,%g1,rp               C make rp point at end of res
+       ldx     [up-8],u3               C load first limb (the highest-addressed one)
+       subcc   n,5,n                   C n -= 5 and set the condition codes
+       srlx    u3,tnc,%i5              C compute function result (not complemented)
+       bl,pn   %xcc,.Lend1234          C fewer than 5 limbs: one-limb-at-a-time code only
+       sllx    u3,cnt,%g3              C delay slot, executed on both paths
+
+       subcc   n,4,n                   C sets the flags tested by the bl below
+       ldx     [up-16],u0
+       ldx     [up-24],u1
+       add     up,-32,up
+       ldx     [up-0],u2
+       ldx     [up-8],u3
+       srlx    u0,tnc,%g2
+       bl,pn   %xcc,.Lend5678          C fewer than 9 limbs: skip the unrolled loop
+       not     %g3, %g3                C delay slot: complement the pending high part
+
+       b,a     .Loop                   C annulled branch over the alignment padding
+       ALIGN(16)
+.Loop:
+       sllx    u0,cnt,%g1              C main loop: 4 limbs per iteration
+       andn    %g3,%g2,%g3             C %g3 is already complemented, so this is ~(high | low)
+       ldx     [up-16],u0
+       fanop
+C --
+       srlx    u1,tnc,%g2
+       subcc   n,4,n                   C loop counter; flags are consumed by the bge at the bottom
+       stx     %g3,[rp-8]
+       not     %g1, %g1                C pre-complement the high part for the next andn
+C --
+       sllx    u1,cnt,%g3
+       andn    %g1,%g2,%g1
+       ldx     [up-24],u1
+       fanop
+C --
+       srlx    u2,tnc,%g2
+       stx     %g1,[rp-16]
+       add     up,-32,up
+       not     %g3, %g3
+C --
+       sllx    u2,cnt,%g1
+       andn    %g3,%g2,%g3
+       ldx     [up-0],u2
+       fanop
+C --
+       srlx    u3,tnc,%g2
+       stx     %g3,[rp-24]
+       add     rp,-32,rp
+       not     %g1, %g1
+C --
+       sllx    u3,cnt,%g3
+       andn    %g1,%g2,%g1
+       ldx     [up-8],u3
+       fanop
+C --
+       srlx    u0,tnc,%g2
+       stx     %g1,[rp-0]
+       bge,pt  %xcc,.Loop              C continue while n >= 0
+       not     %g3, %g3                C delay slot, also executed when falling out of the loop
+C --
+.Lend5678:
+       sllx    u0,cnt,%g1              C drain the four limbs already loaded; no further loads here
+       andn    %g3,%g2,%g3
+       srlx    u1,tnc,%g2
+       stx     %g3,[rp-8]
+       not     %g1, %g1
+       sllx    u1,cnt,%g3
+       andn    %g1,%g2,%g1
+       srlx    u2,tnc,%g2
+       stx     %g1,[rp-16]
+       not     %g3, %g3
+       sllx    u2,cnt,%g1
+       andn    %g3,%g2,%g3
+       srlx    u3,tnc,%g2
+       stx     %g3,[rp-24]
+       add     rp,-32,rp
+       not     %g1, %g1
+       sllx    u3,cnt,%g3              C carry into the code below, left uncomplemented here
+       andn    %g1,%g2,%g1
+       stx     %g1,[rp-0]
+
+.Lend1234:
+       addcc   n,4,n                   C n = number of limbs still to load (0 to 3)
+       bz,pn   %xcc,.Lret              C none left: only the final store remains
+       fanop
+.Loop0:
+       add     rp,-8,rp                C tail loop: one limb per iteration
+       subcc   n,1,n
+       ldx     [up-16],u3
+       add     up,-8,up
+       srlx    u3,tnc,%g2
+       not     %g3, %g3                C not followed by andn gives ~(high | low)
+       andn    %g3,%g2,%g3
+       stx     %g3,[rp]
+       sllx    u3,cnt,%g3
+       bnz,pt  %xcc,.Loop0
+       fanop
+.Lret:
+       not     %g3, %g3                C complement the last limb; its low cnt bits become ones
+       stx     %g3,[rp-8]
+       mov     %i5,%i0                 C return value; %i0 is the caller's %o0 after restore
+       ret
+       restore                         C delay slot of ret
+EPILOGUE()
diff --git a/mpn/sparc64/ultrasparc1234/mul_1.asm b/mpn/sparc64/ultrasparc1234/mul_1.asm

new file mode 100644 (file)

index 0000000..752abc8
--- /dev/null
+++ b/mpn/sparc64/ultrasparc1234/mul_1.asm
@@ -0,0 +1,569 @@
+dnl  SPARC v9 64-bit mpn_mul_1 -- Multiply a limb vector with a limb and store
+dnl  the result in a second limb vector.
+
+dnl  Copyright 1998, 2000, 2001, 2002, 2003 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C                 cycles/limb
+C UltraSPARC 1&2:     14
+C UltraSPARC 3:              18.5
+
+C Algorithm: We use eight floating-point multiplies per limb product, with the
+C invariant v operand split into four 16-bit pieces, and the s1 operand split
+C into 32-bit pieces.  We sum pairs of 48-bit partial products using
+C floating-point add, then convert the four 49-bit product-sums and transfer
+C them to the integer unit.
+
+C Possible optimizations:
+C   1. Align the stack area where we transfer the four 49-bit product-sums
+C      to a 32-byte boundary.  That would minimize the cache collision.
+C      (UltraSPARC-1/2 use a direct-mapped cache.)  (Perhaps even better would
+C      be to align the area to map to the area immediately before s1?)
+C   2. Sum the four 49-bit quantities using 32-bit operations, as in the
+C      development mpn_addmul_2.  This would save many integer instructions.
+C   3. Unrolling.  Questionable if it is worth the code expansion, given that
+C      it could only save 1 cycle/limb.
+C   4. Specialize for particular v values.  If its upper 32 bits are zero, we
+C      could save many operations, in the FPU (fmuld), but more so in the IEU
+C      since we'll be summing 48-bit quantities, which might be simpler.
+C   5. Ideally, we should schedule the f2/f3 and f4/f5 RAW further apart, and
+C      the i00,i16,i32,i48 RAW less apart.  The latter apart-scheduling should
+C      not be greater than needed for L2 cache latency, and also not so great
+C      that i16 needs to be copied.
+C   6. Avoid performing mem+fa+fm in the same cycle, at least not when we want
+C      to get high IEU bandwidth.  (12 of the 14 cycles will be free for 2 IEU
+C      ops.)
+
+C Instruction classification (as per UltraSPARC-1/2 functional units):
+C    8 FM
+C   10 FA
+C   11 MEM
+C    9 ISHIFT + 10? IADDLOG
+C    1 BRANCH
+C   49 insns in total (plus three mov insns that should be optimized out)
+
+C The loop executes 53 instructions in 14 cycles on UltraSPARC-1/2, i.e. we
+C sustain 3.79 instructions/cycle.
+
+C INPUT PARAMETERS
+C rp   i0
+C up   i1
+C n    i2
+C v    i3
+
+ASM_START()
+       REGISTER(%g2,#scratch)
+       REGISTER(%g3,#scratch)
+
+define(`p00', `%f8') define(`p16',`%f10') define(`p32',`%f12') define(`p48',`%f14')
+define(`r32',`%f16') define(`r48',`%f18') define(`r64',`%f20') define(`r80',`%f22')
+define(`v00',`%f24') define(`v16',`%f26') define(`v32',`%f28') define(`v48',`%f30')
+define(`u00',`%f32') define(`u32', `%f34')
+define(`a00',`%f36') define(`a16',`%f38') define(`a32',`%f40') define(`a48',`%f42')
+define(`cy',`%g1')
+define(`rlimb',`%g3')
+define(`i00',`%l0') define(`i16',`%l1') define(`i32',`%l2') define(`i48',`%l3')
+define(`xffffffff',`%l7')
+define(`xffff',`%o0')
+
+PROLOGUE(mpn_mul_1)
+
+C Initialization.  (1) Split v operand into four 16-bit chunks and store them
+C as IEEE double in fp registers.  (2) Clear upper 32 bits of fp register pairs
+C f2 and f4.  (3) Store masks in registers aliased to `xffff' and `xffffffff'.
+
+       save    %sp, -256, %sp
+       mov     -1, %g4
+       srlx    %g4, 48, xffff          C store mask in register `xffff'
+       and     %i3, xffff, %g2
+       stx     %g2, [%sp+2223+0]
+       srlx    %i3, 16, %g3
+       and     %g3, xffff, %g3
+       stx     %g3, [%sp+2223+8]
+       srlx    %i3, 32, %g2
+       and     %g2, xffff, %g2
+       stx     %g2, [%sp+2223+16]
+       srlx    %i3, 48, %g3
+       stx     %g3, [%sp+2223+24]
+       srlx    %g4, 32, xffffffff      C store mask in register `xffffffff'
+
+       sllx    %i2, 3, %i2
+       mov     0, cy                   C clear cy
+       add     %i0, %i2, %i0
+       add     %i1, %i2, %i1
+       neg     %i2
+       add     %i1, 4, %i5
+       add     %i0, -32, %i4
+       add     %i0, -16, %i0
+
+       ldd     [%sp+2223+0], v00
+       ldd     [%sp+2223+8], v16
+       ldd     [%sp+2223+16], v32
+       ldd     [%sp+2223+24], v48
+       ld      [%sp+2223+0],%f2        C zero f2
+       ld      [%sp+2223+0],%f4        C zero f4
+       ld      [%i5+%i2], %f3          C read low 32 bits of up[i]
+       ld      [%i1+%i2], %f5          C read high 32 bits of up[i]
+       fxtod   v00, v00
+       fxtod   v16, v16
+       fxtod   v32, v32
+       fxtod   v48, v48
+
+C Start real work.  (We sneakingly read f3 and f5 above...)
+C The software pipeline is very deep, requiring 4 feed-in stages.
+
+       fxtod   %f2, u00
+       fxtod   %f4, u32
+       fmuld   u00, v00, a00
+       fmuld   u00, v16, a16
+       fmuld   u00, v32, p32
+       fmuld   u32, v00, r32
+       fmuld   u00, v48, p48
+       addcc   %i2, 8, %i2
+       bnz,pt  %xcc, .L_two_or_more
+       fmuld   u32, v16, r48
+
+.L_one:
+       fmuld   u32, v32, r64   C FIXME not urgent
+       faddd   p32, r32, a32
+       fdtox   a00, a00
+       faddd   p48, r48, a48
+       fmuld   u32, v48, r80   C FIXME not urgent
+       fdtox   a16, a16
+       fdtox   a32, a32
+       fdtox   a48, a48
+       std     a00, [%sp+2223+0]
+       std     a16, [%sp+2223+8]
+       std     a32, [%sp+2223+16]
+       std     a48, [%sp+2223+24]
+       add     %i2, 8, %i2
+
+       fdtox   r64, a00
+       fdtox   r80, a16
+       ldx     [%sp+2223+0], i00
+       ldx     [%sp+2223+8], i16
+       ldx     [%sp+2223+16], i32
+       ldx     [%sp+2223+24], i48
+       std     a00, [%sp+2223+0]
+       std     a16, [%sp+2223+8]
+       add     %i2, 8, %i2
+
+       mov     i00, %g5                C i00+ now in g5
+       ldx     [%sp+2223+0], i00
+       srlx    i16, 48, %l4            C (i16 >> 48)
+       mov     i16, %g2
+       ldx     [%sp+2223+8], i16
+       srlx    i48, 16, %l5            C (i48 >> 16)
+       mov     i32, %g4                C i32+ now in g4
+       sllx    i48, 32, %l6            C (i48 << 32)
+       srlx    %g4, 32, %o3            C (i32 >> 32)
+       add     %l5, %l4, %o1           C hi64- in %o1
+       std     a00, [%sp+2223+0]
+       sllx    %g4, 16, %o2            C (i32 << 16)
+       add     %o3, %o1, %o1           C hi64 in %o1   1st ASSIGNMENT
+       std     a16, [%sp+2223+8]
+       sllx    %o1, 48, %o3            C (hi64 << 48)
+       add     %g2, %o2, %o2           C mi64- in %o2
+       add     %l6, %o2, %o2           C mi64- in %o2
+       sub     %o2, %o3, %o2           C mi64 in %o2   1st ASSIGNMENT
+       add     cy, %g5, %o4            C x = prev(i00) + cy
+       b       .L_out_1
+       add     %i2, 8, %i2
+
+.L_two_or_more:
+       ld      [%i5+%i2], %f3          C read low 32 bits of up[i]
+       fmuld   u32, v32, r64   C FIXME not urgent
+       faddd   p32, r32, a32
+       ld      [%i1+%i2], %f5          C read high 32 bits of up[i]
+       fdtox   a00, a00
+       faddd   p48, r48, a48
+       fmuld   u32, v48, r80   C FIXME not urgent
+       fdtox   a16, a16
+       fdtox   a32, a32
+       fxtod   %f2, u00
+       fxtod   %f4, u32
+       fdtox   a48, a48
+       std     a00, [%sp+2223+0]
+       fmuld   u00, v00, p00
+       std     a16, [%sp+2223+8]
+       fmuld   u00, v16, p16
+       std     a32, [%sp+2223+16]
+       fmuld   u00, v32, p32
+       std     a48, [%sp+2223+24]
+       faddd   p00, r64, a00
+       fmuld   u32, v00, r32
+       faddd   p16, r80, a16
+       fmuld   u00, v48, p48
+       addcc   %i2, 8, %i2
+       bnz,pt  %xcc, .L_three_or_more
+       fmuld   u32, v16, r48
+
+.L_two:
+       fmuld   u32, v32, r64   C FIXME not urgent
+       faddd   p32, r32, a32
+       fdtox   a00, a00
+       faddd   p48, r48, a48
+       fmuld   u32, v48, r80   C FIXME not urgent
+       fdtox   a16, a16
+       ldx     [%sp+2223+0], i00
+       fdtox   a32, a32
+       ldx     [%sp+2223+8], i16
+       ldx     [%sp+2223+16], i32
+       ldx     [%sp+2223+24], i48
+       fdtox   a48, a48
+       std     a00, [%sp+2223+0]
+       std     a16, [%sp+2223+8]
+       std     a32, [%sp+2223+16]
+       std     a48, [%sp+2223+24]
+       add     %i2, 8, %i2
+
+       fdtox   r64, a00
+       mov     i00, %g5                C i00+ now in g5
+       fdtox   r80, a16
+       ldx     [%sp+2223+0], i00
+       srlx    i16, 48, %l4            C (i16 >> 48)
+       mov     i16, %g2
+       ldx     [%sp+2223+8], i16
+       srlx    i48, 16, %l5            C (i48 >> 16)
+       mov     i32, %g4                C i32+ now in g4
+       ldx     [%sp+2223+16], i32
+       sllx    i48, 32, %l6            C (i48 << 32)
+       ldx     [%sp+2223+24], i48
+       srlx    %g4, 32, %o3            C (i32 >> 32)
+       add     %l5, %l4, %o1           C hi64- in %o1
+       std     a00, [%sp+2223+0]
+       sllx    %g4, 16, %o2            C (i32 << 16)
+       add     %o3, %o1, %o1           C hi64 in %o1   1st ASSIGNMENT
+       std     a16, [%sp+2223+8]
+       sllx    %o1, 48, %o3            C (hi64 << 48)
+       add     %g2, %o2, %o2           C mi64- in %o2
+       add     %l6, %o2, %o2           C mi64- in %o2
+       sub     %o2, %o3, %o2           C mi64 in %o2   1st ASSIGNMENT
+       add     cy, %g5, %o4            C x = prev(i00) + cy
+       b       .L_out_2
+       add     %i2, 8, %i2
+
+.L_three_or_more:
+       ld      [%i5+%i2], %f3          C read low 32 bits of up[i]
+       fmuld   u32, v32, r64   C FIXME not urgent
+       faddd   p32, r32, a32
+       ld      [%i1+%i2], %f5          C read high 32 bits of up[i]
+       fdtox   a00, a00
+       faddd   p48, r48, a48
+       fmuld   u32, v48, r80   C FIXME not urgent
+       fdtox   a16, a16
+       ldx     [%sp+2223+0], i00
+       fdtox   a32, a32
+       ldx     [%sp+2223+8], i16
+       fxtod   %f2, u00
+       ldx     [%sp+2223+16], i32
+       fxtod   %f4, u32
+       ldx     [%sp+2223+24], i48
+       fdtox   a48, a48
+       std     a00, [%sp+2223+0]
+       fmuld   u00, v00, p00
+       std     a16, [%sp+2223+8]
+       fmuld   u00, v16, p16
+       std     a32, [%sp+2223+16]
+       fmuld   u00, v32, p32
+       std     a48, [%sp+2223+24]
+       faddd   p00, r64, a00
+       fmuld   u32, v00, r32
+       faddd   p16, r80, a16
+       fmuld   u00, v48, p48
+       addcc   %i2, 8, %i2
+       bnz,pt  %xcc, .L_four_or_more
+       fmuld   u32, v16, r48
+
+.L_three:
+       fmuld   u32, v32, r64   C FIXME not urgent
+       faddd   p32, r32, a32
+       fdtox   a00, a00
+       faddd   p48, r48, a48
+       mov     i00, %g5                C i00+ now in g5
+       fmuld   u32, v48, r80   C FIXME not urgent
+       fdtox   a16, a16
+       ldx     [%sp+2223+0], i00
+       fdtox   a32, a32
+       srlx    i16, 48, %l4            C (i16 >> 48)
+       mov     i16, %g2
+       ldx     [%sp+2223+8], i16
+       srlx    i48, 16, %l5            C (i48 >> 16)
+       mov     i32, %g4                C i32+ now in g4
+       ldx     [%sp+2223+16], i32
+       sllx    i48, 32, %l6            C (i48 << 32)
+       ldx     [%sp+2223+24], i48
+       fdtox   a48, a48
+       srlx    %g4, 32, %o3            C (i32 >> 32)
+       add     %l5, %l4, %o1           C hi64- in %o1
+       std     a00, [%sp+2223+0]
+       sllx    %g4, 16, %o2            C (i32 << 16)
+       add     %o3, %o1, %o1           C hi64 in %o1   1st ASSIGNMENT
+       std     a16, [%sp+2223+8]
+       sllx    %o1, 48, %o3            C (hi64 << 48)
+       add     %g2, %o2, %o2           C mi64- in %o2
+       std     a32, [%sp+2223+16]
+       add     %l6, %o2, %o2           C mi64- in %o2
+       std     a48, [%sp+2223+24]
+       sub     %o2, %o3, %o2           C mi64 in %o2   1st ASSIGNMENT
+       add     cy, %g5, %o4            C x = prev(i00) + cy
+       b       .L_out_3
+       add     %i2, 8, %i2
+
+.L_four_or_more:
+       ld      [%i5+%i2], %f3          C read low 32 bits of up[i]
+       fmuld   u32, v32, r64   C FIXME not urgent
+       faddd   p32, r32, a32
+       ld      [%i1+%i2], %f5          C read high 32 bits of up[i]
+       fdtox   a00, a00
+       faddd   p48, r48, a48
+       mov     i00, %g5                C i00+ now in g5
+       fmuld   u32, v48, r80   C FIXME not urgent
+       fdtox   a16, a16
+       ldx     [%sp+2223+0], i00
+       fdtox   a32, a32
+       srlx    i16, 48, %l4            C (i16 >> 48)
+       mov     i16, %g2
+       ldx     [%sp+2223+8], i16
+       fxtod   %f2, u00
+       srlx    i48, 16, %l5            C (i48 >> 16)
+       mov     i32, %g4                C i32+ now in g4
+       ldx     [%sp+2223+16], i32
+       fxtod   %f4, u32
+       sllx    i48, 32, %l6            C (i48 << 32)
+       ldx     [%sp+2223+24], i48
+       fdtox   a48, a48
+       srlx    %g4, 32, %o3            C (i32 >> 32)
+       add     %l5, %l4, %o1           C hi64- in %o1
+       std     a00, [%sp+2223+0]
+       fmuld   u00, v00, p00
+       sllx    %g4, 16, %o2            C (i32 << 16)
+       add     %o3, %o1, %o1           C hi64 in %o1   1st ASSIGNMENT
+       std     a16, [%sp+2223+8]
+       fmuld   u00, v16, p16
+       sllx    %o1, 48, %o3            C (hi64 << 48)
+       add     %g2, %o2, %o2           C mi64- in %o2
+       std     a32, [%sp+2223+16]
+       fmuld   u00, v32, p32
+       add     %l6, %o2, %o2           C mi64- in %o2
+       std     a48, [%sp+2223+24]
+       faddd   p00, r64, a00
+       fmuld   u32, v00, r32
+       sub     %o2, %o3, %o2           C mi64 in %o2   1st ASSIGNMENT
+       faddd   p16, r80, a16
+       fmuld   u00, v48, p48
+       add     cy, %g5, %o4            C x = prev(i00) + cy
+       addcc   %i2, 8, %i2
+       bnz,pt  %xcc, .Loop
+       fmuld   u32, v16, r48
+
+.L_four:
+       b,a     .L_out_4
+
+C BEGIN MAIN LOOP
+       .align  16
+.Loop:
+C 00
+       srlx    %o4, 16, %o5            C (x >> 16)
+       ld      [%i5+%i2], %f3          C read low 32 bits of up[i]
+       fmuld   u32, v32, r64   C FIXME not urgent
+       faddd   p32, r32, a32
+C 01
+       add     %o5, %o2, %o2           C mi64 in %o2   2nd ASSIGNMENT
+       and     %o4, xffff, %o5         C (x & 0xffff)
+       ld      [%i1+%i2], %f5          C read high 32 bits of up[i]
+       fdtox   a00, a00
+C 02
+       faddd   p48, r48, a48
+C 03
+       srlx    %o2, 48, %o7            C (mi64 >> 48)
+       mov     i00, %g5                C i00+ now in g5
+       fmuld   u32, v48, r80   C FIXME not urgent
+       fdtox   a16, a16
+C 04
+       sllx    %o2, 16, %i3            C (mi64 << 16)
+       add     %o7, %o1, cy            C new cy
+       ldx     [%sp+2223+0], i00
+       fdtox   a32, a32
+C 05
+       srlx    i16, 48, %l4            C (i16 >> 48)
+       mov     i16, %g2
+       ldx     [%sp+2223+8], i16
+       fxtod   %f2, u00
+C 06
+       srlx    i48, 16, %l5            C (i48 >> 16)
+       mov     i32, %g4                C i32+ now in g4
+       ldx     [%sp+2223+16], i32
+       fxtod   %f4, u32
+C 07
+       sllx    i48, 32, %l6            C (i48 << 32)
+       or      %i3, %o5, %o5
+       ldx     [%sp+2223+24], i48
+       fdtox   a48, a48
+C 08
+       srlx    %g4, 32, %o3            C (i32 >> 32)
+       add     %l5, %l4, %o1           C hi64- in %o1
+       std     a00, [%sp+2223+0]
+       fmuld   u00, v00, p00
+C 09
+       sllx    %g4, 16, %o2            C (i32 << 16)
+       add     %o3, %o1, %o1           C hi64 in %o1   1st ASSIGNMENT
+       std     a16, [%sp+2223+8]
+       fmuld   u00, v16, p16
+C 10
+       sllx    %o1, 48, %o3            C (hi64 << 48)
+       add     %g2, %o2, %o2           C mi64- in %o2
+       std     a32, [%sp+2223+16]
+       fmuld   u00, v32, p32
+C 11
+       add     %l6, %o2, %o2           C mi64- in %o2
+       std     a48, [%sp+2223+24]
+       faddd   p00, r64, a00
+       fmuld   u32, v00, r32
+C 12
+       sub     %o2, %o3, %o2           C mi64 in %o2   1st ASSIGNMENT
+       stx     %o5, [%i4+%i2]
+       faddd   p16, r80, a16
+       fmuld   u00, v48, p48
+C 13
+       add     cy, %g5, %o4            C x = prev(i00) + cy
+       addcc   %i2, 8, %i2
+       bnz,pt  %xcc, .Loop
+       fmuld   u32, v16, r48
+C END MAIN LOOP
+
+.L_out_4:
+       srlx    %o4, 16, %o5            C (x >> 16)
+       fmuld   u32, v32, r64   C FIXME not urgent
+       faddd   p32, r32, a32
+       add     %o5, %o2, %o2           C mi64 in %o2   2nd ASSIGNMENT
+       and     %o4, xffff, %o5         C (x & 0xffff)
+       fdtox   a00, a00
+       faddd   p48, r48, a48
+       srlx    %o2, 48, %o7            C (mi64 >> 48)
+       mov     i00, %g5                C i00+ now in g5
+       fmuld   u32, v48, r80   C FIXME not urgent
+       fdtox   a16, a16
+       sllx    %o2, 16, %i3            C (mi64 << 16)
+       add     %o7, %o1, cy            C new cy
+       ldx     [%sp+2223+0], i00
+       fdtox   a32, a32
+       srlx    i16, 48, %l4            C (i16 >> 48)
+       mov     i16, %g2
+       ldx     [%sp+2223+8], i16
+       srlx    i48, 16, %l5            C (i48 >> 16)
+       mov     i32, %g4                C i32+ now in g4
+       ldx     [%sp+2223+16], i32
+       sllx    i48, 32, %l6            C (i48 << 32)
+       or      %i3, %o5, %o5
+       ldx     [%sp+2223+24], i48
+       fdtox   a48, a48
+       srlx    %g4, 32, %o3            C (i32 >> 32)
+       add     %l5, %l4, %o1           C hi64- in %o1
+       std     a00, [%sp+2223+0]
+       sllx    %g4, 16, %o2            C (i32 << 16)
+       add     %o3, %o1, %o1           C hi64 in %o1   1st ASSIGNMENT
+       std     a16, [%sp+2223+8]
+       sllx    %o1, 48, %o3            C (hi64 << 48)
+       add     %g2, %o2, %o2           C mi64- in %o2
+       std     a32, [%sp+2223+16]
+       add     %l6, %o2, %o2           C mi64- in %o2
+       std     a48, [%sp+2223+24]
+       sub     %o2, %o3, %o2           C mi64 in %o2   1st ASSIGNMENT
+       stx     %o5, [%i4+%i2]
+       add     cy, %g5, %o4            C x = prev(i00) + cy
+       add     %i2, 8, %i2
+.L_out_3:
+       srlx    %o4, 16, %o5            C (x >> 16)
+       add     %o5, %o2, %o2           C mi64 in %o2   2nd ASSIGNMENT
+       and     %o4, xffff, %o5         C (x & 0xffff)
+       fdtox   r64, a00
+       srlx    %o2, 48, %o7            C (mi64 >> 48)
+       mov     i00, %g5                C i00+ now in g5
+       fdtox   r80, a16
+       sllx    %o2, 16, %i3            C (mi64 << 16)
+       add     %o7, %o1, cy            C new cy
+       ldx     [%sp+2223+0], i00
+       srlx    i16, 48, %l4            C (i16 >> 48)
+       mov     i16, %g2
+       ldx     [%sp+2223+8], i16
+       srlx    i48, 16, %l5            C (i48 >> 16)
+       mov     i32, %g4                C i32+ now in g4
+       ldx     [%sp+2223+16], i32
+       sllx    i48, 32, %l6            C (i48 << 32)
+       or      %i3, %o5, %o5
+       ldx     [%sp+2223+24], i48
+       srlx    %g4, 32, %o3            C (i32 >> 32)
+       add     %l5, %l4, %o1           C hi64- in %o1
+       std     a00, [%sp+2223+0]
+       sllx    %g4, 16, %o2            C (i32 << 16)
+       add     %o3, %o1, %o1           C hi64 in %o1   1st ASSIGNMENT
+       std     a16, [%sp+2223+8]
+       sllx    %o1, 48, %o3            C (hi64 << 48)
+       add     %g2, %o2, %o2           C mi64- in %o2
+       add     %l6, %o2, %o2           C mi64- in %o2
+       sub     %o2, %o3, %o2           C mi64 in %o2   1st ASSIGNMENT
+       stx     %o5, [%i4+%i2]
+       add     cy, %g5, %o4            C x = prev(i00) + cy
+       add     %i2, 8, %i2
+.L_out_2:
+       srlx    %o4, 16, %o5            C (x >> 16)
+       add     %o5, %o2, %o2           C mi64 in %o2   2nd ASSIGNMENT
+       and     %o4, xffff, %o5         C (x & 0xffff)
+       srlx    %o2, 48, %o7            C (mi64 >> 48)
+       mov     i00, %g5                C i00+ now in g5
+       sllx    %o2, 16, %i3            C (mi64 << 16)
+       add     %o7, %o1, cy            C new cy
+       ldx     [%sp+2223+0], i00
+       srlx    i16, 48, %l4            C (i16 >> 48)
+       mov     i16, %g2
+       ldx     [%sp+2223+8], i16
+       srlx    i48, 16, %l5            C (i48 >> 16)
+       mov     i32, %g4                C i32+ now in g4
+       sllx    i48, 32, %l6            C (i48 << 32)
+       or      %i3, %o5, %o5
+       srlx    %g4, 32, %o3            C (i32 >> 32)
+       add     %l5, %l4, %o1           C hi64- in %o1
+       sllx    %g4, 16, %o2            C (i32 << 16)
+       add     %o3, %o1, %o1           C hi64 in %o1   1st ASSIGNMENT
+       sllx    %o1, 48, %o3            C (hi64 << 48)
+       add     %g2, %o2, %o2           C mi64- in %o2
+       add     %l6, %o2, %o2           C mi64- in %o2
+       sub     %o2, %o3, %o2           C mi64 in %o2   1st ASSIGNMENT
+       stx     %o5, [%i4+%i2]
+       add     cy, %g5, %o4            C x = prev(i00) + cy
+       add     %i2, 8, %i2
+.L_out_1:
+       srlx    %o4, 16, %o5            C (x >> 16)
+       add     %o5, %o2, %o2           C mi64 in %o2   2nd ASSIGNMENT
+       and     %o4, xffff, %o5         C (x & 0xffff)
+       srlx    %o2, 48, %o7            C (mi64 >> 48)
+       sllx    %o2, 16, %i3            C (mi64 << 16)
+       add     %o7, %o1, cy            C new cy
+       or      %i3, %o5, %o5
+       stx     %o5, [%i4+%i2]
+
+       sllx    i00, 0, %g2
+       add     %g2, cy, cy
+       sllx    i16, 16, %g3
+       add     %g3, cy, cy
+
+       return  %i7+8
+       mov     cy, %o0
+EPILOGUE(mpn_mul_1)
diff --git a/mpn/sparc64/ultrasparc1234/rshift.asm b/mpn/sparc64/ultrasparc1234/rshift.asm

new file mode 100644 (file)

index 0000000..aeba31a
--- /dev/null
+++ b/mpn/sparc64/ultrasparc1234/rshift.asm
@@ -0,0 +1,147 @@
+dnl  SPARC v9 mpn_rshift
+
+dnl  Copyright 1996, 2000, 2001, 2002, 2003, 2010 Free Software Foundation,
+dnl  Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+
+include(`../config.m4')
+
+C                 cycles/limb
+C UltraSPARC 1&2:     2
+C UltraSPARC 3:              2.5       (for some up/rp alignments)
+
+C INPUT PARAMETERS
+define(`rp', `%i0')
+define(`up', `%i1')
+define(`n',  `%i2')
+define(`cnt',`%i3')
+
+define(`u0', `%l0')
+define(`u1', `%l2')
+define(`u2', `%l4')
+define(`u3', `%l6')
+
+define(`tnc',`%i4')
+
+define(`fanop',`fitod %f0,%f2')                dnl  A quasi nop running in the FA pipe
+
+ASM_START()
+       REGISTER(%g2,#scratch)
+       REGISTER(%g3,#scratch)
+PROLOGUE(mpn_rshift)
+C Shift the n limbs at up right by cnt bits and store the n result limbs at
+C rp.  The return value is the first source limb shifted left by tnc, that
+C is, the bits shifted out at the low end.
+C
+C tnc is -cnt.  NOTE(review): the code relies on sllx taking the count
+C modulo 64, so that a shift by tnc acts as a shift by 64-cnt; confirm
+C against the SPARC V9 manual.
+C
+C The first limb is loaded before n is examined, so n must be at least 1.
+C %g3 and %g1 alternate as the low part, limb >> cnt, of the result limb
+C being assembled; %g2 is the next limb << tnc that gets or-ed into it.
+       save    %sp,-160,%sp
+
+       sub     %g0,cnt,tnc             C negate shift count
+       ldx     [up],u3                 C load first limb
+       subcc   n,5,n
+       sllx    u3,tnc,%i5              C compute function result
+       bl,pn   %xcc,.Lend1234          C fewer than 5 limbs: one-limb loop only
+       srlx    u3,cnt,%g3              C delay slot: low part of first result limb
+
+       subcc   n,4,n
+       ldx     [up+8],u0
+       ldx     [up+16],u1
+       add     up,32,up
+       ldx     [up-8],u2
+       ldx     [up+0],u3
+
+       bl,pn   %xcc,.Lend5678          C fewer than 9 limbs: skip the main loop
+       sllx    u0,tnc,%g2
+
+       b,a     .Loop
+       ALIGN(16)
+C Main loop, unrolled four times: each pass stores four result limbs and
+C loads four new source limbs.
+.Loop:
+       srlx    u0,cnt,%g1
+       or      %g3,%g2,%g3
+       ldx     [up+8],u0
+       fanop
+C --
+       sllx    u1,tnc,%g2
+       subcc   n,4,n
+       stx     %g3,[rp+0]
+       fanop
+C --
+       srlx    u1,cnt,%g3
+       or      %g1,%g2,%g1
+       ldx     [up+16],u1
+       fanop
+C --
+       sllx    u2,tnc,%g2
+       stx     %g1,[rp+8]
+       add     up,32,up
+       fanop
+C --
+       srlx    u2,cnt,%g1
+       or      %g3,%g2,%g3
+       ldx     [up-8],u2
+       fanop
+C --
+       sllx    u3,tnc,%g2
+       stx     %g3,[rp+16]
+       add     rp,32,rp
+       fanop
+C --
+       srlx    u3,cnt,%g3
+       or      %g1,%g2,%g1
+       ldx     [up+0],u3
+       fanop
+C --
+       sllx    u0,tnc,%g2
+       stx     %g1,[rp-8]
+       bge,pt  %xcc,.Loop
+       fanop
+C --
+C Wind-down: store four result limbs built from u0-u3, which are already
+C loaded; no further source limbs are read here.
+.Lend5678:
+       srlx    u0,cnt,%g1
+       or      %g3,%g2,%g3
+       sllx    u1,tnc,%g2
+       stx     %g3,[rp+0]
+       srlx    u1,cnt,%g3
+       or      %g1,%g2,%g1
+       sllx    u2,tnc,%g2
+       stx     %g1,[rp+8]
+       srlx    u2,cnt,%g1
+       or      %g3,%g2,%g3
+       sllx    u3,tnc,%g2
+       stx     %g3,[rp+16]
+       add     rp,32,rp
+       srlx    u3,cnt,%g3              C carry...
+       or      %g1,%g2,%g1
+       stx     %g1,[rp-8]
+
+.Lend1234:
+       addcc   n,4,n                   C n = source limbs not yet loaded
+       bz,pn   %xcc,.Lret
+       fanop
+C One limb per pass: load the next limb, complete and store the pending
+C result limb, and start the following one in %g3.
+.Loop0:
+       add     rp,8,rp
+       subcc   n,1,n
+       ldx     [up+8],u3
+       add     up,8,up
+       sllx    u3,tnc,%g2
+       or      %g3,%g2,%g3
+       stx     %g3,[rp-8]
+       srlx    u3,cnt,%g3
+       bnz,pt  %xcc,.Loop0
+       fanop
+.Lret:
+       stx     %g3,[rp+0]              C last result limb: last source limb >> cnt
+       mov     %i5,%i0                 C return value computed at entry
+       ret
+       restore
+EPILOGUE(mpn_rshift)
diff --git a/mpn/sparc64/ultrasparc1234/sqr_diagonal.asm b/mpn/sparc64/ultrasparc1234/sqr_diagonal.asm

new file mode 100644 (file)

index 0000000..fbbb4ff
--- /dev/null
+++ b/mpn/sparc64/ultrasparc1234/sqr_diagonal.asm
@@ -0,0 +1,331 @@
+dnl  SPARC v9 64-bit mpn_sqr_diagonal.
+
+dnl  Copyright 2001, 2002 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C                 cycles/limb
+C UltraSPARC 1&2:     22
+C UltraSPARC 3:              36
+
+C This was generated by the Sun C compiler.  It runs at 22 cycles/limb on the
+C UltraSPARC-1/2, three cycles slower than theoretically possible for optimal
+C code using the same algorithm.  For 1-3 limbs, a special loop was generated,
+C which causes performance problems in particular for 2 and 3 limbs.
+C Ultimately, this should be replaced by hand-written code in the same software
+C pipeline style as e.g., addmul_1.asm.
+
+ASM_START()
+       REGISTER(%g2,#scratch)
+       REGISTER(%g3,#scratch)
+PROLOGUE(mpn_sqr_diagonal)
+C Square each source limb on its own and store the 128-bit square as two
+C result limbs, low limb first.  As used below: %i0 is the result pointer,
+C %i1 the source pointer and %i2 the limb count.  Both loops run their body
+C before testing the count, so a count of zero is not handled.
+C
+C Each limb x is split as x = a + b*2^21 + c*2^42, with a and b 21 bits wide
+C and c 22 bits wide.  The pieces go through the stack into floating-point
+C registers, are multiplied with fmuld, and the products come back through
+C the stack as integers.  The pieces are at most 22 bits wide, so the
+C products and the sum formed from them stay below 2^53.  Then
+C   low limb  = a*a + (a*b << 22) + ((b*b + 2*a*c) << 42)
+C   high limb = b*c + (c*c << 20) + ((b*b + 2*a*c) >> 22) + carry
+C where carry is the sign bit of
+C   (low limb >> 42) - (low 22 bits of b*b + 2*a*c).
+C
+C The scratch doublewords are at [%sp+2223] through [%sp+2279].
+       save    %sp, -240, %sp
+
+       sethi   %hi(0x1ffc00), %o0
+       sethi   %hi(0x3ffc00), %o1
+       add     %o0, 1023, %o7          C %o7 = 0x1fffff, the 21-bit mask
+       cmp     %i2, 4
+       add     %o1, 1023, %o4          C %o4 = 0x3fffff, the 22-bit mask
+       or      %g0, %i1, %g1           C %g1 = source pointer
+       or      %g0, %i0, %o0           C %o0 = result pointer
+       bl,pn   %xcc, .Lsmall           C fewer than 4 limbs: simple loop
+       or      %g0, 0, %g2             C delay slot: limb counter = 0
+
+C Start-up for the main loop: form the products of limb 0, put the pieces of
+C limb 1 into floating-point registers, and fetch limb 2 into %o2.
+       ldx     [%i1], %o1
+       add     %i1, 24, %g1            C %g1 = address of limb 3
+       or      %g0, 3, %g2             C three limbs are fetched before the loop
+       srlx    %o1, 42, %g3
+       stx     %g3, [%sp+2279]
+       and     %o1, %o7, %o2
+       stx     %o2, [%sp+2263]
+       srlx    %o1, 21, %o1
+       ldd     [%sp+2279], %f0
+       and     %o1, %o7, %o1
+       stx     %o1, [%sp+2271]
+       ldx     [%i1+8], %o2
+       fxtod   %f0, %f12
+       srlx    %o2, 21, %o1
+       and     %o2, %o7, %g3
+       ldd     [%sp+2263], %f2
+       fmuld   %f12, %f12, %f10
+       srlx    %o2, 42, %o2
+       ldd     [%sp+2271], %f0
+       and     %o1, %o7, %o1
+       fxtod   %f2, %f8
+       stx     %o2, [%sp+2279]
+       stx     %o1, [%sp+2271]
+       fxtod   %f0, %f0
+       stx     %g3, [%sp+2263]
+       fdtox   %f10, %f14
+       fmuld   %f12, %f8, %f6
+       ldx     [%i1+16], %o2
+       std     %f14, [%sp+2255]
+       fmuld   %f0, %f0, %f2
+       fmuld   %f8, %f8, %f10
+       srlx    %o2, 42, %o1
+       faddd   %f6, %f6, %f6
+       fmuld   %f12, %f0, %f12
+       fmuld   %f0, %f8, %f8
+       ldd     [%sp+2279], %f0
+       ldd     [%sp+2263], %f4
+       fdtox   %f10, %f10
+       std     %f10, [%sp+2239]
+       faddd   %f2, %f6, %f6
+       ldd     [%sp+2271], %f2
+       fdtox   %f12, %f12
+       std     %f12, [%sp+2247]
+       fdtox   %f8, %f8
+       std     %f8, [%sp+2231]
+       fdtox   %f6, %f6
+       std     %f6, [%sp+2223]
+
+C Main loop, two result limbs per pass.  Three source limbs are in flight:
+C the pieces of the newest limb, in %o2, are stored; the limb before it is
+C multiplied in the floating-point unit; and the integer products of the
+C limb before that are recombined and stored at [%o0-16] and [%o0-8].  The
+C next source limb is fetched from [%g1-8].
+.Loop: srlx    %o2, 21, %g3
+       stx     %o1, [%sp+2279]
+       add     %g2, 1, %g2
+       and     %g3, %o7, %o1
+       ldx     [%sp+2255], %g4
+       cmp     %g2, %i2                C sets the condition for the bl,pt below
+       stx     %o1, [%sp+2271]
+       add     %g1, 8, %g1
+       add     %o0, 16, %o0
+       ldx     [%sp+2239], %o1
+       fxtod   %f0, %f10
+       fxtod   %f4, %f14
+       ldx     [%sp+2231], %i0         C %i0 is scratch here: its value was copied to %o0
+       ldx     [%sp+2223], %g5
+       ldx     [%sp+2247], %g3
+       and     %o2, %o7, %o2
+       fxtod   %f2, %f8
+       fmuld   %f10, %f10, %f0
+       stx     %o2, [%sp+2263]
+       fmuld   %f10, %f14, %f6
+       ldx     [%g1-8], %o2
+       fmuld   %f10, %f8, %f12
+       fdtox   %f0, %f2
+       ldd     [%sp+2279], %f0
+       fmuld   %f8, %f8, %f4
+       faddd   %f6, %f6, %f6
+       fmuld   %f14, %f14, %f10
+       std     %f2, [%sp+2255]
+       sllx    %g4, 20, %g4
+       ldd     [%sp+2271], %f2
+       fmuld   %f8, %f14, %f8
+       sllx    %i0, 22, %i1            C %i1 is scratch too: the loop reads through %g1
+       fdtox   %f12, %f12
+       std     %f12, [%sp+2247]
+       sllx    %g5, 42, %i0
+       add     %o1, %i1, %o1
+       faddd   %f4, %f6, %f6
+       ldd     [%sp+2263], %f4
+       add     %o1, %i0, %o1
+       add     %g3, %g4, %g3
+       fdtox   %f10, %f10
+       std     %f10, [%sp+2239]
+       srlx    %o1, 42, %g4
+       and     %g5, %o4, %i0
+       fdtox   %f8, %f8
+       std     %f8, [%sp+2231]
+       srlx    %g5, 22, %g5
+       sub     %g4, %i0, %g4
+       fdtox   %f6, %f6
+       std     %f6, [%sp+2223]
+       srlx    %g4, 63, %g4
+       add     %g3, %g5, %g3
+       add     %g3, %g4, %g3
+       stx     %o1, [%o0-16]
+       srlx    %o2, 42, %o1
+       bl,pt   %xcc, .Loop
+       stx     %g3, [%o0-8]
+
+C Drain: finish the three limbs still in flight and store their six result
+C limbs at [%o0-48] through [%o0-8].  No further source limbs are read.
+       stx     %o1, [%sp+2279]
+       srlx    %o2, 21, %o1
+       fxtod   %f0, %f16
+       ldx     [%sp+2223], %g3
+       fxtod   %f4, %f6
+       and     %o2, %o7, %o3
+       stx     %o3, [%sp+2263]
+       fxtod   %f2, %f4
+       and     %o1, %o7, %o1
+       ldx     [%sp+2231], %o2
+       sllx    %g3, 42, %g4
+       fmuld   %f16, %f16, %f14
+       stx     %o1, [%sp+2271]
+       fmuld   %f16, %f6, %f8
+       add     %o0, 48, %o0
+       ldx     [%sp+2239], %o1
+       sllx    %o2, 22, %o2
+       fmuld   %f4, %f4, %f10
+       ldx     [%sp+2255], %o3
+       fdtox   %f14, %f14
+       fmuld   %f4, %f6, %f2
+       std     %f14, [%sp+2255]
+       faddd   %f8, %f8, %f12
+       add     %o1, %o2, %o2
+       fmuld   %f16, %f4, %f4
+       ldd     [%sp+2279], %f0
+       sllx    %o3, 20, %g5
+       add     %o2, %g4, %o2
+       fmuld   %f6, %f6, %f6
+       srlx    %o2, 42, %o3
+       and     %g3, %o4, %g4
+       srlx    %g3, 22, %g3
+       faddd   %f10, %f12, %f16
+       ldd     [%sp+2271], %f12
+       ldd     [%sp+2263], %f8
+       fxtod   %f0, %f0
+       sub     %o3, %g4, %o3
+       ldx     [%sp+2247], %o1
+       srlx    %o3, 63, %o3
+       fdtox   %f2, %f10
+       fxtod   %f8, %f8
+       std     %f10, [%sp+2231]
+       fdtox   %f6, %f6
+       std     %f6, [%sp+2239]
+       add     %o1, %g5, %o1
+       fmuld   %f0, %f0, %f2
+       fdtox   %f16, %f16
+       std     %f16, [%sp+2223]
+       add     %o1, %g3, %o1
+       fdtox   %f4, %f4
+       std     %f4, [%sp+2247]
+       fmuld   %f0, %f8, %f10
+       fxtod   %f12, %f12
+       add     %o1, %o3, %o1
+       stx     %o2, [%o0-48]
+       fmuld   %f8, %f8, %f6
+       stx     %o1, [%o0-40]
+       fdtox   %f2, %f2
+       ldx     [%sp+2231], %o2
+       faddd   %f10, %f10, %f10
+       ldx     [%sp+2223], %g3
+       fmuld   %f12, %f12, %f4
+       fdtox   %f6, %f6
+       ldx     [%sp+2239], %o1
+       sllx    %o2, 22, %o2
+       fmuld   %f12, %f8, %f8
+       sllx    %g3, 42, %g5
+       ldx     [%sp+2255], %o3
+       fmuld   %f0, %f12, %f0
+       add     %o1, %o2, %o2
+       faddd   %f4, %f10, %f4
+       ldx     [%sp+2247], %o1
+       add     %o2, %g5, %o2
+       and     %g3, %o4, %g4
+       fdtox   %f8, %f8
+       sllx    %o3, 20, %g5
+       std     %f8, [%sp+2231]
+       fdtox   %f0, %f0
+       srlx    %o2, 42, %o3
+       add     %o1, %g5, %o1
+       fdtox   %f4, %f4
+       srlx    %g3, 22, %g3
+       sub     %o3, %g4, %o3
+       std     %f6, [%sp+2239]
+       std     %f4, [%sp+2223]
+       srlx    %o3, 63, %o3
+       add     %o1, %g3, %o1
+       std     %f2, [%sp+2255]
+       add     %o1, %o3, %o1
+       std     %f0, [%sp+2247]
+       stx     %o2, [%o0-32]
+       stx     %o1, [%o0-24]
+       ldx     [%sp+2231], %o2
+       ldx     [%sp+2223], %o3
+       ldx     [%sp+2239], %o1
+       sllx    %o2, 22, %o2
+       sllx    %o3, 42, %g5
+       ldx     [%sp+2255], %g4
+       and     %o3, %o4, %g3
+       add     %o1, %o2, %o2
+       ldx     [%sp+2247], %o1
+       add     %o2, %g5, %o2
+       stx     %o2, [%o0-16]
+       sllx    %g4, 20, %g4
+       srlx    %o2, 42, %o2
+       add     %o1, %g4, %o1
+       srlx    %o3, 22, %o3
+       sub     %o2, %g3, %o2
+       srlx    %o2, 63, %o2
+       add     %o1, %o3, %o1
+       add     %o1, %o2, %o1
+       stx     %o1, [%o0-8]
+       ret
+       restore %g0, %g0, %g0
+C Simple loop for fewer than 4 limbs: one source limb is squared completely
+C per pass, with no overlap between limbs.
+.Lsmall:
+       ldx     [%g1], %o2
+.Loop0:
+       and     %o2, %o7, %o1           C a = bits 0-20
+       stx     %o1, [%sp+2263]
+       add     %g2, 1, %g2
+       srlx    %o2, 21, %o1
+       add     %g1, 8, %g1
+       srlx    %o2, 42, %o2            C c = bits 42-63
+       stx     %o2, [%sp+2279]
+       and     %o1, %o7, %o1           C b = bits 21-41
+       ldd     [%sp+2263], %f0
+       cmp     %g2, %i2                C sets the condition for the bl,a,pt below
+       stx     %o1, [%sp+2271]
+       fxtod   %f0, %f6                C %f6 = a
+       ldd     [%sp+2279], %f0
+       ldd     [%sp+2271], %f4
+       fxtod   %f0, %f2                C %f2 = c
+       fmuld   %f6, %f6, %f0           C a*a
+       fxtod   %f4, %f10               C %f10 = b
+       fmuld   %f2, %f6, %f4           C a*c
+       fdtox   %f0, %f0
+       std     %f0, [%sp+2239]
+       fmuld   %f10, %f6, %f8          C a*b
+       fmuld   %f10, %f10, %f0         C b*b
+       faddd   %f4, %f4, %f6           C 2*a*c
+       fmuld   %f2, %f2, %f4           C c*c
+       fdtox   %f8, %f8
+       std     %f8, [%sp+2231]
+       fmuld   %f2, %f10, %f2          C b*c
+       faddd   %f0, %f6, %f0           C b*b + 2*a*c
+       fdtox   %f4, %f4
+       std     %f4, [%sp+2255]
+       fdtox   %f2, %f2
+       std     %f2, [%sp+2247]
+       fdtox   %f0, %f0
+       std     %f0, [%sp+2223]
+       ldx     [%sp+2239], %o1
+       ldx     [%sp+2255], %g4
+       ldx     [%sp+2231], %o2
+       sllx    %g4, 20, %g4
+       ldx     [%sp+2223], %o3
+       sllx    %o2, 22, %o2
+       sllx    %o3, 42, %g5
+       add     %o1, %o2, %o2
+       ldx     [%sp+2247], %o1
+       add     %o2, %g5, %o2
+       stx     %o2, [%o0]              C low result limb
+       and     %o3, %o4, %g3
+       srlx    %o2, 42, %o2
+       add     %o1, %g4, %o1
+       srlx    %o3, 22, %o3
+       sub     %o2, %g3, %o2
+       srlx    %o2, 63, %o2            C carry out of the low limb
+       add     %o1, %o3, %o1
+       add     %o1, %o2, %o1
+       stx     %o1, [%o0+8]            C high result limb
+       add     %o0, 16, %o0
+       bl,a,pt %xcc, .Loop0
+       ldx     [%g1], %o2              C annulled delay slot: next limb, only if looping
+       ret
+       restore %g0, %g0, %g0
+EPILOGUE(mpn_sqr_diagonal)
diff --git a/mpn/sparc64/ultrasparc1234/sub_n.asm b/mpn/sparc64/ultrasparc1234/sub_n.asm

new file mode 100644 (file)

index 0000000..d49245a
--- /dev/null
+++ b/mpn/sparc64/ultrasparc1234/sub_n.asm
@@ -0,0 +1,230 @@
+dnl  SPARC v9 mpn_sub_n -- Subtract two limb vectors of the same length > 0 and
+dnl  store difference in a third limb vector.
+
+dnl  Copyright 2001, 2002, 2003, 2011 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C                 cycles/limb
+C UltraSPARC 1&2:     4
+C UltraSPARC 3:              4.5
+
+C Compute carry-out from the most significant bits of u,v, and r, where
+C r=u-v-carry_in, using logic operations.
+
+C This code runs at 4 cycles/limb on UltraSPARC 1 and 2.  It has a 4 insn
+C recurrence, and on the UltraSPARC 1 and 2 the IE units are 100% saturated.
+C Therefore, it seems futile to try to optimize this any further...
+
+C INPUT PARAMETERS
+define(`rp',`%i0')
+define(`up',`%i1')
+define(`vp',`%i2')
+define(`n',`%i3')
+
+define(`u0',`%l0')
+define(`u1',`%l2')
+define(`u2',`%l4')
+define(`u3',`%l6')
+define(`v0',`%l1')
+define(`v1',`%l3')
+define(`v2',`%l5')
+define(`v3',`%l7')
+
+define(`cy',`%i4')
+
+define(`fanop',`fitod %f0,%f2')                dnl  A quasi nop running in the FA pipe
+define(`fmnop',`fmuld %f0,%f0,%f4')    dnl  A quasi nop running in the FM pipe
+
+ASM_START()
+       REGISTER(%g2,#scratch)
+       REGISTER(%g3,#scratch)
+PROLOGUE(mpn_sub_nc)
+C Entry point with a borrow-in.  It matches the start of mpn_sub_n except
+C that cy is not written here, so cy keeps the value it had on entry.
+C Control then continues in the mpn_sub_n code: at .Loop0 when n is less
+C than 4, otherwise at the com label.
+       save    %sp,-160,%sp
+
+       fitod   %f0,%f0         C make sure f0 contains small, quiet number
+       subcc   n,4,%g0
+       bl,pn   %xcc,.Loop0
+       nop                     C delay slot: nothing to do, cy is left alone
+       b,a     L(com)
+EPILOGUE()
+
+PROLOGUE(mpn_sub_n)
+C Subtract the n limbs at vp from the n limbs at up, store the difference at
+C rp, and return the final borrow in %i0.  cy is cleared on entry.  On the
+C short path the first limb pair is loaded before n is tested against zero,
+C so n must be at least 1.
+C
+C For each limb, r = u - v - cy is computed with two plain sub instructions,
+C and the next cy is bit 63 of
+C   (r | ~(u | ~v)) & ~(u & ~v)
+C built with orn, orn, andn and srlx.
+C
+C Fewer than 4 limbs go straight to .Loop0, one limb per pass.  Otherwise
+C four limb pairs are preloaded, .Loop handles four limbs per pass,
+C .Lend4567 finishes the last four preloaded pairs, and .Loop0 handles
+C whatever is left.
+       save    %sp,-160,%sp
+
+       fitod   %f0,%f0         C make sure f0 contains small, quiet number
+       subcc   n,4,%g0
+       bl,pn   %xcc,.Loop0
+       mov     0,cy            C delay slot: no borrow-in
+L(com):
+       ldx     [up+0],u0
+       ldx     [vp+0],v0
+       add     up,32,up
+       ldx     [up-24],u1
+       ldx     [vp+8],v1
+       add     vp,32,vp
+       ldx     [up-16],u2
+       ldx     [vp-16],v2
+       ldx     [up-8],u3
+       ldx     [vp-8],v3
+       subcc   n,8,n
+       sub     u0,v0,%g1       C main sub
+       sub     %g1,cy,%g4      C carry sub
+       orn     u0,v0,%g2
+       bl,pn   %xcc,.Lend4567  C fewer than 8 limbs: skip the main loop
+       fanop
+       b,a     .Loop
+
+       .align  16
+C START MAIN LOOP
+.Loop: orn     %g4,%g2,%g2
+       andn    u0,v0,%g3
+       ldx     [up+0],u0
+       fanop
+C --
+       andn    %g2,%g3,%g2
+       ldx     [vp+0],v0
+       add     up,32,up
+       fanop
+C --
+       srlx    %g2,63,cy
+       sub     u1,v1,%g1
+       stx     %g4,[rp+0]
+       fanop
+C --
+       sub     %g1,cy,%g4
+       orn     u1,v1,%g2
+       fmnop
+       fanop
+C --
+       orn     %g4,%g2,%g2
+       andn    u1,v1,%g3
+       ldx     [up-24],u1
+       fanop
+C --
+       andn    %g2,%g3,%g2
+       ldx     [vp+8],v1
+       add     vp,32,vp
+       fanop
+C --
+       srlx    %g2,63,cy
+       sub     u2,v2,%g1
+       stx     %g4,[rp+8]
+       fanop
+C --
+       sub     %g1,cy,%g4
+       orn     u2,v2,%g2
+       fmnop
+       fanop
+C --
+       orn     %g4,%g2,%g2
+       andn    u2,v2,%g3
+       ldx     [up-16],u2
+       fanop
+C --
+       andn    %g2,%g3,%g2
+       ldx     [vp-16],v2
+       add     rp,32,rp
+       fanop
+C --
+       srlx    %g2,63,cy
+       sub     u3,v3,%g1
+       stx     %g4,[rp-16]
+       fanop
+C --
+       sub     %g1,cy,%g4
+       orn     u3,v3,%g2
+       fmnop
+       fanop
+C --
+       orn     %g4,%g2,%g2
+       andn    u3,v3,%g3
+       ldx     [up-8],u3
+       fanop
+C --
+       andn    %g2,%g3,%g2
+       subcc   n,4,n
+       ldx     [vp-8],v3
+       fanop
+C --
+       srlx    %g2,63,cy
+       sub     u0,v0,%g1
+       stx     %g4,[rp-8]
+       fanop
+C --
+       sub     %g1,cy,%g4
+       orn     u0,v0,%g2
+       bge,pt  %xcc,.Loop
+       fanop
+C END MAIN LOOP
+C Wind-down: subtract the four limb pairs already held in u0-u3 and v0-v3
+C and store the four results; no further limbs are loaded here.
+.Lend4567:
+       orn     %g4,%g2,%g2
+       andn    u0,v0,%g3
+       andn    %g2,%g3,%g2
+       srlx    %g2,63,cy
+       sub     u1,v1,%g1
+       stx     %g4,[rp+0]
+       sub     %g1,cy,%g4
+       orn     u1,v1,%g2
+       orn     %g4,%g2,%g2
+       andn    u1,v1,%g3
+       andn    %g2,%g3,%g2
+       srlx    %g2,63,cy
+       sub     u2,v2,%g1
+       stx     %g4,[rp+8]
+       sub     %g1,cy,%g4
+       orn     u2,v2,%g2
+       orn     %g4,%g2,%g2
+       andn    u2,v2,%g3
+       andn    %g2,%g3,%g2
+       add     rp,32,rp
+       srlx    %g2,63,cy
+       sub     u3,v3,%g1
+       stx     %g4,[rp-16]
+       sub     %g1,cy,%g4
+       orn     u3,v3,%g2
+       orn     %g4,%g2,%g2
+       andn    u3,v3,%g3
+       andn    %g2,%g3,%g2
+       srlx    %g2,63,cy
+       stx     %g4,[rp-8]
+
+       addcc   n,4,n           C n = limbs still to do
+       bz,pn   %xcc,.Lret
+       fanop
+
+C One limb per pass; the srlx in the delay slot yields the borrow for the
+C next pass, or the return value after the last pass.
+.Loop0:        ldx     [up],u0
+       add     up,8,up
+       ldx     [vp],v0
+       add     vp,8,vp
+       add     rp,8,rp
+       subcc   n,1,n
+       sub     u0,v0,%g1
+       orn     u0,v0,%g2
+       sub     %g1,cy,%g4
+       andn    u0,v0,%g3
+       orn     %g4,%g2,%g2
+       stx     %g4,[rp-8]
+       andn    %g2,%g3,%g2
+       bnz,pt  %xcc,.Loop0
+       srlx    %g2,63,cy
+
+.Lret: mov     cy,%i0
+       ret
+       restore
+EPILOGUE(mpn_sub_n)
diff --git a/mpn/sparc64/ultrasparc1234/submul_1.asm b/mpn/sparc64/ultrasparc1234/submul_1.asm

new file mode 100644 (file)

index 0000000..ba91200
--- /dev/null
+++ b/mpn/sparc64/ultrasparc1234/submul_1.asm
@@ -0,0 +1,57 @@
+dnl  SPARC v9 64-bit mpn_submul_1 -- Multiply a limb vector with a limb and
+dnl  subtract the result from a second limb vector.
+
+dnl  Copyright 2001, 2002, 2003 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C                 cycles/limb
+C UltraSPARC 1&2:     18
+C UltraSPARC 3:              23
+
+C INPUT PARAMETERS
+C rp   i0
+C up   i1
+C n    i2
+C v    i3
+
+ASM_START()
+       REGISTER(%g2,#scratch)
+
+PROLOGUE(mpn_submul_1)
+C Multiply the n limbs at up by the limb v and subtract the product from the
+C n limbs at rp, in place.  This is done in two steps: mpn_mul_1 writes the
+C product to a temporary area carved out of the stack, then mpn_sub_n
+C subtracts that area from rp.  The value returned is the mpn_mul_1 return
+C value plus the mpn_sub_n return value.
+       save    %sp,-176,%sp
+
+       sllx    %i2, 3, %g2             C n limbs * 8 bytes
+       or      %g0, %i1, %o1           C mul_1 arg 2: up
+       add     %g2, 15, %o0
+       or      %g0, %i2, %o2           C mul_1 arg 3: n
+       and     %o0, -16, %o0           C size rounded up to a multiple of 16
+       sub     %sp, %o0, %sp           C grow the stack by that amount
+       add     %sp, 2223, %o0          C mul_1 arg 1: start of the temporary area
+       or      %g0, %o0, %l0           C keep that address for the mpn_sub_n call
+       call    mpn_mul_1
+       or      %g0, %i3, %o3           C delay slot, mul_1 arg 4: v
+       or      %g0, %o0, %l1           C preserve carry value from mpn_mul_1
+       or      %g0, %i0, %o0           C sub_n arg 1: rp as destination
+       or      %g0, %i0, %o1           C sub_n arg 2: rp as minuend
+       or      %g0, %l0, %o2           C sub_n arg 3: the product
+       call    mpn_sub_n
+       or      %g0, %i2, %o3           C delay slot, sub_n arg 4: n
+       ret
+       restore %l1, %o0, %o0           C sum carry values
+EPILOGUE(mpn_submul_1)
diff --git a/mpn/sparc64/ultrasparc34/gmp-mparam.h b/mpn/sparc64/ultrasparc34/gmp-mparam.h

index cd1f89a076fc4dbf2640d65fe34addc1226e4d76..afd75aff7924e3bd6fb02b7ed404b0c0b6c64d94 100644 (file)
--- a/mpn/sparc64/ultrasparc34/gmp-mparam.h
+++ b/mpn/sparc64/ultrasparc34/gmp-mparam.h
@@ -25,37 +25,41 @@ along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
  
  #define DIVREM_1_NORM_THRESHOLD              0  /* always */
  #define DIVREM_1_UNNORM_THRESHOLD            0  /* always */
+#define MOD_1_1P_METHOD                      2
  #define MOD_1_NORM_THRESHOLD                 0  /* always */
  #define MOD_1_UNNORM_THRESHOLD               0  /* always */
-#define MOD_1N_TO_MOD_1_1_THRESHOLD      MP_SIZE_T_MAX  /* never */
-#define MOD_1U_TO_MOD_1_1_THRESHOLD      MP_SIZE_T_MAX
-#define MOD_1_1_TO_MOD_1_2_THRESHOLD     MP_SIZE_T_MAX
-#define MOD_1_2_TO_MOD_1_4_THRESHOLD     MP_SIZE_T_MAX
-#define PREINV_MOD_1_TO_MOD_1_THRESHOLD  MP_SIZE_T_MAX  /* never */
+#define MOD_1N_TO_MOD_1_1_THRESHOLD         10
+#define MOD_1U_TO_MOD_1_1_THRESHOLD          5
+#define MOD_1_1_TO_MOD_1_2_THRESHOLD        20
+#define MOD_1_2_TO_MOD_1_4_THRESHOLD         0  /* never mpn_mod_1s_2p */
+#define PREINV_MOD_1_TO_MOD_1_THRESHOLD     29
  #define USE_PREINV_DIVREM_1                  1
-#define DIVREM_2_THRESHOLD                   0  /* always */
+#define DIV_QR_2_PI2_THRESHOLD           MP_SIZE_T_MAX  /* never */
  #define DIVEXACT_1_THRESHOLD                 0  /* always */
  #define BMOD_1_TO_MOD_1_THRESHOLD        MP_SIZE_T_MAX  /* never */
  
  #define MUL_TOOM22_THRESHOLD                30
  #define MUL_TOOM33_THRESHOLD                93
-#define MUL_TOOM44_THRESHOLD               143
+#define MUL_TOOM44_THRESHOLD               139
  #define MUL_TOOM6H_THRESHOLD               165
-#define MUL_TOOM8H_THRESHOLD               303
+#define MUL_TOOM8H_THRESHOLD               278
  
-#define MUL_TOOM32_TO_TOOM43_THRESHOLD      93
-#define MUL_TOOM32_TO_TOOM53_THRESHOLD      95
+#define MUL_TOOM32_TO_TOOM43_THRESHOLD      86
+#define MUL_TOOM32_TO_TOOM53_THRESHOLD     105
  #define MUL_TOOM42_TO_TOOM53_THRESHOLD      85
-#define MUL_TOOM42_TO_TOOM63_THRESHOLD      50
+#define MUL_TOOM42_TO_TOOM63_THRESHOLD      68
+#define MUL_TOOM43_TO_TOOM54_THRESHOLD      67
  
-#define SQR_BASECASE_THRESHOLD              10
+#define SQR_BASECASE_THRESHOLD               9
  #define SQR_TOOM2_THRESHOLD                 72
-#define SQR_TOOM3_THRESHOLD                 97
-#define SQR_TOOM4_THRESHOLD                179
-#define SQR_TOOM6_THRESHOLD                191
+#define SQR_TOOM3_THRESHOLD                 94
+#define SQR_TOOM4_THRESHOLD                184
+#define SQR_TOOM6_THRESHOLD                  0  /* always */
  #define SQR_TOOM8_THRESHOLD                339
  
-#define MULMOD_BNM1_THRESHOLD               14
+#define MULMID_TOOM42_THRESHOLD             40
+
+#define MULMOD_BNM1_THRESHOLD               13
  #define SQRMOD_BNM1_THRESHOLD                9
  
  #define MUL_FFT_MODF_THRESHOLD             212  /* k = 5 */
@@ -104,9 +108,9 @@ along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
      { 262144,19}, { 524288,20}, {1048576,21}, {2097152,22}, \
      {4194304,23}, {8388608,24} }
  #define MUL_FFT_TABLE3_SIZE 170
-#define MUL_FFT_THRESHOLD                 2240
+#define MUL_FFT_THRESHOLD                 1984
  
-#define SQR_FFT_MODF_THRESHOLD             244  /* k = 5 */
+#define SQR_FFT_MODF_THRESHOLD             236  /* k = 5 */
  #define SQR_FFT_TABLE3                                      \
    { {    244, 5}, {      8, 4}, {     17, 5}, {     17, 6}, \
      {     17, 7}, {      9, 6}, {     19, 7}, {     17, 8}, \
@@ -155,38 +159,45 @@ along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
      { 262144,19}, { 524288,20}, {1048576,21}, {2097152,22}, \
      {4194304,23}, {8388608,24} }
  #define SQR_FFT_TABLE3_SIZE 182
-#define SQR_FFT_THRESHOLD                 1984
+#define SQR_FFT_THRESHOLD                 1728
  
-#define MULLO_BASECASE_THRESHOLD            13
+#define MULLO_BASECASE_THRESHOLD            12
  #define MULLO_DC_THRESHOLD                   0  /* never mpn_mullo_basecase */
  #define MULLO_MUL_N_THRESHOLD             3791
  
  #define DC_DIV_QR_THRESHOLD                 16
  #define DC_DIVAPPR_Q_THRESHOLD              66
-#define DC_BDIV_QR_THRESHOLD                26
-#define DC_BDIV_Q_THRESHOLD                 92
+#define DC_BDIV_QR_THRESHOLD                27
+#define DC_BDIV_Q_THRESHOLD                 86
  
  #define INV_MULMOD_BNM1_THRESHOLD           58
-#define INV_NEWTON_THRESHOLD                17
+#define INV_NEWTON_THRESHOLD                16
  #define INV_APPR_THRESHOLD                  17
  
-#define BINV_NEWTON_THRESHOLD              134
-#define REDC_1_TO_REDC_2_THRESHOLD          10
-#define REDC_2_TO_REDC_N_THRESHOLD         117
+#define BINV_NEWTON_THRESHOLD              110
+#define REDC_1_TO_REDC_2_THRESHOLD           0  /* always */
+#define REDC_2_TO_REDC_N_THRESHOLD         115
  
-#define MU_DIV_QR_THRESHOLD                748
-#define MU_DIVAPPR_Q_THRESHOLD             630
+#define MU_DIV_QR_THRESHOLD                618
+#define MU_DIVAPPR_Q_THRESHOLD             551
  #define MUPI_DIV_QR_THRESHOLD                0  /* always */
-#define MU_BDIV_QR_THRESHOLD               748
-#define MU_BDIV_Q_THRESHOLD                807
+#define MU_BDIV_QR_THRESHOLD               562
+#define MU_BDIV_Q_THRESHOLD                748
+
+#define POWM_SEC_TABLE  4,23,130,961,1926
  
  #define MATRIX22_STRASSEN_THRESHOLD         12
  #define HGCD_THRESHOLD                      39
-#define GCD_DC_THRESHOLD                   130
-#define GCDEXT_DC_THRESHOLD                134
-#define JACOBI_BASE_METHOD                   2
-
-#define GET_STR_DC_THRESHOLD                18
-#define GET_STR_PRECOMPUTE_THRESHOLD        27
-#define SET_STR_DC_THRESHOLD               315
-#define SET_STR_PRECOMPUTE_THRESHOLD      1037
+#define HGCD_APPR_THRESHOLD                 50
+#define HGCD_REDUCE_THRESHOLD             1012
+#define GCD_DC_THRESHOLD                   134
+#define GCDEXT_DC_THRESHOLD                132
+#define JACOBI_BASE_METHOD                   4
+
+#define GET_STR_DC_THRESHOLD                19
+#define GET_STR_PRECOMPUTE_THRESHOLD        28
+#define SET_STR_DC_THRESHOLD               300
+#define SET_STR_PRECOMPUTE_THRESHOLD      1043
+
+#define FAC_DSC_THRESHOLD                  462
+#define FAC_ODD_THRESHOLD                    0  /* always */
diff --git a/mpn/sparc64/ultrasparct1/add_n.asm b/mpn/sparc64/ultrasparct1/add_n.asm

new file mode 100644 (file)

index 0000000..9dc0bf2
--- /dev/null
+++ b/mpn/sparc64/ultrasparct1/add_n.asm
@@ -0,0 +1,57 @@
+dnl  SPARC v9 mpn_add_n for T1/T2.
+
+dnl  Copyright 2010 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C                 cycles/limb
+C UltraSPARC T1:        ?
+C UltraSPARC T2:        ?
+
+C INPUT PARAMETERS
+define(`rp', `%o0')
+define(`up', `%o1')
+define(`vp', `%o2')
+define(`n',  `%o3')
+define(`cy', `%o4')
+
+ASM_START()
+       REGISTER(%g2,#scratch)
+       REGISTER(%g3,#scratch)
+PROLOGUE(mpn_add_nc)
+       b,a     L(ent)                  C mpn_add_nc: keep the caller-supplied cy and join mpn_add_n after its cy clear (delay slot annulled)
+EPILOGUE()
+PROLOGUE(mpn_add_n)
+       mov     0, cy                   C mpn_add_n: rp[] = up[] + vp[] with no carry-in
+L(ent):        cmp     %g0, cy         C computes 0 - cy, which leaves the carry flag set when cy is nonzero
+L(top):        ldx     [up+0], %o4     C next up limb (overwrites cy, which is also %o4)
+       add     up, 8, up
+       ldx     [vp+0], %o5             C next vp limb
+       add     vp, 8, vp
+       add     rp, 8, rp
+       add     n, -1, n                C plain add, so the carry flag survives
+       srlx    %o4, 32, %g1            C high 32 bits of the up limb
+       srlx    %o5, 32, %g2            C high 32 bits of the vp limb
+       addccc  %o4, %o5, %g3           C 64-bit sum plus carry-in -> %g3
+       addccc  %g1, %g2, %g0           C re-add the high halves, result discarded; presumably converts the bit-31 carry into the bit-63 carry - confirm against the v9 manual
+       brgz    n, L(top)
+        stx    %g3, [rp-8]             C delay slot: store the sum limb (rp was already advanced)
+
+       retl
+       addc    %g0, %g0, %o0           C delay slot: return the final carry as 0 or 1
+EPILOGUE()
diff --git a/mpn/sparc64/ultrasparct1/addlsh1_n.asm b/mpn/sparc64/ultrasparct1/addlsh1_n.asm

new file mode 100644 (file)

index 0000000..4c25cac
--- /dev/null
+++ b/mpn/sparc64/ultrasparct1/addlsh1_n.asm
@@ -0,0 +1,30 @@
+dnl  SPARC v9 mpn_addlsh1_n for T1/T2.
+
+dnl  Copyright 2010 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+define(LSH,             1)             C left-shift count applied to each vp limb by addlshC_n.asm
+define(RSH,             63)            C 64 - LSH: right shift that recovers the bits pushed out of a limb
+
+define(func, mpn_addlsh1_n)            C entry-point name that addlshC_n.asm hands to PROLOGUE
+
+MULFUNC_PROLOGUE(mpn_addlsh1_n)
+
+include_mpn(`sparc64/ultrasparct1/addlshC_n.asm')
diff --git a/mpn/sparc64/ultrasparct1/addlsh2_n.asm b/mpn/sparc64/ultrasparct1/addlsh2_n.asm

new file mode 100644 (file)

index 0000000..5b3e613
--- /dev/null
+++ b/mpn/sparc64/ultrasparct1/addlsh2_n.asm
@@ -0,0 +1,30 @@
+dnl  SPARC v9 mpn_addlsh2_n for T1/T2.
+
+dnl  Copyright 2010 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+define(LSH,             2)             C left-shift count applied to each vp limb by addlshC_n.asm
+define(RSH,             62)            C 64 - LSH: right shift that recovers the bits pushed out of a limb
+
+define(func, mpn_addlsh2_n)            C entry-point name that addlshC_n.asm hands to PROLOGUE
+
+MULFUNC_PROLOGUE(mpn_addlsh2_n)
+
+include_mpn(`sparc64/ultrasparct1/addlshC_n.asm')
diff --git a/mpn/sparc64/ultrasparct1/addlshC_n.asm b/mpn/sparc64/ultrasparct1/addlshC_n.asm

new file mode 100644 (file)

index 0000000..010e6c7
--- /dev/null
+++ b/mpn/sparc64/ultrasparct1/addlshC_n.asm
@@ -0,0 +1,58 @@
+dnl  SPARC v9 mpn_addlshC_n for T1/T2.
+
+dnl  Copyright 2010 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+C                 cycles/limb
+C UltraSPARC T1:       21
+C UltraSPARC T2:        ?
+
+C INPUT PARAMETERS
+define(`rp', `%o0')
+define(`up', `%o1')
+define(`vp', `%o2')
+define(`n',  `%o3')
+define(`cy', `%o4')
+
+ASM_START()
+       REGISTER(%g2,#scratch)
+       REGISTER(%g3,#scratch)
+PROLOGUE(func)
+       mov     0, cy                   C rp[] = up[] + (vp[] << LSH); func, LSH and RSH are defined by the including file
+       mov     0, %g5                  C %g5 = bits shifted out of the previous vp limb, none yet
+       cmp     %g0, cy                 C 0 - 0: start with the carry flag clear
+L(top):        ldx     [up+0], %o4     C next up limb (reuses %o4, the cy register)
+       add     up, 8, up
+       ldx     [vp+0], %o5             C next vp limb
+       add     vp, 8, vp
+       add     rp, 8, rp
+
+       sllx    %o5, LSH, %g4           C shift the vp limb left
+       add     n, -1, n
+       or      %g5, %g4, %g4           C bring in the bits shifted out of the previous limb
+       srlx    %o5, RSH, %g5           C keep this limb's top LSH bits for the next round
+
+       srlx    %o4, 32, %g1            C high 32 bits of the up limb
+       srlx    %g4, 32, %g2            C high 32 bits of the shifted vp limb
+       addccc  %o4, %g4, %g3           C 64-bit sum plus carry-in -> %g3
+       addccc  %g1, %g2, %g0           C re-add the high halves, result discarded; presumably yields the bit-63 carry - confirm against the v9 manual
+       brgz    n, L(top)
+        stx    %g3, [rp-8]             C delay slot: store the result limb
+
+       retl
+       addc    %g5, %g0, %o0           C delay slot: return shifted-out bits plus the final carry
+EPILOGUE()
diff --git a/mpn/sparc64/ultrasparct1/addmul_1.asm b/mpn/sparc64/ultrasparct1/addmul_1.asm

new file mode 100644 (file)

index 0000000..6b2b2c2
--- /dev/null
+++ b/mpn/sparc64/ultrasparct1/addmul_1.asm
@@ -0,0 +1,75 @@
+dnl  SPARC v9 mpn_addmul_1 for T1/T2.
+
+dnl  Copyright 2010 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C                 cycles/limb
+C UltraSPARC T1:       74
+C UltraSPARC T2:        ?
+
+C INPUT PARAMETERS
+define(`rp', `%i0')
+define(`up', `%i1')
+define(`n',  `%i2')
+define(`v0', `%i3')
+
+ASM_START()
+       REGISTER(%g2,#scratch)
+       REGISTER(%g3,#scratch)
+PROLOGUE(mpn_addmul_1)
+       save    %sp, -176, %sp          C rp[] += up[]*v0, 64x64 product built from four 32x32 mulx; returns the carry-out limb
+       mov     1, %o2
+       mov     %i0, %g2                C %g2 = rp, since %i0 is reused as the running carry limb
+       srlx    %i3, 32, %o4            C %o4 = high 32 bits of v0
+       sllx    %o2, 32, %o2            C %o2 = 2^32, added to the high product when the middle sum carries
+       srl     %i3, 0, %i3             C %i3 = low 32 bits of v0, zero-extended
+       mov     0, %g3                  C %g3 = byte offset into up[] and rp[]
+       mov     0, %i0                  C carry limb starts at zero
+
+L(top):        ldx     [%i1+%g3], %g1  C u = next up limb
+       srl     %g1, 0, %g4             C ul = low 32 bits of u
+       mulx    %g4, %i3, %o5           C ul*vl
+       srlx    %g1, 32, %g1            C uh = high 32 bits of u
+       mulx    %g1, %i3, %g5           C uh*vl
+       mulx    %g4, %o4, %g4           C ul*vh
+       mulx    %g1, %o4, %g1           C uh*vh
+       srlx    %o5, 32, %o1            C high half of ul*vl
+       add     %g5, %o1, %o1           C uh*vl + high half of ul*vl
+       addcc   %o1, %g4, %g4           C middle sum, adds ul*vh; 64-bit carry is tested by the movlu below
+       srl     %o5, 0, %o0             C low half of ul*vl
+       ldx     [%g2+%g3], %o5          C current rp limb
+       sllx    %g4, 32, %o1            C low half of the middle sum moved to bits 32..63
+       add     %g1, %o2, %l1           C uh*vh + 2^32
+       movlu   %xcc, %l1, %g1          C if the middle sum carried, use the bumped high product
+       add     %o1, %o0, %l0           C low limb of the product
+       addcc   %l0, %i0, %g5           C add the carry limb from the previous iteration
+       srlx    %g4, 32, %i0            C high half of the middle sum
+       add     %i0, 1, %g4
+       movlu   %xcc, %g4, %i0          C propagate the carry of the previous addcc
+       addcc   %o5, %g5, %g5           C add the rp limb
+       stx     %g5, [%g2+%g3]          C store the result limb
+       add     %i0, 1, %g4
+       movlu   %xcc, %g4, %i0          C propagate the carry of the rp addition
+       add     %i2, -1, %i2            C n--
+       add     %i0, %g1, %i0           C carry limb for the next round = high product part + carries
+       brnz,pt %i2, L(top)
+        add    %g3, 8, %g3             C delay slot: advance the byte offset
+       return  %i7+8                   C window restore makes %i0 the caller-visible %o0
+        nop
+EPILOGUE()
diff --git a/mpn/sparc64/ultrasparct1/gmp-mparam.h b/mpn/sparc64/ultrasparct1/gmp-mparam.h

new file mode 100644 (file)

index 0000000..b071b5a
--- /dev/null
+++ b/mpn/sparc64/ultrasparct1/gmp-mparam.h
@@ -0,0 +1,143 @@
+/* Sparc64 gmp-mparam.h -- Compiler/machine parameter header file.
+
+Copyright 1991, 1993, 1994, 1999, 2000, 2001, 2002, 2004, 2006, 2008, 2009,
+2010 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#define GMP_LIMB_BITS 64
+#define BYTES_PER_MP_LIMB 8
+
+/* 1000 MHz ultrasparc t1 running GNU/Linux */
+
+#define DIVREM_1_NORM_THRESHOLD              0  /* always */
+#define DIVREM_1_UNNORM_THRESHOLD            0  /* always */
+#define MOD_1_1P_METHOD                      2
+#define MOD_1_NORM_THRESHOLD                 0  /* always */
+#define MOD_1_UNNORM_THRESHOLD               0  /* always */
+#define MOD_1N_TO_MOD_1_1_THRESHOLD         13
+#define MOD_1U_TO_MOD_1_1_THRESHOLD      MP_SIZE_T_MAX  /* never */
+#define MOD_1_1_TO_MOD_1_2_THRESHOLD         0  /* never mpn_mod_1_1p */
+#define MOD_1_2_TO_MOD_1_4_THRESHOLD         0  /* never mpn_mod_1s_2p */
+#define PREINV_MOD_1_TO_MOD_1_THRESHOLD     34
+#define USE_PREINV_DIVREM_1                  1
+#define DIV_QR_2_PI2_THRESHOLD           MP_SIZE_T_MAX  /* never */
+#define DIVEXACT_1_THRESHOLD                 0  /* always */
+#define BMOD_1_TO_MOD_1_THRESHOLD        MP_SIZE_T_MAX  /* never */
+
+#define MUL_TOOM22_THRESHOLD                 8
+#define MUL_TOOM33_THRESHOLD                50
+#define MUL_TOOM44_THRESHOLD                99
+#define MUL_TOOM6H_THRESHOLD               125
+#define MUL_TOOM8H_THRESHOLD               187
+
+#define MUL_TOOM32_TO_TOOM43_THRESHOLD      65
+#define MUL_TOOM32_TO_TOOM53_THRESHOLD      77
+#define MUL_TOOM42_TO_TOOM53_THRESHOLD      65
+#define MUL_TOOM42_TO_TOOM63_THRESHOLD      50
+#define MUL_TOOM43_TO_TOOM54_THRESHOLD      34
+
+#define SQR_BASECASE_THRESHOLD               0  /* always */
+#define SQR_TOOM2_THRESHOLD                 14
+#define SQR_TOOM3_THRESHOLD                 57
+#define SQR_TOOM4_THRESHOLD                133
+#define SQR_TOOM6_THRESHOLD                156
+#define SQR_TOOM8_THRESHOLD                260
+
+#define MULMID_TOOM42_THRESHOLD             12
+
+#define MULMOD_BNM1_THRESHOLD                7
+#define SQRMOD_BNM1_THRESHOLD                7
+
+#define MUL_FFT_MODF_THRESHOLD             176  /* k = 5 */
+#define MUL_FFT_TABLE3                                      \
+  { {    176, 5}, {      7, 6}, {      4, 5}, {      9, 6}, \
+    {      5, 5}, {     11, 6}, {     11, 7}, {      6, 6}, \
+    {     13, 7}, {      7, 6}, {     15, 7}, {      9, 8}, \
+    {      5, 7}, {     13, 8}, {      7, 7}, {     15, 6}, \
+    {     32, 7}, {     24, 8}, {     21, 9}, {     11, 8}, \
+    {     23,10}, {      7, 9}, {     15, 8}, {     33, 9}, \
+    {     19, 8}, {     39, 9}, {     23,10}, {     15, 9}, \
+    {     43,10}, {     23,11}, {     15,10}, {     31, 9}, \
+    {     63, 8}, {    127, 9}, {     67,10}, {     39, 9}, \
+    {     79, 8}, {    159,10}, {     47, 9}, {     95,11}, \
+    {   2048,12}, {   4096,13}, {   8192,14}, {  16384,15}, \
+    {  32768,16}, {  65536,17}, { 131072,18}, { 262144,19}, \
+    { 524288,20}, {1048576,21}, {2097152,22}, {4194304,23}, \
+    {8388608,24} }
+#define MUL_FFT_TABLE3_SIZE 53
+#define MUL_FFT_THRESHOLD                 1728
+
+
+#define SQR_FFT_MODF_THRESHOLD             148  /* k = 5 */
+#define SQR_FFT_TABLE3                                      \
+  { {    148, 5}, {      7, 6}, {      4, 5}, {      9, 6}, \
+    {      5, 5}, {     11, 6}, {     11, 7}, {      6, 6}, \
+    {     13, 7}, {      7, 6}, {     15, 7}, {     13, 8}, \
+    {      7, 7}, {     16, 8}, {      9, 6}, {     38, 7}, \
+    {     20, 8}, {     11, 7}, {     24, 8}, {     13, 9}, \
+    {      7, 7}, {     30, 8}, {     19, 9}, {     11, 8}, \
+    {     25,10}, {      7, 9}, {     15, 8}, {     31, 9}, \
+    {     19, 8}, {     39, 9}, {     27,10}, {     15, 9}, \
+    {     39,10}, {     23, 9}, {     47, 8}, {     95, 9}, \
+    {     51,11}, {     15,10}, {     31, 8}, {    127,10}, \
+    {     39, 9}, {     79, 8}, {    159,10}, {     47, 9}, \
+    {     95,11}, {   2048,12}, {   4096,13}, {   8192,14}, \
+    {  16384,15}, {  32768,16}, {  65536,17}, { 131072,18}, \
+    { 262144,19}, { 524288,20}, {1048576,21}, {2097152,22}, \
+    {4194304,23}, {8388608,24} }
+#define SQR_FFT_TABLE3_SIZE 58
+#define SQR_FFT_THRESHOLD                 1344
+
+#define MULLO_BASECASE_THRESHOLD             0  /* always */
+#define MULLO_DC_THRESHOLD                  28
+#define MULLO_MUL_N_THRESHOLD             3176
+
+#define DC_DIV_QR_THRESHOLD                 27
+#define DC_DIVAPPR_Q_THRESHOLD             106
+#define DC_BDIV_QR_THRESHOLD                27
+#define DC_BDIV_Q_THRESHOLD                 62
+
+#define INV_MULMOD_BNM1_THRESHOLD           14
+#define INV_NEWTON_THRESHOLD               163
+#define INV_APPR_THRESHOLD                 117
+
+#define BINV_NEWTON_THRESHOLD              166
+#define REDC_1_TO_REDC_N_THRESHOLD          31
+
+#define MU_DIV_QR_THRESHOLD                734
+#define MU_DIVAPPR_Q_THRESHOLD             748
+#define MUPI_DIV_QR_THRESHOLD               67
+#define MU_BDIV_QR_THRESHOLD               562
+#define MU_BDIV_Q_THRESHOLD                734
+
+#define POWM_SEC_TABLE  4,29,188,643,2741
+
+#define MATRIX22_STRASSEN_THRESHOLD         11
+#define HGCD_THRESHOLD                      58
+#define HGCD_APPR_THRESHOLD                 55
+#define HGCD_REDUCE_THRESHOLD              637
+#define GCD_DC_THRESHOLD                   186
+#define GCDEXT_DC_THRESHOLD                140
+#define JACOBI_BASE_METHOD                   3
+
+#define GET_STR_DC_THRESHOLD                20
+#define GET_STR_PRECOMPUTE_THRESHOLD        33
+#define SET_STR_DC_THRESHOLD               268
+#define SET_STR_PRECOMPUTE_THRESHOLD       960
+
+#define FAC_DSC_THRESHOLD                  268
+#define FAC_ODD_THRESHOLD                    0  /* always */
diff --git a/mpn/sparc64/ultrasparct1/lshift.asm b/mpn/sparc64/ultrasparct1/lshift.asm

new file mode 100644 (file)

index 0000000..2729a40
--- /dev/null
+++ b/mpn/sparc64/ultrasparct1/lshift.asm
@@ -0,0 +1,59 @@
+dnl  SPARC v9 mpn_lshift for T1/T2.
+
+dnl  Copyright 2010 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C                 cycles/limb
+C UltraSPARC T1:       17
+C UltraSPARC T2:        ?
+
+C INPUT PARAMETERS
+define(`rp', `%o0')
+define(`up', `%o1')
+define(`n',  `%o2')
+define(`cnt',`%o3')
+C %o4 is scratch holding -cnt, the right-shift amount; it is not an input
+
+ASM_START()
+       REGISTER(%g2,#scratch)
+       REGISTER(%g3,#scratch)
+PROLOGUE(mpn_lshift)
+       add     %o1, -8, %o1            C rp[] = up[] << cnt, processed from the top limb down; up biased by one limb
+       add     %o0, 8, %g1             C %g1 = rp + 8, store base for the loop
+       sllx    %o2, 3, %g5             C %g5 = 8*n, byte index
+       sub     %g0, %o3, %o4           C %o4 = -cnt; as a shift amount this presumably acts as 64 - cnt (low 6 bits used) - confirm
+       ldx     [%o1+%g5], %g2          C %g2 = top limb up[n-1]
+       add     %g5, -8, %g5
+       brz,pn  %g5, L(end)             C n = 1: nothing to combine
+       sllx    %g2, %o3, %g4           C delay slot: %g4 = top limb << cnt
+
+L(top):        ldx     [%o1+%g5], %o5  C next lower limb
+       nop
+       add     %g5, -8, %g5
+       srlx    %o5, %o4, %g3           C bits of the lower limb that move into the limb above
+       or      %g4, %g3, %g3           C combine with the shifted upper limb
+       sllx    %o5, %o3, %g4           C shifted lower limb, carried to the next round
+       stx     %g3, [%g1+%g5]          C store the completed result limb
+       brnz    %g5, L(top)
+       nop
+
+L(end):        stx     %g4, [%g1-8]    C rp[0] = up[0] << cnt
+       retl
+        srlx   %g2, %o4, %o0           C delay slot: return the bits shifted out of the top limb
+EPILOGUE()
diff --git a/mpn/sparc64/ultrasparct1/lshiftc.asm b/mpn/sparc64/ultrasparct1/lshiftc.asm

new file mode 100644 (file)

index 0000000..d645af7
--- /dev/null
+++ b/mpn/sparc64/ultrasparct1/lshiftc.asm
@@ -0,0 +1,60 @@
+dnl  SPARC v9 mpn_lshiftc for T1/T2.
+
+dnl  Copyright 2010 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C                 cycles/limb
+C UltraSPARC T1:       17
+C UltraSPARC T2:        ?
+
+C INPUT PARAMETERS
+define(`rp', `%o0')
+define(`up', `%o1')
+define(`n',  `%o2')
+define(`cnt',`%o3')
+C %o4 is scratch holding -cnt, the right-shift amount; it is not an input
+
+ASM_START()
+       REGISTER(%g2,#scratch)
+       REGISTER(%g3,#scratch)
+PROLOGUE(mpn_lshiftc)
+       add     %o1, -8, %o1            C rp[] = complement of (up[] << cnt), from the top limb down; up biased by one limb
+       add     %o0, 8, %g1             C %g1 = rp + 8, store base for the loop
+       sllx    %o2, 3, %g5             C %g5 = 8*n, byte index
+       sub     %g0, %o3, %o4           C %o4 = -cnt; as a shift amount this presumably acts as 64 - cnt (low 6 bits used) - confirm
+       ldx     [%o1+%g5], %g2          C %g2 = top limb up[n-1]
+       add     %g5, -8, %g5
+       brz,pn  %g5, L(end)             C n = 1: nothing to combine
+       sllx    %g2, %o3, %g4           C delay slot: %g4 = top limb << cnt
+
+L(top):        ldx     [%o1+%g5], %o5  C next lower limb
+       not     %g4                     C complement the shifted upper limb
+       add     %g5, -8, %g5
+       srlx    %o5, %o4, %g3           C bits of the lower limb that move into the limb above
+       andn    %g4, %g3, %g3           C and-not with the complemented limb = complement of the or-combination
+       sllx    %o5, %o3, %g4           C shifted lower limb, carried to the next round
+       stx     %g3, [%g1+%g5]          C store the completed result limb
+       brnz    %g5, L(top)
+       nop
+
+L(end):        not     %g4             C complement the lowest shifted limb
+       stx     %g4, [%g1-8]            C rp[0]
+       retl
+        srlx   %g2, %o4, %o0           C delay slot: return the (uncomplemented) bits shifted out of the top limb
+EPILOGUE()
diff --git a/mpn/sparc64/ultrasparct1/mul_1.asm b/mpn/sparc64/ultrasparct1/mul_1.asm

new file mode 100644 (file)

index 0000000..fa2ae44
--- /dev/null
+++ b/mpn/sparc64/ultrasparct1/mul_1.asm
@@ -0,0 +1,71 @@
+dnl  SPARC v9 mpn_mul_1 for T1/T2.
+
+dnl  Copyright 2010 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C                 cycles/limb
+C UltraSPARC T1:       68
+C UltraSPARC T2:        ?
+
+C INPUT PARAMETERS
+define(`rp', `%i0')
+define(`up', `%i1')
+define(`n',  `%i2')
+define(`v0', `%i3')
+
+ASM_START()
+       REGISTER(%g2,#scratch)
+       REGISTER(%g3,#scratch)
+PROLOGUE(mpn_mul_1)
+       save    %sp, -176, %sp          C rp[] = up[]*v0, 64x64 product built from four 32x32 mulx; returns the carry-out limb
+       mov     1, %o2
+       mov     %i0, %g2                C %g2 = rp, since %i0 is reused as the running carry limb
+       srlx    %i3, 32, %o4            C %o4 = high 32 bits of v0
+       sllx    %o2, 32, %o2            C %o2 = 2^32, added to the high product when the middle sum carries
+       srl     %i3, 0, %i3             C %i3 = low 32 bits of v0, zero-extended
+       mov     0, %g3                  C %g3 = byte offset into up[] and rp[]
+       mov     0, %i0                  C carry limb starts at zero
+
+L(top):        ldx     [%i1+%g3], %g1  C u = next up limb
+       srl     %g1, 0, %g4             C ul = low 32 bits of u
+       mulx    %g4, %i3, %o5           C ul*vl
+       srlx    %g1, 32, %g1            C uh = high 32 bits of u
+       mulx    %g1, %i3, %g5           C uh*vl
+       mulx    %g4, %o4, %g4           C ul*vh
+       mulx    %g1, %o4, %g1           C uh*vh
+       srlx    %o5, 32, %o1            C high half of ul*vl
+       add     %g5, %o1, %o1           C uh*vl + high half of ul*vl
+       addcc   %o1, %g4, %g4           C middle sum, adds ul*vh; 64-bit carry is tested by the movlu below
+       srl     %o5, 0, %o0             C low half of ul*vl
+       sllx    %g4, 32, %o1            C low half of the middle sum moved to bits 32..63
+       add     %g1, %o2, %l1           C uh*vh + 2^32
+       movlu   %xcc, %l1, %g1          C if the middle sum carried, use the bumped high product
+       add     %o1, %o0, %l0           C low limb of the product
+       addcc   %l0, %i0, %g5           C add the carry limb from the previous iteration
+       srlx    %g4, 32, %i0            C high half of the middle sum
+       add     %i0, 1, %g4
+       movlu   %xcc, %g4, %i0          C propagate the carry of the previous addcc
+       stx     %g5, [%g2+%g3]          C store the result limb
+       add     %i2, -1, %i2            C n--
+       add     %i0, %g1, %i0           C carry limb for the next round = high product part + carry
+       brnz,pt %i2, L(top)
+        add    %g3, 8, %g3             C delay slot: advance the byte offset
+       return  %i7+8                   C window restore makes %i0 the caller-visible %o0
+        nop
+EPILOGUE()
diff --git a/mpn/sparc64/ultrasparct1/rsblsh1_n.asm b/mpn/sparc64/ultrasparct1/rsblsh1_n.asm

new file mode 100644 (file)

index 0000000..8493bff
--- /dev/null
+++ b/mpn/sparc64/ultrasparct1/rsblsh1_n.asm
@@ -0,0 +1,30 @@
+dnl  SPARC v9 mpn_rsblsh1_n for T1/T2.
+
+dnl  Copyright 2010 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+define(LSH,             1)             C left-shift count applied to each vp limb by rsblshC_n.asm
+define(RSH,             63)            C 64 - LSH: right shift that recovers the bits pushed out of a limb
+
+define(func, mpn_rsblsh1_n)            C entry-point name that rsblshC_n.asm hands to PROLOGUE
+
+MULFUNC_PROLOGUE(mpn_rsblsh1_n)
+
+include_mpn(`sparc64/ultrasparct1/rsblshC_n.asm')
diff --git a/mpn/sparc64/ultrasparct1/rsblsh2_n.asm b/mpn/sparc64/ultrasparct1/rsblsh2_n.asm

new file mode 100644 (file)

index 0000000..ea2498a
--- /dev/null
+++ b/mpn/sparc64/ultrasparct1/rsblsh2_n.asm
@@ -0,0 +1,30 @@
+dnl  SPARC v9 mpn_rsblsh2_n for T1/T2.
+
+dnl  Copyright 2010 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+define(LSH,             2)             C left-shift count applied to each vp limb by rsblshC_n.asm
+define(RSH,             62)            C 64 - LSH: right shift that recovers the bits pushed out of a limb
+
+define(func, mpn_rsblsh2_n)            C entry-point name that rsblshC_n.asm hands to PROLOGUE
+
+MULFUNC_PROLOGUE(mpn_rsblsh2_n)
+
+include_mpn(`sparc64/ultrasparct1/rsblshC_n.asm')
diff --git a/mpn/sparc64/ultrasparct1/rsblshC_n.asm b/mpn/sparc64/ultrasparct1/rsblshC_n.asm

new file mode 100644 (file)

index 0000000..df18c4f
--- /dev/null
+++ b/mpn/sparc64/ultrasparct1/rsblshC_n.asm
@@ -0,0 +1,58 @@
+dnl  SPARC v9 mpn_rsblshC_n for T1/T2.
+
+dnl  Copyright 2010 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+C                 cycles/limb
+C UltraSPARC T1:       21
+C UltraSPARC T2:        ?
+
+C INPUT PARAMETERS
+define(`rp', `%o0')
+define(`up', `%o1')
+define(`vp', `%o2')
+define(`n',  `%o3')
+define(`cy', `%o4')
+C Template: rp[i] = ((vp[i] << LSH) | bits shifted out of vp[i-1]) - up[i] - borrow, for n limbs
+ASM_START()
+       REGISTER(%g2,#scratch)
+       REGISTER(%g3,#scratch)
+PROLOGUE(func)
+       mov     0, cy                   C no carry argument is read: %o4 is zeroed here
+       mov     0, %g5                  C no bits are shifted in for the first limb
+       cmp     %g0, cy                 C 0 - 0 clears the carry flag
+L(top):        ldx     [up+0], %o4     C up limb (reuses the cy register)
+       add     up, 8, up
+       ldx     [vp+0], %o5             C vp limb
+       add     vp, 8, vp
+       add     rp, 8, rp               C advanced early; the store below uses rp-8
+C Build the shifted vp limb in %g4 and keep the bits that fall off the top in %g5
+       sllx    %o5, LSH, %g4
+       add     n, -1, n
+       or      %g5, %g4, %g4           C merge the bits left over from the previous vp limb
+       srlx    %o5, RSH, %g5           C bits to merge into the next limb
+C subccc borrows through the 32-bit carry, so the upper halves are subtracted again to obtain the 64-bit borrow
+       srlx    %o4, 32, %g1            C upper half of the up limb
+       srlx    %g4, 32, %g2            C upper half of the shifted vp limb
+       subccc  %g4, %o4, %g3           C result limb = shifted vp - up - borrow
+       subccc  %g2, %g1, %g0           C flags only: the result goes to %g0
+       brgz    n, L(top)
+        stx    %g3, [rp-8]             C delay slot: store the result limb
+C Return value: bits shifted out of the last vp limb minus the final borrow
+       retl
+       subc    %g5, %g0, %o0
+EPILOGUE()
diff --git a/mpn/sparc64/ultrasparct1/rshift.asm b/mpn/sparc64/ultrasparct1/rshift.asm

new file mode 100644 (file)

index 0000000..b5f5be7
--- /dev/null
+++ b/mpn/sparc64/ultrasparct1/rshift.asm
@@ -0,0 +1,61 @@
+dnl  SPARC v9 mpn_rshift for T1/T2.
+
+dnl  Copyright 2010 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C                 cycles/limb
+C UltraSPARC T1:       17
+C UltraSPARC T2:        ?
+
+C INPUT PARAMETERS
+define(`rp', `%o0')
+define(`up', `%o1')
+define(`vp', `%o2')
+define(`n',  `%o3')
+define(`cy', `%o4')
+C NOTE(review): the code below uses raw registers; it scales %o2 by 8 as the limb count and shifts by %o3, so the vp/n/cy names above look stale
+ASM_START()
+       REGISTER(%g2,#scratch)
+       REGISTER(%g3,#scratch)
+PROLOGUE(mpn_rshift)
+       add     %o1, 0, %o1             C adds zero: the source pointer is unchanged
+       add     %o0, -16, %g1           C store base = destination - 16
+       sllx    %o2, 3, %g5             C byte length = %o2 * 8
+       add     %o1, %g5, %o1           C point at the end of the source
+       add     %g1, %g5, %g1           C bias the store base by the same length
+       neg     %g5                     C negative byte index that counts up to zero
+       sub     %g0, %o3, %o4           C negated shift count; sllx uses the low 6 bits, so it acts as 64 - count
+       ldx     [%o1+%g5], %g2          C lowest source limb
+       add     %g5, 8, %g5
+       brz,pn  %g5, L(end)             C single limb: skip the loop
+       srlx    %g2, %o3, %g4           C delay slot: lowest limb shifted right
+C Each result limb is the previous limb shifted right, ORed with the low bits of the next limb moved to the top
+L(top):        ldx     [%o1+%g5], %o5
+       add     %g5, 8, %g5
+       sllx    %o5, %o4, %g3           C bits of this limb that drop into the limb below
+       or      %g4, %g3, %g3
+       srlx    %o5, %o3, %g4           C remainder, completed on the next pass
+       stx     %g3, [%g1+%g5]
+       brnz    %g5, L(top)
+       nop
+C Store the top result limb; return the bits shifted out of the lowest limb, left-justified
+L(end):        stx     %g4, [%g1+8]
+       retl
+        sllx   %g2, %o4, %o0
+EPILOGUE()
diff --git a/mpn/sparc64/ultrasparct1/sub_n.asm b/mpn/sparc64/ultrasparct1/sub_n.asm

new file mode 100644 (file)

index 0000000..3eb8f1a
--- /dev/null
+++ b/mpn/sparc64/ultrasparct1/sub_n.asm
@@ -0,0 +1,57 @@
+dnl  SPARC v9 mpn_sub_n for T1/T2.
+
+dnl  Copyright 2010 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C                 cycles/limb
+C UltraSPARC T1:        ?
+C UltraSPARC T2:        ?
+
+C INPUT PARAMETERS
+define(`rp', `%o0')
+define(`up', `%o1')
+define(`vp', `%o2')
+define(`n',  `%o3')
+define(`cy', `%o4')
+C rp[i] = up[i] - vp[i] - borrow for n limbs, returning the final borrow; mpn_sub_nc additionally takes the initial borrow in cy
+ASM_START()
+       REGISTER(%g2,#scratch)
+       REGISTER(%g3,#scratch)
+PROLOGUE(mpn_sub_nc)
+       b,a     L(ent)                  C join mpn_sub_n below, keeping the cy passed by the caller
+EPILOGUE()
+PROLOGUE(mpn_sub_n)
+       mov     0, cy                   C plain entry point: no incoming borrow
+L(ent):        cmp     %g0, cy         C 0 - cy sets the carry flag when cy is nonzero
+L(top):        ldx     [up+0], %o4     C up limb (cy has been consumed, so %o4 is reused)
+       add     up, 8, up
+       ldx     [vp+0], %o5             C vp limb
+       add     vp, 8, vp
+       add     rp, 8, rp               C advanced early; the store below uses rp-8
+       add     n, -1, n
+       srlx    %o4, 32, %g1            C upper half of the up limb
+       srlx    %o5, 32, %g2            C upper half of the vp limb
+       subccc  %o4, %o5, %g3           C result limb = up - vp - borrow
+       subccc  %g1, %g2, %g0           C flags only: redo the upper halves to obtain the 64-bit borrow
+       brgz    n, L(top)
+        stx    %g3, [rp-8]             C delay slot: store the result limb
+C Return the final borrow as 0 or 1
+       retl
+       addc    %g0, %g0, %o0
+EPILOGUE()
diff --git a/mpn/sparc64/ultrasparct1/sublsh1_n.asm b/mpn/sparc64/ultrasparct1/sublsh1_n.asm

new file mode 100644 (file)

index 0000000..48498d1
--- /dev/null
+++ b/mpn/sparc64/ultrasparct1/sublsh1_n.asm
@@ -0,0 +1,30 @@
+dnl  SPARC v9 mpn_sublsh1_n for T1/T2.
+
+dnl  Copyright 2010 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+define(LSH,             1)
+define(RSH,             63)
+C LSH is the left-shift count and RSH (64-LSH) the matching right-shift count
+define(func, mpn_sublsh1_n)
+C func names the entry point that the shared template hands to PROLOGUE
+MULFUNC_PROLOGUE(mpn_sublsh1_n)
+C The loop itself lives in sublshC_n.asm and is specialised by the macros above
+include_mpn(`sparc64/ultrasparct1/sublshC_n.asm')
diff --git a/mpn/sparc64/ultrasparct1/sublsh2_n.asm b/mpn/sparc64/ultrasparct1/sublsh2_n.asm

new file mode 100644 (file)

index 0000000..dcf631a
--- /dev/null
+++ b/mpn/sparc64/ultrasparct1/sublsh2_n.asm
@@ -0,0 +1,30 @@
+dnl  SPARC v9 mpn_sublsh2_n for T1/T2.
+
+dnl  Copyright 2010 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+define(LSH,             2)
+define(RSH,             62)
+C LSH is the left-shift count and RSH (64-LSH) the matching right-shift count
+define(func, mpn_sublsh2_n)
+C func names the entry point that the shared template hands to PROLOGUE
+MULFUNC_PROLOGUE(mpn_sublsh2_n)
+C The loop itself lives in sublshC_n.asm and is specialised by the macros above
+include_mpn(`sparc64/ultrasparct1/sublshC_n.asm')
diff --git a/mpn/sparc64/ultrasparct1/sublshC_n.asm b/mpn/sparc64/ultrasparct1/sublshC_n.asm

new file mode 100644 (file)

index 0000000..85da0f6
--- /dev/null
+++ b/mpn/sparc64/ultrasparct1/sublshC_n.asm
@@ -0,0 +1,58 @@
+dnl  SPARC v9 mpn_sublshC_n for T1/T2.
+
+dnl  Copyright 2010 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+C                 cycles/limb
+C UltraSPARC T1:       21
+C UltraSPARC T2:        ?
+
+C INPUT PARAMETERS
+define(`rp', `%o0')
+define(`up', `%o1')
+define(`vp', `%o2')
+define(`n',  `%o3')
+define(`cy', `%o4')
+C Template: rp[i] = up[i] - ((vp[i] << LSH) | bits shifted out of vp[i-1]) - borrow, for n limbs
+ASM_START()
+       REGISTER(%g2,#scratch)
+       REGISTER(%g3,#scratch)
+PROLOGUE(func)
+       mov     0, cy                   C no carry argument is read: %o4 is zeroed here
+       mov     0, %g5                  C no bits are shifted in for the first limb
+       cmp     %g0, cy                 C 0 - 0 clears the carry flag
+L(top):        ldx     [up+0], %o4     C up limb (reuses the cy register)
+       add     up, 8, up
+       ldx     [vp+0], %o5             C vp limb
+       add     vp, 8, vp
+       add     rp, 8, rp               C advanced early; the store below uses rp-8
+C Build the shifted vp limb in %g4 and keep the bits that fall off the top in %g5
+       sllx    %o5, LSH, %g4
+       add     n, -1, n
+       or      %g5, %g4, %g4           C merge the bits left over from the previous vp limb
+       srlx    %o5, RSH, %g5           C bits to merge into the next limb
+C subccc borrows through the 32-bit carry, so the upper halves are subtracted again to obtain the 64-bit borrow
+       srlx    %o4, 32, %g1            C upper half of the up limb
+       srlx    %g4, 32, %g2            C upper half of the shifted vp limb
+       subccc  %o4, %g4, %g3           C result limb = up - shifted vp - borrow
+       subccc  %g1, %g2, %g0           C flags only: the result goes to %g0
+       brgz    n, L(top)
+        stx    %g3, [rp-8]             C delay slot: store the result limb
+C Return value: bits shifted out of the last vp limb plus the final borrow
+       retl
+       addc    %g5, %g0, %o0
+EPILOGUE()
diff --git a/mpn/sparc64/ultrasparct1/submul_1.asm b/mpn/sparc64/ultrasparct1/submul_1.asm

new file mode 100644 (file)

index 0000000..9023ea2
--- /dev/null
+++ b/mpn/sparc64/ultrasparct1/submul_1.asm
@@ -0,0 +1,75 @@
+dnl  SPARC v9 mpn_submul_1 for T1/T2.
+
+dnl  Copyright 2010 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C                 cycles/limb
+C UltraSPARC T1:       74
+C UltraSPARC T2:        ?
+
+C INPUT PARAMETERS
+define(`rp', `%i0')
+define(`up', `%i1')
+define(`n',  `%i2')
+define(`v0', `%i3')
+C Subtracts up[i] * v0 from rp[i] for n limbs and returns the carry limb; each 64x64 product is built from four 32x32 mulx results
+ASM_START()
+       REGISTER(%g2,#scratch)
+       REGISTER(%g3,#scratch)
+PROLOGUE(mpn_submul_1)
+       save    %sp, -176, %sp          C new register window and stack frame
+       mov     1, %o2
+       mov     %i0, %g2                C keep rp in %g2; %i0 becomes the carry limb
+       srlx    %i3, 32, %o4            C upper 32 bits of v0
+       sllx    %o2, 32, %o2            C 2^32, used to fold a cross-term carry into the high product
+       srl     %i3, 0, %i3             C lower 32 bits of v0
+       mov     0, %g3                  C byte offset into up and rp
+       mov     0, %i0                  C carry limb starts at zero
+C Per limb: form the 128-bit product, add the carry limb to its low half, subtract that from rp[i], collect the new carry limb
+L(top):        ldx     [%i1+%g3], %g1  C source limb
+       srl     %g1, 0, %g4             C lower half of the source limb
+       mulx    %g4, %i3, %o5           C low * low
+       srlx    %g1, 32, %g1            C upper half of the source limb
+       mulx    %g1, %i3, %g5           C high * low
+       mulx    %g4, %o4, %g4           C low * high
+       mulx    %g1, %o4, %g1           C high * high
+       srlx    %o5, 32, %o1
+       add     %g5, %o1, %o1
+       addcc   %o1, %g4, %g4           C sum of the cross terms; carry flag set if it wraps
+       srl     %o5, 0, %o0             C lower 32 bits of low * low
+       ldx     [%g2+%g3], %o5          C destination limb
+       sllx    %g4, 32, %o1
+       add     %g1, %o2, %l1
+       movlu   %xcc, %l1, %g1          C cross-term sum wrapped: add 2^32 to the high product
+       add     %o1, %o0, %l0           C low 64 bits of the product
+       addcc   %l0, %i0, %g5           C plus the incoming carry limb
+       srlx    %g4, 32, %i0            C upper half of the cross-term sum starts the new carry limb
+       add     %i0, 1, %g4
+       movlu   %xcc, %g4, %i0          C +1 if adding the carry limb wrapped
+       subcc   %o5, %g5, %g5           C destination limb minus (product low half + carry)
+       stx     %g5, [%g2+%g3]
+       add     %i0, 1, %g4
+       movlu   %xcc, %g4, %i0          C +1 if the subtraction borrowed
+       add     %i2, -1, %i2
+       add     %i0, %g1, %i0           C add the high product into the carry limb
+       brnz,pt %i2, L(top)
+        add    %g3, 8, %g3             C delay slot: advance the byte offset
+       return  %i7+8                   C return and restore the window; %i0 becomes the %o0 seen by the caller
+        nop
+EPILOGUE()
diff --git a/mpn/thumb/add_n.asm b/mpn/thumb/add_n.asm

new file mode 100644 (file)

index 0000000..e28a4ad
--- /dev/null
+++ b/mpn/thumb/add_n.asm
@@ -0,0 +1,52 @@
+dnl  ARM/Thumb mpn_add_n.
+
+dnl  Copyright 1997, 2000, 2012 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published by
+dnl  the Free Software Foundation; either version 3 of the License, or (at your
+dnl  option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C INPUT PARAMETERS
+define(`rp',   r0)
+define(`up',   r1)
+define(`vp',   r2)
+define(`n',    r3)
+C rp[i] = up[i] + vp[i] + carry for n limbs; the carry is parked in r6 between limbs because the counter update changes the flags
+ASM_START()
+       .thumb
+PROLOGUE(mpn_add_nc)
+       push    {r4, r5, r6}
+       ldr     r6, [sp, #12]           C init carry save register from the word above the 3 pushed registers
+       sub     r6, #1                  C a carry-in of 1 becomes 0, a carry-in of 0 becomes -1
+       b       L(top)
+EPILOGUE()
+PROLOGUE(mpn_add_n)
+       push    {r4, r5, r6}
+       neg     r6, n                   C init carry save register so that the first cmp leaves carry clear
+C r6 encoding: 0 means carry set, -1 means carry clear
+L(top):        ldmia   up!, {r4}               C load next limb from S1
+       cmp     n, r6                   C tricky carry restore: r6 = 0 sets carry, r6 = -1 clears it
+       ldmia   vp!, {r5}               C load next limb from S2
+       adc     r4, r5
+       stmia   rp!, {r4}               C store result limb to RES
+       sbc     r6, r6                  C save carry as 0 (set) or -1 (clear)
+       sub     n, #1
+       bne     L(top)
+C Convert the saved carry back to 1 or 0 for the return value
+       add     r0, r6, #1
+       pop     {r4, r5, r6}
+       bx      lr
+EPILOGUE()
diff --git a/mpn/thumb/sub_n.asm b/mpn/thumb/sub_n.asm

new file mode 100644 (file)

index 0000000..7ba7375
--- /dev/null
+++ b/mpn/thumb/sub_n.asm
@@ -0,0 +1,52 @@
+dnl  ARM/Thumb mpn_sub_n.
+
+dnl  Copyright 1997, 2000, 2012 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published by
+dnl  the Free Software Foundation; either version 3 of the License, or (at your
+dnl  option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C INPUT PARAMETERS
+define(`rp',   r0)
+define(`up',   r1)
+define(`vp',   r2)
+define(`n',    r3)
+C rp[i] = up[i] - vp[i] - borrow for n limbs; the borrow is parked in r6 between limbs because the counter update changes the flags
+ASM_START()
+       .thumb
+PROLOGUE(mpn_sub_nc)
+       push    {r4, r5, r6}
+       ldr     r6, [sp, #12]           C init carry save register from the word above the 3 pushed registers
+       neg     r6, r6                  C a borrow-in of 1 becomes -1, a borrow-in of 0 stays 0
+       b       L(top)
+EPILOGUE()
+PROLOGUE(mpn_sub_n)
+       push    {r4, r5, r6}
+       mov     r6, n                   C init carry save register so that the first cmp sets carry (no borrow)
+C r6 encoding: 0 means no borrow (carry set), -1 means borrow (carry clear)
+L(top):        ldmia   up!, {r4}               C load next limb from S1
+       cmp     n, r6                   C tricky carry restore: r6 = 0 sets carry, r6 = -1 clears it
+       ldmia   vp!, {r5}               C load next limb from S2
+       sbc     r4, r5
+       stmia   rp!, {r4}               C store result limb to RES
+       sbc     r6, r6                  C save borrow as 0 (none) or -1 (borrow)
+       sub     n, #1
+       bne     L(top)
+C Convert the saved borrow back to 1 or 0 for the return value
+       neg     r0, r6
+       pop     {r4, r5, r6}
+       bx      lr
+EPILOGUE()
diff --git a/mpn/vax/add_n.asm b/mpn/vax/add_n.asm

new file mode 100644 (file)

index 0000000..1854bec
--- /dev/null
+++ b/mpn/vax/add_n.asm
@@ -0,0 +1,53 @@
+dnl  VAX mpn_add_n -- Add two limb vectors of the same length > 0 and store sum
+dnl  in a third limb vector.
+
+dnl  Copyright 1999, 2000, 2012 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+ASM_START()
+PROLOGUE(mpn_add_n)
+       .word   0x0                     C entry mask: no registers saved
+       movl    16(ap), r0              C limb count
+       movl    12(ap), r1              C second source pointer
+       movl    8(ap), r2               C first source pointer
+       movl    4(ap), r3               C destination pointer
+       mnegl   r0, r5                  C r5 = -count
+       addl2   $3, r0
+       ashl    $-2, r0, r0     C unroll loop count: (count + 3) / 4 passes
+       bicl2   $-4, r5         C keep only the low 2 bits: steps to skip in the first pass
+       movaq   (r5)[r5], r5    C 9x: r5 + 8 * r5, each 3-instruction step being 9 bytes
+       jmp     L(top)[r5]              C enter the unrolled loop part-way through
+C Four add-with-carry steps per pass; the carry from each adwc feeds the next one
+L(top):        movl    (r2)+, r4
+       adwc    (r1)+, r4
+       movl    r4, (r3)+
+       movl    (r2)+, r4
+       adwc    (r1)+, r4
+       movl    r4, (r3)+
+       movl    (r2)+, r4
+       adwc    (r1)+, r4
+       movl    r4, (r3)+
+       movl    (r2)+, r4
+       adwc    (r1)+, r4
+       movl    r4, (r3)+
+       sobgtr  r0, L(top)
+C r0 is zero once the loop falls through, so this leaves just the final carry in r0
+       adwc    r0, r0
+       ret
+EPILOGUE()
diff --git a/mpn/vax/add_n.s b/mpn/vax/add_n.s

deleted file mode 100644 (file)

index 60773cc..0000000
--- a/mpn/vax/add_n.s
+++ /dev/null
@@ -1,59 +0,0 @@
-# VAX __gmpn_add_n -- Add two limb vectors of the same length > 0 and store
-# sum in a third limb vector.
-
-# Copyright 1999, 2000 Free Software Foundation, Inc.
-
-# This file is part of the GNU MP Library.
-
-# The GNU MP Library is free software; you can redistribute it and/or modify
-# it under the terms of the GNU Lesser General Public License as published by
-# the Free Software Foundation; either version 3 of the License, or (at your
-# option) any later version.
-
-# The GNU MP Library is distributed in the hope that it will be useful, but
-# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-# License for more details.
-
-# You should have received a copy of the GNU Lesser General Public License
-# along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
-
-
-# INPUT PARAMETERS
-# res_ptr      (sp + 4)
-# s1_ptr       (sp + 8)
-# s2_ptr       (sp + 12)
-# size         (sp + 16)
-
-.text
-       .align 1
-.globl ___gmpn_add_n
-___gmpn_add_n:
-       .word   0x0
-       movl    16(ap),r0
-       movl    12(ap),r1
-       movl    8(ap),r2
-       movl    4(ap),r3
-       mnegl   r0,r5
-       addl2   $3,r0
-       ashl    $-2,r0,r0       # unroll loop count
-       bicl2   $-4,r5          # mask out low 2 bits
-       movaq   (r5)[r5],r5     # 9x
-       jmp     Loop(r5)
-
-Loop:  movl    (r2)+,r4
-       adwc    (r1)+,r4
-       movl    r4,(r3)+
-       movl    (r2)+,r4
-       adwc    (r1)+,r4
-       movl    r4,(r3)+
-       movl    (r2)+,r4
-       adwc    (r1)+,r4
-       movl    r4,(r3)+
-       movl    (r2)+,r4
-       adwc    (r1)+,r4
-       movl    r4,(r3)+
-       sobgtr  r0,Loop
-
-       adwc    r0,r0
-       ret
diff --git a/mpn/vax/addmul_1.asm b/mpn/vax/addmul_1.asm

new file mode 100644 (file)

index 0000000..a4bd311
--- /dev/null
+++ b/mpn/vax/addmul_1.asm
@@ -0,0 +1,113 @@
+dnl  VAX mpn_addmul_1 -- Multiply a limb vector with a limb and add the result
+dnl  to a second limb vector.
+
+dnl  Copyright 1992, 1994, 1996, 2000, 2012 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+ASM_START()
+PROLOGUE(mpn_addmul_1)
+       .word   0xfc0                   C entry mask: save r6-r11
+       movl    12(ap), r4              C limb count
+       movl    8(ap), r8               C source pointer
+       movl    4(ap), r9               C destination pointer, accumulated in place
+       clrl    r3                      C carry limb used when entering at the middle of a pass
+       incl    r4
+       ashl    $-1, r4, r7             C passes = (count + 1) / 2, two limbs per pass
+       clrl    r11                     C carry limb used when entering at the top of a pass
+       movl    16(ap), r6              C multiplier v0; also sets the sign flag tested next
+       jlss    L(v0_big)               C top bit of v0 set: use the second loop
+       jlbc    r4, L(1)                C odd count: start in the middle of the pass
+C emul is a signed multiply, so v0 is added to the high word whenever the source limb has its top bit set
+C Loop for v0 < 0x80000000
+L(tp1):        movl    (r8)+, r1
+       jlss    L(1n0)                  C source limb has its top bit set
+       emul    r1, r6, $0, r2          C product: r2 = low word, r3 = high word
+       addl2   r11, r2                 C add the carry limb from the previous step
+       adwc    $0, r3
+       addl2   r2, (r9)+               C accumulate into the destination limb
+       adwc    $0, r3
+L(1):  movl    (r8)+, r1
+       jlss    L(1n1)                  C source limb has its top bit set
+L(1p1):        emul    r1, r6, $0, r10 C product: r10 = low word, r11 = high word
+       addl2   r3, r10                 C add the carry limb from the previous step
+       adwc    $0, r11
+       addl2   r10, (r9)+              C accumulate into the destination limb
+       adwc    $0, r11
+C A pass always ends on the r10/r11 pair, so r11 is the carry limb that gets returned
+       sobgtr  r7, L(tp1)
+       movl    r11, r0
+       ret
+C Same two steps for source limbs with the top bit set
+L(1n0):        emul    r1, r6, $0, r2
+       addl2   r11, r2
+       adwc    r6, r3                  C signed-multiply correction: add v0 to the high word
+       addl2   r2, (r9)+
+       adwc    $0, r3
+       movl    (r8)+, r1
+       jgeq    L(1p1)                  C next limb has its top bit clear: rejoin the main path
+L(1n1):        emul    r1, r6, $0, r10
+       addl2   r3, r10
+       adwc    r6, r11                 C signed-multiply correction: add v0 to the high word
+       addl2   r10, (r9)+
+       adwc    $0, r11
+C End of pass for the top-bit-set path
+       sobgtr  r7, L(tp1)
+       movl    r11, r0
+       ret
+C v0 has its top bit set: every step also adds the source limb to the high word
+L(v0_big):
+       jlbc    r4, L(2)
+C The carry out of the first addl2 is folded in by the adwc that applies the correction
+C Loop for v0 >= 0x80000000
+L(tp2):        movl    (r8)+, r1
+       jlss    L(2n0)                  C source limb has its top bit set
+       emul    r1, r6, $0, r2
+       addl2   r11, r2
+       adwc    r1, r3                  C signed-multiply correction: add the source limb to the high word
+       addl2   r2, (r9)+
+       adwc    $0, r3
+L(2):  movl    (r8)+, r1
+       jlss    L(2n1)                  C source limb has its top bit set
+L(2p1):        emul    r1, r6, $0, r10
+       addl2   r3, r10
+       adwc    r1, r11                 C signed-multiply correction: add the source limb to the high word
+       addl2   r10, (r9)+
+       adwc    $0, r11
+C End of pass; r11 is the carry limb that gets returned
+       sobgtr  r7, L(tp2)
+       movl    r11, r0
+       ret
+C Both operands have the top bit set: both corrections are applied
+L(2n0):        emul    r1, r6, $0, r2
+       addl2   r11, r2
+       adwc    r6, r3                  C correction for the source limb: add v0
+       addl2   r2, (r9)+
+       adwc    r1, r3                  C correction for v0: add the source limb
+       movl    (r8)+, r1
+       jgeq    L(2p1)                  C next limb has its top bit clear: rejoin the main path
+L(2n1):        emul    r1, r6, $0, r10
+       addl2   r3, r10
+       adwc    r6, r11                 C correction for the source limb: add v0
+       addl2   r10, (r9)+
+       adwc    r1, r11                 C correction for v0: add the source limb
+C End of pass for the path with both top bits set
+       sobgtr  r7, L(tp2)
+       movl    r11, r0
+       ret
+EPILOGUE()
diff --git a/mpn/vax/addmul_1.s b/mpn/vax/addmul_1.s

deleted file mode 100644 (file)

index e2f86e0..0000000
--- a/mpn/vax/addmul_1.s
+++ /dev/null
@@ -1,124 +0,0 @@
-# VAX __gmpn_addmul_1 -- Multiply a limb vector with a limb and add
-# the result to a second limb vector.
-
-# Copyright 1992, 1994, 1996, 2000 Free Software Foundation, Inc.
-
-# This file is part of the GNU MP Library.
-
-# The GNU MP Library is free software; you can redistribute it and/or modify
-# it under the terms of the GNU Lesser General Public License as published by
-# the Free Software Foundation; either version 3 of the License, or (at your
-# option) any later version.
-
-# The GNU MP Library is distributed in the hope that it will be useful, but
-# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-# License for more details.
-
-# You should have received a copy of the GNU Lesser General Public License
-# along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
-
-
-# INPUT PARAMETERS
-# res_ptr      (sp + 4)
-# s1_ptr       (sp + 8)
-# size         (sp + 12)
-# s2_limb      (sp + 16)
-
-.text
-       .align 1
-.globl ___gmpn_addmul_1
-___gmpn_addmul_1:
-       .word   0xfc0
-       movl    12(ap),r4
-       movl    8(ap),r8
-       movl    4(ap),r9
-       movl    16(ap),r6
-       jlss    s2_big
-
-       clrl    r3
-       incl    r4
-       ashl    $-1,r4,r7
-       jlbc    r4,L1
-       clrl    r11
-
-# Loop for S2_LIMB < 0x80000000
-Loop1: movl    (r8)+,r1
-       jlss    L1n0
-       emul    r1,r6,$0,r2
-       addl2   r11,r2
-       adwc    $0,r3
-       addl2   r2,(r9)+
-       adwc    $0,r3
-L1:    movl    (r8)+,r1
-       jlss    L1n1
-L1p1:  emul    r1,r6,$0,r10
-       addl2   r3,r10
-       adwc    $0,r11
-       addl2   r10,(r9)+
-       adwc    $0,r11
-
-       sobgtr  r7,Loop1
-       movl    r11,r0
-       ret
-
-L1n0:  emul    r1,r6,$0,r2
-       addl2   r11,r2
-       adwc    r6,r3
-       addl2   r2,(r9)+
-       adwc    $0,r3
-       movl    (r8)+,r1
-       jgeq    L1p1
-L1n1:  emul    r1,r6,$0,r10
-       addl2   r3,r10
-       adwc    r6,r11
-       addl2   r10,(r9)+
-       adwc    $0,r11
-
-       sobgtr  r7,Loop1
-       movl    r11,r0
-       ret
-
-
-s2_big:        clrl    r3
-       incl    r4
-       ashl    $-1,r4,r7
-       jlbc    r4,L2
-       clrl    r11
-
-# Loop for S2_LIMB >= 0x80000000
-Loop2: movl    (r8)+,r1
-       jlss    L2n0
-       emul    r1,r6,$0,r2
-       addl2   r11,r2
-       adwc    r1,r3
-       addl2   r2,(r9)+
-       adwc    $0,r3
-L2:    movl    (r8)+,r1
-       jlss    L2n1
-L2p1:  emul    r1,r6,$0,r10
-       addl2   r3,r10
-       adwc    r1,r11
-       addl2   r10,(r9)+
-       adwc    $0,r11
-
-       sobgtr  r7,Loop2
-       movl    r11,r0
-       ret
-
-L2n0:  emul    r1,r6,$0,r2
-       addl2   r11,r2
-       adwc    r6,r3
-       addl2   r2,(r9)+
-       adwc    r1,r3
-       movl    (r8)+,r1
-       jgeq    L2p1
-L2n1:  emul    r1,r6,$0,r10
-       addl2   r3,r10
-       adwc    r6,r11
-       addl2   r10,(r9)+
-       adwc    r1,r11
-
-       sobgtr  r7,Loop2
-       movl    r11,r0
-       ret
diff --git a/mpn/vax/elf.m4 b/mpn/vax/elf.m4

new file mode 100644 (file)

index 0000000..4946c98
--- /dev/null
+++ b/mpn/vax/elf.m4
@@ -0,0 +1,43 @@
+divert(-1)
+
+dnl  m4 macros for VAX assembler.
+
+dnl  Copyright 2001, 2012 Free Software Foundation, Inc.
+dnl
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
+dnl
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+dnl  Give each bare register name a percent-prefixed spelling.  NOTE(review): defreg is defined outside this file; presumably the empty quote pair stops the result being rescanned as the same macro - confirm.
+defreg(r0,`%r``''0')
+defreg(r1,`%r``''1')
+defreg(r2,`%r``''2')
+defreg(r3,`%r``''3')
+defreg(r4,`%r``''4')
+defreg(r5,`%r``''5')
+defreg(r6,`%r``''6')
+defreg(r7,`%r``''7')
+defreg(r8,`%r``''8')
+defreg(r9,`%r``''9')
+defreg(r10,`%r``''10')
+defreg(r11,`%r``''11')
+defreg(r12,`%r``''12')
+defreg(r13,`%r``''13')
+defreg(r14,`%r``''14')
+defreg(r15,`%r``''15')
+defreg(ap,`%a``''p')
+dnl  NOTE(review): the foo macro below looks like a leftover experiment; nothing visible here uses it - confirm before relying on or removing it.
+define(`foo', blablabla)
+
+divert
diff --git a/mpn/vax/lshift.asm b/mpn/vax/lshift.asm

new file mode 100644 (file)

index 0000000..dd22c64
--- /dev/null
+++ b/mpn/vax/lshift.asm
@@ -0,0 +1,48 @@
+dnl  VAX mpn_lshift -- left shift.
+
+dnl  Copyright 1999, 2000, 2001, 2012 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+ASM_START()
+PROLOGUE(mpn_lshift)
+       .word   0x1c0                   C entry mask: save r6-r8
+       movl    4(ap), r7               C destination pointer
+       movl    8(ap), r6               C source pointer
+       movl    12(ap), r1              C limb count
+       movl    16(ap), r8              C shift count
+C Work from the most significant limb downwards
+       moval   (r6)[r1], r6            C one past the last source limb
+       moval   (r7)[r1], r7            C one past the last destination limb
+       clrl    r3                      C nothing above the top limb
+       movl    -(r6), r2               C top source limb
+       ashq    r8, r2, r4              C shift the r3:r2 pair; result in r5:r4
+       movl    r5, r0                  C return value: bits shifted out of the top limb
+       movl    r2, r3                  C current limb becomes the upper half of the next pair
+       decl    r1
+       jeql    L(end)                  C single limb: skip the loop
+C Each pass shifts the pair (previous limb : current limb) and stores the upper word of the result
+L(top):        movl    -(r6), r2
+       ashq    r8, r2, r4
+       movl    r5, -(r7)
+       movl    r2, r3
+       sobgtr  r1, L(top)
+C r4 holds the lowest source limb shifted left, which is the lowest result limb
+L(end):        movl    r4, -4(r7)
+       ret
+EPILOGUE()
diff --git a/mpn/vax/lshift.s b/mpn/vax/lshift.s

deleted file mode 100644 (file)

index 6f3d600..0000000
--- a/mpn/vax/lshift.s
+++ /dev/null
@@ -1,56 +0,0 @@
-# VAX mpn_lshift -- left shift.
-
-# Copyright 1999, 2000, 2001 Free Software Foundation, Inc.
-
-# This file is part of the GNU MP Library.
-
-# The GNU MP Library is free software; you can redistribute it and/or modify
-# it under the terms of the GNU Lesser General Public License as published by
-# the Free Software Foundation; either version 3 of the License, or (at your
-# option) any later version.
-
-# The GNU MP Library is distributed in the hope that it will be useful, but
-# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-# License for more details.
-
-# You should have received a copy of the GNU Lesser General Public License
-# along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
-
-
-# INPUT PARAMETERS
-# rptr         (sp + 4)
-# sptr         (sp + 8)
-# size         (sp + 12)
-# cnt          (sp + 16)
-# r0=retval r1=size r2,r3=itmp r4,r5=otmp      call-used registers
-# r6=sptr r7=rptr r8=cnt r9 r10 r11            call-saved registers
-
-.text
-       .align 1
-.globl ___gmpn_lshift
-___gmpn_lshift:
-       .word   0x1c0
-       movl    4(ap),r7
-       movl    8(ap),r6
-       movl    12(ap),r1
-       movl    16(ap),r8
-
-       moval   (r6)[r1],r6
-       moval   (r7)[r1],r7
-       clrl    r3
-       movl    -(r6),r2
-       ashq    r8,r2,r4
-       movl    r5,r0
-       movl    r2,r3
-       decl    r1
-       jeql    Lend
-
-Loop:  movl    -(r6),r2
-       ashq    r8,r2,r4
-       movl    r5,-(r7)
-       movl    r2,r3
-       sobgtr  r1,Loop
-
-Lend:  movl    r4,-4(r7)
-       ret
diff --git a/mpn/vax/mul_1.asm b/mpn/vax/mul_1.asm

new file mode 100644 (file)

index 0000000..1588476
--- /dev/null
+++ b/mpn/vax/mul_1.asm
@@ -0,0 +1,107 @@
+dnl  VAX mpn_mul_1 -- Multiply a limb vector with a limb and store the result
+dnl  in a second limb vector.
+
+dnl  Copyright 1992, 1994, 1996, 2000, 2012 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+ASM_START()
+PROLOGUE(mpn_mul_1)
+       .word   0xfc0           C entry mask 0xfc0: bits 6-11 set, i.e. r6-r11
+       movl    12(ap), r4      C r4 = size
+       movl    8(ap), r8       C r8 = s1_ptr, the source limb vector
+       movl    4(ap), r9       C r9 = res_ptr, the destination limb vector
+       clrl    r3              C r3 = 0: carry-in limb used when entering at L(1) or L(2)
+       incl    r4
+       ashl    $-1, r4, r7     C r7 = (size+1)>>1, count of two-limb loop passes
+       clrl    r11             C r11 = 0: carry-in limb used when entering at L(tp1) or L(tp2)
+       movl    16(ap), r6      C r6 = v0, the multiplier limb
+       jlss    L(v0_big)       C v0 negative as a signed value: use the second loop
+       jlbc    r4, L(1)        C low bit of size+1 clear (size odd): enter at the second half
+
+C Loop for v0 < 0x80000000
+L(tp1):        movl    (r8)+, r1       C load next source limb, advance s1_ptr
+       jlss    L(1n0)          C limb has its top bit set: take the fixup path
+       emul    r1, r6, $0, r2  C r3:r2 = r1 * r6 + 0, a signed 64-bit product
+       addl2   r11, r2         C add the carry limb left by the previous product
+       adwc    $0, r3          C propagate the carry into the high word
+       movl    r2, (r9)+       C store result limb, advance res_ptr
+L(1):  movl    (r8)+, r1       C second limb of the pass
+       jlss    L(1n1)
+L(1p1):        emul    r1, r6, $0, r10 C r11:r10 = r1 * r6 + 0
+       addl2   r3, r10         C add the carry limb from the first half
+       adwc    $0, r11
+       movl    r10, (r9)+
+
+       sobgtr  r7, L(tp1)      C decrement r7, loop while it is still > 0
+       movl    r11, r0         C return the last high word as the carry-out limb
+       ret
+
+L(1n0):        emul    r1, r6, $0, r2
+       addl2   r11, r2
+       adwc    r6, r3          C limb was negative as signed: add v0 (plus carry) to the high word
+       movl    r2, (r9)+
+       movl    (r8)+, r1
+       jgeq    L(1p1)          C next limb non-negative: rejoin the plain path
+L(1n1):        emul    r1, r6, $0, r10
+       addl2   r3, r10
+       adwc    r6, r11         C same fixup for the second half of the pass
+       movl    r10, (r9)+
+
+       sobgtr  r7, L(tp1)
+       movl    r11, r0
+       ret
+
+L(v0_big):
+       jlbc    r4, L(2)        C size odd: enter at the second half
+
+C Loop for v0 >= 0x80000000
+L(tp2):        movl    (r8)+, r1
+       jlss    L(2n0)          C limb also has its top bit set: both fixups needed
+       emul    r1, r6, $0, r2
+       addl2   r11, r2
+       adwc    r1, r3          C v0 negative as signed: add the limb (plus carry) to the high word
+       movl    r2, (r9)+
+L(2):  movl    (r8)+, r1
+       jlss    L(2n1)
+L(2p1):        emul    r1, r6, $0, r10
+       addl2   r3, r10
+       adwc    r1, r11
+       movl    r10, (r9)+
+
+       sobgtr  r7, L(tp2)
+       movl    r11, r0
+       ret
+
+L(2n0):        emul    r1, r6, $0, r2
+       addl2   r1, r3          C both operands negative as signed: add the limb first ...
+       addl2   r11, r2
+       adwc    r6, r3          C ... then v0 together with the carry from the low word
+       movl    r2, (r9)+
+       movl    (r8)+, r1
+       jgeq    L(2p1)
+L(2n1):        emul    r1, r6, $0, r10
+       addl2   r1, r11
+       addl2   r3, r10
+       adwc    r6, r11
+       movl    r10, (r9)+
+
+       sobgtr  r7, L(tp2)
+       movl    r11, r0
+       ret
+EPILOGUE()
diff --git a/mpn/vax/mul_1.s b/mpn/vax/mul_1.s

deleted file mode 100644 (file)

index c6f4594..0000000
--- a/mpn/vax/mul_1.s
+++ /dev/null
@@ -1,121 +0,0 @@
-# VAX __gmpn_mul_1 -- Multiply a limb vector with a limb and store
-# the result in a second limb vector.
-
-# Copyright 1992, 1994, 1996, 2000 Free Software Foundation, Inc.
-
-# This file is part of the GNU MP Library.
-
-# The GNU MP Library is free software; you can redistribute it and/or modify
-# it under the terms of the GNU Lesser General Public License as published by
-# the Free Software Foundation; either version 3 of the License, or (at your
-# option) any later version.
-
-# The GNU MP Library is distributed in the hope that it will be useful, but
-# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-# License for more details.
-
-# You should have received a copy of the GNU Lesser General Public License
-# along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
-
-
-# INPUT PARAMETERS
-# res_ptr      (sp + 4)
-# s1_ptr       (sp + 8)
-# size         (sp + 12)
-# s2_limb      (sp + 16)
-
-.text
-       .align 1
-.globl ___gmpn_mul_1
-___gmpn_mul_1:
-       .word   0xfc0
-       movl    12(ap),r4
-       movl    8(ap),r8
-       movl    4(ap),r9
-       movl    16(ap),r6
-       jlss    s2_big
-
-# One might want to combine the addl2 and the store below, but that
-# is actually just slower according to my timing tests.  (VAX 3600)
-
-       clrl    r3
-       incl    r4
-       ashl    $-1,r4,r7
-       jlbc    r4,L1
-       clrl    r11
-
-# Loop for S2_LIMB < 0x80000000
-Loop1: movl    (r8)+,r1
-       jlss    L1n0
-       emul    r1,r6,$0,r2
-       addl2   r11,r2
-       adwc    $0,r3
-       movl    r2,(r9)+
-L1:    movl    (r8)+,r1
-       jlss    L1n1
-L1p1:  emul    r1,r6,$0,r10
-       addl2   r3,r10
-       adwc    $0,r11
-       movl    r10,(r9)+
-
-       sobgtr  r7,Loop1
-       movl    r11,r0
-       ret
-
-L1n0:  emul    r1,r6,$0,r2
-       addl2   r11,r2
-       adwc    r6,r3
-       movl    r2,(r9)+
-       movl    (r8)+,r1
-       jgeq    L1p1
-L1n1:  emul    r1,r6,$0,r10
-       addl2   r3,r10
-       adwc    r6,r11
-       movl    r10,(r9)+
-
-       sobgtr  r7,Loop1
-       movl    r11,r0
-       ret
-
-
-s2_big:        clrl    r3
-       incl    r4
-       ashl    $-1,r4,r7
-       jlbc    r4,L2
-       clrl    r11
-
-# Loop for S2_LIMB >= 0x80000000
-Loop2: movl    (r8)+,r1
-       jlss    L2n0
-       emul    r1,r6,$0,r2
-       addl2   r11,r2
-       adwc    r1,r3
-       movl    r2,(r9)+
-L2:    movl    (r8)+,r1
-       jlss    L2n1
-L2p1:  emul    r1,r6,$0,r10
-       addl2   r3,r10
-       adwc    r1,r11
-       movl    r10,(r9)+
-
-       sobgtr  r7,Loop2
-       movl    r11,r0
-       ret
-
-L2n0:  emul    r1,r6,$0,r2
-       addl2   r1,r3
-       addl2   r11,r2
-       adwc    r6,r3
-       movl    r2,(r9)+
-       movl    (r8)+,r1
-       jgeq    L2p1
-L2n1:  emul    r1,r6,$0,r10
-       addl2   r1,r11
-       addl2   r3,r10
-       adwc    r6,r11
-       movl    r10,(r9)+
-
-       sobgtr  r7,Loop2
-       movl    r11,r0
-       ret
diff --git a/mpn/vax/rshift.asm b/mpn/vax/rshift.asm

new file mode 100644 (file)

index 0000000..ffa8806
--- /dev/null
+++ b/mpn/vax/rshift.asm
@@ -0,0 +1,46 @@
+dnl  VAX mpn_rshift -- right shift.
+
+dnl  Copyright 1999, 2000, 2001, 2012 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+ASM_START()
+PROLOGUE(mpn_rshift)
+       .word   0x1c0           C entry mask 0x1c0: bits 6-8 set, i.e. r6-r8
+       movl    4(ap), r7       C r7 = rptr, the destination limb vector
+       movl    8(ap), r6       C r6 = sptr, the source limb vector
+       movl    12(ap), r1      C r1 = size
+       movl    16(ap), r8      C r8 = cnt, the right-shift count
+
+       movl    (r6)+, r2       C r2 = lowest source limb
+       subl3   r8, $32, r8     C r8 = 32 - cnt, used as a left-shift count from here on
+       ashl    r8, r2, r0      C r0 = lowest limb << (32-cnt): the bits shifted out, returned
+       decl    r1
+       jeql    L(end)          C size was 1: only the final limb remains
+
+L(top):        movl    (r6)+, r3       C r3 = next higher source limb
+       ashq    r8, r2, r4      C r5:r4 = (r3:r2) << (32-cnt)
+       movl    r5, (r7)+       C high half is the shifted result limb
+       movl    r3, r2          C slide the window up one limb
+       sobgtr  r1, L(top)
+
+L(end):        clrl    r3              C nothing above the top limb: shift in zeros
+       ashq    r8, r2, r4
+       movl    r5, (r7)        C store the most significant result limb
+       ret
+EPILOGUE()
diff --git a/mpn/vax/rshift.s b/mpn/vax/rshift.s

deleted file mode 100644 (file)

index ae27208..0000000
--- a/mpn/vax/rshift.s
+++ /dev/null
@@ -1,54 +0,0 @@
-# VAX mpn_rshift -- right shift.
-
-# Copyright 1999, 2000, 2001 Free Software Foundation, Inc.
-
-# This file is part of the GNU MP Library.
-
-# The GNU MP Library is free software; you can redistribute it and/or modify
-# it under the terms of the GNU Lesser General Public License as published by
-# the Free Software Foundation; either version 3 of the License, or (at your
-# option) any later version.
-
-# The GNU MP Library is distributed in the hope that it will be useful, but
-# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-# License for more details.
-
-# You should have received a copy of the GNU Lesser General Public License
-# along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
-
-
-# INPUT PARAMETERS
-# rptr         (sp + 4)
-# sptr         (sp + 8)
-# size         (sp + 12)
-# cnt          (sp + 16)
-# r0=retval r1=size r2,r3=itmp r4,r5=otmp      call-used registers
-# r6=sptr r7=rptr r8=cnt r9 r10 r11            call-saved registers
-
-.text
-       .align 1
-.globl ___gmpn_rshift
-___gmpn_rshift:
-       .word   0x1c0
-       movl    4(ap),r7
-       movl    8(ap),r6
-       movl    12(ap),r1
-       movl    16(ap),r8
-
-       movl    (r6)+,r2
-       subl3   r8,$32,r8
-       ashl    r8,r2,r0
-       decl    r1
-       jeql    Lend
-
-Loop:  movl    (r6)+,r3
-       ashq    r8,r2,r4
-       movl    r5,(r7)+
-       movl    r3,r2
-       sobgtr  r1,Loop
-
-Lend:  clrl    r3
-       ashq    r8,r2,r4
-       movl    r5,(r7)
-       ret
diff --git a/mpn/vax/sub_n.asm b/mpn/vax/sub_n.asm

new file mode 100644 (file)

index 0000000..4504497
--- /dev/null
+++ b/mpn/vax/sub_n.asm
@@ -0,0 +1,53 @@
+dnl  VAX mpn_sub_n -- Subtract two limb vectors of the same length > 0 and
+dnl  store difference in a third limb vector.
+
+dnl  Copyright 1999, 2000, 2012 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+ASM_START()
+PROLOGUE(mpn_sub_n)
+       .word   0x0             C entry mask 0: no bits set
+       movl    16(ap), r0      C r0 = size
+       movl    12(ap), r1      C r1 = s2_ptr, the subtrahend limbs
+       movl    8(ap), r2       C r2 = s1_ptr, the minuend limbs
+       movl    4(ap), r3       C r3 = res_ptr, the destination limbs
+       mnegl   r0, r5          C r5 = -size
+       addl2   $3, r0
+       ashl    $-2, r0, r0     C unroll loop count: (size+3)>>2
+       bicl2   $-4, r5         C keep only the low 2 bits of -size: limb groups to skip
+       movaq   (r5)[r5], r5    C 9x: r5 + 8*r5
+       jmp     L(top)[r5]      C enter the loop 9*r5 bytes in; presumably 9 bytes per limb group - TODO confirm
+
+L(top):        movl    (r2)+, r4       C r4 = next s1 limb
+       sbwc    (r1)+, r4       C r4 = r4 - next s2 limb - borrow
+       movl    r4, (r3)+       C store difference limb
+       movl    (r2)+, r4
+       sbwc    (r1)+, r4
+       movl    r4, (r3)+
+       movl    (r2)+, r4
+       sbwc    (r1)+, r4
+       movl    r4, (r3)+
+       movl    (r2)+, r4
+       sbwc    (r1)+, r4
+       movl    r4, (r3)+
+       sobgtr  r0, L(top)      C decrement r0, loop while it is still > 0
+
+       adwc    r0, r0          C r0 = r0 + r0 + carry; presumably r0 is 0 here, leaving the borrow as return value
+       ret
+EPILOGUE()
diff --git a/mpn/vax/sub_n.s b/mpn/vax/sub_n.s

deleted file mode 100644 (file)

index c9ad1ec..0000000
--- a/mpn/vax/sub_n.s
+++ /dev/null
@@ -1,59 +0,0 @@
-# VAX __gmpn_sub_n -- Subtract two limb vectors of the same length > 0 and store
-# difference in a third limb vector.
-
-# Copyright 1999, 2000 Free Software Foundation, Inc.
-
-# This file is part of the GNU MP Library.
-
-# The GNU MP Library is free software; you can redistribute it and/or modify
-# it under the terms of the GNU Lesser General Public License as published by
-# the Free Software Foundation; either version 3 of the License, or (at your
-# option) any later version.
-
-# The GNU MP Library is distributed in the hope that it will be useful, but
-# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-# License for more details.
-
-# You should have received a copy of the GNU Lesser General Public License
-# along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
-
-
-# INPUT PARAMETERS
-# res_ptr      (sp + 4)
-# s1_ptr       (sp + 8)
-# s2_ptr       (sp + 12)
-# size         (sp + 16)
-
-.text
-       .align 1
-.globl ___gmpn_sub_n
-___gmpn_sub_n:
-       .word   0x0
-       movl    16(ap),r0
-       movl    12(ap),r1
-       movl    8(ap),r2
-       movl    4(ap),r3
-       mnegl   r0,r5
-       addl2   $3,r0
-       ashl    $-2,r0,r0       # unroll loop count
-       bicl2   $-4,r5          # mask out low 2 bits
-       movaq   (r5)[r5],r5     # 9x
-       jmp     Loop(r5)
-
-Loop:  movl    (r2)+,r4
-       sbwc    (r1)+,r4
-       movl    r4,(r3)+
-       movl    (r2)+,r4
-       sbwc    (r1)+,r4
-       movl    r4,(r3)+
-       movl    (r2)+,r4
-       sbwc    (r1)+,r4
-       movl    r4,(r3)+
-       movl    (r2)+,r4
-       sbwc    (r1)+,r4
-       movl    r4,(r3)+
-       sobgtr  r0,Loop
-
-       adwc    r0,r0
-       ret
diff --git a/mpn/vax/submul_1.asm b/mpn/vax/submul_1.asm

new file mode 100644 (file)

index 0000000..06277d1
--- /dev/null
+++ b/mpn/vax/submul_1.asm
@@ -0,0 +1,113 @@
+dnl  VAX mpn_submul_1 -- Multiply a limb vector with a limb and subtract the
+dnl  result from a second limb vector.
+
+dnl  Copyright 1992, 1994, 1996, 2000, 2012 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+ASM_START()
+PROLOGUE(mpn_submul_1)
+       .word   0xfc0           C entry mask 0xfc0: bits 6-11 set, i.e. r6-r11
+       movl    12(ap), r4      C r4 = size
+       movl    8(ap), r8       C r8 = s1_ptr, the limbs that get multiplied
+       movl    4(ap), r9       C r9 = res_ptr, the limbs that get subtracted from
+       clrl    r3              C r3 = 0: carry-in limb used when entering at L(1) or L(2)
+       incl    r4
+       ashl    $-1, r4, r7     C r7 = (size+1)>>1, count of two-limb loop passes
+       clrl    r11             C r11 = 0: carry-in limb used when entering at L(tp1) or L(tp2)
+       movl    16(ap), r6      C r6 = v0, the multiplier limb
+       jlss    L(v0_big)       C v0 negative as a signed value: use the second loop
+       jlbc    r4, L(1)        C low bit of size+1 clear (size odd): enter at the second half
+
+C Loop for v0 < 0x80000000
+L(tp1):        movl    (r8)+, r1       C load next source limb, advance s1_ptr
+       jlss    L(1n0)          C limb has its top bit set: take the fixup path
+       emul    r1, r6, $0, r2  C r3:r2 = r1 * r6 + 0, a signed 64-bit product
+       addl2   r11, r2         C add the carry limb left by the previous step
+       adwc    $0, r3          C propagate the carry into the high word
+       subl2   r2, (r9)+       C subtract the low word from the result limb, advance res_ptr
+       adwc    $0, r3          C presumably folds the borrow of that subtract into the carry limb
+L(1):  movl    (r8)+, r1       C second limb of the pass
+       jlss    L(1n1)
+L(1p1):        emul    r1, r6, $0, r10 C r11:r10 = r1 * r6 + 0
+       addl2   r3, r10         C add the carry limb from the first half
+       adwc    $0, r11
+       subl2   r10, (r9)+
+       adwc    $0, r11
+
+       sobgtr  r7, L(tp1)      C decrement r7, loop while it is still > 0
+       movl    r11, r0         C return the last carry limb
+       ret
+
+L(1n0):        emul    r1, r6, $0, r2
+       addl2   r11, r2
+       adwc    r6, r3          C limb was negative as signed: add v0 (plus carry) to the high word
+       subl2   r2, (r9)+
+       adwc    $0, r3
+       movl    (r8)+, r1
+       jgeq    L(1p1)          C next limb non-negative: rejoin the plain path
+L(1n1):        emul    r1, r6, $0, r10
+       addl2   r3, r10
+       adwc    r6, r11         C same fixup for the second half of the pass
+       subl2   r10, (r9)+
+       adwc    $0, r11
+
+       sobgtr  r7, L(tp1)
+       movl    r11, r0
+       ret
+
+L(v0_big):
+       jlbc    r4, L(2)        C size odd: enter at the second half
+
+C Loop for v0 >= 0x80000000
+L(tp2):        movl    (r8)+, r1
+       jlss    L(2n0)          C limb also has its top bit set: both fixups needed
+       emul    r1, r6, $0, r2
+       addl2   r11, r2
+       adwc    r1, r3          C v0 negative as signed: add the limb (plus carry) to the high word
+       subl2   r2, (r9)+
+       adwc    $0, r3
+L(2):  movl    (r8)+, r1
+       jlss    L(2n1)
+L(2p1):        emul    r1, r6, $0, r10
+       addl2   r3, r10
+       adwc    r1, r11
+       subl2   r10, (r9)+
+       adwc    $0, r11
+
+       sobgtr  r7, L(tp2)
+       movl    r11, r0
+       ret
+
+L(2n0):        emul    r1, r6, $0, r2
+       addl2   r11, r2
+       adwc    r6, r3          C first fixup: add v0 together with the carry from the low word
+       subl2   r2, (r9)+
+       adwc    r1, r3          C second fixup: add the limb together with the carry from the subtract
+       movl    (r8)+, r1
+       jgeq    L(2p1)
+L(2n1):        emul    r1, r6, $0, r10
+       addl2   r3, r10
+       adwc    r6, r11
+       subl2   r10, (r9)+
+       adwc    r1, r11
+
+       sobgtr  r7, L(tp2)
+       movl    r11, r0
+       ret
+EPILOGUE()
diff --git a/mpn/vax/submul_1.s b/mpn/vax/submul_1.s

deleted file mode 100644 (file)

index ad0ddbb..0000000
--- a/mpn/vax/submul_1.s
+++ /dev/null
@@ -1,124 +0,0 @@
-# VAX __gmpn_submul_1 -- Multiply a limb vector with a limb and subtract
-# the result from a second limb vector.
-
-# Copyright 1992, 1994, 1996, 2000 Free Software Foundation, Inc.
-
-# This file is part of the GNU MP Library.
-
-# The GNU MP Library is free software; you can redistribute it and/or modify
-# it under the terms of the GNU Lesser General Public License as published by
-# the Free Software Foundation; either version 3 of the License, or (at your
-# option) any later version.
-
-# The GNU MP Library is distributed in the hope that it will be useful, but
-# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-# License for more details.
-
-# You should have received a copy of the GNU Lesser General Public License
-# along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
-
-
-# INPUT PARAMETERS
-# res_ptr      (sp + 4)
-# s1_ptr       (sp + 8)
-# size         (sp + 12)
-# s2_limb      (sp + 16)
-
-.text
-       .align 1
-.globl ___gmpn_submul_1
-___gmpn_submul_1:
-       .word   0xfc0
-       movl    12(ap),r4
-       movl    8(ap),r8
-       movl    4(ap),r9
-       movl    16(ap),r6
-       jlss    s2_big
-
-       clrl    r3
-       incl    r4
-       ashl    $-1,r4,r7
-       jlbc    r4,L1
-       clrl    r11
-
-# Loop for S2_LIMB < 0x80000000
-Loop1: movl    (r8)+,r1
-       jlss    L1n0
-       emul    r1,r6,$0,r2
-       addl2   r11,r2
-       adwc    $0,r3
-       subl2   r2,(r9)+
-       adwc    $0,r3
-L1:    movl    (r8)+,r1
-       jlss    L1n1
-L1p1:  emul    r1,r6,$0,r10
-       addl2   r3,r10
-       adwc    $0,r11
-       subl2   r10,(r9)+
-       adwc    $0,r11
-
-       sobgtr  r7,Loop1
-       movl    r11,r0
-       ret
-
-L1n0:  emul    r1,r6,$0,r2
-       addl2   r11,r2
-       adwc    r6,r3
-       subl2   r2,(r9)+
-       adwc    $0,r3
-       movl    (r8)+,r1
-       jgeq    L1p1
-L1n1:  emul    r1,r6,$0,r10
-       addl2   r3,r10
-       adwc    r6,r11
-       subl2   r10,(r9)+
-       adwc    $0,r11
-
-       sobgtr  r7,Loop1
-       movl    r11,r0
-       ret
-
-
-s2_big:        clrl    r3
-       incl    r4
-       ashl    $-1,r4,r7
-       jlbc    r4,L2
-       clrl    r11
-
-# Loop for S2_LIMB >= 0x80000000
-Loop2: movl    (r8)+,r1
-       jlss    L2n0
-       emul    r1,r6,$0,r2
-       addl2   r11,r2
-       adwc    r1,r3
-       subl2   r2,(r9)+
-       adwc    $0,r3
-L2:    movl    (r8)+,r1
-       jlss    L2n1
-L2p1:  emul    r1,r6,$0,r10
-       addl2   r3,r10
-       adwc    r1,r11
-       subl2   r10,(r9)+
-       adwc    $0,r11
-
-       sobgtr  r7,Loop2
-       movl    r11,r0
-       ret
-
-L2n0:  emul    r1,r6,$0,r2
-       addl2   r11,r2
-       adwc    r6,r3
-       subl2   r2,(r9)+
-       adwc    r1,r3
-       movl    (r8)+,r1
-       jgeq    L2p1
-L2n1:  emul    r1,r6,$0,r10
-       addl2   r3,r10
-       adwc    r6,r11
-       subl2   r10,(r9)+
-       adwc    r1,r11
-
-       sobgtr  r7,Loop2
-       movl    r11,r0
-       ret
diff --git a/mpn/x86/aors_n.asm b/mpn/x86/aors_n.asm

index c8969995c83cc10c388f581064290805c3fc359c..a0412c9390cb78b0dc7b9edd62a36b040cfaab7a 100644 (file)
--- a/mpn/x86/aors_n.asm
+++ b/mpn/x86/aors_n.asm
@@ -22,11 +22,11 @@ include(`../config.m4')
  
  
  C     cycles/limb
-C P5:   3.375
-C P6:   3.125
-C K6:   3.5
-C K7:   2.25
-C P4:   8.75
+C P5   3.375
+C P6   3.125
+C K6   3.5
+C K7   2.25
+C P4   8.75
  
  
  ifdef(`OPERATION_add_n',`
@@ -99,7 +99,7 @@ L(0a):        leal    (%eax,%eax,8),%eax
         C possible to simplify.
         pushl   %ebp            FRAME_pushl()
         movl    PARAM_CARRY,%ebp
-       shrl    $1,%ebp                 C shift bit 0 into carry
+       shrl    %ebp                    C shift bit 0 into carry
         popl    %ebp            FRAME_popl()
  
         jmp     *%eax                   C jump into loop
@@ -148,7 +148,7 @@ L(0b):      leal    (%eax,%eax,8),%eax
  L(oopgo):
         pushl   %ebp            FRAME_pushl()
         movl    PARAM_CARRY,%ebp
-       shrl    $1,%ebp                 C shift bit 0 into carry
+       shrl    %ebp                    C shift bit 0 into carry
         popl    %ebp            FRAME_popl()
  
         ALIGN(16)
diff --git a/mpn/x86/aorsmul_1.asm b/mpn/x86/aorsmul_1.asm

index b4db4276572927f8ac813152b2d8a977cca1c71b..de49443d6cbdc3aeb5a24f434f58ab38650942c7 100644 (file)
--- a/mpn/x86/aorsmul_1.asm
+++ b/mpn/x86/aorsmul_1.asm
@@ -21,20 +21,21 @@ dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
  
  include(`../config.m4')
  
-
-C                           cycles/limb
-C P5:                           14.75
-C P6 model 0-8,10-12)            7.5
-C P6 model 9  (Banias)
-C P6 model 13 (Dothan)           6.75
-C P4 model 0  (Willamette)      24.0
-C P4 model 1  (?)               24.0
-C P4 model 2  (Northwood)       24.0
+C                          cycles/limb
+C P5                           14.75
+C P6 model 0-8,10-12            7.5
+C P6 model 9  (Banias)          6.7
+C P6 model 13 (Dothan)          6.75
+C P4 model 0  (Willamette)     24.0
+C P4 model 1  (?)              24.0
+C P4 model 2  (Northwood)      24.0
  C P4 model 3  (Prescott)
  C P4 model 4  (Nocona)
-C K6:                           12.5
-C K7:                            5.25
-C K8:
+C Intel Atom
+C AMD K6                       12.5
+C AMD K7                        5.25
+C AMD K8
+C AMD K10
  
  
  ifdef(`OPERATION_addmul_1',`
diff --git a/mpn/x86/atom/aorrlsh1_n.asm b/mpn/x86/atom/aorrlsh1_n.asm

new file mode 100644 (file)

index 0000000..596c3db
--- /dev/null
+++ b/mpn/x86/atom/aorrlsh1_n.asm
@@ -0,0 +1,42 @@
+dnl  Intel Atom mpn_addlsh1_n/mpn_rsblsh1_n -- rp[] = (vp[] << 1) +- up[]
+
+dnl  Contributed to the GNU project by Marco Bodrato.
+
+dnl  Copyright 2011 Free Software Foundation, Inc.
+dnl
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
+dnl
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+define(LSH, 1)         C left-shift count applied to the vp limbs
+define(RSH, 31)        C 32 - LSH: right shift that leaves the bits shifted out of a limb
+
+ifdef(`OPERATION_addlsh1_n', `
+       define(M4_inst,        adc)
+       define(M4_opp,         sub)
+       define(M4_function,    mpn_addlsh1_n)
+       define(M4_function_c,  mpn_addlsh1_nc)
+',`ifdef(`OPERATION_rsblsh1_n', `
+       define(M4_inst,        sbb)
+       define(M4_opp,         add)
+       define(M4_function,    mpn_rsblsh1_n)
+       define(M4_function_c,  mpn_rsblsh1_nc)
+',`m4_error(`Need OPERATION_addlsh1_n or OPERATION_rsblsh1_n
+')')')
+
+MULFUNC_PROLOGUE(mpn_addlsh1_n mpn_addlsh1_nc mpn_rsblsh1_n mpn_rsblsh1_nc)
+
+include_mpn(`x86/atom/aorrlshC_n.asm')
diff --git a/mpn/x86/atom/aorrlsh2_n.asm b/mpn/x86/atom/aorrlsh2_n.asm

new file mode 100644 (file)

index 0000000..01245fe
--- /dev/null
+++ b/mpn/x86/atom/aorrlsh2_n.asm
@@ -0,0 +1,42 @@
+dnl  Intel Atom mpn_addlsh2_n/mpn_rsblsh2_n -- rp[] = (vp[] << 2) +- up[]
+
+dnl  Contributed to the GNU project by Marco Bodrato.
+
+dnl  Copyright 2011 Free Software Foundation, Inc.
+dnl
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
+dnl
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+define(LSH, 2)         C left-shift count applied to the vp limbs
+define(RSH, 30)        C 32 - LSH: right shift that leaves the bits shifted out of a limb
+
+ifdef(`OPERATION_addlsh2_n', `
+       define(M4_inst,        adcl)
+       define(M4_opp,         subl)
+       define(M4_function,    mpn_addlsh2_n)
+       define(M4_function_c,  mpn_addlsh2_nc)
+',`ifdef(`OPERATION_rsblsh2_n', `
+       define(M4_inst,        sbbl)
+       define(M4_opp,         addl)
+       define(M4_function,    mpn_rsblsh2_n)
+       define(M4_function_c,  mpn_rsblsh2_nc)
+',`m4_error(`Need OPERATION_addlsh2_n or OPERATION_rsblsh2_n
+')')')
+
+MULFUNC_PROLOGUE(mpn_addlsh2_n mpn_addlsh2_nc mpn_rsblsh2_n mpn_rsblsh2_nc)
+
+include_mpn(`x86/atom/aorrlshC_n.asm')
diff --git a/mpn/x86/atom/aorrlshC_n.asm b/mpn/x86/atom/aorrlshC_n.asm

new file mode 100644 (file)

index 0000000..c24dcd8
--- /dev/null
+++ b/mpn/x86/atom/aorrlshC_n.asm
@@ -0,0 +1,145 @@
+dnl  Intel Atom mpn_addlshC_n/mpn_rsblshC_n -- rp[] = (vp[] << C) +- up[]
+
+dnl  Contributed to the GNU project by Marco Bodrato.
+
+dnl  Copyright 2011 Free Software Foundation, Inc.
+dnl
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
+dnl
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C mp_limb_t mpn_addlshC_n (mp_ptr dst, mp_srcptr src1, mp_srcptr src2,
+C                          mp_size_t size);
+C mp_limb_t mpn_addlshC_nc (mp_ptr dst, mp_srcptr src1, mp_srcptr src2,
+C                           mp_size_t size, mp_limb_t carry);
+C mp_limb_t mpn_rsblshC_n (mp_ptr dst, mp_srcptr src1, mp_srcptr src2,
+C                          mp_size_t size);
+C mp_limb_t mpn_rsblshC_nc (mp_ptr dst, mp_srcptr src1, mp_srcptr src2,
+C                           mp_size_t size, mp_signed_limb_t carry);
+
+C                              cycles/limb
+C P5
+C P6 model 0-8,10-12
+C P6 model 9  (Banias)
+C P6 model 13 (Dothan)
+C P4 model 0  (Willamette)
+C P4 model 1  (?)
+C P4 model 2  (Northwood)
+C P4 model 3  (Prescott)
+C P4 model 4  (Nocona)
+C Intel Atom                    6
+C AMD K6
+C AMD K7
+C AMD K8
+C AMD K10
+
+defframe(PARAM_CORB,   20)
+defframe(PARAM_SIZE,   16)
+defframe(PARAM_DBLD,   12)
+defframe(PARAM_SRC,     8)
+defframe(PARAM_DST,     4)
+
+dnl  re-use parameter space
+define(VAR_COUNT,`PARAM_SIZE')
+define(SAVE_EBP,`PARAM_DBLD')
+define(SAVE_VP,`PARAM_SRC')
+define(SAVE_UP,`PARAM_DST')
+
+define(M, eval(m4_lshift(1,LSH)))
+define(`rp',  `%edi')
+define(`up',  `%esi')
+define(`vp',  `%ebx')
+
+ASM_START()
+       TEXT
+       ALIGN(8)
+
+PROLOGUE(M4_function_c)
+deflit(`FRAME',0)
+       movl    PARAM_CORB, %eax        C eax = the carry argument
+       movl    %eax, %edx
+       shr     $LSH, %edx              C edx = carry >> LSH
+       andl    $1, %edx                C keep a single bit; it is moved into CF further down
+       M4_opp  %edx, %eax              C eax = carry adjusted by that bit (M4_opp is a sub or an add)
+       jmp     L(start_nc)             C continue in the shared body of the non-carry entry
+EPILOGUE()
+
+PROLOGUE(M4_function)
+deflit(`FRAME',0)
+
+       xor     %eax, %eax              C no carry argument: eax = 0
+       xor     %edx, %edx              C and edx = 0
+L(start_nc):
+       push    rp                      FRAME_pushl()
+
+       mov     PARAM_SIZE, %ecx        C size
+       mov     PARAM_DST, rp           C rp = dst
+       mov     up, SAVE_UP             C save the old esi in the PARAM_DST slot, which was just read
+       incl    %ecx                    C size + 1
+       mov     PARAM_SRC, up           C up = src1
+       mov     vp, SAVE_VP             C save the old ebx in the PARAM_SRC slot, which was just read
+       shr     %ecx                    C (size+1)\2
+       mov     PARAM_DBLD, vp          C vp = src2, the operand that gets shifted
+       mov     %ebp, SAVE_EBP          C save ebp in the PARAM_DBLD slot, which was just read
+       mov     %ecx, VAR_COUNT         C the loop counter lives in the PARAM_SIZE slot
+       jnc     L(entry)                C size odd
+
+       shr     %edx                    C size even
+       mov     (vp), %ecx              C ecx = lowest vp limb
+       lea     4(vp), vp
+       lea     (%eax,%ecx,M), %edx     C edx = eax + ecx * M, where M is 1 << LSH
+       mov     %ecx, %eax
+       lea     -4(up), up              C step up and rp back one limb so the 4(up) and 4(rp)
+       lea     -4(rp), rp              C accesses at L(enteven) hit the first limb
+       jmp     L(enteven)
+
+       ALIGN(16)
+L(oop):
+       lea     (%eax,%ecx,M), %ebp     C ebp = eax + ecx * M
+       shr     $RSH, %ecx              C ecx = the bits shifted out of that limb
+       mov     4(vp), %eax             C eax = next vp limb
+       shr     %edx                    C move bit 0 of edx into CF (stored there by the adc below)
+       lea     8(vp), vp
+       M4_inst (up), %ebp              C ebp = ebp +/- up limb, with carry
+       lea     (%ecx,%eax,M), %edx     C edx = ecx + eax * M
+       mov     %ebp, (rp)
+L(enteven):
+       M4_inst 4(up), %edx             C edx = edx +/- up limb, with carry
+       lea     8(up), up
+       mov     %edx, 4(rp)
+       adc     %edx, %edx              C park CF in bit 0 of edx; the shr below overwrites the flags
+       shr     $RSH, %eax              C eax = the bits shifted out of that limb
+       lea     8(rp), rp
+L(entry):
+       mov     (vp), %ecx              C ecx = next vp limb
+       decl    VAR_COUNT
+       jnz     L(oop)
+
+       lea     (%eax,%ecx,M), %ebp     C final limb: ebp = eax + ecx * M
+       shr     $RSH, %ecx              C ecx = the bits shifted out of the top limb
+       shr     %edx                    C move bit 0 of edx back into CF
+       mov     SAVE_VP, vp             C restore ebx
+       M4_inst (up), %ebp
+       mov     %ecx, %eax
+       mov     SAVE_UP, up             C restore esi
+       M4_inst $0, %eax                C return value: the shifted-out bits +/- the final carry
+       mov     %ebp, (rp)
+       mov     SAVE_EBP, %ebp          C restore ebp
+       pop     rp                      FRAME_popl()
+       ret
+EPILOGUE()
+ASM_END()
diff --git a/mpn/x86/atom/aors_n.asm b/mpn/x86/atom/aors_n.asm

new file mode 100644 (file)

index 0000000..64f982e
--- /dev/null
+++ b/mpn/x86/atom/aors_n.asm
@@ -0,0 +1,148 @@
+dnl  Intel Atom mpn_add_n/mpn_sub_n -- rp[] = up[] +- vp[].
+
+dnl  Copyright 2011 Free Software Foundation, Inc.
+
+dnl  Contributed to the GNU project by Marco Bodrato.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C                          cycles/limb
+C P5
+C P6 model 0-8,10-12
+C P6 model 9  (Banias)
+C P6 model 13 (Dothan)
+C P4 model 0  (Willamette)
+C P4 model 1  (?)
+C P4 model 2  (Northwood)
+C P4 model 3  (Prescott)
+C P4 model 4  (Nocona)
+C Intel Atom                    3
+C AMD K6
+C AMD K7
+C AMD K8
+C AMD K10
+
+ifdef(`OPERATION_add_n', `
+       define(M4_inst,        adcl)
+       define(M4_function_n,  mpn_add_n)
+       define(M4_function_nc, mpn_add_nc)
+       define(M4_description, add)
+',`ifdef(`OPERATION_sub_n', `
+       define(M4_inst,        sbbl)
+       define(M4_function_n,  mpn_sub_n)
+       define(M4_function_nc, mpn_sub_nc)
+       define(M4_description, subtract)
+',`m4_error(`Need OPERATION_add_n or OPERATION_sub_n
+')')')
+
+MULFUNC_PROLOGUE(mpn_add_n mpn_add_nc mpn_sub_n mpn_sub_nc)
+
+C mp_limb_t M4_function_n (mp_ptr dst, mp_srcptr src1, mp_srcptr src2,
+C                         mp_size_t size);
+C mp_limb_t M4_function_nc (mp_ptr dst, mp_srcptr src1, mp_srcptr src2,
+C                         mp_size_t size, mp_limb_t carry);
+C
+C Calculate src1,size M4_description src2,size, and store the result in
+C dst,size.  The return value is the carry bit from the top of the result (1
+C or 0).
+C
+C The _nc version accepts 1 or 0 for an initial carry into the low limb of
+C the calculation.  Note values other than 1 or 0 here will lead to garbage
+C results.
+
+defframe(PARAM_CARRY,20)
+defframe(PARAM_SIZE, 16)
+defframe(PARAM_SRC2, 12)
+defframe(PARAM_SRC1, 8)
+defframe(PARAM_DST,  4)
+
+dnl  re-use parameter space
+define(SAVE_RP,`PARAM_SIZE')
+define(SAVE_VP,`PARAM_SRC1')
+define(SAVE_UP,`PARAM_DST')
+
+define(`rp',  `%edi')
+define(`up',  `%esi')
+define(`vp',  `%ebx')
+define(`cy',  `%ecx')
+define(`r1',  `%ecx')
+define(`r2',  `%edx')
+
+ASM_START()
+       TEXT
+       ALIGN(16)
+deflit(`FRAME',0)
+
+PROLOGUE(M4_function_n)
+       xor     cy, cy                  C n entry: carry-in is zero
+L(start):
+       mov     PARAM_SIZE, %eax        C size
+       mov     rp, SAVE_RP             C size slot reused to save edi
+       mov     PARAM_DST, rp
+       mov     up, SAVE_UP             C dst slot reused to save esi
+       mov     PARAM_SRC1, up
+       shr     %eax                    C size >> 1; ZF and CF feed jz/jc below
+       mov     vp, SAVE_VP             C src1 slot reused to save ebx
+       mov     PARAM_SRC2, vp
+       jz      L(one)                  C size == 1
+       jc      L(three)                C size % 2 == 1
+C Even size: handle one limb through the second half of the loop first.
+       shr     cy                      C CF = carry-in
+       mov     (up), r2
+       lea     4(up), up
+       lea     4(vp), vp
+       lea     -4(rp), rp              C bias rp for the -4(rp) store at entry
+       jmp     L(entry)
+L(one):
+       shr     cy                      C CF = carry-in
+       mov     (up), r1                C r1 shares ecx with cy; CF already set
+       jmp     L(end)
+L(three):
+       shr     cy                      C CF = carry-in
+       mov     (up), r1
+C Odd size >= 3 falls through into the loop.
+       ALIGN(16)
+L(oop):
+       M4_inst (vp), r1                C adcl or sbbl, chained through CF
+       lea     8(up), up               C lea leaves CF alone
+       mov     -4(up), r2
+       lea     8(vp), vp
+       mov     r1, (rp)
+L(entry):
+       M4_inst -4(vp), r2
+       lea     8(rp), rp
+       dec     %eax                    C dec sets ZF but leaves CF alone
+       mov     (up), r1                C preload limb for next pass or the tail
+       mov     r2, -4(rp)
+       jnz     L(oop)
+
+L(end):                                        C %eax is zero here
+       mov     SAVE_UP, up
+       M4_inst (vp), r1                C last limb
+       mov     SAVE_VP, vp
+       mov     r1, (rp)
+       adc     %eax, %eax              C return final CF as 0 or 1
+       mov     SAVE_RP, rp
+       ret
+EPILOGUE()
+
+PROLOGUE(M4_function_nc)
+       mov     PARAM_CARRY, cy         C nc entry: caller carry-in, 0 or 1
+       jmp     L(start)                C continue in the body of the n entry
+EPILOGUE()
+ASM_END()
diff --git a/mpn/x86/atom/aorslshC_n.asm b/mpn/x86/atom/aorslshC_n.asm

new file mode 100644 (file)

index 0000000..5d6ac98
--- /dev/null
+++ b/mpn/x86/atom/aorslshC_n.asm
@@ -0,0 +1,236 @@
+dnl  Intel Atom mpn_addlshC_n/mpn_sublshC_n -- rp[] = up[] +- (vp[] << C)
+
+dnl  Contributed to the GNU project by Marco Bodrato.
+
+dnl  Copyright 2011 Free Software Foundation, Inc.
+dnl
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
+dnl
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C mp_limb_t mpn_addlshC_n_ip1 (mp_ptr dst, mp_srcptr src, mp_size_t size);
+C mp_limb_t mpn_addlshC_nc_ip1 (mp_ptr dst, mp_srcptr src, mp_size_t size,
+C                              mp_limb_t carry);
+C mp_limb_t mpn_sublshC_n_ip1 (mp_ptr dst, mp_srcptr src, mp_size_t size);
+C mp_limb_t mpn_sublshC_nc_ip1 (mp_ptr dst, mp_srcptr src, mp_size_t size,
+C                              mp_signed_limb_t borrow);
+
+defframe(PARAM_CORB,   16)
+defframe(PARAM_SIZE,   12)
+defframe(PARAM_SRC,     8)
+defframe(PARAM_DST,     4)
+
+C mp_limb_t mpn_addlshC_n (mp_ptr dst, mp_srcptr src1, mp_srcptr src2,
+C                          mp_size_t size);
+C mp_limb_t mpn_addlshC_nc (mp_ptr dst, mp_srcptr src1, mp_srcptr src2,
+C                           mp_size_t size, mp_limb_t carry);
+C mp_limb_t mpn_sublshC_n (mp_ptr dst, mp_srcptr src1, mp_srcptr src2,
+C                          mp_size_t size);
+C mp_limb_t mpn_sublshC_nc (mp_ptr dst, mp_srcptr src1, mp_srcptr src2,
+C                           mp_size_t size, mp_limb_t borrow);
+
+C if src1 == dst, _ip1 is used
+
+C                                      cycles/limb
+C                              dst!=src1,src2  dst==src1
+C P5
+C P6 model 0-8,10-12
+C P6 model 9  (Banias)
+C P6 model 13 (Dothan)
+C P4 model 0  (Willamette)
+C P4 model 1  (?)
+C P4 model 2  (Northwood)
+C P4 model 3  (Prescott)
+C P4 model 4  (Nocona)
+C Intel Atom                    7               6
+C AMD K6
+C AMD K7
+C AMD K8
+C AMD K10
+
+defframe(GPARAM_CORB,  20)
+defframe(GPARAM_SIZE,  16)
+defframe(GPARAM_SRC2,  12)
+
+dnl  re-use parameter space
+define(SAVE_EBP,`PARAM_SIZE')
+define(SAVE_EBX,`PARAM_SRC')
+define(SAVE_UP,`PARAM_DST')
+
+define(M, eval(m4_lshift(1,LSH)))
+define(`rp',  `%edi')
+define(`up',  `%esi')
+
+ASM_START()
+       TEXT
+       ALIGN(8)
+
+PROLOGUE(M4_ip_function_c)
+deflit(`FRAME',0)
+       movl    PARAM_CORB, %ecx        C carry or borrow argument
+       movl    %ecx, %edx
+       shr     $LSH, %edx
+       andl    $1, %edx                C edx = bit LSH of the argument, 0 or 1
+       M4_opp  %edx, %ecx              C M4_opp set by includer; TODO confirm op
+       jmp     L(start_nc)             C join the in-place body, ecx/edx preset
+EPILOGUE()
+
+PROLOGUE(M4_ip_function)
+deflit(`FRAME',0)
+C In-place entry without carry-in: rp[] is both an input and the output.
+       xor     %ecx, %ecx              C no bits entering below limb 0
+       xor     %edx, %edx              C no saved carry
+L(start_nc):
+       push    rp                      FRAME_pushl()
+       mov     PARAM_DST, rp
+       mov     up, SAVE_UP             C dst slot reused to save esi
+       mov     PARAM_SRC, up
+       mov     %ebx, SAVE_EBX          C src slot reused to save ebx
+       mov     PARAM_SIZE, %ebx        C size
+L(inplace):
+       incl    %ebx                    C size + 1
+       shr     %ebx                    C (size+1)\2, CF set when size is even
+       mov     %ebp, SAVE_EBP
+       jnc     L(entry)                C size odd
+C NOTE(review): _c entry leaves edx = 0/1; add edx,edx gives CF only for -1. Confirm.
+       add     %edx, %edx              C size even
+       mov     %ecx, %ebp
+       mov     (up), %ecx
+       lea     -4(rp), rp              C bias rp for the 4(rp) access at enteven
+       lea     (%ebp,%ecx,M), %eax     C eax = ebp + limb*M, lea keeps CF
+       lea     4(up), up
+       jmp     L(enteven)
+
+       ALIGN(16)
+L(oop):
+       lea     (%ecx,%eax,M), %ebp     C ebp = ecx + limb*M
+       shr     $RSH, %eax              C bits that move up; clobbers CF
+       mov     4(up), %ecx             C next limb
+       add     %edx, %edx              C CF back from edx (-1 gives CF=1)
+       lea     8(up), up
+       M4_inst %ebp, (rp)              C M4_inst set by includer; updates rp[] in place
+       lea     (%eax,%ecx,M), %eax
+
+L(enteven):
+       M4_inst %eax, 4(rp)
+       lea     8(rp), rp
+
+       sbb     %edx, %edx              C save CF as 0 or -1
+       shr     $RSH, %ecx              C clobbers CF, hence the save
+
+L(entry):
+       mov     (up), %eax
+       decl    %ebx
+       jnz     L(oop)
+
+       lea     (%ecx,%eax,M), %ebp     C last limb
+       shr     $RSH, %eax              C bits shifted out of the top limb
+       shr     %edx                    C CF = edx bit 0
+       M4_inst %ebp, (rp)
+       mov     SAVE_UP, up
+       adc     $0, %eax                C return value = those bits plus final CF
+       mov     SAVE_EBP, %ebp
+       mov     SAVE_EBX, %ebx
+       pop     rp                      FRAME_popl()
+       ret
+EPILOGUE()
+
+PROLOGUE(M4_function_c)
+deflit(`FRAME',0)
+       movl    GPARAM_CORB, %ecx       C carry or borrow argument
+       movl    %ecx, %edx
+       shr     $LSH, %edx
+       andl    $1, %edx                C edx = bit LSH of the argument, 0 or 1
+       M4_opp  %edx, %ecx              C M4_opp set by includer; TODO confirm op
+       jmp     L(generic_nc)           C join the generic body, ecx/edx preset
+EPILOGUE()
+
+PROLOGUE(M4_function)
+deflit(`FRAME',0)
+C Three-operand entry without carry-in; dst == src1 is routed to L(inplace).
+       xor     %ecx, %ecx              C no bits entering below limb 0
+       xor     %edx, %edx              C no saved carry
+L(generic_nc):
+       push    rp                      FRAME_pushl()
+       mov     PARAM_DST, rp
+       mov     up, SAVE_UP
+       mov     PARAM_SRC, up           C offset 8 is src1 for this entry
+       cmp     rp, up                  C dst == src1 ?
+       mov     %ebx, SAVE_EBX          C mov keeps the cmp flags
+       jne     L(general)
+       mov     GPARAM_SIZE, %ebx       C size
+       mov     GPARAM_SRC2, up
+       jmp     L(inplace)              C reuse the in-place body
+
+L(general):
+       mov     GPARAM_SIZE, %eax       C size
+       mov     %ebx, SAVE_EBX          C repeats the store made before jne
+       incl    %eax                    C size + 1
+       mov     up, %ebx                C vp
+       mov     GPARAM_SRC2, up         C up
+       shr     %eax                    C (size+1)\2
+       mov     %ebp, SAVE_EBP          C src2 slot reused, src2 already loaded
+       mov     %eax, GPARAM_SIZE       C loop counter lives in the size slot
+       jnc     L(entry2)               C size odd
+C NOTE(review): _c entry leaves edx = 0/1; add edx,edx gives CF only for -1. Confirm.
+       add     %edx, %edx              C size even
+       mov     %ecx, %ebp
+       mov     (up), %ecx
+       lea     -4(rp), rp              C bias rp and ebx for the 4() accesses
+       lea     -4(%ebx), %ebx
+       lea     (%ebp,%ecx,M), %eax     C eax = ebp + limb*M, lea keeps CF
+       lea     4(up), up
+       jmp     L(enteven2)
+
+       ALIGN(16)
+L(oop2):
+       lea     (%ecx,%eax,M), %ebp     C ebp = ecx + limb*M
+       shr     $RSH, %eax              C bits that move up; clobbers CF
+       mov     4(up), %ecx             C next limb
+       add     %edx, %edx              C CF back from edx (-1 gives CF=1)
+       lea     8(up), up
+       mov     (%ebx), %edx            C edx now carries data; CF stays in flags
+       M4_inst %ebp, %edx              C M4_inst set by includer
+       lea     (%eax,%ecx,M), %eax
+       mov     %edx, (rp)
+L(enteven2):
+       mov     4(%ebx), %edx
+       lea     8(%ebx), %ebx
+       M4_inst %eax, %edx
+       mov     %edx, 4(rp)
+       sbb     %edx, %edx              C save CF as 0 or -1
+       shr     $RSH, %ecx              C clobbers CF, hence the save
+       lea     8(rp), rp
+L(entry2):
+       mov     (up), %eax
+       decl    GPARAM_SIZE
+       jnz     L(oop2)
+
+       lea     (%ecx,%eax,M), %ebp     C last limb
+       shr     $RSH, %eax              C bits shifted out of the top limb
+       shr     %edx                    C CF = edx bit 0
+       mov     (%ebx), %edx
+       M4_inst %ebp, %edx
+       mov     %edx, (rp)
+       mov     SAVE_UP, up
+       adc     $0, %eax                C return value = those bits plus final CF
+       mov     SAVE_EBP, %ebp
+       mov     SAVE_EBX, %ebx
+       pop     rp                      FRAME_popl()
+       ret
+EPILOGUE()
+
+ASM_END()
diff --git a/mpn/x86/atom/bdiv_q_1.asm b/mpn/x86/atom/bdiv_q_1.asm

new file mode 100644 (file)

index 0000000..19e7a4e
--- /dev/null
+++ b/mpn/x86/atom/bdiv_q_1.asm
@@ -0,0 +1,24 @@
+dnl  Intel Atom mpn_bdiv_q_1, mpn_pi1_bdiv_q_1 -- schoolbook Hensel
+dnl  division by 1-limb divisor, returning quotient only.
+
+dnl  Copyright 2011 Free Software Foundation, Inc.
+dnl
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
+dnl
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+MULFUNC_PROLOGUE(mpn_bdiv_q_1 mpn_pi1_bdiv_q_1)
+include_mpn(`x86/pentium/bdiv_q_1.asm')
diff --git a/mpn/x86/atom/dive_1.asm b/mpn/x86/atom/dive_1.asm

new file mode 100644 (file)

index 0000000..c7c5a89
--- /dev/null
+++ b/mpn/x86/atom/dive_1.asm
@@ -0,0 +1,23 @@
+dnl  Intel Atom mpn_divexact_1 -- mpn by limb exact division.
+
+dnl  Copyright 2011 Free Software Foundation, Inc.
+dnl
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
+dnl
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+MULFUNC_PROLOGUE(mpn_divexact_1)
+include_mpn(`x86/pentium/dive_1.asm')
diff --git a/mpn/x86/atom/gmp-mparam.h b/mpn/x86/atom/gmp-mparam.h

index daadd414800c55f4242a3fe5a693d8af9ba962e4..61ce540dcbb92d6ebd32f2d328a82c2b51be6218 100644 (file)
--- a/mpn/x86/atom/gmp-mparam.h
+++ b/mpn/x86/atom/gmp-mparam.h
@@ -1,7 +1,7 @@
  /* Intel Atom/32 gmp-mparam.h -- Compiler/machine parameter header file.
  
  Copyright 1991, 1993, 1994, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007,
-2008, 2009, 2010 Free Software Foundation, Inc.
+2008, 2009, 2010, 2011 Free Software Foundation, Inc.
  
  This file is part of the GNU MP Library.
  
@@ -23,157 +23,131 @@ along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
  
  /* Generated by tuneup.c */
  
-#define MOD_1_NORM_THRESHOLD                 3
-#define MOD_1_UNNORM_THRESHOLD               9
-#define MOD_1N_TO_MOD_1_1_THRESHOLD         13
-#define MOD_1U_TO_MOD_1_1_THRESHOLD          6
-#define MOD_1_1_TO_MOD_1_2_THRESHOLD        16
+#define MOD_1_NORM_THRESHOLD                 4
+#define MOD_1_UNNORM_THRESHOLD               8
+#define MOD_1N_TO_MOD_1_1_THRESHOLD          7
+#define MOD_1U_TO_MOD_1_1_THRESHOLD          4
+#define MOD_1_1_TO_MOD_1_2_THRESHOLD        10
  #define MOD_1_2_TO_MOD_1_4_THRESHOLD         0  /* never mpn_mod_1s_2p */
-#define PREINV_MOD_1_TO_MOD_1_THRESHOLD     31
-#define USE_PREINV_DIVREM_1                  1
+#define PREINV_MOD_1_TO_MOD_1_THRESHOLD     11
+#define USE_PREINV_DIVREM_1                  1  /* native */
+#define DIV_QR_2_PI2_THRESHOLD           MP_SIZE_T_MAX  /* never */
  #define DIVEXACT_1_THRESHOLD                 0  /* always (native) */
-#define BMOD_1_TO_MOD_1_THRESHOLD          102
+#define BMOD_1_TO_MOD_1_THRESHOLD           33
  
-#define MUL_TOOM22_THRESHOLD                16
-#define MUL_TOOM33_THRESHOLD                66
-#define MUL_TOOM44_THRESHOLD               171
-#define MUL_TOOM6H_THRESHOLD               258
-#define MUL_TOOM8H_THRESHOLD               357
+#define MUL_TOOM22_THRESHOLD                22
+#define MUL_TOOM33_THRESHOLD                81
+#define MUL_TOOM44_THRESHOLD               178
+#define MUL_TOOM6H_THRESHOLD               270
+#define MUL_TOOM8H_THRESHOLD               406
  
-#define MUL_TOOM32_TO_TOOM43_THRESHOLD     113
-#define MUL_TOOM32_TO_TOOM53_THRESHOLD     129
-#define MUL_TOOM42_TO_TOOM53_THRESHOLD     115
+#define MUL_TOOM32_TO_TOOM43_THRESHOLD      85
+#define MUL_TOOM32_TO_TOOM53_THRESHOLD     126
+#define MUL_TOOM42_TO_TOOM53_THRESHOLD     121
  #define MUL_TOOM42_TO_TOOM63_THRESHOLD     129
+#define MUL_TOOM43_TO_TOOM54_THRESHOLD     113
  
  #define SQR_BASECASE_THRESHOLD               0  /* always (native) */
-#define SQR_TOOM2_THRESHOLD                 16
-#define SQR_TOOM3_THRESHOLD                113
-#define SQR_TOOM4_THRESHOLD                193
-#define SQR_TOOM6_THRESHOLD                254
-#define SQR_TOOM8_THRESHOLD                381
+#define SQR_TOOM2_THRESHOLD                 32
+#define SQR_TOOM3_THRESHOLD                109
+#define SQR_TOOM4_THRESHOLD                262
+#define SQR_TOOM6_THRESHOLD                396
+#define SQR_TOOM8_THRESHOLD                547
  
-#define MULMOD_BNM1_THRESHOLD               13
-#define SQRMOD_BNM1_THRESHOLD               11
+#define MULMID_TOOM42_THRESHOLD             54
  
-#define MUL_FFT_MODF_THRESHOLD             332  /* k = 5 */
+#define MULMOD_BNM1_THRESHOLD               16
+#define SQRMOD_BNM1_THRESHOLD               18
+
+#define MUL_FFT_MODF_THRESHOLD             404  /* k = 5 */
  #define MUL_FFT_TABLE3                                      \
-  { {    332, 5}, {     17, 6}, {      9, 5}, {     19, 6}, \
-    {     11, 5}, {     23, 6}, {     17, 7}, {      9, 6}, \
-    {     19, 7}, {     11, 6}, {     25, 7}, {     15, 6}, \
-    {     31, 7}, {     19, 8}, {     11, 7}, {     27, 8}, \
-    {     15, 7}, {     33, 8}, {     19, 7}, {     39, 8}, \
-    {     23, 7}, {     47, 8}, {     27, 9}, {     15, 8}, \
-    {     35, 7}, {     71, 8}, {     39, 9}, {     23, 8}, \
+  { {    376, 5}, {     21, 6}, {     11, 5}, {     23, 6}, \
+    {     21, 7}, {     11, 6}, {     25, 7}, {     13, 6}, \
+    {     27, 7}, {     15, 6}, {     31, 7}, {     21, 8}, \
+    {     11, 7}, {     27, 8}, {     15, 7}, {     35, 8}, \
+    {     19, 7}, {     39, 8}, {     23, 7}, {     47, 8}, \
+    {     27, 9}, {     15, 8}, {     39, 9}, {     23, 8}, \
      {     51,10}, {     15, 9}, {     31, 8}, {     67, 9}, \
-    {     39, 8}, {     79, 9}, {     55,10}, {     31, 9}, \
-    {     79,10}, {     47, 9}, {     95,11}, {     31,10}, \
-    {     63, 9}, {    127, 8}, {    255, 9}, {    135,10}, \
-    {     79, 9}, {    159,10}, {     95, 9}, {    191, 8}, \
-    {    383,11}, {     63,10}, {    127, 9}, {    255, 8}, \
-    {    511, 9}, {    271,10}, {    143, 9}, {    287, 8}, \
-    {    607,10}, {    159, 9}, {    319,11}, {     95,10}, \
-    {    191, 9}, {    383,12}, {     63,11}, {    127,10}, \
-    {    255, 9}, {    511,10}, {    271, 9}, {    543, 8}, \
-    {   1087,10}, {    287, 9}, {    607,11}, {    159,10}, \
-    {    351, 9}, {    703, 8}, {   1407,11}, {    191,10}, \
-    {    415, 9}, {    831,11}, {    223,10}, {    479, 9}, \
-    {    959,12}, {    127,11}, {    255,10}, {    543, 9}, \
-    {   1087,11}, {    287,10}, {    607, 9}, {   1215,11}, \
-    {    351,10}, {    703, 9}, {   1407,12}, {    191,11}, \
-    {    383,10}, {    767,11}, {    415,10}, {    831,11}, \
-    {    479,10}, {    959,13}, {    127,12}, {    255,11}, \
-    {    543,10}, {   1087,11}, {    607,10}, {   1215,12}, \
-    {    319,11}, {    703,10}, {   1407,11}, {    735,10}, \
-    {   1471,12}, {    383,11}, {    831,12}, {    447,11}, \
-    {    959,10}, {   1919,13}, {    255,12}, {    511,11}, \
-    {   1087,12}, {    575,11}, {   1215,10}, {   2431,12}, \
-    {    703,11}, {   1471,13}, {    383,12}, {    959,11}, \
-    {   1919,14}, {    255,13}, {    511,12}, {   1215,11}, \
-    {   2431,13}, {    639,12}, {   1471,11}, {   2943,10}, \
-    {   5887,13}, {    767,12}, {   1599,13}, {    895,12}, \
-    {   1919,11}, {   3839,14}, {    511,13}, {   1023,12}, \
-    {   2111,13}, {   1151,12}, {   2431,13}, {   1407,12}, \
-    {   2943,11}, {   5887,14}, {    767,13}, {   1919,12}, \
-    {   3839,15}, {    511,14}, {   1023,13}, {   2431,14}, \
-    {   1279,13}, {   2943,12}, {   5887,14}, {   1535,13}, \
-    {   3199,14}, {   1791,13}, {   3839,12}, {   7679,15}, \
-    {   1023,14}, {  16384,15}, {  32768,16} }
-#define MUL_FFT_TABLE3_SIZE 163
-#define MUL_FFT_THRESHOLD                 3456
-
-#define SQR_FFT_MODF_THRESHOLD             308  /* k = 5 */
+    {     39, 8}, {     79, 9}, {     47, 8}, {     95,10}, \
+    {     31, 9}, {     79,10}, {     47, 9}, {     95,11}, \
+    {     31,10}, {     63, 9}, {    127, 8}, {    255, 9}, \
+    {    135,10}, {     79, 9}, {    159,10}, {     95, 9}, \
+    {    191,10}, {    111,11}, {     63,10}, {    127, 9}, \
+    {    255, 8}, {    511, 9}, {    271,10}, {    143, 9}, \
+    {    287, 8}, {    575, 9}, {    303,10}, {    159, 9}, \
+    {    319,11}, {     95,10}, {    191, 9}, {    383,12}, \
+    {     63,11}, {    127,10}, {    255, 9}, {    511,10}, \
+    {    271, 9}, {    543,10}, {    287, 9}, {    575,10}, \
+    {    303,11}, {    159,10}, {    319, 9}, {    639,10}, \
+    {    335, 9}, {    671,10}, {    351, 9}, {    703,11}, \
+    {    191,10}, {    383, 9}, {    767,10}, {    415,11}, \
+    {    223,10}, {    447,12}, {   4096,13}, {   8192,14}, \
+    {  16384,15}, {  32768,16} }
+#define MUL_FFT_TABLE3_SIZE 86
+#define MUL_FFT_THRESHOLD                 4544
+
+#define SQR_FFT_MODF_THRESHOLD             340  /* k = 5 */
  #define SQR_FFT_TABLE3                                      \
-  { {    308, 5}, {     13, 6}, {      7, 5}, {     17, 6}, \
-    {      9, 5}, {     19, 6}, {     13, 7}, {      7, 6}, \
-    {     17, 7}, {      9, 6}, {     19, 7}, {     11, 6}, \
-    {     24, 7}, {     15, 6}, {     31, 7}, {     21, 8}, \
-    {     11, 7}, {     25, 8}, {     15, 7}, {     31, 8}, \
+  { {    280, 5}, {     19, 6}, {     10, 5}, {     21, 6}, \
+    {     17, 7}, {      9, 6}, {     21, 7}, {     11, 6}, \
+    {     24, 7}, {     13, 6}, {     27, 7}, {     21, 8}, \
+    {     11, 7}, {     27, 8}, {     15, 7}, {     33, 8}, \
      {     19, 7}, {     39, 8}, {     23, 7}, {     47, 8}, \
-    {     27, 9}, {     15, 8}, {     31, 4}, {    607, 5}, \
-    {    319, 7}, {     95, 8}, {     55, 9}, {     31, 8}, \
-    {     63, 9}, {     39, 8}, {     79, 9}, {     47,10}, \
-    {     31, 9}, {     79,10}, {     47,11}, {     31,10}, \
+    {     27, 9}, {     15, 8}, {     39, 9}, {     23, 8}, \
+    {     47,10}, {     15, 9}, {     31, 8}, {     63, 9}, \
+    {     39, 8}, {     79, 9}, {     47,10}, {     31, 9}, \
+    {     79,10}, {     47, 9}, {     95,11}, {     31,10}, \
      {     63, 9}, {    127, 8}, {    255, 7}, {    511,10}, \
-    {     79, 9}, {    159, 8}, {    319,10}, {     95, 9}, \
-    {    191, 8}, {    383,11}, {     63,10}, {    127, 9}, \
-    {    255, 8}, {    543, 7}, {   1087, 9}, {    287, 8}, \
-    {    607,10}, {    159, 9}, {    319,11}, {     95,10}, \
-    {    191, 9}, {    383,12}, {     63,11}, {    127,10}, \
-    {    255, 9}, {    543, 8}, {   1087,10}, {    287, 9}, \
-    {    607,11}, {    159,10}, {    351, 9}, {    703, 8}, \
-    {   1407, 9}, {    735,11}, {    191,10}, {    415, 9}, \
-    {    831,11}, {    223,10}, {    479, 9}, {    959, 8}, \
-    {   1919,12}, {    127,11}, {    255,10}, {    543, 9}, \
-    {   1087,11}, {    287,10}, {    607, 9}, {   1215,11}, \
-    {    351,10}, {    703, 9}, {   1407,12}, {    191,11}, \
-    {    415,10}, {    831,11}, {    479,10}, {    959, 9}, \
-    {   1919,13}, {    127,12}, {    255,11}, {    543,10}, \
-    {   1087,11}, {    607,10}, {   1215,12}, {    319,11}, \
-    {    703,10}, {   1407,11}, {    735,12}, {    383,11}, \
-    {    831,12}, {    447,11}, {    959,10}, {   1919, 9}, \
-    {   3839,13}, {    255,12}, {    511,11}, {   1087,12}, \
-    {    575,11}, {   1215,10}, {   2431,12}, {    703,11}, \
-    {   1407,13}, {    383,12}, {    959,11}, {   1919,10}, \
-    {   3839,14}, {    255,13}, {    511,12}, {   1215,11}, \
-    {   2431,13}, {    639,12}, {   1471,11}, {   2943,13}, \
-    {    767,12}, {   1599,13}, {    895,12}, {   1919,11}, \
-    {   3839,14}, {    511,13}, {   1151,12}, {   2431,13}, \
-    {   1407,12}, {   2943,14}, {    767,13}, {   1919,12}, \
-    {   3839,15}, {    511,14}, {   1023,13}, {   2431,14}, \
-    {   1279,13}, {   2943,14}, {   1791,13}, {   3839,15}, \
-    {   1023,14}, {  16384,15}, {  32768,16} }
-#define SQR_FFT_TABLE3_SIZE 155
-#define SQR_FFT_THRESHOLD                 2368
-
-#define MULLO_BASECASE_THRESHOLD             0  /* always */
-#define MULLO_DC_THRESHOLD                  56
-#define MULLO_MUL_N_THRESHOLD             5240
-
-#define DC_DIV_QR_THRESHOLD                 59
-#define DC_DIVAPPR_Q_THRESHOLD             216
-#define DC_BDIV_QR_THRESHOLD                56
-#define DC_BDIV_Q_THRESHOLD                136
-
-#define INV_MULMOD_BNM1_THRESHOLD           30
-#define INV_NEWTON_THRESHOLD               260
-#define INV_APPR_THRESHOLD                 244
-
-#define BINV_NEWTON_THRESHOLD              266
-#define REDC_1_TO_REDC_N_THRESHOLD          62
-
-#define MU_DIV_QR_THRESHOLD               1308
-#define MU_DIVAPPR_Q_THRESHOLD            1334
-#define MUPI_DIV_QR_THRESHOLD              130
-#define MU_BDIV_QR_THRESHOLD              1017
-#define MU_BDIV_Q_THRESHOLD               1308
+    {     79, 9}, {    159, 8}, {    319, 9}, {    175,10}, \
+    {     95, 9}, {    191, 8}, {    383, 9}, {    207,11}, \
+    {     63,10}, {    127, 9}, {    255, 8}, {    511, 9}, \
+    {    271,10}, {    143, 9}, {    287,10}, {    159, 9}, \
+    {    319,10}, {    175,11}, {     95,10}, {    191, 9}, \
+    {    383,10}, {    207,12}, {     63,11}, {    127,10}, \
+    {    255, 9}, {    511,10}, {    271, 9}, {    543,10}, \
+    {    287,11}, {    159,10}, {    319, 9}, {    639,10}, \
+    {    351, 9}, {    703,11}, {    191,10}, {    415,11}, \
+    {    223,10}, {    479,12}, {   4096,13}, {   8192,14}, \
+    {  16384,15}, {  32768,16} }
+#define SQR_FFT_TABLE3_SIZE 82
+#define SQR_FFT_THRESHOLD                 3712
+
+#define MULLO_BASECASE_THRESHOLD             6
+#define MULLO_DC_THRESHOLD                  53
+#define MULLO_MUL_N_THRESHOLD             8907
+
+#define DC_DIV_QR_THRESHOLD                 63
+#define DC_DIVAPPR_Q_THRESHOLD             266
+#define DC_BDIV_QR_THRESHOLD                63
+#define DC_BDIV_Q_THRESHOLD                175
+
+#define INV_MULMOD_BNM1_THRESHOLD           42
+#define INV_NEWTON_THRESHOLD               250
+#define INV_APPR_THRESHOLD                 250
+
+#define BINV_NEWTON_THRESHOLD              274
+#define REDC_1_TO_REDC_N_THRESHOLD          68
+
+#define MU_DIV_QR_THRESHOLD               1334
+#define MU_DIVAPPR_Q_THRESHOLD            1442
+#define MUPI_DIV_QR_THRESHOLD              114
+#define MU_BDIV_QR_THRESHOLD              1078
+#define MU_BDIV_Q_THRESHOLD               1334
+
+#define POWM_SEC_TABLE  4,35,258,1084
  
  #define MATRIX22_STRASSEN_THRESHOLD         15
-#define HGCD_THRESHOLD                     111
-#define GCD_DC_THRESHOLD                   606
-#define GCDEXT_DC_THRESHOLD                273
+#define HGCD_THRESHOLD                     135
+#define HGCD_APPR_THRESHOLD                164
+#define HGCD_REDUCE_THRESHOLD             2384
+#define GCD_DC_THRESHOLD                   487
+#define GCDEXT_DC_THRESHOLD                342
  #define JACOBI_BASE_METHOD                   3
  
-#define GET_STR_DC_THRESHOLD                14
-#define GET_STR_PRECOMPUTE_THRESHOLD        26
-#define SET_STR_DC_THRESHOLD               270
-#define SET_STR_PRECOMPUTE_THRESHOLD       860
+#define GET_STR_DC_THRESHOLD                12
+#define GET_STR_PRECOMPUTE_THRESHOLD        27
+#define SET_STR_DC_THRESHOLD               324
+#define SET_STR_PRECOMPUTE_THRESHOLD      1290
+
+#define FAC_DSC_THRESHOLD                  250
+#define FAC_ODD_THRESHOLD                   34
diff --git a/mpn/x86/atom/logops_n.asm b/mpn/x86/atom/logops_n.asm

new file mode 100644 (file)

index 0000000..2633639
--- /dev/null
+++ b/mpn/x86/atom/logops_n.asm
@@ -0,0 +1,140 @@
+dnl  Intel Atom mpn_and_n,...,mpn_xnor_n -- bitwise logical operations.
+
+dnl  Copyright 2011 Free Software Foundation, Inc.
+
+dnl  Contributed to the GNU project by Marco Bodrato.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C                                 cycles/limb
+C                              op      nop     opn
+C P5
+C P6 model 0-8,10-12
+C P6 model 9  (Banias)
+C P6 model 13 (Dothan)
+C P4 model 0  (Willamette)
+C P4 model 1  (?)
+C P4 model 2  (Northwood)
+C P4 model 3  (Prescott)
+C P4 model 4  (Nocona)
+C Intel Atom                    3       3.5     3.5
+C AMD K6
+C AMD K7
+C AMD K8
+C AMD K10
+
+define(M4_choose_op,
+`ifdef(`OPERATION_$1',`
+define(`M4_function', `mpn_$1')
+define(`M4_want_pre', `$4')
+define(`M4_inst',     `$3')
+define(`M4_want_post',`$2')
+')')
+define(M4pre, `ifelse(M4_want_pre, yes,`$1')')
+define(M4post,`ifelse(M4_want_post,yes,`$1')')
+
+M4_choose_op( and_n,     , andl,    )
+M4_choose_op( andn_n,    , andl, yes)
+M4_choose_op( nand_n, yes, andl,    )
+M4_choose_op( ior_n,     ,  orl,    )
+M4_choose_op( iorn_n,    ,  orl, yes)
+M4_choose_op( nior_n, yes,  orl,    )
+M4_choose_op( xor_n,     , xorl,    )
+M4_choose_op( xnor_n, yes, xorl,    )
+
+ifdef(`M4_function',,
+`m4_error(`Unrecognised or undefined OPERATION symbol
+')')
+
+MULFUNC_PROLOGUE(mpn_and_n mpn_andn_n mpn_nand_n mpn_ior_n mpn_iorn_n mpn_nior_n mpn_xor_n mpn_xnor_n)
+
+C void M4_function (mp_ptr dst, mp_srcptr src2, mp_srcptr src1, mp_size_t size);
+C
+
+defframe(PARAM_SIZE, 16)
+defframe(PARAM_SRC1, 12)
+defframe(PARAM_SRC2, 8)
+defframe(PARAM_DST,  4)
+
+dnl  re-use parameter space
+define(SAVE_RP,`PARAM_SIZE')
+define(SAVE_VP,`PARAM_SRC1')
+define(SAVE_UP,`PARAM_DST')
+
+define(`rp',  `%edi')
+define(`up',  `%esi')
+define(`vp',  `%ebx')
+define(`cnt', `%eax')
+define(`r1',  `%ecx')
+define(`r2',  `%edx')
+
+ASM_START()
+       TEXT
+       ALIGN(16)
+deflit(`FRAME',0)
+
+PROLOGUE(M4_function)
+       mov     PARAM_SIZE, cnt         C size
+       mov     rp, SAVE_RP             C size slot reused to save edi
+       mov     PARAM_DST, rp
+       mov     up, SAVE_UP             C dst slot reused to save esi
+       mov     PARAM_SRC1, up          C offset 12, the second source argument
+       shr     cnt                     C size >> 1; ZF and CF feed jz/jnc below
+       mov     vp, SAVE_VP             C offset 12 slot reused to save ebx
+       mov     PARAM_SRC2, vp          C offset 8, the first source argument
+       mov     (up), r1                C first limb; mov keeps the shr flags
+       jz      L(end)                  C size == 1
+       jnc     L(even)                 C size % 2 == 0
+C Odd size >= 3 falls through into the loop.
+       ALIGN(16)
+L(oop):                                        C M4pre/M4post expand only when a complement is wanted
+M4pre(`        notl_or_xorl_GMP_NUMB_MASK(r1)')
+       M4_inst (vp), r1                C andl, orl or xorl
+       lea     8(up), up
+       mov     -4(up), r2
+M4post(`       notl_or_xorl_GMP_NUMB_MASK(r1)')
+       lea     8(vp), vp
+       mov     r1, (rp)
+L(entry):
+M4pre(`        notl_or_xorl_GMP_NUMB_MASK(r2)')
+       M4_inst -4(vp), r2
+       lea     8(rp), rp
+M4post(`       notl_or_xorl_GMP_NUMB_MASK(r2)')
+       dec     cnt
+       mov     (up), r1                C preload limb for next pass or the tail
+       mov     r2, -4(rp)              C mov keeps ZF from dec for the jnz
+       jnz     L(oop)
+
+L(end):                                        C last limb, or the only one
+M4pre(`        notl_or_xorl_GMP_NUMB_MASK(r1)')
+       mov     SAVE_UP, up
+       M4_inst (vp), r1
+M4post(`notl_or_xorl_GMP_NUMB_MASK(r1)')
+       mov     SAVE_VP, vp
+       mov     r1, (rp)
+       mov     SAVE_RP, rp
+       ret
+
+L(even):
+       mov     r1, r2                  C entry consumes its limb from r2
+       lea     4(up), up
+       lea     4(vp), vp
+       lea     -4(rp), rp              C bias rp for the -4(rp) store at entry
+       jmp     L(entry)
+EPILOGUE()
+ASM_END()
diff --git a/mpn/x86/atom/lshift.asm b/mpn/x86/atom/lshift.asm

new file mode 100644 (file)

index 0000000..1005cce
--- /dev/null
+++ b/mpn/x86/atom/lshift.asm
@@ -0,0 +1,207 @@
+dnl  Intel Atom mpn_lshift -- mpn left shift.
+
+dnl  Copyright 2011 Free Software Foundation, Inc.
+
+dnl  Contributed to the GNU project by Torbjorn Granlund and Marco Bodrato.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
+dnl
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C mp_limb_t mpn_lshift (mp_ptr dst, mp_srcptr src, mp_size_t size,
+C                      unsigned cnt);
+
+C                                cycles/limb
+C                              cnt!=1  cnt==1
+C P5
+C P6 model 0-8,10-12
+C P6 model 9  (Banias)
+C P6 model 13 (Dothan)
+C P4 model 0  (Willamette)
+C P4 model 1  (?)
+C P4 model 2  (Northwood)
+C P4 model 3  (Prescott)
+C P4 model 4  (Nocona)
+C Intel Atom                    5       2.5
+C AMD K6
+C AMD K7
+C AMD K8
+C AMD K10
+
+defframe(PARAM_CNT, 16)
+defframe(PARAM_SIZE,12)
+defframe(PARAM_SRC,  8)
+defframe(PARAM_DST,  4)
+
+dnl  re-use parameter space (each slot is written only after its argument is read)
+define(SAVE_UP,`PARAM_CNT')
+define(VAR_COUNT,`PARAM_SIZE')
+define(SAVE_EBX,`PARAM_SRC')
+define(SAVE_EBP,`PARAM_DST')
+
+define(`rp',  `%edi')
+define(`up',  `%esi')
+define(`cnt',  `%ecx')
+
+ASM_START()
+       TEXT
+       ALIGN(8)
+deflit(`FRAME',0)
+PROLOGUE(mpn_lshift)
+       mov     PARAM_CNT, cnt
+       mov     PARAM_SIZE, %edx
+       mov     up, SAVE_UP
+       mov     PARAM_SRC, up
+       push    rp                      FRAME_pushl()
+       mov     PARAM_DST, rp
+
+C For cnt == 1 use the low-to-high adc code unless src < dst < src + size.
+       cmp     $1,cnt
+       jne     L(normal)
+       cmpl    rp, up
+       jnc     L(special)              C jump if s_ptr >= res_ptr
+       leal    (up,%edx,4),%eax
+       cmpl    %eax,rp
+       jnc     L(special)              C jump if res_ptr >= s_ptr + size
+
+L(normal):
+       lea     -4(up,%edx,4), up       C up -> most significant src limb
+       mov     %ebx, SAVE_EBX
+       lea     -4(rp,%edx,4), rp       C rp -> most significant dst limb
+
+       shr     %edx                    C size / 2, CF = size odd
+       mov     (up), %eax              C most significant src limb
+       mov     %edx, VAR_COUNT         C pair counter; mov keeps CF
+       jnc     L(evn)                  C jump if size is even
+
+       mov     %eax, %ebx
+       shl     %cl, %ebx               C top limb << cnt
+       neg     cnt                     C shifts use cl mod 32, i.e. 32 - cnt
+       shr     %cl, %eax               C return value: bits shifted out
+       test    %edx, %edx
+       jnz     L(gt1)                  C jump if size > 1
+       mov     %ebx, (rp)              C size == 1: the only result limb
+       jmp     L(quit)
+
+L(gt1):        mov     %ebp, SAVE_EBP
+       push    %eax                    C keep return value; popped after the loop
+       mov     -4(up), %eax
+       mov     %eax, %ebp
+       shr     %cl, %eax               C cl is still 32 - cnt here
+       jmp     L(lo1)
+
+L(evn):        mov     %ebp, SAVE_EBP
+       neg     cnt                     C shifts use cl mod 32, i.e. 32 - cnt
+       mov     %eax, %ebp
+       mov     -4(up), %edx
+       shr     %cl, %eax               C return value: bits shifted out
+       mov     %edx, %ebx
+       shr     %cl, %edx
+       neg     cnt                     C back to cnt
+       decl    VAR_COUNT               C ZF survives the two lea below
+       lea     4(rp), rp
+       lea     -4(up), up
+       jz      L(end)                  C size == 2: no loop iterations
+       push    %eax                    FRAME_pushl()
+
+       ALIGN(8)
+L(top):        shl     %cl, %ebp
+       or      %ebp, %edx
+       shl     %cl, %ebx
+       neg     cnt                     C switch to 32 - cnt
+       mov     -4(up), %eax
+       mov     %eax, %ebp
+       mov     %edx, -4(rp)
+       shr     %cl, %eax
+       lea     -8(rp), rp
+L(lo1):        mov     -8(up), %edx
+       or      %ebx, %eax
+       mov     %edx, %ebx
+       shr     %cl, %edx
+       lea     -8(up), up
+       neg     cnt                     C switch back to cnt
+       mov     %eax, (rp)
+       decl    VAR_COUNT
+       jg      L(top)
+
+       pop     %eax                    FRAME_popl()
+L(end):
+       shl     %cl, %ebp
+       shl     %cl, %ebx
+       or      %ebp, %edx
+       mov     SAVE_EBP, %ebp
+       mov     %edx, -4(rp)
+       mov     %ebx, -8(rp)            C least significant result limb
+
+L(quit):
+       mov     SAVE_UP, up
+       mov     SAVE_EBX, %ebx
+       pop     rp                      FRAME_popl()
+       ret
+
+L(special):
+deflit(`FRAME',4)
+       lea     3(%edx), %eax           C size + 3
+       dec     %edx                    C size - 1
+       mov     (up), %ecx
+       shr     $2, %eax                C (size + 3) / 4
+       and     $3, %edx                C (size - 1) % 4
+       jz      L(goloop)               C jump if  size == 1 (mod 4)
+       shr     %edx
+       jnc     L(odd)                  C jump if  size == 3 (mod 4)
+
+       add     %ecx, %ecx              C limb * 2; CF feeds the next adc
+       lea     4(up), up
+       mov     %ecx, (rp)
+       mov     (up), %ecx
+       lea     4(rp), rp
+
+       dec     %edx                    C dec leaves CF from the add intact
+       jnz     L(goloop)               C jump if  size == 2 (mod 4)
+L(odd):        lea     -8(up), up
+       lea     -8(rp), rp
+       jmp     L(sentry)               C reached if size == 0 or 3 (mod 4)
+
+L(sloop):
+       adc     %ecx, %ecx              C limb * 2 + carry from the limb below
+       mov     4(up), %edx
+       mov     %ecx, (rp)
+       adc     %edx, %edx
+       mov     8(up), %ecx
+       mov     %edx, 4(rp)
+L(sentry):
+       adc     %ecx, %ecx
+       mov     12(up), %edx
+       mov     %ecx, 8(rp)
+       adc     %edx, %edx
+       lea     16(up), up
+       mov     %edx, 12(rp)
+       lea     16(rp), rp
+       mov     (up), %ecx
+L(goloop):
+       decl    %eax                    C decl leaves the carry chain in CF intact
+       jnz     L(sloop)
+
+L(squit):
+       adc     %ecx, %ecx
+       mov     %ecx, (rp)              C most significant result limb
+       adc     %eax, %eax              C eax was 0: return the final carry
+
+       mov     SAVE_UP, up
+       pop     rp                      FRAME_popl()
+       ret
+EPILOGUE()
+ASM_END()
diff --git a/mpn/x86/atom/lshiftc.asm b/mpn/x86/atom/lshiftc.asm

new file mode 100644 (file)

index 0000000..f26e271
--- /dev/null
+++ b/mpn/x86/atom/lshiftc.asm
@@ -0,0 +1,148 @@
+dnl  Intel Atom mpn_lshiftc -- mpn left shift with complement.
+
+dnl  Copyright 2011 Free Software Foundation, Inc.
+
+dnl  Contributed to the GNU project by Torbjorn Granlund and Marco Bodrato.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
+dnl
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C mp_limb_t mpn_lshiftc (mp_ptr dst, mp_srcptr src, mp_size_t size,
+C                       unsigned cnt);
+
+C                              cycles/limb
+C P5
+C P6 model 0-8,10-12
+C P6 model 9  (Banias)
+C P6 model 13 (Dothan)
+C P4 model 0  (Willamette)
+C P4 model 1  (?)
+C P4 model 2  (Northwood)
+C P4 model 3  (Prescott)
+C P4 model 4  (Nocona)
+C Intel Atom                    5.5
+C AMD K6
+C AMD K7
+C AMD K8
+C AMD K10
+
+defframe(PARAM_CNT, 16)
+defframe(PARAM_SIZE,12)
+defframe(PARAM_SRC,  8)
+defframe(PARAM_DST,  4)
+
+dnl  re-use parameter space (each slot is written only after its argument is read)
+define(SAVE_UP,`PARAM_CNT')
+define(VAR_COUNT,`PARAM_SIZE')
+define(SAVE_EBX,`PARAM_SRC')
+define(SAVE_EBP,`PARAM_DST')
+
+define(`rp',  `%edi')
+define(`up',  `%esi')
+define(`cnt',  `%ecx')
+
+ASM_START()
+       TEXT
+
+PROLOGUE(mpn_lshiftc)
+deflit(`FRAME',0)
+       mov     PARAM_CNT, cnt
+       mov     PARAM_SIZE, %edx
+       mov     up, SAVE_UP
+       mov     PARAM_SRC, up
+       push    rp                      FRAME_pushl()
+       mov     PARAM_DST, rp
+
+       lea     -4(up,%edx,4), up       C up -> most significant src limb
+       mov     %ebx, SAVE_EBX
+       lea     -4(rp,%edx,4), rp       C rp -> most significant dst limb
+
+       shr     %edx                    C size / 2, CF = size odd
+       mov     (up), %eax              C most significant src limb
+       mov     %edx, VAR_COUNT         C pair counter; mov keeps CF
+       jnc     L(evn)                  C jump if size is even
+
+       mov     %eax, %ebx
+       shl     %cl, %ebx               C top limb << cnt
+       neg     cnt                     C shifts use cl mod 32, i.e. 32 - cnt
+       shr     %cl, %eax               C return value: bits shifted out
+       test    %edx, %edx
+       jnz     L(gt1)                  C jump if size > 1
+       not     %ebx                    C results are stored complemented
+       mov     %ebx, (rp)              C size == 1: the only result limb
+       jmp     L(quit)
+
+L(gt1):        mov     %ebp, SAVE_EBP
+       push    %eax                    C keep return value; popped after the loop
+       mov     -4(up), %eax
+       mov     %eax, %ebp
+       shr     %cl, %eax               C cl is still 32 - cnt here
+       jmp     L(lo1)
+
+L(evn):        mov     %ebp, SAVE_EBP
+       neg     cnt                     C shifts use cl mod 32, i.e. 32 - cnt
+       mov     %eax, %ebp
+       mov     -4(up), %edx
+       shr     %cl, %eax               C return value (not complemented)
+       mov     %edx, %ebx
+       shr     %cl, %edx
+       neg     cnt                     C back to cnt
+       decl    VAR_COUNT               C ZF survives the two lea below
+       lea     4(rp), rp
+       lea     -4(up), up
+       jz      L(end)                  C size == 2: no loop iterations
+       push    %eax                    FRAME_pushl()
+
+L(top):        shl     %cl, %ebp
+       or      %ebp, %edx
+       shl     %cl, %ebx
+       neg     cnt                     C switch to 32 - cnt
+       not     %edx                    C complement the finished limb
+       mov     -4(up), %eax
+       mov     %eax, %ebp
+       mov     %edx, -4(rp)
+       shr     %cl, %eax
+       lea     -8(rp), rp
+L(lo1):        mov     -8(up), %edx
+       or      %ebx, %eax
+       mov     %edx, %ebx
+       shr     %cl, %edx
+       not     %eax                    C complement the finished limb
+       lea     -8(up), up
+       neg     cnt                     C switch back to cnt
+       mov     %eax, (rp)
+       decl    VAR_COUNT
+       jg      L(top)
+
+       pop     %eax                    FRAME_popl()
+L(end):
+       shl     %cl, %ebp
+       shl     %cl, %ebx
+       or      %ebp, %edx
+       mov     SAVE_EBP, %ebp
+       not     %edx
+       not     %ebx                    C vacated low bits become ones
+       mov     %edx, -4(rp)
+       mov     %ebx, -8(rp)            C least significant result limb
+
+L(quit):
+       mov     SAVE_UP, up
+       mov     SAVE_EBX, %ebx
+       pop     rp                      FRAME_popl()
+       ret
+EPILOGUE()
+ASM_END()
diff --git a/mpn/x86/atom/mmx/copyd.asm b/mpn/x86/atom/mmx/copyd.asm

new file mode 100644 (file)

index 0000000..0c46e5b
--- /dev/null
+++ b/mpn/x86/atom/mmx/copyd.asm
@@ -0,0 +1,23 @@
+dnl  Intel Atom mpn_copyd -- copy limb vector, decrementing.
+
+dnl  Copyright 2011 Free Software Foundation, Inc.
+dnl
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
+dnl
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+MULFUNC_PROLOGUE(mpn_copyd)
+include_mpn(`x86/k7/mmx/copyd.asm')
diff --git a/mpn/x86/atom/mmx/copyi.asm b/mpn/x86/atom/mmx/copyi.asm

new file mode 100644 (file)

index 0000000..855ab81
--- /dev/null
+++ b/mpn/x86/atom/mmx/copyi.asm
@@ -0,0 +1,23 @@
+dnl  Intel Atom mpn_copyi -- copy limb vector, incrementing.
+
+dnl  Copyright 2011 Free Software Foundation, Inc.
+dnl
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
+dnl
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+MULFUNC_PROLOGUE(mpn_copyi)
+include_mpn(`x86/k7/mmx/copyi.asm')
diff --git a/mpn/x86/atom/mmx/hamdist.asm b/mpn/x86/atom/mmx/hamdist.asm

new file mode 100644 (file)

index 0000000..d1d96db
--- /dev/null
+++ b/mpn/x86/atom/mmx/hamdist.asm
@@ -0,0 +1,23 @@
+dnl  Intel Atom mpn_hamdist -- hamming distance.
+
+dnl  Copyright 2011 Free Software Foundation, Inc.
+dnl
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
+dnl
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+MULFUNC_PROLOGUE(mpn_hamdist)
+include_mpn(`x86/k7/mmx/popham.asm')
diff --git a/mpn/x86/atom/mod_34lsub1.asm b/mpn/x86/atom/mod_34lsub1.asm

new file mode 100644 (file)

index 0000000..cc807ed
--- /dev/null
+++ b/mpn/x86/atom/mod_34lsub1.asm
@@ -0,0 +1,23 @@
+dnl  Intel Atom mpn_mod_34lsub1 -- remainder modulo 2^24-1.
+
+dnl  Copyright 2011 Free Software Foundation, Inc.
+dnl
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
+dnl
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+MULFUNC_PROLOGUE(mpn_mod_34lsub1)
+include_mpn(`x86/p6/mod_34lsub1.asm')
diff --git a/mpn/x86/atom/mode1o.asm b/mpn/x86/atom/mode1o.asm

new file mode 100644 (file)

index 0000000..0c06d04
--- /dev/null
+++ b/mpn/x86/atom/mode1o.asm
@@ -0,0 +1,23 @@
+dnl  Intel Atom mpn_modexact_1_odd -- exact division style remainder.
+
+dnl  Copyright 2011 Free Software Foundation, Inc.
+dnl
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
+dnl
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+MULFUNC_PROLOGUE(mpn_modexact_1_odd mpn_modexact_1c_odd)
+include_mpn(`x86/pentium/mode1o.asm')
diff --git a/mpn/x86/atom/rshift.asm b/mpn/x86/atom/rshift.asm

new file mode 100644 (file)

index 0000000..ab0e39a
--- /dev/null
+++ b/mpn/x86/atom/rshift.asm
@@ -0,0 +1,141 @@
+dnl  Intel Atom mpn_rshift -- mpn right shift.
+
+dnl  Copyright 2011 Free Software Foundation, Inc.
+
+dnl  Converted from AMD64 by Marco Bodrato.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
+dnl
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C mp_limb_t mpn_rshift (mp_ptr dst, mp_srcptr src, mp_size_t size,
+C                      unsigned cnt);
+
+C                              cycles/limb
+C P5
+C P6 model 0-8,10-12
+C P6 model 9  (Banias)
+C P6 model 13 (Dothan)
+C P4 model 0  (Willamette)
+C P4 model 1  (?)
+C P4 model 2  (Northwood)
+C P4 model 3  (Prescott)
+C P4 model 4  (Nocona)
+C Intel Atom                    5
+C AMD K6
+C AMD K7
+C AMD K8
+C AMD K10
+
+defframe(PARAM_CNT, 16)
+defframe(PARAM_SIZE,12)
+defframe(PARAM_SRC,  8)
+defframe(PARAM_DST,  4)
+
+dnl  re-use parameter space (each slot is written only after its argument is read)
+define(SAVE_UP,`PARAM_CNT')
+define(VAR_COUNT,`PARAM_SIZE')
+define(SAVE_EBX,`PARAM_SRC')
+define(SAVE_EBP,`PARAM_DST')
+
+define(`rp',  `%edi')
+define(`up',  `%esi')
+define(`cnt',  `%ecx')
+
+ASM_START()
+       TEXT
+       ALIGN(8)
+deflit(`FRAME',0)
+PROLOGUE(mpn_rshift)
+       mov     PARAM_CNT, cnt
+       mov     PARAM_SIZE, %edx
+       mov     up, SAVE_UP
+       mov     PARAM_SRC, up
+       push    rp                      FRAME_pushl()
+       mov     PARAM_DST, rp
+       mov     %ebx, SAVE_EBX
+
+       shr     %edx                    C size / 2, CF = size odd
+       mov     (up), %eax              C least significant src limb
+       mov     %edx, VAR_COUNT         C pair counter; mov keeps CF
+       jnc     L(evn)                  C jump if size is even
+
+       mov     %eax, %ebx
+       shr     %cl, %ebx               C low limb >> cnt
+       neg     cnt                     C shifts use cl mod 32, i.e. 32 - cnt
+       shl     %cl, %eax               C return value: bits shifted out
+       test    %edx, %edx
+       jnz     L(gt1)                  C jump if size > 1
+       mov     %ebx, (rp)              C size == 1: the only result limb
+       jmp     L(quit)
+
+L(gt1):        mov     %ebp, SAVE_EBP
+       push    %eax                    C keep return value; popped after the loop
+       mov     4(up), %eax
+       mov     %eax, %ebp
+       shl     %cl, %eax               C cl is still 32 - cnt here
+       jmp     L(lo1)
+
+L(evn):        mov     %ebp, SAVE_EBP
+       neg     cnt                     C shifts use cl mod 32, i.e. 32 - cnt
+       mov     %eax, %ebp
+       mov     4(up), %edx
+       shl     %cl, %eax               C return value: bits shifted out
+       mov     %edx, %ebx
+       shl     %cl, %edx
+       neg     cnt                     C back to cnt
+       decl    VAR_COUNT               C ZF survives the two lea below
+       lea     -4(rp), rp
+       lea     4(up), up
+       jz      L(end)                  C size == 2: no loop iterations
+       push    %eax                    FRAME_pushl()
+
+       ALIGN(8)
+L(top):        shr     %cl, %ebp
+       or      %ebp, %edx
+       shr     %cl, %ebx
+       neg     cnt                     C switch to 32 - cnt
+       mov     4(up), %eax
+       mov     %eax, %ebp
+       mov     %edx, 4(rp)
+       shl     %cl, %eax
+       lea     8(rp), rp
+L(lo1):        mov     8(up), %edx
+       or      %ebx, %eax
+       mov     %edx, %ebx
+       shl     %cl, %edx
+       lea     8(up), up
+       neg     cnt                     C switch back to cnt
+       mov     %eax, (rp)
+       decl    VAR_COUNT
+       jg      L(top)
+
+       pop     %eax                    FRAME_popl()
+L(end):
+       shr     %cl, %ebp
+       shr     %cl, %ebx
+       or      %ebp, %edx
+       mov     SAVE_EBP, %ebp
+       mov     %edx, 4(rp)
+       mov     %ebx, 8(rp)             C most significant result limb
+
+L(quit):
+       mov     SAVE_UP, up
+       mov     SAVE_EBX, %ebx
+       pop     rp                      FRAME_popl()
+       ret
+EPILOGUE()
+ASM_END()
diff --git a/mpn/x86/atom/sse2/aorsmul_1.asm b/mpn/x86/atom/sse2/aorsmul_1.asm

new file mode 100644 (file)

index 0000000..c1e5373
--- /dev/null
+++ b/mpn/x86/atom/sse2/aorsmul_1.asm
@@ -0,0 +1,163 @@
+dnl x86-32 mpn_addmul_1 and mpn_submul_1 optimised for Intel Atom.
+
+dnl  Contributed to the GNU project by Torbjorn Granlund and Marco Bodrato.
+dnl
+dnl  Copyright 2011 Free Software Foundation, Inc.
+dnl
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+dnl
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C                          cycles/limb
+C                          cycles/limb
+C P5                            -
+C P6 model 0-8,10-12            -
+C P6 model 9  (Banias)
+C P6 model 13 (Dothan)
+C P4 model 0  (Willamette)
+C P4 model 1  (?)
+C P4 model 2  (Northwood)
+C P4 model 3  (Prescott)
+C P4 model 4  (Nocona)
+C Intel Atom                    8
+C AMD K6
+C AMD K7                        -
+C AMD K8
+C AMD K10
+
+define(`rp', `%edi')
+define(`up', `%esi')
+define(`n',  `%ecx')
+
+ifdef(`OPERATION_addmul_1',`
+       define(ADDSUB,  add)
+       define(func_1,  mpn_addmul_1)
+       define(func_1c, mpn_addmul_1c)')
+ifdef(`OPERATION_submul_1',`
+       define(ADDSUB,  sub)
+       define(func_1,  mpn_submul_1)
+       define(func_1c, mpn_submul_1c)')
+
+MULFUNC_PROLOGUE(mpn_addmul_1 mpn_addmul_1c mpn_submul_1 mpn_submul_1c)
+
+       TEXT
+       ALIGN(16)
+PROLOGUE(func_1)
+       xor     %edx, %edx              C no carry-in
+L(ent):        push    %edi
+       push    %esi
+       push    %ebx
+       mov     16(%esp), rp            C arg 1 (3 pushes + return address)
+       mov     20(%esp), up            C arg 2
+       mov     24(%esp), n             C arg 3
+       movd    28(%esp), %mm7          C arg 4, the multiplier for every pmuludq
+       test    $1, n
+       jz      L(fi0or2)               C jump if n is even
+       movd    (up), %mm0
+       pmuludq %mm7, %mm0
+       shr     $2, n                   C n / 4, CF = bit 1 of n
+       jnc     L(fi1)                  C jump if n == 1 (mod 4)
+
+L(fi3):        lea     -8(up), up
+       lea     -8(rp), rp
+       movd    12(up), %mm1
+       movd    %mm0, %ebx
+       pmuludq %mm7, %mm1
+       add     $1, n                   C increment and clear carry
+       jmp     L(lo3)
+
+L(fi1):        movd    %mm0, %ebx
+       jz      L(wd1)                  C n == 1: ZF is still from the shr
+       movd    4(up), %mm1
+       pmuludq %mm7, %mm1
+       jmp     L(lo1)
+
+L(fi0or2):
+       movd    (up), %mm1
+       pmuludq %mm7, %mm1
+       shr     $2, n                   C n / 4, CF = bit 1 of n
+       movd    4(up), %mm0
+       jc      L(fi2)                  C jump if n == 2 (mod 4)
+       lea     -4(up), up
+       lea     -4(rp), rp
+       movd    %mm1, %eax
+       pmuludq %mm7, %mm0
+       jmp     L(lo0)
+
+L(fi2):        lea     4(up), up
+       add     $1, n                   C increment and clear carry
+       movd    %mm1, %eax
+       lea     -12(rp), rp
+       jmp     L(lo2)
+
+C      ALIGN(16)                       C alignment seems irrelevant
+L(top):        movd    4(up), %mm1
+       adc     $0, %edx
+       ADDSUB  %eax, 12(rp)
+       movd    %mm0, %ebx
+       pmuludq %mm7, %mm1
+       lea     16(rp), rp
+L(lo1):        psrlq   $32, %mm0
+       adc     %edx, %ebx
+       movd    %mm0, %edx
+       movd    %mm1, %eax
+       movd    8(up), %mm0
+       pmuludq %mm7, %mm0
+       adc     $0, %edx
+       ADDSUB  %ebx, (rp)
+L(lo0):        psrlq   $32, %mm1
+       adc     %edx, %eax
+       movd    %mm1, %edx
+       movd    %mm0, %ebx
+       movd    12(up), %mm1
+       pmuludq %mm7, %mm1
+       adc     $0, %edx
+       ADDSUB  %eax, 4(rp)
+L(lo3):        psrlq   $32, %mm0
+       adc     %edx, %ebx
+       movd    %mm0, %edx
+       movd    %mm1, %eax
+       lea     16(up), up
+       movd    (up), %mm0
+       adc     $0, %edx
+       ADDSUB  %ebx, 8(rp)
+L(lo2):        psrlq   $32, %mm1
+       adc     %edx, %eax
+       movd    %mm1, %edx
+       pmuludq %mm7, %mm0
+       dec     n                       C dec leaves CF from the ADDSUB intact
+       jnz     L(top)
+
+L(end):        adc     n, %edx                 C n is zero here
+       ADDSUB  %eax, 12(rp)
+       movd    %mm0, %ebx
+       lea     16(rp), rp
+L(wd1):        psrlq   $32, %mm0
+       adc     %edx, %ebx
+       movd    %mm0, %eax              C high half of the last product
+       adc     n, %eax                 C n is zero: add only the carry
+       ADDSUB  %ebx, (rp)
+       emms
+       adc     n, %eax                 C fold in CF from the last ADDSUB
+       pop     %ebx
+       pop     %esi
+       pop     %edi
+       ret
+EPILOGUE()
+PROLOGUE(func_1c)
+       mov     20(%esp), %edx          C carry
+       jmp     L(ent)                  C share the body with func_1
+EPILOGUE()
diff --git a/mpn/x86/atom/sse2/bdiv_dbm1c.asm b/mpn/x86/atom/sse2/bdiv_dbm1c.asm

new file mode 100644 (file)

index 0000000..1e5c147
--- /dev/null
+++ b/mpn/x86/atom/sse2/bdiv_dbm1c.asm
@@ -0,0 +1,23 @@
+dnl  Intel Atom  mpn_bdiv_dbm1.
+
+dnl  Copyright 2011 Free Software Foundation, Inc.
+dnl
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
+dnl
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+MULFUNC_PROLOGUE(mpn_bdiv_dbm1c)
+include_mpn(`x86/pentium4/sse2/bdiv_dbm1c.asm')
diff --git a/mpn/x86/atom/sse2/divrem_1.asm b/mpn/x86/atom/sse2/divrem_1.asm

new file mode 100644 (file)

index 0000000..2edcf2e
--- /dev/null
+++ b/mpn/x86/atom/sse2/divrem_1.asm
@@ -0,0 +1,23 @@
+dnl  Intel Atom mpn_divrem_1 -- mpn by limb division.
+
+dnl  Copyright 2011 Free Software Foundation, Inc.
+dnl
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
+dnl
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+MULFUNC_PROLOGUE(mpn_preinv_divrem_1 mpn_divrem_1c mpn_divrem_1)
+include_mpn(`x86/pentium4/sse2/divrem_1.asm')
diff --git a/mpn/x86/atom/sse2/mod_1_1.asm b/mpn/x86/atom/sse2/mod_1_1.asm

new file mode 100644 (file)

index 0000000..4b8c410
--- /dev/null
+++ b/mpn/x86/atom/sse2/mod_1_1.asm
@@ -0,0 +1,23 @@
+dnl  Intel Atom/SSE2 mpn_mod_1_1.
+
+dnl  Copyright 2009, 2011 Free Software Foundation, Inc.
+dnl
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+dnl
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+MULFUNC_PROLOGUE(mpn_mod_1_1p)
+include_mpn(`x86/pentium4/sse2/mod_1_1.asm')
diff --git a/mpn/x86/atom/sse2/mod_1_4.asm b/mpn/x86/atom/sse2/mod_1_4.asm

new file mode 100644 (file)

index 0000000..056700a
--- /dev/null
+++ b/mpn/x86/atom/sse2/mod_1_4.asm
@@ -0,0 +1,23 @@
+dnl  Intel Atom/SSE2 mpn_mod_1_4.
+
+dnl  Copyright 2009, 2011 Free Software Foundation, Inc.
+dnl
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+dnl
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+MULFUNC_PROLOGUE(mpn_mod_1s_4p)
+include_mpn(`x86/pentium4/sse2/mod_1_4.asm')
diff --git a/mpn/x86/atom/sse2/mul_1.asm b/mpn/x86/atom/sse2/mul_1.asm

new file mode 100644 (file)

index 0000000..5cd86ca
--- /dev/null
+++ b/mpn/x86/atom/sse2/mul_1.asm
@@ -0,0 +1,113 @@
+dnl  Intel Atom mpn_mul_1.
+
+dnl  Contributed to the GNU project by Torbjorn Granlund and Marco Bodrato.
+dnl
+dnl  Copyright 2011 Free Software Foundation, Inc.
+dnl
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
+dnl
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C                          cycles/limb
+C                          cycles/limb
+C P5                            -
+C P6 model 0-8,10-12            -
+C P6 model 9  (Banias)
+C P6 model 13 (Dothan)
+C P4 model 0  (Willamette)
+C P4 model 1  (?)
+C P4 model 2  (Northwood)
+C P4 model 3  (Prescott)
+C P4 model 4  (Nocona)
+C Intel Atom                    7.5
+C AMD K6                        -
+C AMD K7                        -
+C AMD K8
+C AMD K10
+
+defframe(PARAM_CARRY,20)
+defframe(PARAM_MUL,  16)
+defframe(PARAM_SIZE, 12)
+defframe(PARAM_SRC,  8)
+defframe(PARAM_DST,  4)
+
+define(`rp', `%edx')
+define(`up', `%esi')
+define(`n',  `%ecx')
+
+ASM_START()
+       TEXT
+       ALIGN(16)
+deflit(`FRAME',0)
+
+PROLOGUE(mpn_mul_1c)
+       movd    PARAM_CARRY, %mm6       C carry
+       jmp     L(ent)                  C share the body with mpn_mul_1
+EPILOGUE()
+
+       ALIGN(8)                        C for compact code
+PROLOGUE(mpn_mul_1)
+       pxor    %mm6, %mm6              C no carry-in
+L(ent):        push    %esi                    FRAME_pushl()
+       mov     PARAM_SRC, up
+       mov     PARAM_SIZE, %eax        C size
+       movd    PARAM_MUL, %mm7         C multiplier for every pmuludq
+       movd    (up), %mm0
+       mov     %eax, n
+       and     $3, %eax                C size % 4, ZF used by the jz below
+       pmuludq %mm7, %mm0
+       mov     PARAM_DST, rp
+       jz      L(lo0)                  C jump if size == 0 (mod 4)
+       cmp     $2, %eax
+       lea     -16(up,%eax,4),up       C bias the pointers for the entry point;
+       lea     -16(rp,%eax,4),rp       C lea keeps the cmp flags
+       jc      L(lo1)                  C jump if size == 1 (mod 4)
+       jz      L(lo2)                  C jump if size == 2 (mod 4)
+       jmp     L(lo3)                  C size == 3 (mod 4)
+
+       ALIGN(16)
+L(top):        movd    (up), %mm0
+       pmuludq %mm7, %mm0
+       psrlq   $32, %mm6               C keep only the carry limb
+       lea     16(rp), rp
+L(lo0):        paddq   %mm0, %mm6              C 64-bit product + carry limb
+       movd    4(up), %mm0
+       pmuludq %mm7, %mm0
+       movd    %mm6, (rp)              C store the low 32 bits
+       psrlq   $32, %mm6
+L(lo3):        paddq   %mm0, %mm6
+       movd    8(up), %mm0
+       pmuludq %mm7, %mm0
+       movd    %mm6, 4(rp)
+       psrlq   $32, %mm6
+L(lo2):        paddq   %mm0, %mm6
+       movd    12(up), %mm0
+       pmuludq %mm7, %mm0
+       movd    %mm6, 8(rp)
+       psrlq   $32, %mm6
+L(lo1):        paddq   %mm0, %mm6
+       sub     $4, n
+       movd    %mm6, 12(rp)
+       lea     16(up), up
+       ja      L(top)                  C loop while more than 4 limbs remained
+
+       psrlq   $32, %mm6
+       movd    %mm6, %eax              C return the final carry limb
+       emms
+       pop     %esi                    FRAME_popl()
+       ret
+EPILOGUE()
+ASM_END()
diff --git a/mpn/x86/atom/sse2/mul_basecase.asm b/mpn/x86/atom/sse2/mul_basecase.asm

new file mode 100644 (file)

index 0000000..ffd05b6
--- /dev/null
+++ b/mpn/x86/atom/sse2/mul_basecase.asm
@@ -0,0 +1,490 @@
+dnl  x86 mpn_mul_basecase -- Multiply two limb vectors and store the result in
+dnl  a third limb vector.
+
+dnl  Contributed to the GNU project by Torbjorn Granlund and Marco Bodrato.
+dnl
+dnl  Copyright 2011 Free Software Foundation, Inc.
+dnl
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+dnl
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C TODO
+C  * Check if 'jmp N(%esp)' is well-predicted enough to allow us to combine the
+C    4 large loops into one; we could use it for the outer loop branch.
+C  * Optimise code outside of inner loops.
+C  * Write combined addmul_1 feed-in and wind-down code, and use it when
+C    iterating each outer loop.  ("Overlapping software pipelining")
+C  * Postpone push of ebx until we know vn > 1.  Perhaps use caller-saves regs
+C    for inlined mul_1, allowing us to postpone all pushes.
+C  * Perhaps write special code for vn <= un < M, for some small M.
+
+C void mpn_mul_basecase (mp_ptr wp,
+C                        mp_srcptr xp, mp_size_t xn,
+C                        mp_srcptr yp, mp_size_t yn);
+C
+
+define(`rp',  `%edi')
+define(`up',  `%esi')
+define(`un',  `%ecx')
+define(`vp',  `%ebp')
+define(`vn',  `36(%esp)')      C fifth argument, counted down in place; offset holds after the four pushes
+
+       TEXT
+       ALIGN(16)
+PROLOGUE(mpn_mul_basecase)
+       push    %edi
+       push    %esi
+       push    %ebx
+       push    %ebp            C four pushes = 16 bytes, so the arguments start at 20(%esp)
+       mov     20(%esp), rp
+       mov     24(%esp), up
+       mov     28(%esp), un
+       mov     32(%esp), vp
+
+       movd    (up), %mm0      C up[0]
+       movd    (vp), %mm7      C vp[0], multiplier for the first pass
+       pmuludq %mm7, %mm0
+       pxor    %mm6, %mm6      C carry accumulator starts at zero
+
+       mov     un, %eax
+       and     $3, %eax        C un mod 4 selects one of the four code variants below
+       jz      L(of0)          C un mod 4 = 0
+       cmp     $2, %eax
+       jc      L(of1)          C un mod 4 = 1
+       jz      L(of2)          C un mod 4 = 2
+
+C ================================================================
+       jmp     L(m3)           C un mod 4 = 3
+       ALIGN(16)
+L(lm3):        movd    -4(up), %mm0
+       pmuludq %mm7, %mm0
+       psrlq   $32, %mm6
+       lea     16(rp), rp
+       paddq   %mm0, %mm6
+       movd    (up), %mm0
+       pmuludq %mm7, %mm0
+       movd    %mm6, -4(rp)
+       psrlq   $32, %mm6
+L(m3): paddq   %mm0, %mm6
+       movd    4(up), %mm0
+       pmuludq %mm7, %mm0
+       movd    %mm6, (rp)
+       psrlq   $32, %mm6
+       paddq   %mm0, %mm6
+       movd    8(up), %mm0
+       pmuludq %mm7, %mm0
+       movd    %mm6, 4(rp)
+       psrlq   $32, %mm6
+       paddq   %mm0, %mm6
+       sub     $4, un
+       movd    %mm6, 8(rp)
+       lea     16(up), up
+       ja      L(lm3)          C flags from the sub: loop while no borrow and un is nonzero
+
+       psrlq   $32, %mm6
+       movd    %mm6, 12(rp)    C store the carry limb that ends the first pass
+
+       decl    vn              C one V limb consumed
+       jz      L(done)
+       lea     -8(rp), rp
+
+L(ol3):        mov     28(%esp), un    C reload un from its argument slot
+       neg     un
+       lea     4(vp), vp
+       movd    (vp), %mm7      C read next V limb
+       mov     24(%esp), up    C restart from the up argument
+       lea     16(rp,un,4), rp
+
+       movd    (up), %mm0
+       pmuludq %mm7, %mm0
+       sar     $2, un          C negative round counter, stepped towards zero by the inc below
+       movd    4(up), %mm1
+       movd    %mm0, %ebx
+       pmuludq %mm7, %mm1
+       lea     -8(up), up
+       xor     %edx, %edx      C zero edx and CF
+       jmp     L(a3)
+
+L(la3):        movd    4(up), %mm1
+       adc     $0, %edx
+       add     %eax, 12(rp)    C accumulate into the limb already in the result
+       movd    %mm0, %ebx
+       pmuludq %mm7, %mm1
+       lea     16(rp), rp
+       psrlq   $32, %mm0
+       adc     %edx, %ebx
+       movd    %mm0, %edx
+       movd    %mm1, %eax
+       movd    8(up), %mm0
+       pmuludq %mm7, %mm0
+       adc     $0, %edx
+       add     %ebx, (rp)
+       psrlq   $32, %mm1
+       adc     %edx, %eax
+       movd    %mm1, %edx
+       movd    %mm0, %ebx
+       movd    12(up), %mm1
+       pmuludq %mm7, %mm1
+       adc     $0, %edx
+       add     %eax, 4(rp)
+L(a3): psrlq   $32, %mm0
+       adc     %edx, %ebx
+       movd    %mm0, %edx
+       movd    %mm1, %eax
+       lea     16(up), up
+       movd    (up), %mm0
+       adc     $0, %edx
+       add     %ebx, 8(rp)
+       psrlq   $32, %mm1
+       adc     %edx, %eax
+       movd    %mm1, %edx
+       pmuludq %mm7, %mm0
+       inc     un              C inc leaves CF alone, so the adc chain survives the loop branch
+       jnz     L(la3)
+
+       adc     un, %edx        C un is zero here
+       add     %eax, 12(rp)
+       movd    %mm0, %ebx
+       psrlq   $32, %mm0
+       adc     %edx, %ebx
+       movd    %mm0, %eax
+       adc     un, %eax
+       add     %ebx, 16(rp)
+       adc     un, %eax
+       mov     %eax, 20(rp)    C plain store: the limb above the accumulated ones
+
+       decl    vn
+       jnz     L(ol3)          C more V limbs: run another accumulation pass
+       jmp     L(done)
+
+C ================================================================
+       ALIGN(16)
+L(lm0):        movd    (up), %mm0
+       pmuludq %mm7, %mm0
+       psrlq   $32, %mm6
+       lea     16(rp), rp
+L(of0):        paddq   %mm0, %mm6
+       movd    4(up), %mm0
+       pmuludq %mm7, %mm0
+       movd    %mm6, (rp)
+       psrlq   $32, %mm6
+       paddq   %mm0, %mm6
+       movd    8(up), %mm0
+       pmuludq %mm7, %mm0
+       movd    %mm6, 4(rp)
+       psrlq   $32, %mm6
+       paddq   %mm0, %mm6
+       movd    12(up), %mm0
+       pmuludq %mm7, %mm0
+       movd    %mm6, 8(rp)
+       psrlq   $32, %mm6
+       paddq   %mm0, %mm6
+       sub     $4, un
+       movd    %mm6, 12(rp)
+       lea     16(up), up
+       ja      L(lm0)
+
+       psrlq   $32, %mm6
+       movd    %mm6, 16(rp)    C store the carry limb that ends the first pass
+
+       decl    vn
+       jz      L(done)
+       lea     -4(rp), rp
+
+L(ol0):        mov     28(%esp), un    C reload un from its argument slot
+       neg     un
+       lea     4(vp), vp
+       movd    (vp), %mm7      C read next V limb
+       mov     24(%esp), up    C restart from the up argument
+       lea     20(rp,un,4), rp
+
+       movd    (up), %mm1
+       pmuludq %mm7, %mm1
+       sar     $2, un          C negative round counter, stepped towards zero by the inc below
+       movd    4(up), %mm0
+       lea     -4(up), up
+       movd    %mm1, %eax
+       pmuludq %mm7, %mm0
+       xor     %edx, %edx      C zero edx and CF
+       jmp     L(a0)
+
+L(la0):        movd    4(up), %mm1
+       adc     $0, %edx
+       add     %eax, 12(rp)
+       movd    %mm0, %ebx
+       pmuludq %mm7, %mm1
+       lea     16(rp), rp
+       psrlq   $32, %mm0
+       adc     %edx, %ebx
+       movd    %mm0, %edx
+       movd    %mm1, %eax
+       movd    8(up), %mm0
+       pmuludq %mm7, %mm0
+       adc     $0, %edx
+       add     %ebx, (rp)
+L(a0): psrlq   $32, %mm1
+       adc     %edx, %eax
+       movd    %mm1, %edx
+       movd    %mm0, %ebx
+       movd    12(up), %mm1
+       pmuludq %mm7, %mm1
+       adc     $0, %edx
+       add     %eax, 4(rp)
+       psrlq   $32, %mm0
+       adc     %edx, %ebx
+       movd    %mm0, %edx
+       movd    %mm1, %eax
+       lea     16(up), up
+       movd    (up), %mm0
+       adc     $0, %edx
+       add     %ebx, 8(rp)
+       psrlq   $32, %mm1
+       adc     %edx, %eax
+       movd    %mm1, %edx
+       pmuludq %mm7, %mm0
+       inc     un              C inc leaves CF alone, so the adc chain survives the loop branch
+       jnz     L(la0)
+
+       adc     un, %edx        C un is zero here
+       add     %eax, 12(rp)
+       movd    %mm0, %ebx
+       psrlq   $32, %mm0
+       adc     %edx, %ebx
+       movd    %mm0, %eax
+       adc     un, %eax
+       add     %ebx, 16(rp)
+       adc     un, %eax
+       mov     %eax, 20(rp)
+
+       decl    vn
+       jnz     L(ol0)
+       jmp     L(done)
+
+C ================================================================
+       ALIGN(16)
+L(lm1):        movd    -12(up), %mm0
+       pmuludq %mm7, %mm0
+       psrlq   $32, %mm6
+       lea     16(rp), rp
+       paddq   %mm0, %mm6
+       movd    -8(up), %mm0
+       pmuludq %mm7, %mm0
+       movd    %mm6, -12(rp)
+       psrlq   $32, %mm6
+       paddq   %mm0, %mm6
+       movd    -4(up), %mm0
+       pmuludq %mm7, %mm0
+       movd    %mm6, -8(rp)
+       psrlq   $32, %mm6
+       paddq   %mm0, %mm6
+       movd    (up), %mm0
+       pmuludq %mm7, %mm0
+       movd    %mm6, -4(rp)
+       psrlq   $32, %mm6
+L(of1):        paddq   %mm0, %mm6
+       sub     $4, un
+       movd    %mm6, (rp)
+       lea     16(up), up
+       ja      L(lm1)
+
+       psrlq   $32, %mm6
+       movd    %mm6, 4(rp)     C store the carry limb that ends the first pass
+
+       decl    vn
+       jz      L(done)
+       lea     -16(rp), rp
+
+L(ol1):        mov     28(%esp), un    C reload un from its argument slot
+       neg     un
+       lea     4(vp), vp
+       movd    (vp), %mm7      C read next V limb
+       mov     24(%esp), up    C restart from the up argument
+       lea     24(rp,un,4), rp
+
+       movd    (up), %mm0
+       pmuludq %mm7, %mm0
+       sar     $2, un
+       movd    %mm0, %ebx
+       movd    4(up), %mm1
+       pmuludq %mm7, %mm1
+       xor     %edx, %edx      C zero edx and CF
+       inc     un              C counter adjustment for this entry; CF stays zero since inc keeps it
+       jmp     L(a1)
+
+L(la1):        movd    4(up), %mm1
+       adc     $0, %edx
+       add     %eax, 12(rp)
+       movd    %mm0, %ebx
+       pmuludq %mm7, %mm1
+       lea     16(rp), rp
+L(a1): psrlq   $32, %mm0
+       adc     %edx, %ebx
+       movd    %mm0, %edx
+       movd    %mm1, %eax
+       movd    8(up), %mm0
+       pmuludq %mm7, %mm0
+       adc     $0, %edx
+       add     %ebx, (rp)
+       psrlq   $32, %mm1
+       adc     %edx, %eax
+       movd    %mm1, %edx
+       movd    %mm0, %ebx
+       movd    12(up), %mm1
+       pmuludq %mm7, %mm1
+       adc     $0, %edx
+       add     %eax, 4(rp)
+       psrlq   $32, %mm0
+       adc     %edx, %ebx
+       movd    %mm0, %edx
+       movd    %mm1, %eax
+       lea     16(up), up
+       movd    (up), %mm0
+       adc     $0, %edx
+       add     %ebx, 8(rp)
+       psrlq   $32, %mm1
+       adc     %edx, %eax
+       movd    %mm1, %edx
+       pmuludq %mm7, %mm0
+       inc     un
+       jnz     L(la1)
+
+       adc     un, %edx        C un is zero here
+       add     %eax, 12(rp)
+       movd    %mm0, %ebx
+       psrlq   $32, %mm0
+       adc     %edx, %ebx
+       movd    %mm0, %eax
+       adc     un, %eax
+       add     %ebx, 16(rp)
+       adc     un, %eax
+       mov     %eax, 20(rp)
+
+       decl    vn
+       jnz     L(ol1)
+       jmp     L(done)
+
+C ================================================================
+       ALIGN(16)
+L(lm2):        movd    -8(up), %mm0
+       pmuludq %mm7, %mm0
+       psrlq   $32, %mm6
+       lea     16(rp), rp
+       paddq   %mm0, %mm6
+       movd    -4(up), %mm0
+       pmuludq %mm7, %mm0
+       movd    %mm6, -8(rp)
+       psrlq   $32, %mm6
+       paddq   %mm0, %mm6
+       movd    (up), %mm0
+       pmuludq %mm7, %mm0
+       movd    %mm6, -4(rp)
+       psrlq   $32, %mm6
+L(of2):        paddq   %mm0, %mm6
+       movd    4(up), %mm0
+       pmuludq %mm7, %mm0
+       movd    %mm6, (rp)
+       psrlq   $32, %mm6
+       paddq   %mm0, %mm6
+       sub     $4, un
+       movd    %mm6, 4(rp)
+       lea     16(up), up
+       ja      L(lm2)
+
+       psrlq   $32, %mm6
+       movd    %mm6, 8(rp)     C store the carry limb that ends the first pass
+
+       decl    vn
+       jz      L(done)
+       lea     -12(rp), rp
+
+L(ol2):        mov     28(%esp), un    C reload un from its argument slot
+       neg     un
+       lea     4(vp), vp
+       movd    (vp), %mm7      C read next V limb
+       mov     24(%esp), up    C restart from the up argument
+       lea     12(rp,un,4), rp
+
+       movd    (up), %mm1
+       pmuludq %mm7, %mm1
+       sar     $2, un
+       movd    4(up), %mm0     C loaded only; its multiply happens at the lo2 entry point
+       lea     4(up), up
+       movd    %mm1, %eax
+       xor     %edx, %edx      C zero edx and CF
+       jmp     L(lo2)
+
+L(la2):        movd    4(up), %mm1
+       adc     $0, %edx
+       add     %eax, 12(rp)
+       movd    %mm0, %ebx
+       pmuludq %mm7, %mm1
+       lea     16(rp), rp
+       psrlq   $32, %mm0
+       adc     %edx, %ebx
+       movd    %mm0, %edx
+       movd    %mm1, %eax
+       movd    8(up), %mm0
+       pmuludq %mm7, %mm0
+       adc     $0, %edx
+       add     %ebx, (rp)
+       psrlq   $32, %mm1
+       adc     %edx, %eax
+       movd    %mm1, %edx
+       movd    %mm0, %ebx
+       movd    12(up), %mm1
+       pmuludq %mm7, %mm1
+       adc     $0, %edx
+       add     %eax, 4(rp)
+       psrlq   $32, %mm0
+       adc     %edx, %ebx
+       movd    %mm0, %edx
+       movd    %mm1, %eax
+       lea     16(up), up
+       movd    (up), %mm0
+       adc     $0, %edx
+       add     %ebx, 8(rp)
+L(lo2):        psrlq   $32, %mm1
+       adc     %edx, %eax
+       movd    %mm1, %edx
+       pmuludq %mm7, %mm0
+       inc     un
+       jnz     L(la2)
+
+       adc     un, %edx        C un is zero here
+       add     %eax, 12(rp)
+       movd    %mm0, %ebx
+       psrlq   $32, %mm0
+       adc     %edx, %ebx
+       movd    %mm0, %eax
+       adc     un, %eax
+       add     %ebx, 16(rp)
+       adc     un, %eax
+       mov     %eax, 20(rp)
+
+       decl    vn
+       jnz     L(ol2)
+C      jmp     L(done)
+
+C ================================================================
+L(done):
+       emms                    C leave MMX state before returning
+       pop     %ebp            C restore in reverse order of the pushes at entry
+       pop     %ebx
+       pop     %esi
+       pop     %edi
+       ret
+EPILOGUE()
diff --git a/mpn/x86/atom/sse2/popcount.asm b/mpn/x86/atom/sse2/popcount.asm

new file mode 100644 (file)

index 0000000..997616c
--- /dev/null
+++ b/mpn/x86/atom/sse2/popcount.asm
@@ -0,0 +1,24 @@
+dnl  Intel Atom mpn_popcount -- population count.
+
+dnl  Copyright 2011 Free Software Foundation, Inc.
+dnl
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
+dnl
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+MULFUNC_PROLOGUE(mpn_popcount)
+include_mpn(`x86/pentium4/sse2/popcount.asm')
diff --git a/mpn/x86/atom/sse2/sqr_basecase.asm b/mpn/x86/atom/sse2/sqr_basecase.asm

new file mode 100644 (file)

index 0000000..6032293
--- /dev/null
+++ b/mpn/x86/atom/sse2/sqr_basecase.asm
@@ -0,0 +1,623 @@
+dnl  x86 mpn_sqr_basecase -- square an mpn number, optimised for atom.
+
+dnl  Contributed to the GNU project by Torbjorn Granlund and Marco Bodrato.
+dnl
+dnl  Copyright 2011 Free Software Foundation, Inc.
+dnl
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+dnl
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C TODO
+C  * Check if 'jmp N(%esp)' is well-predicted enough to allow us to combine the
+C    4 large loops into one; we could use it for the outer loop branch.
+C  * Optimise code outside of inner loops.
+C  * Write combined addmul_1 feed-in and wind-down code, and use it when
+C    iterating each outer loop.  ("Overlapping software pipelining")
+C  * Perhaps use caller-saves regs for inlined mul_1, allowing us to postpone
+C    all pushes.
+C  * Perhaps write special code for n < M, for some small M.
+C  * Replace inlined addmul_1 with smaller code from aorsmul_1.asm, or perhaps
+C    with even less pipelined code.
+C  * We run the outer loop until we have a 2-limb by 1-limb addmul_1 left.
+C    Consider breaking out earlier, saving the high cost of short loops.
+
+C void mpn_sqr_basecase (mp_ptr wp,
+C                        mp_srcptr xp, mp_size_t xn);
+
+define(`rp',  `%edi')
+define(`up',  `%esi')
+define(`n',   `%ecx')
+
+define(`un',  `%ebp')
+
+       TEXT
+       ALIGN(16)
+PROLOGUE(mpn_sqr_basecase)
+       push    %edi
+       push    %esi
+       mov     12(%esp), rp    C two pushes so far, so the arguments start at 12(%esp)
+       mov     16(%esp), up
+       mov     20(%esp), n
+
+       lea     4(rp), rp       C write triangular product starting at rp[1]
+       dec     n
+       movd    (up), %mm7      C up[0], multiplier for the first row
+
+       jz      L(one)          C n was 1 (ZF from the dec; lea and movd leave flags alone)
+       lea     4(up), up
+       push    %ebx
+       push    %ebp            C four pushes from here on: arguments now start at 20(%esp)
+       mov     n, %eax
+
+       movd    (up), %mm0      C up[1]
+       neg     n
+       pmuludq %mm7, %mm0
+       pxor    %mm6, %mm6      C carry accumulator starts at zero
+       mov     n, un           C negative counter, stepped up by 4 in the first-row loops
+
+       and     $3, %eax        C (n-1) mod 4 selects the first-row variant
+       jz      L(of0)
+       cmp     $2, %eax
+       jc      L(of1)
+       jz      L(of2)
+
+C ================================================================
+       jmp     L(m3)
+       ALIGN(16)
+L(lm3):        movd    -4(up), %mm0
+       pmuludq %mm7, %mm0
+       psrlq   $32, %mm6
+       lea     16(rp), rp
+       paddq   %mm0, %mm6
+       movd    (up), %mm0
+       pmuludq %mm7, %mm0
+       movd    %mm6, -4(rp)
+       psrlq   $32, %mm6
+L(m3): paddq   %mm0, %mm6
+       movd    4(up), %mm0
+       pmuludq %mm7, %mm0
+       movd    %mm6, (rp)
+       psrlq   $32, %mm6
+       paddq   %mm0, %mm6
+       movd    8(up), %mm0
+       pmuludq %mm7, %mm0
+       movd    %mm6, 4(rp)
+       psrlq   $32, %mm6
+       paddq   %mm0, %mm6
+       add     $4, un
+       movd    %mm6, 8(rp)
+       lea     16(up), up
+       js      L(lm3)          C loop while the counter is still negative
+
+       psrlq   $32, %mm6
+       movd    %mm6, 12(rp)    C store the carry limb that ends the first row
+
+       inc     n
+C      jz      L(done)
+  lea  -12(up), up
+  lea  4(rp), rp
+       jmp     L(ol2)
+
+C ================================================================
+       ALIGN(16)
+L(lm0):        movd    (up), %mm0
+       pmuludq %mm7, %mm0
+       psrlq   $32, %mm6
+       lea     16(rp), rp
+L(of0):        paddq   %mm0, %mm6
+       movd    4(up), %mm0
+       pmuludq %mm7, %mm0
+       movd    %mm6, (rp)
+       psrlq   $32, %mm6
+       paddq   %mm0, %mm6
+       movd    8(up), %mm0
+       pmuludq %mm7, %mm0
+       movd    %mm6, 4(rp)
+       psrlq   $32, %mm6
+       paddq   %mm0, %mm6
+       movd    12(up), %mm0
+       pmuludq %mm7, %mm0
+       movd    %mm6, 8(rp)
+       psrlq   $32, %mm6
+       paddq   %mm0, %mm6
+       add     $4, un
+       movd    %mm6, 12(rp)
+       lea     16(up), up
+       js      L(lm0)
+
+       psrlq   $32, %mm6
+       movd    %mm6, 16(rp)    C store the carry limb that ends the first row
+
+       inc     n
+C      jz      L(done)
+  lea  -8(up), up
+  lea  8(rp), rp
+       jmp     L(ol3)
+
+C ================================================================
+       ALIGN(16)
+L(lm1):        movd    -12(up), %mm0
+       pmuludq %mm7, %mm0
+       psrlq   $32, %mm6
+       lea     16(rp), rp
+       paddq   %mm0, %mm6
+       movd    -8(up), %mm0
+       pmuludq %mm7, %mm0
+       movd    %mm6, -12(rp)
+       psrlq   $32, %mm6
+       paddq   %mm0, %mm6
+       movd    -4(up), %mm0
+       pmuludq %mm7, %mm0
+       movd    %mm6, -8(rp)
+       psrlq   $32, %mm6
+       paddq   %mm0, %mm6
+       movd    (up), %mm0
+       pmuludq %mm7, %mm0
+       movd    %mm6, -4(rp)
+       psrlq   $32, %mm6
+L(of1):        paddq   %mm0, %mm6
+       add     $4, un
+       movd    %mm6, (rp)
+       lea     16(up), up
+       js      L(lm1)
+
+       psrlq   $32, %mm6
+       movd    %mm6, 4(rp)     C store the carry limb that ends the first row
+
+       inc     n
+       jz      L(done)         C goes away when we add special n=2 code
+  lea  -20(up), up
+  lea  -4(rp), rp
+       jmp     L(ol0)
+
+C ================================================================
+       ALIGN(16)
+L(lm2):        movd    -8(up), %mm0
+       pmuludq %mm7, %mm0
+       psrlq   $32, %mm6
+       lea     16(rp), rp
+       paddq   %mm0, %mm6
+       movd    -4(up), %mm0
+       pmuludq %mm7, %mm0
+       movd    %mm6, -8(rp)
+       psrlq   $32, %mm6
+       paddq   %mm0, %mm6
+       movd    (up), %mm0
+       pmuludq %mm7, %mm0
+       movd    %mm6, -4(rp)
+       psrlq   $32, %mm6
+L(of2):        paddq   %mm0, %mm6
+       movd    4(up), %mm0
+       pmuludq %mm7, %mm0
+       movd    %mm6, (rp)
+       psrlq   $32, %mm6
+       paddq   %mm0, %mm6
+       add     $4, un
+       movd    %mm6, 4(rp)
+       lea     16(up), up
+       js      L(lm2)
+
+       psrlq   $32, %mm6
+       movd    %mm6, 8(rp)     C store the carry limb that ends the first row
+
+       inc     n
+C      jz      L(done)
+  lea  -16(up), up
+C  lea (rp), rp
+C      jmp     L(ol1)
+
+C ================================================================
+
+L(ol1):        lea     4(up,n,4), up
+       movd    (up), %mm7      C read next U invariant limb
+       lea     8(rp,n,4), rp
+       mov     n, un
+
+       movd    4(up), %mm1
+       pmuludq %mm7, %mm1
+       sar     $2, un
+       movd    %mm1, %ebx
+       inc     un
+       jz      L(re1)          C counter already zero: go straight to the final wind-down
+
+       movd    8(up), %mm0
+       pmuludq %mm7, %mm0
+       xor     %edx, %edx      C zero edx and CF
+       jmp     L(a1)
+
+L(la1):        adc     $0, %edx
+       add     %ebx, 12(rp)    C accumulate into the limb already in the result
+       movd    %mm0, %eax
+       pmuludq %mm7, %mm1
+       lea     16(rp), rp
+       psrlq   $32, %mm0
+       adc     %edx, %eax
+       movd    %mm0, %edx
+       movd    %mm1, %ebx
+       movd    8(up), %mm0
+       pmuludq %mm7, %mm0
+       adc     $0, %edx
+       add     %eax, (rp)
+L(a1): psrlq   $32, %mm1
+       adc     %edx, %ebx
+       movd    %mm1, %edx
+       movd    %mm0, %eax
+       movd    12(up), %mm1
+       pmuludq %mm7, %mm1
+       adc     $0, %edx
+       add     %ebx, 4(rp)
+       psrlq   $32, %mm0
+       adc     %edx, %eax
+       movd    %mm0, %edx
+       movd    %mm1, %ebx
+       lea     16(up), up
+       movd    (up), %mm0
+       adc     $0, %edx
+       add     %eax, 8(rp)
+       psrlq   $32, %mm1
+       adc     %edx, %ebx
+       movd    %mm1, %edx
+       pmuludq %mm7, %mm0
+       inc     un              C inc leaves CF alone, so the adc chain survives the loop branch
+       movd    4(up), %mm1
+       jnz     L(la1)
+
+       adc     un, %edx        C un is zero here
+       add     %ebx, 12(rp)
+       movd    %mm0, %eax
+       pmuludq %mm7, %mm1
+       lea     16(rp), rp
+       psrlq   $32, %mm0
+       adc     %edx, %eax
+       movd    %mm0, %edx
+       movd    %mm1, %ebx
+       adc     un, %edx
+       add     %eax, (rp)
+       psrlq   $32, %mm1
+       adc     %edx, %ebx
+       movd    %mm1, %eax
+       adc     un, %eax
+       add     %ebx, 4(rp)
+       adc     un, %eax
+       mov     %eax, 8(rp)     C plain store: the limb above the accumulated ones
+
+       inc     n               C one row fewer remains; fall through into the next variant
+
+C ================================================================
+
+L(ol0):        lea     (up,n,4), up
+       movd    4(up), %mm7     C read next U invariant limb
+       lea     4(rp,n,4), rp
+       mov     n, un
+
+       movd    8(up), %mm0
+       pmuludq %mm7, %mm0
+       sar     $2, un
+       movd    12(up), %mm1
+       movd    %mm0, %eax
+       pmuludq %mm7, %mm1
+       xor     %edx, %edx      C zero edx and CF
+       jmp     L(a0)
+
+L(la0):        adc     $0, %edx
+       add     %ebx, 12(rp)
+       movd    %mm0, %eax
+       pmuludq %mm7, %mm1
+       lea     16(rp), rp
+       psrlq   $32, %mm0
+       adc     %edx, %eax
+       movd    %mm0, %edx
+       movd    %mm1, %ebx
+       movd    8(up), %mm0
+       pmuludq %mm7, %mm0
+       adc     $0, %edx
+       add     %eax, (rp)
+       psrlq   $32, %mm1
+       adc     %edx, %ebx
+       movd    %mm1, %edx
+       movd    %mm0, %eax
+       movd    12(up), %mm1
+       pmuludq %mm7, %mm1
+       adc     $0, %edx
+       add     %ebx, 4(rp)
+L(a0): psrlq   $32, %mm0
+       adc     %edx, %eax
+       movd    %mm0, %edx
+       movd    %mm1, %ebx
+       lea     16(up), up
+       movd    (up), %mm0
+       adc     $0, %edx
+       add     %eax, 8(rp)
+       psrlq   $32, %mm1
+       adc     %edx, %ebx
+       movd    %mm1, %edx
+       pmuludq %mm7, %mm0
+       inc     un
+       movd    4(up), %mm1
+       jnz     L(la0)
+
+       adc     un, %edx        C un is zero here
+       add     %ebx, 12(rp)
+       movd    %mm0, %eax
+       pmuludq %mm7, %mm1
+       lea     16(rp), rp
+       psrlq   $32, %mm0
+       adc     %edx, %eax
+       movd    %mm0, %edx
+       movd    %mm1, %ebx
+       adc     un, %edx
+       add     %eax, (rp)
+       psrlq   $32, %mm1
+       adc     %edx, %ebx
+       movd    %mm1, %eax
+       adc     un, %eax
+       add     %ebx, 4(rp)
+       adc     un, %eax
+       mov     %eax, 8(rp)
+
+       inc     n
+
+C ================================================================
+
+L(ol3):        lea     12(up,n,4), up
+       movd    -8(up), %mm7    C read next U invariant limb
+       lea     (rp,n,4), rp    C put rp back
+       mov     n, un
+
+       movd    -4(up), %mm1
+       pmuludq %mm7, %mm1
+       sar     $2, un
+       movd    %mm1, %ebx
+       movd    (up), %mm0      C loaded only; its multiply happens at the a3 entry point
+       xor     %edx, %edx      C zero edx and CF
+       jmp     L(a3)
+
+L(la3):        adc     $0, %edx
+       add     %ebx, 12(rp)
+       movd    %mm0, %eax
+       pmuludq %mm7, %mm1
+       lea     16(rp), rp
+       psrlq   $32, %mm0
+       adc     %edx, %eax
+       movd    %mm0, %edx
+       movd    %mm1, %ebx
+       movd    8(up), %mm0
+       pmuludq %mm7, %mm0
+       adc     $0, %edx
+       add     %eax, (rp)
+       psrlq   $32, %mm1
+       adc     %edx, %ebx
+       movd    %mm1, %edx
+       movd    %mm0, %eax
+       movd    12(up), %mm1
+       pmuludq %mm7, %mm1
+       adc     $0, %edx
+       add     %ebx, 4(rp)
+       psrlq   $32, %mm0
+       adc     %edx, %eax
+       movd    %mm0, %edx
+       movd    %mm1, %ebx
+       lea     16(up), up
+       movd    (up), %mm0
+       adc     $0, %edx
+       add     %eax, 8(rp)
+L(a3): psrlq   $32, %mm1
+       adc     %edx, %ebx
+       movd    %mm1, %edx
+       pmuludq %mm7, %mm0
+       inc     un
+       movd    4(up), %mm1
+       jnz     L(la3)
+
+       adc     un, %edx        C un is zero here
+       add     %ebx, 12(rp)
+       movd    %mm0, %eax
+       pmuludq %mm7, %mm1
+       lea     16(rp), rp
+       psrlq   $32, %mm0
+       adc     %edx, %eax
+       movd    %mm0, %edx
+       movd    %mm1, %ebx
+       adc     un, %edx
+       add     %eax, (rp)
+       psrlq   $32, %mm1
+       adc     %edx, %ebx
+       movd    %mm1, %eax
+       adc     un, %eax
+       add     %ebx, 4(rp)
+       adc     un, %eax
+       mov     %eax, 8(rp)
+
+       inc     n
+
+C ================================================================
+
+L(ol2):        lea     8(up,n,4), up
+       movd    -4(up), %mm7    C read next U invariant limb
+       lea     12(rp,n,4), rp
+       mov     n, un
+
+       movd    (up), %mm0
+       pmuludq %mm7, %mm0
+       xor     %edx, %edx
+       sar     $2, un
+       movd    4(up), %mm1
+       test    un, un          C clear carry
+       movd    %mm0, %eax
+       pmuludq %mm7, %mm1
+       inc     un              C sets ZF for the branch below while keeping CF clear
+       jnz     L(a2)
+       jmp     L(re2)          C counter reached zero: only the short wind-down remains
+
+L(la2):        adc     $0, %edx
+       add     %ebx, 12(rp)
+       movd    %mm0, %eax
+       pmuludq %mm7, %mm1
+       lea     16(rp), rp
+L(a2): psrlq   $32, %mm0
+       adc     %edx, %eax
+       movd    %mm0, %edx
+       movd    %mm1, %ebx
+       movd    8(up), %mm0
+       pmuludq %mm7, %mm0
+       adc     $0, %edx
+       add     %eax, (rp)
+       psrlq   $32, %mm1
+       adc     %edx, %ebx
+       movd    %mm1, %edx
+       movd    %mm0, %eax
+       movd    12(up), %mm1
+       pmuludq %mm7, %mm1
+       adc     $0, %edx
+       add     %ebx, 4(rp)
+       psrlq   $32, %mm0
+       adc     %edx, %eax
+       movd    %mm0, %edx
+       movd    %mm1, %ebx
+       lea     16(up), up
+       movd    (up), %mm0
+       adc     $0, %edx
+       add     %eax, 8(rp)
+       psrlq   $32, %mm1
+       adc     %edx, %ebx
+       movd    %mm1, %edx
+       pmuludq %mm7, %mm0
+       inc     un
+       movd    4(up), %mm1
+       jnz     L(la2)
+
+       adc     un, %edx        C un is zero here
+       add     %ebx, 12(rp)
+       movd    %mm0, %eax
+       pmuludq %mm7, %mm1
+       lea     16(rp), rp
+       psrlq   $32, %mm0
+       adc     %edx, %eax
+       movd    %mm0, %edx
+       movd    %mm1, %ebx
+       adc     un, %edx
+       add     %eax, (rp)
+       psrlq   $32, %mm1
+       adc     %edx, %ebx
+       movd    %mm1, %eax
+       adc     un, %eax
+       add     %ebx, 4(rp)
+       adc     un, %eax
+       mov     %eax, 8(rp)
+
+       inc     n
+       jmp     L(ol1)          C wrap around to the first outer-loop variant
+
+C ================================================================
+L(re2):        psrlq   $32, %mm0
+       movd    (up), %mm7      C read next U invariant limb
+       adc     %edx, %eax
+       movd    %mm0, %edx
+       movd    %mm1, %ebx
+       adc     un, %edx
+       add     %eax, (rp)
+       lea     4(rp), rp
+       psrlq   $32, %mm1
+       adc     %edx, %ebx
+       movd    %mm1, %eax
+       movd    4(up), %mm1
+       adc     un, %eax
+       add     %ebx, (rp)
+       pmuludq %mm7, %mm1
+       adc     un, %eax
+       mov     %eax, 4(rp)
+       movd    %mm1, %ebx
+
+L(re1):        psrlq   $32, %mm1
+       add     %ebx, 4(rp)
+       movd    %mm1, %eax
+       adc     un, %eax
+       xor     n, n            C make n zeroness assumption below true
+       mov     %eax, 8(rp)
+
+L(done):                       C n is zero here
+       mov     24(%esp), up    C reload the up argument
+       mov     28(%esp), %eax  C reload the n argument
+
+       movd    (up), %mm0
+       inc     %eax
+       pmuludq %mm0, %mm0      C up[0] squared
+       lea     4(up), up
+       mov     20(%esp), rp    C reload the rp argument
+       shr     %eax            C (n+1)/2 rounds; CF receives the low bit of n+1
+       movd    %mm0, (rp)      C rp[0] = low limb of up[0] squared
+       psrlq   $32, %mm0
+       lea     -12(rp), rp
+       mov     %eax, 28(%esp)  C the round counter reuses the n argument slot
+       jnc     L(odd)          C CF still from the shr: clear when n is odd
+
+       movd    %mm0, %ebp
+       movd    (up), %mm0
+       lea     8(rp), rp
+       pmuludq %mm0, %mm0
+       lea     -4(up), up
+       add     8(rp), %ebp
+       movd    %mm0, %edx
+       adc     12(rp), %edx
+       rcr     n               C park CF in bit 31 of n and pull bit 0 of n into CF
+       jmp     L(ent)
+
+C      ALIGN(16)               C alignment seems irrelevant
+L(top):        movd    (up), %mm1
+       adc     n, n            C n = 2n + CF: bit 31 of n moves out to CF, the incoming CF lands in bit 0
+       movd    %mm0, %eax
+       pmuludq %mm1, %mm1
+       movd    4(up), %mm0
+       adc     (rp), %eax
+       movd    %mm1, %ebx
+       pmuludq %mm0, %mm0
+       psrlq   $32, %mm1
+       adc     4(rp), %ebx
+       movd    %mm1, %ebp
+       movd    %mm0, %edx
+       adc     8(rp), %ebp
+       adc     12(rp), %edx
+       rcr     n               C FIXME: isn't this awfully slow on atom???
+       adc     %eax, (rp)
+       adc     %ebx, 4(rp)
+L(ent):        lea     8(up), up
+       adc     %ebp, 8(rp)
+       psrlq   $32, %mm0
+       adc     %edx, 12(rp)
+L(odd):        decl    28(%esp)        C decl and the lea below leave CF alone for the adc chain
+       lea     16(rp), rp
+       jnz     L(top)
+
+L(end):        adc     n, n
+       movd    %mm0, %eax
+       adc     n, %eax
+       mov     %eax, (rp)
+
+L(rtn):        emms
+       pop     %ebp
+       pop     %ebx
+       pop     %esi
+       pop     %edi
+       ret
+
+L(one):        pmuludq %mm7, %mm7      C n = 1: up[0] squared
+       movq    %mm7, -4(rp)    C both result limbs in one store; rp was advanced by 4 on entry
+       emms
+       pop     %esi            C only two registers had been pushed on this path
+       pop     %edi
+       ret
+EPILOGUE()
diff --git a/mpn/x86/atom/sublsh1_n.asm b/mpn/x86/atom/sublsh1_n.asm

new file mode 100644 (file)

index 0000000..e41bb91
--- /dev/null
+++ b/mpn/x86/atom/sublsh1_n.asm
@@ -0,0 +1,23 @@
+dnl  Intel Atom mpn_sublsh1_n -- rp[] = up[] - (vp[] << 1)
+
+dnl  Copyright 2011 Free Software Foundation, Inc.
+dnl
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
+dnl
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+MULFUNC_PROLOGUE(mpn_sublsh1_n_ip1)
+include_mpn(`x86/k7/sublsh1_n.asm')
diff --git a/mpn/x86/atom/sublsh2_n.asm b/mpn/x86/atom/sublsh2_n.asm

new file mode 100644 (file)

index 0000000..90800dc
--- /dev/null
+++ b/mpn/x86/atom/sublsh2_n.asm
@@ -0,0 +1,46 @@
+dnl  Intel Atom mpn_addlsh2_n/mpn_sublsh2_n -- rp[] = up[] +- (vp[] << 2).
+
+dnl  Contributed to the GNU project by Marco Bodrato.
+
+dnl  Copyright 2011 Free Software Foundation, Inc.
+dnl
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
+dnl
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+define(LSH, 2)
+define(RSH, 30)
+
+ifdef(`OPERATION_addlsh2_n', `
+       define(M4_inst,         adcl)
+       define(M4_opp,          subl)
+       define(M4_function,     mpn_addlsh2_n)
+       define(M4_function_c,   mpn_addlsh2_nc)
+       define(M4_ip_function_c, mpn_addlsh2_nc_ip1)
+       define(M4_ip_function,  mpn_addlsh2_n_ip1)
+',`ifdef(`OPERATION_sublsh2_n', `
+       define(M4_inst,         sbbl)
+       define(M4_opp,          addl)
+       define(M4_function,     mpn_sublsh2_n)
+       define(M4_function_c,   mpn_sublsh2_nc)
+       define(M4_ip_function_c, mpn_sublsh2_nc_ip1)
+       define(M4_ip_function,  mpn_sublsh2_n_ip1)
+',`m4_error(`Need OPERATION_addlsh2_n or OPERATION_sublsh2_n
+')')')
+
+MULFUNC_PROLOGUE(mpn_sublsh2_n mpn_sublsh2_nc mpn_sublsh2_n_ip1 mpn_sublsh2_nc_ip1)
+
+include_mpn(`x86/atom/aorslshC_n.asm')
diff --git a/mpn/x86/bdiv_dbm1c.asm b/mpn/x86/bdiv_dbm1c.asm

index dbee28fd9445d17ee9b80858f57a0fcb85921b8f..ac9faf270a5d69a5fd0d9d5f9b9811e3e8a46193 100644 (file)
--- a/mpn/x86/bdiv_dbm1c.asm
+++ b/mpn/x86/bdiv_dbm1c.asm
@@ -1,6 +1,6 @@
  dnl  x86 mpn_bdiv_dbm1.
  
-dnl  Copyright 2008 Free Software Foundation, Inc.
+dnl  Copyright 2008, 2011 Free Software Foundation, Inc.
  
  dnl  This file is part of the GNU MP Library.
  
@@ -19,14 +19,22 @@ dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
  
  include(`../config.m4')
  
-C          cycles/limb
-C K7:           3.5
-C P4 m0:         ?
-C P4 m1:         ?
-C P4 m2:       13.67
-C P4 m3:         ?
-C P4 m4:         ?
-C P6-13:        5.1
+C                          cycles/limb
+C P5
+C P6 model 0-8,10-12
+C P6 model 9  (Banias)
+C P6 model 13 (Dothan)          5.1
+C P4 model 0  (Willamette)
+C P4 model 1  (?)
+C P4 model 2  (Northwood)      13.67
+C P4 model 3  (Prescott)
+C P4 model 4  (Nocona)
+C Intel Atom
+C AMD K6
+C AMD K7                        3.5
+C AMD K8
+C AMD K10
+
  
  C TODO
  C  * Optimize for more x86 processors
@@ -57,18 +65,17 @@ PROLOGUE(mpn_bdiv_dbm1c)
         cmp     $2, %eax
         jc      L(b1)
         jz      L(b2)
-       jmp     L(b3)
+
+L(b3): lea     -8(%esi), %esi
+       lea     8(%edi), %edi
+       add     $-3, %ebp
+       jmp     L(3)
  
  L(b0): mov     4(%esi), %eax
         lea     -4(%esi), %esi
         lea     12(%edi), %edi
         add     $-4, %ebp
         jmp     L(0)
-L(b3):
-       lea     -8(%esi), %esi
-       lea     8(%edi), %edi
-       add     $-3, %ebp
-       jmp     L(3)
  
  L(b2): mov     4(%esi), %eax
         lea     4(%esi), %esi
@@ -77,8 +84,7 @@ L(b2):        mov     4(%esi), %eax
         jmp     L(2)
  
         ALIGN(8)
-L(top):
-       mov     4(%esi), %eax
+L(top):        mov     4(%esi), %eax
         mul     %ecx
         lea     16(%edi), %edi
         sub     %eax, %ebx
diff --git a/mpn/x86/bdiv_q_1.asm b/mpn/x86/bdiv_q_1.asm

new file mode 100644 (file)

index 0000000..7f344ab
--- /dev/null
+++ b/mpn/x86/bdiv_q_1.asm
@@ -0,0 +1,197 @@
+dnl  x86 mpn_bdiv_q_1 -- mpn by limb exact division.
+
+dnl  Copyright 2001, 2002, 2007, 2011 Free Software Foundation, Inc.
+dnl
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  Rearranged from mpn/x86/dive_1.asm by Marco Bodrato.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
+dnl
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+C     cycles/limb
+C P54    30.0
+C P55    29.0
+C P6     13.0 odd divisor, 12.0 even (strangely)
+C K6     14.0
+C K7     12.0
+C P4     42.0
+
+MULFUNC_PROLOGUE(mpn_bdiv_q_1 mpn_pi1_bdiv_q_1)
+
+defframe(PARAM_SHIFT,  24)
+defframe(PARAM_INVERSE,20)
+defframe(PARAM_DIVISOR,16)
+defframe(PARAM_SIZE,   12)
+defframe(PARAM_SRC,    8)
+defframe(PARAM_DST,    4)
+
+dnl  re-use parameter space
+define(VAR_INVERSE,`PARAM_SRC')
+
+       TEXT
+
+C mp_limb_t
+C mpn_pi1_bdiv_q_1 (mp_ptr dst, mp_srcptr src, mp_size_t size, mp_limb_t divisor,
+C                  mp_limb_t inverse, int shift)
+C Exact division: dst[] = (src[] >> shift) / divisor, via multiplication by inverse.
+       ALIGN(16)
+PROLOGUE(mpn_pi1_bdiv_q_1)
+deflit(`FRAME',0)
+
+       movl    PARAM_SHIFT, %ecx       C ecx = shift count
+       pushl   %ebp    FRAME_pushl()
+
+       movl    PARAM_INVERSE, %eax     C eax = inverse
+       movl    PARAM_SIZE, %ebp        C ebp = size
+       pushl   %ebx    FRAME_pushl()
+L(common):                             C mpn_bdiv_q_1 joins here: eax inverse, ecx shift, ebp size
+       pushl   %edi    FRAME_pushl()
+       pushl   %esi    FRAME_pushl()
+
+       movl    PARAM_SRC, %esi
+       movl    PARAM_DST, %edi
+
+       leal    (%esi,%ebp,4), %esi     C src end
+       leal    (%edi,%ebp,4), %edi     C dst end
+       negl    %ebp                    C -size
+
+       movl    %eax, VAR_INVERSE       C reuses the PARAM_SRC slot; src was already loaded
+       movl    (%esi,%ebp,4), %eax     C src[0]
+
+       xorl    %ebx, %ebx              C carry bit = 0
+       xorl    %edx, %edx              C carry limb = 0
+
+       incl    %ebp
+       jz      L(one)                  C size == 1: no higher limb to shift in
+
+       movl    (%esi,%ebp,4), %edx     C src[1]
+
+       shrdl(  %cl, %edx, %eax)        C eax = low limb of src[1]:src[0] >> shift
+
+       movl    VAR_INVERSE, %edx       C edx = inverse for the imull at L(entry)
+       jmp     L(entry)
+
+
+       ALIGN(8)
+       nop     C k6 code alignment
+       nop
+L(top):
+       C eax   q
+       C ebx   carry bit, 0 or -1
+       C ecx   shift
+       C edx   carry limb
+       C esi   src end
+       C edi   dst end
+       C ebp   counter, limbs, negative
+
+       movl    -4(%esi,%ebp,4), %eax
+       subl    %ebx, %edx              C accumulate carry bit
+
+       movl    (%esi,%ebp,4), %ebx     C next src limb, supplies the bits shifted in below
+
+       shrdl(  %cl, %ebx, %eax)
+
+       subl    %edx, %eax              C apply carry limb
+       movl    VAR_INVERSE, %edx
+
+       sbbl    %ebx, %ebx              C ebx = 0 or -1 from the borrow of the subl
+
+L(entry):
+       imull   %edx, %eax              C q = limb * inverse
+
+       movl    %eax, -4(%edi,%ebp,4)   C store quotient limb
+       movl    PARAM_DIVISOR, %edx
+
+       mull    %edx                    C edx:eax = q * divisor, edx = next carry limb
+
+       incl    %ebp
+       jnz     L(top)
+
+
+       movl    -4(%esi), %eax          C src high limb
+L(one):
+       shrl    %cl, %eax               C top limb: zero bits come in from above
+       popl    %esi    FRAME_popl()
+
+       addl    %ebx, %eax              C apply carry bit
+
+       subl    %edx, %eax              C apply carry limb
+
+       imull   VAR_INVERSE, %eax       C last quotient limb
+
+       movl    %eax, -4(%edi)          C store it at dst[size-1]
+
+       popl    %edi
+       popl    %ebx
+       popl    %ebp
+
+       ret
+
+EPILOGUE()
+
+C mp_limb_t mpn_bdiv_q_1 (mp_ptr dst, mp_srcptr src, mp_size_t size,
+C                           mp_limb_t divisor);
+C Strips the low zero bits of divisor, inverts the odd part, then joins L(common).
+
+       ALIGN(16)
+PROLOGUE(mpn_bdiv_q_1)
+deflit(`FRAME',0)
+
+       movl    PARAM_DIVISOR, %eax     C eax = d
+       pushl   %ebp    FRAME_pushl()
+
+       movl    $-1, %ecx               C shift count
+       movl    PARAM_SIZE, %ebp        C ebp = size, as L(common) expects
+
+       pushl   %ebx    FRAME_pushl()
+
+L(strip_twos):
+       incl    %ecx                    C one more low bit examined
+
+       shrl    %eax
+       jnc     L(strip_twos)           C loop until a 1 bit is shifted out; d == 0 would never exit
+
+       leal    1(%eax,%eax), %ebx      C d without twos
+       andl    $127, %eax              C d/2, 7 bits
+
+ifdef(`PIC',`
+       LEA(    binvert_limb_table, %edx)
+       movzbl  (%eax,%edx), %eax               C inv 8 bits
+',`
+       movzbl  binvert_limb_table(%eax), %eax  C inv 8 bits
+')
+
+       leal    (%eax,%eax), %edx       C 2*inv
+       movl    %ebx, PARAM_DIVISOR     C d without twos
+       imull   %eax, %eax              C inv*inv
+       imull   %ebx, %eax              C inv*inv*d
+       subl    %eax, %edx              C inv = 2*inv - inv*inv*d
+
+       leal    (%edx,%edx), %eax       C 2*inv
+       imull   %edx, %edx              C inv*inv
+       imull   %ebx, %edx              C inv*inv*d
+       subl    %edx, %eax              C inv = 2*inv - inv*inv*d
+
+       ASSERT(e,`      C expect d*inv == 1 mod 2^GMP_LIMB_BITS
+       pushl   %eax    FRAME_pushl()
+       imull   PARAM_DIVISOR, %eax
+       cmpl    $1, %eax
+       popl    %eax    FRAME_popl()')
+
+       jmp     L(common)               C eax = inverse, ecx = shift, ebp = size; ebp and ebx are pushed
+EPILOGUE()
+
diff --git a/mpn/x86/bobcat/gmp-mparam.h b/mpn/x86/bobcat/gmp-mparam.h

new file mode 100644 (file)

index 0000000..e14ba39
--- /dev/null
+++ b/mpn/x86/bobcat/gmp-mparam.h
@@ -0,0 +1,142 @@
+/* x86/bobcat gmp-mparam.h -- Compiler/machine parameter header file.
+
+Copyright 1991, 1993, 1994, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007,
+2008, 2009, 2010, 2011 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#define GMP_LIMB_BITS 32
+#define BYTES_PER_MP_LIMB 4
+
+/* Generated by tuneup.c, 2011-11-25, gcc 4.2 */
+
+#define MOD_1_NORM_THRESHOLD                 0  /* always */
+#define MOD_1_UNNORM_THRESHOLD               0  /* always */
+#define MOD_1N_TO_MOD_1_1_THRESHOLD          9
+#define MOD_1U_TO_MOD_1_1_THRESHOLD          3
+#define MOD_1_1_TO_MOD_1_2_THRESHOLD        12
+#define MOD_1_2_TO_MOD_1_4_THRESHOLD        23
+#define PREINV_MOD_1_TO_MOD_1_THRESHOLD     13
+#define USE_PREINV_DIVREM_1                  1  /* native */
+#define DIV_QR_2_PI2_THRESHOLD           MP_SIZE_T_MAX  /* never */
+#define DIVEXACT_1_THRESHOLD                 0  /* always (native) */
+#define BMOD_1_TO_MOD_1_THRESHOLD           42
+
+#define MUL_TOOM22_THRESHOLD                28
+#define MUL_TOOM33_THRESHOLD                90
+#define MUL_TOOM44_THRESHOLD               147
+#define MUL_TOOM6H_THRESHOLD               274
+#define MUL_TOOM8H_THRESHOLD               454
+
+#define MUL_TOOM32_TO_TOOM43_THRESHOLD      89
+#define MUL_TOOM32_TO_TOOM53_THRESHOLD     122
+#define MUL_TOOM42_TO_TOOM53_THRESHOLD      93
+#define MUL_TOOM42_TO_TOOM63_THRESHOLD     113
+
+#define SQR_BASECASE_THRESHOLD               0  /* always (native) */
+#define SQR_TOOM2_THRESHOLD                 38
+#define SQR_TOOM3_THRESHOLD                 89
+#define SQR_TOOM4_THRESHOLD                220
+#define SQR_TOOM6_THRESHOLD                303
+#define SQR_TOOM8_THRESHOLD                454
+
+#define MULMID_TOOM42_THRESHOLD             76
+
+#define MULMOD_BNM1_THRESHOLD               19
+#define SQRMOD_BNM1_THRESHOLD               23
+
+#define POWM_SEC_TABLE  4,14,290,357,2178
+
+#define MUL_FFT_MODF_THRESHOLD             888  /* k = 6 */
+#define MUL_FFT_TABLE3                                      \
+  { {    888, 6}, {     25, 7}, {     13, 6}, {     27, 7}, \
+    {     15, 6}, {     33, 7}, {     17, 6}, {     35, 7}, \
+    {     19, 6}, {     39, 7}, {     23, 6}, {     47, 7}, \
+    {     27, 8}, {     15, 7}, {     31, 6}, {     63, 7}, \
+    {     35, 8}, {     19, 7}, {     41, 8}, {     23, 7}, \
+    {     49, 8}, {     31, 7}, {     63, 8}, {     39, 7}, \
+    {     79, 8}, {     43, 9}, {     23, 8}, {     51, 9}, \
+    {     31, 8}, {     67, 9}, {     39, 8}, {     79, 9}, \
+    {     47, 8}, {     95, 9}, {     55,10}, {     31, 9}, \
+    {     63, 8}, {    127, 9}, {     79,10}, {     47, 9}, \
+    {     95,11}, {     31,10}, {     63, 9}, {    135,10}, \
+    {     79, 9}, {    159,10}, {     95, 9}, {    191,11}, \
+    {     63,10}, {    127, 9}, {    255,10}, {    159,11}, \
+    {     95,10}, {    191,12}, {     63,11}, {    127,10}, \
+    {    255, 9}, {    511,10}, {    271, 9}, {    543,11}, \
+    {    159,10}, {    319, 9}, {    671,11}, {    191,10}, \
+    {    383, 9}, {    767,12}, {   4096,13}, {   8192,14}, \
+    {  16384,15}, {  32768,16} }
+#define MUL_FFT_TABLE3_SIZE 70
+#define MUL_FFT_THRESHOLD                 7552
+
+#define SQR_FFT_MODF_THRESHOLD             723  /* k = 5 */
+#define SQR_FFT_TABLE3                                      \
+  { {    723, 5}, {     25, 6}, {     13, 5}, {     28, 6}, \
+    {     15, 5}, {     31, 6}, {     27, 7}, {     15, 6}, \
+    {     33, 7}, {     17, 6}, {     35, 7}, {     19, 6}, \
+    {     39, 7}, {     23, 6}, {     47, 7}, {     27, 8}, \
+    {     15, 7}, {     31, 6}, {     63, 7}, {     35, 8}, \
+    {     19, 7}, {     41, 8}, {     23, 7}, {     47, 8}, \
+    {     31, 7}, {     63, 8}, {     43, 9}, {     23, 8}, \
+    {     47, 7}, {     95, 8}, {     51, 9}, {     31, 8}, \
+    {     67, 9}, {     39, 8}, {     79, 9}, {     47, 8}, \
+    {     95,10}, {     31, 9}, {     63, 8}, {    127, 9}, \
+    {     79,10}, {     47, 9}, {     95,11}, {     31,10}, \
+    {     63, 9}, {    135,10}, {     79, 9}, {    159,10}, \
+    {     95, 9}, {    191,11}, {     63,10}, {    127, 9}, \
+    {    255,10}, {    159,11}, {     95,10}, {    191,12}, \
+    {     63,11}, {    127,10}, {    255, 9}, {    543,11}, \
+    {    159, 9}, {    671,11}, {    191,10}, {    383, 9}, \
+    {    799,12}, {   4096,13}, {   8192,14}, {  16384,15}, \
+    {  32768,16} }
+#define SQR_FFT_TABLE3_SIZE 69
+#define SQR_FFT_THRESHOLD                 5760
+
+#define MULLO_BASECASE_THRESHOLD             5
+#define MULLO_DC_THRESHOLD                  45
+#define MULLO_MUL_N_THRESHOLD            13463
+
+#define DC_DIV_QR_THRESHOLD                 75
+#define DC_DIVAPPR_Q_THRESHOLD             216
+#define DC_BDIV_QR_THRESHOLD                67
+#define DC_BDIV_Q_THRESHOLD                143
+
+#define INV_MULMOD_BNM1_THRESHOLD           75
+#define INV_NEWTON_THRESHOLD               244
+#define INV_APPR_THRESHOLD                 228
+
+#define BINV_NEWTON_THRESHOLD              276
+#define REDC_1_TO_REDC_N_THRESHOLD          71
+
+#define MU_DIV_QR_THRESHOLD               1858
+#define MU_DIVAPPR_Q_THRESHOLD            1822
+#define MUPI_DIV_QR_THRESHOLD              122
+#define MU_BDIV_QR_THRESHOLD              1787
+#define MU_BDIV_Q_THRESHOLD               1787
+
+#define MATRIX22_STRASSEN_THRESHOLD         19
+#define HGCD_THRESHOLD                      78
+#define HGCD_APPR_THRESHOLD                 55
+#define HGCD_REDUCE_THRESHOLD             4633
+#define GCD_DC_THRESHOLD                   474
+#define GCDEXT_DC_THRESHOLD                345
+#define JACOBI_BASE_METHOD                   4
+
+#define GET_STR_DC_THRESHOLD                14
+#define GET_STR_PRECOMPUTE_THRESHOLD        31
+#define SET_STR_DC_THRESHOLD               270
+#define SET_STR_PRECOMPUTE_THRESHOLD       812
diff --git a/mpn/x86/copyd.asm b/mpn/x86/copyd.asm

index 4ce3bbbc69b928236a98321c249b90ade245d897..c2215ef37db1e67b1520f002e3932120678b7382 100644 (file)
--- a/mpn/x86/copyd.asm
+++ b/mpn/x86/copyd.asm
@@ -21,11 +21,11 @@ include(`../config.m4')
  
  
  C     cycles/limb  startup (approx)
-C P5:     1.0         40
-C P6      2.4         70
-C K6      1.0         55
-C K7:     1.3         75
-C P4:     2.6        175
+C P5     1.0         40
+C P6     2.4         70
+C K6     1.0         55
+C K7     1.3         75
+C P4     2.6        175
  C
  C (Startup time includes some function call overheads.)
  
diff --git a/mpn/x86/copyi.asm b/mpn/x86/copyi.asm

index c6bbaeee65a660b7dcaabeed0343de4b01e2ab3d..542a39eb6401e61d6363b87e014ab1bd2044275d 100644 (file)
--- a/mpn/x86/copyi.asm
+++ b/mpn/x86/copyi.asm
@@ -21,11 +21,11 @@ include(`../config.m4')
  
  
  C     cycles/limb  startup (approx)
-C P5:     1.0         35
-C P6      0.75        45
-C K6      1.0         30
-C K7:     1.3         65
-C P4:     1.0        120
+C P5     1.0         35
+C P6     0.75        45
+C K6     1.0         30
+C K7     1.3         65
+C P4     1.0        120
  C
  C (Startup time includes some function call overheads.)
  
diff --git a/mpn/x86/core2/gmp-mparam.h b/mpn/x86/core2/gmp-mparam.h

new file mode 100644 (file)

index 0000000..feb0f28
--- /dev/null
+++ b/mpn/x86/core2/gmp-mparam.h
@@ -0,0 +1,141 @@
+/* x86/core2 gmp-mparam.h -- Compiler/machine parameter header file.
+
+Copyright 1991, 1993, 1994, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007,
+2008, 2009, 2010, 2011 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#define GMP_LIMB_BITS 32
+#define BYTES_PER_MP_LIMB 4
+
+/* Generated by tuneup.c, 2011-11-25, gcc 4.2 */
+
+#define MOD_1_NORM_THRESHOLD                 4
+#define MOD_1_UNNORM_THRESHOLD               4
+#define MOD_1N_TO_MOD_1_1_THRESHOLD          5
+#define MOD_1U_TO_MOD_1_1_THRESHOLD          4
+#define MOD_1_1_TO_MOD_1_2_THRESHOLD         9
+#define MOD_1_2_TO_MOD_1_4_THRESHOLD         0  /* never mpn_mod_1s_2p */
+#define PREINV_MOD_1_TO_MOD_1_THRESHOLD      7
+#define USE_PREINV_DIVREM_1                  1  /* native */
+#define DIV_QR_2_PI2_THRESHOLD           MP_SIZE_T_MAX  /* never */
+#define DIVEXACT_1_THRESHOLD                 0  /* always (native) */
+#define BMOD_1_TO_MOD_1_THRESHOLD           19
+
+#define MUL_TOOM22_THRESHOLD                24
+#define MUL_TOOM33_THRESHOLD                93
+#define MUL_TOOM44_THRESHOLD               228
+#define MUL_TOOM6H_THRESHOLD               294
+#define MUL_TOOM8H_THRESHOLD               458
+
+#define MUL_TOOM32_TO_TOOM43_THRESHOLD      90
+#define MUL_TOOM32_TO_TOOM53_THRESHOLD     114
+#define MUL_TOOM42_TO_TOOM53_THRESHOLD      89
+#define MUL_TOOM42_TO_TOOM63_THRESHOLD      96
+
+#define SQR_BASECASE_THRESHOLD               0  /* always (native) */
+#define SQR_TOOM2_THRESHOLD                 34
+#define SQR_TOOM3_THRESHOLD                116
+#define SQR_TOOM4_THRESHOLD                178
+#define SQR_TOOM6_THRESHOLD                262
+#define SQR_TOOM8_THRESHOLD                597
+
+#define MULMID_TOOM42_THRESHOLD             70
+
+#define MULMOD_BNM1_THRESHOLD               20
+#define SQRMOD_BNM1_THRESHOLD               19
+
+#define POWM_SEC_TABLE  6,26,262,991,2212
+
+#define MUL_FFT_MODF_THRESHOLD             690  /* k = 5 */
+#define MUL_FFT_TABLE3                                      \
+  { {    690, 5}, {     25, 6}, {     13, 5}, {     27, 6}, \
+    {     15, 5}, {     31, 6}, {     25, 7}, {     13, 6}, \
+    {     27, 7}, {     15, 6}, {     33, 7}, {     17, 6}, \
+    {     35, 7}, {     19, 6}, {     39, 7}, {     23, 6}, \
+    {     47, 7}, {     27, 8}, {     15, 7}, {     35, 8}, \
+    {     19, 7}, {     41, 8}, {     23, 7}, {     47, 8}, \
+    {     31, 7}, {     63, 8}, {     39, 9}, {     23, 8}, \
+    {     51, 9}, {     31, 8}, {     67, 9}, {     39, 8}, \
+    {     79, 9}, {     47, 8}, {     95,10}, {     31, 9}, \
+    {     63, 8}, {    127, 9}, {     79,10}, {     47, 9}, \
+    {     95,11}, {     31,10}, {     63, 9}, {    135,10}, \
+    {     79, 9}, {    159,10}, {     95, 9}, {    191,11}, \
+    {     63,10}, {    127, 9}, {    255,10}, {    159,11}, \
+    {     95,10}, {    191,12}, {     63,11}, {    127,10}, \
+    {    271, 9}, {    543,10}, {    287,11}, {    159,10}, \
+    {    319, 9}, {    639,11}, {    191,10}, {    383, 9}, \
+    {    799,11}, {    223,12}, {   4096,13}, {   8192,14}, \
+    {  16384,15}, {  32768,16} }
+#define MUL_FFT_TABLE3_SIZE 70
+#define MUL_FFT_THRESHOLD                 7552
+
+#define SQR_FFT_MODF_THRESHOLD             630  /* k = 5 */
+#define SQR_FFT_TABLE3                                      \
+  { {    630, 5}, {     25, 6}, {     13, 5}, {     28, 6}, \
+    {     15, 5}, {     31, 6}, {     25, 7}, {     13, 6}, \
+    {     27, 7}, {     15, 6}, {     33, 7}, {     17, 6}, \
+    {     35, 7}, {     19, 6}, {     39, 7}, {     23, 6}, \
+    {     47, 7}, {     27, 8}, {     15, 7}, {     35, 8}, \
+    {     19, 7}, {     41, 8}, {     23, 7}, {     49, 8}, \
+    {     27, 9}, {     15, 8}, {     31, 7}, {     63, 8}, \
+    {     39, 9}, {     23, 8}, {     51, 9}, {     31, 8}, \
+    {     67, 9}, {     39, 8}, {     79, 9}, {     47, 8}, \
+    {     95, 9}, {     55,10}, {     31, 9}, {     79,10}, \
+    {     47, 9}, {     95,11}, {     31,10}, {     63, 9}, \
+    {    127,10}, {     79, 9}, {    159,10}, {     95,11}, \
+    {     63,10}, {    159,11}, {     95,10}, {    191,12}, \
+    {     63,11}, {    127,10}, {    271, 9}, {    543,11}, \
+    {    159,10}, {    319, 9}, {    671, 8}, {   1343,11}, \
+    {    191,10}, {    383, 9}, {    799,12}, {   4096,13}, \
+    {   8192,14}, {  16384,15}, {  32768,16} }
+#define SQR_FFT_TABLE3_SIZE 67
+#define SQR_FFT_THRESHOLD                 5760
+
+#define MULLO_BASECASE_THRESHOLD             0  /* always */
+#define MULLO_DC_THRESHOLD                  30
+#define MULLO_MUL_N_THRESHOLD            13463
+
+#define DC_DIV_QR_THRESHOLD                 15
+#define DC_DIVAPPR_Q_THRESHOLD              49
+#define DC_BDIV_QR_THRESHOLD                76
+#define DC_BDIV_Q_THRESHOLD                190
+
+#define INV_MULMOD_BNM1_THRESHOLD           46
+#define INV_NEWTON_THRESHOLD                35
+#define INV_APPR_THRESHOLD                  35
+
+#define BINV_NEWTON_THRESHOLD              324
+#define REDC_1_TO_REDC_N_THRESHOLD          83
+
+#define MU_DIV_QR_THRESHOLD               1442
+#define MU_DIVAPPR_Q_THRESHOLD            1099
+#define MUPI_DIV_QR_THRESHOLD                0  /* always */
+#define MU_BDIV_QR_THRESHOLD              1589
+#define MU_BDIV_Q_THRESHOLD               1718
+
+#define MATRIX22_STRASSEN_THRESHOLD         31
+#define HGCD_THRESHOLD                     118
+#define HGCD_APPR_THRESHOLD                149
+#define HGCD_REDUCE_THRESHOLD             3524
+#define GCD_DC_THRESHOLD                   351
+#define GCDEXT_DC_THRESHOLD                309
+#define JACOBI_BASE_METHOD                   4
+
+#define GET_STR_DC_THRESHOLD                13
+#define GET_STR_PRECOMPUTE_THRESHOLD        26
+#define SET_STR_DC_THRESHOLD               517
+#define SET_STR_PRECOMPUTE_THRESHOLD      1402
diff --git a/mpn/x86/coreinhm/gmp-mparam.h b/mpn/x86/coreinhm/gmp-mparam.h

new file mode 100644 (file)

index 0000000..21afeb6
--- /dev/null
+++ b/mpn/x86/coreinhm/gmp-mparam.h
@@ -0,0 +1,141 @@
+/* x86/coreinhm gmp-mparam.h -- Compiler/machine parameter header file.
+
+Copyright 1991, 1993, 1994, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007,
+2008, 2009, 2010, 2011 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#define GMP_LIMB_BITS 32
+#define BYTES_PER_MP_LIMB 4
+
+/* Generated by tuneup.c, 2011-11-25, gcc 4.5 */
+
+#define MOD_1_NORM_THRESHOLD                24
+#define MOD_1_UNNORM_THRESHOLD              15
+#define MOD_1N_TO_MOD_1_1_THRESHOLD          8
+#define MOD_1U_TO_MOD_1_1_THRESHOLD          7
+#define MOD_1_1_TO_MOD_1_2_THRESHOLD        11
+#define MOD_1_2_TO_MOD_1_4_THRESHOLD         0  /* never mpn_mod_1s_2p */
+#define PREINV_MOD_1_TO_MOD_1_THRESHOLD      5
+#define USE_PREINV_DIVREM_1                  1  /* native */
+#define DIV_QR_2_PI2_THRESHOLD           MP_SIZE_T_MAX  /* never */
+#define DIVEXACT_1_THRESHOLD                 0  /* always (native) */
+#define BMOD_1_TO_MOD_1_THRESHOLD           16
+
+#define MUL_TOOM22_THRESHOLD                28
+#define MUL_TOOM33_THRESHOLD                81
+#define MUL_TOOM44_THRESHOLD               214
+#define MUL_TOOM6H_THRESHOLD               306
+#define MUL_TOOM8H_THRESHOLD               454
+
+#define MUL_TOOM32_TO_TOOM43_THRESHOLD     137
+#define MUL_TOOM32_TO_TOOM53_THRESHOLD     148
+#define MUL_TOOM42_TO_TOOM53_THRESHOLD     132
+#define MUL_TOOM42_TO_TOOM63_THRESHOLD     131
+
+#define SQR_BASECASE_THRESHOLD               0  /* always (native) */
+#define SQR_TOOM2_THRESHOLD                 42
+#define SQR_TOOM3_THRESHOLD                149
+#define SQR_TOOM4_THRESHOLD                226
+#define SQR_TOOM6_THRESHOLD                333
+#define SQR_TOOM8_THRESHOLD                494
+
+#define MULMID_TOOM42_THRESHOLD             78
+
+#define MULMOD_BNM1_THRESHOLD               17
+#define SQRMOD_BNM1_THRESHOLD               21
+
+#define POWM_SEC_TABLE  2,33,294,1298,2870
+
+#define MUL_FFT_MODF_THRESHOLD             606  /* k = 5 */
+#define MUL_FFT_TABLE3                                      \
+  { {    606, 5}, {     28, 6}, {     15, 5}, {     33, 6}, \
+    {     29, 7}, {     15, 6}, {     33, 7}, {     17, 6}, \
+    {     36, 7}, {     19, 6}, {     39, 7}, {     23, 6}, \
+    {     47, 7}, {     29, 8}, {     15, 7}, {     37, 8}, \
+    {     19, 7}, {     41, 8}, {     23, 7}, {     47, 8}, \
+    {     31, 7}, {     63, 8}, {     43, 9}, {     23, 8}, \
+    {     51, 9}, {     31, 8}, {     67, 9}, {     39, 8}, \
+    {     79, 9}, {     47, 8}, {     95,10}, {     31, 9}, \
+    {     79,10}, {     47, 9}, {     95,11}, {     31,10}, \
+    {     63, 9}, {    135,10}, {     79, 9}, {    159,10}, \
+    {     95, 9}, {    191,11}, {     63,10}, {    159,11}, \
+    {     95,10}, {    191,12}, {     63,11}, {    127,10}, \
+    {    255, 9}, {    511,10}, {    271,11}, {    159,10}, \
+    {    319, 9}, {    639,10}, {    335,11}, {    191,10}, \
+    {    383, 9}, {    767,10}, {    399,12}, {   4096,13}, \
+    {   8192,14}, {  16384,15}, {  32768,16} }
+#define MUL_FFT_TABLE3_SIZE 63
+#define MUL_FFT_THRESHOLD                 6784
+
+#define SQR_FFT_MODF_THRESHOLD             505  /* k = 5 */
+#define SQR_FFT_TABLE3                                      \
+  { {    505, 5}, {     28, 6}, {     15, 5}, {     33, 6}, \
+    {     17, 5}, {     35, 6}, {     29, 7}, {     15, 6}, \
+    {     33, 7}, {     17, 6}, {     36, 7}, {     19, 6}, \
+    {     39, 7}, {     23, 6}, {     47, 7}, {     29, 8}, \
+    {     15, 7}, {     35, 8}, {     19, 7}, {     41, 8}, \
+    {     23, 7}, {     47, 8}, {     27, 7}, {     55, 8}, \
+    {     31, 7}, {     63, 8}, {     43, 9}, {     23, 8}, \
+    {     55, 9}, {     31, 8}, {     67, 9}, {     39, 8}, \
+    {     79, 9}, {     47, 8}, {     95, 9}, {     55,10}, \
+    {     31, 9}, {     79,10}, {     47, 9}, {     95,11}, \
+    {     31,10}, {     63, 9}, {    135,10}, {     79, 9}, \
+    {    159,10}, {     95,11}, {     63,10}, {    143, 9}, \
+    {    287,10}, {    159,11}, {     95,12}, {     63,11}, \
+    {    127,10}, {    255, 9}, {    511,10}, {    271, 9}, \
+    {    543,10}, {    287,11}, {    159,10}, {    319, 9}, \
+    {    639,10}, {    335, 9}, {    671,10}, {    351,11}, \
+    {    191,10}, {    383, 9}, {    767,10}, {    399, 9}, \
+    {    799,10}, {    415,12}, {   4096,13}, {   8192,14}, \
+    {  16384,15}, {  32768,16} }
+#define SQR_FFT_TABLE3_SIZE 74
+#define SQR_FFT_THRESHOLD                 4800
+
+#define MULLO_BASECASE_THRESHOLD             0  /* always */
+#define MULLO_DC_THRESHOLD                  35
+#define MULLO_MUL_N_THRESHOLD            13463
+
+#define DC_DIV_QR_THRESHOLD                 21
+#define DC_DIVAPPR_Q_THRESHOLD              42
+#define DC_BDIV_QR_THRESHOLD                84
+#define DC_BDIV_Q_THRESHOLD                156
+
+#define INV_MULMOD_BNM1_THRESHOLD           54
+#define INV_NEWTON_THRESHOLD                17
+#define INV_APPR_THRESHOLD                  17
+
+#define BINV_NEWTON_THRESHOLD              348
+#define REDC_1_TO_REDC_N_THRESHOLD          83
+
+#define MU_DIV_QR_THRESHOLD                979
+#define MU_DIVAPPR_Q_THRESHOLD             501
+#define MUPI_DIV_QR_THRESHOLD                0  /* always */
+#define MU_BDIV_QR_THRESHOLD              1589
+#define MU_BDIV_Q_THRESHOLD               1787
+
+#define MATRIX22_STRASSEN_THRESHOLD         20
+#define HGCD_THRESHOLD                      57
+#define HGCD_APPR_THRESHOLD                 50
+#define HGCD_REDUCE_THRESHOLD             3524
+#define GCD_DC_THRESHOLD                   253
+#define GCDEXT_DC_THRESHOLD                233
+#define JACOBI_BASE_METHOD                   4
+
+#define GET_STR_DC_THRESHOLD                12
+#define GET_STR_PRECOMPUTE_THRESHOLD        20
+#define SET_STR_DC_THRESHOLD               127
+#define SET_STR_PRECOMPUTE_THRESHOLD       646
diff --git a/mpn/x86/coreisbr/gmp-mparam.h b/mpn/x86/coreisbr/gmp-mparam.h

new file mode 100644 (file)

index 0000000..16ef958
--- /dev/null
+++ b/mpn/x86/coreisbr/gmp-mparam.h
@@ -0,0 +1,140 @@
+/* x86/coreisbr gmp-mparam.h -- Compiler/machine parameter header file.
+
+Copyright 1991, 1993, 1994, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007,
+2008, 2009, 2010, 2011 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#define GMP_LIMB_BITS 32
+#define BYTES_PER_MP_LIMB 4
+
+/* Generated by tuneup.c, 2011-11-24, gcc 4.2 */
+
+#define MOD_1_NORM_THRESHOLD                24
+#define MOD_1_UNNORM_THRESHOLD              25
+#define MOD_1N_TO_MOD_1_1_THRESHOLD          7
+#define MOD_1U_TO_MOD_1_1_THRESHOLD          7
+#define MOD_1_1_TO_MOD_1_2_THRESHOLD        11
+#define MOD_1_2_TO_MOD_1_4_THRESHOLD         0  /* never mpn_mod_1s_2p */
+#define PREINV_MOD_1_TO_MOD_1_THRESHOLD      3
+#define USE_PREINV_DIVREM_1                  1  /* native */
+#define DIV_QR_2_PI2_THRESHOLD           MP_SIZE_T_MAX  /* never */
+#define DIVEXACT_1_THRESHOLD                 0  /* always (native) */
+#define BMOD_1_TO_MOD_1_THRESHOLD           18
+
+#define MUL_TOOM22_THRESHOLD                28
+#define MUL_TOOM33_THRESHOLD               101
+#define MUL_TOOM44_THRESHOLD               244
+#define MUL_TOOM6H_THRESHOLD               351
+#define MUL_TOOM8H_THRESHOLD               547
+
+#define MUL_TOOM32_TO_TOOM43_THRESHOLD     109
+#define MUL_TOOM32_TO_TOOM53_THRESHOLD     183
+#define MUL_TOOM42_TO_TOOM53_THRESHOLD     109
+#define MUL_TOOM42_TO_TOOM63_THRESHOLD     109
+
+#define SQR_BASECASE_THRESHOLD               0  /* always (native) */
+#define SQR_TOOM2_THRESHOLD                 48
+#define SQR_TOOM3_THRESHOLD                165
+#define SQR_TOOM4_THRESHOLD                276
+#define SQR_TOOM6_THRESHOLD                366
+#define SQR_TOOM8_THRESHOLD                572
+
+#define MULMID_TOOM42_THRESHOLD             98
+
+#define MULMOD_BNM1_THRESHOLD               20
+#define SQRMOD_BNM1_THRESHOLD               23
+
+#define POWM_SEC_TABLE  2,27,258,1052
+
+#define MUL_FFT_MODF_THRESHOLD             716  /* k = 5 */
+#define MUL_FFT_TABLE3                                      \
+  { {    716, 5}, {     27, 6}, {     15, 5}, {     31, 6}, \
+    {     27, 7}, {     15, 6}, {     33, 7}, {     17, 6}, \
+    {     35, 7}, {     19, 6}, {     39, 7}, {     23, 6}, \
+    {     47, 7}, {     27, 8}, {     15, 7}, {     31, 6}, \
+    {     63, 7}, {     35, 8}, {     19, 7}, {     41, 8}, \
+    {     23, 7}, {     51, 8}, {     31, 7}, {     63, 8}, \
+    {     43, 9}, {     23, 8}, {     55, 9}, {     31, 8}, \
+    {     71, 9}, {     39, 8}, {     79, 9}, {     47, 8}, \
+    {     95, 9}, {     55,10}, {     31, 9}, {     63, 8}, \
+    {    127, 9}, {     79,10}, {     47, 9}, {     95,11}, \
+    {     31,10}, {     63, 9}, {    135,10}, {     79, 9}, \
+    {    159,10}, {     95, 9}, {    191,11}, {     63,10}, \
+    {    127, 9}, {    255,10}, {    159,11}, {     95,10}, \
+    {    191,12}, {     63,11}, {    127,10}, {    255, 9}, \
+    {    511,10}, {    271,11}, {    159,10}, {    319, 9}, \
+    {    639,11}, {    191,10}, {    383, 9}, {    767,11}, \
+    {    223,12}, {   4096,13}, {   8192,14}, {  16384,15}, \
+    {  32768,16} }
+#define MUL_FFT_TABLE3_SIZE 69
+#define MUL_FFT_THRESHOLD                 7552
+
+#define SQR_FFT_MODF_THRESHOLD             595  /* k = 5 */
+#define SQR_FFT_TABLE3                                      \
+  { {    595, 5}, {     28, 6}, {     15, 5}, {     31, 6}, \
+    {     29, 7}, {     15, 6}, {     33, 7}, {     17, 6}, \
+    {     35, 7}, {     19, 6}, {     39, 7}, {     23, 6}, \
+    {     47, 7}, {     35, 8}, {     19, 7}, {     43, 8}, \
+    {     23, 7}, {     49, 8}, {     31, 7}, {     63, 8}, \
+    {     43, 9}, {     23, 8}, {     55, 9}, {     31, 8}, \
+    {     67, 9}, {     39, 8}, {     79, 9}, {     47, 8}, \
+    {     95, 9}, {     55,10}, {     31, 9}, {     63, 8}, \
+    {    127, 9}, {     79,10}, {     47, 9}, {     95,11}, \
+    {     31,10}, {     63, 9}, {    135,10}, {     79, 9}, \
+    {    159,10}, {     95,11}, {     63,10}, {    159,11}, \
+    {     95,10}, {    191,12}, {     63,11}, {    127,10}, \
+    {    255, 9}, {    511,10}, {    271, 9}, {    543,11}, \
+    {    159,10}, {    319, 9}, {    671,11}, {    191,10}, \
+    {    383, 9}, {    767,10}, {    399,12}, {   4096,13}, \
+    {   8192,14}, {  16384,15}, {  32768,16} }
+#define SQR_FFT_TABLE3_SIZE 63
+#define SQR_FFT_THRESHOLD                 5760
+
+#define MULLO_BASECASE_THRESHOLD             0  /* always */
+#define MULLO_DC_THRESHOLD                 100
+#define MULLO_MUL_N_THRESHOLD            14379
+
+#define DC_DIV_QR_THRESHOLD                 22
+#define DC_DIVAPPR_Q_THRESHOLD              30
+#define DC_BDIV_QR_THRESHOLD               120
+#define DC_BDIV_Q_THRESHOLD                268
+
+#define INV_MULMOD_BNM1_THRESHOLD           54
+#define INV_NEWTON_THRESHOLD                12
+#define INV_APPR_THRESHOLD                  13
+
+#define BINV_NEWTON_THRESHOLD              410
+#define REDC_1_TO_REDC_N_THRESHOLD         100
+
+#define MU_DIV_QR_THRESHOLD               1037
+#define MU_DIVAPPR_Q_THRESHOLD             889
+#define MUPI_DIV_QR_THRESHOLD                0  /* always */
+#define MU_BDIV_QR_THRESHOLD              1858
+#define MU_BDIV_Q_THRESHOLD               2172
+
+#define MATRIX22_STRASSEN_THRESHOLD         21
+#define HGCD_THRESHOLD                      59
+#define HGCD_APPR_THRESHOLD                 56
+#define HGCD_REDUCE_THRESHOLD             4818
+#define GCD_DC_THRESHOLD                   278
+#define GCDEXT_DC_THRESHOLD                298
+#define JACOBI_BASE_METHOD                   4
+
+#define GET_STR_DC_THRESHOLD                11
+#define GET_STR_PRECOMPUTE_THRESHOLD        23
+#define SET_STR_DC_THRESHOLD               438
+#define SET_STR_PRECOMPUTE_THRESHOLD      1206
diff --git a/mpn/x86/darwin.m4 b/mpn/x86/darwin.m4

index 7ef8dfc105da5778a88617b96c5c813f176a3f35..0d82f806bb757bd656838131876f3ea65bdc4b76 100644 (file)
--- a/mpn/x86/darwin.m4
+++ b/mpn/x86/darwin.m4
@@ -1,5 +1,5 @@
  divert(-1)
-dnl  Copyright 2007 Free Software Foundation, Inc.
+dnl  Copyright 2007, 2011, 2012 Free Software Foundation, Inc.
  dnl
  dnl  This file is part of the GNU MP Library.
  dnl
@@ -18,23 +18,54 @@ dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
  
  define(`DARWIN')
  
+
  dnl  Usage LEA(symbol,reg)
  dnl
-dnl  FIXME: Only handles one symbol per assembly file because of the
-dnl  way EPILOGUE_cpu is handled.
+dnl  We maintain lists of stuff to append in load_eip and darwin_bd.  The
+dnl  `index' stuff is needed to suppress repeated definitions.  To avoid
+dnl  getting fooled by "var" and "var1", we add 'bol ' (the end of
+dnl  'indirect_symbol') at the beginning and a newline at the end.  This
+dnl  might be a bit fragile.
  
-define(`LEA',`
-define(`EPILOGUE_cpu',
-`      L(movl_eip_`'substr($2,1)):
+define(`LEA',
+m4_assert_numargs(2)
+`ifdef(`PIC',`
+ifelse(index(defn(`load_eip'), `$2'),-1,
+`m4append(`load_eip',
+`L(movl_eip_`'substr($2,1)):
         movl    (%esp), $2
         ret_internal
-       .section __IMPORT,__pointers,non_lazy_symbol_pointers
+')')
+ifelse(index(defn(`darwin_bd'), `bol $1
+'),-1,
+`m4append(`darwin_bd',
+`      .section __IMPORT,__pointers,non_lazy_symbol_pointers
  L($1`'$non_lazy_ptr):
         .indirect_symbol $1
         .long    0
-')
+')')
         call    L(movl_eip_`'substr($2,1))
         movl    L($1`'$non_lazy_ptr)-.($2), $2
-')
+',`
+       movl    `$'$1, $2
+')')
+
+
+dnl EPILOGUE_cpu
+
+define(`EPILOGUE_cpu',`load_eip`'darwin_bd')
+
+define(`load_eip', `')         dnl updated in LEA
+define(`darwin_bd', `')                dnl updated in LEA
+
+
+dnl  Usage: CALL(funcname)
+dnl
+
+define(`CALL',
+m4_assert_numargs(1)
+`call  GSYM_PREFIX`'$1')
+
+undefine(`PIC_WITH_EBX')
  
  divert`'dnl
diff --git a/mpn/x86/divrem_2.asm b/mpn/x86/divrem_2.asm

index 2ccaae946fa26063ea40e9336a7772ab82bd39f7..1581add400e38c7405c8497d15e1ab47fa2ba0c6 100644 (file)
--- a/mpn/x86/divrem_2.asm
+++ b/mpn/x86/divrem_2.asm
@@ -81,7 +81,7 @@ PROLOGUE(mpn_divrem_2)
         seta    %dl
         cmp     20(%esp), %ebp
         setae   %al
-       orb     %dl, %al
+       orb     %dl, %al                C "orb" form to placate Sun tools
         jne     L(35)
  L(8):
         mov     60(%esp), %esi          C fn
@@ -174,7 +174,7 @@ L(9):       mov     64(%esp), %esi          C up
  L(fix):        seta    %dl
         cmp     20(%esp), %ebp
         setae   %al
-       orb     %dl, %al
+       orb     %dl, %al                C "orb" form to placate Sun tools
         je      L(bck)
         inc     %edi
         sub     20(%esp), %ebp
diff --git a/mpn/x86/fat/com.c b/mpn/x86/fat/com.c

new file mode 100644 (file)

index 0000000..8462dd8
--- /dev/null
+++ b/mpn/x86/fat/com.c
@@ -0,0 +1,21 @@
+/* Fat binary fallback mpn_com.
+
+Copyright 2003, 2009, 2011 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+
+#include "mpn/generic/com.c"
diff --git a/mpn/x86/fat/diveby3.c b/mpn/x86/fat/diveby3.c

deleted file mode 100644 (file)

index 7ea0161..0000000
--- a/mpn/x86/fat/diveby3.c
+++ /dev/null
@@ -1,21 +0,0 @@
-/* Fat binary fallback mpn_divexact_by3c.
-
-Copyright 2003, 2009 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-License for more details.
-
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
-
-
-#include "mpn/generic/diveby3.c"
diff --git a/mpn/x86/fat/fat.c b/mpn/x86/fat/fat.c

index 8349afcb388e5bd5d2249f4732d75b334380cb46..7ae29dc6a8630be4b7a7b1729134eb0f47055a35 100644 (file)
--- a/mpn/x86/fat/fat.c
+++ b/mpn/x86/fat/fat.c
@@ -4,7 +4,7 @@
     THEY'RE ALMOST CERTAIN TO BE SUBJECT TO INCOMPATIBLE CHANGES OR DISAPPEAR
     COMPLETELY IN FUTURE GNU MP RELEASES.
  
-Copyright 2003, 2004, 2011 Free Software Foundation, Inc.
+Copyright 2003, 2004, 2011, 2012 Free Software Foundation, Inc.
  
  This file is part of the GNU MP Library.
  
@@ -36,8 +36,8 @@ along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
  
  
  /* fat_entry.asm */
-long __gmpn_cpuid __GMP_PROTO ((char dst[12], int id));
-int  __gmpn_cpuid_available __GMP_PROTO ((void));
+long __gmpn_cpuid (char [12], int);
+int  __gmpn_cpuid_available (void);
  
  
  #if WANT_FAKE_CPUID
@@ -64,17 +64,28 @@ static struct {
    { "pentiumpro", "GenuineIntel", MAKE_FMS (6, 0) },
    { "pentium2",   "GenuineIntel", MAKE_FMS (6, 2) },
    { "pentium3",   "GenuineIntel", MAKE_FMS (6, 7) },
-  { "pentium4",   "GenuineIntel", MAKE_FMS (7, 0) },
+  { "pentium4",   "GenuineIntel", MAKE_FMS (15, 2) },
+  { "prescott",   "GenuineIntel", MAKE_FMS (15, 3) },
+  { "nocona",     "GenuineIntel", MAKE_FMS (15, 4) },
+  { "core2",      "GenuineIntel", MAKE_FMS (6, 0xf) },
+  { "coreinhm",   "GenuineIntel", MAKE_FMS (6, 0x1a) },
+  { "coreiwsm",   "GenuineIntel", MAKE_FMS (6, 0x25) },
+  { "coreisbr",   "GenuineIntel", MAKE_FMS (6, 0x2a) },
+  { "atom",       "GenuineIntel", MAKE_FMS (6, 0x1c) },
  
    { "k5",         "AuthenticAMD", MAKE_FMS (5, 0) },
    { "k6",         "AuthenticAMD", MAKE_FMS (5, 3) },
    { "k62",        "AuthenticAMD", MAKE_FMS (5, 8) },
    { "k63",        "AuthenticAMD", MAKE_FMS (5, 9) },
    { "athlon",     "AuthenticAMD", MAKE_FMS (6, 0) },
-  { "x86_64",     "AuthenticAMD", MAKE_FMS (15, 0) },
+  { "k8",         "AuthenticAMD", MAKE_FMS (15, 0) },
+  { "k10",        "AuthenticAMD", MAKE_FMS (16, 0) },
+  { "bobcat",     "AuthenticAMD", MAKE_FMS (20, 1) },
+  { "bulldozer",  "AuthenticAMD", MAKE_FMS (21, 1) },
  
    { "viac3",      "CentaurHauls", MAKE_FMS (6, 0) },
    { "viac32",     "CentaurHauls", MAKE_FMS (6, 9) },
+  { "nano",       "CentaurHauls", MAKE_FMS (6, 15) },
  };
  
  static int
@@ -128,28 +139,44 @@ typedef DECL_preinv_mod_1    ((*preinv_mod_1_t));
  
  struct cpuvec_t __gmpn_cpuvec = {
    __MPN(add_n_init),
+  0,
+  0,
    __MPN(addmul_1_init),
+  0,
+  __MPN(bdiv_dbm1c_init),
+  __MPN(com_init),
    __MPN(copyd_init),
    __MPN(copyi_init),
    __MPN(divexact_1_init),
-  __MPN(divexact_by3c_init),
    __MPN(divrem_1_init),
    __MPN(gcd_1_init),
    __MPN(lshift_init),
+  __MPN(lshiftc_init),
    __MPN(mod_1_init),
+  __MPN(mod_1_1p_init),
+  __MPN(mod_1_1p_cps_init),
+  __MPN(mod_1s_2p_init),
+  __MPN(mod_1s_2p_cps_init),
+  __MPN(mod_1s_4p_init),
+  __MPN(mod_1s_4p_cps_init),
    __MPN(mod_34lsub1_init),
    __MPN(modexact_1c_odd_init),
    __MPN(mul_1_init),
    __MPN(mul_basecase_init),
+  __MPN(mullo_basecase_init),
    __MPN(preinv_divrem_1_init),
    __MPN(preinv_mod_1_init),
+  __MPN(redc_1_init),
+  __MPN(redc_2_init),
    __MPN(rshift_init),
    __MPN(sqr_basecase_init),
    __MPN(sub_n_init),
+  0,
    __MPN(submul_1_init),
    0
  };
  
+int __gmpn_cpuvec_initialized = 0;
  
  /* The following setups start with generic x86, then overwrite with
     specifics for a chip, and higher versions of that chip.
@@ -219,21 +246,107 @@ __gmpn_cpuvec_init (void)
              case 6:
                TRACE (printf ("  p6\n"));
                CPUVEC_SETUP_p6;
-              if (model >= 2)
-                {
-                  TRACE (printf ("  pentium2\n"));
+             switch (model)
+               {
+               case 0x00:
+               case 0x01:
+                 TRACE (printf ("  pentiumpro\n"));
+                 break;
+
+               case 0x02:
+               case 0x03:
+               case 0x04:
+               case 0x05:
+               case 0x06:
+                 TRACE (printf ("  pentium2\n"));
+                  CPUVEC_SETUP_p6_mmx;
+                 break;
+
+               case 0x07:
+               case 0x08:
+               case 0x0a:
+               case 0x0b:
+               case 0x0c:
+                 TRACE (printf ("  pentium3\n"));
+                  CPUVEC_SETUP_p6_mmx;
+                  CPUVEC_SETUP_p6_p3mmx;
+                 break;
+
+               case 0x09:              /* Banias */
+               case 0x0d:              /* Dothan */
+               case 0x0e:              /* Yonah */
+                 TRACE (printf ("  Banias/Bothan/Yonah\n"));
                    CPUVEC_SETUP_p6_mmx;
-                }
-              if (model >= 7)
-                {
-                  TRACE (printf ("  pentium3\n"));
                    CPUVEC_SETUP_p6_p3mmx;
-                }
-              if (model >= 0xD || model == 9)
-                {
-                  TRACE (printf ("  p6 with sse2\n"));
                    CPUVEC_SETUP_p6_sse2;
-                }
+                 break;
+
+               case 0x0f:              /* Conroe Merom Kentsfield Allendale */
+               case 0x10:
+               case 0x11:
+               case 0x12:
+               case 0x13:
+               case 0x14:
+               case 0x15:
+               case 0x16:
+               case 0x17:              /* PNR Wolfdale Yorkfield */
+               case 0x18:
+               case 0x19:
+               case 0x1d:              /* PNR Dunnington */
+                 TRACE (printf ("  Conroe\n"));
+                  CPUVEC_SETUP_p6_mmx;
+                  CPUVEC_SETUP_p6_p3mmx;
+                  CPUVEC_SETUP_p6_sse2;
+                 CPUVEC_SETUP_core2;
+                 break;
+
+               case 0x1c:              /* Atom Silverthorne */
+               case 0x26:              /* Atom Lincroft */
+               case 0x27:              /* Atom Saltwell */
+               case 0x36:              /* Atom Cedarview/Saltwell */
+                 TRACE (printf ("  atom\n"));
+                 CPUVEC_SETUP_atom;
+                 CPUVEC_SETUP_atom_mmx;
+                 CPUVEC_SETUP_atom_sse2;
+                 break;
+
+               case 0x1a:              /* NHM Gainestown */
+               case 0x1b:
+               case 0x1e:              /* NHM Lynnfield/Jasper */
+               case 0x1f:
+               case 0x20:
+               case 0x21:
+               case 0x22:
+               case 0x23:
+               case 0x24:
+               case 0x25:              /* WSM Clarkdale/Arrandale */
+               case 0x28:
+               case 0x29:
+               case 0x2b:
+               case 0x2c:              /* WSM Gulftown */
+               case 0x2e:              /* NHM Beckton */
+               case 0x2f:              /* WSM Eagleton */
+                 TRACE (printf ("  nehalem/westmere\n"));
+                  CPUVEC_SETUP_p6_mmx;
+                  CPUVEC_SETUP_p6_p3mmx;
+                  CPUVEC_SETUP_p6_sse2;
+                 CPUVEC_SETUP_core2;
+                 CPUVEC_SETUP_coreinhm;
+                 break;
+
+               case 0x2a:              /* SBR */
+               case 0x2d:              /* SBR-EP */
+               case 0x3a:              /* IBR */
+               case 0x3c:              /* Haswell */
+                 TRACE (printf ("  sandybridge\n"));
+                  CPUVEC_SETUP_p6_mmx;
+                  CPUVEC_SETUP_p6_p3mmx;
+                  CPUVEC_SETUP_p6_sse2;
+                 CPUVEC_SETUP_core2;
+                 CPUVEC_SETUP_coreinhm;
+                 CPUVEC_SETUP_coreisbr;
+                 break;
+               }
                break;
  
              case 15:
@@ -271,13 +384,40 @@ __gmpn_cpuvec_init (void)
                break;
              case 6:
                TRACE (printf ("  athlon\n"));
-            athlon:
                CPUVEC_SETUP_k7;
                CPUVEC_SETUP_k7_mmx;
                break;
-            case 15:
-              TRACE (printf ("  x86_64\n"));
-              goto athlon;
+
+            case 0x0f:         /* k8 */
+            case 0x11:         /* "fam 11h", mix of k8 and k10 */
+            case 0x13:         /* unknown, conservatively assume k8  */
+            case 0x16:         /* unknown, conservatively assume k8  */
+            case 0x17:         /* unknown, conservatively assume k8  */
+              TRACE (printf ("  k8\n"));
+              CPUVEC_SETUP_k7;
+              CPUVEC_SETUP_k7_mmx;
+              CPUVEC_SETUP_k8;
+             break;
+
+            case 0x10:         /* k10 */
+            case 0x12:         /* k10 (llano) */
+              TRACE (printf ("  k10\n"));
+              CPUVEC_SETUP_k7;
+              CPUVEC_SETUP_k7_mmx;
+             break;
+
+            case 0x14:         /* bobcat */
+              TRACE (printf ("  bobcat\n"));
+              CPUVEC_SETUP_k7;
+              CPUVEC_SETUP_k7_mmx;
+              CPUVEC_SETUP_bobcat;
+             break;
+
+            case 0x15:         /* bulldozer */
+              TRACE (printf ("  bulldozer\n"));
+              CPUVEC_SETUP_k7;
+              CPUVEC_SETUP_k7_mmx;
+             break;
              }
          }
        else if (strcmp (vendor_string, "CentaurHauls") == 0)
@@ -290,6 +430,11 @@ __gmpn_cpuvec_init (void)
                  {
                    TRACE (printf ("  viac32\n"));
                  }
+             if (model >= 15)
+               {
+                  TRACE (printf ("  nano\n"));
+                 CPUVEC_SETUP_nano;
+               }
                break;
              }
          }
@@ -313,5 +458,5 @@ __gmpn_cpuvec_init (void)
  
    /* Set this once the threshold fields are ready.
       Use volatile to prevent it getting moved.  */
-  ((volatile struct cpuvec_t *) &__gmpn_cpuvec)->initialized = 1;
+  *((volatile int *) &__gmpn_cpuvec_initialized) = 1;
  }
diff --git a/mpn/x86/fat/fat_entry.asm b/mpn/x86/fat/fat_entry.asm

index bd46e4e8bdfdf9dbc8b7118c702c1816ca291e8c..f9b88cf79131eccdbbe21f848557b647bcb3e576 100644 (file)
--- a/mpn/x86/fat/fat_entry.asm
+++ b/mpn/x86/fat/fat_entry.asm
@@ -1,6 +1,6 @@
  dnl  x86 fat binary entrypoints.
  
-dnl  Copyright 2003 Free Software Foundation, Inc.
+dnl  Copyright 2003, 2012 Free Software Foundation, Inc.
  dnl
  dnl  This file is part of the GNU MP Library.
  dnl
@@ -118,7 +118,7 @@ EPILOGUE()
  L(fat_init):
         C al    __gmpn_cpuvec byte offset
  
-       movsbl  %al, %eax
+       movzbl  %al, %eax
         pushl   %eax
  
  ifdef(`PIC',`
diff --git a/mpn/x86/fat/gmp-mparam.h b/mpn/x86/fat/gmp-mparam.h

index 45680ede453ab11cc9fcaf0752364ac34e7cf951..9043bf9da765e9602c9f20c217f75aa5dd56ce30 100644 (file)
--- a/mpn/x86/fat/gmp-mparam.h
+++ b/mpn/x86/fat/gmp-mparam.h
@@ -1,7 +1,7 @@
  /* Fat binary x86 gmp-mparam.h -- Compiler/machine parameter header file.
  
-Copyright 1991, 1993, 1994, 2000, 2001, 2002, 2003 Free Software Foundation,
-Inc.
+Copyright 1991, 1993, 1994, 2000, 2001, 2002, 2003, 2011 Free Software
+Foundation, Inc.
  
  This file is part of the GNU MP Library.
  
@@ -34,6 +34,8 @@ along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
     preinv.  */
  #define USE_PREINV_DIVREM_1   1
  
+#define BMOD_1_TO_MOD_1_THRESHOLD           20
+
  /* mpn_sqr_basecase is faster than mpn_mul_basecase at all sizes, no need
     for mpn_sqr to call the latter.  */
  #define SQR_BASECASE_THRESHOLD 0
diff --git a/mpn/x86/fat/lshiftc.c b/mpn/x86/fat/lshiftc.c

new file mode 100644 (file)

index 0000000..e6e2a3b
--- /dev/null
+++ b/mpn/x86/fat/lshiftc.c
@@ -0,0 +1,21 @@
+/* Fat binary fallback mpn_lshiftc.
+
+Copyright 2003, 2009, 2011 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+
+#include "mpn/generic/lshiftc.c"
diff --git a/mpn/x86/fat/mod_1_1.c b/mpn/x86/fat/mod_1_1.c

new file mode 100644 (file)

index 0000000..ffe1481
--- /dev/null
+++ b/mpn/x86/fat/mod_1_1.c
@@ -0,0 +1,25 @@
+/* Fat binary fallback mpn_mod_1_1p.
+
+Copyright 2003, 2009, 2011 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+/*
+PROLOGUE(mpn_mod_1_1p_cps)
+*/
+
+#define OPERATION_mod_1_1_cps 1
+#include "mpn/generic/mod_1_1.c"
diff --git a/mpn/x86/fat/mod_1_2.c b/mpn/x86/fat/mod_1_2.c

new file mode 100644 (file)

index 0000000..6bcbacc
--- /dev/null
+++ b/mpn/x86/fat/mod_1_2.c
@@ -0,0 +1,25 @@
+/* Fat binary fallback mpn_mod_1s_2p.
+
+Copyright 2003, 2009, 2011 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+/*
+PROLOGUE(mpn_mod_1s_2p_cps)
+*/
+
+#define OPERATION_mod_1_2_cps 1
+#include "mpn/generic/mod_1_2.c"
diff --git a/mpn/x86/fat/mod_1_4.c b/mpn/x86/fat/mod_1_4.c

new file mode 100644 (file)

index 0000000..1f6a8c9
--- /dev/null
+++ b/mpn/x86/fat/mod_1_4.c
@@ -0,0 +1,25 @@
+/* Fat binary fallback mpn_mod_1s_4p.
+
+Copyright 2003, 2009, 2011 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+/*
+PROLOGUE(mpn_mod_1s_4p_cps)
+*/
+
+#define OPERATION_mod_1_4_cps 1
+#include "mpn/generic/mod_1_4.c"
diff --git a/mpn/x86/fat/mullo_basecase.c b/mpn/x86/fat/mullo_basecase.c

new file mode 100644 (file)

index 0000000..7720176
--- /dev/null
+++ b/mpn/x86/fat/mullo_basecase.c
@@ -0,0 +1,21 @@
+/* Fat binary fallback mpn_mullo_basecase.
+
+Copyright 2012 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+
+#include "mpn/generic/mullo_basecase.c"
diff --git a/mpn/x86/fat/redc_1.c b/mpn/x86/fat/redc_1.c

new file mode 100644 (file)

index 0000000..a786115
--- /dev/null
+++ b/mpn/x86/fat/redc_1.c
@@ -0,0 +1,21 @@
+/* Fat binary fallback mpn_redc_1.
+
+Copyright 2012 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+
+#include "mpn/generic/redc_1.c"
diff --git a/mpn/x86/fat/redc_2.c b/mpn/x86/fat/redc_2.c

new file mode 100644 (file)

index 0000000..f29d658
--- /dev/null
+++ b/mpn/x86/fat/redc_2.c
@@ -0,0 +1,21 @@
+/* Fat binary fallback mpn_redc_2.
+
+Copyright 2012 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+
+#include "mpn/generic/redc_2.c"
diff --git a/mpn/x86/geode/gmp-mparam.h b/mpn/x86/geode/gmp-mparam.h

new file mode 100644 (file)

index 0000000..9d9854b
--- /dev/null
+++ b/mpn/x86/geode/gmp-mparam.h
@@ -0,0 +1,131 @@
+/* x86/geode gmp-mparam.h -- Compiler/machine parameter header file.
+
+Copyright 1991, 1993, 1994, 2000, 2001, 2002, 2011 Free Software Foundation,
+Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#define GMP_LIMB_BITS 32
+#define BYTES_PER_MP_LIMB 4
+
+/* Generated by tuneup.c, 2011-01-30, gcc 3.4 */
+
+#define MOD_1_NORM_THRESHOLD                 6
+#define MOD_1_UNNORM_THRESHOLD           MP_SIZE_T_MAX  /* never */
+#define MOD_1N_TO_MOD_1_1_THRESHOLD         17
+#define MOD_1U_TO_MOD_1_1_THRESHOLD          9
+#define MOD_1_1_TO_MOD_1_2_THRESHOLD         0  /* never mpn_mod_1_1p */
+#define MOD_1_2_TO_MOD_1_4_THRESHOLD        14
+#define PREINV_MOD_1_TO_MOD_1_THRESHOLD  MP_SIZE_T_MAX  /* never */
+#define USE_PREINV_DIVREM_1                  0
+#define DIVEXACT_1_THRESHOLD                 0  /* always (native) */
+#define BMOD_1_TO_MOD_1_THRESHOLD           42
+
+#define MUL_TOOM22_THRESHOLD                18
+#define MUL_TOOM33_THRESHOLD                66
+#define MUL_TOOM44_THRESHOLD               105
+#define MUL_TOOM6H_THRESHOLD               141
+#define MUL_TOOM8H_THRESHOLD               212
+
+#define MUL_TOOM32_TO_TOOM43_THRESHOLD      62
+#define MUL_TOOM32_TO_TOOM53_THRESHOLD      69
+#define MUL_TOOM42_TO_TOOM53_THRESHOLD      65
+#define MUL_TOOM42_TO_TOOM63_THRESHOLD      67
+
+#define SQR_BASECASE_THRESHOLD               0  /* always (native) */
+#define SQR_TOOM2_THRESHOLD                 33
+#define SQR_TOOM3_THRESHOLD                 60
+#define SQR_TOOM4_THRESHOLD                136
+#define SQR_TOOM6_THRESHOLD                196
+#define SQR_TOOM8_THRESHOLD                292
+
+#define MULMOD_BNM1_THRESHOLD               14
+#define SQRMOD_BNM1_THRESHOLD               16
+
+#define MUL_FFT_MODF_THRESHOLD             468  /* k = 5 */
+#define MUL_FFT_TABLE3                                      \
+  { {    468, 5}, {     17, 6}, {      9, 5}, {     19, 6}, \
+    {     11, 5}, {     23, 6}, {     21, 7}, {     11, 6}, \
+    {     25, 7}, {     13, 6}, {     27, 7}, {     15, 6}, \
+    {     31, 7}, {     21, 8}, {     11, 7}, {     27, 8}, \
+    {     15, 7}, {     33, 8}, {     19, 7}, {     39, 8}, \
+    {     23, 7}, {     47, 8}, {     27, 9}, {     15, 8}, \
+    {     39, 9}, {     23, 8}, {     47,10}, {     15, 9}, \
+    {     31, 8}, {     67, 9}, {     39, 8}, {     79, 9}, \
+    {     47, 8}, {     95, 9}, {     55,10}, {     31, 9}, \
+    {     63, 8}, {    127, 9}, {     79,10}, {     47, 9}, \
+    {     95,11}, {     31,10}, {     63, 9}, {    135,10}, \
+    {     79, 9}, {    159,10}, {     95, 9}, {    191,11}, \
+    {     63,10}, {    127, 9}, {    255,10}, {    143, 9}, \
+    {    287,10}, {    159,11}, {     95,10}, {    191, 9}, \
+    {    383,12}, {   4096,13}, {   8192,14}, {  16384,15}, \
+    {  32768,16} }
+#define MUL_FFT_TABLE3_SIZE 61
+#define MUL_FFT_THRESHOLD                 5504
+
+#define SQR_FFT_MODF_THRESHOLD             396  /* k = 5 */
+#define SQR_FFT_TABLE3                                      \
+  { {    396, 5}, {     21, 6}, {     11, 5}, {     23, 6}, \
+    {     21, 7}, {     11, 6}, {     24, 7}, {     13, 6}, \
+    {     27, 7}, {     15, 6}, {     31, 7}, {     21, 8}, \
+    {     11, 7}, {     27, 8}, {     15, 7}, {     33, 8}, \
+    {     19, 7}, {     39, 8}, {     23, 7}, {     47, 8}, \
+    {     27, 9}, {     15, 8}, {     39, 9}, {     23, 8}, \
+    {     51,10}, {     15, 9}, {     31, 8}, {     67, 9}, \
+    {     39, 8}, {     79, 9}, {     47, 8}, {     95, 9}, \
+    {     55,10}, {     31, 9}, {     79,10}, {     47, 9}, \
+    {     95,11}, {     31,10}, {     63, 9}, {    127, 8}, \
+    {    255, 9}, {    135,10}, {     79, 9}, {    159, 8}, \
+    {    319,10}, {     95, 9}, {    191,11}, {     63,10}, \
+    {    127, 9}, {    255, 8}, {    511,10}, {    143, 9}, \
+    {    287, 8}, {    575,10}, {    159,11}, {     95,10}, \
+    {    191,12}, {   4096,13}, {   8192,14}, {  16384,15}, \
+    {  32768,16} }
+#define SQR_FFT_TABLE3_SIZE 61
+#define SQR_FFT_THRESHOLD                 3712
+
+#define MULLO_BASECASE_THRESHOLD             3
+#define MULLO_DC_THRESHOLD                  37
+#define MULLO_MUL_N_THRESHOLD            10950
+
+#define DC_DIV_QR_THRESHOLD                 59
+#define DC_DIVAPPR_Q_THRESHOLD             189
+#define DC_BDIV_QR_THRESHOLD                55
+#define DC_BDIV_Q_THRESHOLD                136
+
+#define INV_MULMOD_BNM1_THRESHOLD           50
+#define INV_NEWTON_THRESHOLD               183
+#define INV_APPR_THRESHOLD                 181
+
+#define BINV_NEWTON_THRESHOLD              204
+#define REDC_1_TO_REDC_N_THRESHOLD          54
+
+#define MU_DIV_QR_THRESHOLD               1142
+#define MU_DIVAPPR_Q_THRESHOLD            1142
+#define MUPI_DIV_QR_THRESHOLD               81
+#define MU_BDIV_QR_THRESHOLD               889
+#define MU_BDIV_Q_THRESHOLD                998
+
+#define MATRIX22_STRASSEN_THRESHOLD         13
+#define HGCD_THRESHOLD                     133
+#define GCD_DC_THRESHOLD                   451
+#define GCDEXT_DC_THRESHOLD                318
+#define JACOBI_BASE_METHOD                   1
+
+#define GET_STR_DC_THRESHOLD                15
+#define GET_STR_PRECOMPUTE_THRESHOLD        30
+#define SET_STR_DC_THRESHOLD               547
+#define SET_STR_PRECOMPUTE_THRESHOLD      1049
diff --git a/mpn/x86/invert_limb.asm b/mpn/x86/invert_limb.asm

deleted file mode 100644 (file)

index ff77128..0000000
--- a/mpn/x86/invert_limb.asm
+++ /dev/null
@@ -1,169 +0,0 @@
-dnl  x86 mpn_invert_limb
-
-dnl  Contributed to the GNU project by Niels Möller
-
-dnl  Copyright 2009, 2011 Free Software Foundation, Inc.
-dnl
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or
-dnl  modify it under the terms of the GNU Lesser General Public License as
-dnl  published by the Free Software Foundation; either version 3 of the
-dnl  License, or (at your option) any later version.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful,
-dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
-dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-dnl  Lesser General Public License for more details.
-dnl
-dnl  You should have received a copy of the GNU Lesser General Public License
-dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C           cycles (approx)    div
-C K7:           46             53
-
-C Register usage:
-C   Input D in %edi
-C   Current approximation is in %eax and/or %ecx
-C   %ebx and %edx are temporaries
-C   %esi and %ebp are unused
-
-defframe(PARAM_DIVISOR,4)
-
-ASM_START()
-
-C Make approx_tab global to work around Apple relocation bug.
-ifdef(`DARWIN',`
-       define(`approx_tab', MPN(invert_limb_tab))
-       GLOBL   approx_tab')
-
-       TEXT
-       ALIGN(16)
-PROLOGUE(mpn_invert_limb)
-deflit(`FRAME', 0)
-       C Adding the unnecessary push of %ebp and the corresponding pop seems
-       C to *reduce* running time from 46 to 43 cycles on K7.  Don't know if
-       C this is a benchmark artefact or some alignment issue.
-
-       push    %ebx    FRAME_pushl()
-       C push  %ebp    FRAME_pushl()
-       push    %edi    FRAME_pushl()
-
-       mov     PARAM_DIVISOR, %edi
-       mov     %edi, %eax
-       shr     $22, %eax
-ifdef(`PIC',`
-       LEA(    approx_tab, %ebx)
-       movzwl  -1024(%ebx, %eax, 2), %eax
-',`
-       movzwl  -1024+approx_tab`'(%eax, %eax), %eax    C %eax = v0
-')
-
-       C v1 = (v0 << 4) - ((v0*v0*d_21) >> 32) - 1
-       mov     %eax, %ecx
-       imul    %eax, %eax
-       mov     %edi, %ebx
-       shr     $11, %ebx
-       inc     %ebx
-       mul     %ebx
-       mov     %edi, %ebx                              C Prepare
-       shr     %ebx
-       sbb     %eax, %eax
-       sub     %eax, %ebx                              C %ebx = d_31, %eax = mask
-       shl     $4, %ecx
-       dec     %ecx
-       sub     %edx, %ecx                              C %ecx = v1
-
-       C v_2 = (v1 << 15) + ((v1 *(2^48 - v1 * d31 + (v1 >> 1) & mask)) >> 33)
-       imul    %ecx, %ebx
-       and     %ecx, %eax
-       shr     %eax
-       sub     %ebx, %eax
-       mul     %ecx
-       mov     %edi, %eax                              C Prepare for next mul
-       shl     $15, %ecx
-       shr     %edx
-       add     %edx, %ecx                              C %ecx = v2
-
-       mul     %ecx
-       add     %edi, %eax
-       mov     %ecx, %eax
-       adc     %edi, %edx
-       sub     %edx, %eax                              C %eax = v3
-
-       pop     %edi
-       C pop   %ebp
-       pop     %ebx
-
-       ret
-
-EPILOGUE()
-
-DEF_OBJECT(approx_tab,2)
-       .value  0x7fe1,0x7fa1,0x7f61,0x7f22,0x7ee3,0x7ea4,0x7e65,0x7e27
-       .value  0x7de9,0x7dab,0x7d6d,0x7d30,0x7cf3,0x7cb6,0x7c79,0x7c3d
-       .value  0x7c00,0x7bc4,0x7b89,0x7b4d,0x7b12,0x7ad7,0x7a9c,0x7a61
-       .value  0x7a27,0x79ec,0x79b2,0x7979,0x793f,0x7906,0x78cc,0x7894
-       .value  0x785b,0x7822,0x77ea,0x77b2,0x777a,0x7742,0x770b,0x76d3
-       .value  0x769c,0x7665,0x762f,0x75f8,0x75c2,0x758c,0x7556,0x7520
-       .value  0x74ea,0x74b5,0x7480,0x744b,0x7416,0x73e2,0x73ad,0x7379
-       .value  0x7345,0x7311,0x72dd,0x72aa,0x7277,0x7243,0x7210,0x71de
-       .value  0x71ab,0x7179,0x7146,0x7114,0x70e2,0x70b1,0x707f,0x704e
-       .value  0x701c,0x6feb,0x6fba,0x6f8a,0x6f59,0x6f29,0x6ef9,0x6ec8
-       .value  0x6e99,0x6e69,0x6e39,0x6e0a,0x6ddb,0x6dab,0x6d7d,0x6d4e
-       .value  0x6d1f,0x6cf1,0x6cc2,0x6c94,0x6c66,0x6c38,0x6c0a,0x6bdd
-       .value  0x6bb0,0x6b82,0x6b55,0x6b28,0x6afb,0x6acf,0x6aa2,0x6a76
-       .value  0x6a49,0x6a1d,0x69f1,0x69c6,0x699a,0x696e,0x6943,0x6918
-       .value  0x68ed,0x68c2,0x6897,0x686c,0x6842,0x6817,0x67ed,0x67c3
-       .value  0x6799,0x676f,0x6745,0x671b,0x66f2,0x66c8,0x669f,0x6676
-       .value  0x664d,0x6624,0x65fc,0x65d3,0x65aa,0x6582,0x655a,0x6532
-       .value  0x650a,0x64e2,0x64ba,0x6493,0x646b,0x6444,0x641c,0x63f5
-       .value  0x63ce,0x63a7,0x6381,0x635a,0x6333,0x630d,0x62e7,0x62c1
-       .value  0x629a,0x6275,0x624f,0x6229,0x6203,0x61de,0x61b8,0x6193
-       .value  0x616e,0x6149,0x6124,0x60ff,0x60da,0x60b6,0x6091,0x606d
-       .value  0x6049,0x6024,0x6000,0x5fdc,0x5fb8,0x5f95,0x5f71,0x5f4d
-       .value  0x5f2a,0x5f07,0x5ee3,0x5ec0,0x5e9d,0x5e7a,0x5e57,0x5e35
-       .value  0x5e12,0x5def,0x5dcd,0x5dab,0x5d88,0x5d66,0x5d44,0x5d22
-       .value  0x5d00,0x5cde,0x5cbd,0x5c9b,0x5c7a,0x5c58,0x5c37,0x5c16
-       .value  0x5bf5,0x5bd4,0x5bb3,0x5b92,0x5b71,0x5b51,0x5b30,0x5b10
-       .value  0x5aef,0x5acf,0x5aaf,0x5a8f,0x5a6f,0x5a4f,0x5a2f,0x5a0f
-       .value  0x59ef,0x59d0,0x59b0,0x5991,0x5972,0x5952,0x5933,0x5914
-       .value  0x58f5,0x58d6,0x58b7,0x5899,0x587a,0x585b,0x583d,0x581f
-       .value  0x5800,0x57e2,0x57c4,0x57a6,0x5788,0x576a,0x574c,0x572e
-       .value  0x5711,0x56f3,0x56d5,0x56b8,0x569b,0x567d,0x5660,0x5643
-       .value  0x5626,0x5609,0x55ec,0x55cf,0x55b2,0x5596,0x5579,0x555d
-       .value  0x5540,0x5524,0x5507,0x54eb,0x54cf,0x54b3,0x5497,0x547b
-       .value  0x545f,0x5443,0x5428,0x540c,0x53f0,0x53d5,0x53b9,0x539e
-       .value  0x5383,0x5368,0x534c,0x5331,0x5316,0x52fb,0x52e0,0x52c6
-       .value  0x52ab,0x5290,0x5276,0x525b,0x5240,0x5226,0x520c,0x51f1
-       .value  0x51d7,0x51bd,0x51a3,0x5189,0x516f,0x5155,0x513b,0x5121
-       .value  0x5108,0x50ee,0x50d5,0x50bb,0x50a2,0x5088,0x506f,0x5056
-       .value  0x503c,0x5023,0x500a,0x4ff1,0x4fd8,0x4fbf,0x4fa6,0x4f8e
-       .value  0x4f75,0x4f5c,0x4f44,0x4f2b,0x4f13,0x4efa,0x4ee2,0x4eca
-       .value  0x4eb1,0x4e99,0x4e81,0x4e69,0x4e51,0x4e39,0x4e21,0x4e09
-       .value  0x4df1,0x4dda,0x4dc2,0x4daa,0x4d93,0x4d7b,0x4d64,0x4d4d
-       .value  0x4d35,0x4d1e,0x4d07,0x4cf0,0x4cd8,0x4cc1,0x4caa,0x4c93
-       .value  0x4c7d,0x4c66,0x4c4f,0x4c38,0x4c21,0x4c0b,0x4bf4,0x4bde
-       .value  0x4bc7,0x4bb1,0x4b9a,0x4b84,0x4b6e,0x4b58,0x4b41,0x4b2b
-       .value  0x4b15,0x4aff,0x4ae9,0x4ad3,0x4abd,0x4aa8,0x4a92,0x4a7c
-       .value  0x4a66,0x4a51,0x4a3b,0x4a26,0x4a10,0x49fb,0x49e5,0x49d0
-       .value  0x49bb,0x49a6,0x4990,0x497b,0x4966,0x4951,0x493c,0x4927
-       .value  0x4912,0x48fe,0x48e9,0x48d4,0x48bf,0x48ab,0x4896,0x4881
-       .value  0x486d,0x4858,0x4844,0x482f,0x481b,0x4807,0x47f3,0x47de
-       .value  0x47ca,0x47b6,0x47a2,0x478e,0x477a,0x4766,0x4752,0x473e
-       .value  0x472a,0x4717,0x4703,0x46ef,0x46db,0x46c8,0x46b4,0x46a1
-       .value  0x468d,0x467a,0x4666,0x4653,0x4640,0x462c,0x4619,0x4606
-       .value  0x45f3,0x45e0,0x45cd,0x45ba,0x45a7,0x4594,0x4581,0x456e
-       .value  0x455b,0x4548,0x4536,0x4523,0x4510,0x44fe,0x44eb,0x44d8
-       .value  0x44c6,0x44b3,0x44a1,0x448f,0x447c,0x446a,0x4458,0x4445
-       .value  0x4433,0x4421,0x440f,0x43fd,0x43eb,0x43d9,0x43c7,0x43b5
-       .value  0x43a3,0x4391,0x437f,0x436d,0x435c,0x434a,0x4338,0x4327
-       .value  0x4315,0x4303,0x42f2,0x42e0,0x42cf,0x42bd,0x42ac,0x429b
-       .value  0x4289,0x4278,0x4267,0x4256,0x4244,0x4233,0x4222,0x4211
-       .value  0x4200,0x41ef,0x41de,0x41cd,0x41bc,0x41ab,0x419a,0x418a
-       .value  0x4179,0x4168,0x4157,0x4147,0x4136,0x4125,0x4115,0x4104
-       .value  0x40f4,0x40e3,0x40d3,0x40c2,0x40b2,0x40a2,0x4091,0x4081
-       .value  0x4071,0x4061,0x4050,0x4040,0x4030,0x4020,0x4010,0x4000
-END_OBJECT(approx_tab)
diff --git a/mpn/x86/k10/gmp-mparam.h b/mpn/x86/k10/gmp-mparam.h

new file mode 100644 (file)

index 0000000..0ab12b8
--- /dev/null
+++ b/mpn/x86/k10/gmp-mparam.h
@@ -0,0 +1,146 @@
+/* x86/k10 gmp-mparam.h -- Compiler/machine parameter header file.
+
+Copyright 1991, 1993, 1994, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007,
+2008, 2009, 2010, 2011 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#define GMP_LIMB_BITS 32
+#define BYTES_PER_MP_LIMB 4
+
+/* Generated by tuneup.c, 2011-11-25, gcc 4.2 */
+
+#define MOD_1_NORM_THRESHOLD                 0  /* always */
+#define MOD_1_UNNORM_THRESHOLD               0  /* always */
+#define MOD_1N_TO_MOD_1_1_THRESHOLD         12
+#define MOD_1U_TO_MOD_1_1_THRESHOLD          5
+#define MOD_1_1_TO_MOD_1_2_THRESHOLD         6
+#define MOD_1_2_TO_MOD_1_4_THRESHOLD        13
+#define PREINV_MOD_1_TO_MOD_1_THRESHOLD     15
+#define USE_PREINV_DIVREM_1                  1  /* native */
+#define DIV_QR_2_PI2_THRESHOLD           MP_SIZE_T_MAX  /* never */
+#define DIVEXACT_1_THRESHOLD                 0  /* always (native) */
+#define BMOD_1_TO_MOD_1_THRESHOLD           31
+
+#define MUL_TOOM22_THRESHOLD                26
+#define MUL_TOOM33_THRESHOLD                85
+#define MUL_TOOM44_THRESHOLD               130
+#define MUL_TOOM6H_THRESHOLD               206
+#define MUL_TOOM8H_THRESHOLD               309
+
+#define MUL_TOOM32_TO_TOOM43_THRESHOLD      80
+#define MUL_TOOM32_TO_TOOM53_THRESHOLD      91
+#define MUL_TOOM42_TO_TOOM53_THRESHOLD      89
+#define MUL_TOOM42_TO_TOOM63_THRESHOLD      90
+#define MUL_TOOM43_TO_TOOM54_THRESHOLD     112
+
+#define SQR_BASECASE_THRESHOLD               0  /* always (native) */
+#define SQR_TOOM2_THRESHOLD                 40
+#define SQR_TOOM3_THRESHOLD                 81
+#define SQR_TOOM4_THRESHOLD                178
+#define SQR_TOOM6_THRESHOLD                266
+#define SQR_TOOM8_THRESHOLD                357
+
+#define MULMID_TOOM42_THRESHOLD             54
+
+#define MULMOD_BNM1_THRESHOLD               17
+#define SQRMOD_BNM1_THRESHOLD               17
+
+#define MUL_FFT_MODF_THRESHOLD             606  /* k = 5 */
+#define MUL_FFT_TABLE3                                      \
+  { {    786, 5}, {     25, 6}, {     13, 5}, {     27, 6}, \
+    {     25, 7}, {     13, 6}, {     27, 7}, {     15, 6}, \
+    {     33, 7}, {     17, 6}, {     35, 7}, {     19, 6}, \
+    {     39, 7}, {     23, 6}, {     47, 7}, {     27, 8}, \
+    {     15, 7}, {     31, 6}, {     63, 7}, {     35, 8}, \
+    {     19, 7}, {     41, 8}, {     23, 7}, {     47, 8}, \
+    {     27, 9}, {     15, 8}, {     31, 7}, {     63, 8}, \
+    {     39, 9}, {     23, 8}, {     51, 9}, {     31, 8}, \
+    {     63, 9}, {     39, 8}, {     83, 9}, {     47,10}, \
+    {     31, 9}, {     63, 8}, {    127, 9}, {     79,10}, \
+    {     47, 9}, {     95,11}, {     31,10}, {     63, 9}, \
+    {    135,10}, {     79, 9}, {    159,10}, {     95, 9}, \
+    {    191,10}, {    111,11}, {     63,10}, {    127, 9}, \
+    {    255, 7}, {   1023, 8}, {    543, 9}, {    279,10}, \
+    {    159,11}, {     95,10}, {    191,12}, {     63,11}, \
+    {    127,10}, {    255, 9}, {    511,10}, {    271, 9}, \
+    {    543, 8}, {   1087,10}, {    287,11}, {    159, 9}, \
+    {    671,11}, {    191,10}, {    399, 9}, {    799,12}, \
+    {   4096,13}, {   8192,14}, {  16384,15}, {  32768,16} }
+#define MUL_FFT_TABLE3_SIZE 76
+#define MUL_FFT_THRESHOLD                 6784
+
+#define SQR_FFT_MODF_THRESHOLD             505  /* k = 5 */
+#define SQR_FFT_TABLE3                                      \
+  { {    660, 5}, {     25, 6}, {     13, 5}, {     28, 6}, \
+    {     25, 7}, {     13, 6}, {     28, 7}, {     15, 6}, \
+    {     31, 7}, {     17, 6}, {     35, 7}, {     19, 6}, \
+    {     39, 7}, {     23, 6}, {     47, 7}, {     27, 8}, \
+    {     15, 7}, {     35, 8}, {     19, 7}, {     39, 8}, \
+    {     23, 7}, {     47, 8}, {     31, 7}, {     63, 8}, \
+    {     35, 7}, {     71, 8}, {     39, 9}, {     23, 8}, \
+    {     55,10}, {     15, 9}, {     31, 8}, {     63, 9}, \
+    {     39, 8}, {     79, 9}, {     47, 8}, {     95, 9}, \
+    {     55,10}, {     31, 9}, {     79,10}, {     47, 9}, \
+    {     95,11}, {     31,10}, {     63, 9}, {    135,10}, \
+    {     79, 9}, {    167,10}, {     95,11}, {     63,10}, \
+    {    159,11}, {     95, 8}, {    799,12}, {     63,11}, \
+    {    127,10}, {    255, 9}, {    543,11}, {    159, 9}, \
+    {    639,10}, {    367,11}, {    191,10}, {    383, 9}, \
+    {    799,10}, {    415,11}, {    223,12}, {   4096,13}, \
+    {   8192,14}, {  16384,15}, {  32768,16} }
+#define SQR_FFT_TABLE3_SIZE 67
+#define SQR_FFT_THRESHOLD                 4736
+
+#define MULLO_BASECASE_THRESHOLD             7
+#define MULLO_DC_THRESHOLD                  42
+#define MULLO_MUL_N_THRESHOLD            13463
+
+#define DC_DIV_QR_THRESHOLD                 56
+#define DC_DIVAPPR_Q_THRESHOLD             270
+#define DC_BDIV_QR_THRESHOLD                55
+#define DC_BDIV_Q_THRESHOLD                182
+
+#define INV_MULMOD_BNM1_THRESHOLD           62
+#define INV_NEWTON_THRESHOLD               260
+#define INV_APPR_THRESHOLD                 270
+
+#define BINV_NEWTON_THRESHOLD              276
+#define REDC_1_TO_REDC_N_THRESHOLD          71
+
+#define MU_DIV_QR_THRESHOLD               1652
+#define MU_DIVAPPR_Q_THRESHOLD            1652
+#define MUPI_DIV_QR_THRESHOLD              130
+#define MU_BDIV_QR_THRESHOLD              1499
+#define MU_BDIV_Q_THRESHOLD               1528
+
+#define POWM_SEC_TABLE  4,23,228,947
+
+#define MATRIX22_STRASSEN_THRESHOLD         16
+#define HGCD_THRESHOLD                     132
+#define HGCD_APPR_THRESHOLD                180
+#define HGCD_REDUCE_THRESHOLD             3134
+#define GCD_DC_THRESHOLD                   630
+#define GCDEXT_DC_THRESHOLD                432
+#define JACOBI_BASE_METHOD                   4
+
+#define GET_STR_DC_THRESHOLD                12
+#define GET_STR_PRECOMPUTE_THRESHOLD        23
+#define SET_STR_DC_THRESHOLD               208
+#define SET_STR_PRECOMPUTE_THRESHOLD      1254
+
+#define FAC_DSC_THRESHOLD                  208
+#define FAC_ODD_THRESHOLD                   29
diff --git a/mpn/x86/k6/aorsmul_1.asm b/mpn/x86/k6/aorsmul_1.asm

index 5f1cd9cd1a1c6638a8bc1884a2ca15f887aaa869..2389368fbdad08bf97a914ec15b8fc76a1149e70 100644 (file)
--- a/mpn/x86/k6/aorsmul_1.asm
+++ b/mpn/x86/k6/aorsmul_1.asm
@@ -21,19 +21,19 @@ dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
  include(`../config.m4')
  
  
-C                           cycles/limb
-C P5:
-C P6 model 0-8,10-12)            5.94
-C P6 model 9  (Banias)
-C P6 model 13 (Dothan)           5.57
+C                          cycles/limb
+C P5
+C P6 model 0-8,10-12            5.94
+C P6 model 9  (Banias)          5.51
+C P6 model 13 (Dothan)          5.57
  C P4 model 0  (Willamette)
  C P4 model 1  (?)
  C P4 model 2  (Northwood)
  C P4 model 3  (Prescott)
  C P4 model 4  (Nocona)
-C K6:                           7.65-8.5 (data dependent)
-C K7:
-C K8:
+C AMD K6                       7.65-8.5 (data dependent)
+C AMD K7
+C AMD K8
  
  
  dnl  K6:           large multipliers  small multipliers
diff --git a/mpn/x86/k6/gmp-mparam.h b/mpn/x86/k6/gmp-mparam.h

index 168ea065e97991108ef25df5b1f3401675228684..97854d1a055cced254bc4173c97a8c9fc9eee0a4 100644 (file)
--- a/mpn/x86/k6/gmp-mparam.h
+++ b/mpn/x86/k6/gmp-mparam.h
@@ -26,11 +26,11 @@ along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
  
  #define MOD_1_NORM_THRESHOLD                12
  #define MOD_1_UNNORM_THRESHOLD           MP_SIZE_T_MAX  /* never */
-#define MOD_1N_TO_MOD_1_1_THRESHOLD         28
-#define MOD_1U_TO_MOD_1_1_THRESHOLD         18
-#define MOD_1_1_TO_MOD_1_2_THRESHOLD         0
-#define MOD_1_2_TO_MOD_1_4_THRESHOLD     MP_SIZE_T_MAX
-#define PREINV_MOD_1_TO_MOD_1_THRESHOLD     82
+#define MOD_1N_TO_MOD_1_1_THRESHOLD         41
+#define MOD_1U_TO_MOD_1_1_THRESHOLD         32
+#define MOD_1_1_TO_MOD_1_2_THRESHOLD         3
+#define MOD_1_2_TO_MOD_1_4_THRESHOLD         0
+#define PREINV_MOD_1_TO_MOD_1_THRESHOLD    128
  #define USE_PREINV_DIVREM_1                  0
  #define DIVEXACT_1_THRESHOLD                 0  /* always (native) */
  #define BMOD_1_TO_MOD_1_THRESHOLD        MP_SIZE_T_MAX  /* never */
diff --git a/mpn/x86/k6/mul_1.asm b/mpn/x86/k6/mul_1.asm

index e1c468fe3479f11004a12d0d9e59c38c89c4a3a1..26cfe4022db3a7715310b243696a152654237875 100644 (file)
--- a/mpn/x86/k6/mul_1.asm
+++ b/mpn/x86/k6/mul_1.asm
@@ -20,19 +20,19 @@ dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
  include(`../config.m4')
  
  
-C                           cycles/limb
-C P5:
-C P6 model 0-8,10-12)            5.5
+C                          cycles/limb
+C P5
+C P6 model 0-8,10-12            5.5
  C P6 model 9  (Banias)
-C P6 model 13 (Dothan)           4.87
+C P6 model 13 (Dothan)          4.87
  C P4 model 0  (Willamette)
  C P4 model 1  (?)
  C P4 model 2  (Northwood)
  C P4 model 3  (Prescott)
  C P4 model 4  (Nocona)
-C K6:                            6.25
-C K7:
-C K8:
+C AMD K6                        6.25
+C AMD K7
+C AMD K8
  
  
  C mp_limb_t mpn_mul_1 (mp_ptr dst, mp_srcptr src, mp_size_t size,
diff --git a/mpn/x86/k7/addlsh1_n.asm b/mpn/x86/k7/addlsh1_n.asm

new file mode 100644 (file)

index 0000000..e24f7a9
--- /dev/null
+++ b/mpn/x86/k7/addlsh1_n.asm
@@ -0,0 +1,185 @@
+dnl  AMD K7 mpn_addlsh1_n -- rp[] = up[] + (vp[] << 1)
+
+dnl  Copyright 2011 Free Software Foundation, Inc.
+
+dnl  Contributed to the GNU project by Torbjorn Granlund and Marco Bodrato.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C This is an attempt at an addlsh1_n for x86-32, not relying on sse2 insns.
+C The innerloop is 2*3-way unrolled, which is the best we can do with the
+C available registers.  It seems tricky to use the same structure for
+C rsblsh1_n, since we cannot feed carry between operations there.
+
+C                          cycles/limb
+C P5
+C P6 model 0-8,10-12
+C P6 model 9  (Banias)
+C P6 model 13 (Dothan)          5.4    (worse than add_n + lshift)
+C P4 model 0  (Willamette)
+C P4 model 1  (?)
+C P4 model 2  (Northwood)
+C P4 model 3  (Prescott)
+C P4 model 4  (Nocona)
+C Intel Atom                    6
+C AMD K6                        ?
+C AMD K7                        2.5
+C AMD K8
+
+C This is a basic addlsh1_n for k7, atom, and perhaps some other x86-32
+C processors.  It uses 2*3-way unrolling, for good reasons.  Unfortunately,
+C that means we need an initial magic multiply.
+C
+C It is not clear how to do sublsh1_n or rsblsh1_n using the same pattern.  We
+C cannot do rsblsh1_n since we feed carry from the shift blocks to the
+C add/subtract blocks, which is right for addition but reversed for
+C subtraction.  We could perhaps do sublsh1_n, with some extra move insns,
+C without losing any time, since we are not issue limited but limited by carry
+C recurrency latency.
+C
+C Breaking carry recurrency might be a good idea.  We would then need separate
+C registers for the shift carry and add/subtract carry, which in turn would
+C force us to 2*2-way unrolling.
+
+defframe(PARAM_SIZE,   16)
+defframe(PARAM_DBLD,   12)
+defframe(PARAM_SRC,     8)
+defframe(PARAM_DST,     4)
+
+dnl  re-use parameter space
+define(VAR_COUNT,`PARAM_DST')
+define(VAR_TMP,`PARAM_DBLD')
+
+ASM_START()
+       TEXT
+       ALIGN(8)
+PROLOGUE(mpn_addlsh1_n)
+deflit(`FRAME',0)
+
+define(`rp',  `%edi')
+define(`up',  `%esi')
+define(`vp',  `%ebp')
+
+       mov     $0x2aaaaaab, %eax       C ceil(2^32/6), multiplier giving size\6
+
+       push    %ebx                    FRAME_pushl()
+       mov     PARAM_SIZE, %ebx        C size
+
+       push    rp                      FRAME_pushl()
+       mov     PARAM_DST, rp
+
+       mul     %ebx                    C edx = high half of product = size\6
+
+       push    up                      FRAME_pushl()
+       mov     PARAM_SRC, up
+
+       not     %edx                    C count = -(size\6)-1
+       mov     %edx, VAR_COUNT
+
+       push    vp                      FRAME_pushl()
+       mov     PARAM_DBLD, vp
+
+       lea     3(%edx,%edx,2), %ecx    C count*3+3 = -(size\6)*3
+       xor     %edx, %edx              C no saved carries yet, also clears CF
+       lea     (%ebx,%ecx,2), %ebx     C size + (count*3+3)*2 = size % 6
+       or      %ebx, %ebx              C test size % 6, CF stays clear
+       jz      L(exact)                C no leftover limbs to handle singly
+
+L(oop):                                C one limb per pass, size % 6 passes
+ifdef(`CPU_P6',`
+       shr     %edx ')                 C restore 2nd saved carry bit
+       mov     (vp), %eax
+       adc     %eax, %eax              C double the vp limb, plus carry in CF
+       rcr     %edx                    C restore 1st saved carry bit
+       lea     4(vp), vp
+       adc     (up), %eax              C add the up limb, plus restored carry
+       lea     4(up), up
+       adc     %edx, %edx              C save a carry bit in edx
+ifdef(`CPU_P6',`
+       adc     %edx, %edx ')           C save another carry bit in edx
+       dec     %ebx                    C dec leaves CF untouched
+       mov     %eax, (rp)
+       lea     4(rp), rp
+       jnz     L(oop)
+       mov     vp, VAR_TMP             C NOTE(review): VAR_TMP is not read here
+L(exact):
+       incl    VAR_COUNT               C count++, inc leaves CF untouched
+       jz      L(end)                  C no complete group of six limbs
+
+       ALIGN(16)
+L(top):                                C six limbs per pass, as two groups of 3
+ifdef(`CPU_P6',`
+       shr     %edx ')                 C restore 2nd saved carry bit
+       mov     (vp), %eax              C first group: double three vp limbs
+       adc     %eax, %eax
+       mov     4(vp), %ebx
+       adc     %ebx, %ebx
+       mov     8(vp), %ecx
+       adc     %ecx, %ecx
+
+       rcr     %edx                    C restore 1st saved carry bit
+
+       adc     (up), %eax              C first group: add three up limbs
+       mov     %eax, (rp)
+       adc     4(up), %ebx
+       mov     %ebx, 4(rp)
+       adc     8(up), %ecx
+       mov     %ecx, 8(rp)
+
+       mov     12(vp), %eax            C second group: double three vp limbs
+       adc     %eax, %eax
+       mov     16(vp), %ebx
+       adc     %ebx, %ebx
+       mov     20(vp), %ecx
+       adc     %ecx, %ecx
+
+       lea     24(vp), vp
+       adc     %edx, %edx              C save a carry bit in edx
+
+       adc     12(up), %eax            C second group: add three up limbs
+       mov     %eax, 12(rp)
+       adc     16(up), %ebx
+       mov     %ebx, 16(rp)
+       adc     20(up), %ecx
+
+       lea     24(up), up
+
+ifdef(`CPU_P6',`
+       adc     %edx, %edx ')           C save another carry bit in edx
+       mov     %ecx, 20(rp)
+       incl    VAR_COUNT               C count++, inc leaves CF untouched
+       lea     24(rp), rp
+       jne     L(top)
+
+L(end):
+       pop     vp                      FRAME_popl()
+       pop     up                      FRAME_popl()
+
+ifdef(`CPU_P6',`
+       xor     %eax, %eax
+       shr     $1, %edx
+       adc     %edx, %eax
+',`
+       adc     $0, %edx
+       mov     %edx, %eax
+')
+       pop     rp                      FRAME_popl()
+       pop     %ebx                    FRAME_popl()
+       ret                             C eax = sum of the pending carry bits
+EPILOGUE()
+ASM_END()
diff --git a/mpn/x86/k7/aorsmul_1.asm b/mpn/x86/k7/aorsmul_1.asm

index b247c291318d80d2a09fe154a9c9898cca490dcd..a1a0e3e9521209694a1969273015607ea638b1bb 100644 (file)
--- a/mpn/x86/k7/aorsmul_1.asm
+++ b/mpn/x86/k7/aorsmul_1.asm
@@ -21,19 +21,19 @@ dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
  include(`../config.m4')
  
  
-C                           cycles/limb
-C P5:
-C P6 model 0-8,10-12)
-C P6 model 9  (Banias)
+C                          cycles/limb
+C P5
+C P6 model 0-8,10-12
+C P6 model 9  (Banias)          6.5
  C P6 model 13 (Dothan)
  C P4 model 0  (Willamette)
  C P4 model 1  (?)
  C P4 model 2  (Northwood)
  C P4 model 3  (Prescott)
  C P4 model 4  (Nocona)
-C K6:
-C K7:                            3.75
-C K8:
+C AMD K6
+C AMD K7                        3.75
+C AMD K8
  
  C TODO
  C  * Improve feed-in and wind-down code.  We beat the old code for all n != 1,
diff --git a/mpn/x86/k7/bdiv_q_1.asm b/mpn/x86/k7/bdiv_q_1.asm

new file mode 100644 (file)

index 0000000..7c7f2c3
--- /dev/null
+++ b/mpn/x86/k7/bdiv_q_1.asm
@@ -0,0 +1,233 @@
+dnl  AMD K7 mpn_bdiv_q_1 -- mpn by limb exact division.
+
+dnl  Copyright 2001, 2002, 2004, 2007, 2011 Free Software Foundation, Inc.
+dnl
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  Rearranged from mpn/x86/k7/dive_1.asm by Marco Bodrato.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
+dnl
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+C          cycles/limb
+C Athlon:     11.0
+C Hammer:      9.0
+
+
+C mp_limb_t mpn_bdiv_q_1 (mp_ptr dst, mp_srcptr src, mp_size_t size,
+C                         mp_limb_t divisor);
+C
+C The dependent chain is mul+imul+sub for 11 cycles and that speed is
+C achieved with no special effort.  The load and shrld latencies are hidden
+C by out of order execution.
+C
+C It's a touch faster on size==1 to use the mul-by-inverse than divl.
+
+defframe(PARAM_SHIFT,  24)
+defframe(PARAM_INVERSE,20)
+defframe(PARAM_DIVISOR,16)
+defframe(PARAM_SIZE,   12)
+defframe(PARAM_SRC,    8)
+defframe(PARAM_DST,    4)
+
+defframe(SAVE_EBX,     -4)
+defframe(SAVE_ESI,     -8)
+defframe(SAVE_EDI,    -12)
+defframe(SAVE_EBP,    -16)
+defframe(VAR_INVERSE, -20)
+defframe(VAR_DST_END, -24)
+
+deflit(STACK_SPACE, 24)
+
+       TEXT
+
+C mp_limb_t
+C mpn_pi1_bdiv_q_1 (mp_ptr dst, mp_srcptr src, mp_size_t size, mp_limb_t divisor,
+C                  mp_limb_t inverse, int shift)
+C
+C Each source limb, taken from src shifted right by the shift count, has the
+C pending carry bit and the high limb of q*divisor subtracted, and is then
+C multiplied by the supplied inverse to give one quotient limb stored to dst.
+C eax still holds the last quotient limb stored when the routine returns.
+C NOTE(review): divisor is presumably expected to be odd already, as it is
+C when mpn_bdiv_q_1 arrives at L(common) -- confirm against callers.
+       ALIGN(16)
+PROLOGUE(mpn_pi1_bdiv_q_1)
+deflit(`FRAME',0)
+
+       subl    $STACK_SPACE, %esp      deflit(`FRAME',STACK_SPACE)
+       movl    PARAM_SHIFT, %ecx       C shift count
+
+       movl    %ebp, SAVE_EBP          C preserved, restored before ret
+       movl    PARAM_SIZE, %ebp        C size
+
+       movl    %esi, SAVE_ESI          C preserved, restored before ret
+       movl    PARAM_SRC, %esi         C src
+
+       movl    %edi, SAVE_EDI          C preserved, restored before ret
+       movl    PARAM_DST, %edi         C dst
+
+       movl    %ebx, SAVE_EBX          C preserved, restored before ret
+
+       leal    (%esi,%ebp,4), %esi     C src end
+       leal    (%edi,%ebp,4), %edi     C dst end
+       negl    %ebp                    C -size
+
+       movl    PARAM_INVERSE, %eax     C inv
+
+       C mpn_bdiv_q_1 also jumps here, with eax=inverse, ecx=shift, esi=src end,
+       C edi=dst end, ebp=-size and the registers saved in the same frame slots.
+L(common):
+       movl    %eax, VAR_INVERSE       C eax is needed for limbs from here on
+       movl    (%esi,%ebp,4), %eax     C src[0]
+
+       incl    %ebp
+       jz      L(one)                  C size==1, nothing to shift in from above
+
+       movl    (%esi,%ebp,4), %edx     C src[1]
+
+       shrdl(  %cl, %edx, %eax)        C src[0] >> shift, top bits from src[1]
+
+       movl    %edi, VAR_DST_END       C edi is used as scratch inside the loop
+       xorl    %ebx, %ebx              C no carry bit yet
+       jmp     L(entry)
+
+       ALIGN(8)
+L(top):
+       C eax   q
+       C ebx   carry bit, 0 or 1
+       C ecx   shift
+       C edx
+       C esi   src end
+       C edi   dst end
+       C ebp   counter, limbs, negative
+
+       mull    PARAM_DIVISOR           C carry limb in edx
+
+       movl    -4(%esi,%ebp,4), %eax   C next src limb
+       movl    (%esi,%ebp,4), %edi     C limb above it, source of shifted-in bits
+
+       shrdl(  %cl, %edi, %eax)
+
+       subl    %ebx, %eax              C apply carry bit
+       setc    %bl                     C borrow from that subtract
+       movl    VAR_DST_END, %edi       C dst end back in edi
+
+       subl    %edx, %eax              C apply carry limb
+       adcl    $0, %ebx                C add in borrow from the carry limb subtract
+
+L(entry):
+       imull   VAR_INVERSE, %eax       C q = limb * inverse, low 32 bits
+
+       movl    %eax, -4(%edi,%ebp,4)   C store quotient limb
+       incl    %ebp
+       jnz     L(top)
+
+
+       mull    PARAM_DIVISOR           C carry limb in edx
+
+       movl    -4(%esi), %eax          C src high limb
+       shrl    %cl, %eax               C plain shift, no limb above to take bits from
+       movl    SAVE_ESI, %esi
+
+       subl    %ebx, %eax              C apply carry bit
+       movl    SAVE_EBX, %ebx
+       movl    SAVE_EBP, %ebp
+
+       subl    %edx, %eax              C apply carry limb
+
+       imull   VAR_INVERSE, %eax       C final quotient limb, left in eax at ret
+
+       movl    %eax, -4(%edi)          C store to the last dst limb
+       movl    SAVE_EDI, %edi
+       addl    $STACK_SPACE, %esp
+
+       ret
+
+L(one):
+       shrl    %cl, %eax               C single limb, src[0] >> shift
+       movl    SAVE_ESI, %esi
+       movl    SAVE_EBX, %ebx
+
+       imull   VAR_INVERSE, %eax       C the only quotient limb, left in eax at ret
+
+       movl    SAVE_EBP, %ebp
+
+       movl    %eax, -4(%edi)          C store to dst[0]
+       movl    SAVE_EDI, %edi
+       addl    $STACK_SPACE, %esp
+
+       ret
+EPILOGUE()
+
+C mp_limb_t mpn_bdiv_q_1 (mp_ptr dst, mp_srcptr src, mp_size_t size,
+C                           mp_limb_t divisor);
+C
+C Strips the trailing zero bits from divisor, builds the inverse of the
+C remaining odd value from a binvert_limb_table entry followed by two
+C inv = 2*inv - inv*inv*d steps, and then continues at L(common) in
+C mpn_pi1_bdiv_q_1 with the strip count as the shift.
+C divisor must be nonzero, otherwise L(strip_twos) never shifts out a one bit
+C and does not terminate.
+
+       ALIGN(16)
+PROLOGUE(mpn_bdiv_q_1)
+deflit(`FRAME',0)
+
+       movl    PARAM_DIVISOR, %eax     C d
+       subl    $STACK_SPACE, %esp      deflit(`FRAME',STACK_SPACE)
+       movl    $-1, %ecx               C shift count
+
+       movl    %ebp, SAVE_EBP          C restored on the L(common) path
+       movl    PARAM_SIZE, %ebp        C size
+
+       movl    %esi, SAVE_ESI          C restored on the L(common) path
+       movl    %edi, SAVE_EDI          C restored on the L(common) path
+
+       C If there's usually only one or two trailing zero bits then this
+       C should be faster than bsfl.
+L(strip_twos):
+       incl    %ecx                    C one more bit position examined
+       shrl    %eax                    C low bit of d into carry
+       jnc     L(strip_twos)           C repeat until a one bit comes out
+
+       movl    %ebx, SAVE_EBX          C restored on the L(common) path
+       leal    1(%eax,%eax), %ebx      C d without twos
+       andl    $127, %eax              C d/2, 7 bits
+
+ifdef(`PIC',`
+       LEA(    binvert_limb_table, %edx)
+       movzbl  (%eax,%edx), %eax               C inv 8 bits
+',`
+       movzbl  binvert_limb_table(%eax), %eax  C inv 8 bits
+')
+
+       leal    (%eax,%eax), %edx       C 2*inv
+       movl    %ebx, PARAM_DIVISOR     C d without twos
+
+       imull   %eax, %eax              C inv*inv
+
+       movl    PARAM_SRC, %esi         C src
+       movl    PARAM_DST, %edi         C dst
+
+       imull   %ebx, %eax              C inv*inv*d
+
+       subl    %eax, %edx              C inv = 2*inv - inv*inv*d
+       leal    (%edx,%edx), %eax       C 2*inv
+
+       imull   %edx, %edx              C inv*inv
+
+       leal    (%esi,%ebp,4), %esi     C src end
+       leal    (%edi,%ebp,4), %edi     C dst end
+       negl    %ebp                    C -size
+
+       imull   %ebx, %edx              C inv*inv*d
+
+       subl    %edx, %eax              C inv = 2*inv - inv*inv*d
+
+       ASSERT(e,`      C expect d*inv == 1 mod 2^GMP_LIMB_BITS
+       pushl   %eax    FRAME_pushl()
+       imull   PARAM_DIVISOR, %eax
+       cmpl    $1, %eax
+       popl    %eax    FRAME_popl()')
+
+       jmp     L(common)               C eax=inverse, ecx=shift, rest as set up above
+EPILOGUE()
diff --git a/mpn/x86/k7/gcd_1.asm b/mpn/x86/k7/gcd_1.asm

index e90d6bbf64f4e6b48dd9bdf6cc931ab7c166cd47..81ee93d8ad92eaacb88d6c2a35b09a5264587416 100644 (file)
--- a/mpn/x86/k7/gcd_1.asm
+++ b/mpn/x86/k7/gcd_1.asm
@@ -1,369 +1,176 @@
-dnl  AMD K7 mpn_gcd_1 -- mpn by 1 gcd.
+dnl  x86 mpn_gcd_1 optimised for AMD K7.
  
-dnl  Copyright 2000, 2001, 2002, 2009 Free Software Foundation, Inc.
-dnl
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or
-dnl  modify it under the terms of the GNU Lesser General Public License as
-dnl  published by the Free Software Foundation; either version 3 of the
-dnl  License, or (at your option) any later version.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful,
-dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
-dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-dnl  Lesser General Public License for more details.
-dnl
-dnl  You should have received a copy of the GNU Lesser General Public License
-dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+dnl  Contributed to the GNU project by Kevin Ryde.  Rehacked by Torbjorn
+dnl  Granlund.
  
-include(`../config.m4')
+dnl  Copyright 2000, 2001, 2002, 2005, 2009, 2011, 2012 Free Software
+dnl  Foundation, Inc.
  
+dnl  This file is part of the GNU MP Library.
  
-C K7: 6.75 cycles/bit (approx)  1x1 gcd
-C     11.0 cycles/limb          Nx1 reduction (modexact_1_odd)
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
  
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
  
-dnl  Reduce using x%y if x is more than DIV_THRESHOLD bits bigger than y,
-dnl  where x is the larger of the two.  See tune/README for more.
-dnl
-dnl  divl at 40 cycles compared to the gcd at about 7 cycles/bitpair
-dnl  suggests 40/7*2=11.4 but 7 seems to be about right.
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
  
-deflit(DIV_THRESHOLD, 7)
+include(`../config.m4')
  
  
-C table[n] is the number of trailing zeros on n, or MAXSHIFT if n==0.
-C
-C This is mixed in with the code, but as per the k7 optimization manual it's
-C a full cache line and suitably aligned so it won't get swapped between
-C code and data.  Having it in TEXT rather than RODATA saves needing a GOT
-C entry when PIC.
-C
-C Actually, there doesn't seem to be a measurable difference between this in
-C it's own cache line or plonked in the middle of the code.  Presumably
-C since TEXT is read-only there's no worries about coherency.
+C           cycles/bit (approx)
+C AMD K7        5.31
+C AMD K8,K9     5.33
+C AMD K10       5.30
+C AMD bd1       ?
+C AMD bobcat    7.02
+C Intel P4-2   10.1
+C Intel P4-3/4 10.0
+C Intel P6/13   5.88
+C Intel core2   6.26
+C Intel NHM     6.83
+C Intel SBR     8.50
+C Intel atom    8.90
+C VIA nano      ?
+C Numbers measured with: speed -CD -s16-32 -t16 mpn_gcd_1
+
+C TODO
+C  * Tune overhead, this takes 2-3 cycles more than old code when v0 is tiny.
+C  * Stream things better through registers, avoiding some copying.
+
+C ctz_table[n] is the number of trailing zeros on n, or MAXSHIFT if n==0.
  
  deflit(MAXSHIFT, 6)
  deflit(MASK, eval((m4_lshift(1,MAXSHIFT))-1))
  
-       TEXT
-       ALIGN(64)
-L(table):
+DEF_OBJECT(ctz_table,64)
         .byte   MAXSHIFT
  forloop(i,1,MASK,
  `      .byte   m4_count_trailing_zeros(i)
  ')
+END_OBJECT(ctz_table)
  
+C Threshold of when to call bmod when U is one limb.  Should be about
+C (time_in_cycles(bmod_1,1) + call_overhead) / (cycles/bit).
+define(`DIV_THRES_LOG2', 7)
  
-C mp_limb_t mpn_gcd_1 (mp_srcptr src, mp_size_t size, mp_limb_t limb);
-C
  
-defframe(PARAM_LIMB,   12)
-defframe(PARAM_SIZE,    8)
-defframe(PARAM_SRC,     4)
+define(`up',    `%edi')
+define(`n',     `%esi')
+define(`v0',    `%edx')
  
-defframe(SAVE_EBX,     -4)
-defframe(SAVE_ESI,     -8)
-defframe(SAVE_EDI,    -12)
-defframe(SAVE_EBP,    -16)
-defframe(CALL_DIVISOR,-20)
-defframe(CALL_SIZE,   -24)
-defframe(CALL_SRC,    -28)
-
-deflit(STACK_SPACE, 28)
  
+ASM_START()
         TEXT
         ALIGN(16)
-
  PROLOGUE(mpn_gcd_1)
-deflit(`FRAME',0)
-
-       ASSERT(ne, `cmpl $0, PARAM_LIMB')       C y!=0
-       ASSERT(ae, `cmpl $1, PARAM_SIZE')       C size>=1
+       push    %edi
+       push    %esi
  
-       mov     PARAM_SRC, %eax
-       mov     PARAM_LIMB, %edx
-       sub     $STACK_SPACE, %esp      deflit(`FRAME',STACK_SPACE)
+       mov     12(%esp), up
+       mov     16(%esp), n
+       mov     20(%esp), v0
  
-       mov     %esi, SAVE_ESI
-       mov     %ebx, SAVE_EBX
-
-       mov     (%eax), %esi            C src low limb
-
-ifdef(`PIC',`
-       mov     %edi, SAVE_EDI
-       call    L(movl_eip_to_edi)
-L(here):
-       add     $L(table)-L(here), %edi
-')
-
-       mov     %esi, %ebx
-       or      %edx, %esi      C x|y
+       mov     (up), %eax              C U low limb
+       or      v0, %eax                C x | y
         mov     $-1, %ecx
  
  L(twos):
         inc     %ecx
-       shr     %esi
-       jnc     L(twos)         C 3/4 chance of x or y odd already
-
-       shr     %cl, %ebx
-       shr     %cl, %edx
-       mov     %ecx, %esi      C common twos
-
-       mov     PARAM_SIZE, %ecx
-       cmp     $1, %ecx
-       ja      L(divide)
-
-
-       C eax
-       C ebx   x
-       C ecx
-       C edx   y
-       C esi   common twos
-       C edi   [PIC] L(table)
-       C ebp
-
-       mov     %edx, %eax
-       cmp     %ebx, %edx
-
-       cmovb(  %ebx, %eax)     C swap to make x bigger than y
-       cmovb(  %edx, %ebx)
+       shr     %eax
+       jnc     L(twos)
  
+       shr     %cl, v0
+       mov     %ecx, %eax              C common twos
  
-L(strip_y):
-       C eax   x
-       C ebx   y
-       C ecx
-       C edx
-       C esi   common twos
-       C edi   [PIC] L(table)
-       C ebp
-
-       ASSERT(nz,`orl %ebx,%ebx')
-       shr     %ebx
-       jnc     L(strip_y)
-       rcl     %ebx
-
+L(divide_strip_y):
+       shr     v0
+       jnc     L(divide_strip_y)
+       adc     v0, v0
  
-       C eax   x
-       C ebx   y (odd)
-       C ecx
-       C edx
-       C esi   common twos
-       C edi   [PIC] L(table)
-       C ebp
+       push    %eax
+       push    v0
  
-       mov     %eax, %ecx
-       mov     %ebx, %edx
-       shr     $DIV_THRESHOLD, %eax
+       cmp     $1, n
+       jnz     L(reduce_nby1)
  
-       cmp     %eax, %ebx
+C Both U and V are single limbs, reduce with bmod if u0 >> v0.
+       mov     (up), %ecx
         mov     %ecx, %eax
-       ja      L(strip_x_entry)        C do x%y if x much bigger than y
-
+       shr     $DIV_THRES_LOG2, %ecx
+       cmp     %ecx, v0
+       ja      L(reduced)
  
+       mov     v0, %esi
         xor     %edx, %edx
+       div     %esi
+       mov     %edx, %eax
+       jmp     L(reduced)
  
-       div     %ebx
-
-       or      %edx, %edx
-       mov     %edx, %ecx              C remainder -> x
-       mov     %ebx, %edx              C y
-
-       jz      L(done_ebx)
-       jmp     L(strip_x)
-
-
-       C Offset 0x9D here for non-PIC.  About 0.4 cycles/bit is saved by
-       C ensuring the end of the jnz at the end of this loop doesn't cross
-       C into the next cache line at 0xC0.
-       C
-       C PIC on the other hand is offset 0xAC here and extends to 0xC9, so
-       C it crosses but doesn't suffer any measurable slowdown.
-
-L(top):
-       C eax   x
-       C ebx   y-x
-       C ecx   x-y
-       C edx   y
-       C esi   twos, for use at end
-       C edi   [PIC] L(table)
-
-       cmovc(  %ebx, %ecx)             C if x-y gave carry, use x and y-x
-       cmovc(  %eax, %edx)
-
-L(strip_x):
-       mov     %ecx, %eax
-L(strip_x_entry):
-       and     $MASK, %ecx
-
-       ASSERT(nz, `orl %eax, %eax')
-
-ifdef(`PIC',`
-       mov     (%ecx,%edi), %cl
-',`
-       mov     L(table) (%ecx), %cl
+L(reduce_nby1):
+ifdef(`PIC_WITH_EBX',`
+       push    %ebx
+       call    L(movl_eip_to_ebx)
+       add     $_GLOBAL_OFFSET_TABLE_, %ebx
  ')
-
-       shr     %cl, %eax
-       cmp     $MAXSHIFT, %cl
-
-       mov     %eax, %ecx
-       mov     %edx, %ebx
-       je      L(strip_x)
-
-       ASSERT(nz, `test $1, %eax')     C both odd
-       ASSERT(nz, `test $1, %edx')
-
-       sub     %eax, %ebx
-       sub     %edx, %ecx
-       jnz     L(top)
-
-
-L(done):
-       mov     %esi, %ecx
-       mov     SAVE_ESI, %esi
-ifdef(`PIC',`
-       mov     SAVE_EDI, %edi
+       push    v0                      C param 3
+       push    n                       C param 2
+       push    up                      C param 1
+       cmp     $BMOD_1_TO_MOD_1_THRESHOLD, n
+       jl      L(bmod)
+       CALL(   mpn_mod_1)
+       jmp     L(called)
+L(bmod):
+       CALL(   mpn_modexact_1_odd)
+
+L(called):
+       add     $12, %esp               C deallocate params
+ifdef(`PIC_WITH_EBX',`
+       pop     %ebx
  ')
+L(reduced):
+       pop     %edx
  
+       LEA(    ctz_table, %esi)
+       test    %eax, %eax
+       mov     %eax, %ecx
+       jnz     L(mid)
+       jmp     L(end)
+
+       ALIGN(16)                       C               K8    BC    P4    NHM   SBR
+L(top):        cmovc(  %ecx, %eax)             C if x-y < 0    0
+       cmovc(  %edi, %edx)             C use x,y-x     0
+L(mid):        and     $MASK, %ecx             C               0
+       movzbl  (%esi,%ecx), %ecx       C               1
+       jz      L(shift_alot)           C               1
+       shr     %cl, %eax               C               3
+       mov     %eax, %edi              C               4
+       mov     %edx, %ecx              C               3
+       sub     %eax, %ecx              C               4
+       sub     %edx, %eax              C               4
+       jnz     L(top)                  C               5
+
+L(end):        pop     %ecx
+       mov     %edx, %eax
         shl     %cl, %eax
-       mov     SAVE_EBX, %ebx
-       add     $FRAME, %esp
-
+       pop     %esi
+       pop     %edi
         ret
  
-
-
-C -----------------------------------------------------------------------------
-C two or more limbs
-
-dnl  MODEXACT_THRESHOLD is the size at which it's better to call
-dnl  mpn_modexact_1_odd than do an inline loop.
-
-deflit(MODEXACT_THRESHOLD, ifdef(`PIC',6,5))
-
-L(divide):
-       C eax   src
-       C ebx
-       C ecx   size
-       C edx   y
-       C esi   common twos
-       C edi   [PIC] L(table)
-       C ebp
-
-L(divide_strip_y):
-       ASSERT(nz,`or %edx,%edx')
-       shr     %edx
-       jnc     L(divide_strip_y)
-       lea     1(%edx,%edx), %ebx              C y now odd
-
-       mov     %ebp, SAVE_EBP
-       mov     %eax, %ebp
-       mov     -4(%eax,%ecx,4), %eax           C src high limb
-
-       cmp     $MODEXACT_THRESHOLD, %ecx
-       jae     L(modexact)
-
-       cmp     %ebx, %eax                      C high cmp divisor
-       mov     $0, %edx
-
-       cmovc(  %eax, %edx)                     C skip a div if high<divisor
-       sbb     $0, %ecx
-
-
-L(divide_top):
-       C eax   scratch (quotient)
-       C ebx   y
-       C ecx   counter (size to 1, inclusive)
-       C edx   carry (remainder)
-       C esi   common twos
-       C edi   [PIC] L(table)
-       C ebp   src
-
-       mov     -4(%ebp,%ecx,4), %eax
-
-       div     %ebx
-
-       dec     %ecx
-       jnz     L(divide_top)
-
-
-       C eax
-       C ebx   y (odd)
-       C ecx
-       C edx   x
-       C esi   common twos
-       C edi   [PIC] L(table)
-       C ebp
-
-       or      %edx, %edx
-       mov     SAVE_EBP, %ebp
-       mov     %edx, %eax
-
-       mov     %edx, %ecx
-       mov     %ebx, %edx
-       jnz     L(strip_x_entry)
-
-
-L(done_ebx):
-       mov     %ebx, %eax
-       jmp     L(done)
-
-
-
-L(modexact):
-       C eax
-       C ebx   y
-       C ecx   size
-       C edx
-       C esi   common twos
-       C edi   [PIC] L(table)
-       C ebp   src
-
-ifdef(`PIC',`
-       mov     %ebp, CALL_SRC
-       mov     %ebx, %ebp              C y
-       mov     %edi, %ebx              C L(table)
-
-       add     $_GLOBAL_OFFSET_TABLE_+[.-L(table)], %ebx
-       mov     %ebp, CALL_DIVISOR
-       mov     %ecx, CALL_SIZE
-
-       call    GSYM_PREFIX`'mpn_modexact_1_odd@PLT
-',`
-dnl non-PIC
-       mov     %ebx, CALL_DIVISOR
-       mov     %ebp, CALL_SRC
-       mov     %ecx, CALL_SIZE
-
-       call    GSYM_PREFIX`'mpn_modexact_1_odd
-')
-
-       C eax   x
-       C ebx   [non-PIC] y
-       C ecx
-       C edx
-       C esi   common twos
-       C edi   [PIC] L(table)
-       C ebp   [PIC] y
-
-       or      %eax, %eax
-       mov     ifdef(`PIC',`%ebp',`%ebx'), %edx
-       mov     SAVE_EBP, %ebp
-
+L(shift_alot):
+       shr     $MAXSHIFT, %eax
         mov     %eax, %ecx
-       jnz     L(strip_x_entry)
+       jmp     L(mid)
  
-       mov     %edx, %eax
-       jmp     L(done)
-
-
-ifdef(`PIC', `
-L(movl_eip_to_edi):
-       mov     (%esp), %edi
-       ret_internal
+ifdef(`PIC_WITH_EBX',`
+L(movl_eip_to_ebx):
+       mov     (%esp), %ebx
+       ret
  ')
-
  EPILOGUE()
diff --git a/mpn/x86/k7/gmp-mparam.h b/mpn/x86/k7/gmp-mparam.h

index f18940fc413a0d12a0912fa0d38411aeb33d344b..c6bfa87e368d643de28e9191822e75b79e8a0256 100644 (file)
--- a/mpn/x86/k7/gmp-mparam.h
+++ b/mpn/x86/k7/gmp-mparam.h
@@ -23,35 +23,39 @@ along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
  
  
  #define MOD_1_NORM_THRESHOLD                 0  /* always */
-#define MOD_1_UNNORM_THRESHOLD               4
-#define MOD_1N_TO_MOD_1_1_THRESHOLD         14
-#define MOD_1U_TO_MOD_1_1_THRESHOLD          6
-#define MOD_1_1_TO_MOD_1_2_THRESHOLD         0
-#define MOD_1_2_TO_MOD_1_4_THRESHOLD        20
-#define PREINV_MOD_1_TO_MOD_1_THRESHOLD     26
+#define MOD_1_UNNORM_THRESHOLD               3
+#define MOD_1N_TO_MOD_1_1_THRESHOLD          7
+#define MOD_1U_TO_MOD_1_1_THRESHOLD          3
+#define MOD_1_1_TO_MOD_1_2_THRESHOLD        24
+#define MOD_1_2_TO_MOD_1_4_THRESHOLD         0  /* never mpn_mod_1s_2p */
+#define PREINV_MOD_1_TO_MOD_1_THRESHOLD     10
  #define USE_PREINV_DIVREM_1                  1  /* native */
+#define DIV_QR_2_PI2_THRESHOLD           MP_SIZE_T_MAX  /* never */
  #define DIVEXACT_1_THRESHOLD                 0  /* always (native) */
-#define BMOD_1_TO_MOD_1_THRESHOLD           28
+#define BMOD_1_TO_MOD_1_THRESHOLD           24
  
  #define MUL_TOOM22_THRESHOLD                28
  #define MUL_TOOM33_THRESHOLD                85
-#define MUL_TOOM44_THRESHOLD               148
-#define MUL_TOOM6H_THRESHOLD               204
+#define MUL_TOOM44_THRESHOLD               142
+#define MUL_TOOM6H_THRESHOLD               258
  #define MUL_TOOM8H_THRESHOLD               309
  
  #define MUL_TOOM32_TO_TOOM43_THRESHOLD      85
  #define MUL_TOOM32_TO_TOOM53_THRESHOLD      99
-#define MUL_TOOM42_TO_TOOM53_THRESHOLD      93
-#define MUL_TOOM42_TO_TOOM63_THRESHOLD     101
+#define MUL_TOOM42_TO_TOOM53_THRESHOLD      97
+#define MUL_TOOM42_TO_TOOM63_THRESHOLD     102
+#define MUL_TOOM43_TO_TOOM54_THRESHOLD     144
  
  #define SQR_BASECASE_THRESHOLD               0  /* always (native) */
  #define SQR_TOOM2_THRESHOLD                 50
-#define SQR_TOOM3_THRESHOLD                 87
-#define SQR_TOOM4_THRESHOLD                208
-#define SQR_TOOM6_THRESHOLD                306
+#define SQR_TOOM3_THRESHOLD                 83
+#define SQR_TOOM4_THRESHOLD                216
+#define SQR_TOOM6_THRESHOLD                318
  #define SQR_TOOM8_THRESHOLD                430
  
-#define MULMOD_BNM1_THRESHOLD               18
+#define MULMID_TOOM42_THRESHOLD             56
+
+#define MULMOD_BNM1_THRESHOLD               17
  #define SQRMOD_BNM1_THRESHOLD               19
  
  #define MUL_FFT_MODF_THRESHOLD             888  /* k = 6 */
@@ -99,9 +103,9 @@ along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
      {   1151,11}, {   2303,12}, {   1215,11}, {   2431,13}, \
      {   8192,14}, {  16384,15}, {  32768,16} }
  #define MUL_FFT_TABLE3_SIZE 167
-#define MUL_FFT_THRESHOLD                 7808
+#define MUL_FFT_THRESHOLD                 7552
  
-#define SQR_FFT_MODF_THRESHOLD             786  /* k = 6 */
+#define SQR_FFT_MODF_THRESHOLD             666  /* k = 6 */
  #define SQR_FFT_TABLE3                                      \
    { {    786, 6}, {     25, 7}, {     13, 6}, {     27, 7}, \
      {     15, 6}, {     31, 7}, {     17, 6}, {     35, 7}, \
@@ -149,37 +153,44 @@ along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
      {   1215,11}, {   2431,13}, {   8192,14}, {  16384,15}, \
      {  32768,16} }
  #define SQR_FFT_TABLE3_SIZE 177
-#define SQR_FFT_THRESHOLD                 7552
+#define SQR_FFT_THRESHOLD                 7040
  
-#define MULLO_BASECASE_THRESHOLD            10
-#define MULLO_DC_THRESHOLD                  50
+#define MULLO_BASECASE_THRESHOLD            11
+#define MULLO_DC_THRESHOLD                  35
  #define MULLO_MUL_N_THRESHOLD            13463
  
-#define DC_DIV_QR_THRESHOLD                 60
-#define DC_DIVAPPR_Q_THRESHOLD             333
-#define DC_BDIV_QR_THRESHOLD                82
-#define DC_BDIV_Q_THRESHOLD                268
+#define DC_DIV_QR_THRESHOLD                 41
+#define DC_DIVAPPR_Q_THRESHOLD             214
+#define DC_BDIV_QR_THRESHOLD                41
+#define DC_BDIV_Q_THRESHOLD                148
  
-#define INV_MULMOD_BNM1_THRESHOLD           62
-#define INV_NEWTON_THRESHOLD               284
-#define INV_APPR_THRESHOLD                 290
+#define INV_MULMOD_BNM1_THRESHOLD           77
+#define INV_NEWTON_THRESHOLD               204
+#define INV_APPR_THRESHOLD                 204
  
-#define BINV_NEWTON_THRESHOLD              264
-#define REDC_1_TO_REDC_N_THRESHOLD          86
+#define BINV_NEWTON_THRESHOLD              230
+#define REDC_1_TO_REDC_N_THRESHOLD          59
  
-#define MU_DIV_QR_THRESHOLD               1858
-#define MU_DIVAPPR_Q_THRESHOLD            1718
-#define MUPI_DIV_QR_THRESHOLD              114
-#define MU_BDIV_QR_THRESHOLD              1387
+#define MU_DIV_QR_THRESHOLD               1752
+#define MU_DIVAPPR_Q_THRESHOLD            1528
+#define MUPI_DIV_QR_THRESHOLD               82
+#define MU_BDIV_QR_THRESHOLD              1360
  #define MU_BDIV_Q_THRESHOLD               1470
  
-#define MATRIX22_STRASSEN_THRESHOLD         15
-#define HGCD_THRESHOLD                     154
-#define GCD_DC_THRESHOLD                   599
-#define GCDEXT_DC_THRESHOLD                443
-#define JACOBI_BASE_METHOD                   1
+#define POWM_SEC_TABLE  2,17,176,905,2246
+
+#define MATRIX22_STRASSEN_THRESHOLD         16
+#define HGCD_THRESHOLD                     125
+#define HGCD_APPR_THRESHOLD                143
+#define HGCD_REDUCE_THRESHOLD             4633
+#define GCD_DC_THRESHOLD                   460
+#define GCDEXT_DC_THRESHOLD                330
+#define JACOBI_BASE_METHOD                   4
+
+#define GET_STR_DC_THRESHOLD                15
+#define GET_STR_PRECOMPUTE_THRESHOLD        35
+#define SET_STR_DC_THRESHOLD               272
+#define SET_STR_PRECOMPUTE_THRESHOLD      1183
  
-#define GET_STR_DC_THRESHOLD                17
-#define GET_STR_PRECOMPUTE_THRESHOLD        34
-#define SET_STR_DC_THRESHOLD               542
-#define SET_STR_PRECOMPUTE_THRESHOLD      1615
+#define FAC_DSC_THRESHOLD                  336
+#define FAC_ODD_THRESHOLD                   29
diff --git a/mpn/x86/k7/invert_limb.asm b/mpn/x86/k7/invert_limb.asm

new file mode 100644 (file)

index 0000000..435fa96
--- /dev/null
+++ b/mpn/x86/k7/invert_limb.asm
@@ -0,0 +1,182 @@
+dnl  x86 mpn_invert_limb
+
+dnl  Contributed to the GNU project by Niels Möller
+
+dnl  Copyright 2009, 2011 Free Software Foundation, Inc.
+dnl
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
+dnl
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C                          cycles (approx)     div
+C P5                            ?
+C P6 model 0-8,10-12            ?
+C P6 model 9  (Banias)          ?
+C P6 model 13 (Dothan)          ?
+C P4 model 0  (Willamette)      ?
+C P4 model 1  (?)               ?
+C P4 model 2  (Northwood)       ?
+C P4 model 3  (Prescott)        ?
+C P4 model 4  (Nocona)          ?
+C AMD K6                        ?
+C AMD K7                       41              53
+C AMD K8                        ?
+
+C TODO
+C  * These c/l numbers are for a non-PIC build.  Consider falling back to using
+C    the 'div' instruction for PIC builds.
+C  * Perhaps use this file--or at least the algorithm--for more machines than k7.
+
+C Register usage:
+C   Input D in %edi
+C   Current approximation is in %eax and/or %ecx
+C   %ebx and %edx are temporaries
+C   %esi and %ebp are unused
+
+defframe(PARAM_DIVISOR,4)
+
+ASM_START()
+
+C Make approx_tab global to work around Apple relocation bug.
+ifdef(`DARWIN',`
+       deflit(`approx_tab', MPN(invert_limb_tab))
+       GLOBL   approx_tab')
+
+       TEXT
+       ALIGN(16)
+PROLOGUE(mpn_invert_limb)
+deflit(`FRAME', 0)
+       mov     PARAM_DIVISOR, %eax                     C d
+       C Avoid push/pop on k7.
+       sub     $8, %esp        FRAME_subl_esp(8)
+       mov     %ebx, (%esp)                            C save ebx, restored before ret
+       mov     %edi, 4(%esp)                           C save edi, restored before ret
+
+       mov     %eax, %edi                              C d stays in edi throughout
+       shr     $22, %eax                               C top 10 bits of d, the table index
+       C The -1024 byte displacement below makes index 512 select the first
+       C 2-byte table entry.  Assumes d has its top bit set, so that the index
+       C lies in 512..1023 -- TODO confirm against callers.
+ifdef(`PIC',`
+       LEA(    approx_tab, %ebx)
+       movzwl  -1024(%ebx, %eax, 2), %eax
+',`
+       movzwl  -1024+approx_tab(%eax, %eax), %eax      C %eax = v0
+')
+
+       C v1 = (v0 << 4) - ((v0*v0*d_21) >> 32) - 1
+       mov     %eax, %ecx                              C v0
+       imul    %eax, %eax                              C v0*v0
+       mov     %edi, %ebx
+       shr     $11, %ebx
+       inc     %ebx                                    C d_21 = (d >> 11) + 1
+       mul     %ebx                                    C %edx = high limb of v0*v0*d_21
+       mov     %edi, %ebx                              C Prepare
+       shr     %ebx                                    C d >> 1, low bit of d to carry
+       sbb     %eax, %eax                              C 0 or -1 according to that bit
+       sub     %eax, %ebx                              C %ebx = d_31, %eax = mask
+       shl     $4, %ecx                                C v0 << 4
+       dec     %ecx                                    C the - 1 term
+       sub     %edx, %ecx                              C %ecx = v1
+
+       C v_2 = (v1 << 15) + ((v1 *(2^48 - v1 * d31 + (v1 >> 1) & mask)) >> 33)
+       imul    %ecx, %ebx                              C v1 * d31, low 32 bits
+       and     %ecx, %eax                              C v1 & mask
+       shr     %eax                                    C halve the masked v1
+       sub     %ebx, %eax                              C subtract v1 * d31, wraps mod 2^32
+       mul     %ecx                                    C times v1, high limb in %edx
+       mov     %edi, %eax                              C Prepare for next mul
+       shl     $15, %ecx                               C v1 << 15
+       shr     %edx                                    C high limb >> 1, i.e. product >> 33
+       add     %edx, %ecx                              C %ecx = v2
+
+       mul     %ecx                                    C %edx:%eax = d * v2
+       add     %edi, %eax                              C only the carry out is used
+       mov     %ecx, %eax                              C v2, mov leaves the carry intact
+       adc     %edi, %edx                              C high limb + d + carry
+       sub     %edx, %eax                              C %eax = v3
+
+       mov     (%esp), %ebx                            C restore ebx
+       mov     4(%esp), %edi                           C restore edi
+       add     $8, %esp
+
+       ret                                             C result v3 is in %eax
+
+EPILOGUE()
+
+DEF_OBJECT(approx_tab,2)
+       .value  0x7fe1,0x7fa1,0x7f61,0x7f22,0x7ee3,0x7ea4,0x7e65,0x7e27
+       .value  0x7de9,0x7dab,0x7d6d,0x7d30,0x7cf3,0x7cb6,0x7c79,0x7c3d
+       .value  0x7c00,0x7bc4,0x7b89,0x7b4d,0x7b12,0x7ad7,0x7a9c,0x7a61
+       .value  0x7a27,0x79ec,0x79b2,0x7979,0x793f,0x7906,0x78cc,0x7894
+       .value  0x785b,0x7822,0x77ea,0x77b2,0x777a,0x7742,0x770b,0x76d3
+       .value  0x769c,0x7665,0x762f,0x75f8,0x75c2,0x758c,0x7556,0x7520
+       .value  0x74ea,0x74b5,0x7480,0x744b,0x7416,0x73e2,0x73ad,0x7379
+       .value  0x7345,0x7311,0x72dd,0x72aa,0x7277,0x7243,0x7210,0x71de
+       .value  0x71ab,0x7179,0x7146,0x7114,0x70e2,0x70b1,0x707f,0x704e
+       .value  0x701c,0x6feb,0x6fba,0x6f8a,0x6f59,0x6f29,0x6ef9,0x6ec8
+       .value  0x6e99,0x6e69,0x6e39,0x6e0a,0x6ddb,0x6dab,0x6d7d,0x6d4e
+       .value  0x6d1f,0x6cf1,0x6cc2,0x6c94,0x6c66,0x6c38,0x6c0a,0x6bdd
+       .value  0x6bb0,0x6b82,0x6b55,0x6b28,0x6afb,0x6acf,0x6aa2,0x6a76
+       .value  0x6a49,0x6a1d,0x69f1,0x69c6,0x699a,0x696e,0x6943,0x6918
+       .value  0x68ed,0x68c2,0x6897,0x686c,0x6842,0x6817,0x67ed,0x67c3
+       .value  0x6799,0x676f,0x6745,0x671b,0x66f2,0x66c8,0x669f,0x6676
+       .value  0x664d,0x6624,0x65fc,0x65d3,0x65aa,0x6582,0x655a,0x6532
+       .value  0x650a,0x64e2,0x64ba,0x6493,0x646b,0x6444,0x641c,0x63f5
+       .value  0x63ce,0x63a7,0x6381,0x635a,0x6333,0x630d,0x62e7,0x62c1
+       .value  0x629a,0x6275,0x624f,0x6229,0x6203,0x61de,0x61b8,0x6193
+       .value  0x616e,0x6149,0x6124,0x60ff,0x60da,0x60b6,0x6091,0x606d
+       .value  0x6049,0x6024,0x6000,0x5fdc,0x5fb8,0x5f95,0x5f71,0x5f4d
+       .value  0x5f2a,0x5f07,0x5ee3,0x5ec0,0x5e9d,0x5e7a,0x5e57,0x5e35
+       .value  0x5e12,0x5def,0x5dcd,0x5dab,0x5d88,0x5d66,0x5d44,0x5d22
+       .value  0x5d00,0x5cde,0x5cbd,0x5c9b,0x5c7a,0x5c58,0x5c37,0x5c16
+       .value  0x5bf5,0x5bd4,0x5bb3,0x5b92,0x5b71,0x5b51,0x5b30,0x5b10
+       .value  0x5aef,0x5acf,0x5aaf,0x5a8f,0x5a6f,0x5a4f,0x5a2f,0x5a0f
+       .value  0x59ef,0x59d0,0x59b0,0x5991,0x5972,0x5952,0x5933,0x5914
+       .value  0x58f5,0x58d6,0x58b7,0x5899,0x587a,0x585b,0x583d,0x581f
+       .value  0x5800,0x57e2,0x57c4,0x57a6,0x5788,0x576a,0x574c,0x572e
+       .value  0x5711,0x56f3,0x56d5,0x56b8,0x569b,0x567d,0x5660,0x5643
+       .value  0x5626,0x5609,0x55ec,0x55cf,0x55b2,0x5596,0x5579,0x555d
+       .value  0x5540,0x5524,0x5507,0x54eb,0x54cf,0x54b3,0x5497,0x547b
+       .value  0x545f,0x5443,0x5428,0x540c,0x53f0,0x53d5,0x53b9,0x539e
+       .value  0x5383,0x5368,0x534c,0x5331,0x5316,0x52fb,0x52e0,0x52c6
+       .value  0x52ab,0x5290,0x5276,0x525b,0x5240,0x5226,0x520c,0x51f1
+       .value  0x51d7,0x51bd,0x51a3,0x5189,0x516f,0x5155,0x513b,0x5121
+       .value  0x5108,0x50ee,0x50d5,0x50bb,0x50a2,0x5088,0x506f,0x5056
+       .value  0x503c,0x5023,0x500a,0x4ff1,0x4fd8,0x4fbf,0x4fa6,0x4f8e
+       .value  0x4f75,0x4f5c,0x4f44,0x4f2b,0x4f13,0x4efa,0x4ee2,0x4eca
+       .value  0x4eb1,0x4e99,0x4e81,0x4e69,0x4e51,0x4e39,0x4e21,0x4e09
+       .value  0x4df1,0x4dda,0x4dc2,0x4daa,0x4d93,0x4d7b,0x4d64,0x4d4d
+       .value  0x4d35,0x4d1e,0x4d07,0x4cf0,0x4cd8,0x4cc1,0x4caa,0x4c93
+       .value  0x4c7d,0x4c66,0x4c4f,0x4c38,0x4c21,0x4c0b,0x4bf4,0x4bde
+       .value  0x4bc7,0x4bb1,0x4b9a,0x4b84,0x4b6e,0x4b58,0x4b41,0x4b2b
+       .value  0x4b15,0x4aff,0x4ae9,0x4ad3,0x4abd,0x4aa8,0x4a92,0x4a7c
+       .value  0x4a66,0x4a51,0x4a3b,0x4a26,0x4a10,0x49fb,0x49e5,0x49d0
+       .value  0x49bb,0x49a6,0x4990,0x497b,0x4966,0x4951,0x493c,0x4927
+       .value  0x4912,0x48fe,0x48e9,0x48d4,0x48bf,0x48ab,0x4896,0x4881
+       .value  0x486d,0x4858,0x4844,0x482f,0x481b,0x4807,0x47f3,0x47de
+       .value  0x47ca,0x47b6,0x47a2,0x478e,0x477a,0x4766,0x4752,0x473e
+       .value  0x472a,0x4717,0x4703,0x46ef,0x46db,0x46c8,0x46b4,0x46a1
+       .value  0x468d,0x467a,0x4666,0x4653,0x4640,0x462c,0x4619,0x4606
+       .value  0x45f3,0x45e0,0x45cd,0x45ba,0x45a7,0x4594,0x4581,0x456e
+       .value  0x455b,0x4548,0x4536,0x4523,0x4510,0x44fe,0x44eb,0x44d8
+       .value  0x44c6,0x44b3,0x44a1,0x448f,0x447c,0x446a,0x4458,0x4445
+       .value  0x4433,0x4421,0x440f,0x43fd,0x43eb,0x43d9,0x43c7,0x43b5
+       .value  0x43a3,0x4391,0x437f,0x436d,0x435c,0x434a,0x4338,0x4327
+       .value  0x4315,0x4303,0x42f2,0x42e0,0x42cf,0x42bd,0x42ac,0x429b
+       .value  0x4289,0x4278,0x4267,0x4256,0x4244,0x4233,0x4222,0x4211
+       .value  0x4200,0x41ef,0x41de,0x41cd,0x41bc,0x41ab,0x419a,0x418a
+       .value  0x4179,0x4168,0x4157,0x4147,0x4136,0x4125,0x4115,0x4104
+       .value  0x40f4,0x40e3,0x40d3,0x40c2,0x40b2,0x40a2,0x4091,0x4081
+       .value  0x4071,0x4061,0x4050,0x4040,0x4030,0x4020,0x4010,0x4000
+END_OBJECT(approx_tab)
diff --git a/mpn/x86/k7/mmx/divrem_1.asm b/mpn/x86/k7/mmx/divrem_1.asm

index fa5824c7b9863da3a1d9eddead7dd3353a17b646..8c771a32b2530e0041eee791ee803ca37488ecc1 100644 (file)
--- a/mpn/x86/k7/mmx/divrem_1.asm
+++ b/mpn/x86/k7/mmx/divrem_1.asm
@@ -724,12 +724,12 @@ C q1 is the high word of m*n2+b*n2 and the following shows q1<=b-2 always.
  C rnd() means rounding down to a multiple of d.
  C
  C      m*n2 + b*n2 <= m*(d-1) + b*(d-1)
-C                   = m*d + b*d - m - b
-C                   = floor((b(b-d)-1)/d)*d + b*d - m - b
-C                   = rnd(b(b-d)-1) + b*d - m - b
-C                   = rnd(b(b-d)-1 + b*d) - m - b
-C                   = rnd(b*b-1) - m - b
-C                   <= (b-2)*b
+C                   = m*d + b*d - m - b
+C                   = floor((b(b-d)-1)/d)*d + b*d - m - b
+C                   = rnd(b(b-d)-1) + b*d - m - b
+C                   = rnd(b(b-d)-1 + b*d) - m - b
+C                   = rnd(b*b-1) - m - b
+C                   <= (b-2)*b
  C
  C Unchanged from the general case is that the final quotient limb q can be
  C either q1 or q1+1, and the q1+1 case occurs often.  This can be seen from
diff --git a/mpn/x86/k7/mmx/popham.asm b/mpn/x86/k7/mmx/popham.asm

index 5dc0a78c42552f57bf0c94ad0dadc2278403927e..072f9e2a984a80aaee204a60ae164a306b4ca175 100644 (file)
--- a/mpn/x86/k7/mmx/popham.asm
+++ b/mpn/x86/k7/mmx/popham.asm
@@ -23,7 +23,7 @@ include(`../config.m4')
  
  C                           popcount        hamdist
  C P3 generic                   6.5             7
-C P3 model 9  (Banias)          ?              ?
+C P3 model 9  (Banias)          5.7            6.1
  C P3 model 13 (Dothan)         5.75            6
  C K7                           5               6
  
diff --git a/mpn/x86/k7/mod_1_1.asm b/mpn/x86/k7/mod_1_1.asm

new file mode 100644 (file)

index 0000000..bb19626
--- /dev/null
+++ b/mpn/x86/k7/mod_1_1.asm
@@ -0,0 +1,210 @@
+dnl  x86-32 mpn_mod_1_1p, requiring cmov.
+
+dnl  Contributed to the GNU project by Niels Möller and Torbjorn Granlund.
+dnl
+dnl  Copyright 2010, 2011 Free Software Foundation, Inc.
+dnl
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+dnl
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C                          cycles/limb
+C P5                            ?
+C P6 model 0-8,10-12            ?
+C P6 model 9  (Banias)          ?
+C P6 model 13 (Dothan)          ?
+C P4 model 0  (Willamette)      ?
+C P4 model 1  (?)               ?
+C P4 model 2  (Northwood)       ?
+C P4 model 3  (Prescott)        ?
+C P4 model 4  (Nocona)          ?
+C AMD K6                        ?
+C AMD K7                        7
+C AMD K8                        ?
+
+define(`B2mb', `%ebx')
+define(`r0', `%esi')
+define(`r2', `%ebp')
+define(`t0', `%edi')
+define(`ap', `%ecx')  C Also shift count
+
+C Stack frame
+C      pre     36(%esp)
+C      b       32(%esp)
+C      n       28(%esp)
+C      ap      24(%esp)
+C      return  20(%esp)
+C      %ebp    16(%esp)
+C      %edi    12(%esp)
+C      %esi    8(%esp)
+C      %ebx    4(%esp)
+C      B2mod   (%esp)
+
+define(`B2modb', `(%esp)')
+define(`n', `28(%esp)')
+define(`b', `32(%esp)')
+define(`pre', `36(%esp)')
+
+C mp_limb_t
+C mpn_mod_1_1p (mp_srcptr ap, mp_size_t n, mp_limb_t b, mp_limb_t pre[4])
+C
+C The pre array contains bi, cnt, B1modb, B2modb
+C Note: This implementation needs B1modb only when cnt > 0
+
+ASM_START()
+       TEXT
+       ALIGN(8)
+PROLOGUE(mpn_mod_1_1p)
+       push    %ebp                    C Save ebp, edi, esi, ebx, restored before ret
+       push    %edi
+       push    %esi
+       push    %ebx
+       mov     32(%esp), %ebp          C pre[]
+
+       mov     12(%ebp), %eax          C B2modb
+       push    %eax                    C Put it on stack
+
+       mov     n, %edx                 C edx = limb count
+       mov     24(%esp), ap            C Load the ap argument
+
+       lea     (ap, %edx, 4), ap       C Point just past the most significant limb
+       mov     -4(ap), %eax            C eax = most significant limb
+       cmp     $3, %edx
+       jnc     L(first)                C n >= 3, fold limbs with B2modb first
+       mov     -8(ap), r0              C n < 3, r0 = limb below the top one, so presumably n = 2 here
+       jmp     L(reduce_two)
+
+L(first):
+       C First iteration, no r2
+       mull    B2modb                  C edx:eax = top limb * B2modb
+       mov     -12(ap), r0             C Third limb from the top
+       add     %eax, r0                C Add the low product word
+       mov     -8(ap), %eax            C Second limb from the top
+       adc     %edx, %eax              C Add the high product word and the carry
+       sbb     r2, r2                  C r2 = 0 or -1, a mask of the carry out
+       sub     $3, n                   C Three limbs consumed, sets ZF for the jz below
+       lea     -16(ap), ap             C ap now addresses the fourth limb from the top, flags untouched
+       jz      L(reduce_three)         C No limbs left when n was exactly 3
+
+       mov     B2modb, B2mb
+       sub     b, B2mb                 C B2mb = B2modb - b
+       lea     (B2mb, r0), t0          C t0 = r0 + B2modb - b
+       jmp     L(mid)
+
+       ALIGN(16)
+L(top): C Loopmixed to 7 c/l on k7
+       add     %eax, r0                C Add the low product word to the limb just loaded
+       lea     (B2mb, r0), t0          C t0 = r0 + B2modb - b, candidate for the next cmovc
+       mov     r2, %eax
+       adc     %edx, %eax              C eax = r2 + high product word + carry
+       sbb     r2, r2                  C r2 = 0 or -1, a mask of the carry out
+L(mid):        mull    B2modb          C edx:eax = eax * B2modb
+       and     B2modb, r2              C r2 = B2modb when the mask is set, else 0
+       add     r0, r2                  C r2 += r0, a carry here makes the cmovc below pick t0
+       decl    n                       C One more limb consumed, sets ZF and leaves CF alone
+       mov     (ap), r0                C Load the next lower limb
+       cmovc(  t0, r2)
+       lea     -4(ap), ap              C Step down one limb, flags untouched
+       jnz     L(top)                  C Loop until n reaches zero, ZF is from the decl
+
+       add     %eax, r0                C Fold the last product into eax:r0
+       mov     r2, %eax
+       adc     %edx, %eax
+       sbb     r2, r2                  C r2 = 0 or -1, a mask of the carry out
+
+L(reduce_three):
+       C Eliminate r2
+       and     b, r2                   C r2 = b when the mask is set, else 0
+       sub     r2, %eax                C Subtract b from the high word on carry
+
+L(reduce_two):
+       mov     pre, %ebp
+       movb    4(%ebp), %cl            C cl = cnt, the ap register is the shift count from here on
+       test    %cl, %cl
+       jz      L(normalized)           C cnt = 0, no shifting needed
+
+       C Unnormalized, use B1modb to reduce to size < B b
+       mull    8(%ebp)                 C edx:eax = high word * B1modb
+       xor     t0, t0
+       add     %eax, r0
+       adc     %edx, t0                C t0:r0 = r0 + the product
+       mov     t0, %eax                C High word back in eax
+
+       C Left-shift to normalize
+       shld    %cl, r0, %eax C Always use shld?
+
+       shl     %cl, r0
+       jmp     L(udiv)
+
+L(normalized):
+       mov     %eax, t0
+       sub     b, t0                   C t0 = eax - b, taken by the cmovnc below when eax >= b
+       cmovnc( t0, %eax)
+
+L(udiv):
+       lea     1(%eax), t0             C t0 = high word + 1
+       mull    (%ebp)                  C edx:eax = high word * bi
+       mov     b, %ebx         C Needed in register for lea
+       add     r0, %eax
+       adc     t0, %edx                C edx = high product word + t0 + carry, the quotient estimate
+       imul    %ebx, %edx              C Low word of estimate * b
+       sub     %edx, r0                C r0 = candidate remainder
+       cmp     r0, %eax                C CF clear when eax >= r0, then the cmovnc below keeps r0
+       lea     (%ebx, r0), %eax        C eax = r0 + b, the adjusted candidate
+       cmovnc( r0, %eax)
+       cmp     %ebx, %eax
+       jnc     L(fix)                  C eax >= b needs one more subtraction of b
+L(ok): shr     %cl, %eax               C Shift the remainder back down by cnt
+
+       add     $4, %esp                C Drop the B2modb stack slot
+       pop     %ebx
+       pop     %esi
+       pop     %edi
+       pop     %ebp
+
+       ret
+L(fix):        sub     %ebx, %eax
+       jmp     L(ok)
+EPILOGUE()
+
+PROLOGUE(mpn_mod_1_1p_cps)
+       push    %ebp
+       mov     12(%esp), %ebp          C Second stack argument, presumably the divisor b
+       push    %esi
+       bsr     %ebp, %ecx              C Index of the highest set bit
+       push    %ebx                    C Saved and restored, not otherwise used in this routine
+       xor     $31, %ecx               C cnt = 31 - index, the count of leading zero bits
+       mov     16(%esp), %esi          C First stack argument, the table the results are stored to
+       sal     %cl, %ebp               C Shift left by cnt so the top bit is set
+       mov     %ebp, %edx
+       not     %edx                    C High dividend word = complement of the shifted value
+       mov     $-1, %eax               C Low dividend word = all ones
+       div     %ebp                    C On K7, invert_limb would be a few cycles faster.
+       mov     %eax, (%esi)            C store bi
+       mov     %ecx, 4(%esi)           C store cnt
+       neg     %ebp                    C ebp = minus the shifted value, mod 2^32
+       mov     $1, %edx
+       shld    %cl, %eax, %edx         C edx = 1 shifted left by cnt, filled with the top cnt bits of bi
+       imul    %ebp, %edx              C Low word of edx * ebp
+       shr     %cl, %edx               C Shift back down by cnt
+       imul    %ebp, %eax              C Low word of bi * ebp
+       mov     %edx, 8(%esi)           C store B1modb
+       mov     %eax, 12(%esi)          C store B2modb
+       pop     %ebx
+       pop     %esi
+       pop     %ebp
+       ret
+EPILOGUE()
diff --git a/mpn/x86/k7/mod_1_4.asm b/mpn/x86/k7/mod_1_4.asm

index 5ecb4fc5a6aa8b2c6e2cc230074100f542a3500d..4d1129c2af433b4fe93d5de95485ddc8658596b0 100644 (file)
--- a/mpn/x86/k7/mod_1_4.asm
+++ b/mpn/x86/k7/mod_1_4.asm
@@ -1,50 +1,42 @@
  dnl  x86-32 mpn_mod_1s_4p, requiring cmov.
  
  dnl  Contributed to the GNU project by Torbjorn Granlund.
-
-dnl  Copyright 2009 Free Software Foundation, Inc.
-
+dnl
+dnl  Copyright 2009, 2010 Free Software Foundation, Inc.
+dnl
  dnl  This file is part of the GNU MP Library.
-
+dnl
  dnl  The GNU MP Library is free software; you can redistribute it and/or modify
  dnl  it under the terms of the GNU Lesser General Public License as published
  dnl  by the Free Software Foundation; either version 3 of the License, or (at
  dnl  your option) any later version.
-
+dnl
  dnl  The GNU MP Library is distributed in the hope that it will be useful, but
  dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
  dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
  dnl  License for more details.
-
+dnl
  dnl  You should have received a copy of the GNU Lesser General Public License
  dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
  
  include(`../config.m4')
  
-C                           cycles/limb
-C P5:
-C P6 model 0-8,10-12)
-C P6 model 9  (Banias)
-C P6 model 13 (Dothan)          6.0
-C P4 model 0  (Willamette)
-C P4 model 1  (?)
+C                          cycles/limb
+C P5                            ?
+C P6 model 0-8,10-12            ?
+C P6 model 9  (Banias)          ?
+C P6 model 13 (Dothan)          6
+C P4 model 0  (Willamette)      ?
+C P4 model 1  (?)               ?
  C P4 model 2  (Northwood)      15.5
-C P4 model 3  (Prescott)
-C P4 model 4  (Nocona)
-C K6:
-C K7:                            4.75
-C K8:
-
-
-C Ths inner loop was manually written, it ought to be loopmixed.
-C Presumably, we could get to 4 c/l for K7.
-
-C The cps function was compiler generated.  It can clearly be optimized.
-
+C P4 model 3  (Prescott)        ?
+C P4 model 4  (Nocona)          ?
+C AMD K6                        ?
+C AMD K7                        4.75
+C AMD K8                        ?
  
  ASM_START()
         TEXT
-
         ALIGN(16)
  PROLOGUE(mpn_mod_1s_4p)
         push    %ebp
@@ -52,7 +44,7 @@ PROLOGUE(mpn_mod_1s_4p)
         push    %esi
         push    %ebx
         sub     $28, %esp
-       mov     60(%esp), %edi          C cps
+       mov     60(%esp), %edi          C cps[]
         mov     8(%edi), %eax
         mov     12(%edi), %edx
         mov     16(%edi), %ecx
@@ -101,11 +93,10 @@ L(b1):     mov     8(%esi), %ebp
         lea     -4(%esi), %esi
         jmp     L(m1)
  
-L(b2): mov     8(%esi), %eax
-       mull    4(%esp)
+L(b2): mov     8(%esi), %edi
         mov     4(%esi), %ebp
         lea     -8(%esi), %esi
-       jmp     L(m0)
+       jmp     L(m1)
  
         ALIGN(16)
  L(top):        mov     (%esi), %eax
@@ -154,18 +145,18 @@ L(end):   mov     4(%esp), %eax
         mov     %ebx, %ecx
         mov     %eax, %ebx
         mov     %ebp, %eax
+       mov     56(%esp), %ebp
         sal     %cl, %eax
         add     %eax, %ebx
         adc     %esi, %edx
-       imul    56(%esp), %edx
-       mov     56(%esp), %esi
+       imul    %ebp, %edx
         sub     %edx, %eax
-       lea     (%eax,%esi), %edx
+       lea     (%eax,%ebp), %edx
         cmp     %eax, %ebx
-       cmovb(  %edx, %eax)
+       cmovc(  %edx, %eax)
         mov     %eax, %edx
-       sub     %esi, %eax
-       cmovb(  %edx, %eax)
+       sub     %ebp, %eax
+       cmovc(  %edx, %eax)
         add     $28, %esp
         pop     %ebx
         pop     %esi
@@ -177,105 +168,82 @@ EPILOGUE()
  
         ALIGN(16)
  PROLOGUE(mpn_mod_1s_4p_cps)
-       sub     $56, %esp
-       mov     %esi, 44(%esp)
-       mov     64(%esp), %esi
-       mov     %edi, 48(%esp)
-       mov     %ebx, 40(%esp)
-       mov     $-1, %ebx
-       mov     %ebp, 52(%esp)
-       bsr     %esi, %eax
-       xor     $31, %eax
-       mov     %eax, %ecx
-       mov     %eax, 24(%esp)
-       mov     %ebx, %eax
-       sal     %cl, %esi
-       mov     %esi, %ecx
-       mov     %esi, %edi
-       mov     %esi, %ebp
-       neg     %ecx
-       not     %edi
-       mov     %ecx, 20(%esp)
-       mov     $32, %ecx
-       sub     24(%esp), %ecx
-       mov     %edi, %edx
-       mov     %edi, 16(%esp)
-       mov     20(%esp), %edi
-       div     %esi
-       mov     %eax, %ebx
-       shr     %cl, %eax
-       movzbl  24(%esp), %ecx
-       mov     %eax, 12(%esp)
-       mov     $1, %eax
-       sal     %cl, %eax
-       or      %eax, 12(%esp)
-       imul    12(%esp), %edi
+C CAUTION: This is the same code as in pentium4/sse2/mod_1_4.asm
+       push    %ebp
+       push    %edi
+       push    %esi
+       push    %ebx
+       mov     20(%esp), %ebp          C FIXME: avoid bp for 0-idx
+       mov     24(%esp), %ebx
+       bsr     %ebx, %ecx
+       xor     $31, %ecx
+       sal     %cl, %ebx               C b << cnt
+       mov     %ebx, %edx
+       not     %edx
+       mov     $-1, %eax
+       div     %ebx
+       xor     %edi, %edi
+       sub     %ebx, %edi
+       mov     $1, %esi
+       mov     %eax, (%ebp)            C store bi
+       mov     %ecx, 4(%ebp)           C store cnt
+       shld    %cl, %eax, %esi
+       imul    %edi, %esi
+       mov     %eax, %edi
+       mul     %esi
+
+       add     %esi, %edx
+       shr     %cl, %esi
+       mov     %esi, 8(%ebp)           C store B1modb
+
+       not     %edx
+       imul    %ebx, %edx
+       lea     (%edx,%ebx), %esi
+       cmp     %edx, %eax
+       cmovnc( %edx, %esi)
         mov     %edi, %eax
-       mov     %edi, 20(%esp)
-       mul     %ebx
-       mov     %eax, %ecx
-       lea     1(%edx,%edi), %eax
-       neg     %eax
-       imul    %eax, %ebp
-       lea     (%ebp,%esi), %eax
-       cmp     %ebp, %ecx
-       cmovb(  %eax, %ebp)
-       mov     %ebp, %eax
-       mul     %ebx
-       lea     1(%ebp,%edx), %edi
-       mov     %eax, %ecx
-       neg     %edi
-       mov     %edi, 8(%esp)
-       imul    %esi, %edi
+       mul     %esi
+
+       add     %esi, %edx
+       shr     %cl, %esi
+       mov     %esi, 12(%ebp)          C store B2modb
+
+       not     %edx
+       imul    %ebx, %edx
+       lea     (%edx,%ebx), %esi
+       cmp     %edx, %eax
+       cmovnc( %edx, %esi)
         mov     %edi, %eax
-       add     %esi, %eax
-       cmp     %edi, %ecx
-       cmovae( %edi, %eax)
-       mov     %eax, 32(%esp)
-       mov     32(%esp), %edi
-       mul     %ebx
-       mov     %eax, 36(%esp)
-       lea     1(%edi,%edx), %eax
-       negl    %eax
-       imul    %esi, %eax
-       mov     %eax, %ecx
-       add     %esi, %ecx
-       cmp     %eax, 36(%esp)
-       cmovae( %eax, %ecx)
-       mov     %ecx, (%esp)
-       mov     %ecx, %eax
-       mul     %ebx
-       mov     %eax, %edi
-       mov     (%esp), %eax
-       lea     1(%eax,%edx), %ecx
-       mov     60(%esp), %edx
-       neg     %ecx
-       imul    %esi, %ecx
-       mov     %ebx, (%edx)
-       add     %ecx, %esi
-       cmp     %ecx, %edi
-       cmovae( %ecx, %esi)
-       mov     24(%esp), %ecx
-       shrl    %cl, 20(%esp)
-       mov     20(%esp), %edi
-       mov     %esi, 4(%esp)
-       mov     %ecx, 4(%edx)
-       movzbl  24(%esp), %ecx
-       mov     %edi, 8(%edx)
-       shr     %cl, %ebp
-       shr     %cl, %eax
-       mov     %ebp, 12(%edx)
-       shrl    %cl, 32(%esp)
-       mov     32(%esp), %edi
-       shrl    %cl, 4(%esp)
-       mov     %eax, 20(%edx)
-       mov     %edi, 16(%edx)
-       mov     4(%esp), %edi
-       mov     %edi, 24(%edx)
-       mov     40(%esp), %ebx
-       mov     44(%esp), %esi
-       mov     48(%esp), %edi
-       mov     52(%esp), %ebp
-       add     $56, %esp
+       mul     %esi
+
+       add     %esi, %edx
+       shr     %cl, %esi
+       mov     %esi, 16(%ebp)          C store B3modb
+
+       not     %edx
+       imul    %ebx, %edx
+       lea     (%edx,%ebx), %esi
+       cmp     %edx, %eax
+       cmovnc( %edx, %esi)
+       mov     %edi, %eax
+       mul     %esi
+
+       add     %esi, %edx
+       shr     %cl, %esi
+       mov     %esi, 20(%ebp)          C store B4modb
+
+       not     %edx
+       imul    %ebx, %edx
+       add     %edx, %ebx
+       cmp     %edx, %eax
+       cmovnc( %edx, %ebx)
+
+       shr     %cl, %ebx
+       mov     %ebx, 24(%ebp)          C store B5modb
+
+       pop     %ebx
+       pop     %esi
+       pop     %edi
+       pop     %ebp
         ret
  EPILOGUE()
diff --git a/mpn/x86/k7/mul_1.asm b/mpn/x86/k7/mul_1.asm

index 016262d594c3c62e6d9f09ee6a7ded299a7aec6b..bb02bb1f5062430ccbef22ed62c0692ef2858767 100644 (file)
--- a/mpn/x86/k7/mul_1.asm
+++ b/mpn/x86/k7/mul_1.asm
@@ -21,8 +21,8 @@ dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
  include(`../config.m4')
  
  
-C                           cycles/limb
-C P5:
+C                          cycles/limb
+C P5
  C P6 model 0-8,10-12)
  C P6 model 9  (Banias)
  C P6 model 13 (Dothan)
@@ -31,9 +31,9 @@ C P4 model 1  (?)
  C P4 model 2  (Northwood)
  C P4 model 3  (Prescott)
  C P4 model 4  (Nocona)
-C K6:
-C K7:                            3.25
-C K8:
+C AMD K6
+C AMD K7                        3.25
+C AMD K8
  
  C TODO
  C  * Improve feed-in and wind-down code.  We beat the old code for all n != 1,
diff --git a/mpn/x86/k7/sublsh1_n.asm b/mpn/x86/k7/sublsh1_n.asm

new file mode 100644 (file)

index 0000000..9653485
--- /dev/null
+++ b/mpn/x86/k7/sublsh1_n.asm
@@ -0,0 +1,162 @@
+dnl  AMD K7 mpn_sublsh1_n_ip1 -- rp[] = rp[] - (up[] << 1)
+
+dnl  Copyright 2011 Free Software Foundation, Inc.
+
+dnl  Contributed to the GNU project by Torbjorn Granlund and Marco Bodrato.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C This is an attempt at a sublsh1_n for x86-32, not relying on sse2 insns.  The
+C innerloop is 2*4-way unrolled, which is the best we can do with the available
+C registers.  It seems tricky to use the same structure for rsblsh1_n, since we
+C cannot feed carry between operations there.
+
+C                          cycles/limb
+C P5
+C P6 model 0-8,10-12
+C P6 model 9  (Banias)
+C P6 model 13 (Dothan)
+C P4 model 0  (Willamette)
+C P4 model 1  (?)
+C P4 model 2  (Northwood)
+C P4 model 3  (Prescott)
+C P4 model 4  (Nocona)
+C Intel Atom                    6.75
+C AMD K6
+C AMD K7
+C AMD K8
+
+C This is a basic sublsh1_n for k7, atom, and perhaps some other x86-32
+C processors.  It uses 2*4-way unrolling, for good reasons.
+C
+C Breaking carry recurrency might be a good idea.  We would then need separate
+C registers for the shift carry and add/subtract carry, which in turn would
+C force us to 2*2-way unrolling.
+
+defframe(PARAM_SIZE,   12)
+defframe(PARAM_SRC,     8)
+defframe(PARAM_DST,     4)
+
+dnl  re-use parameter space
+define(VAR_COUNT,`PARAM_SIZE')
+define(SAVE_EBX,`PARAM_SRC')
+define(SAVE_EBP,`PARAM_DST')
+
+ASM_START()
+       TEXT
+       ALIGN(8)
+PROLOGUE(mpn_sublsh1_n_ip1)
+deflit(`FRAME',0)
+
+define(`rp',  `%edi')
+define(`up',  `%esi')
+
+       mov     PARAM_SIZE, %eax        C size
+       push    up                      FRAME_pushl()
+       push    rp                      FRAME_pushl()
+       xor     %edx, %edx              C edx keeps the saved carry bits, none yet
+       mov     PARAM_SRC, up
+       mov     PARAM_DST, rp
+       mov     %ebx, SAVE_EBX          C Park ebx in the src parameter slot, which was read above
+       mov     %eax, %ebx              C Copy of size for the leftover count
+       shr     $3, %eax                C Number of full groups of 8 limbs
+
+       not     %eax                    C count = -(size\8)-1
+       and     $7, %ebx                C size % 8
+       jz      L(exact)                C size is a multiple of 8, skip the one-limb loop
+
+L(oop):
+ifdef(`CPU_P6',`
+       shr     %edx ')                 C restore 2nd saved carry bit
+       mov     (up), %ecx
+       adc     %ecx, %ecx              C Double the limb with carry in
+       rcr     %edx                    C restore 1st saved carry bit
+       lea     4(up), up
+       sbb     %ecx, (rp)              C Subtract the doubled limb from rp in place
+       lea     4(rp), rp
+       adc     %edx, %edx              C save a carry bit in edx
+ifdef(`CPU_P6',`
+       adc     %edx, %edx ')           C save another carry bit in edx
+       dec     %ebx                    C One leftover limb done, dec leaves CF alone
+       jnz     L(oop)
+L(exact):
+       inc     %eax                    C count + 1, zero when there is no full group of 8
+       jz      L(end)
+       mov     %eax, VAR_COUNT         C Negative group counter lives in the size parameter slot
+       mov     %ebp, SAVE_EBP          C Park ebp in the dst parameter slot, which was read above
+
+       ALIGN(16)
+L(top):
+ifdef(`CPU_P6',`
+       shr     %edx ')                 C restore 2nd saved carry bit
+       mov     (up), %eax
+       adc     %eax, %eax              C Double four limbs as one carry chain
+       mov     4(up), %ebx
+       adc     %ebx, %ebx
+       mov     8(up), %ecx
+       adc     %ecx, %ecx
+       mov     12(up), %ebp
+       adc     %ebp, %ebp              C Carry out of this chain is moved into edx by the rcr
+
+       rcr     %edx                    C restore 1st saved carry bit
+
+       sbb     %eax, (rp)              C Subtract the four doubled limbs from rp in place
+       sbb     %ebx, 4(rp)
+       sbb     %ecx, 8(rp)
+       sbb     %ebp, 12(rp)
+
+       mov     16(up), %eax
+       adc     %eax, %eax              C Carry in is the flag left by the sbb just above
+       mov     20(up), %ebx
+       adc     %ebx, %ebx
+       mov     24(up), %ecx
+       adc     %ecx, %ecx
+       mov     28(up), %ebp
+       adc     %ebp, %ebp
+
+       lea     32(up), up              C Advance the source by 8 limbs, flags untouched
+       adc     %edx, %edx              C save a carry bit in edx
+
+       sbb     %eax, 16(rp)            C Subtract the second four doubled limbs
+       sbb     %ebx, 20(rp)
+       sbb     %ecx, 24(rp)
+       sbb     %ebp, 28(rp)
+
+ifdef(`CPU_P6',`
+       adc     %edx, %edx ')           C save another carry bit in edx
+       incl    VAR_COUNT               C Count up towards zero, inc leaves CF alone
+       lea     32(rp), rp              C Advance the destination by 8 limbs, flags untouched
+       jne     L(top)                  C Loop until the counter reaches zero, ZF is from the incl
+
+       mov     SAVE_EBP, %ebp
+L(end):
+       mov     SAVE_EBX, %ebx          C Restore ebx, then eax is formed from edx and the carry flag
+
+ifdef(`CPU_P6',`
+       xor     %eax, %eax
+       shr     $1, %edx
+       adc     %edx, %eax
+',`
+       adc     $0, %edx
+       mov     %edx, %eax
+')
+       pop     rp                      FRAME_popl()
+       pop     up                      FRAME_popl()
+       ret
+EPILOGUE()
+ASM_END()
diff --git a/mpn/x86/k8/gmp-mparam.h b/mpn/x86/k8/gmp-mparam.h

new file mode 100644 (file)

index 0000000..ab19342
--- /dev/null
+++ b/mpn/x86/k8/gmp-mparam.h
@@ -0,0 +1,148 @@
+/* x86/k8 gmp-mparam.h -- Compiler/machine parameter header file.
+
+Copyright 1991, 1993, 1994, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007,
+2008, 2009, 2010, 2011 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#define GMP_LIMB_BITS 32
+#define BYTES_PER_MP_LIMB 4
+
+/* Generated by tuneup.c, 2011-11-25, gcc 4.2 */
+
+#define MOD_1_NORM_THRESHOLD                 0  /* always */
+#define MOD_1_UNNORM_THRESHOLD               3
+#define MOD_1N_TO_MOD_1_1_THRESHOLD         10
+#define MOD_1U_TO_MOD_1_1_THRESHOLD          4
+#define MOD_1_1_TO_MOD_1_2_THRESHOLD        12
+#define MOD_1_2_TO_MOD_1_4_THRESHOLD         0  /* never mpn_mod_1s_2p */
+#define PREINV_MOD_1_TO_MOD_1_THRESHOLD     12
+#define USE_PREINV_DIVREM_1                  1  /* native */
+#define DIV_QR_2_PI2_THRESHOLD           MP_SIZE_T_MAX  /* never */
+#define DIVEXACT_1_THRESHOLD                 0  /* always (native) */
+#define BMOD_1_TO_MOD_1_THRESHOLD           40
+
+#define MUL_TOOM22_THRESHOLD                26
+#define MUL_TOOM33_THRESHOLD                81
+#define MUL_TOOM44_THRESHOLD               136
+#define MUL_TOOM6H_THRESHOLD               270
+#define MUL_TOOM8H_THRESHOLD               430
+
+#define MUL_TOOM32_TO_TOOM43_THRESHOLD      81
+#define MUL_TOOM32_TO_TOOM53_THRESHOLD      91
+#define MUL_TOOM42_TO_TOOM53_THRESHOLD      93
+#define MUL_TOOM42_TO_TOOM63_THRESHOLD      96
+#define MUL_TOOM43_TO_TOOM54_THRESHOLD     121
+
+#define SQR_BASECASE_THRESHOLD               0  /* always (native) */
+#define SQR_TOOM2_THRESHOLD                 46
+#define SQR_TOOM3_THRESHOLD                 81
+#define SQR_TOOM4_THRESHOLD                202
+#define SQR_TOOM6_THRESHOLD                286
+#define SQR_TOOM8_THRESHOLD                430
+
+#define MULMID_TOOM42_THRESHOLD             56
+
+#define MULMOD_BNM1_THRESHOLD               17
+#define SQRMOD_BNM1_THRESHOLD               17
+
+#define MUL_FFT_MODF_THRESHOLD             888  /* k = 6 */
+#define MUL_FFT_TABLE3                                      \
+  { {    888, 6}, {     15, 5}, {     31, 6}, {     25, 7}, \
+    {     13, 6}, {     27, 7}, {     15, 6}, {     33, 7}, \
+    {     17, 6}, {     35, 7}, {     19, 6}, {     39, 7}, \
+    {     23, 6}, {     47, 7}, {     27, 8}, {     15, 7}, \
+    {     31, 6}, {     63, 7}, {     35, 8}, {     19, 7}, \
+    {     41, 8}, {     23, 7}, {     47, 8}, {     31, 7}, \
+    {     63, 8}, {     39, 7}, {     79, 9}, {     23, 8}, \
+    {     51, 9}, {     31, 8}, {     67, 9}, {     39, 8}, \
+    {     79, 9}, {     47, 8}, {     95, 9}, {     55,10}, \
+    {     31, 9}, {     63, 8}, {    127, 9}, {     79,10}, \
+    {     47, 9}, {     95,11}, {     31,10}, {     63, 9}, \
+    {    135,10}, {     79, 9}, {    167,10}, {     95, 9}, \
+    {    191,10}, {    111,11}, {     63,10}, {    127, 9}, \
+    {    255,10}, {    159,11}, {     95,10}, {    191,12}, \
+    {     63,11}, {    127,10}, {    271, 9}, {    543,10}, \
+    {    287,11}, {    159,10}, {    335,11}, {    191,10}, \
+    {    383, 9}, {    767,10}, {    399, 9}, {    799,11}, \
+    {    223,12}, {   4096,13}, {   8192,14}, {  16384,15}, \
+    {  32768,16} }
+#define MUL_FFT_TABLE3_SIZE 73
+#define MUL_FFT_THRESHOLD                 7552
+
+#define SQR_FFT_MODF_THRESHOLD             758  /* k = 5 */
+#define SQR_FFT_TABLE3                                      \
+  { {    758, 5}, {     25, 6}, {     13, 5}, {     27, 6}, \
+    {     25, 7}, {     13, 6}, {     27, 7}, {     15, 6}, \
+    {     32, 7}, {     17, 6}, {     35, 7}, {     19, 6}, \
+    {     39, 7}, {     23, 6}, {     47, 7}, {     27, 8}, \
+    {     15, 7}, {     35, 8}, {     19, 7}, {     41, 8}, \
+    {     23, 7}, {     47, 8}, {     31, 7}, {     63, 8}, \
+    {     39, 9}, {     23, 8}, {     51, 9}, {     31, 8}, \
+    {     67, 9}, {     39, 8}, {     79, 9}, {     47, 8}, \
+    {     95, 9}, {     55,10}, {     31, 9}, {     63, 8}, \
+    {    127, 9}, {     79,10}, {     47, 9}, {     95,11}, \
+    {     31,10}, {     63, 9}, {    135,10}, {     79, 9}, \
+    {    159,10}, {     95, 9}, {    191,10}, {    111,11}, \
+    {     63,10}, {    127, 9}, {    255,10}, {    159,11}, \
+    {     95,10}, {    191,12}, {     63,11}, {    127,10}, \
+    {    255, 9}, {    511,10}, {    271, 9}, {    543, 8}, \
+    {   1087,10}, {    287,11}, {    159,10}, {    319, 9}, \
+    {    671,11}, {    191,10}, {    383, 9}, {    767,10}, \
+    {    399, 9}, {    799,12}, {   4096,13}, {   8192,14}, \
+    {  16384,15}, {  32768,16} }
+#define SQR_FFT_TABLE3_SIZE 74
+#define SQR_FFT_THRESHOLD                 7296
+
+#define MULLO_BASECASE_THRESHOLD             0  /* always */
+#define MULLO_DC_THRESHOLD                  28
+#define MULLO_MUL_N_THRESHOLD            13463
+
+#define DC_DIV_QR_THRESHOLD                 91
+#define DC_DIVAPPR_Q_THRESHOLD             280
+#define DC_BDIV_QR_THRESHOLD                87
+#define DC_BDIV_Q_THRESHOLD                222
+
+#define INV_MULMOD_BNM1_THRESHOLD           62
+#define INV_NEWTON_THRESHOLD               266
+#define INV_APPR_THRESHOLD                 268
+
+#define BINV_NEWTON_THRESHOLD              272
+#define REDC_1_TO_REDC_N_THRESHOLD          79
+
+#define MU_DIV_QR_THRESHOLD               1822
+#define MU_DIVAPPR_Q_THRESHOLD            1652
+#define MUPI_DIV_QR_THRESHOLD              108
+#define MU_BDIV_QR_THRESHOLD              1470
+#define MU_BDIV_Q_THRESHOLD               1470
+
+#define POWM_SEC_TABLE  3,21,195,961,2783
+
+#define MATRIX22_STRASSEN_THRESHOLD         19
+#define HGCD_THRESHOLD                     149
+#define HGCD_APPR_THRESHOLD                181
+#define HGCD_REDUCE_THRESHOLD             4633
+#define GCD_DC_THRESHOLD                   610
+#define GCDEXT_DC_THRESHOLD                419
+#define JACOBI_BASE_METHOD                   4
+
+#define GET_STR_DC_THRESHOLD                14
+#define GET_STR_PRECOMPUTE_THRESHOLD        31
+#define SET_STR_DC_THRESHOLD               272
+#define SET_STR_PRECOMPUTE_THRESHOLD      1330
+
+#define FAC_DSC_THRESHOLD                  438
+#define FAC_ODD_THRESHOLD                   24
diff --git a/mpn/x86/lshift.asm b/mpn/x86/lshift.asm

index 5598599f8bb2872e8b56a696d62c44e2d278c5e7..b0aafd3d0970e1ef941172044044d8a6e9c685ec 100644 (file)
--- a/mpn/x86/lshift.asm
+++ b/mpn/x86/lshift.asm
@@ -22,12 +22,12 @@ include(`../config.m4')
  
  
  C     cycles/limb
-C P54:   7.5
-C P55:   7.0
-C P6:    2.5
-C K6:    4.5
-C K7:    5.0
-C P4:   14.5
+C P54   7.5
+C P55   7.0
+C P6    2.5
+C K6    4.5
+C K7    5.0
+C P4   14.5
  
  
  C mp_limb_t mpn_lshift (mp_ptr dst, mp_srcptr src, mp_size_t size,
diff --git a/mpn/x86/mod_34lsub1.asm b/mpn/x86/mod_34lsub1.asm

index 68b4a73dbc78b060c3c55cf79a1ac973978453fd..4167d2cde4badd6d715b553485f5f77ad9c04609 100644 (file)
--- a/mpn/x86/mod_34lsub1.asm
+++ b/mpn/x86/mod_34lsub1.asm
@@ -21,11 +21,11 @@ include(`../config.m4')
  
  
  C      cycles/limb
-C P5:     3.0
-C P6:     3.66
-C K6:     3.0
-C K7:     1.3
-C P4:     9
+C P5     3.0
+C P6     3.66
+C K6     3.0
+C K7     1.3
+C P4     9
  
  
  C mp_limb_t mpn_mod_34lsub1 (mp_srcptr src, mp_size_t size)
diff --git a/mpn/x86/mul_1.asm b/mpn/x86/mul_1.asm

index 1d715ece7e77ecac2fd586e5e9232ea5246f9fd1..24fe46bc7a11a74e7dfa7a641d6cdbcb3a99ef52 100644 (file)
--- a/mpn/x86/mul_1.asm
+++ b/mpn/x86/mul_1.asm
@@ -22,19 +22,19 @@ dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
  include(`../config.m4')
  
  
-C                           cycles/limb
-C P5:                           12.5
-C P6 model 0-8,10-12)            5.5
+C                          cycles/limb
+C P5                           12.5
+C P6 model 0-8,10-12            5.5
  C P6 model 9  (Banias)
-C P6 model 13 (Dothan)           5.25
-C P4 model 0  (Willamette)      19.0
-C P4 model 1  (?)               19.0
-C P4 model 2  (Northwood)       19.0
+C P6 model 13 (Dothan)          5.25
+C P4 model 0  (Willamette)     19.0
+C P4 model 1  (?)              19.0
+C P4 model 2  (Northwood)      19.0
  C P4 model 3  (Prescott)
  C P4 model 4  (Nocona)
-C K6:                           10.5
-C K7:                            4.5
-C K8:
+C AMD K6                       10.5
+C AMD K7                        4.5
+C AMD K8
  
  
  C mp_limb_t mpn_mul_1 (mp_ptr dst, mp_srcptr src, mp_size_t size,
diff --git a/mpn/x86/mul_basecase.asm b/mpn/x86/mul_basecase.asm

index 7918ea07f3430a585d3dbc5424e7b5a0d082689a..1400fac845da7d8592e3600548818d41938bde47 100644 (file)
--- a/mpn/x86/mul_basecase.asm
+++ b/mpn/x86/mul_basecase.asm
@@ -23,11 +23,11 @@ include(`../config.m4')
  
  
  C     cycles/crossproduct
-C P5:     15
-C P6:      7.5
-C K6:     12.5
-C K7:      5.5
-C P4:     24
+C P5     15
+C P6      7.5
+C K6     12.5
+C K7      5.5
+C P4     24
  
  
  C void mpn_mul_basecase (mp_ptr wp,
diff --git a/mpn/x86/nano/gmp-mparam.h b/mpn/x86/nano/gmp-mparam.h

new file mode 100644 (file)

index 0000000..5fa5093
--- /dev/null
+++ b/mpn/x86/nano/gmp-mparam.h
@@ -0,0 +1,152 @@
+/* x86/nano gmp-mparam.h -- Compiler/machine parameter header file.
+
+Copyright 1991, 1993, 1994, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007,
+2008, 2009, 2010, 2011 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#define GMP_LIMB_BITS 32
+#define BYTES_PER_MP_LIMB 4
+
+/* Generated by tuneup.c, 2011-11-25, gcc 4.2 */
+
+#define MOD_1_1P_METHOD                      1
+#define MOD_1_NORM_THRESHOLD                 3
+#define MOD_1_UNNORM_THRESHOLD               3
+#define MOD_1N_TO_MOD_1_1_THRESHOLD         10
+#define MOD_1U_TO_MOD_1_1_THRESHOLD          9
+#define MOD_1_1_TO_MOD_1_2_THRESHOLD         0  /* never mpn_mod_1_1p */
+#define MOD_1_2_TO_MOD_1_4_THRESHOLD        53
+#define PREINV_MOD_1_TO_MOD_1_THRESHOLD     12
+#define USE_PREINV_DIVREM_1                  1
+#define DIV_QR_2_PI2_THRESHOLD           MP_SIZE_T_MAX  /* never */
+#define DIVEXACT_1_THRESHOLD                 0  /* always (native) */
+#define BMOD_1_TO_MOD_1_THRESHOLD           32
+
+#define MUL_TOOM22_THRESHOLD                16
+#define MUL_TOOM33_THRESHOLD               132
+#define MUL_TOOM44_THRESHOLD               195
+#define MUL_TOOM6H_THRESHOLD               270
+#define MUL_TOOM8H_THRESHOLD               478
+
+#define MUL_TOOM32_TO_TOOM43_THRESHOLD     129
+#define MUL_TOOM32_TO_TOOM53_THRESHOLD     138
+#define MUL_TOOM42_TO_TOOM53_THRESHOLD     130
+#define MUL_TOOM42_TO_TOOM63_THRESHOLD     135
+
+#define SQR_BASECASE_THRESHOLD               0  /* always (native) */
+#define SQR_TOOM2_THRESHOLD                 28
+#define SQR_TOOM3_THRESHOLD                194
+#define SQR_TOOM4_THRESHOLD                502
+#define SQR_TOOM6_THRESHOLD                746
+#define SQR_TOOM8_THRESHOLD               1005
+
+#define MULMID_TOOM42_THRESHOLD             40
+
+#define MULMOD_BNM1_THRESHOLD               14
+#define SQRMOD_BNM1_THRESHOLD               19
+
+#define POWM_SEC_TABLE  4,23,258,828,2246
+
+#define MUL_FFT_MODF_THRESHOLD             308  /* k = 5 */
+#define MUL_FFT_TABLE3                                      \
+  { {    308, 5}, {     13, 6}, {      7, 5}, {     17, 6}, \
+    {      9, 5}, {     19, 6}, {     11, 5}, {     23, 6}, \
+    {     13, 7}, {      7, 6}, {     17, 7}, {      9, 6}, \
+    {     19, 7}, {     11, 6}, {     24, 7}, {     15, 6}, \
+    {     31, 7}, {     19, 8}, {     11, 7}, {     25, 8}, \
+    {     15, 7}, {     33, 8}, {     19, 7}, {     39, 8}, \
+    {     23, 7}, {     47, 9}, {     15, 8}, {     31, 7}, \
+    {     63, 8}, {     39, 9}, {     23, 8}, {     47,10}, \
+    {     15, 9}, {     31, 8}, {     63, 9}, {     47,10}, \
+    {     31, 9}, {     71,10}, {     47, 9}, {     95,11}, \
+    {     31,10}, {     63, 9}, {    127, 8}, {    255,10}, \
+    {     79, 9}, {    159,10}, {     95, 9}, {    191,11}, \
+    {     63,10}, {    127, 9}, {    255, 8}, {    543, 9}, \
+    {    287, 8}, {    575, 7}, {   1215,10}, {    159,11}, \
+    {     95,10}, {    191,12}, {     63,11}, {    127,10}, \
+    {    255, 9}, {    543, 8}, {   1087,10}, {    287, 9}, \
+    {    607, 8}, {   1215,11}, {    159,10}, {    319, 9}, \
+    {    639,10}, {    351, 9}, {    703, 8}, {   1407, 9}, \
+    {    735, 8}, {   1471,11}, {    191,10}, {    383, 9}, \
+    {    767,10}, {    415, 9}, {    831,11}, {    223,10}, \
+    {    447, 9}, {    895,10}, {    479, 9}, {    959, 8}, \
+    {   1919,12}, {   4096,13}, {   8192,14}, {  16384,15}, \
+    {  32768,16} }
+#define MUL_FFT_TABLE3_SIZE 89
+#define MUL_FFT_THRESHOLD                 1856
+
+#define SQR_FFT_MODF_THRESHOLD             396  /* k = 5 */
+#define SQR_FFT_TABLE3                                      \
+  { {    396, 5}, {     13, 6}, {      7, 5}, {     21, 6}, \
+    {     11, 5}, {     23, 6}, {     21, 7}, {     11, 6}, \
+    {     25, 7}, {     15, 6}, {     31, 7}, {     19, 6}, \
+    {     39, 7}, {     21, 8}, {     11, 7}, {     23, 6}, \
+    {     47, 7}, {     27, 8}, {     15, 7}, {     33, 8}, \
+    {     19, 7}, {     39, 8}, {     23, 7}, {     47, 8}, \
+    {     27, 9}, {     15, 8}, {     31, 7}, {     63, 8}, \
+    {     39, 9}, {     23, 8}, {     47,10}, {     15, 9}, \
+    {     31, 8}, {     63, 9}, {     39, 8}, {     79, 9}, \
+    {     47,10}, {     31, 9}, {     79,10}, {     47, 9}, \
+    {     95,11}, {     31,10}, {     63, 9}, {    127,10}, \
+    {     79, 9}, {    159,10}, {     95,11}, {     63,10}, \
+    {    127, 9}, {    255, 8}, {    543,10}, {    143, 9}, \
+    {    287, 8}, {    607, 7}, {   1215, 6}, {   2431,10}, \
+    {    159, 8}, {    639,11}, {     95,10}, {    191,12}, \
+    {     63,11}, {    127,10}, {    255, 9}, {    543, 8}, \
+    {   1087,10}, {    287, 9}, {    607, 8}, {   1215,11}, \
+    {    159,10}, {    319, 9}, {    671,10}, {    351, 9}, \
+    {    703, 8}, {   1407, 9}, {    735, 8}, {   1471, 7}, \
+    {   2943,11}, {    191,10}, {    383, 9}, {    799,10}, \
+    {    415, 9}, {    895,10}, {    479,12}, {   4096,13}, \
+    {   8192,14}, {  16384,15}, {  32768,16} }
+#define SQR_FFT_TABLE3_SIZE 87
+#define SQR_FFT_THRESHOLD                 2368
+
+#define MULLO_BASECASE_THRESHOLD             0  /* always */
+#define MULLO_DC_THRESHOLD                  51
+#define MULLO_MUL_N_THRESHOLD             3369
+
+#define DC_DIV_QR_THRESHOLD                 56
+#define DC_DIVAPPR_Q_THRESHOLD             183
+#define DC_BDIV_QR_THRESHOLD                55
+#define DC_BDIV_Q_THRESHOLD                118
+
+#define INV_MULMOD_BNM1_THRESHOLD           30
+#define INV_NEWTON_THRESHOLD               266
+#define INV_APPR_THRESHOLD                 218
+
+#define BINV_NEWTON_THRESHOLD              268
+#define REDC_1_TO_REDC_N_THRESHOLD          56
+
+#define MU_DIV_QR_THRESHOLD               1308
+#define MU_DIVAPPR_Q_THRESHOLD            1528
+#define MUPI_DIV_QR_THRESHOLD              124
+#define MU_BDIV_QR_THRESHOLD               855
+#define MU_BDIV_Q_THRESHOLD               1334
+
+#define MATRIX22_STRASSEN_THRESHOLD         14
+#define HGCD_THRESHOLD                     104
+#define HGCD_APPR_THRESHOLD                139
+#define HGCD_REDUCE_THRESHOLD             2121
+#define GCD_DC_THRESHOLD                   456
+#define GCDEXT_DC_THRESHOLD                321
+#define JACOBI_BASE_METHOD                   4
+
+#define GET_STR_DC_THRESHOLD                11
+#define GET_STR_PRECOMPUTE_THRESHOLD        25
+#define SET_STR_DC_THRESHOLD               542
+#define SET_STR_PRECOMPUTE_THRESHOLD       840
diff --git a/mpn/x86/p6/README b/mpn/x86/p6/README

index 1ded4e7177af7937c46a41f856f55afe0e4f23b2..c8e6f0d6710f9f9e6d3b0dfeabfa73814e6bfa69 100644 (file)
--- a/mpn/x86/p6/README
+++ b/mpn/x86/p6/README
@@ -52,7 +52,7 @@ Some of these might be able to be improved.
  
         mpn_mul_basecase          8.2 cycles/crossproduct (approx)
         mpn_sqr_basecase          4.0 cycles/crossproduct (approx)
-                                 or 7.75 cycles/triangleproduct (approx)
+                                 or 7.75 cycles/triangleproduct (approx)
  
  Pentium II and III have MMX and get the following improvements.
  
diff --git a/mpn/x86/p6/aors_n.asm b/mpn/x86/p6/aors_n.asm

index 784ed08d6e8d96154681dcb9fae446802618328b..3bf03627844d075228f68244c7a5c5ecf6e4f990 100644 (file)
--- a/mpn/x86/p6/aors_n.asm
+++ b/mpn/x86/p6/aors_n.asm
@@ -23,10 +23,10 @@ C TODO:
  C  * Avoid indexed addressing, it makes us stall on the two-ported register
  C    file.
  
-C                           cycles/limb
-C P6 model 0-8,10-12)           3.17
-C P6 model 9   (Banias)         ?
-C P6 model 13  (Dothan)         2.25
+C                          cycles/limb
+C P6 model 0-8,10-12           3.17
+C P6 model 9   (Banias)                2.15
+C P6 model 13  (Dothan)                2.25
  
  
  define(`rp',   `%edi')
diff --git a/mpn/x86/p6/aorsmul_1.asm b/mpn/x86/p6/aorsmul_1.asm

index d0b0cef62016d792a4e8d2f110dc7add7393fce3..2f9246795dd9e2270f80c4bb877db451ecaf2dcb 100644 (file)
--- a/mpn/x86/p6/aorsmul_1.asm
+++ b/mpn/x86/p6/aorsmul_1.asm
@@ -20,19 +20,19 @@ dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
  include(`../config.m4')
  
  
-C                           cycles/limb
-C P5:
-C P6 model 0-8,10-12)            6.44
-C P6 model 9  (Banias)
-C P6 model 13 (Dothan)           6.11
+C                          cycles/limb
+C P5
+C P6 model 0-8,10-12            6.44
+C P6 model 9  (Banias)          6.15
+C P6 model 13 (Dothan)          6.11
  C P4 model 0  (Willamette)
  C P4 model 1  (?)
  C P4 model 2  (Northwood)
  C P4 model 3  (Prescott)
  C P4 model 4  (Nocona)
-C K6:
-C K7:
-C K8:
+C AMD K6
+C AMD K7
+C AMD K8
  
  
  dnl  P6 UNROLL_COUNT cycles/limb
diff --git a/mpn/x86/p6/bdiv_q_1.asm b/mpn/x86/p6/bdiv_q_1.asm

new file mode 100644 (file)

index 0000000..0ffbc78
--- /dev/null
+++ b/mpn/x86/p6/bdiv_q_1.asm
@@ -0,0 +1,275 @@
+dnl  Intel P6 mpn_bdiv_q_1, mpn_pi1_bdiv_q_1 -- exact division style quotient.
+
+dnl  Copyright 2001, 2002, 2007, 2011 Free Software Foundation, Inc.
+dnl
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  Rearranged from mpn/x86/p6/dive_1.asm by Marco Bodrato.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
+dnl
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+C       odd  even  divisor
+C P6:  10.0  12.0  cycles/limb
+
+C MULFUNC_PROLOGUE(mpn_bdiv_q_1 mpn_pi1_bdiv_q_1)
+
+C The odd case is basically the same as mpn_modexact_1_odd, just with an
+C extra store, and it runs at the same 10 cycles which is the dependent
+C chain.
+C
+C The shifts for the even case aren't on the dependent chain so in principle
+C it could run the same too, but nothing running at 10 has been found.
+C Perhaps there are too many uops (an extra 4 over the odd case).
+
+defframe(PARAM_SHIFT,  24)
+defframe(PARAM_INVERSE,20)
+defframe(PARAM_DIVISOR,16)
+defframe(PARAM_SIZE,   12)
+defframe(PARAM_SRC,     8)
+defframe(PARAM_DST,     4)
+
+defframe(SAVE_EBX,     -4)
+defframe(SAVE_ESI,     -8)
+defframe(SAVE_EDI,    -12)
+defframe(SAVE_EBP,    -16)
+deflit(STACK_SPACE, 16)
+
+dnl  re-use parameter space
+define(VAR_INVERSE,`PARAM_SRC')
+
+       TEXT
+
+C mp_limb_t
+C mpn_pi1_bdiv_q_1 (mp_ptr dst, mp_srcptr src, mp_size_t size, mp_limb_t divisor,
+C                  mp_limb_t inverse, int shift)
+
+       ALIGN(16)
+PROLOGUE(mpn_pi1_bdiv_q_1)
+deflit(`FRAME',0)
+
+       subl    $STACK_SPACE, %esp      FRAME_subl_esp(STACK_SPACE)
+
+       movl    %esi, SAVE_ESI
+       movl    PARAM_SRC, %esi
+
+       movl    %ebx, SAVE_EBX
+       movl    PARAM_SIZE, %ebx
+
+       movl    %ebp, SAVE_EBP
+       movl    PARAM_INVERSE, %ebp
+
+       movl    PARAM_SHIFT, %ecx       C trailing twos
+
+L(common):
+       movl    %edi, SAVE_EDI
+       movl    PARAM_DST, %edi
+
+       leal    (%esi,%ebx,4), %esi     C src end
+
+       leal    (%edi,%ebx,4), %edi     C dst end
+       negl    %ebx                    C -size
+
+       movl    (%esi,%ebx,4), %eax     C src[0]
+
+       orl     %ecx, %ecx
+       jz      L(odd_entry)
+
+       movl    %edi, PARAM_DST
+       movl    %ebp, VAR_INVERSE
+
+L(even):
+       C eax   src[0]
+       C ebx   counter, limbs, negative
+       C ecx   shift
+       C edx
+       C esi
+       C edi
+       C ebp
+
+       xorl    %ebp, %ebp              C initial carry bit
+       xorl    %edx, %edx              C initial carry limb (for size==1)
+
+       incl    %ebx
+       jz      L(even_one)
+
+       movl    (%esi,%ebx,4), %edi     C src[1]
+
+       shrdl(  %cl, %edi, %eax)
+
+       jmp     L(even_entry)
+
+
+L(even_top):
+       C eax   scratch
+       C ebx   counter, limbs, negative
+       C ecx   shift
+       C edx   scratch
+       C esi   &src[size]
+       C edi   &dst[size] and scratch
+       C ebp   carry bit
+
+       movl    (%esi,%ebx,4), %edi
+
+       mull    PARAM_DIVISOR
+
+       movl    -4(%esi,%ebx,4), %eax
+       shrdl(  %cl, %edi, %eax)
+
+       subl    %ebp, %eax
+
+       sbbl    %ebp, %ebp
+       subl    %edx, %eax
+
+       sbbl    $0, %ebp
+
+L(even_entry):
+       imull   VAR_INVERSE, %eax
+
+       movl    PARAM_DST, %edi
+       negl    %ebp
+
+       movl    %eax, -4(%edi,%ebx,4)
+       incl    %ebx
+       jnz     L(even_top)
+
+       mull    PARAM_DIVISOR
+
+       movl    -4(%esi), %eax
+
+L(even_one):
+       shrl    %cl, %eax
+       movl    SAVE_ESI, %esi
+
+       subl    %ebp, %eax
+       movl    SAVE_EBP, %ebp
+
+       subl    %edx, %eax
+       movl    SAVE_EBX, %ebx
+
+       imull   VAR_INVERSE, %eax
+
+       movl    %eax, -4(%edi)
+       movl    SAVE_EDI, %edi
+       addl    $STACK_SPACE, %esp
+
+       ret
+
+C The dependent chain here is
+C
+C      subl    %edx, %eax       1
+C      imull   %ebp, %eax       4
+C      mull    PARAM_DIVISOR    5
+C                             ----
+C      total                   10
+C
+C and this is the measured speed.  No special scheduling is necessary, out
+C of order execution hides the load latency.
+
+L(odd_top):
+       C eax   scratch (src limb)
+       C ebx   counter, limbs, negative
+       C ecx   carry bit
+       C edx   carry limb, high of last product
+       C esi   &src[size]
+       C edi   &dst[size]
+       C ebp   inverse
+
+       mull    PARAM_DIVISOR
+
+       movl    (%esi,%ebx,4), %eax
+       subl    %ecx, %eax
+
+       sbbl    %ecx, %ecx
+       subl    %edx, %eax
+
+       sbbl    $0, %ecx
+
+L(odd_entry):
+       imull   %ebp, %eax
+
+       movl    %eax, (%edi,%ebx,4)
+       negl    %ecx
+
+       incl    %ebx
+       jnz     L(odd_top)
+
+
+       movl    SAVE_ESI, %esi
+
+       movl    SAVE_EDI, %edi
+
+       movl    SAVE_EBP, %ebp
+
+       movl    SAVE_EBX, %ebx
+       addl    $STACK_SPACE, %esp
+
+       ret
+
+EPILOGUE()
+
+C mp_limb_t mpn_bdiv_q_1 (mp_ptr dst, mp_srcptr src, mp_size_t size,
+C                           mp_limb_t divisor);
+C
+
+       ALIGN(16)
+PROLOGUE(mpn_bdiv_q_1)
+deflit(`FRAME',0)
+
+       movl    PARAM_DIVISOR, %eax
+       subl    $STACK_SPACE, %esp      FRAME_subl_esp(STACK_SPACE)
+
+       movl    %esi, SAVE_ESI
+       movl    PARAM_SRC, %esi
+
+       movl    %ebx, SAVE_EBX
+       movl    PARAM_SIZE, %ebx
+
+       bsfl    %eax, %ecx              C trailing twos
+
+       movl    %ebp, SAVE_EBP
+
+       shrl    %cl, %eax               C d without twos
+
+       movl    %eax, %edx
+       shrl    %eax                    C d/2 without twos
+
+       movl    %edx, PARAM_DIVISOR
+       andl    $127, %eax
+
+ifdef(`PIC',`
+       LEA(    binvert_limb_table, %ebp)
+       movzbl  (%eax,%ebp), %ebp               C inv 8 bits
+',`
+       movzbl  binvert_limb_table(%eax), %ebp  C inv 8 bits
+')
+
+       leal    (%ebp,%ebp), %eax       C 2*inv
+
+       imull   %ebp, %ebp              C inv*inv
+       imull   %edx, %ebp      C inv*inv*d
+
+       subl    %ebp, %eax              C inv = 2*inv - inv*inv*d
+       leal    (%eax,%eax), %ebp       C 2*inv
+
+       imull   %eax, %eax              C inv*inv
+       imull   %edx, %eax      C inv*inv*d
+
+       subl    %eax, %ebp              C inv = 2*inv - inv*inv*d
+
+       jmp     L(common)
+
+EPILOGUE()
diff --git a/mpn/x86/p6/dive_1.asm b/mpn/x86/p6/dive_1.asm

index 23d1d53af59b742d7be6cdbb4a688ade2c7ca5f6..bccd5fd944b9b6192bd8a3763c4c1c18182de7f5 100644 (file)
--- a/mpn/x86/p6/dive_1.asm
+++ b/mpn/x86/p6/dive_1.asm
@@ -127,7 +127,7 @@ C   subl    %edx, %eax       1
  C      imull   %ebp, %eax       4
  C      mull    PARAM_DIVISOR    5
  C                             ----
-C       total                  10
+C      total                   10
  C
  C and this is the measured speed.  No special scheduling is necessary, out
  C of order execution hides the load latency.
diff --git a/mpn/x86/p6/gcd_1.asm b/mpn/x86/p6/gcd_1.asm

new file mode 100644 (file)

index 0000000..6b4f277
--- /dev/null
+++ b/mpn/x86/p6/gcd_1.asm
@@ -0,0 +1,146 @@
+dnl  x86 mpn_gcd_1 optimised for processors with fast BSF.
+
+dnl  Based on the K7 gcd_1.asm, by Kevin Ryde.  Rehacked by Torbjorn Granlund.
+
+dnl  Copyright 2000, 2001, 2002, 2005, 2009, 2011, 2012 Free Software
+dnl  Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+C           cycles/bit (approx)
+C AMD K7        7.80
+C AMD K8,K9     7.79
+C AMD K10       4.08
+C AMD bd1       ?
+C AMD bobcat    7.82
+C Intel P4-2   14.9
+C Intel P4-3/4 14.0
+C Intel P6/13   5.09
+C Intel core2   4.22
+C Intel NHM     5.00
+C Intel SBR     5.00
+C Intel atom   17.1
+C VIA nano     ?
+C Numbers measured with: speed -CD -s16-32 -t16 mpn_gcd_1
+
+C Threshold of when to call bmod when U is one limb.  Should be about
+C (time_in_cycles(bmod_1,1) + call_overhead) / (cycles/bit).
+define(`BMOD_THRES_LOG2', 6)
+
+
+define(`up',    `%edi')
+define(`n',     `%esi')
+define(`v0',    `%edx')
+
+
+ASM_START()
+       TEXT
+       ALIGN(16)
+PROLOGUE(mpn_gcd_1)
+       push    %edi
+       push    %esi
+
+       mov     12(%esp), up
+       mov     16(%esp), n
+       mov     20(%esp), v0
+
+       mov     (up), %eax      C U low limb
+       or      v0, %eax
+       bsf     %eax, %eax      C min(ctz(u0),ctz(v0))
+
+       bsf     v0, %ecx
+       shr     %cl, v0
+
+       push    %eax            C preserve common twos over call
+       push    v0              C preserve v0 argument over call
+
+       cmp     $1, n
+       jnz     L(reduce_nby1)
+
+C Both U and V are single limbs, reduce with bmod if u0 >> v0.
+       mov     (up), %ecx
+       mov     %ecx, %eax
+       shr     $BMOD_THRES_LOG2, %ecx
+       cmp     %ecx, v0
+       ja      L(reduced)
+       jmp     L(bmod)
+
+L(reduce_nby1):
+       cmp     $BMOD_1_TO_MOD_1_THRESHOLD, n
+       jl      L(bmod)
+ifdef(`PIC_WITH_EBX',`
+       push    %ebx
+       call    L(movl_eip_to_ebx)
+       add     $_GLOBAL_OFFSET_TABLE_, %ebx
+')
+       push    v0              C param 3
+       push    n               C param 2
+       push    up              C param 1
+       CALL(   mpn_mod_1)
+       jmp     L(called)
+
+L(bmod):
+ifdef(`PIC_WITH_EBX',`dnl
+       push    %ebx
+       call    L(movl_eip_to_ebx)
+       add     $_GLOBAL_OFFSET_TABLE_, %ebx
+')
+       push    v0              C param 3
+       push    n               C param 2
+       push    up              C param 1
+       CALL(   mpn_modexact_1_odd)
+
+L(called):
+       add     $12, %esp       C deallocate params
+ifdef(`PIC_WITH_EBX',`dnl
+       pop     %ebx
+')
+L(reduced):
+       pop     %edx
+
+       bsf     %eax, %ecx
+C      test    %eax, %eax      C FIXME: does this lower latency?
+       jnz     L(mid)
+       jmp     L(end)
+
+       ALIGN(16)               C               K10   BD    C2    NHM   SBR
+L(top):        cmovc(  %esi, %eax)     C if x-y < 0    0,3   0,3   0,6   0,5   0,5
+       cmovc(  %edi, %edx)     C use x,y-x     0,3   0,3   2,8   1,7   1,7
+L(mid):        shr     %cl, %eax       C               1,7   1,6   2,8   2,8   2,8
+       mov     %edx, %esi      C               1     1     4     3     3
+       sub     %eax, %esi      C               2     2     5     4     4
+       bsf     %esi, %ecx      C               3     3     6     5     5
+       mov     %eax, %edi      C               2     2     3     3     4
+       sub     %edx, %eax      C               2     2     4     3     4
+       jnz     L(top)          C
+
+L(end):        pop     %ecx
+       mov     %edx, %eax
+       shl     %cl, %eax
+
+       pop     %esi
+       pop     %edi
+       ret
+
+ifdef(`PIC_WITH_EBX',`dnl
+L(movl_eip_to_ebx):
+       mov     (%esp), %ebx
+       ret
+')
+EPILOGUE()
diff --git a/mpn/x86/p6/gmp-mparam.h b/mpn/x86/p6/gmp-mparam.h

index 8ff8d48078f02e30c8a59492ff3c632781498a87..eabe5a2d1594511c3f64b8d5f04e8d3b00134a23 100644 (file)
--- a/mpn/x86/p6/gmp-mparam.h
+++ b/mpn/x86/p6/gmp-mparam.h
@@ -1,7 +1,7 @@
  /* Intel P6 gmp-mparam.h -- Compiler/machine parameter header file.
  
-Copyright 1991, 1993, 1994, 1999, 2000, 2001, 2002, 2003 Free Software
-Foundation, Inc.
+Copyright 1991, 1993, 1994, 1999, 2000, 2001, 2002, 2003, 2008, 2009, 2010, 2012
+Free Software Foundation, Inc.
  
  This file is part of the GNU MP Library.
  
@@ -23,44 +23,161 @@ along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
  #define BYTES_PER_MP_LIMB 4
  
  
-/* NOTE: In a fat binary build SQR_TOOM2_THRESHOLD here cannot be smaller than
-   the value in mpn/x86/p6/mmx/gmp-mparam.h.  The former is used as a hard
-   limit in mpn/x86/p6/sqr_basecase.asm, and that file will be run by the
-   p6/mmx cpus (pentium2, pentium3).  */
-
-
-/* 200MHz Pentium Pro */
-
-/* Generated by tuneup.c, 2003-02-12, gcc 2.95 */
-
-#define MUL_TOOM22_THRESHOLD             23
-#define MUL_TOOM33_THRESHOLD            140
-
-#define SQR_BASECASE_THRESHOLD            0  /* always */
-#define SQR_TOOM2_THRESHOLD              52
-#define SQR_TOOM3_THRESHOLD             189
-
-#define DIV_SB_PREINV_THRESHOLD           0  /* always */
-#define DIV_DC_THRESHOLD                116
-#define POWM_THRESHOLD                  131
-
-#define GCD_ACCEL_THRESHOLD               3
-#define JACOBI_BASE_METHOD                1
-
-#define USE_PREINV_DIVREM_1               0
-#define USE_PREINV_MOD_1                  1  /* native */
-#define DIVREM_2_THRESHOLD                0  /* always */
-#define DIVEXACT_1_THRESHOLD              0  /* always */
-#define MODEXACT_1_ODD_THRESHOLD          0  /* always */
-
-#define GET_STR_DC_THRESHOLD             18
-#define GET_STR_PRECOMPUTE_THRESHOLD     23
-#define SET_STR_THRESHOLD              6093
-
-#define MUL_FFT_TABLE  { 464, 928, 1920, 3584, 10240, 40960, 0 }
-#define MUL_FFT_MODF_THRESHOLD          360
-#define MUL_FFT_THRESHOLD              2816
-
-#define SQR_FFT_TABLE  { 528, 1184, 1920, 4608, 14336, 40960, 0 }
-#define SQR_FFT_MODF_THRESHOLD          440
-#define SQR_FFT_THRESHOLD              2816
+/* NOTE: In a fat binary build the SQR_TOOM2_THRESHOLD in
+   mpn/x86/p6/mmx/gmp-mparam.h cannot be more than the value here.  The value
+   here is used as a hard limit in mpn/x86/p6/sqr_basecase.asm.  */
+
+
+/* 1867 MHz P6 model 13 */
+
+#define MOD_1_NORM_THRESHOLD                 4
+#define MOD_1_UNNORM_THRESHOLD               4
+#define MOD_1N_TO_MOD_1_1_THRESHOLD          5
+#define MOD_1U_TO_MOD_1_1_THRESHOLD          4
+#define MOD_1_1_TO_MOD_1_2_THRESHOLD        11
+#define MOD_1_2_TO_MOD_1_4_THRESHOLD         0  /* never mpn_mod_1s_2p */
+#define PREINV_MOD_1_TO_MOD_1_THRESHOLD      8
+#define USE_PREINV_DIVREM_1                  1  /* native */
+#define DIV_QR_2_PI2_THRESHOLD           MP_SIZE_T_MAX  /* never */
+#define DIVEXACT_1_THRESHOLD                 0  /* always (native) */
+#define BMOD_1_TO_MOD_1_THRESHOLD           21
+
+#define MUL_TOOM22_THRESHOLD                20
+#define MUL_TOOM33_THRESHOLD                74
+#define MUL_TOOM44_THRESHOLD               181
+#define MUL_TOOM6H_THRESHOLD               252
+#define MUL_TOOM8H_THRESHOLD               363
+
+#define MUL_TOOM32_TO_TOOM43_THRESHOLD      73
+#define MUL_TOOM32_TO_TOOM53_THRESHOLD     114
+#define MUL_TOOM42_TO_TOOM53_THRESHOLD     115
+#define MUL_TOOM42_TO_TOOM63_THRESHOLD      80
+
+#define SQR_BASECASE_THRESHOLD               0  /* always (native) */
+#define SQR_TOOM2_THRESHOLD                 30
+#define SQR_TOOM3_THRESHOLD                101
+#define SQR_TOOM4_THRESHOLD                154
+#define SQR_TOOM6_THRESHOLD                222
+#define SQR_TOOM8_THRESHOLD                527
+
+#define MULMID_TOOM42_THRESHOLD             58
+
+#define MULMOD_BNM1_THRESHOLD               13
+#define SQRMOD_BNM1_THRESHOLD               17
+
+#define POWM_SEC_TABLE  4,23,258,768,2388
+
+#define MUL_FFT_MODF_THRESHOLD             565  /* k = 5 */
+#define MUL_FFT_TABLE3                                      \
+  { {    565, 5}, {     25, 6}, {     13, 5}, {     27, 6}, \
+    {     25, 7}, {     13, 6}, {     28, 7}, {     15, 6}, \
+    {     31, 7}, {     17, 6}, {     35, 7}, {     27, 8}, \
+    {     15, 7}, {     35, 8}, {     19, 7}, {     41, 8}, \
+    {     23, 7}, {     47, 8}, {     27, 9}, {     15, 8}, \
+    {     31, 7}, {     63, 8}, {     39, 9}, {     23, 5}, \
+    {    383, 4}, {    991, 5}, {    511, 6}, {    267, 7}, \
+    {    157, 8}, {     91, 9}, {     47, 8}, {    111, 9}, \
+    {     63, 8}, {    127, 9}, {     79,10}, {     47, 9}, \
+    {     95,11}, {     31,10}, {     63, 9}, {    135,10}, \
+    {     79, 9}, {    159,10}, {     95,11}, {     63,10}, \
+    {    143, 9}, {    287,10}, {    159,11}, {     95,10}, \
+    {    191,12}, {     63,11}, {    127,10}, {    255, 9}, \
+    {    511,10}, {    271, 9}, {    543,10}, {    287,11}, \
+    {    159,10}, {    335, 9}, {    671,11}, {    191,10}, \
+    {    383, 9}, {    767,10}, {    399, 9}, {    799,10}, \
+    {    415,11}, {    223,12}, {    127,11}, {    255,10}, \
+    {    543, 9}, {   1087,11}, {    287,10}, {    607,11}, \
+    {    319,10}, {    671,12}, {    191,11}, {    383,10}, \
+    {    799,11}, {    415,10}, {    831,13}, {    127,12}, \
+    {    255,11}, {    543,10}, {   1087,11}, {    607,10}, \
+    {   1215,12}, {    319,11}, {    671,10}, {   1343,11}, \
+    {    735,10}, {   1471,12}, {    383,11}, {    799,10}, \
+    {   1599,11}, {    863,12}, {    447,11}, {    959,13}, \
+    {    255,12}, {    511,11}, {   1087,12}, {    575,11}, \
+    {   1215,12}, {    639,11}, {   1343,12}, {    703,11}, \
+    {   1471,13}, {    383,12}, {    831,11}, {   1727,12}, \
+    {    959,14}, {    255,13}, {    511,12}, {   1215,13}, \
+    {    639,12}, {   1471,11}, {   2943,13}, {    767,12}, \
+    {   1727,13}, {    895,12}, {   1919,14}, {    511,13}, \
+    {   1023,12}, {   2111,13}, {   1151,12}, {   2431,13}, \
+    {   1407,12}, {   2815,14}, {    767,13}, {   1663,12}, \
+    {   3455,13}, {   8192,14}, {  16384,15}, {  32768,16} }
+#define MUL_FFT_TABLE3_SIZE 132
+#define MUL_FFT_THRESHOLD                 6784
+
+#define SQR_FFT_MODF_THRESHOLD             472  /* k = 5 */
+#define SQR_FFT_TABLE3                                      \
+  { {    472, 5}, {     25, 6}, {     13, 5}, {     27, 6}, \
+    {     25, 7}, {     13, 6}, {     27, 7}, {     15, 6}, \
+    {     31, 7}, {     17, 6}, {     35, 7}, {     27, 8}, \
+    {     15, 7}, {     35, 8}, {     19, 7}, {     41, 8}, \
+    {     23, 7}, {     49, 8}, {     27, 9}, {     15, 8}, \
+    {     39, 9}, {     23, 8}, {     51,10}, {     15, 9}, \
+    {     31, 8}, {     63, 4}, {   1023, 8}, {     67, 9}, \
+    {     39, 5}, {    639, 4}, {   1471, 6}, {    383, 7}, \
+    {    209, 8}, {    119, 9}, {     63, 7}, {    255, 8}, \
+    {    139, 9}, {     71, 8}, {    143, 9}, {     79,10}, \
+    {     47, 9}, {     95,11}, {     31,10}, {     63, 9}, \
+    {    135,10}, {     79, 9}, {    159, 8}, {    319, 9}, \
+    {    167,10}, {     95,11}, {     63,10}, {    143, 9}, \
+    {    287,10}, {    159,11}, {     95,10}, {    191,12}, \
+    {     63,11}, {    127,10}, {    255, 9}, {    543, 8}, \
+    {   1087,10}, {    287, 9}, {    575,11}, {    159,10}, \
+    {    319, 9}, {    639,10}, {    335, 9}, {    671,10}, \
+    {    351, 9}, {    703,11}, {    191,10}, {    383, 9}, \
+    {    767,10}, {    399, 9}, {    799,10}, {    415, 9}, \
+    {    831,11}, {    223,12}, {    127,11}, {    255,10}, \
+    {    543, 9}, {   1087,11}, {    287,10}, {    607, 9}, \
+    {   1215,11}, {    319,10}, {    671, 9}, {   1343,11}, \
+    {    351,10}, {    703,12}, {    191,11}, {    383,10}, \
+    {    799,11}, {    415,10}, {    831,13}, {    127,12}, \
+    {    255,11}, {    543,10}, {   1087,11}, {    607,12}, \
+    {    319,11}, {    671,10}, {   1343,11}, {    735,12}, \
+    {    383,11}, {    799,10}, {   1599,11}, {    863,12}, \
+    {    447,11}, {    959,13}, {    255,12}, {    511,11}, \
+    {   1087,12}, {    575,11}, {   1215,12}, {    639,11}, \
+    {   1343,12}, {    703,11}, {   1471,13}, {    383,12}, \
+    {    767,11}, {   1599,12}, {    831,11}, {   1727,12}, \
+    {    959,14}, {    255,13}, {    511,12}, {   1215,13}, \
+    {    639,12}, {   1471,13}, {    767,12}, {   1727,13}, \
+    {    895,12}, {   1919,14}, {    511,13}, {   1023,12}, \
+    {   2111,13}, {   1151,12}, {   2431,13}, {   1407,14}, \
+    {    767,13}, {   1663,12}, {   3455,13}, {   8192,14}, \
+    {  16384,15}, {  32768,16} }
+#define SQR_FFT_TABLE3_SIZE 146
+#define SQR_FFT_THRESHOLD                 5760
+
+#define MULLO_BASECASE_THRESHOLD             0  /* always */
+#define MULLO_DC_THRESHOLD                  33
+#define MULLO_MUL_N_THRESHOLD            13463
+
+#define DC_DIV_QR_THRESHOLD                 20
+#define DC_DIVAPPR_Q_THRESHOLD              56
+#define DC_BDIV_QR_THRESHOLD                60
+#define DC_BDIV_Q_THRESHOLD                134
+
+#define INV_MULMOD_BNM1_THRESHOLD           38
+#define INV_NEWTON_THRESHOLD                66
+#define INV_APPR_THRESHOLD                  63
+
+#define BINV_NEWTON_THRESHOLD              250
+#define REDC_1_TO_REDC_N_THRESHOLD          63
+
+#define MU_DIV_QR_THRESHOLD               1164
+#define MU_DIVAPPR_Q_THRESHOLD             979
+#define MUPI_DIV_QR_THRESHOLD               38
+#define MU_BDIV_QR_THRESHOLD              1442
+#define MU_BDIV_Q_THRESHOLD               1470
+
+#define MATRIX22_STRASSEN_THRESHOLD         17
+#define HGCD_THRESHOLD                      64
+#define HGCD_APPR_THRESHOLD                105
+#define HGCD_REDUCE_THRESHOLD             3524
+#define GCD_DC_THRESHOLD                   386
+#define GCDEXT_DC_THRESHOLD                309
+#define JACOBI_BASE_METHOD                   1
+
+#define GET_STR_DC_THRESHOLD                13
+#define GET_STR_PRECOMPUTE_THRESHOLD        26
+#define SET_STR_DC_THRESHOLD               587
+#define SET_STR_PRECOMPUTE_THRESHOLD      1104
diff --git a/mpn/x86/p6/mmx/gmp-mparam.h b/mpn/x86/p6/mmx/gmp-mparam.h

index f239422bf642744d1a9f2bd53adec898a00e9eb7..7bfed234ed9e49638bd94731072ae0e1bdacf87a 100644 (file)
--- a/mpn/x86/p6/mmx/gmp-mparam.h
+++ b/mpn/x86/p6/mmx/gmp-mparam.h
@@ -31,12 +31,12 @@ along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
  /* 800 MHz P6 model 8 */
  
  #define MOD_1_NORM_THRESHOLD                 4
-#define MOD_1_UNNORM_THRESHOLD               5
-#define MOD_1N_TO_MOD_1_1_THRESHOLD          8
+#define MOD_1_UNNORM_THRESHOLD               4
+#define MOD_1N_TO_MOD_1_1_THRESHOLD          9
  #define MOD_1U_TO_MOD_1_1_THRESHOLD          7
-#define MOD_1_1_TO_MOD_1_2_THRESHOLD         0
-#define MOD_1_2_TO_MOD_1_4_THRESHOLD        12
-#define PREINV_MOD_1_TO_MOD_1_THRESHOLD     15
+#define MOD_1_1_TO_MOD_1_2_THRESHOLD         8
+#define MOD_1_2_TO_MOD_1_4_THRESHOLD        10
+#define PREINV_MOD_1_TO_MOD_1_THRESHOLD     17
  #define USE_PREINV_DIVREM_1                  1  /* native */
  #define DIVEXACT_1_THRESHOLD                 0  /* always (native) */
  #define BMOD_1_TO_MOD_1_THRESHOLD           49
@@ -53,7 +53,7 @@ along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
  #define MUL_TOOM42_TO_TOOM63_THRESHOLD      80
  
  #define SQR_BASECASE_THRESHOLD               0  /* always (native) */
-#define SQR_TOOM2_THRESHOLD                 48
+#define SQR_TOOM2_THRESHOLD                 30
  #define SQR_TOOM3_THRESHOLD                 81
  #define SQR_TOOM4_THRESHOLD                142
  #define SQR_TOOM6_THRESHOLD                258
@@ -162,7 +162,7 @@ along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
  #define DC_BDIV_QR_THRESHOLD                76
  #define DC_BDIV_Q_THRESHOLD                175
  
-#define INV_MULMOD_BNM1_THRESHOLD           82
+#define INV_MULMOD_BNM1_THRESHOLD           42
  #define INV_NEWTON_THRESHOLD               268
  #define INV_APPR_THRESHOLD                 250
  
@@ -179,7 +179,7 @@ along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
  #define HGCD_THRESHOLD                     121
  #define GCD_DC_THRESHOLD                   478
  #define GCDEXT_DC_THRESHOLD                361
-#define JACOBI_BASE_METHOD                   1
+#define JACOBI_BASE_METHOD                   4
  
  #define GET_STR_DC_THRESHOLD                13
  #define GET_STR_PRECOMPUTE_THRESHOLD        26
diff --git a/mpn/x86/p6/mode1o.asm b/mpn/x86/p6/mode1o.asm

index 7361164cd17ebccc4b88f057a362999395c054e5..dc88b058a47f9255df33456b9f3dc8cdc5533ecc 100644 (file)
--- a/mpn/x86/p6/mode1o.asm
+++ b/mpn/x86/p6/mode1o.asm
@@ -113,7 +113,7 @@ C   subl    %edx, %eax       1
  C      imull   %edi, %eax       4
  C      mull    PARAM_DIVISOR    5
  C                             ----
-C       total                  10
+C      total                   10
  C
  C and this is the measured speed.  No special scheduling is necessary, out
  C of order execution hides the load latency.
diff --git a/mpn/x86/p6/sse2/gmp-mparam.h b/mpn/x86/p6/sse2/gmp-mparam.h

index ecef4362440265e81c37a91d5cdc6020adb907c4..35de2674899d77b6a647629a72e4671aef53a7a7 100644 (file)
--- a/mpn/x86/p6/sse2/gmp-mparam.h
+++ b/mpn/x86/p6/sse2/gmp-mparam.h
@@ -31,38 +31,42 @@ along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
  /* 1867 MHz P6 model 13 */
  
  #define MOD_1_NORM_THRESHOLD                 4
-#define MOD_1_UNNORM_THRESHOLD               6
-#define MOD_1N_TO_MOD_1_1_THRESHOLD          9
-#define MOD_1U_TO_MOD_1_1_THRESHOLD          6
-#define MOD_1_1_TO_MOD_1_2_THRESHOLD         0  /* never mpn_mod_1_1p */
-#define MOD_1_2_TO_MOD_1_4_THRESHOLD         8
-#define PREINV_MOD_1_TO_MOD_1_THRESHOLD     15
+#define MOD_1_UNNORM_THRESHOLD               4
+#define MOD_1N_TO_MOD_1_1_THRESHOLD          5
+#define MOD_1U_TO_MOD_1_1_THRESHOLD          4
+#define MOD_1_1_TO_MOD_1_2_THRESHOLD        11
+#define MOD_1_2_TO_MOD_1_4_THRESHOLD         0  /* never mpn_mod_1s_2p */
+#define PREINV_MOD_1_TO_MOD_1_THRESHOLD      8
  #define USE_PREINV_DIVREM_1                  1  /* native */
+#define DIV_QR_2_PI2_THRESHOLD           MP_SIZE_T_MAX  /* never */
  #define DIVEXACT_1_THRESHOLD                 0  /* always (native) */
-#define BMOD_1_TO_MOD_1_THRESHOLD           22
+#define BMOD_1_TO_MOD_1_THRESHOLD           21
  
  #define MUL_TOOM22_THRESHOLD                20
  #define MUL_TOOM33_THRESHOLD                77
-#define MUL_TOOM44_THRESHOLD               182
-#define MUL_TOOM6H_THRESHOLD               252
+#define MUL_TOOM44_THRESHOLD               169
+#define MUL_TOOM6H_THRESHOLD               246
  #define MUL_TOOM8H_THRESHOLD               381
  
-#define MUL_TOOM32_TO_TOOM43_THRESHOLD      75
-#define MUL_TOOM32_TO_TOOM53_THRESHOLD     122
-#define MUL_TOOM42_TO_TOOM53_THRESHOLD     115
-#define MUL_TOOM42_TO_TOOM63_THRESHOLD      79
+#define MUL_TOOM32_TO_TOOM43_THRESHOLD      73
+#define MUL_TOOM32_TO_TOOM53_THRESHOLD     114
+#define MUL_TOOM42_TO_TOOM53_THRESHOLD      97
+#define MUL_TOOM42_TO_TOOM63_THRESHOLD      80
+#define MUL_TOOM43_TO_TOOM54_THRESHOLD     106
  
  #define SQR_BASECASE_THRESHOLD               0  /* always (native) */
  #define SQR_TOOM2_THRESHOLD                 30
  #define SQR_TOOM3_THRESHOLD                101
  #define SQR_TOOM4_THRESHOLD                154
  #define SQR_TOOM6_THRESHOLD                222
-#define SQR_TOOM8_THRESHOLD                547
+#define SQR_TOOM8_THRESHOLD                527
+
+#define MULMID_TOOM42_THRESHOLD             58
  
  #define MULMOD_BNM1_THRESHOLD               13
-#define SQRMOD_BNM1_THRESHOLD               18
+#define SQRMOD_BNM1_THRESHOLD               17
  
-#define MUL_FFT_MODF_THRESHOLD             565  /* k = 5 */
+#define MUL_FFT_MODF_THRESHOLD             690  /* k = 5 */
  #define MUL_FFT_TABLE3                                      \
    { {    565, 5}, {     25, 6}, {     13, 5}, {     27, 6}, \
      {     25, 7}, {     13, 6}, {     28, 7}, {     15, 6}, \
@@ -98,9 +102,9 @@ along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
      {   1407,12}, {   2815,14}, {    767,13}, {   1663,12}, \
      {   3455,13}, {   8192,14}, {  16384,15}, {  32768,16} }
  #define MUL_FFT_TABLE3_SIZE 132
-#define MUL_FFT_THRESHOLD                 6784
+#define MUL_FFT_THRESHOLD                 7424
  
-#define SQR_FFT_MODF_THRESHOLD             472  /* k = 5 */
+#define SQR_FFT_MODF_THRESHOLD             565  /* k = 5 */
  #define SQR_FFT_TABLE3                                      \
    { {    472, 5}, {     25, 6}, {     13, 5}, {     27, 6}, \
      {     25, 7}, {     13, 6}, {     27, 7}, {     15, 6}, \
@@ -143,34 +147,41 @@ along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
  #define SQR_FFT_THRESHOLD                 5760
  
  #define MULLO_BASECASE_THRESHOLD             0  /* always */
-#define MULLO_DC_THRESHOLD                  34
+#define MULLO_DC_THRESHOLD                  31
  #define MULLO_MUL_N_THRESHOLD            13463
  
-#define DC_DIV_QR_THRESHOLD                 19
-#define DC_DIVAPPR_Q_THRESHOLD              56
+#define DC_DIV_QR_THRESHOLD                 25
+#define DC_DIVAPPR_Q_THRESHOLD              55
  #define DC_BDIV_QR_THRESHOLD                60
  #define DC_BDIV_Q_THRESHOLD                132
  
  #define INV_MULMOD_BNM1_THRESHOLD           38
-#define INV_NEWTON_THRESHOLD                69
+#define INV_NEWTON_THRESHOLD                65
  #define INV_APPR_THRESHOLD                  65
  
-#define BINV_NEWTON_THRESHOLD              276
-#define REDC_1_TO_REDC_N_THRESHOLD          63
+#define BINV_NEWTON_THRESHOLD              252
+#define REDC_1_TO_REDC_N_THRESHOLD          62
  
-#define MU_DIV_QR_THRESHOLD               1308
-#define MU_DIVAPPR_Q_THRESHOLD             998
-#define MUPI_DIV_QR_THRESHOLD               62
-#define MU_BDIV_QR_THRESHOLD              1442
+#define MU_DIV_QR_THRESHOLD               1164
+#define MU_DIVAPPR_Q_THRESHOLD             748
+#define MUPI_DIV_QR_THRESHOLD               38
+#define MU_BDIV_QR_THRESHOLD              1360
  #define MU_BDIV_Q_THRESHOLD               1470
  
+#define POWM_SEC_TABLE  2,23,258,879,2246
+
  #define MATRIX22_STRASSEN_THRESHOLD         17
-#define HGCD_THRESHOLD                      60
-#define GCD_DC_THRESHOLD                   393
+#define HGCD_THRESHOLD                      69
+#define HGCD_APPR_THRESHOLD                112
+#define HGCD_REDUCE_THRESHOLD             3389
+#define GCD_DC_THRESHOLD                   386
  #define GCDEXT_DC_THRESHOLD                303
  #define JACOBI_BASE_METHOD                   1
  
  #define GET_STR_DC_THRESHOLD                13
-#define GET_STR_PRECOMPUTE_THRESHOLD        22
-#define SET_STR_DC_THRESHOLD               587
-#define SET_STR_PRECOMPUTE_THRESHOLD       983
+#define GET_STR_PRECOMPUTE_THRESHOLD        25
+#define SET_STR_DC_THRESHOLD               582
+#define SET_STR_PRECOMPUTE_THRESHOLD      1118
+
+#define FAC_DSC_THRESHOLD                  178
+#define FAC_ODD_THRESHOLD                   34
diff --git a/mpn/x86/p6/sse2/mod_1_1.asm b/mpn/x86/p6/sse2/mod_1_1.asm

new file mode 100644 (file)

index 0000000..64575ea
--- /dev/null
+++ b/mpn/x86/p6/sse2/mod_1_1.asm
@@ -0,0 +1,23 @@
+dnl  Intel P6/SSE2 mpn_mod_1_1.
+
+dnl  Copyright 2009, 2011 Free Software Foundation, Inc.
+dnl
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+dnl
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+MULFUNC_PROLOGUE(mpn_mod_1_1p)
+include_mpn(`x86/pentium4/sse2/mod_1_1.asm')
diff --git a/mpn/x86/pentium/aors_n.asm b/mpn/x86/pentium/aors_n.asm

index 30d0df79b0c364657e2ed05e0e8566e5c76f5db2..aa761290560041ce31e7ccfaf88d72433740d4ae 100644 (file)
--- a/mpn/x86/pentium/aors_n.asm
+++ b/mpn/x86/pentium/aors_n.asm
@@ -78,13 +78,13 @@ deflit(`FRAME',16)
         pushl   %edx
  FRAME_pushl()
         movl    PARAM_CARRY,%eax
-       shrl    $1,%eax                 C shift bit 0 into carry
+       shrl    %eax                    C shift bit 0 into carry
         jmp     L(oop)
  
  L(endgo):
  deflit(`FRAME',16)
         movl    PARAM_CARRY,%eax
-       shrl    $1,%eax                 C shift bit 0 into carry
+       shrl    %eax                    C shift bit 0 into carry
         jmp     L(end)
  
  EPILOGUE()
diff --git a/mpn/x86/pentium/bdiv_q_1.asm b/mpn/x86/pentium/bdiv_q_1.asm

new file mode 100644 (file)

index 0000000..7e84fc8
--- /dev/null
+++ b/mpn/x86/pentium/bdiv_q_1.asm
@@ -0,0 +1,249 @@
+dnl  Intel Pentium mpn_bdiv_q_1, mpn_pi1_bdiv_q_1 -- mpn by limb exact division.
+
+dnl  Copyright 2001, 2002, 2011 Free Software Foundation, Inc.
+dnl
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  Rearranged from mpn/x86/pentium/dive_1.asm by Marco Bodrato.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
+dnl
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+C         divisor
+C       odd   even
+C P54:  24.5  30.5   cycles/limb
+C P55:  23.0  28.0
+
+MULFUNC_PROLOGUE(mpn_bdiv_q_1 mpn_pi1_bdiv_q_1)
+
+C The P55 speeds noted above, 23 cycles odd or 28 cycles even, are as
+C expected.  On P54 in the even case the shrdl pairing nonsense (see
+C mpn/x86/pentium/README) costs 1 cycle, but it's not clear why there's a
+C further 1.5 slowdown for both odd and even.
+
+defframe(PARAM_SHIFT,  24)
+defframe(PARAM_INVERSE,20)
+defframe(PARAM_DIVISOR,16)
+defframe(PARAM_SIZE,   12)
+defframe(PARAM_SRC,    8)
+defframe(PARAM_DST,    4)
+
+dnl  re-use parameter space
+define(VAR_INVERSE,`PARAM_DST')
+
+       TEXT
+
+       ALIGN(32)
+C mp_limb_t mpn_bdiv_q_1 (mp_ptr dst, mp_srcptr src, mp_size_t size,
+C                           mp_limb_t divisor);
+C
+PROLOGUE(mpn_bdiv_q_1)
+deflit(`FRAME',0)
+
+       movl    $-1, %ecx               C shift count, becomes 0 on first pass
+       movl    PARAM_DIVISOR, %eax     C d, low zero bits stripped below
+
+L(strip_twos):
+       ASSERT(nz, `orl %eax, %eax')
+       shrl    %eax                    C low bit into carry
+       incl    %ecx                    C shift count
+
+       jnc     L(strip_twos)           C until a 1 bit has been shifted out
+
+       leal    1(%eax,%eax), %edx      C d
+       andl    $127, %eax              C d/2, 7 bits
+
+       pushl   %ebx            FRAME_pushl()
+       pushl   %ebp            FRAME_pushl()
+
+ifdef(`PIC',`
+       call    L(here)
+L(here):
+       popl    %ebp                    C eip
+
+       addl    $_GLOBAL_OFFSET_TABLE_+[.-L(here)], %ebp
+       C AGI
+       movl    binvert_limb_table@GOT(%ebp), %ebp
+       C AGI
+       movzbl  (%eax,%ebp), %eax
+',`
+
+dnl non-PIC
+       movzbl  binvert_limb_table(%eax), %eax  C inv 8 bits
+')
+
+       movl    %eax, %ebp              C inv
+       addl    %eax, %eax              C 2*inv
+
+       imull   %ebp, %ebp              C inv*inv
+
+       imull   %edx, %ebp              C inv*inv*d
+
+       subl    %ebp, %eax              C inv = 2*inv - inv*inv*d
+       movl    PARAM_SIZE, %ebx        C size, consumed at L(common)
+
+       movl    %eax, %ebp              C inv after the first refinement
+       addl    %eax, %eax              C 2*inv
+
+       imull   %ebp, %ebp              C inv*inv
+
+       imull   %edx, %ebp              C inv*inv*d
+
+       subl    %ebp, %eax              C inv = 2*inv - inv*inv*d
+       movl    %edx, PARAM_DIVISOR     C d without twos
+
+       ASSERT(e,`      C expect d*inv == 1 mod 2^GMP_LIMB_BITS
+       pushl   %eax    FRAME_pushl()
+       imull   PARAM_DIVISOR, %eax
+       cmpl    $1, %eax
+       popl    %eax    FRAME_popl()')
+
+       jmp     L(common)               C with eax=inv, ebx=size, ecx=shift
+EPILOGUE()
+
+C mp_limb_t
+C mpn_pi1_bdiv_q_1 (mp_ptr dst, mp_srcptr src, mp_size_t size, mp_limb_t divisor,
+C                  mp_limb_t inverse, int shift)
+       ALIGN(32)
+PROLOGUE(mpn_pi1_bdiv_q_1)
+deflit(`FRAME',0)
+
+       movl    PARAM_SHIFT, %ecx       C shift
+
+       pushl   %ebx            FRAME_pushl()
+       pushl   %ebp            FRAME_pushl()
+
+       movl    PARAM_SIZE, %ebx        C size
+       movl    PARAM_INVERSE, %eax     C inverse
+
+L(common):
+       pushl   %esi            FRAME_pushl()
+       push    %edi            FRAME_pushl()
+
+       movl    PARAM_SRC, %esi
+       movl    PARAM_DST, %edi
+       movl    %eax, VAR_INVERSE       C reuses the dst slot, dst is in edi now
+
+       leal    (%esi,%ebx,4), %esi     C src end
+       leal    (%edi,%ebx,4), %edi     C dst end
+
+       negl    %ebx                    C -size
+
+       xorl    %ebp, %ebp              C initial carry bit
+
+       orl     %ecx, %ecx              C shift
+       movl    (%esi,%ebx,4), %eax     C src low limb
+       jz      L(odd_entry)            C shift == 0, flags still from the orl
+
+       xorl    %edx, %edx              C initial carry limb (for even, if one)
+       incl    %ebx
+       jz      L(one)                  C size == 1
+
+       movl    (%esi,%ebx,4), %edx     C src second limb (for even)
+       shrdl(  %cl, %edx, %eax)        C low limb of src >> shift
+
+       jmp     L(even_entry)
+
+
+       ALIGN(8)
+L(odd_top):
+       C eax   scratch
+       C ebx   counter, limbs, negative
+       C ecx
+       C edx
+       C esi   src end
+       C edi   dst end
+       C ebp   carry bit, 0 or -1
+
+       mull    PARAM_DIVISOR           C edx:eax = previous quotient limb * d
+
+       movl    (%esi,%ebx,4), %eax     C src limb
+       subl    %ebp, %edx              C carry limb plus carry bit
+
+       subl    %edx, %eax              C src limb - carry limb
+
+       sbbl    %ebp, %ebp              C new carry bit, 0 or -1
+
+L(odd_entry):
+       imull   VAR_INVERSE, %eax       C quotient limb
+
+       movl    %eax, (%edi,%ebx,4)     C store quotient limb
+
+       incl    %ebx
+       jnz     L(odd_top)
+
+       popl    %edi
+       popl    %esi
+
+       popl    %ebp
+       popl    %ebx
+
+       ret
+
+L(even_top):
+       C eax   scratch
+       C ebx   counter, limbs, negative
+       C ecx   twos
+       C edx
+       C esi   src end
+       C edi   dst end
+       C ebp   carry bit, 0 or -1
+
+       mull    PARAM_DIVISOR           C edx:eax = previous quotient limb * d
+
+       subl    %ebp, %edx              C carry bit
+       movl    -4(%esi,%ebx,4), %eax   C src limb
+
+       movl    (%esi,%ebx,4), %ebp     C and one above it
+
+       shrdl(  %cl, %ebp, %eax)        C src >> shift, one limb
+
+       subl    %edx, %eax              C carry limb
+
+       sbbl    %ebp, %ebp              C new carry bit, 0 or -1
+
+L(even_entry):
+       imull   VAR_INVERSE, %eax       C quotient limb
+
+       movl    %eax, -4(%edi,%ebx,4)   C store quotient limb
+       incl    %ebx
+
+       jnz     L(even_top)
+
+       mull    PARAM_DIVISOR           C edx:eax = previous quotient limb * d
+
+       movl    -4(%esi), %eax          C src high limb
+       subl    %ebp, %edx              C carry limb plus carry bit
+
+L(one):
+       shrl    %cl, %eax               C high limb >> shift, zeros shifted in
+
+       subl    %edx, %eax              C no carry if division is exact
+
+       imull   VAR_INVERSE, %eax       C quotient high limb
+
+       movl    %eax, -4(%edi)          C dst high limb
+       nop                             C protect against cache bank clash
+
+       popl    %edi
+       popl    %esi
+
+       popl    %ebp
+       popl    %ebx
+
+       ret
+
+EPILOGUE()
diff --git a/mpn/x86/pentium4/sse2/add_n.asm b/mpn/x86/pentium4/sse2/add_n.asm

index 04c0c68d0efe8772376c6948c01e7d6065b26e73..3c84bacb63795ada51ec3d5141b889bde465ab49 100644 (file)
--- a/mpn/x86/pentium4/sse2/add_n.asm
+++ b/mpn/x86/pentium4/sse2/add_n.asm
@@ -20,17 +20,14 @@ dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
  include(`../config.m4')
  
  
-C P4 Willamette, Northwood: 4.0 cycles/limb if dst!=src1 and dst!=src2
-C                          6.0 cycles/limb if dst==src1 or dst==src2
-C P4 Prescott:             >= 5 cycles/limb
-
-C mp_limb_t mpn_add_n (mp_ptr dst, mp_srcptr src1, mp_srcptr src2,
-C                      mp_size_t size);
-C mp_limb_t mpn_add_nc (mp_ptr dst, mp_srcptr src1, mp_srcptr src2,
-C                       mp_size_t size, mp_limb_t carry);
-C
-C The 4 c/l achieved here isn't particularly good, but is better than 9 c/l
-C for a basic adc loop.
+C                                      cycles/limb
+C                           dst!=src1,2  dst==src1  dst==src2
+C P6 model 0-8,10-12           -
+C P6 model 9   (Banias)                ?
+C P6 model 13  (Dothan)                ?
+C P4 model 0-1 (Willamette)    ?
+C P4 model 2   (Northwood)     4            6          6
+C P4 model 3-4 (Prescott)      4.25         7.5        7.5
  
  defframe(PARAM_CARRY,20)
  defframe(PARAM_SIZE, 16)
@@ -46,29 +43,25 @@ define(SAVE_EBX,`PARAM_SRC1')
  
  PROLOGUE(mpn_add_nc)
  deflit(`FRAME',0)
-
         movd    PARAM_CARRY, %mm0
         jmp     L(start_nc)
-
  EPILOGUE()
  
         ALIGN(8)
  PROLOGUE(mpn_add_n)
  deflit(`FRAME',0)
-
         pxor    %mm0, %mm0
-
  L(start_nc):
-       movl    PARAM_SRC1, %eax
-       movl    %ebx, SAVE_EBX
-       movl    PARAM_SRC2, %ebx
-       movl    PARAM_DST, %edx
-       movl    PARAM_SIZE, %ecx
+       mov     PARAM_SRC1, %eax
+       mov     %ebx, SAVE_EBX
+       mov     PARAM_SRC2, %ebx
+       mov     PARAM_DST, %edx
+       mov     PARAM_SIZE, %ecx
  
-       leal    (%eax,%ecx,4), %eax     C src1 end
-       leal    (%ebx,%ecx,4), %ebx     C src2 end
-       leal    (%edx,%ecx,4), %edx     C dst end
-       negl    %ecx                    C -size
+       lea     (%eax,%ecx,4), %eax     C src1 end
+       lea     (%ebx,%ecx,4), %ebx     C src2 end
+       lea     (%edx,%ecx,4), %edx     C dst end
+       neg     %ecx                    C -size
  
  L(top):
         C eax   src1 end
@@ -86,12 +79,11 @@ L(top):
  
         psrlq   $32, %mm0
  
-       addl    $1, %ecx
+       add     $1, %ecx
         jnz     L(top)
  
-
         movd    %mm0, %eax
-       movl    SAVE_EBX, %ebx
+       mov     SAVE_EBX, %ebx
         emms
         ret
  
diff --git a/mpn/x86/pentium4/sse2/addlsh1_n.asm b/mpn/x86/pentium4/sse2/addlsh1_n.asm

index 46b0903c502120876280095cfab19c2704d4fdf4..25b8b4b0f126e7c32783aa5d45325036e03db277 100644 (file)
--- a/mpn/x86/pentium4/sse2/addlsh1_n.asm
+++ b/mpn/x86/pentium4/sse2/addlsh1_n.asm
@@ -20,14 +20,15 @@ dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
  include(`../config.m4')
  
  
-C          cycles/limb (approx)
-C          dst!=src1,2  dst==src1  dst==src2
-C P4 m2:      4.5         ?7.25      ?6.75
-C P4 m3:      5.3         ?         ?
+C                                      cycles/limb
+C                           dst!=src1,2  dst==src1  dst==src2
+C P6 model 0-8,10-12           -
+C P6 model 9   (Banias)                ?
+C P6 model 13  (Dothan)                ?
+C P4 model 0-1 (Willamette)    ?
+C P4 model 2   (Northwood)     4.25         6          6
+C P4 model 3-4 (Prescott)      5            8.5        8.5
  
-C mp_limb_t mpn_addlsh1_n (mp_ptr dst, mp_srcptr src1, mp_srcptr src2,
-C                          mp_size_t size);
-C
  C The slightly strange combination of indexing and pointer incrementing
  C that's used seems to work best.  Not sure why, but %ecx,4 with src1 and/or
  C src2 is a slowdown.
@@ -51,18 +52,18 @@ define(SAVE_EBX,`PARAM_SRC1')
  PROLOGUE(mpn_addlsh1_n)
  deflit(`FRAME',0)
  
-       movl    PARAM_SRC1, %eax
-       movl    %ebx, SAVE_EBX
+       mov     PARAM_SRC1, %eax
+       mov     %ebx, SAVE_EBX
  
-       movl    PARAM_SRC2, %ebx
+       mov     PARAM_SRC2, %ebx
         pxor    %mm0, %mm0              C initial carry
  
-       movl    PARAM_DST, %edx
+       mov     PARAM_DST, %edx
  
-       movl    PARAM_SIZE, %ecx
+       mov     PARAM_SIZE, %ecx
  
-       leal    (%edx,%ecx,4), %edx     C dst end
-       negl    %ecx                    C -size
+       lea     (%edx,%ecx,4), %edx     C dst end
+       neg     %ecx                    C -size
  
  L(top):
         C eax   src1 end
@@ -71,24 +72,24 @@ L(top):
         C edx   dst end
         C mm0   carry
  
-       movd    (%eax), %mm1
         movd    (%ebx), %mm2
+       movd    (%eax), %mm1
         psrlq   $32, %mm0
-       leal    4(%eax), %eax
-       leal    4(%ebx), %ebx
+       lea     4(%eax), %eax
+       lea     4(%ebx), %ebx
  
-       paddq   %mm2, %mm1
+       psllq   $1, %mm2
         paddq   %mm2, %mm1
  
         paddq   %mm1, %mm0
  
         movd    %mm0, (%edx,%ecx,4)
-       addl    $1, %ecx
+       add     $1, %ecx
         jnz     L(top)
  
  
         psrlq   $32, %mm0
-       movl    SAVE_EBX, %ebx
+       mov     SAVE_EBX, %ebx
         movd    %mm0, %eax
         emms
         ret
diff --git a/mpn/x86/pentium4/sse2/addmul_1.asm b/mpn/x86/pentium4/sse2/addmul_1.asm

index 3a8d0bb9bdcb99bafe881c89846c05054993e146..48c69fb31029436bb5015f16e626b8f369c0a590 100644 (file)
--- a/mpn/x86/pentium4/sse2/addmul_1.asm
+++ b/mpn/x86/pentium4/sse2/addmul_1.asm
@@ -1,6 +1,6 @@
  dnl  mpn_addmul_1 for Pentium 4 and P6 models with SSE2 (i.e., 9,D,E,F).
  
-dnl  Copyright 2005, 2007 Free Software Foundation, Inc.
+dnl  Copyright 2005, 2007, 2011 Free Software Foundation, Inc.
  dnl
  dnl  This file is part of the GNU MP Library.
  dnl
@@ -20,18 +20,18 @@ dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
  
  include(`../config.m4')
  
+C                          cycles/limb
+C P6 model 0-8,10-12           -
+C P6 model 9   (Banias)                5.24
+C P6 model 13  (Dothan)                5.24
+C P4 model 0-1 (Willamette)    5
+C P4 model 2   (Northwood)     5
+C P4 model 3-4 (Prescott)      5
+
  C TODO:
  C  * Tweak eax/edx offsets in loop as to save some lea's
  C  * Perhaps software pipeline small-case code
  
-C                           cycles/limb
-C P6 model 0-8,10-12)           -
-C P6 model 9   (Banias)         ?
-C P6 model 13  (Dothan)         5.24
-C P4 model 0-1 (Willamette):    5
-C P4 model 2   (Northwood):     5
-C P4 model 3-4 (Prescott):      5
-
  C INPUT PARAMETERS
  C rp           sp + 4
  C up           sp + 8
@@ -40,22 +40,13 @@ C v0                sp + 16
  
         TEXT
         ALIGN(16)
-PROLOGUE(mpn_addmul_1c)
-       mov     4(%esp), %edx
-       mov     8(%esp), %eax
-       mov     12(%esp), %ecx
-       movd    16(%esp), %mm7
-       movd    20(%esp), %mm6
-       jmp     L(ent)
-EPILOGUE()
-       ALIGN(16)
  PROLOGUE(mpn_addmul_1)
-       mov     4(%esp), %edx
+       pxor    %mm6, %mm6
+L(ent):        mov     4(%esp), %edx
         mov     8(%esp), %eax
         mov     12(%esp), %ecx
         movd    16(%esp), %mm7
-       pxor    %mm6, %mm6
-L(ent):        cmp     $4, %ecx
+       cmp     $4, %ecx
         jnc     L(big)
  
  L(lp0):        movd    (%eax), %mm0
@@ -181,3 +172,7 @@ L(end):     pmuludq %mm7, %mm2
         emms
         ret
  EPILOGUE()
+PROLOGUE(mpn_addmul_1c)
+       movd    20(%esp), %mm6
+       jmp     L(ent)
+EPILOGUE()
diff --git a/mpn/x86/pentium4/sse2/bdiv_dbm1c.asm b/mpn/x86/pentium4/sse2/bdiv_dbm1c.asm

new file mode 100644 (file)

index 0000000..ae4f3ff
--- /dev/null
+++ b/mpn/x86/pentium4/sse2/bdiv_dbm1c.asm
@@ -0,0 +1,130 @@
+dnl  Intel Atom  mpn_bdiv_dbm1c.
+
+dnl  Contributed to the GNU project by Torbjorn Granlund and Marco Bodrato.
+dnl
+dnl  Copyright 2011 Free Software Foundation, Inc.
+dnl
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
+dnl
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C                          cycles/limb
+C                          cycles/limb
+C P5                            -
+C P6 model 0-8,10-12            -
+C P6 model 9  (Banias)          9.75
+C P6 model 13 (Dothan)
+C P4 model 0  (Willamette)
+C P4 model 1  (?)
+C P4 model 2  (Northwood)       8.25
+C P4 model 3  (Prescott)
+C P4 model 4  (Nocona)
+C Intel Atom                    8
+C AMD K6                        -
+C AMD K7                        -
+C AMD K8
+C AMD K10
+
+C TODO: This code was optimised for atom-32; consider moving it back to the atom
+C      dir (atom currently grabs this code), and write a 4-way version (7 c/l).
+
+defframe(PARAM_CARRY,20)
+defframe(PARAM_MUL,  16)
+defframe(PARAM_SIZE, 12)
+defframe(PARAM_SRC,  8)
+defframe(PARAM_DST,  4)
+
+dnl  re-use parameter space
+define(SAVE_RP,`PARAM_MUL')
+define(SAVE_UP,`PARAM_SIZE')
+
+define(`rp', `%edi')
+define(`up', `%esi')
+define(`n',  `%ecx')
+define(`reg', `%edx')
+define(`cy', `%eax')   C contains the return value
+
+ASM_START()
+       TEXT
+       ALIGN(16)
+deflit(`FRAME',0)
+
+PROLOGUE(mpn_bdiv_dbm1c)
+       mov     PARAM_SIZE, n           C size
+       mov     up, SAVE_UP             C save esi in the size slot, size is in n now
+       mov     PARAM_SRC, up
+       movd    PARAM_MUL, %mm7         C multiplier, kept in mm7 throughout
+       mov     rp, SAVE_RP             C save edi in the multiplier slot, now in mm7
+       mov     PARAM_DST, rp
+
+       movd    (up), %mm0
+       pmuludq %mm7, %mm0              C 64-bit product of low limb and multiplier
+       shr     n                       C n = size/2, CF = low bit of size
+       mov     PARAM_CARRY, cy         C mov leaves ZF and CF from the shr intact
+       jz      L(eq1)                  C size/2 == 0, single limb
+
+       movd    4(up), %mm1
+       jc      L(odd)                  C odd size, enter the loop at its midpoint
+
+       lea     4(up), up
+       pmuludq %mm7, %mm1
+       movd    %mm0, reg               C low half of product
+       psrlq   $32, %mm0
+       sub     reg, cy
+       movd    %mm0, reg               C high half of product
+       movq    %mm1, %mm0
+       dec     n                       C dec leaves CF from the sub intact
+       mov     cy, (rp)
+       lea     4(rp), rp
+       jz      L(end)
+
+C      ALIGN(16)
+L(top):        movd    4(up), %mm1
+       sbb     reg, cy                 C high half and borrow from previous limb
+L(odd):        movd    %mm0, reg               C low half of product
+       psrlq   $32, %mm0
+       pmuludq %mm7, %mm1
+       sub     reg, cy
+       lea     8(up), up
+       movd    %mm0, reg               C high half of product
+       movd    (up), %mm0
+       mov     cy, (rp)
+       sbb     reg, cy                 C high half and borrow
+       movd    %mm1, reg               C low half of second product
+       psrlq   $32, %mm1
+       sub     reg, cy
+       movd    %mm1, reg               C high half of second product
+       pmuludq %mm7, %mm0
+       dec     n                       C dec leaves CF from the sub intact
+       mov     cy, 4(rp)
+       lea     8(rp), rp
+       jnz     L(top)
+
+L(end):        sbb     reg, cy                 C high half and borrow
+
+L(eq1):        movd    %mm0, reg               C low half of last product
+       psrlq   $32, %mm0
+       mov     SAVE_UP, up             C restore esi
+       sub     reg, cy
+       movd    %mm0, reg               C high half of last product
+       emms
+       mov     cy, (rp)
+       sbb     reg, cy                 C final carry, returned in eax
+
+       mov     SAVE_RP, rp             C restore edi
+       ret
+EPILOGUE()
+ASM_END()
diff --git a/mpn/x86/pentium4/sse2/bdiv_q_1.asm b/mpn/x86/pentium4/sse2/bdiv_q_1.asm

new file mode 100644 (file)

index 0000000..81a1a9c
--- /dev/null
+++ b/mpn/x86/pentium4/sse2/bdiv_q_1.asm
@@ -0,0 +1,222 @@
+dnl  Intel Pentium-4 mpn_bdiv_q_1, mpn_pi1_bdiv_q_1 -- mpn by limb Hensel division.
+
+dnl  Copyright 2001, 2002, 2007, 2011 Free Software Foundation, Inc.
+dnl
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  Rearranged from mpn/x86/pentium4/sse2/dive_1.asm by Marco Bodrato.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
+dnl
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+C P4: 19.0 cycles/limb
+
+C Pairs of movd's are used to avoid unaligned loads.  Despite the loads not
+C being on the dependent chain and there being plenty of cycles available,
+C using an unaligned movq on every second iteration measured about 23 c/l.
+C
+
+defframe(PARAM_SHIFT,  24)
+defframe(PARAM_INVERSE,20)
+defframe(PARAM_DIVISOR,16)
+defframe(PARAM_SIZE,   12)
+defframe(PARAM_SRC,    8)
+defframe(PARAM_DST,    4)
+
+       TEXT
+
+C mp_limb_t
+C mpn_pi1_bdiv_q_1 (mp_ptr dst, mp_srcptr src, mp_size_t size, mp_limb_t divisor,
+C                  mp_limb_t inverse, int shift)
+       ALIGN(32)
+PROLOGUE(mpn_pi1_bdiv_q_1)
+deflit(`FRAME',0)
+       C Writes size limbs to dst, each one being ((src >> shift) - cbit - climb) * inverse mod 2^32.
+       movl    PARAM_SIZE, %edx
+
+       movl    PARAM_SRC, %eax
+
+       movl    PARAM_DIVISOR, %ecx
+
+       movd    %ecx, %mm6              C divisor
+       movl    PARAM_SHIFT, %ecx
+
+       movd    %ecx, %mm7              C shift
+
+       C
+
+       movl    PARAM_INVERSE, %ecx
+       movd    %ecx, %mm5              C inv
+
+       movl    PARAM_DST, %ecx
+       pxor    %mm1, %mm1              C initial carry limb
+       pxor    %mm0, %mm0              C initial carry bit
+
+       subl    $1, %edx                C loop covers size-1 limbs, L(done) the last one
+       jz      L(done)
+
+       pcmpeqd %mm4, %mm4
+       psrlq   $32, %mm4               C 0x00000000FFFFFFFF
+
+C The dependent chain here is as follows.
+C
+C                                      latency
+C      psubq    s = (src-cbit) - climb    2
+C      pmuludq  q = s*inverse             8
+C      pmuludq  prod = q*divisor          8
+C      psrlq    climb = high(prod)        2
+C                                        --
+C                                        20
+C
+C Yet the loop measures 19.0 c/l, so obviously there's something gained
+C there over a straight reading of the chip documentation.
+
+L(top):
+       C eax   src, incrementing
+       C ebx
+       C ecx   dst, incrementing
+       C edx   counter, size-1 iterations
+       C
+       C mm0   carry bit
+       C mm1   carry limb
+       C mm4   0x00000000FFFFFFFF
+       C mm5   inverse
+       C mm6   divisor
+       C mm7   shift
+
+       movd    (%eax), %mm2
+       movd    4(%eax), %mm3
+       addl    $4, %eax
+       punpckldq %mm3, %mm2            C mm2 = next limb : current limb
+
+       psrlq   %mm7, %mm2              C bring in low bits of the next limb
+       pand    %mm4, %mm2              C src
+       psubq   %mm0, %mm2              C src - cbit
+
+       psubq   %mm1, %mm2              C src - cbit - climb
+       movq    %mm2, %mm0
+       psrlq   $63, %mm0               C new cbit
+
+       pmuludq %mm5, %mm2              C s*inverse
+       movd    %mm2, (%ecx)            C q
+       addl    $4, %ecx
+
+       movq    %mm6, %mm1
+       pmuludq %mm2, %mm1              C q*divisor
+       psrlq   $32, %mm1               C new climb
+
+L(entry):                              C mpn_bdiv_q_1 jumps in here with edx = size
+       subl    $1, %edx
+       jnz     L(top)
+
+L(done):                               C last limb: no following limb to merge in
+       movd    (%eax), %mm2
+       psrlq   %mm7, %mm2              C src
+       psubq   %mm0, %mm2              C src - cbit
+
+       psubq   %mm1, %mm2              C src - cbit - climb
+
+       pmuludq %mm5, %mm2              C s*inverse
+       movd    %mm2, (%ecx)            C q
+       C NOTE(review): eax still holds the src pointer here, no result is loaded although the prototype comment says mp_limb_t -- confirm callers ignore it
+       emms
+       ret
+
+EPILOGUE()
+
+       ALIGN(16)
+C mp_limb_t mpn_bdiv_q_1 (mp_ptr dst, mp_srcptr src, mp_size_t size,
+C                           mp_limb_t divisor);
+C
+PROLOGUE(mpn_bdiv_q_1)
+deflit(`FRAME',0)
+       C Splits divisor into odd part and shift, builds the inverse of the odd part mod 2^32, then joins the mpn_pi1_bdiv_q_1 loop at L(entry).
+       movl    PARAM_SIZE, %edx
+
+       movl    PARAM_DIVISOR, %ecx
+
+       C eax   scratch (src is loaded further down)
+       C ebx
+       C ecx   divisor
+       C edx   size (L(entry) subtracts 1 before the first loop test)
+
+       movl    %ecx, %eax
+       bsfl    %ecx, %ecx              C trailing twos (presumably divisor != 0, bsf is undefined for 0 -- confirm)
+
+       shrl    %cl, %eax               C d = divisor without twos
+       movd    %eax, %mm6
+       movd    %ecx, %mm7              C shift
+
+       shrl    %eax                    C d/2
+
+       andl    $127, %eax              C d/2, 7 bits
+
+ifdef(`PIC',`
+       LEA(    binvert_limb_table, %ecx)
+       movzbl  (%eax,%ecx), %eax               C inv 8 bits
+',`
+       movzbl  binvert_limb_table(%eax), %eax  C inv 8 bits
+')
+
+       C First Newton step: 8-bit inverse to 16 bits.
+
+       movd    %eax, %mm5              C inv
+
+       movd    %eax, %mm0              C inv
+
+       pmuludq %mm5, %mm5              C inv*inv
+
+       C
+
+       pmuludq %mm6, %mm5              C inv*inv*d
+       paddd   %mm0, %mm0              C 2*inv
+
+       C Second Newton step: 16-bit inverse to 32 bits, result ends up in mm5.
+
+       psubd   %mm5, %mm0              C inv = 2*inv - inv*inv*d
+       pxor    %mm5, %mm5
+
+       paddd   %mm0, %mm5              C mm5 = copy of inv
+       pmuludq %mm0, %mm0              C inv*inv
+
+       pcmpeqd %mm4, %mm4
+       psrlq   $32, %mm4               C 0x00000000FFFFFFFF
+
+       C
+
+       pmuludq %mm6, %mm0              C inv*inv*d
+       paddd   %mm5, %mm5              C 2*inv
+
+       movl    PARAM_SRC, %eax
+       movl    PARAM_DST, %ecx
+       pxor    %mm1, %mm1              C initial carry limb
+
+       C
+
+       psubd   %mm0, %mm5              C inv = 2*inv - inv*inv*d
+
+       ASSERT(e,`      C expect d*inv == 1 mod 2^GMP_LIMB_BITS
+       pushl   %eax    FRAME_pushl()
+       movq    %mm6, %mm0
+       pmuludq %mm5, %mm0
+       movd    %mm0, %eax
+       cmpl    $1, %eax
+       popl    %eax    FRAME_popl()')
+
+       pxor    %mm0, %mm0              C initial carry bit
+       jmp     L(entry)                C shared loop lives in mpn_pi1_bdiv_q_1
+
+EPILOGUE()
diff --git a/mpn/x86/pentium4/sse2/dive_1.asm b/mpn/x86/pentium4/sse2/dive_1.asm

index 5e0e38e5382b58c86459991976e678914ad8058a..493783a5919267ee9f83045295847c564f9fc095 100644 (file)
--- a/mpn/x86/pentium4/sse2/dive_1.asm
+++ b/mpn/x86/pentium4/sse2/dive_1.asm
@@ -139,13 +139,13 @@ ifdef(`PIC',`
  
  C The dependent chain here is as follows.
  C
-C                                      latency
-C      psubq    s = (src-cbit) - climb    2
-C      pmuludq  q = s*inverse             8
-C      pmuludq  prod = q*divisor          8
-C      psrlq    climb = high(prod)        2
-C                                        --
-C                                        20
+C                                      latency
+C      psubq    s = (src-cbit) - climb    2
+C      pmuludq  q = s*inverse             8
+C      pmuludq  prod = q*divisor          8
+C      psrlq    climb = high(prod)        2
+C                                        --
+C                                        20
  C
  C Yet the loop measures 19.0 c/l, so obviously there's something gained
  C there over a straight reading of the chip documentation.
diff --git a/mpn/x86/pentium4/sse2/gmp-mparam.h b/mpn/x86/pentium4/sse2/gmp-mparam.h

index 70919815c78f4ea3d3bc2914a267c10b441b230d..7a324ba196f753cb37331d73b041f4f989e9b707 100644 (file)
--- a/mpn/x86/pentium4/sse2/gmp-mparam.h
+++ b/mpn/x86/pentium4/sse2/gmp-mparam.h
@@ -24,148 +24,124 @@ along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
  
  #define MOD_1_NORM_THRESHOLD                24
  #define MOD_1_UNNORM_THRESHOLD           MP_SIZE_T_MAX  /* never */
-#define MOD_1N_TO_MOD_1_1_THRESHOLD         26
-#define MOD_1U_TO_MOD_1_1_THRESHOLD          9
-#define MOD_1_1_TO_MOD_1_2_THRESHOLD         0  /* never mpn_mod_1_1p */
+#define MOD_1N_TO_MOD_1_1_THRESHOLD          6
+#define MOD_1U_TO_MOD_1_1_THRESHOLD          5
+#define MOD_1_1_TO_MOD_1_2_THRESHOLD        13
  #define MOD_1_2_TO_MOD_1_4_THRESHOLD         0  /* never mpn_mod_1s_2p */
-#define PREINV_MOD_1_TO_MOD_1_THRESHOLD     34
+#define PREINV_MOD_1_TO_MOD_1_THRESHOLD      2
  #define USE_PREINV_DIVREM_1                  1  /* native */
+#define DIV_QR_2_PI2_THRESHOLD           MP_SIZE_T_MAX  /* never */
  #define DIVEXACT_1_THRESHOLD                 0  /* always (native) */
-#define BMOD_1_TO_MOD_1_THRESHOLD           22
+#define BMOD_1_TO_MOD_1_THRESHOLD           20
  
-#define MUL_TOOM22_THRESHOLD                30
-#define MUL_TOOM33_THRESHOLD               120
-#define MUL_TOOM44_THRESHOLD               296
-#define MUL_TOOM6H_THRESHOLD               414
-#define MUL_TOOM8H_THRESHOLD               620
+#define MUL_TOOM22_THRESHOLD                29
+#define MUL_TOOM33_THRESHOLD               107
+#define MUL_TOOM44_THRESHOLD               276
+#define MUL_TOOM6H_THRESHOLD               422
+#define MUL_TOOM8H_THRESHOLD               587
  
-#define MUL_TOOM32_TO_TOOM43_THRESHOLD     198
-#define MUL_TOOM32_TO_TOOM53_THRESHOLD     216
-#define MUL_TOOM42_TO_TOOM53_THRESHOLD     194
-#define MUL_TOOM42_TO_TOOM63_THRESHOLD     209
+#define MUL_TOOM32_TO_TOOM43_THRESHOLD     117
+#define MUL_TOOM32_TO_TOOM53_THRESHOLD     207
+#define MUL_TOOM42_TO_TOOM53_THRESHOLD     193
+#define MUL_TOOM42_TO_TOOM63_THRESHOLD     184
+#define MUL_TOOM43_TO_TOOM54_THRESHOLD     164
  
  #define SQR_BASECASE_THRESHOLD               0  /* always (native) */
  #define SQR_TOOM2_THRESHOLD                 48
-#define SQR_TOOM3_THRESHOLD                170
-#define SQR_TOOM4_THRESHOLD                454
-#define SQR_TOOM6_THRESHOLD                454
+#define SQR_TOOM3_THRESHOLD                173
+#define SQR_TOOM4_THRESHOLD                264
+#define SQR_TOOM6_THRESHOLD                354
  #define SQR_TOOM8_THRESHOLD                915
  
+#define MULMID_TOOM42_THRESHOLD             66
+
  #define MULMOD_BNM1_THRESHOLD               19
-#define SQRMOD_BNM1_THRESHOLD               24
+#define SQRMOD_BNM1_THRESHOLD               19
  
-#define MUL_FFT_MODF_THRESHOLD             904  /* k = 6 */
+#define MUL_FFT_MODF_THRESHOLD            1103  /* k = 5 */
  #define MUL_FFT_TABLE3                                      \
    { {    904, 6}, {     15, 5}, {     32, 6}, {     17, 5}, \
-    {     35, 6}, {     19, 5}, {     39, 6}, {     28, 7}, \
+    {     35, 6}, {     19, 5}, {     39, 6}, {     29, 7}, \
      {     15, 6}, {     33, 7}, {     17, 6}, {     35, 7}, \
      {     19, 6}, {     41, 7}, {     21, 6}, {     43, 7}, \
-    {     23, 6}, {     47, 7}, {     27, 6}, {     55, 8}, \
-    {     15, 7}, {     31, 6}, {     63, 7}, {     35, 8}, \
-    {     19, 7}, {     43, 8}, {     23, 7}, {     51, 8}, \
-    {     27, 7}, {     55, 8}, {     31, 7}, {     63, 8}, \
-    {     43, 9}, {     23, 8}, {     55, 9}, {     31, 8}, \
-    {     71, 9}, {     39, 8}, {     79, 9}, {     47, 8}, \
-    {     95, 9}, {     55,10}, {     31, 9}, {     63, 8}, \
-    {    127, 9}, {     79,10}, {     47, 9}, {    103,11}, \
-    {     31,10}, {     63, 9}, {    135,10}, {     79, 9}, \
-    {    159,10}, {     95,11}, {     63,10}, {    127, 9}, \
-    {    263,10}, {    143, 9}, {    287,10}, {    159,11}, \
-    {     95,10}, {    207,12}, {     63,11}, {    127,10}, \
-    {    271,11}, {    159,10}, {    319,11}, {    191,10}, \
-    {    383,11}, {    223,12}, {    127,11}, {    287,10}, \
-    {    607,11}, {    319,12}, {    191,11}, {    383,10}, \
-    {    767,13}, {    127,12}, {    255,11}, {    511,10}, \
-    {   1055,11}, {    543,10}, {   1119, 9}, {   2239,11}, \
-    {    607,12}, {    319,11}, {    671,10}, {   1407,11}, \
-    {    735,10}, {   1471, 9}, {   2943,12}, {    383,11}, \
-    {    799,10}, {   1663,11}, {    863,10}, {   1727,12}, \
-    {    447,13}, {    255,12}, {    511,11}, {   1055,10}, \
-    {   2111,11}, {   1119,10}, {   2239, 9}, {   4479,12}, \
-    {    575,11}, {   1247,10}, {   2495, 9}, {   4991,12}, \
-    {    639,11}, {   1471,10}, {   2943,13}, {    383,12}, \
-    {    767,11}, {   1599,12}, {    831,11}, {   1727,10}, \
-    {   3455,14}, {    255,13}, {    511,12}, {   1023,11}, \
-    {   2111,12}, {   1087,11}, {   2239,10}, {   4479,12}, \
-    {   1215,11}, {   2495,10}, {   4991,13}, {    639,12}, \
-    {   1471,11}, {   2943,10}, {   5887,11}, {   3007,13}, \
-    {    767,12}, {   1727,11}, {   3455,13}, {    895,11}, \
-    {   3839,12}, {   4096,13}, {   8192,14}, {  16384,15}, \
+    {     23, 6}, {     47, 7}, {     27, 6}, {     55, 7}, \
+    {     31, 6}, {     63, 7}, {     43, 8}, {     23, 7}, \
+    {     51, 8}, {     27, 7}, {     55, 8}, {     31, 7}, \
+    {     63, 8}, {     39, 7}, {     79, 8}, {     43, 9}, \
+    {     23, 8}, {     55, 9}, {     31, 8}, {     71, 9}, \
+    {     39, 8}, {     79, 9}, {     47, 8}, {     95, 9}, \
+    {     55,10}, {     31, 9}, {     63, 8}, {    127, 9}, \
+    {     79,10}, {     47, 9}, {    111,11}, {     31,10}, \
+    {     63, 9}, {    143,10}, {     79, 9}, {    167,10}, \
+    {     95, 9}, {    191,10}, {    111,11}, {     63,10}, \
+    {    127, 9}, {    255,10}, {    159, 9}, {    319,10}, \
+    {    175,11}, {     95,10}, {    207,12}, {     63,11}, \
+    {    127,10}, {    287,11}, {    159,10}, {    319, 9}, \
+    {    639,10}, {    351,11}, {    191,10}, {    383,11}, \
+    {    223,12}, {   4096,13}, {   8192,14}, {  16384,15}, \
      {  32768,16} }
-#define MUL_FFT_TABLE3_SIZE 141
-#define MUL_FFT_THRESHOLD                 7552
+#define MUL_FFT_TABLE3_SIZE 77
+#define MUL_FFT_THRESHOLD                 7808
  
-#define SQR_FFT_MODF_THRESHOLD             793  /* k = 5 */
+#define SQR_FFT_MODF_THRESHOLD             824  /* k = 5 */
  #define SQR_FFT_TABLE3                                      \
    { {    793, 5}, {     28, 6}, {     15, 5}, {     33, 6}, \
-    {     17, 5}, {     35, 6}, {     19, 5}, {     39, 6}, \
-    {     29, 7}, {     15, 6}, {     33, 7}, {     17, 6}, \
-    {     35, 7}, {     19, 6}, {     41, 7}, {     23, 6}, \
-    {     47, 7}, {     27, 6}, {     55, 7}, {     31, 6}, \
-    {     63, 7}, {     37, 8}, {     19, 7}, {     43, 8}, \
-    {     23, 7}, {     49, 8}, {     31, 7}, {     63, 8}, \
-    {     39, 7}, {     79, 8}, {     43, 9}, {     23, 8}, \
-    {     55, 9}, {     31, 8}, {     71, 9}, {     39, 8}, \
-    {     79, 9}, {     47, 8}, {     95, 9}, {     55,10}, \
-    {     31, 9}, {     79,10}, {     47, 9}, {    103,11}, \
-    {     31,10}, {     63, 9}, {    135,10}, {     79, 9}, \
-    {    159,10}, {     95, 9}, {    191,11}, {     63,10}, \
-    {    159,11}, {     95,10}, {    191,12}, {     63,11}, \
-    {    127,10}, {    255, 9}, {    511,10}, {    271,11}, \
-    {    159,10}, {    335,11}, {    191,10}, {    383, 9}, \
-    {    767,10}, {    399, 9}, {    799,11}, {    223,12}, \
-    {    127,11}, {    255,10}, {    527, 9}, {   1055,10}, \
-    {    543,11}, {    287,10}, {    607, 9}, {   1215,11}, \
-    {    319,12}, {    191,11}, {    383,10}, {    799,13}, \
-    {    127,12}, {    255,11}, {    511,10}, {   1055,11}, \
-    {    543,10}, {   1119, 9}, {   2239,11}, {    607,10}, \
-    {   1215,12}, {    319,11}, {    671,10}, {   1407,11}, \
-    {    735,10}, {   1471, 9}, {   2943,10}, {   1503,12}, \
-    {    383,11}, {    799,10}, {   1599,11}, {    863,10}, \
-    {   1727,12}, {    447,11}, {    991,13}, {    255,12}, \
-    {    511,11}, {   1055,10}, {   2111,11}, {   1119,10}, \
-    {   2239,12}, {    575,11}, {   1247,10}, {   2495,12}, \
-    {    639,11}, {   1471,10}, {   2943,13}, {    383,12}, \
-    {    767,11}, {   1599,12}, {    831,11}, {   1727,10}, \
-    {   3455,12}, {    959,11}, {   1919,14}, {    255,13}, \
-    {    511,12}, {   1023,11}, {   2111,12}, {   1087,11}, \
-    {   2239,10}, {   4479,12}, {   1215,11}, {   2495,13}, \
-    {    639,12}, {   1471,11}, {   2943,10}, {   5887,13}, \
-    {    767,12}, {   1727,11}, {   3455,13}, {    895,12}, \
-    {   1791,11}, {   3711,12}, {   1919,11}, {   3839,12}, \
+    {     17, 5}, {     35, 6}, {     28, 7}, {     15, 6}, \
+    {     33, 7}, {     17, 6}, {     35, 7}, {     19, 6}, \
+    {     41, 7}, {     23, 6}, {     47, 7}, {     27, 6}, \
+    {     55, 8}, {     15, 7}, {     31, 6}, {     63, 7}, \
+    {     37, 8}, {     19, 7}, {     43, 8}, {     23, 7}, \
+    {     51, 8}, {     31, 7}, {     63, 8}, {     39, 7}, \
+    {     79, 8}, {     43, 9}, {     23, 8}, {     55, 9}, \
+    {     31, 8}, {     71, 9}, {     39, 8}, {     79, 9}, \
+    {     47, 8}, {     95, 9}, {     55,10}, {     31, 9}, \
+    {     79,10}, {     47, 9}, {     95,11}, {     31,10}, \
+    {     63, 9}, {    135,10}, {     79, 9}, {    159,10}, \
+    {     95, 9}, {    191,10}, {    111,11}, {     63,10}, \
+    {    127, 9}, {    255,10}, {    159,11}, {     95,10}, \
+    {    191,12}, {     63,11}, {    127,10}, {    255, 9}, \
+    {    511,10}, {    271,11}, {    159,10}, {    319, 9}, \
+    {    639,11}, {    191,10}, {    399, 9}, {    799,12}, \
      {   4096,13}, {   8192,14}, {  16384,15}, {  32768,16} }
-#define SQR_FFT_TABLE3_SIZE 148
-#define SQR_FFT_THRESHOLD                 5760
+#define SQR_FFT_TABLE3_SIZE 72
+#define SQR_FFT_THRESHOLD                 7296
  
-#define MULLO_BASECASE_THRESHOLD            12
-#define MULLO_DC_THRESHOLD                  51
-#define MULLO_MUL_N_THRESHOLD            13463
+#define MULLO_BASECASE_THRESHOLD            13
+#define MULLO_DC_THRESHOLD                  48
+#define MULLO_MUL_N_THRESHOLD            14709
  
-#define DC_DIV_QR_THRESHOLD                 28
-#define DC_DIVAPPR_Q_THRESHOLD              61
-#define DC_BDIV_QR_THRESHOLD                55
-#define DC_BDIV_Q_THRESHOLD                 82
+#define DC_DIV_QR_THRESHOLD                 38
+#define DC_DIVAPPR_Q_THRESHOLD              77
+#define DC_BDIV_QR_THRESHOLD                54
+#define DC_BDIV_Q_THRESHOLD                 97
  
-#define INV_MULMOD_BNM1_THRESHOLD           60
-#define INV_NEWTON_THRESHOLD                94
-#define INV_APPR_THRESHOLD                  78
+#define INV_MULMOD_BNM1_THRESHOLD           57
+#define INV_NEWTON_THRESHOLD               202
+#define INV_APPR_THRESHOLD                 116
  
  #define BINV_NEWTON_THRESHOLD              327
-#define REDC_1_TO_REDC_N_THRESHOLD          63
+#define REDC_1_TO_REDC_N_THRESHOLD          34
  
  #define MU_DIV_QR_THRESHOLD               2350
-#define MU_DIVAPPR_Q_THRESHOLD            2089
-#define MUPI_DIV_QR_THRESHOLD                7
-#define MU_BDIV_QR_THRESHOLD              2089
-#define MU_BDIV_Q_THRESHOLD               2089
-
-#define MATRIX22_STRASSEN_THRESHOLD         34
-#define HGCD_THRESHOLD                      74
-#define GCD_DC_THRESHOLD                   321
-#define GCDEXT_DC_THRESHOLD                209
-#define JACOBI_BASE_METHOD                   1
-
-#define GET_STR_DC_THRESHOLD                12
-#define GET_STR_PRECOMPUTE_THRESHOLD        28
-#define SET_STR_DC_THRESHOLD               123
-#define SET_STR_PRECOMPUTE_THRESHOLD      1265
+#define MU_DIVAPPR_Q_THRESHOLD            2172
+#define MUPI_DIV_QR_THRESHOLD               66
+#define MU_BDIV_QR_THRESHOLD              1787
+#define MU_BDIV_Q_THRESHOLD               2350
+
+#define POWM_SEC_TABLE  2,35,164,1068,2500
+
+#define MATRIX22_STRASSEN_THRESHOLD         30
+#define HGCD_THRESHOLD                      85
+#define HGCD_APPR_THRESHOLD                 95
+#define HGCD_REDUCE_THRESHOLD             5010
+#define GCD_DC_THRESHOLD                   393
+#define GCDEXT_DC_THRESHOLD                253
+#define JACOBI_BASE_METHOD                   4
+
+#define GET_STR_DC_THRESHOLD                11
+#define GET_STR_PRECOMPUTE_THRESHOLD        24
+#define SET_STR_DC_THRESHOLD               119
+#define SET_STR_PRECOMPUTE_THRESHOLD      1084
+
+#define FAC_DSC_THRESHOLD                  342
+#define FAC_ODD_THRESHOLD                   27
diff --git a/mpn/x86/pentium4/sse2/mod_1_1.asm b/mpn/x86/pentium4/sse2/mod_1_1.asm

new file mode 100644 (file)

index 0000000..89a0b9a
--- /dev/null
+++ b/mpn/x86/pentium4/sse2/mod_1_1.asm
@@ -0,0 +1,155 @@
+dnl  x86-32 mpn_mod_1_1p for Pentium 4 and P6 models with SSE2 (i.e., 9,D,E,F).
+
+dnl  Contributed to the GNU project by Torbjorn Granlund.
+
+dnl  Copyright 2009, 2010 Free Software Foundation, Inc.
+dnl
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+dnl
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C TODO:
+C  * Optimize.  The present code was written quite straightforwardly.
+C  * Optimize post-loop reduction code; it is from mod_1s_4p, thus overkill.
+C  * Write a cps function that uses sse2 insns.
+
+C                           cycles/limb
+C P6 model 0-8,10-12           -
+C P6 model 9   (Banias)                ?
+C P6 model 13  (Dothan)                ?
+C P4 model 0-1 (Willamette)    ?
+C P4 model 2   (Northwood)     16
+C P4 model 3-4 (Prescott)      18
+
+C INPUT PARAMETERS
+C ap           sp + 4
+C n            sp + 8
+C b            sp + 12
+C cps          sp + 16
+
+define(`B1modb', `%mm1')
+define(`B2modb', `%mm2')
+define(`ap',     `%edx')
+define(`n',      `%eax')
+
+       TEXT
+       ALIGN(16)
+PROLOGUE(mpn_mod_1_1p)
+       push    %ebx                    C moves the sp offsets listed under INPUT PARAMETERS up by 4
+       mov     8(%esp), ap
+       mov     12(%esp), n
+       mov     20(%esp), %ecx          C cps: bi at 0, cnt at 4, B1modb at 8, B2modb at 12
+       movd    8(%ecx), B1modb
+       movd    12(%ecx), B2modb
+       C Overview: r = ap[n-1]*B1modb + ap[n-2], then per remaining limb r = rh*B2modb + rl*B1modb + limb; L(end) reduces r using bi, cnt and b.
+       lea     -4(ap,n,4), ap          C ap -> most significant limb
+
+C FIXME: See comment in generic/mod_1_1.c.
+       movd    (ap), %mm7
+       movd    -4(ap), %mm4
+       pmuludq B1modb, %mm7
+       paddq   %mm4, %mm7
+       add     $-2, n                  C two limbs consumed; NOTE(review): presumably callers ensure n >= 2 -- confirm
+       jz      L(end)
+
+       ALIGN(8)
+L(top):        movq    %mm7, %mm6      C copy, its low half rl is multiplied below
+       psrlq   $32, %mm7               C rh
+       movd    -8(ap), %mm0            C next lower limb
+       add     $-4, ap
+       pmuludq B2modb, %mm7            C rh * B2modb
+       pmuludq B1modb, %mm6            C rl * B1modb
+       add     $-1, n
+       paddq   %mm0, %mm7
+       paddq   %mm6, %mm7
+       jnz     L(top)
+
+L(end):        pcmpeqd %mm4, %mm4
+       psrlq   $32, %mm4               C 0x00000000FFFFFFFF
+       pand    %mm7, %mm4              C rl
+       psrlq   $32, %mm7               C rh
+       pmuludq B1modb, %mm7            C rh,cl
+       paddq   %mm4, %mm7              C rh,rl
+       movd    4(%ecx), %mm4           C cnt
+       psllq   %mm4, %mm7              C rh,rl normalized
+       movq    %mm7, %mm2              C rl in low half
+       psrlq   $32, %mm7               C rh
+       movd    (%ecx), %mm1            C bi
+       pmuludq %mm7, %mm1              C qh,ql
+       paddq   %mm2, %mm1              C qh-1,ql
+       movd    %mm1, %ecx              C ql
+       psrlq   $32, %mm1               C qh-1
+       movd    16(%esp), %mm3          C b
+       pmuludq %mm1, %mm3              C (qh-1) * b
+       psubq   %mm3, %mm2              C r in low half (could use psubd)
+       movd    %mm2, %eax              C r
+       mov     16(%esp), %ebx          C b
+       sub     %ebx, %eax              C r
+       cmp     %eax, %ecx              C CF set when ql < eax
+       lea     (%eax,%ebx), %edx       C edx = eax + b
+       cmovc(  %edx, %eax)             C add b back in that case
+       movd    %mm4, %ecx              C cnt
+       cmp     %ebx, %eax
+       jae     L(fix)                  C eax >= b: one more b to remove
+       emms
+       pop     %ebx
+       shr     %cl, %eax               C undo the shift by cnt
+       ret
+
+L(fix):        sub     %ebx, %eax
+       emms
+       pop     %ebx
+       shr     %cl, %eax               C undo the shift by cnt
+       ret
+EPILOGUE()
+
+PROLOGUE(mpn_mod_1_1p_cps)
+C CAUTION: This is the same code as in k7/mod_1_1.asm
+       push    %ebp
+       mov     12(%esp), %ebp          C second stack argument: the divisor b
+       push    %esi
+       bsr     %ebp, %ecx
+       push    %ebx
+       xor     $31, %ecx               C cnt = 31 - index of the top set bit of b
+       mov     16(%esp), %esi          C first stack argument (output array), now below three pushes
+       sal     %cl, %ebp               C b << cnt
+       mov     %ebp, %edx
+       not     %edx                    C dividend high word = ~(b << cnt)
+       mov     $-1, %eax               C dividend low word = all ones
+       div     %ebp                    C eax = quotient by (b << cnt)
+       mov     %eax, (%esi)            C store bi
+       mov     %ecx, 4(%esi)           C store cnt
+       xor     %ebx, %ebx
+       sub     %ebp, %ebx              C ebx = -(b << cnt)
+       mov     $1, %edx
+       shld    %cl, %eax, %edx         C edx = (1 << cnt) | top cnt bits of bi
+       imul    %edx, %ebx              C ebx = B1modb, still scaled by 2^cnt
+       mul     %ebx                    C edx:eax = bi * ebx
+       add     %ebx, %edx
+       not     %edx
+       imul    %ebp, %edx              C candidate for B2modb, still scaled
+       add     %edx, %ebp              C candidate + (b << cnt)
+       cmp     %edx, %eax
+       cmovc(  %ebp, %edx)             C take the adjusted value when eax < edx
+       shr     %cl, %ebx
+       mov     %ebx, 8(%esi)           C store B1modb
+       shr     %cl, %edx
+       mov     %edx, 12(%esi)          C store B2modb
+       pop     %ebx
+       pop     %esi
+       pop     %ebp
+       ret
+EPILOGUE()
diff --git a/mpn/x86/pentium4/sse2/mod_1_4.asm b/mpn/x86/pentium4/sse2/mod_1_4.asm

index cedbab1b7bcba83ab38e71e57391c001d2901b68..91d5c381d068cd6391db950007fd24a16c147a29 100644 (file)
--- a/mpn/x86/pentium4/sse2/mod_1_4.asm
+++ b/mpn/x86/pentium4/sse2/mod_1_4.asm
@@ -1,8 +1,8 @@
-dnl  mpn_mod_1_4 for Pentium 4 and P6 models with SSE2 (i.e., 9,D,E,F).
+dnl  x86-32 mpn_mod_1s_4p for Pentium 4 and P6 models with SSE2 (i.e. 9,D,E,F).
  
  dnl  Contributed to the GNU project by Torbjorn Granlund.
  
-dnl  Copyright 2009 Free Software Foundation, Inc.
+dnl  Copyright 2009, 2010 Free Software Foundation, Inc.
  dnl
  dnl  This file is part of the GNU MP Library.
  dnl
@@ -24,14 +24,15 @@ include(`../config.m4')
  C TODO:
  C  * Optimize.  The present code was written quite straightforwardly.
  C  * Optimize post-loop reduction code.
+C  * Write a cps function that uses sse2 insns.
  
-C                           cycles/limb
-C P6 model 0-8,10-12)           -
-C P6 model 9   (Banias)         ?
-C P6 model 13  (Dothan)         3.4
-C P4 model 0-1 (Willamette):    ?
-C P4 model 2   (Northwood):     4
-C P4 model 3-4 (Prescott):      ?
+C                          cycles/limb
+C P6 model 0-8,10-12           -
+C P6 model 9   (Banias)                ?
+C P6 model 13  (Dothan)                3.4
+C P4 model 0-1 (Willamette)    ?
+C P4 model 2   (Northwood)     4
+C P4 model 3-4 (Prescott)      4.5
  
  C INPUT PARAMETERS
  C ap           sp + 4
@@ -44,9 +45,10 @@ define(`B2modb', `%mm2')
  define(`B3modb', `%mm3')
  define(`B4modb', `%mm4')
  define(`B5modb', `%mm5')
-define(`ap', `%edx')
-define(`n', `%eax')
+define(`ap',     `%edx')
+define(`n',      `%eax')
  
+ASM_START()
         TEXT
         ALIGN(16)
  PROLOGUE(mpn_mod_1s_4p)
@@ -102,10 +104,8 @@ L(b1):     movd    (ap), %mm7
         jz      L(x)
         jmp     L(top)
  
-L(b2): movd    (ap), %mm7
-       pmuludq B1modb, %mm7
-       movd    -4(ap), %mm6
-       paddq   %mm6, %mm7
+L(b2): movd    -4(ap), %mm7            C rl
+       punpckldq (ap), %mm7            C rh
         lea     -20(ap), ap
         add     $-2, n
         jz      L(end)
@@ -135,15 +135,13 @@ L(top):   movd    4(ap), %mm0
         add     $-16, ap
         add     $-4, n
         jnz     L(top)
-L(end):
  
-       pcmpeqd %mm4, %mm4
+L(end):        pcmpeqd %mm4, %mm4
         psrlq   $32, %mm4               C 0x00000000FFFFFFFF
         pand    %mm7, %mm4              C rl
         psrlq   $32, %mm7               C rh
         pmuludq B1modb, %mm7            C rh,cl
         paddq   %mm4, %mm7              C rh,rl
-
  L(x):  movd    4(%ecx), %mm4           C cnt
         psllq   %mm4, %mm7              C rh,rl normalized
         movq    %mm7, %mm2              C rl in low half
@@ -177,80 +175,81 @@ L(fix):   sub     %ebx, %eax
         ret
  EPILOGUE()
  
+       ALIGN(16)
  PROLOGUE(mpn_mod_1s_4p_cps)
+C CAUTION: This is the same code as in k7/mod_1_4.asm
         push    %ebp
         push    %edi
         push    %esi
         push    %ebx
-       sub     $12, %esp
-       mov     36(%esp), %ebx
+       mov     20(%esp), %ebp          C FIXME: avoid bp for 0-idx
+       mov     24(%esp), %ebx
         bsr     %ebx, %ecx
         xor     $31, %ecx
-       mov     %ecx, 4(%esp)
-       sal     %cl, %ebx
+       sal     %cl, %ebx               C b << cnt
         mov     %ebx, %edx
         not     %edx
         mov     $-1, %eax
         div     %ebx
-       mov     %eax, %esi
-       mov     $1, %ebp
-       sal     %cl, %ebp
-       neg     %ecx
-       shr     %cl, %eax
-       or      %eax, %ebp
-       mov     %ebx, %eax
-       neg     %eax
-       imul    %ebp, %eax
-       mov     %esi, %ecx
-       mov     %eax, 8(%esp)
-       mul     %ecx
-       mov     %edx, %esi
-       not     %esi
-       sub     8(%esp), %esi
-       imul    %ebx, %esi
-       lea     (%esi,%ebx), %edx
-       cmp     %esi, %eax
-       cmovb(  %edx, %esi)
-       mov     %esi, %eax
-       mul     %ecx
-       lea     (%esi,%edx), %edi
-       not     %edi
-       imul    %ebx, %edi
-       lea     (%edi,%ebx), %edx
-       cmp     %edi, %eax
-       cmovb(  %edx, %edi)
+       xor     %edi, %edi
+       sub     %ebx, %edi
+       mov     $1, %esi
+       mov     %eax, (%ebp)            C store bi
+       mov     %ecx, 4(%ebp)           C store cnt
+       shld    %cl, %eax, %esi
+       imul    %edi, %esi
+       mov     %eax, %edi
+       mul     %esi
+
+       add     %esi, %edx
+       shr     %cl, %esi
+       mov     %esi, 8(%ebp)           C store B1modb
+
+       not     %edx
+       imul    %ebx, %edx
+       lea     (%edx,%ebx), %esi
+       cmp     %edx, %eax
+       cmovnc( %edx, %esi)
+       mov     %edi, %eax
+       mul     %esi
+
+       add     %esi, %edx
+       shr     %cl, %esi
+       mov     %esi, 12(%ebp)          C store B2modb
+
+       not     %edx
+       imul    %ebx, %edx
+       lea     (%edx,%ebx), %esi
+       cmp     %edx, %eax
+       cmovnc( %edx, %esi)
+       mov     %edi, %eax
+       mul     %esi
+
+       add     %esi, %edx
+       shr     %cl, %esi
+       mov     %esi, 16(%ebp)          C store B3modb
+
+       not     %edx
+       imul    %ebx, %edx
+       lea     (%edx,%ebx), %esi
+       cmp     %edx, %eax
+       cmovnc( %edx, %esi)
         mov     %edi, %eax
-       mul     %ecx
-       lea     (%edi,%edx), %ebp
-       not     %ebp
-       imul    %ebx, %ebp
-       lea     (%ebp,%ebx), %edx
-       cmp     %ebp, %eax
-       cmovb(  %edx, %ebp)
-       mov     %ebp, %eax
-       mul     %ecx
-       add     %ebp, %edx
+       mul     %esi
+
+       add     %esi, %edx
+       shr     %cl, %esi
+       mov     %esi, 20(%ebp)          C store B4modb
+
         not     %edx
         imul    %ebx, %edx
         add     %edx, %ebx
         cmp     %edx, %eax
-       cmovb(  %ebx, %edx)
-       mov     32(%esp), %eax
-       mov     %ecx, (%eax)
-       mov     4(%esp), %ecx
-       mov     %ecx, 4(%eax)
-       mov     8(%esp), %ebx
+       cmovnc( %edx, %ebx)
+
         shr     %cl, %ebx
-       mov     %ebx, 8(%eax)
-       shr     %cl, %esi
-       mov     %esi, 12(%eax)
-       shr     %cl, %edi
-       mov     %edi, 16(%eax)
-       shr     %cl, %ebp
-       mov     %ebp, 20(%eax)
-       shr     %cl, %edx
-       mov     %edx, 24(%eax)
-       add     $12, %esp
+       mov     %ebx, 24(%ebp)          C store B5modb
+
         pop     %ebx
         pop     %esi
         pop     %edi
diff --git a/mpn/x86/pentium4/sse2/mode1o.asm b/mpn/x86/pentium4/sse2/mode1o.asm

index f9d1f1496e47e850d644d1235a2bb211aed8c7fe..64d01e892690ad42b6d5e0f888754ba4fea3814c 100644 (file)
--- a/mpn/x86/pentium4/sse2/mode1o.asm
+++ b/mpn/x86/pentium4/sse2/mode1o.asm
@@ -113,13 +113,13 @@ ifdef(`PIC',`
  
  C The dependent chain here is as follows.
  C
-C                                      latency
-C      psubq    s = (src-cbit) - climb    2
-C      pmuludq  q = s*inverse             8
-C      pmuludq  prod = q*divisor          8
-C      psrlq    climb = high(prod)        2
-C                                        --
-C                                        20
+C                                      latency
+C      psubq    s = (src-cbit) - climb    2
+C      pmuludq  q = s*inverse             8
+C      pmuludq  prod = q*divisor          8
+C      psrlq    climb = high(prod)        2
+C                                        --
+C                                        20
  C
  C Yet the loop measures 19.0 c/l, so obviously there's something gained
  C there over a straight reading of the chip documentation.
diff --git a/mpn/x86/pentium4/sse2/mul_1.asm b/mpn/x86/pentium4/sse2/mul_1.asm

index 07be95192129412053442d9f423f4c4b0143daa1..aec737c270b368dec8ce703f7c0720ef5581c83e 100644 (file)
--- a/mpn/x86/pentium4/sse2/mul_1.asm
+++ b/mpn/x86/pentium4/sse2/mul_1.asm
@@ -1,6 +1,6 @@
  dnl  mpn_mul_1 for Pentium 4 and P6 models with SSE2 (i.e., 9,D,E,F).
  
-dnl  Copyright 2005, 2007 Free Software Foundation, Inc.
+dnl  Copyright 2005, 2007, 2011 Free Software Foundation, Inc.
  dnl
  dnl  This file is part of the GNU MP Library.
  dnl
@@ -20,18 +20,18 @@ dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
  
  include(`../config.m4')
  
+C                           cycles/limb
+C P6 model 0-8,10-12           -
+C P6 model 9   (Banias)                4.17
+C P6 model 13  (Dothan)                4.17
+C P4 model 0-1 (Willamette)    4
+C P4 model 2   (Northwood)     4
+C P4 model 3-4 (Prescott)      4.55
+
  C TODO:
  C  * Tweak eax/edx offsets in loop as to save some lea's
  C  * Perhaps software pipeline small-case code
  
-C                           cycles/limb
-C P6 model 0-8,10-12)           -
-C P6 model 9   (Banias)                ?
-C P6 model 13  (Dothan)         4.17
-C P4 model 0-1 (Willamette):   4
-C P4 model 2   (Northwood):     4
-C P4 model 3-4 (Prescott):      4.55
-
  C INPUT PARAMETERS
  C rp           sp + 4
  C up           sp + 8
@@ -40,22 +40,13 @@ C v0                sp + 16
  
         TEXT
         ALIGN(16)
-PROLOGUE(mpn_mul_1c)
-       mov     4(%esp), %edx
-       mov     8(%esp), %eax
-       mov     12(%esp), %ecx
-       movd    16(%esp), %mm7
-       movd    20(%esp), %mm6
-       jmp     L(ent)
-EPILOGUE()
-       ALIGN(16)
  PROLOGUE(mpn_mul_1)
-       mov     4(%esp), %edx
+       pxor    %mm6, %mm6
+L(ent):        mov     4(%esp), %edx
         mov     8(%esp), %eax
         mov     12(%esp), %ecx
         movd    16(%esp), %mm7
-       pxor    %mm6, %mm6
-L(ent):        cmp     $4, %ecx
+       cmp     $4, %ecx
         jnc     L(big)
  
  L(lp0):        movd    (%eax), %mm0
@@ -156,3 +147,7 @@ L(end):     pmuludq %mm7, %mm2
         emms
         ret
  EPILOGUE()
+PROLOGUE(mpn_mul_1c)
+       movd    20(%esp), %mm6          C 5th stack argument goes where mpn_mul_1 puts zero
+       jmp     L(ent)                  C join mpn_mul_1 just after its pxor of mm6
+EPILOGUE()
diff --git a/mpn/x86/pentium4/sse2/popcount.asm b/mpn/x86/pentium4/sse2/popcount.asm

index 41c86ec13af460e789ef3cd15bcefcd6b3d673d6..2f06984c7d91ee7feaad5cd54c9257e6c9af1edd 100644 (file)
--- a/mpn/x86/pentium4/sse2/popcount.asm
+++ b/mpn/x86/pentium4/sse2/popcount.asm
@@ -1,6 +1,6 @@
  dnl  X86-32 and X86-64 mpn_popcount using SSE2.
  
-dnl  Copyright 2006, 2007 Free Software Foundation, Inc.
+dnl  Copyright 2006, 2007, 2011 Free Software Foundation, Inc.
  dnl
  dnl  This file is part of the GNU MP Library.
  dnl
@@ -21,28 +21,30 @@ dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
  include(`../config.m4')
  
  
-C 32-bit                     popcount        hamdist
-C                           cycles/limb     cycles/limb
-C P5:                           -
-C P6 model 0-8,10-12)           -
-C P6 model 9  (Banias)          ?
-C P6 model 13 (Dothan)          4
-C P4 model 0  (Willamette)      ?
-C P4 model 1  (?)               ?
-C P4 model 2  (Northwood)       3.9
-C P4 model 3  (Prescott)        ?
-C P4 model 4  (Nocona)          ?
-C K6:                           -
-C K7:                           -
-C K8:                           ?
-
-C 64-bit                     popcount        hamdist
-C                           cycles/limb     cycles/limb
-C P4 model 4 (Nocona):          8
-C K8:                           7.5
-C K10:                         3.5
-C P6 core2:                    3.68
-C P6 corei7:                   3.15
+C 32-bit                    popcount        hamdist
+C                          cycles/limb     cycles/limb
+C P5                           -
+C P6 model 0-8,10-12           -
+C P6 model 9  (Banias)         ?
+C P6 model 13 (Dothan)         4
+C P4 model 0  (Willamette)     ?
+C P4 model 1  (?)              ?
+C P4 model 2  (Northwood)      3.9
+C P4 model 3  (Prescott)       ?
+C P4 model 4  (Nocona)         ?
+C AMD K6                       -
+C AMD K7                       -
+C AMD K8                       ?
+
+C 64-bit                    popcount        hamdist
+C                          cycles/limb     cycles/limb
+C P4 model 4 (Nocona)          8
+C AMD K8,K9                    7.5
+C AMD K10                      3.5
+C Intel core2                  3.68
+C Intel corei                  3.15
+C Intel atom                  10.8
+C VIA nano                     6.5
  
  C TODO
  C  * Make a mpn_hamdist based on this.  Alignment could either be handled by
diff --git a/mpn/x86/pentium4/sse2/sub_n.asm b/mpn/x86/pentium4/sse2/sub_n.asm

index 02d5f014749da053b63781f74a5f01029b36b177..5ad0594cbf051bdd093a56e965eebfe19af25395 100644 (file)
--- a/mpn/x86/pentium4/sse2/sub_n.asm
+++ b/mpn/x86/pentium4/sse2/sub_n.asm
@@ -20,18 +20,14 @@ dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
  include(`../config.m4')
  
  
-C P4 Willamette, Northwood: 4.0 cycles/limb if dst!=src1 and dst!=src2
-C                          6.0 cycles/limb if dst==src1 or dst==src2
-C P4 Prescott:             >= 5 cycles/limb
-
-
-C mp_limb_t mpn_sub_n (mp_ptr dst, mp_srcptr src1, mp_srcptr src2,
-C                      mp_size_t size);
-C mp_limb_t mpn_sub_nc (mp_ptr dst, mp_srcptr src1, mp_srcptr src2,
-C                       mp_size_t size, mp_limb_t carry);
-C
-C The main loop code is 2x unrolled so that the carry bit can alternate
-C between mm0 and mm1.
+C                                      cycles/limb
+C                           dst!=src1,2  dst==src1  dst==src2
+C P6 model 0-8,10-12           -
+C P6 model 9   (Banias)                ?
+C P6 model 13  (Dothan)                ?
+C P4 model 0-1 (Willamette)    ?
+C P4 model 2   (Northwood)     4            6          6
+C P4 model 3-4 (Prescott)      4.25         7.5        7.5
  
  defframe(PARAM_CARRY,20)
  defframe(PARAM_SIZE, 16)
@@ -47,10 +43,8 @@ define(SAVE_EBX,`PARAM_SRC1')
  
  PROLOGUE(mpn_sub_nc)
  deflit(`FRAME',0)
-
         movd    PARAM_CARRY, %mm0
         jmp     L(start_nc)
-
  EPILOGUE()
  
         ALIGN(8)
@@ -58,16 +52,16 @@ PROLOGUE(mpn_sub_n)
  deflit(`FRAME',0)
         pxor    %mm0, %mm0
  L(start_nc):
-       movl    PARAM_SRC1, %eax
-       movl    %ebx, SAVE_EBX
-       movl    PARAM_SRC2, %ebx
-       movl    PARAM_DST, %edx
-       movl    PARAM_SIZE, %ecx
+       mov     PARAM_SRC1, %eax
+       mov     %ebx, SAVE_EBX
+       mov     PARAM_SRC2, %ebx
+       mov     PARAM_DST, %edx
+       mov     PARAM_SIZE, %ecx
  
-       leal    (%eax,%ecx,4), %eax     C src1 end
-       leal    (%ebx,%ecx,4), %ebx     C src2 end
-       leal    (%edx,%ecx,4), %edx     C dst end
-       negl    %ecx                    C -size
+       lea     (%eax,%ecx,4), %eax     C src1 end
+       lea     (%ebx,%ecx,4), %ebx     C src2 end
+       lea     (%edx,%ecx,4), %edx     C dst end
+       neg     %ecx                    C -size
  
  L(top):
         C eax   src1 end
@@ -85,7 +79,7 @@ L(top):
  
         psrlq   $63, %mm1
  
-       addl    $1, %ecx
+       add     $1, %ecx
         jz      L(done_mm1)
  
         movd    (%eax,%ecx,4), %mm0
@@ -97,18 +91,17 @@ L(top):
  
         psrlq   $63, %mm0
  
-       addl    $1, %ecx
+       add     $1, %ecx
         jnz     L(top)
  
-
         movd    %mm0, %eax
-       movl    SAVE_EBX, %ebx
+       mov     SAVE_EBX, %ebx
         emms
         ret
  
  L(done_mm1):
         movd    %mm1, %eax
-       movl    SAVE_EBX, %ebx
+       mov     SAVE_EBX, %ebx
         emms
         ret
  
diff --git a/mpn/x86/pentium4/sse2/submul_1.asm b/mpn/x86/pentium4/sse2/submul_1.asm

index ceb41f2ac010d953a3faaf60d5277516739440ee..2ef25d31270c183656f76ea0aac2bcd05da6fbbf 100644 (file)
--- a/mpn/x86/pentium4/sse2/submul_1.asm
+++ b/mpn/x86/pentium4/sse2/submul_1.asm
@@ -1,7 +1,7 @@
  dnl  Intel Pentium-4 mpn_submul_1 -- Multiply a limb vector with a limb and
  dnl  subtract the result from a second limb vector.
  
-dnl  Copyright 2001, 2002 Free Software Foundation, Inc.
+dnl  Copyright 2001, 2002, 2008, 2010 Free Software Foundation, Inc.
  dnl
  dnl  This file is part of the GNU MP Library.
  dnl
@@ -21,40 +21,40 @@ dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
  include(`../config.m4')
  
  
-C P4: 7 cycles/limb, unstable timing, at least on early Pentium4 silicon
-C     (stepping 10).
+C                          cycles/limb
+C P6 model 0-8,10-12           -
+C P6 model 9   (Banias)                6.8
+C P6 model 13  (Dothan)                6.9
+C P4 model 0-1 (Willamette)    ?
+C P4 model 2   (Northwood)     5.87
+C P4 model 3-4 (Prescott)      6.5
  
-
-C mp_limb_t mpn_submul_1 (mp_ptr dst, mp_srcptr src, mp_size_t size,
-C                         mp_limb_t mult);
-C mp_limb_t mpn_submul_1c (mp_ptr dst, mp_srcptr src, mp_size_t size,
-C                          mp_limb_t mult, mp_limb_t carry);
-C
-C This code is not particularly good at 7 c/l.  The dependent chain is only
-C 4 c/l and there's only 4 MMX unit instructions, so it's not clear why that
-C speed isn't achieved.
+C This code represents a step forwards compared to the code available before
+C GMP 5.1, but it is not carefully tuned for either P6 or P4.  In fact, it is
+C not good for P6.  For P4 it saved a bit over 1 c/l for both Northwood and
+C Prescott compared to the old code.
  C
  C The arrangements made here to get a two instruction dependent chain are
-C slightly subtle.  In the loop the carry (or borrow rather) is a negative
-C so that a paddq can be used to give a low limb ready to store, and a high
-C limb ready to become the new carry after a psrlq.
+C slightly subtle.  In the loop the carry (or borrow rather) is a negative so
+C that a paddq can be used to give a low limb ready to store, and a high limb
+C ready to become the new carry after a psrlq.
  C
-C If the carry was a simple twos complement negative then the psrlq shift
-C would need to bring in 0 bits or 1 bits according to whether the high was
-C zero or non-zero, since a non-zero value would represent a negative
-C needing sign extension.  That wouldn't be particularly easy to arrange and
-C certainly would add an instruction to the dependent chain, so instead an
-C offset is applied so that the high limb will be 0xFFFFFFFF+c.  With c in
-C the range -0xFFFFFFFF to 0, the value 0xFFFFFFFF+c is in the range 0 to
-C 0xFFFFFFFF and is therefore always positive and can always have 0 bits
-C shifted in, which is what psrlq does.
+C If the carry was a simple twos complement negative then the psrlq shift would
+C need to bring in 0 bits or 1 bits according to whether the high was zero or
+C non-zero, since a non-zero value would represent a negative needing sign
+C extension.  That wouldn't be particularly easy to arrange and certainly would
+C add an instruction to the dependent chain, so instead an offset is applied so
+C that the high limb will be 0xFFFFFFFF+c.  With c in the range -0xFFFFFFFF to
+C 0, the value 0xFFFFFFFF+c is in the range 0 to 0xFFFFFFFF and is therefore
+C always positive and can always have 0 bits shifted in, which is what psrlq
+C does.
  C
  C The extra 0xFFFFFFFF must be subtracted before c is used, but that can be
  C done off the dependent chain.  The total adjustment then is to add
-C 0xFFFFFFFF00000000 to offset the new carry, and subtract
-C 0x00000000FFFFFFFF to remove the offset from the current carry, for a net
-C add of 0xFFFFFFFE00000001.  In the code this is applied to the destination
-C limb when fetched.
+C 0xFFFFFFFF00000000 to offset the new carry, and subtract 0x00000000FFFFFFFF
+C to remove the offset from the current carry, for a net add of
+C 0xFFFFFFFE00000001.  In the code this is applied to the destination limb when
+C fetched.
  C
  C It's also possible to view the 0xFFFFFFFF adjustment as a ones-complement
  C negative, which is how it's undone for the return value, but that doesn't
@@ -80,16 +80,16 @@ deflit(`FRAME',0)
         pxor    %mm1, %mm1              C initial borrow
  
  L(start_1c):
-       movl    PARAM_SRC, %eax
+       mov     PARAM_SRC, %eax
         pcmpeqd %mm0, %mm0
  
         movd    PARAM_MULTIPLIER, %mm7
         pcmpeqd %mm6, %mm6
  
-       movl    PARAM_DST, %edx
+       mov     PARAM_DST, %edx
         psrlq   $32, %mm0               C 0x00000000FFFFFFFF
  
-       movl    PARAM_SIZE, %ecx
+       mov     PARAM_SIZE, %ecx
         psllq   $32, %mm6               C 0xFFFFFFFF00000000
  
         psubq   %mm0, %mm6              C 0xFFFFFFFE00000001
@@ -97,32 +97,75 @@ L(start_1c):
         psubq   %mm1, %mm0              C 0xFFFFFFFF - borrow
  
  
-       C eax   src, incrementing
-       C ebx
-       C ecx   loop counter, decrementing
-       C edx   dst, incrementing
-       C
-       C mm0   0xFFFFFFFF - borrow
-       C mm6   0xFFFFFFFE00000001
-       C mm7   multiplier
-
-L(loop):
-       movd    (%eax), %mm1            C src
-       leal    4(%eax), %eax
-       movd    (%edx), %mm2            C dst
-       paddq   %mm6, %mm2              C add 0xFFFFFFFE00000001
+       movd    (%eax), %mm3            C up
+       movd    (%edx), %mm4            C rp
+
+       add     $-1, %ecx
+       paddq   %mm6, %mm4              C add 0xFFFFFFFE00000001
+       pmuludq %mm7, %mm3
+       jnz     L(gt1)
+       psubq   %mm3, %mm4              C prod
+       paddq   %mm4, %mm0              C borrow
+       movd    %mm0, (%edx)            C result
+       jmp     L(rt)
+
+L(gt1):        movd    4(%eax), %mm1           C up
+       movd    4(%edx), %mm2           C rp
+
+       add     $-1, %ecx
+       jz      L(eev)
+
+       ALIGN(16)
+L(top):        paddq   %mm6, %mm2              C add 0xFFFFFFFE00000001
         pmuludq %mm7, %mm1
+       psubq   %mm3, %mm4              C prod
+       movd    8(%eax), %mm3           C up
+       paddq   %mm4, %mm0              C borrow
+       movd    8(%edx), %mm4           C rp
+       movd    %mm0, (%edx)            C result
+       psrlq   $32, %mm0
+
+       add     $-1, %ecx
+       jz      L(eod)
+
+       paddq   %mm6, %mm4              C add 0xFFFFFFFE00000001
+       pmuludq %mm7, %mm3
         psubq   %mm1, %mm2              C prod
+       movd    12(%eax), %mm1          C up
         paddq   %mm2, %mm0              C borrow
-       subl    $1, %ecx
-       movd    %mm0, (%edx)            C result
+       movd    12(%edx), %mm2          C rp
+       movd    %mm0, 4(%edx)           C result
         psrlq   $32, %mm0
-       leal    4(%edx), %edx
-       jnz     L(loop)
  
+       lea     8(%eax), %eax
+       lea     8(%edx), %edx
+       add     $-1, %ecx
+       jnz     L(top)
+
+
+L(eev):        paddq   %mm6, %mm2              C add 0xFFFFFFFE00000001
+       pmuludq %mm7, %mm1
+       psubq   %mm3, %mm4              C prod
+       paddq   %mm4, %mm0              C borrow
+       movd    %mm0, (%edx)            C result
+       psrlq   $32, %mm0
+       psubq   %mm1, %mm2              C prod
+       paddq   %mm2, %mm0              C borrow
+       movd    %mm0, 4(%edx)           C result
+L(rt): psrlq   $32, %mm0
         movd    %mm0, %eax
-       notl    %eax
+       not     %eax
         emms
         ret
  
+L(eod):        paddq   %mm6, %mm4              C add 0xFFFFFFFE00000001
+       pmuludq %mm7, %mm3
+       psubq   %mm1, %mm2              C prod
+       paddq   %mm2, %mm0              C borrow
+       movd    %mm0, 4(%edx)           C result
+       psrlq   $32, %mm0
+       psubq   %mm3, %mm4              C prod
+       paddq   %mm4, %mm0              C borrow
+       movd    %mm0, 8(%edx)           C result
+       jmp     L(rt)
  EPILOGUE()
diff --git a/mpn/x86/rshift.asm b/mpn/x86/rshift.asm

index 8e33eabd61cc8d510ba6da85657e2487de5c23c3..b987c6a7b616e7ded1434cf765bbf8b8b0bcacfd 100644 (file)
--- a/mpn/x86/rshift.asm
+++ b/mpn/x86/rshift.asm
@@ -22,12 +22,12 @@ include(`../config.m4')
  
  
  C     cycles/limb
-C P54:   7.5
-C P55:   7.0
-C P6:    2.5
-C K6:    4.5
-C K7:    5.0
-C P4:   16.5
+C P54   7.5
+C P55   7.0
+C P6    2.5
+C K6    4.5
+C K7    5.0
+C P4   16.5
  
  
  C mp_limb_t mpn_rshift (mp_ptr dst, mp_srcptr src, mp_size_t size,
diff --git a/mpn/x86/sqr_basecase.asm b/mpn/x86/sqr_basecase.asm

index 9a7e13327b8c41c91ec01d28bc0a89cad846b069..53ccfcd747a26e63a20d61d532f59b1145e1da92 100644 (file)
--- a/mpn/x86/sqr_basecase.asm
+++ b/mpn/x86/sqr_basecase.asm
@@ -22,11 +22,11 @@ include(`../config.m4')
  
  
  C     cycles/crossproduct  cycles/triangleproduct
-C P5:
-C P6:
-C K6:
-C K7:
-C P4:
+C P5
+C P6
+C K6
+C K7
+C P4
  
  
  C void mpn_sqr_basecase (mp_ptr dst, mp_srcptr src, mp_size_t size);
diff --git a/mpn/x86/tabselect.asm b/mpn/x86/tabselect.asm

new file mode 100644 (file)

index 0000000..7c8c260
--- /dev/null
+++ b/mpn/x86/tabselect.asm
@@ -0,0 +1,104 @@
+dnl  x86 mpn_tabselect.
+
+dnl  Copyright 2011 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+C                          cycles/limb
+C P5                            ?
+C P6 model 0-8,10-12            ?
+C P6 model 9  (Banias)          ?
+C P6 model 13 (Dothan)          ?
+C P4 model 0  (Willamette)      ?
+C P4 model 1  (?)               ?
+C P4 model 2  (Northwood)       4.5
+C P4 model 3  (Prescott)        ?
+C P4 model 4  (Nocona)          ?
+C Intel Atom                    ?
+C AMD K6                        ?
+C AMD K7                        3.4
+C AMD K8                        ?
+C AMD K10                       ?
+
+C NOTES
+C  * This has not been tuned for any specific processor.  Its speed should not
+C    be too bad, though.
+C  * Using SSE2 could result in many-fold speedup.
+
+C mpn_tabselect (mp_limb_t *rp, mp_limb_t *tp, mp_size_t n, mp_size_t nents, mp_size_t which)
+define(`rp',     `%edi')
+define(`tp',     `%esi')
+define(`n',      `%ebx')
+define(`nents',  `%ecx')
+define(`which',  `36(%esp)')
+
+define(`i',      `%ebp')
+define(`maskp',  `20(%esp)')
+define(`maskn',  `32(%esp)')
+
+ASM_START()
+       TEXT
+       ALIGN(16)
+PROLOGUE(mpn_tabselect)
+       push    %edi                    C edi esi ebx ebp are restored before ret
+       push    %esi
+       push    %ebx
+       push    %ebp
+       mov     20(%esp), rp            C 1st argument sits above 4 saved registers and the return address
+       mov     24(%esp), tp
+       mov     28(%esp), n
+       mov     32(%esp), nents
+
+       lea     (rp,n,4), rp            C point past the n result limbs so the loop can index with i = -n..-1
+       lea     (tp,n,4), tp            C likewise for the first table entry
+       sub     nents, which            C bias which by -nents and let L(outer) add the live count back
+L(outer):
+       mov     which, %eax
+       add     nents, %eax             C eax = which - k where k counts the entries already handled
+       neg     %eax                    C set CF iff 'which' != k
+       sbb     %eax, %eax              C eax = 0 when this entry is the wanted one and all ones otherwise
+       mov     %eax, maskn             C maskn keeps the old rp limb and reuses the slot nents was read from
+       not     %eax
+       mov     %eax, maskp             C maskp admits the tp limb and reuses the slot rp was read from
+
+       mov     n, i
+       neg     i                       C i = -n
+
+       ALIGN(16)
+L(top):        mov     (tp,i,4), %eax  C limb of the current table entry
+       and     maskp, %eax
+       mov     (rp,i,4), %edx          C limb currently held in the result
+       and     maskn, %edx
+       or      %edx, %eax              C exactly one of the two masked limbs is nonzero-capable
+       mov     %eax, (rp,i,4)
+       inc     i
+       js      L(top)                  C loop while i is still negative
+
+L(end):        mov     n, %eax
+       lea     (tp,%eax,4), tp         C advance tp by n limbs to the next entry
+       dec     nents
+       jne     L(outer)                C every entry is read whichever one is selected
+
+L(outer_end):
+       pop     %ebp
+       pop     %ebx
+       pop     %esi
+       pop     %edi
+       ret
+EPILOGUE()
diff --git a/mpn/x86/x86-defs.m4 b/mpn/x86/x86-defs.m4

index b1f36ddeb548c13f9925e4c604c01b12f62f1b43..9f5649503ef0cef3f42e7291403619cdd51dee43 100644 (file)
--- a/mpn/x86/x86-defs.m4
+++ b/mpn/x86/x86-defs.m4
@@ -4,8 +4,8 @@ divert(-1)
  dnl  m4 macros for x86 assembler.
  
  
-dnl  Copyright 1999, 2000, 2001, 2002, 2003, 2007 Free Software Foundation,
-dnl  Inc.
+dnl  Copyright 1999, 2000, 2001, 2002, 2003, 2007, 2010, 2012 Free Software
+dnl  Foundation, Inc.
  dnl
  dnl  This file is part of the GNU MP Library.
  dnl
@@ -58,24 +58,39 @@ dnl  order they appear in that structure.
  
  define(CPUVEC_FUNCS_LIST,
  ``add_n',
+`addlsh1_n',
+`addlsh2_n',
  `addmul_1',
+`addmul_2',
+`bdiv_dbm1c',
+`com',
  `copyd',
  `copyi',
  `divexact_1',
-`divexact_by3c',
  `divrem_1',
  `gcd_1',
  `lshift',
+`lshiftc',
  `mod_1',
+`mod_1_1p',
+`mod_1_1p_cps',
+`mod_1s_2p',
+`mod_1s_2p_cps',
+`mod_1s_4p',
+`mod_1s_4p_cps',
  `mod_34lsub1',
  `modexact_1c_odd',
  `mul_1',
  `mul_basecase',
+`mullo_basecase',
  `preinv_divrem_1',
  `preinv_mod_1',
+`redc_1',
+`redc_2',
  `rshift',
  `sqr_basecase',
  `sub_n',
+`sublsh1_n',
  `submul_1'')
  
  
@@ -928,7 +943,9 @@ m4_assert_numargs(1)
  
  dnl  Usage LEA(symbol,reg)
  
-define(`LEA',`
+define(`LEA',
+m4_assert_numargs(2)
+`ifdef(`PIC',`
  define(`EPILOGUE_cpu',
  `
  L(movl_eip_`'substr($2,1)):
@@ -936,11 +953,12 @@ L(movl_eip_`'substr($2,1)):
         ret_internal
         SIZE($'`1, .-$'`1)')
  
-        call    L(movl_eip_`'substr($2,1))
-        addl    $_GLOBAL_OFFSET_TABLE_, $2
-        movl    $1@GOT($2), $2
-')
-
+       call    L(movl_eip_`'substr($2,1))
+       addl    $_GLOBAL_OFFSET_TABLE_, $2
+       movl    $1@GOT($2), $2
+',`
+       movl    `$'$1, $2
+')')
  
  define(`DEF_OBJECT',
  m4_assert_numargs_range(1,2)
@@ -953,4 +971,17 @@ define(`END_OBJECT',
  m4_assert_numargs(1)
  `      SIZE(`$1',.-`$1')')
  
+dnl  Usage: CALL(funcname)
+dnl  Expands to a call through the PLT when PIC is defined, else a direct call.
+
+define(`CALL',
+m4_assert_numargs(1)
+`ifdef(`PIC',
+  `call        GSYM_PREFIX`'$1@PLT',
+  `call        GSYM_PREFIX`'$1')')
+
+ifdef(`PIC',
+`define(`PIC_WITH_EBX')',
+`undefine(`PIC_WITH_EBX')')
+
  divert`'dnl
diff --git a/mpn/x86_64/addaddmul_1msb0.asm b/mpn/x86_64/addaddmul_1msb0.asm

index 89e7bed9800431eabc31891ae9ffe2ae4546735c..ca7aacd14b2fee7d7179a4fb8055fb240ae44f25 100644 (file)
--- a/mpn/x86_64/addaddmul_1msb0.asm
+++ b/mpn/x86_64/addaddmul_1msb0.asm
@@ -20,9 +20,13 @@ dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
  include(`../config.m4')
  
  C           cycles/limb
-C K8:           2.167
-C P4:          12.0
-C P6-15:        4.0
+C AMD K8,K9     2.167
+C AMD K10       2.167
+C Intel P4     12.0
+C Intel core2   4.0
+C Intel corei   ?
+C Intel atom    ?
+C VIA nano      ?
  
  C TODO
  C  * Perhaps handle various n mod 3 sizes better.  The code now is too large.
@@ -67,7 +71,7 @@ L(top):       mul     %r9
         mul     %r8
         add     %rax, %r10
         mov     -16(bp,n,8), %rax
-       mov     $0, %r11d
+       mov     $0, R32(%r11)
         adc     %rdx, %r11
         mul     %r9
         add     %rax, %r10
@@ -77,7 +81,7 @@ L(top):       mul     %r9
         mul     %r8
         add     %rax, %r11
         mov     -8(bp,n,8), %rax
-       mov     $0, %r12d
+       mov     $0, R32(%r12)
         adc     %rdx, %r12
         mul     %r9
         add     %rax, %r11
@@ -87,7 +91,7 @@ L(top):       mul     %r9
         add     %rax, %r12
         mov     %r11, -8(rp,n,8)
         mov     (bp,n,8), %rax
-       mov     $0, %r10d
+       mov     $0, R32(%r10)
         adc     %rdx, %r10
         add     $3, n
         js      L(top)
@@ -104,7 +108,7 @@ L(end):     cmp     $1, R32(n)
         mul     %r8
         add     %rax, %r10
         mov     -16(bp), %rax
-       mov     $0, %r11d
+       mov     $0, R32(%r11)
         adc     %rdx, %r11
         mul     %r9
         add     %rax, %r10
@@ -114,7 +118,7 @@ L(end):     cmp     $1, R32(n)
         mul     %r8
         add     %rax, %r11
         mov     -8(bp), %rax
-       mov     $0, %r12d
+       mov     $0, R32(%r12)
         adc     %rdx, %r12
         mul     %r9
         add     %rax, %r11
@@ -133,7 +137,7 @@ L(end):     cmp     $1, R32(n)
         mul     %r8
         add     %rax, %r10
         mov     -8(bp), %rax
-       mov     $0, %r11d
+       mov     $0, R32(%r11)
         adc     %rdx, %r11
         mul     %r9
         add     %rax, %r10
diff --git a/mpn/x86_64/addmul_2.asm b/mpn/x86_64/addmul_2.asm

index e762113f6b043c89788641f330659b48e23976df..991817dbfa6fa238099b64d48a4b06ca4f841773 100644 (file)
--- a/mpn/x86_64/addmul_2.asm
+++ b/mpn/x86_64/addmul_2.asm
@@ -1,7 +1,7 @@
  dnl  AMD64 mpn_addmul_2 -- Multiply an n-limb vector with a 2-limb vector and
  dnl  add the result to a third limb vector.
  
-dnl  Copyright 2008 Free Software Foundation, Inc.
+dnl  Copyright 2008, 2011, 2012 Free Software Foundation, Inc.
  
  dnl  This file is part of the GNU MP Library.
  
@@ -21,20 +21,20 @@ dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
  include(`../config.m4')
  
  C           cycles/limb
-C K8,K9:        2.375
-C K10:          2.375
-C P4:           ?
-C P6 core2:     4.45
-C P6 corei7:    4.35
+C AMD K8,K9     2.375
+C AMD K10       2.375
+C Intel P4     15-16
+C Intel core2   4.45
+C Intel NHM     4.32
+C Intel SBR     3.4
+C Intel atom    ?
+C VIA nano      4.4
  
  C This code is the result of running a code generation and optimization tool
  C suite written by David Harvey and Torbjorn Granlund.
  
  C TODO
-C  * Work on feed-in and wind-down code.
-C  * Convert "mov $0" to "xor".
-C  * Adjust initial lea to save some bytes.
-C  * Perhaps adjust n from n_param&3 value?
+C  * Tune feed-in and wind-down code.
  
  C INPUT PARAMETERS
  define(`rp',     `%rdi')
@@ -50,119 +50,124 @@ define(`w2', `%rbp')
  define(`w3', `%r10')
  define(`n',  `%r11')
  
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
+
  ASM_START()
         TEXT
         ALIGN(16)
  PROLOGUE(mpn_addmul_2)
+       FUNC_ENTRY(4)
+       mov     n_param, n
         push    %rbx
         push    %rbp
  
-       mov     (vp), v0
+       mov     0(vp), v0
         mov     8(vp), v1
  
-       mov     n_param, n
-       neg     n
-       lea     -32(up,n_param,8), up
-       lea     -32(rp,n_param,8), rp
-
-       and     $3, R32(n_param)
-       jz      L(am2p0)
-       cmp     $2, R32(n_param)
-       jc      L(am2p1)
-       jz      L(am2p2)
-L(am2p3):
-       mov     32(up,n,8), %rax
+       mov     R32(n_param), R32(%rbx)
+       mov     (up), %rax
+       lea     -8(up,n_param,8), up
+       lea     -8(rp,n_param,8), rp
         mul     v0
-       mov     %rax, w1
-       mov     32(up,n,8), %rax
+       neg     n
+       and     $3, R32(%rbx)
+       jz      L(b0)
+       cmp     $2, R32(%rbx)
+       jc      L(b1)
+       jz      L(b2)
+
+L(b3): mov     %rax, w1
         mov     %rdx, w2
         xor     R32(w3), R32(w3)
-       add     $2, n
-       jmp     L(am3)
-L(am2p0):
-       mov     32(up,n,8), %rax
-       mul     v0
-       mov     %rax, w0
-       mov     32(up,n,8), %rax
-       mov     %rdx, w1
-       xor     R32(w2), R32(w2)
-       add     $3, n
-       jmp     L(am0)
-L(am2p1):
-       mov     32(up,n,8), %rax
-       mul     v0
-       mov     %rax, w3
-       mov     32(up,n,8), %rax
-       mov     %rdx, w0
-       xor     R32(w1), R32(w1)
-       jmp     L(am1)
-L(am2p2):
-       mov     32(up,n,8), %rax
-       mul     v0
-       mov     %rax, w2
-       mov     32(up,n,8), %rax
+       mov     8(up,n,8), %rax
+       dec     n
+       jmp     L(lo3)
+
+L(b2): mov     %rax, w2
+       mov     8(up,n,8), %rax
         mov     %rdx, w3
         xor     R32(w0), R32(w0)
+       add     $-2, n
+       jmp     L(lo2)
+
+L(b1): mov     %rax, w3
+       mov     8(up,n,8), %rax
+       mov     %rdx, w0
         xor     R32(w1), R32(w1)
-       add     $1, n
-       jmp     L(am2)
+       inc     n
+       jmp     L(lo1)
  
-       ALIGN(32)
-L(top):
-       add     w3, (rp,n,8)            C 0 21
-       adc     %rax, w0                C 1 24
+L(b0): mov     $0, R32(w3)
+       mov     %rax, w0
         mov     8(up,n,8), %rax
-       adc     %rdx, w1                C 3 26
+       mov     %rdx, w1
+       xor     R32(w2), R32(w2)
+       jmp     L(lo0)
+
+       ALIGN(32)
+L(top):        mov     $0, R32(w1)
+       mul     v0
+       add     %rax, w3
+       mov     (up,n,8), %rax
+       adc     %rdx, w0
+       adc     $0, R32(w1)
+L(lo1):        mul     v1
+       add     w3, (rp,n,8)
+       mov     $0, R32(w3)
+       adc     %rax, w0
         mov     $0, R32(w2)
+       mov     8(up,n,8), %rax
+       adc     %rdx, w1
         mul     v0
-       add     %rax, w0                C 2 26
+       add     %rax, w0
         mov     8(up,n,8), %rax
-       adc     %rdx, w1                C 4 28
-       adc     $0, R32(w2)             C 6 30
-L(am0):        mul     v1
-       add     w0, 8(rp,n,8)           C 3 27
-       adc     %rax, w1                C 6 30
-       adc     %rdx, w2                C 8 32
+       adc     %rdx, w1
+       adc     $0, R32(w2)
+L(lo0):        mul     v1
+       add     w0, 8(rp,n,8)
+       adc     %rax, w1
+       adc     %rdx, w2
         mov     16(up,n,8), %rax
-       mov     $0, R32(w3)
         mul     v0
-       add     %rax, w1                C 8
+       add     %rax, w1
+       adc     %rdx, w2
+       adc     $0, R32(w3)
         mov     16(up,n,8), %rax
-       adc     %rdx, w2                C 10
-       adc     $0, R32(w3)             C 12
-L(am3):        mul     v1
-       add     w1, 16(rp,n,8)          C 9
-       adc     %rax, w2                C 12
+L(lo3):        mul     v1
+       add     w1, 16(rp,n,8)
+       adc     %rax, w2
+       adc     %rdx, w3
+       xor     R32(w0), R32(w0)
         mov     24(up,n,8), %rax
-       adc     %rdx, w3                C 14
         mul     v0
-       mov     $0, R32(w0)
-       add     %rax, w2                C 14
-       adc     %rdx, w3                C 16
-       mov     $0, R32(w1)
+       add     %rax, w2
         mov     24(up,n,8), %rax
-       adc     $0, R32(w0)             C 18
-L(am2):        mul     v1
-       add     w2, 24(rp,n,8)          C 15
-       adc     %rax, w3                C 18
-       adc     %rdx, w0                C 20
-       mov     32(up,n,8), %rax
-       mul     v0
-       add     %rax, w3                C 20
+       adc     %rdx, w3
+       adc     $0, R32(w0)
+L(lo2):        mul     v1
+       add     w2, 24(rp,n,8)
+       adc     %rax, w3
+       adc     %rdx, w0
         mov     32(up,n,8), %rax
-       adc     %rdx, w0                C 22
-       adc     $0, R32(w1)             C 24
-L(am1):        mul     v1
         add     $4, n
         js      L(top)
  
-       add     w3, (rp,n,8)
+L(end):        xor     R32(w1), R32(w1)
+       mul     v0
+       add     %rax, w3
+       mov     (up), %rax
+       adc     %rdx, w0
+       adc     R32(w1), R32(w1)
+       mul     v1
+       add     w3, (rp)
         adc     %rax, w0
         adc     %rdx, w1
-       mov     w0, 8(rp,n,8)
+       mov     w0, 8(rp)
         mov     w1, %rax
  
         pop     %rbp
         pop     %rbx
+       FUNC_EXIT()
         ret
  EPILOGUE()
diff --git a/mpn/x86_64/aorrlsh1_n.asm b/mpn/x86_64/aorrlsh1_n.asm

index 75fd009c613b4fffddcbc27698a3155b4bcae57b..ede2a5fb73f71aafa2de43a4fa5558c231846ea9 100644 (file)
--- a/mpn/x86_64/aorrlsh1_n.asm
+++ b/mpn/x86_64/aorrlsh1_n.asm
@@ -1,7 +1,8 @@
  dnl  AMD64 mpn_addlsh1_n -- rp[] = up[] + (vp[] << 1)
  dnl  AMD64 mpn_rsblsh1_n -- rp[] = (vp[] << 1) - up[]
  
-dnl  Copyright 2003, 2005, 2006, 2007, 2008, 2009 Free Software Foundation, Inc.
+dnl  Copyright 2003, 2005, 2006, 2007, 2008, 2009, 2011, 2012 Free Software
+dnl  Foundation, Inc.
  
  dnl  This file is part of the GNU MP Library.
  
@@ -22,12 +23,13 @@ include(`../config.m4')
  
  
  C           cycles/limb
-C K8,K9:        2
-C K10:          2
-C P4:           13
-C P6 core2:     3.45
-C P6 corei7:    3.45
-C P6 atom:      ?
+C AMD K8,K9     2
+C AMD K10       2
+C Intel P4      13
+C Intel core2   3.45
+C Intel corei   3.45
+C Intel atom    ?
+C VIA nano      ?
  
  
  C Sometimes speed degenerates, supposedly related to that some operand
@@ -43,20 +45,24 @@ define(`vp',`%rdx')
  define(`n', `%rcx')
  
  ifdef(`OPERATION_addlsh1_n', `
-       define(ADDSUB,        add)
-       define(ADCSBB,        adc)
-       define(func,          mpn_addlsh1_n)')
+  define(ADDSUB,       add)
+  define(ADCSBB,       adc)
+  define(func,         mpn_addlsh1_n)')
  ifdef(`OPERATION_rsblsh1_n', `
-       define(ADDSUB,        sub)
-       define(ADCSBB,        sbb)
-       define(func,          mpn_rsblsh1_n)')
+  define(ADDSUB,       sub)
+  define(ADCSBB,       sbb)
+  define(func,         mpn_rsblsh1_n)')
  
  MULFUNC_PROLOGUE(mpn_addlsh1_n mpn_rsblsh1_n)
  
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
+
  ASM_START()
         TEXT
         ALIGN(16)
  PROLOGUE(func)
+       FUNC_ENTRY(4)
         push    %rbp
  
         mov     (vp), %r8
@@ -146,5 +152,6 @@ ifdef(`OPERATION_rsblsh1_n',`
         movslq  R32(%rbp), %rax')
  
         pop     %rbp
+       FUNC_EXIT()
         ret
  EPILOGUE()
diff --git a/mpn/x86_64/aorrlsh2_n.asm b/mpn/x86_64/aorrlsh2_n.asm

index 16cecef8df29b50ea524cf69f28a29e13182ebed..898d4c01b59889a5332e0e1d0695a35d82b74920 100644 (file)
--- a/mpn/x86_64/aorrlsh2_n.asm
+++ b/mpn/x86_64/aorrlsh2_n.asm
@@ -1,8 +1,9 @@
-dnl  AMD64 mpn_addlsh2_n and mpn_rsblsh2_n.  R = 2*V +- U.
-dnl  ("rsb" means reversed subtract, name mandated by mpn_sublsh2_n which
-dnl  subtacts the shifted operand from the unshifted operand.)
+dnl  AMD64 mpn_addlsh2_n -- rp[] = up[] + (vp[] << 2)
+dnl  AMD64 mpn_rsblsh2_n -- rp[] = (vp[] << 2) - up[]
  
-dnl  Copyright 2009 Free Software Foundation, Inc.
+dnl  Contributed to the GNU project by Torbjorn Granlund.
+
+dnl  Copyright 2009, 2010, 2011 Free Software Foundation, Inc.
  
  dnl  This file is part of the GNU MP Library.
  
@@ -21,134 +22,21 @@ dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
  
  include(`../config.m4')
  
-
-C           cycles/limb
-C K8,K9:        2
-C K10:          2
-C P4:           ?
-C P6 core2:     3
-C P6 corei7:    2.75
-C P6 atom:      ?
-
-C INPUT PARAMETERS
-define(`rp',   `%rdi')
-define(`up',   `%rsi')
-define(`vp',   `%rdx')
-define(`n',    `%rcx')
+define(LSH, 2)
+define(RSH, 62)
  
  ifdef(`OPERATION_addlsh2_n',`
-  define(ADDSUB,        `add')
-  define(ADCSBB,       `adc')
-  define(func, mpn_addlsh2_n)')
+  define(ADDSUB,       add)
+  define(ADCSBB,       adc)
+  define(func,         mpn_addlsh2_n)')
  ifdef(`OPERATION_rsblsh2_n',`
-  define(ADDSUB,        `sub')
-  define(ADCSBB,       `sbb')
-  define(func, mpn_rsblsh2_n)')
+  define(ADDSUB,       sub)
+  define(ADCSBB,       sbb)
+  define(func,         mpn_rsblsh2_n)')
  
  MULFUNC_PROLOGUE(mpn_addlsh2_n mpn_rsblsh2_n)
  
-ASM_START()
-       TEXT
-       ALIGN(16)
-PROLOGUE(func)
-       push    %r12
-       push    %r13
-       push    %r14
-       push    %r15
-
-       mov     (vp), %r8
-       lea     (,%r8,4), %r12
-       shr     $62, %r8
-
-       mov     R32(n), R32(%rax)
-       lea     (rp,n,8), rp
-       lea     (up,n,8), up
-       lea     (vp,n,8), vp
-       neg     n
-       and     $3, R8(%rax)
-       je      L(b00)
-       cmp     $2, R8(%rax)
-       jc      L(b01)
-       je      L(b10)
-
-L(b11):        mov     8(vp,n,8), %r10
-       lea     (%r8,%r10,4), %r14
-       shr     $62, %r10
-       mov     16(vp,n,8), %r11
-       lea     (%r10,%r11,4), %r15
-       shr     $62, %r11
-       ADDSUB  (up,n,8), %r12
-       ADCSBB  8(up,n,8), %r14
-       ADCSBB  16(up,n,8), %r15
-       sbb     R32(%rax), R32(%rax)              C save carry for next
-       mov     %r12, (rp,n,8)
-       mov     %r14, 8(rp,n,8)
-       mov     %r15, 16(rp,n,8)
-       add     $3, n
-       js      L(top)
-       jmp     L(end)
-
-L(b01):        mov     %r8, %r11
-       ADDSUB  (up,n,8), %r12
-       sbb     R32(%rax), R32(%rax)              C save carry for next
-       mov     %r12, (rp,n,8)
-       add     $1, n
-       js      L(top)
-       jmp     L(end)
-
-L(b10):        mov     8(vp,n,8), %r11
-       lea     (%r8,%r11,4), %r15
-       shr     $62, %r11
-       ADDSUB  (up,n,8), %r12
-       ADCSBB  8(up,n,8), %r15
-       sbb     R32(%rax), R32(%rax)              C save carry for next
-       mov     %r12, (rp,n,8)
-       mov     %r15, 8(rp,n,8)
-       add     $2, n
-       js      L(top)
-       jmp     L(end)
-
-L(b00):        mov     8(vp,n,8), %r9
-       mov     16(vp,n,8), %r10
-       jmp     L(e00)
-
-       ALIGN(16)
-L(top):        mov     16(vp,n,8), %r10
-       mov     (vp,n,8), %r8
-       mov     8(vp,n,8), %r9
-       lea     (%r11,%r8,4), %r12
-       shr     $62, %r8
-L(e00):        lea     (%r8,%r9,4), %r13
-       shr     $62, %r9
-       mov     24(vp,n,8), %r11
-       lea     (%r9,%r10,4), %r14
-       shr     $62, %r10
-       lea     (%r10,%r11,4), %r15
-       shr     $62, %r11
-       add     R32(%rax), R32(%rax)              C restore carry
-       ADCSBB  (up,n,8), %r12
-       ADCSBB  8(up,n,8), %r13
-       ADCSBB  16(up,n,8), %r14
-       ADCSBB  24(up,n,8), %r15
-       mov     %r12, (rp,n,8)
-       mov     %r13, 8(rp,n,8)
-       mov     %r14, 16(rp,n,8)
-       sbb     R32(%rax), R32(%rax)              C save carry for next
-       mov     %r15, 24(rp,n,8)
-       add     $4, n
-       js      L(top)
-L(end):
-
-ifdef(`OPERATION_addlsh2_n',`
-       sub     R32(%r11), R32(%rax)
-       neg     R32(%rax)')
-ifdef(`OPERATION_rsblsh2_n',`
-       add     R32(%r11), R32(%rax)
-       movslq  R32(%rax), %rax')
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
  
-       pop     %r15
-       pop     %r14
-       pop     %r13
-       pop     %r12
-       ret
-EPILOGUE()
+include_mpn(`x86_64/aorrlshC_n.asm')
diff --git a/mpn/x86_64/aorrlshC_n.asm b/mpn/x86_64/aorrlshC_n.asm

new file mode 100644 (file)

index 0000000..7e37166
--- /dev/null
+++ b/mpn/x86_64/aorrlshC_n.asm
@@ -0,0 +1,149 @@
+dnl  AMD64 mpn_addlshC_n -- rp[] = up[] + (vp[] << C)
+dnl  AMD64 mpn_rsblshC_n -- rp[] = (vp[] << C) - up[]
+
+dnl  Copyright 2009, 2010, 2011, 2012 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+
+C           cycles/limb
+C AMD K8,K9     2
+C AMD K10       2
+C Intel P4      ?
+C Intel core2   3
+C Intel NHM     2.75
+C Intel SBR     2.55
+C Intel atom    ?
+C VIA nano      ?
+
+C INPUT PARAMETERS
+define(`rp',   `%rdi')
+define(`up',   `%rsi')
+define(`vp',   `%rdx')
+define(`n',    `%rcx')
+
+define(M, eval(m4_lshift(1,LSH)))
+
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
+
+ASM_START()
+       TEXT
+       ALIGN(16)
+PROLOGUE(func)                 C rp[] = up[] + (vp[] << LSH), or (vp[] << LSH) - up[] for the rsb variant
+       FUNC_ENTRY(4)
+       push    %r12            C r12-r15 serve as scratch below and are popped again before ret
+       push    %r13
+       push    %r14
+       push    %r15
+
+       mov     (vp), %r8
+       lea     (,%r8,M), %r12          C r12 = vp[0] * M, with M = 1 << LSH
+       shr     $RSH, %r8               C r8 = vp[0] >> RSH, the bits that move up into the next limb
+
+       mov     R32(n), R32(%rax)
+       lea     (rp,n,8), rp            C advance rp, up and vp by n limbs ...
+       lea     (up,n,8), up
+       lea     (vp,n,8), vp
+       neg     n                       C ... and index them with a negative n that counts up to zero
+       and     $3, R8(%rax)            C dispatch on n mod 4
+       je      L(b00)
+       cmp     $2, R8(%rax)
+       jc      L(b01)
+       je      L(b10)
+
+L(b11):        mov     8(vp,n,8), %r10         C n mod 4 = 3: three limbs before the main loop
+       lea     (%r8,%r10,M), %r14
+       shr     $RSH, %r10
+       mov     16(vp,n,8), %r11
+       lea     (%r10,%r11,M), %r15
+       shr     $RSH, %r11              C r11 = shifted-out bits handed on to L(top) or L(end)
+       ADDSUB  (up,n,8), %r12
+       ADCSBB  8(up,n,8), %r14
+       ADCSBB  16(up,n,8), %r15
+       sbb     R32(%rax), R32(%rax)              C save carry for next
+       mov     %r12, (rp,n,8)
+       mov     %r14, 8(rp,n,8)
+       mov     %r15, 16(rp,n,8)
+       add     $3, n
+       js      L(top)                  C n still negative: limbs remain for the 4-way loop
+       jmp     L(end)
+
+L(b01):        mov     %r8, %r11               C n mod 4 = 1: r11 takes the shifted-out bits of vp[0]
+       ADDSUB  (up,n,8), %r12
+       sbb     R32(%rax), R32(%rax)              C save carry for next
+       mov     %r12, (rp,n,8)
+       add     $1, n
+       js      L(top)
+       jmp     L(end)
+
+L(b10):        mov     8(vp,n,8), %r11         C n mod 4 = 2: two limbs before the main loop
+       lea     (%r8,%r11,M), %r15
+       shr     $RSH, %r11
+       ADDSUB  (up,n,8), %r12
+       ADCSBB  8(up,n,8), %r15
+       sbb     R32(%rax), R32(%rax)              C save carry for next
+       mov     %r12, (rp,n,8)
+       mov     %r15, 8(rp,n,8)
+       add     $2, n
+       js      L(top)
+       jmp     L(end)
+
+L(b00):        mov     8(vp,n,8), %r9          C n mod 4 = 0: r12 and r8 already hold limb 0, enter the loop at L(e00)
+       mov     16(vp,n,8), %r10
+       jmp     L(e00)
+
+       ALIGN(16)
+L(top):        mov     16(vp,n,8), %r10        C main loop, four limbs per pass
+       mov     (vp,n,8), %r8
+       mov     8(vp,n,8), %r9
+       lea     (%r11,%r8,M), %r12      C r11 carries the shifted-out bits of the previous limb
+       shr     $RSH, %r8
+L(e00):        lea     (%r8,%r9,M), %r13
+       shr     $RSH, %r9
+       mov     24(vp,n,8), %r11
+       lea     (%r9,%r10,M), %r14
+       shr     $RSH, %r10
+       lea     (%r10,%r11,M), %r15
+       shr     $RSH, %r11
+       add     R32(%rax), R32(%rax)              C restore carry
+       ADCSBB  (up,n,8), %r12
+       ADCSBB  8(up,n,8), %r13
+       ADCSBB  16(up,n,8), %r14
+       ADCSBB  24(up,n,8), %r15
+       mov     %r12, (rp,n,8)
+       mov     %r13, 8(rp,n,8)
+       mov     %r14, 16(rp,n,8)
+       sbb     R32(%rax), R32(%rax)              C save carry for next
+       mov     %r15, 24(rp,n,8)
+       add     $4, n
+       js      L(top)
+L(end):                                C here eax is 0 or -1 from the last sbb and r11 holds the last shifted-out bits
+
+ifelse(ADDSUB,add,`
+       sub     R32(%r11), R32(%rax)
+       neg     R32(%rax)               C add variant: eax = r11 + saved carry
+',`
+       add     R32(%r11), R32(%rax)
+       movslq  R32(%rax), %rax         C rsb variant: rax = sign-extended r11 - saved borrow
+')
+       pop     %r15
+       pop     %r14
+       pop     %r13
+       pop     %r12
+       FUNC_EXIT()
+       ret
+EPILOGUE()
diff --git a/mpn/x86_64/aorrlsh_n.asm b/mpn/x86_64/aorrlsh_n.asm

index 9aa8af97b1645f3f2d58b889f2041bea66b82c51..6e81245d6000ca534b9623be9fb7def28bee5322 100644 (file)
--- a/mpn/x86_64/aorrlsh_n.asm
+++ b/mpn/x86_64/aorrlsh_n.asm
@@ -1,8 +1,6 @@
  dnl  AMD64 mpn_addlsh_n and mpn_rsblsh_n.  R = V2^k +- U.
-dnl  ("rsb" means reversed subtract, name mandated by mpn_sublsh1_n which
-dnl  subtacts the shifted operand from the unshifted operand.)
  
-dnl  Copyright 2006 Free Software Foundation, Inc.
+dnl  Copyright 2006, 2010, 2011, 2012 Free Software Foundation, Inc.
  
  dnl  This file is part of the GNU MP Library.
  
@@ -23,10 +21,14 @@ include(`../config.m4')
  
  
  C           cycles/limb
-C K8,K9:        3.25   (mpn_lshift + mpn_add_n costs about 4.1 c/l)
-C K10:          3.25   (mpn_lshift + mpn_add_n costs about 4.1 c/l)
-C P4:          14
-C P6-15:        4
+C AMD K8,K9     3.1    < 3.85 for lshift + add_n
+C AMD K10       3.1    < 3.85 for lshift + add_n
+C Intel P4     14.6    > 7.33 for lshift + add_n
+C Intel core2   3.87   > 3.27 for lshift + add_n
+C Intel NHM     4      > 3.75 for lshift + add_n
+C Intel SBR    (5.8)   > 3.46 for lshift + add_n
+C Intel atom   (7.75)  < 8.75 for lshift + add_n
+C VIA nano      4.7    < 6.25 for lshift + add_n
  
  C This was written quickly and not optimized at all.  Surely one could get
  C closer to 3 c/l or perhaps even under 3 c/l.  Ideas:
@@ -44,62 +46,64 @@ define(`n', `%rcx')
  define(`cnt',  `%r8')
  
  ifdef(`OPERATION_addlsh_n',`
-  define(ADDSUBC,       `adc')
+  define(ADCSBB,       `adc')
    define(func, mpn_addlsh_n)
  ')
  ifdef(`OPERATION_rsblsh_n',`
-  define(ADDSUBC,       `sbb')
+  define(ADCSBB,       `sbb')
    define(func, mpn_rsblsh_n)
  ')
  
  MULFUNC_PROLOGUE(mpn_addlsh_n mpn_rsblsh_n)
  
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
+
  ASM_START()
         TEXT
         ALIGN(16)
  PROLOGUE(func)
-
+       FUNC_ENTRY(4)
+IFDOS(`        mov     56(%rsp), %r8d  ')
         push    %r12
         push    %r13
         push    %r14
-       push    %r15
+       push    %rbp
         push    %rbx
  
         mov     n, %rax
-       xor     %ebx, %ebx              C clear carry save register
-       mov     %r8d, %ecx              C shift count
-       xor     %r15d, %r15d            C limb carry
+       xor     R32(%rbx), R32(%rbx)    C clear carry save register
+       mov     R32(%r8), R32(%rcx)     C shift count
+       xor     R32(%rbp), R32(%rbp)    C limb carry
  
-       mov     %eax, %r11d
-       and     $3, %r11d
+       mov     R32(%rax), R32(%r11)
+       and     $3, R32(%r11)
         je      L(4)
-       sub     $1, %r11d
+       sub     $1, R32(%r11)
  
-L(oopette):
-       mov     0(vp), %r8
+L(012):        mov     (vp), %r8
         mov     %r8, %r12
-       shl     %cl, %r8
-       or      %r15, %r8
-       neg     %cl
-       mov     %r12, %r15
-       shr     %cl, %r15
-       neg     %cl
-       add     %ebx, %ebx
-       ADDSUBC 0(up), %r8
-       mov     %r8, 0(rp)
-       sbb     %ebx, %ebx
+       shl     R8(%rcx), %r8
+       or      %rbp, %r8
+       neg     R8(%rcx)
+       mov     %r12, %rbp
+       shr     R8(%rcx), %rbp
+       neg     R8(%rcx)
+       add     R32(%rbx), R32(%rbx)
+       ADCSBB  (up), %r8
+       mov     %r8, (rp)
+       sbb     R32(%rbx), R32(%rbx)
         lea     8(up), up
         lea     8(vp), vp
         lea     8(rp), rp
-       sub     $1, %r11d
-       jnc     L(oopette)
+       sub     $1, R32(%r11)
+       jnc     L(012)
  
-L(4):
-       sub     $4, %rax
+L(4):  sub     $4, %rax
         jc      L(end)
  
-L(oop):
-       mov     0(vp), %r8
+       ALIGN(16)
+L(top):        mov     (vp), %r8
         mov     %r8, %r12
         mov     8(vp), %r9
         mov     %r9, %r13
@@ -107,55 +111,55 @@ L(oop):
         mov     %r10, %r14
         mov     24(vp), %r11
  
-       shl     %cl, %r8
-       shl     %cl, %r9
-       shl     %cl, %r10
-       or      %r15, %r8
-       mov     %r11, %r15
-       shl     %cl, %r11
+       shl     R8(%rcx), %r8
+       shl     R8(%rcx), %r9
+       shl     R8(%rcx), %r10
+       or      %rbp, %r8
+       mov     %r11, %rbp
+       shl     R8(%rcx), %r11
  
-       neg     %cl
+       neg     R8(%rcx)
  
-       shr     %cl, %r12
-       shr     %cl, %r13
-       shr     %cl, %r14
-       shr     %cl, %r15               C used next loop
+       shr     R8(%rcx), %r12
+       shr     R8(%rcx), %r13
+       shr     R8(%rcx), %r14
+       shr     R8(%rcx), %rbp          C used next iteration
  
         or      %r12, %r9
         or      %r13, %r10
         or      %r14, %r11
  
-       neg     %cl
+       neg     R8(%rcx)
  
-       add     %ebx, %ebx              C restore carry flag
+       add     R32(%rbx), R32(%rbx)    C restore carry flag
  
-       ADDSUBC 0(up), %r8
-       ADDSUBC 8(up), %r9
-       ADDSUBC 16(up), %r10
-       ADDSUBC 24(up), %r11
+       ADCSBB  (up), %r8
+       ADCSBB  8(up), %r9
+       ADCSBB  16(up), %r10
+       ADCSBB  24(up), %r11
  
-       mov     %r8, 0(rp)
+       mov     %r8, (rp)
         mov     %r9, 8(rp)
         mov     %r10, 16(rp)
         mov     %r11, 24(rp)
  
-       sbb     %ebx, %ebx              C save carry flag
+       sbb     R32(%rbx), R32(%rbx)    C save carry flag
  
         lea     32(up), up
         lea     32(vp), vp
         lea     32(rp), rp
  
         sub     $4, %rax
-       jnc     L(oop)
-L(end):
-       add     %ebx, %ebx
-       ADDSUBC $0, %r15
-       mov     %r15, %rax
+       jnc     L(top)
+
+L(end):        add     R32(%rbx), R32(%rbx)
+       ADCSBB  $0, %rbp
+       mov     %rbp, %rax
         pop     %rbx
-       pop     %r15
+       pop     %rbp
         pop     %r14
         pop     %r13
         pop     %r12
-
+       FUNC_EXIT()
         ret
  EPILOGUE()
diff --git a/mpn/x86_64/aors_err1_n.asm b/mpn/x86_64/aors_err1_n.asm

new file mode 100644 (file)

index 0000000..f32910a
--- /dev/null
+++ b/mpn/x86_64/aors_err1_n.asm
@@ -0,0 +1,214 @@
+dnl  AMD64 mpn_add_err1_n, mpn_sub_err1_n
+
+dnl  Contributed by David Harvey.
+
+dnl  Copyright 2011 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C           cycles/limb
+C AMD K8,K9     2.75 (most alignments, degenerates to 3 c/l for some alignments)
+C AMD K10       ?
+C Intel P4      ?
+C Intel core2   ?
+C Intel corei   ?
+C Intel atom    ?
+C VIA nano      ?
+
+
+C INPUT PARAMETERS
+define(`rp',   `%rdi')
+define(`up',   `%rsi')
+define(`vp',   `%rdx')
+define(`ep',   `%rcx')
+define(`yp',   `%r8')
+define(`n',    `%r9')
+define(`cy_param',     `8(%rsp)')
+
+define(`el',   `%rbx')
+define(`eh',   `%rbp')
+define(`t0',   `%r10')
+define(`t1',   `%r11')
+define(`t2',   `%r12')
+define(`t3',   `%r13')
+define(`w0',   `%r14')
+define(`w1',   `%r15')
+
+ifdef(`OPERATION_add_err1_n', `
+       define(ADCSBB,        adc)
+       define(func,          mpn_add_err1_n)')
+ifdef(`OPERATION_sub_err1_n', `
+       define(ADCSBB,        sbb)
+       define(func,          mpn_sub_err1_n)')
+
+MULFUNC_PROLOGUE(mpn_add_err1_n mpn_sub_err1_n)
+
+
+ASM_START()
+       TEXT
+       ALIGN(16)
+PROLOGUE(func)         C rp[] = up[] +/- vp[] with carry-in; yp limbs picked by each carry-out are summed into ep[0..1]
+       mov     cy_param, %rax          C fetch the stack argument before the pushes move rsp
+
+       push    %rbx
+       push    %rbp
+       push    %r12
+       push    %r13
+       push    %r14
+       push    %r15
+
+       lea     (up,n,8), up            C advance up, vp and rp by n limbs; n is negated in each case below
+       lea     (vp,n,8), vp
+       lea     (rp,n,8), rp
+
+       mov     R32(n), R32(%r10)
+       and     $3, R32(%r10)           C dispatch on n mod 4
+       jz      L(0mod4)
+       cmp     $2, R32(%r10)
+       jc      L(1mod4)
+       jz      L(2mod4)
+L(3mod4):
+       xor     R32(el), R32(el)        C eh:el is the two-limb error sum, starting at zero
+       xor     R32(eh), R32(eh)
+       xor     R32(t0), R32(t0)
+       xor     R32(t1), R32(t1)
+       lea     -24(yp,n,8), yp         C yp = &yp[n-3], so 16(yp) is yp[n-1]
+       neg     n
+
+        shr     $1, %al            C restore carry
+        mov     (up,n,8), w0
+        mov     8(up,n,8), w1
+        ADCSBB  (vp,n,8), w0
+       mov     w0, (rp,n,8)
+       cmovc   16(yp), el              C limb 0 carried: take yp[n-1]
+        ADCSBB  8(vp,n,8), w1
+       mov     w1, 8(rp,n,8)
+       cmovc   8(yp), t0               C limb 1 carried: take yp[n-2]
+        mov     16(up,n,8), w0
+        ADCSBB  16(vp,n,8), w0
+       mov     w0, 16(rp,n,8)
+       cmovc   (yp), t1                C limb 2 carried: take yp[n-3]
+       setc    %al                C save carry
+       add     t0, el
+       adc     $0, eh
+       add     t1, el
+       adc     $0, eh
+
+       add     $3, n
+       jnz     L(loop)
+       jmp     L(end)
+
+       ALIGN(16)
+L(0mod4):
+       xor     R32(el), R32(el)
+       xor     R32(eh), R32(eh)
+       lea     (yp,n,8), yp            C yp = &yp[n]; the loop reads downward from -8(yp)
+       neg     n
+       jmp     L(loop)
+
+       ALIGN(16)
+L(1mod4):
+       xor     R32(el), R32(el)
+       xor     R32(eh), R32(eh)
+       lea     -8(yp,n,8), yp          C yp = &yp[n-1]
+       neg     n
+
+        shr     $1, %al            C restore carry
+        mov     (up,n,8), w0
+        ADCSBB  (vp,n,8), w0
+        mov     w0, (rp,n,8)
+       cmovc   (yp), el                C limb 0 carried: take yp[n-1]
+       setc    %al                C save carry
+
+       add     $1, n
+       jnz     L(loop)
+       jmp     L(end)
+
+       ALIGN(16)
+L(2mod4):
+       xor     R32(el), R32(el)
+       xor     R32(eh), R32(eh)
+       xor     R32(t0), R32(t0)
+       lea     -16(yp,n,8), yp         C yp = &yp[n-2], so 8(yp) is yp[n-1]
+       neg     n
+
+        shr     $1, %al            C restore carry
+        mov     (up,n,8), w0
+        mov     8(up,n,8), w1
+        ADCSBB  (vp,n,8), w0
+        mov     w0, (rp,n,8)
+       cmovc   8(yp), el
+        ADCSBB  8(vp,n,8), w1
+        mov     w1, 8(rp,n,8)
+       cmovc   (yp), t0
+       setc    %al                C save carry
+       add     t0, el
+       adc     $0, eh
+
+       add     $2, n
+       jnz     L(loop)
+       jmp     L(end)
+
+       ALIGN(32)
+L(loop):
+        shr     $1, %al            C restore carry
+        mov     -8(yp), t0
+        mov     $0, R32(t3)             C zero t3 with mov, which leaves CF untouched
+        mov     (up,n,8), w0
+        mov     8(up,n,8), w1
+        ADCSBB  (vp,n,8), w0
+        cmovnc  t3, t0                  C no carry out of this limb: its yp limb contributes zero
+        ADCSBB  8(vp,n,8), w1
+        mov     -16(yp), t1
+        mov     w0, (rp,n,8)
+        mov     16(up,n,8), w0
+        mov     w1, 8(rp,n,8)
+        cmovnc  t3, t1
+        mov     -24(yp), t2
+        ADCSBB  16(vp,n,8), w0
+        cmovnc  t3, t2
+        mov     24(up,n,8), w1
+        ADCSBB  24(vp,n,8), w1
+        cmovc   -32(yp), t3             C t3 stays zero unless the fourth limb carried
+        setc    %al                C save carry
+        add     t0, el                  C fold the four selected yp limbs into eh:el
+        adc     $0, eh
+        add     t1, el
+        adc     $0, eh
+        add     t2, el
+        adc     $0, eh
+        mov     w0, 16(rp,n,8)
+        add     t3, el
+        lea     -32(yp), yp             C step yp down four limbs; lea does not change flags
+        adc     $0, eh
+        mov     w1, 24(rp,n,8)
+        add     $4, n
+        jnz     L(loop)
+
+L(end):
+       mov     el, (ep)                C store the error sum; al still holds the last saved carry
+       mov     eh, 8(ep)
+
+       pop     %r15
+       pop     %r14
+       pop     %r13
+       pop     %r12
+       pop     %rbp
+       pop     %rbx
+       ret
+EPILOGUE()
diff --git a/mpn/x86_64/aors_err2_n.asm b/mpn/x86_64/aors_err2_n.asm

new file mode 100644 (file)

index 0000000..529b493
--- /dev/null
+++ b/mpn/x86_64/aors_err2_n.asm
@@ -0,0 +1,161 @@
+dnl  AMD64 mpn_add_err2_n, mpn_sub_err2_n
+
+dnl  Contributed by David Harvey.
+
+dnl  Copyright 2011 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C           cycles/limb
+C AMD K8,K9     4.5
+C AMD K10       ?
+C Intel P4      ?
+C Intel core2   6.9
+C Intel corei   ?
+C Intel atom    ?
+C VIA nano      ?
+
+
+C INPUT PARAMETERS
+define(`rp',   `%rdi')
+define(`up',   `%rsi')
+define(`vp',   `%rdx')
+define(`ep',   `%rcx')
+define(`yp1',  `%r8')
+define(`yp2',   `%r9')
+define(`n_param',     `8(%rsp)')
+define(`cy_param',    `16(%rsp)')
+
+define(`cy1',   `%r14')
+define(`cy2',   `%rax')
+
+define(`n',     `%r10')
+
+define(`w',     `%rbx')
+define(`e1l',  `%rbp')
+define(`e1h',  `%r11')
+define(`e2l',  `%r12')
+define(`e2h',  `%r13')
+
+
+ifdef(`OPERATION_add_err2_n', `
+       define(ADCSBB,        adc)
+       define(func,          mpn_add_err2_n)')
+ifdef(`OPERATION_sub_err2_n', `
+       define(ADCSBB,        sbb)
+       define(func,          mpn_sub_err2_n)')
+
+MULFUNC_PROLOGUE(mpn_add_err2_n mpn_sub_err2_n)
+
+
+ASM_START()
+       TEXT
+       ALIGN(16)
+PROLOGUE(func)         C rp[] = up[] +/- vp[] with carry-in; carry-selected yp1 and yp2 limbs are summed into ep[0..3]
+       mov     cy_param, cy2           C read both stack arguments before the pushes move rsp
+       mov     n_param, n
+
+       push    %rbx
+       push    %rbp
+       push    %r12
+       push    %r13
+       push    %r14
+
+       xor     R32(e1l), R32(e1l)      C e1h:e1l and e2h:e2l are the two error sums
+       xor     R32(e1h), R32(e1h)
+       xor     R32(e2l), R32(e2l)
+       xor     R32(e2h), R32(e2h)
+
+       sub     yp1, yp2                C yp2 becomes an offset from yp1, so (yp1,yp2) addresses the yp2 limb
+
+       lea     (rp,n,8), rp
+       lea     (up,n,8), up
+       lea     (vp,n,8), vp
+
+       test    $1, n
+       jnz     L(odd)
+
+       lea     -8(yp1,n,8), yp1        C even n: yp1 = &yp1[n-1]
+       neg     n
+       jmp     L(top)
+
+       ALIGN(16)
+L(odd):
+       lea     -16(yp1,n,8), yp1       C odd n: yp1 = &yp1[n-2], so 8(yp1) is yp1[n-1]
+       neg     n
+       shr     $1, cy2                 C move the incoming carry bit into CF
+       mov     (up,n,8), w
+       ADCSBB  (vp,n,8), w
+       cmovc   8(yp1), e1l             C limb 0 carried: start the sums with the top yp1 and yp2 limbs
+       cmovc   8(yp1,yp2), e2l
+       mov     w, (rp,n,8)
+       sbb     cy2, cy2                C keep the carry as 0 or -1
+       inc     n
+       jz      L(end)
+
+       ALIGN(16)
+L(top):
+        mov     (up,n,8), w
+       shr     $1, cy2         C restore carry
+       ADCSBB  (vp,n,8), w
+       mov     w, (rp,n,8)
+       sbb     cy1, cy1        C generate mask, preserve CF
+
+       mov     8(up,n,8), w
+       ADCSBB  8(vp,n,8), w
+       mov     w, 8(rp,n,8)
+       sbb     cy2, cy2        C generate mask, preserve CF
+
+       mov     (yp1), w        C (e1h:e1l) += cy1 * yp1 limb
+       and     cy1, w
+       add     w, e1l
+       adc     $0, e1h
+
+       and     (yp1,yp2), cy1  C (e2h:e2l) += cy1 * yp2 limb
+       add     cy1, e2l
+       adc     $0, e2h
+
+       mov     -8(yp1), w      C (e1h:e1l) += cy2 * next yp1 limb
+       and     cy2, w
+       add     w, e1l
+       adc     $0, e1h
+
+       mov     -8(yp1,yp2), w  C (e2h:e2l) += cy2 * next yp2 limb
+       and     cy2, w
+       add     w, e2l
+       adc     $0, e2h
+
+       add     $2, n
+       lea     -16(yp1), yp1           C lea keeps the zero flag from the add for the jnz
+       jnz     L(top)
+L(end):
+
+       mov     e1l, (ep)
+       mov     e1h, 8(ep)
+       mov     e2l, 16(ep)
+       mov     e2h, 24(ep)
+
+       and     $1, %eax        C return carry
+
+       pop     %r14
+       pop     %r13
+       pop     %r12
+       pop     %rbp
+       pop     %rbx
+       ret
+EPILOGUE()
diff --git a/mpn/x86_64/aors_err3_n.asm b/mpn/x86_64/aors_err3_n.asm

new file mode 100644 (file)

index 0000000..2ad4e74
--- /dev/null
+++ b/mpn/x86_64/aors_err3_n.asm
@@ -0,0 +1,145 @@
+dnl  AMD64 mpn_add_err3_n, mpn_sub_err3_n
+
+dnl  Contributed by David Harvey.
+
+dnl  Copyright 2011 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C           cycles/limb
+C AMD K8,K9     7.0
+C AMD K10       ?
+C Intel P4      ?
+C Intel core2   ?
+C Intel corei   ?
+C Intel atom    ?
+C VIA nano      ?
+
+C INPUT PARAMETERS
+define(`rp',   `%rdi')
+define(`up',   `%rsi')
+define(`vp',   `%rdx')
+define(`ep',   `%rcx')
+define(`yp1',  `%r8')
+define(`yp2',   `%r9')
+define(`yp3_param',   `8(%rsp)')
+define(`n_param',     `16(%rsp)')
+define(`cy_param',    `24(%rsp)')
+
+define(`n',     `%r10')
+define(`yp3',   `%rcx')
+define(`t',     `%rbx')
+
+define(`e1l',  `%rbp')
+define(`e1h',  `%r11')
+define(`e2l',  `%r12')
+define(`e2h',  `%r13')
+define(`e3l',   `%r14')
+define(`e3h',   `%r15')
+
+
+
+ifdef(`OPERATION_add_err3_n', `
+       define(ADCSBB,        adc)
+       define(func,          mpn_add_err3_n)')
+ifdef(`OPERATION_sub_err3_n', `
+       define(ADCSBB,        sbb)
+       define(func,          mpn_sub_err3_n)')
+
+MULFUNC_PROLOGUE(mpn_add_err3_n mpn_sub_err3_n)
+
+
+ASM_START()
+       TEXT
+       ALIGN(16)
+PROLOGUE(func)         C rp[] = up[] +/- vp[] with carry-in; carry-selected yp1, yp2, yp3 limbs are summed into ep[0..5]
+       mov     cy_param, %rax          C read the stack arguments before the pushes move rsp
+       mov     n_param, n
+
+       push    %rbx
+       push    %rbp
+       push    %r12
+       push    %r13
+       push    %r14
+       push    %r15
+
+       push    ep                      C rcx is reused as yp3 below, so ep waits on the stack
+       mov     64(%rsp), yp3       C load from yp3_param (8 at entry plus seven 8-byte pushes)
+
+       xor     R32(e1l), R32(e1l)      C three two-limb error sums, all starting at zero
+       xor     R32(e1h), R32(e1h)
+       xor     R32(e2l), R32(e2l)
+       xor     R32(e2h), R32(e2h)
+       xor     R32(e3l), R32(e3l)
+       xor     R32(e3h), R32(e3h)
+
+       sub     yp1, yp2                C yp2 and yp3 become offsets from yp1, used as index registers
+       sub     yp1, yp3
+
+       lea     -8(yp1,n,8), yp1        C yp1 = &yp1[n-1]; it steps down as the limb index steps up
+       lea     (rp,n,8), rp
+       lea     (up,n,8), up
+       lea     (vp,n,8), vp
+       neg     n
+
+       ALIGN(16)
+L(top):
+       shr     $1, %rax                C restore carry
+       mov     (up,n,8), %rax
+       ADCSBB  (vp,n,8), %rax
+       mov     %rax, (rp,n,8)
+       sbb     %rax, %rax              C save carry and generate mask
+
+       mov     (yp1), t                C (e1h:e1l) += mask & yp1 limb
+       and     %rax, t
+       add     t, e1l
+       adc     $0, e1h
+
+       mov     (yp1,yp2), t            C (e2h:e2l) += mask & yp2 limb
+       and     %rax, t
+       add     t, e2l
+       adc     $0, e2h
+
+       mov     (yp1,yp3), t            C (e3h:e3l) += mask & yp3 limb
+       and     %rax, t
+       add     t, e3l
+       adc     $0, e3h
+
+       lea     -8(yp1), yp1
+       inc     n
+       jnz     L(top)
+
+L(end):
+       and     $1, %eax                C reduce the 0 or -1 mask to the returned carry
+       pop     ep                      C rcx is ep again from here on
+
+       mov     e1l, (ep)
+       mov     e1h, 8(ep)
+       mov     e2l, 16(ep)
+       mov     e2h, 24(ep)
+       mov     e3l, 32(ep)
+       mov     e3h, 40(ep)
+
+       pop     %r15
+       pop     %r14
+       pop     %r13
+       pop     %r12
+       pop     %rbp
+       pop     %rbx
+       ret
+EPILOGUE()
diff --git a/mpn/x86_64/aors_n.asm b/mpn/x86_64/aors_n.asm

index 7928f61a7e22a88482dd8a662bac1d412bc32cb1..95e18c8ed455e55b65775ea42bb5cb00f517d2b0 100644 (file)
--- a/mpn/x86_64/aors_n.asm
+++ b/mpn/x86_64/aors_n.asm
@@ -1,7 +1,7 @@
  dnl  AMD64 mpn_add_n, mpn_sub_n
  
-dnl  Copyright 2003, 2004, 2005, 2007, 2008, 2010 Free Software Foundation,
-dnl  Inc.
+dnl  Copyright 2003, 2004, 2005, 2007, 2008, 2010, 2011, 2012 Free Software
+dnl  Foundation, Inc.
  
  dnl  This file is part of the GNU MP Library.
  
@@ -24,20 +24,21 @@ C        cycles/limb
  C AMD K8,K9     1.5
  C AMD K10       1.5
  C Intel P4      ?
-C Intel core2   4.9
-C Intel corei   ?
+C Intel core2   4.9
+C Intel NHM     5.5
+C Intel SBR     1.59
  C Intel atom    4
  C VIA nano      3.25
  
-C The inner loop of this code is the result of running a code generation and
+C The loop of this code is the result of running a code generation and
  C optimization tool suite written by David Harvey and Torbjorn Granlund.
  
  C INPUT PARAMETERS
-define(`rp',   `%rdi')
-define(`up',   `%rsi')
-define(`vp',   `%rdx')
-define(`n',    `%rcx')
-define(`cy',   `%r8')          C (only for mpn_add_nc)
+define(`rp',   `%rdi') C rcx
+define(`up',   `%rsi') C rdx
+define(`vp',   `%rdx') C r8
+define(`n',    `%rcx') C r9
+define(`cy',   `%r8')  C rsp+40    (only for mpn_add_nc)
  
  ifdef(`OPERATION_add_n', `
         define(ADCSBB,        adc)
@@ -50,10 +51,15 @@ ifdef(`OPERATION_sub_n', `
  
  MULFUNC_PROLOGUE(mpn_add_n mpn_add_nc mpn_sub_n mpn_sub_nc)
  
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
+
  ASM_START()
         TEXT
         ALIGN(16)
  PROLOGUE(func_nc)
+       FUNC_ENTRY(4)
+IFDOS(`        mov     56(%rsp), %r8   ')
         mov     R32(n), R32(%rax)
         shr     $2, n
         and     $3, R32(%rax)
@@ -68,6 +74,7 @@ PROLOGUE(func_nc)
  EPILOGUE()
         ALIGN(16)
  PROLOGUE(func)
+       FUNC_ENTRY(4)
         mov     R32(n), R32(%rax)
         shr     $2, n
         and     $3, R32(%rax)
@@ -83,7 +90,8 @@ L(lt4):       dec     R32(%rax)
         jnz     L(2)
         ADCSBB  (vp), %r8
         mov     %r8, (rp)
-       adc     %eax, %eax
+       adc     R32(%rax), R32(%rax)
+       FUNC_EXIT()
         ret
  
  L(2):  dec     R32(%rax)
@@ -93,7 +101,8 @@ L(2):        dec     R32(%rax)
         ADCSBB  8(vp), %r9
         mov     %r8, (rp)
         mov     %r9, 8(rp)
-       adc     %eax, %eax
+       adc     R32(%rax), R32(%rax)
+       FUNC_EXIT()
         ret
  
  L(3):  mov     16(up), %r10
@@ -104,6 +113,7 @@ L(3):       mov     16(up), %r10
         mov     %r9, 8(rp)
         mov     %r10, 16(rp)
         setc    R8(%rax)
+       FUNC_EXIT()
         ret
  
         ALIGN(16)
@@ -140,6 +150,7 @@ L(end):     lea     32(up), up
         inc     R32(%rax)
         dec     R32(%rax)
         jnz     L(lt4)
-       adc     %eax, %eax
+       adc     R32(%rax), R32(%rax)
+       FUNC_EXIT()
         ret
  EPILOGUE()
diff --git a/mpn/x86_64/aorscnd_n.asm b/mpn/x86_64/aorscnd_n.asm

new file mode 100644 (file)

index 0000000..43c0171
--- /dev/null
+++ b/mpn/x86_64/aorscnd_n.asm
@@ -0,0 +1,172 @@
+dnl  AMD64 mpn_addcnd_n, mpn_subcnd_n
+
+dnl  Copyright 2011, 2012 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C           cycles/limb
+C AMD K8,K9     2.25
+C AMD K10       2
+C AMD bd1       3.55
+C AMD bobcat    2.5
+C Intel P4     13
+C Intel core2   2.9
+C Intel NHM     2.9
+C Intel SBR     2.4
+C Intel atom    6.5
+C VIA nano      3
+
+C NOTES
+C  * It might seem natural to use the cmov insn here, but since this function
+C    is supposed to have the exact same execution pattern for cnd true and
+C    false, and since cmov's documentation is not clear about whether it
+C    actually reads both source operands and writes the register for a false
+C    condition, we cannot use it.
+C  * Two cases could be optimised: (1) addcnd_n could use ADCSBB-from-memory
+C    to save one insn/limb, and (2) when up=rp addcnd_n and subcnd_n could use
+C    ADCSBB-to-memory, again saving 1 insn/limb.
+C  * This runs optimally at decoder bandwidth on K10.  It has not been tuned
+C    for any other processor.
+
+C INPUT PARAMETERS
+define(`rp',   `%rdi')
+define(`up',   `%rsi')
+define(`vp',   `%rdx')
+define(`n',    `%rcx')
+define(`cnd',  `%r8')
+
+ifdef(`OPERATION_addcnd_n', `
+       define(ADDSUB,        add)
+       define(ADCSBB,        adc)
+       define(func,          mpn_addcnd_n)')
+ifdef(`OPERATION_subcnd_n', `
+       define(ADDSUB,        sub)
+       define(ADCSBB,        sbb)
+       define(func,          mpn_subcnd_n)')
+
+MULFUNC_PROLOGUE(mpn_addcnd_n mpn_subcnd_n)
+
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
+
+ASM_START()
+       TEXT
+       ALIGN(16)
+PROLOGUE(func)
+       FUNC_ENTRY(4)
+IFDOS(`        mov     56(%rsp), %r8   ')      C DOS64: load cnd from the stack
+       push    %rbx                    C save the registers used as temporaries
+       push    %rbp
+       push    %r12
+       push    %r13
+       push    %r14
+
+       neg     cnd                     C CF = 1 iff cnd is nonzero
+       sbb     cnd, cnd                C make cnd mask: 0 or all ones
+
+       lea     (vp,n,8), vp            C advance vp, up, rp by n*8 bytes ...
+       lea     (up,n,8), up
+       lea     (rp,n,8), rp
+
+       mov     R32(n), R32(%rax)
+       neg     n                       C ... and index them with -n counting up to 0
+       and     $3, R32(%rax)           C rax = n mod 4
+       jz      L(top)                  C carry-save reg rax = 0 in this arc
+       cmp     $2, R32(%rax)
+       jc      L(b1)                   C n mod 4 = 1
+       jz      L(b2)                   C n mod 4 = 2
+
+L(b3): mov     (vp,n,8), %r12          C n mod 4 = 3: handle 3 limbs up front
+       mov     8(vp,n,8), %r13
+       mov     16(vp,n,8), %r14
+       mov     (up,n,8), %r10
+       mov     8(up,n,8), %rbx
+       mov     16(up,n,8), %rbp
+       and     cnd, %r12               C mask the v limbs: they become 0 if cnd mask is 0
+       and     cnd, %r13
+       and     cnd, %r14
+       ADDSUB  %r12, %r10              C first limb starts the carry chain
+       ADCSBB  %r13, %rbx
+       ADCSBB  %r14, %rbp
+       sbb     R32(%rax), R32(%rax)    C save carry
+       mov     %r10, (rp,n,8)
+       mov     %rbx, 8(rp,n,8)
+       mov     %rbp, 16(rp,n,8)
+       add     $3, n
+       js      L(top)                  C index still negative: more limbs remain
+       jmp     L(end)
+
+L(b2): mov     (vp,n,8), %r12          C handle 2 limbs up front
+       mov     8(vp,n,8), %r13
+       mov     (up,n,8), %r10
+       mov     8(up,n,8), %rbx
+       and     cnd, %r12
+       and     cnd, %r13
+       ADDSUB  %r12, %r10
+       ADCSBB  %r13, %rbx
+       sbb     R32(%rax), R32(%rax)    C save carry
+       mov     %r10, (rp,n,8)
+       mov     %rbx, 8(rp,n,8)
+       add     $2, n
+       js      L(top)                  C index still negative: more limbs remain
+       jmp     L(end)
+
+L(b1): mov     (vp,n,8), %r12          C handle 1 limb up front
+       mov     (up,n,8), %r10
+       and     cnd, %r12
+       ADDSUB  %r12, %r10
+       sbb     R32(%rax), R32(%rax)    C save carry
+       mov     %r10, (rp,n,8)
+       add     $1, n
+       jns     L(end)                  C index reached 0: done, else fall into loop
+
+       ALIGN(16)
+L(top):        mov     (vp,n,8), %r12          C main loop: 4 limbs per iteration
+       mov     8(vp,n,8), %r13
+       mov     16(vp,n,8), %r14
+       mov     24(vp,n,8), %r11
+       mov     (up,n,8), %r10
+       mov     8(up,n,8), %rbx
+       mov     16(up,n,8), %rbp
+       mov     24(up,n,8), %r9
+       and     cnd, %r12               C mask the v limbs with the cnd mask
+       and     cnd, %r13
+       and     cnd, %r14
+       and     cnd, %r11
+       add     R32(%rax), R32(%rax)    C restore carry
+       ADCSBB  %r12, %r10
+       ADCSBB  %r13, %rbx
+       ADCSBB  %r14, %rbp
+       ADCSBB  %r11, %r9
+       sbb     R32(%rax), R32(%rax)    C save carry
+       mov     %r10, (rp,n,8)
+       mov     %rbx, 8(rp,n,8)
+       mov     %rbp, 16(rp,n,8)
+       mov     %r9, 24(rp,n,8)
+       add     $4, n
+       js      L(top)                  C loop until the index reaches 0
+
+L(end):        neg     R32(%rax)               C saved carry is 0 or -1; return 0 or 1
+       pop     %r14                    C restore in reverse order of the pushes
+       pop     %r13
+       pop     %r12
+       pop     %rbp
+       pop     %rbx
+       FUNC_EXIT()
+       ret
+EPILOGUE()
diff --git a/mpn/x86_64/aorsmul_1.asm b/mpn/x86_64/aorsmul_1.asm

index cbf4b4693bb748f8ea76ebdb9b42cd97075ec9e4..08ae46c7fbdbb1a5f82c8e4d299d26a6d48757f9 100644 (file)
--- a/mpn/x86_64/aorsmul_1.asm
+++ b/mpn/x86_64/aorsmul_1.asm
@@ -1,6 +1,7 @@
  dnl  AMD64 mpn_addmul_1 and mpn_submul_1.
  
-dnl  Copyright 2003, 2004, 2005, 2007, 2008 Free Software Foundation, Inc.
+dnl  Copyright 2003, 2004, 2005, 2007, 2008, 2011, 2012 Free Software
+dnl  Foundation, Inc.
  
  dnl  This file is part of the GNU MP Library.
  
@@ -20,27 +21,30 @@ dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
  include(`../config.m4')
  
  C           cycles/limb
-C K8,K9:        2.5
-C K10:          2.5
-C P4:          14.9
-C P6 core2:     5.09
-C P6 corei7:
-C P6 atom:     21.3
-
-C The inner loop of this code is the result of running a code generation and
+C AMD K8,K9     2.5
+C AMD K10       2.5
+C AMD bd1       5.0
+C AMD bobcat    6.17
+C Intel P4     14.9
+C Intel core2   5.09
+C Intel NHM     4.9
+C Intel SBR     4.0
+C Intel atom   21.3
+C VIA nano      5.0
+
+C The loop of this code is the result of running a code generation and
  C optimization tool suite written by David Harvey and Torbjorn Granlund.
  
-C TODO:
-C  * The inner loop is great, but the prologue and epilogue code was
-C    quickly written.  Tune it!
+C TODO
+C  * The loop is great, but the prologue and epilogue code was quickly written.
+C    Tune it!
  
-C INPUT PARAMETERS
-define(`rp',    `%rdi')
-define(`up',    `%rsi')
-define(`n_param',`%rdx')
-define(`vl',    `%rcx')
+define(`rp',      `%rdi')   C rcx
+define(`up',      `%rsi')   C rdx
+define(`n_param', `%rdx')   C r8
+define(`vl',      `%rcx')   C r9
  
-define(`n',    `%r11')
+define(`n',       `%r11')
  
  ifdef(`OPERATION_addmul_1',`
        define(`ADDSUB',        `add')
@@ -51,17 +55,33 @@ ifdef(`OPERATION_submul_1',`
        define(`func',  `mpn_submul_1')
  ')
  
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
+
  MULFUNC_PROLOGUE(mpn_addmul_1 mpn_submul_1)
  
+IFDOS(`        define(`up', ``%rsi'')  ') dnl
+IFDOS(`        define(`rp', ``%rcx'')  ') dnl
+IFDOS(`        define(`vl', ``%r9'')   ') dnl
+IFDOS(`        define(`r9', ``rdi'')   ') dnl
+IFDOS(`        define(`n',  ``%r8'')   ') dnl
+IFDOS(`        define(`r8', ``r11'')   ') dnl
+
  ASM_START()
         TEXT
         ALIGN(16)
  PROLOGUE(func)
+
+IFDOS(``push   %rsi            '')
+IFDOS(``push   %rdi            '')
+IFDOS(``mov    %rdx, %rsi      '')
+
         mov     (up), %rax              C read first u limb early
         push    %rbx
-       mov     n_param, %rbx           C move away n from rdx, mul uses it
+IFSTD(`        mov     n_param, %rbx   ')      C move away n from rdx, mul uses it
+IFDOS(`        mov     n, %rbx         ')
         mul     vl
-       mov     %rbx, %r11
+IFSTD(`        mov     %rbx, n         ')
  
         and     $3, R32(%rbx)
         jz      L(b0)
@@ -113,7 +133,7 @@ L(top):     ADDSUB  %r10, (rp,n,8)
         adc     %rax, %r9
         mov     (up,n,8), %rax
         adc     %rdx, %r8
-       mov     $0, %r10d
+       mov     $0, R32(%r10)
  L(L1): mul     vl
         ADDSUB  %r9, 8(rp,n,8)
         adc     %rax, %r8
@@ -126,11 +146,11 @@ L(L0):    mov     8(up,n,8), %rax
  L(L3): mov     16(up,n,8), %rax
         mul     vl
         ADDSUB  %rbx, 24(rp,n,8)
-       mov     $0, %r8d                # zero
-       mov     %r8, %rbx               # zero
+       mov     $0, R32(%r8)            C zero
+       mov     %r8, %rbx               C zero
         adc     %rax, %r10
         mov     24(up,n,8), %rax
-       mov     %r8, %r9                # zero
+       mov     %r8, %r9                C zero
         adc     %rdx, %r9
  L(L2): mul     vl
         add     $4, n
@@ -144,5 +164,7 @@ L(ret):     adc     $0, %rdx
         mov     %rdx, %rax
  
         pop     %rbx
+IFDOS(``pop    %rdi            '')
+IFDOS(``pop    %rsi            '')
         ret
  EPILOGUE()
diff --git a/mpn/x86_64/atom/aorrlsh1_n.asm b/mpn/x86_64/atom/aorrlsh1_n.asm

new file mode 100644 (file)

index 0000000..5eee990
--- /dev/null
+++ b/mpn/x86_64/atom/aorrlsh1_n.asm
@@ -0,0 +1,226 @@
+dnl  AMD64 mpn_addlsh1_n -- rp[] = up[] + (vp[] << 1)
+dnl  AMD64 mpn_rsblsh1_n -- rp[] = (vp[] << 1) - up[]
+dnl  Optimised for Intel Atom.
+
+dnl  Contributed to the GNU project by Torbjorn Granlund.
+
+dnl  Copyright 2011, 2012 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C TODO
+C  * This code is slightly large at 433 bytes.
+C  * sublsh1_n.asm and this file use the same basic pattern.
+
+C           cycles/limb
+C AMD K8,K9     ?
+C AMD K10       ?
+C Intel P4      ?
+C Intel core2   ?
+C Intel NHM     ?
+C Intel SBR     ?
+C Intel atom    4.875  (4.75 is probably possible)
+C VIA nano      ?
+
+C INPUT PARAMETERS
+define(`rp',       `%rdi')
+define(`up',       `%rsi')
+define(`vp',       `%rdx')
+define(`n',        `%rcx')
+define(`cy',       `%r8')
+
+ifdef(`OPERATION_addlsh1_n', `
+  define(ADDSUB,       add)
+  define(ADCSBB,       adc)
+  define(func_n,       mpn_addlsh1_n)
+  define(func_nc,      mpn_addlsh1_nc)')
+ifdef(`OPERATION_rsblsh1_n', `
+  define(ADDSUB,       sub)
+  define(ADCSBB,       sbb)
+  define(func_n,       mpn_rsblsh1_n)
+  define(func_nc,      mpn_rsblsh1_nc)')
+
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
+
+MULFUNC_PROLOGUE(mpn_addlsh1_n mpn_addlsh1_nc mpn_rsblsh1_n mpn_rsblsh1_nc)
+
+ASM_START()
+       TEXT
+       ALIGN(16)
+PROLOGUE(func_n)
+       FUNC_ENTRY(4)
+       push    %rbp
+       xor     R32(%rbp), R32(%rbp)    C acy = 0: this entry has no carry-in
+L(ent):        mov     R32(n), R32(%rax)       C func_nc joins here with acy in rbp
+       and     $3, R32(%rax)           C rax = n mod 4
+       jz      L(b0)                   C rax = 0 doubles as scy = 0 at L(b0)
+       cmp     $2, R32(%rax)
+       jz      L(b2)                   C n mod 4 = 2
+       jg      L(b3)                   C n mod 4 = 3
+
+L(b1): mov     (vp), %r8               C n mod 4 = 1: handle 1 limb
+       add     %r8, %r8                C double the v limb; CF = bit shifted out
+       lea     8(vp), vp
+       sbb     R32(%rax), R32(%rax)    C save scy
+       add     R32(%rbp), R32(%rbp)    C restore acy
+       ADCSBB  (up), %r8
+       mov     %r8, (rp)
+       sbb     R32(%rbp), R32(%rbp)    C save acy
+       lea     8(up), up
+       lea     8(rp), rp
+       jmp     L(b0)
+
+L(b2): mov     (vp), %r8               C handle 2 limbs
+       add     %r8, %r8                C start of the doubling carry chain
+       mov     8(vp), %r9
+       adc     %r9, %r9
+       lea     16(vp), vp
+       sbb     R32(%rax), R32(%rax)    C save scy
+       add     R32(%rbp), R32(%rbp)    C restore acy
+       ADCSBB  (up), %r8
+       mov     %r8, (rp)
+       ADCSBB  8(up), %r9
+       mov     %r9, 8(rp)
+       sbb     R32(%rbp), R32(%rbp)    C save acy
+       lea     16(up), up
+       lea     16(rp), rp
+       jmp     L(b0)
+
+L(b3): mov     (vp), %r8               C handle 3 limbs
+       add     %r8, %r8                C start of the doubling carry chain
+       mov     8(vp), %r9
+       adc     %r9, %r9
+       mov     16(vp), %r10
+       adc     %r10, %r10
+       lea     24(vp), vp
+       sbb     R32(%rax), R32(%rax)    C save scy
+       add     R32(%rbp), R32(%rbp)    C restore acy
+       ADCSBB  (up), %r8
+       mov     %r8, (rp)
+       ADCSBB  8(up), %r9
+       mov     %r9, 8(rp)
+       ADCSBB  16(up), %r10
+       mov     %r10, 16(rp)
+       sbb     R32(%rbp), R32(%rbp)    C save acy
+       lea     24(up), up
+       lea     24(rp), rp
+
+L(b0): test    $4, R8(n)               C bit 2 of n set: handle 4 limbs here
+       jz      L(skp)
+       add     R32(%rax), R32(%rax)    C restore scy
+       mov     (vp), %r8
+       adc     %r8, %r8
+       mov     8(vp), %r9
+       adc     %r9, %r9
+       mov     16(vp), %r10
+       adc     %r10, %r10
+       mov     24(vp), %r11
+       adc     %r11, %r11
+       lea     32(vp), vp
+       sbb     R32(%rax), R32(%rax)    C save scy
+       add     R32(%rbp), R32(%rbp)    C restore acy
+       ADCSBB  (up), %r8
+       mov     %r8, (rp)
+       ADCSBB  8(up), %r9
+       mov     %r9, 8(rp)
+       ADCSBB  16(up), %r10
+       mov     %r10, 16(rp)
+       ADCSBB  24(up), %r11
+       mov     %r11, 24(rp)
+       lea     32(up), up
+       lea     32(rp), rp
+       sbb     R32(%rbp), R32(%rbp)    C save acy
+
+L(skp):        cmp     $8, n                   C n is still the full limb count here
+       jl      L(rtn)                  C n < 8: the code above handled everything
+
+       push    %r12                    C extra temporaries for the 8-limb loop
+       push    %r13
+       push    %r14
+       push    %rbx
+       lea     -64(rp), rp             C cancelled by the lea 64(rp) at L(top)
+       jmp     L(x)
+
+       ALIGN(16)
+L(top):        add     R32(%rax), R32(%rax)    C restore scy
+       lea     64(rp), rp
+       mov     (vp), %r8
+       adc     %r8, %r8
+       mov     8(vp), %r9
+       adc     %r9, %r9
+       mov     16(vp), %r10
+       adc     %r10, %r10
+       mov     24(vp), %r11
+       adc     %r11, %r11
+       mov     32(vp), %r12
+       adc     %r12, %r12
+       mov     40(vp), %r13
+       adc     %r13, %r13
+       mov     48(vp), %r14
+       adc     %r14, %r14
+       mov     56(vp), %rbx
+       adc     %rbx, %rbx
+       lea     64(vp), vp
+       sbb     R32(%rax), R32(%rax)    C save scy
+       add     R32(%rbp), R32(%rbp)    C restore acy
+       ADCSBB  (up), %r8
+       mov     %r8, (rp)
+       ADCSBB  8(up), %r9
+       mov     %r9, 8(rp)
+       ADCSBB  16(up), %r10
+       mov     %r10, 16(rp)
+       ADCSBB  24(up), %r11
+       mov     %r11, 24(rp)
+       ADCSBB  32(up), %r12
+       mov     %r12, 32(rp)
+       ADCSBB  40(up), %r13
+       mov     %r13, 40(rp)
+       ADCSBB  48(up), %r14
+       mov     %r14, 48(rp)
+       ADCSBB  56(up), %rbx
+       mov     %rbx, 56(rp)
+       sbb     R32(%rbp), R32(%rbp)    C save acy
+       lea     64(up), up
+L(x):  sub     $8, n
+       jge     L(top)                  C loop while n stays >= 0 after the subtract
+
+L(end):        pop     %rbx                    C undo the four pushes made before the loop
+       pop     %r14
+       pop     %r13
+       pop     %r12
+L(rtn):
+ifdef(`OPERATION_addlsh1_n',`
+       add     R32(%rbp), R32(%rax)    C rax = -(scy + acy), each saved as 0 or -1
+       neg     R32(%rax)')
+ifdef(`OPERATION_rsblsh1_n',`
+       sub     R32(%rax), R32(%rbp)    C rbp = scy - acy as a signed value
+       movslq  R32(%rbp), %rax')
+
+       pop     %rbp
+       FUNC_EXIT()
+       ret
+EPILOGUE()
+PROLOGUE(func_nc)
+       FUNC_ENTRY(4)
+IFDOS(`        mov     56(%rsp), %r8   ')      C DOS64: load cy from the stack
+       push    %rbp                    C popped again on the exit path of func_n
+       neg     %r8                     C set CF
+       sbb     R32(%rbp), R32(%rbp)    C save acy
+       jmp     L(ent)                  C continue in func_n with carry-in in rbp
+EPILOGUE()
diff --git a/mpn/x86_64/atom/aorrlsh2_n.asm b/mpn/x86_64/atom/aorrlsh2_n.asm

new file mode 100644 (file)

index 0000000..091ffb8
--- /dev/null
+++ b/mpn/x86_64/atom/aorrlsh2_n.asm
@@ -0,0 +1,180 @@
+dnl  AMD64 mpn_addlsh2_n -- rp[] = up[] + (vp[] << 2)
+dnl  AMD64 mpn_rsblsh2_n -- rp[] = (vp[] << 2) - up[]
+dnl  Optimised for Intel Atom.
+
+dnl  Contributed to the GNU project by Torbjorn Granlund.
+
+dnl  Copyright 2011, 2012 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C           cycles/limb
+C AMD K8,K9     ?
+C AMD K10       ?
+C Intel P4      ?
+C Intel core2   ?
+C Intel NHM     ?
+C Intel SBR     ?
+C Intel atom    5.75
+C VIA nano      ?
+
+C INPUT PARAMETERS
+define(`rp',       `%rdi')
+define(`up',       `%rsi')
+define(`vp',       `%rdx')
+define(`n',        `%rcx')
+
+define(`LSH', 2)
+define(`RSH', 62)
+define(M, eval(m4_lshift(1,LSH)))
+
+ifdef(`OPERATION_addlsh2_n', `
+  define(ADDSUB,       add)
+  define(ADCSBB,       adc)
+  define(func_n,       mpn_addlsh2_n)
+  define(func_nc,      mpn_addlsh2_nc)')
+ifdef(`OPERATION_rsblsh2_n', `
+  define(ADDSUB,       sub)
+  define(ADCSBB,       sbb)
+  define(func_n,       mpn_rsblsh2_n)
+  define(func_nc,      mpn_rsblsh2_nc)')
+
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
+
+MULFUNC_PROLOGUE(mpn_addlsh2_n mpn_rsblsh2_n)
+
+ASM_START()
+       TEXT
+       ALIGN(16)
+PROLOGUE(func_n)
+       FUNC_ENTRY(4)
+       push    %rbx                    C rbx and rbp serve as limb temporaries
+       push    %rbp
+
+       mov     R32(n), R32(%rax)
+       and     $3, R32(%rax)           C rax = n mod 4
+       jz      L(b0)                   C we rely on rax = 0 at target
+       cmp     $2, R32(%rax)
+       mov     $0, R32(%rax)           C clear rax; mov leaves the cmp flags intact
+       jz      L(b2)                   C n mod 4 = 2
+       jg      L(b3)                   C n mod 4 = 3
+
+L(b1): mov     (vp), %r9               C n mod 4 = 1
+       lea     (%rax,%r9,M), %rbp      C rbp = v limb << LSH (rax = 0)
+       shr     $RSH, %r9               C r9 = bits shifted out of that limb
+       sub     $1, n
+       lea     -8(up), up              C lea does not alter the flags from sub
+       lea     -8(rp), rp
+       jz      L(cj1)                  C n was 1: only the wind-down remains
+       mov     8(vp), %r10
+       lea     (%r9,%r10,M), %r9       C next limb << LSH plus bits carried in
+       shr     $RSH, %r10
+       mov     16(vp), %r11
+       lea     24(vp), vp
+       mov     (vp), %r8
+       lea     (%r10,%r11,M), %r10
+       shr     $RSH, %r11
+       add     R32(%rax), R32(%rax)    C rax = 0, so this just clears CF
+       jmp     L(L1)
+
+L(b2): lea     -32(rp), rp             C bias rp and up for the loop offsets
+       mov     (vp), %r8
+       lea     -32(up), up
+       lea     (%rax,%r8,M), %rbx      C rbx = v limb << LSH (rax = 0)
+       shr     $RSH, %r8
+       mov     8(vp), %r9
+       sub     $2, n
+       jle     L(end)                  C n was 2: go straight to the wind-down
+       jmp     L(top)
+
+L(b3): lea     -24(up), up
+       mov     (vp), %r11
+       lea     -24(rp), rp
+       mov     8(vp), %r8
+       lea     (%rax,%r11,M), %r10     C r10 = v limb << LSH (rax = 0)
+       shr     $RSH, %r11
+       lea     8(vp), vp
+       lea     (%r11,%r8,M), %rbx
+       add     $1, n                   C round n up to a multiple of 4
+       jmp     L(L3)
+
+L(b0): lea     -16(up), up
+       mov     (vp), %r10
+       lea     (%rax,%r10,M), %r9      C r9 = v limb << LSH (rax = 0)
+       shr     $RSH, %r10
+       mov     8(vp), %r11
+       lea     -16(rp), rp
+       mov     16(vp), %r8
+       lea     (%r10,%r11,M), %r10
+       shr     $RSH, %r11
+       add     R32(%rax), R32(%rax)    C rax = 0, so this just clears CF
+       lea     16(vp), vp
+       jmp     L(L0)
+
+       ALIGN(16)
+L(top):        lea     (%r8,%r9,M), %rbp       C shifted limb plus bits from the previous one
+       shr     $RSH, %r9
+       lea     32(up), up
+       mov     16(vp), %r10
+       lea     (%r9,%r10,M), %r9
+       shr     $RSH, %r10
+       mov     24(vp), %r11
+       lea     32(rp), rp
+       lea     32(vp), vp
+       mov     (vp), %r8
+       lea     (%r10,%r11,M), %r10
+       shr     $RSH, %r11
+       add     R32(%rax), R32(%rax)    C restore the carry saved below
+       ADCSBB  (up), %rbx
+       mov     %rbx, (rp)
+L(L1): ADCSBB  8(up), %rbp
+       mov     %rbp, 8(rp)
+L(L0): ADCSBB  16(up), %r9
+       lea     (%r11,%r8,M), %rbx
+       mov     %r9, 16(rp)
+L(L3): ADCSBB  24(up), %r10
+       sbb     R32(%rax), R32(%rax)    C save carry as 0 or -1; shr below clobbers CF
+L(L2): shr     $RSH, %r8
+       mov     8(vp), %r9
+       mov     %r10, 24(rp)
+       sub     $4, n
+       jg      L(top)
+
+L(end):        lea     (%r8,%r9,M), %rbp       C wind-down: last two limbs
+       shr     $RSH, %r9               C r9 = bits shifted out of the top limb
+       lea     32(up), up
+       lea     32(rp), rp
+       add     R32(%rax), R32(%rax)    C restore the saved carry
+       ADCSBB  (up), %rbx
+       mov     %rbx, (rp)
+L(cj1):        ADCSBB  8(up), %rbp
+       mov     %rbp, 8(rp)
+
+ifdef(`OPERATION_addlsh2_n',`
+       mov     R32(n), R32(%rax)       C zero rax
+       adc     %r9, %rax')
+ifdef(`OPERATION_rsblsh2_n',`
+       sbb     n, %r9                  C subtract 0
+       mov     %r9, %rax')
+
+       pop     %rbp
+       pop     %rbx
+       FUNC_EXIT()
+       ret
+EPILOGUE()
diff --git a/mpn/x86_64/atom/aors_n.asm b/mpn/x86_64/atom/aors_n.asm

index 6319ae8a5d851b9c996c07c7ac1ce6584e62ae5a..f13f97632b0d439c5f9570a76c6607bab97aa748 100644 (file)
--- a/mpn/x86_64/atom/aors_n.asm
+++ b/mpn/x86_64/atom/aors_n.asm
@@ -1,6 +1,7 @@
  dnl  X86-64 mpn_add_n, mpn_sub_n, optimized for Intel Atom.
  
-dnl  Copyright 2003, 2004, 2005, 2007, 2008, 2010 Free Software Foundation, Inc.
+dnl  Copyright 2003, 2004, 2005, 2007, 2008, 2010, 2011, 2012 Free Software
+dnl  Foundation, Inc.
  
  dnl  This file is part of the GNU MP Library.
  
@@ -19,127 +20,8 @@ dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
  
  include(`../config.m4')
  
-
-C           cycles/limb
-C K8,K9:        1.85
-C K10:          ?
-C P4:           ?
-C P6-15 (Core2): ?
-C P6-28 (Atom):         3
-
-C INPUT PARAMETERS
-define(`rp',   `%rdi')
-define(`up',   `%rsi')
-define(`vp',   `%rdx')
-define(`n',    `%rcx')
-define(`cy',   `%r8')          C (only for mpn_add_nc)
-
-ifdef(`OPERATION_add_n', `
-       define(ADCSBB,        adc)
-       define(func,          mpn_add_n)
-       define(func_nc,       mpn_add_nc)')
-ifdef(`OPERATION_sub_n', `
-       define(ADCSBB,        sbb)
-       define(func,          mpn_sub_n)
-       define(func_nc,       mpn_sub_nc)')
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
  
  MULFUNC_PROLOGUE(mpn_add_n mpn_add_nc mpn_sub_n mpn_sub_nc)
-
-ASM_START()
-       TEXT
-       ALIGN(16)
-PROLOGUE(func_nc)
-       jmp     L(ent)
-EPILOGUE()
-PROLOGUE(func)
-       xor     %r8, %r8
-L(ent):
-       mov     R32(%rcx), R32(%rax)
-       shr     $2, %rcx
-       and     $3, R32(%rax)
-       jz      L(b0)
-       cmp     $2, R32(%rax)
-       jz      L(b2)
-       jg      L(b3)
-
-L(b1): mov     (%rsi), %r10
-       test    %rcx, %rcx
-       jnz     L(gt1)
-       shr     R32(%r8)                        C Set CF from argument
-       ADCSBB  (%rdx), %r10
-       mov     %r10, (%rdi)
-       mov     R32(%rcx), R32(%rax)            C zero rax
-       adc     R32(%rax), R32(%rax)
-       ret
-L(gt1):        shr     R32(%r8)
-       ADCSBB  (%rdx), %r10
-       mov     8(%rsi), %r11
-       lea     16(%rsi), %rsi
-       lea     -16(%rdx), %rdx
-       lea     -16(%rdi), %rdi
-       jmp     L(m1)
-
-L(b2): mov     (%rsi), %r9
-       mov     8(%rsi), %r10
-       lea     -8(%rdx), %rdx
-       test    %rcx, %rcx
-       jnz     L(gt2)
-       shr     R32(%r8)
-       lea     -40(%rdi), %rdi
-       jmp     L(e2)
-L(gt2):        shr     R32(%r8)
-       ADCSBB  8(%rdx), %r9
-       mov     16(%rsi), %r11
-       lea     -8(%rsi), %rsi
-       lea     -8(%rdi), %rdi
-       jmp     L(m2)
-
-L(b3): mov     (%rsi), %rax
-       mov     8(%rsi), %r9
-       mov     16(%rsi), %r10
-       test    %rcx, %rcx
-       jnz     L(gt3)
-       shr     R32(%r8)
-       lea     -32(%rdi), %rdi
-       jmp     L(e3)
-L(gt3):        shr     R32(%r8)
-       ADCSBB  (%rdx), %rax
-       jmp     L(m3)
-
-L(b0): mov     (%rsi), %r11
-       neg     R32(%r8)
-       lea     -24(%rdx), %rdx
-       lea     -24(%rdi), %rdi
-       lea     8(%rsi), %rsi
-       jmp     L(m0)
-
-       ALIGN(8)
-L(top):        mov     %r11, 24(%rdi)
-       ADCSBB  (%rdx), %rax
-       lea     32(%rdi), %rdi
-L(m3): mov     %rax, (%rdi)
-       ADCSBB  8(%rdx), %r9
-       mov     24(%rsi), %r11
-L(m2): mov     %r9, 8(%rdi)
-       ADCSBB  16(%rdx), %r10
-       lea     32(%rsi), %rsi
-L(m1): mov     %r10, 16(%rdi)
-L(m0): ADCSBB  24(%rdx), %r11
-       mov     (%rsi), %rax
-       mov     8(%rsi), %r9
-       lea     32(%rdx), %rdx
-       dec     %rcx
-       mov     16(%rsi), %r10
-       jnz     L(top)
-
-       mov     %r11, 24(%rdi)
-L(e3): ADCSBB  (%rdx), %rax
-       mov     %rax, 32(%rdi)
-L(e2): ADCSBB  8(%rdx), %r9
-       mov     %r9, 40(%rdi)
-L(e1): ADCSBB  16(%rdx), %r10
-       mov     %r10, 48(%rdi)
-       mov     R32(%rcx), R32(%rax)            C zero rax
-       adc     R32(%rax), R32(%rax)
-       ret
-EPILOGUE()
+include_mpn(`x86_64/coreisbr/aors_n.asm')
diff --git a/mpn/x86_64/atom/copyd.asm b/mpn/x86_64/atom/copyd.asm

new file mode 100644 (file)

index 0000000..6c6e9db
--- /dev/null
+++ b/mpn/x86_64/atom/copyd.asm
@@ -0,0 +1,26 @@
+dnl  X86-64 mpn_copyd optimised for Intel Atom.
+
+dnl  Copyright 2012 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
+
+MULFUNC_PROLOGUE(mpn_copyd)
+include_mpn(`x86_64/fastsse/copyd-palignr.asm')
diff --git a/mpn/x86_64/atom/copyi.asm b/mpn/x86_64/atom/copyi.asm

new file mode 100644 (file)

index 0000000..4714449
--- /dev/null
+++ b/mpn/x86_64/atom/copyi.asm
@@ -0,0 +1,26 @@
+dnl  X86-64 mpn_copyi optimised for Intel Atom.
+
+dnl  Copyright 2012 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
+
+MULFUNC_PROLOGUE(mpn_copyi)
+include_mpn(`x86_64/fastsse/copyi-palignr.asm')
diff --git a/mpn/x86_64/atom/dive_1.asm b/mpn/x86_64/atom/dive_1.asm

new file mode 100644 (file)

index 0000000..543d1a2
--- /dev/null
+++ b/mpn/x86_64/atom/dive_1.asm
@@ -0,0 +1,26 @@
+dnl  AMD64 mpn_divexact_1 -- mpn by limb exact division.
+
+dnl  Copyright 2011, 2012 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
+
+MULFUNC_PROLOGUE(mpn_divexact_1)
+include_mpn(`x86_64/nano/dive_1.asm')
diff --git a/mpn/x86_64/atom/gmp-mparam.h b/mpn/x86_64/atom/gmp-mparam.h

index a124f3c5f0e4f5967177072cf8fe6e0567f02584..bc19a095c70d2c493c38a6b44c7e93944624937b 100644 (file)
--- a/mpn/x86_64/atom/gmp-mparam.h
+++ b/mpn/x86_64/atom/gmp-mparam.h
@@ -1,7 +1,7 @@
  /* Intel Atom/64 gmp-mparam.h -- Compiler/machine parameter header file.
  
  Copyright 1991, 1993, 1994, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007,
-2008, 2009, 2010 Free Software Foundation, Inc.
+2008, 2009, 2010, 2012 Free Software Foundation, Inc.
  
  This file is part of the GNU MP Library.
  
@@ -21,16 +21,25 @@ along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
  #define GMP_LIMB_BITS 64
  #define BYTES_PER_MP_LIMB 8
  
+#define SHLD_SLOW 1
+#define SHRD_SLOW 1
+
+/* These routines exist for all x86_64 chips, but they are slower on Atom
+   than separate add/sub and shift.  Make sure they are not really used.  */
+#undef HAVE_NATIVE_mpn_rsh1add_n
+#undef HAVE_NATIVE_mpn_rsh1sub_n
+
  #define MOD_1_NORM_THRESHOLD                 0  /* always */
  #define MOD_1_UNNORM_THRESHOLD               0  /* always */
-#define MOD_1N_TO_MOD_1_1_THRESHOLD         37
-#define MOD_1U_TO_MOD_1_1_THRESHOLD          8
-#define MOD_1_1_TO_MOD_1_2_THRESHOLD         0  /* never mpn_mod_1_1p */
-#define MOD_1_2_TO_MOD_1_4_THRESHOLD        14
-#define PREINV_MOD_1_TO_MOD_1_THRESHOLD     69
+#define MOD_1N_TO_MOD_1_1_THRESHOLD          4
+#define MOD_1U_TO_MOD_1_1_THRESHOLD          3
+#define MOD_1_1_TO_MOD_1_2_THRESHOLD     MP_SIZE_T_MAX
+#define MOD_1_2_TO_MOD_1_4_THRESHOLD         0
+#define PREINV_MOD_1_TO_MOD_1_THRESHOLD     10
  #define USE_PREINV_DIVREM_1                  1  /* native */
+#define DIV_QR_2_PI2_THRESHOLD           MP_SIZE_T_MAX  /* never */
  #define DIVEXACT_1_THRESHOLD                 0  /* always (native) */
-#define BMOD_1_TO_MOD_1_THRESHOLD           32
+#define BMOD_1_TO_MOD_1_THRESHOLD           15
  
  #define MUL_TOOM22_THRESHOLD                10
  #define MUL_TOOM33_THRESHOLD                66
@@ -39,149 +48,120 @@ along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
  #define MUL_TOOM8H_THRESHOLD               236
  
  #define MUL_TOOM32_TO_TOOM43_THRESHOLD      65
-#define MUL_TOOM32_TO_TOOM53_THRESHOLD      76
-#define MUL_TOOM42_TO_TOOM53_THRESHOLD      73
-#define MUL_TOOM42_TO_TOOM63_THRESHOLD      66
+#define MUL_TOOM32_TO_TOOM53_THRESHOLD     131
+#define MUL_TOOM42_TO_TOOM53_THRESHOLD      81
+#define MUL_TOOM42_TO_TOOM63_THRESHOLD      78
+#define MUL_TOOM43_TO_TOOM54_THRESHOLD      88
  
  #define SQR_BASECASE_THRESHOLD               0  /* always (native) */
  #define SQR_TOOM2_THRESHOLD                 16
  #define SQR_TOOM3_THRESHOLD                 65
  #define SQR_TOOM4_THRESHOLD                166
-#define SQR_TOOM6_THRESHOLD                226
+#define SQR_TOOM6_THRESHOLD                222
  #define SQR_TOOM8_THRESHOLD                333
  
-#define MULMOD_BNM1_THRESHOLD                9
-#define SQRMOD_BNM1_THRESHOLD                9
+#define MULMID_TOOM42_THRESHOLD             14
+
+#define MULMOD_BNM1_THRESHOLD                7
+#define SQRMOD_BNM1_THRESHOLD               10
  
-#define MUL_FFT_MODF_THRESHOLD             208  /* k = 5 */
+#define MUL_FFT_MODF_THRESHOLD             212  /* k = 5 */
  #define MUL_FFT_TABLE3                                      \
-  { {    208, 5}, {      7, 4}, {     15, 5}, {     11, 6}, \
-    {      6, 5}, {     13, 6}, {      7, 5}, {     15, 6}, \
-    {     13, 7}, {      7, 6}, {     15, 7}, {      9, 6}, \
-    {     19, 7}, {     13, 8}, {      7, 7}, {     16, 8}, \
-    {      9, 7}, {     19, 8}, {     11, 7}, {     23, 8}, \
-    {     13, 9}, {      7, 8}, {     15, 7}, {     31, 8}, \
-    {     19, 9}, {     11, 8}, {     25,10}, {      7, 9}, \
-    {     15, 8}, {     33, 9}, {     19, 8}, {     39, 9}, \
-    {     23,10}, {     15, 9}, {     39,10}, {     23, 9}, \
-    {     47,11}, {     15,10}, {     31, 9}, {     63, 8}, \
-    {    127, 9}, {     67,10}, {     39, 9}, {     79, 8}, \
-    {    159,10}, {     47,11}, {     31,10}, {     63, 9}, \
-    {    127, 8}, {    255, 7}, {    511,10}, {     71, 9}, \
-    {    143, 8}, {    287, 7}, {    575,10}, {     79, 9}, \
-    {    159, 8}, {    319,11}, {     47, 9}, {    191,12}, \
-    {     31,11}, {     63,10}, {    127, 9}, {    255, 8}, \
-    {    511,10}, {    143, 9}, {    287, 8}, {    575,11}, \
-    {     79,10}, {    159, 9}, {    319,10}, {    175, 9}, \
-    {    351, 8}, {    703, 7}, {   1407,10}, {    191, 9}, \
-    {    415,11}, {    111,10}, {    223, 9}, {    447,12}, \
-    {     63,11}, {    127,10}, {    255, 9}, {    511,11}, \
-    {    143,10}, {    287, 9}, {    575, 8}, {   1151,10}, \
-    {    319,11}, {    175,10}, {    351, 9}, {    703, 8}, \
-    {   1407,11}, {    191,10}, {    383, 9}, {    767,10}, \
-    {    415,11}, {    223,10}, {    447, 9}, {    895,13}, \
-    {     63,12}, {    127,11}, {    255,10}, {    511,11}, \
-    {    287,10}, {    575, 9}, {   1151,12}, {    159,11}, \
-    {    319,10}, {    639,11}, {    351,10}, {    703, 9}, \
-    {   1407,12}, {    191,11}, {    383,10}, {    767,11}, \
-    {    415,12}, {    223,11}, {    447,10}, {    895,11}, \
-    {    479,13}, {    127,12}, {    255,11}, {    511,12}, \
-    {    287,11}, {    575,10}, {   1151,12}, {    319,11}, \
-    {    639,12}, {    351,11}, {    703,10}, {   1407,13}, \
-    {    191,12}, {    383,11}, {    767,12}, {    415,11}, \
-    {    831,10}, {   1663,12}, {    447,11}, {    895,14}, \
-    {    127,13}, {    255,12}, {    511,11}, {   1023,12}, \
-    {    575,11}, {   1151,13}, {    319,12}, {    703,11}, \
-    {   1407,13}, {    383,12}, {    831,13}, {    447,12}, \
-    {    895,14}, {    255,13}, {    511,12}, {   1023,13}, \
-    {    575,12}, {   1151,13}, {    703,12}, {   1407,14}, \
-    {    383,13}, {    831,12}, {   1663,13}, {    895,15}, \
-    {    255,14}, {    511,13}, {   1023,12}, {   2175,13}, \
-    {   1151,14}, {    639,13}, {   1407,12}, {   2815,14}, \
-    {    767,13}, {   1663,14}, {    895,13}, {   1919,15}, \
-    {    511,14}, {   1023,13}, {   2047,14}, {   1151,13}, \
-    {   2431,14}, {   1407,13}, {   2815,15}, {    767,14}, \
-    {   1663,16}, {  65536,17}, { 131072,18}, { 262144,19}, \
+  { {    220, 5}, {      7, 4}, {     15, 5}, {     13, 6}, \
+    {      7, 5}, {     15, 6}, {     13, 7}, {      7, 6}, \
+    {     15, 7}, {      8, 6}, {     17, 7}, {     13, 8}, \
+    {      7, 7}, {     17, 8}, {      9, 7}, {     19, 8}, \
+    {     11, 7}, {     23, 8}, {     13, 9}, {      7, 8}, \
+    {     15, 7}, {     31, 8}, {     19, 9}, {     11, 8}, \
+    {     25,10}, {      7, 9}, {     15, 8}, {     33, 9}, \
+    {     19, 8}, {     39, 9}, {     23,10}, {     15, 9}, \
+    {     39,10}, {     23, 9}, {     47,11}, {     15,10}, \
+    {     31, 9}, {     67,10}, {     39, 9}, {     79, 8}, \
+    {    159,10}, {     47, 9}, {     95, 8}, {    191, 7}, \
+    {    383,10}, {     55,11}, {     31,10}, {     63, 9}, \
+    {    127, 8}, {    255,10}, {     71, 9}, {    143,10}, \
+    {     79, 9}, {    159,11}, {     47,10}, {     95, 9}, \
+    {    191, 8}, {    383,12}, {     31,11}, {     63, 9}, \
+    {    255,10}, {    143, 9}, {    287,11}, {     79,10}, \
+    {    159, 9}, {    319,10}, {    175, 9}, {    351, 8}, \
+    {    703,11}, {     95,10}, {    191, 9}, {    383,10}, \
+    {    207, 9}, {    415,11}, {    111,10}, {    223, 9}, \
+    {    447,12}, {     63,11}, {    127,10}, {    255, 9}, \
+    {    511,11}, {    143, 9}, {    575,10}, {    319,11}, \
+    {    175,10}, {    351, 9}, {    703,11}, {    191,10}, \
+    {    383,11}, {    223,13}, {   8192,14}, {  16384,15}, \
+    {  32768,16}, {  65536,17}, { 131072,18}, { 262144,19}, \
      { 524288,20}, {1048576,21}, {2097152,22}, {4194304,23}, \
      {8388608,24} }
-#define MUL_FFT_TABLE3_SIZE 193
-#define MUL_FFT_THRESHOLD                 1728
+#define MUL_FFT_TABLE3_SIZE 101
+#define MUL_FFT_THRESHOLD                 2112
  
-#define SQR_FFT_MODF_THRESHOLD             208  /* k = 5 */
+#define SQR_FFT_MODF_THRESHOLD             184  /* k = 5 */
  #define SQR_FFT_TABLE3                                      \
-  { {    208, 5}, {      7, 4}, {     15, 5}, {     11, 6}, \
-    {      6, 5}, {     13, 6}, {      7, 5}, {     15, 6}, \
-    {     13, 7}, {      7, 6}, {     15, 7}, {      8, 6}, \
-    {     17, 7}, {     17, 8}, {      9, 7}, {     19, 8}, \
-    {     11, 7}, {     23, 8}, {     13, 9}, {      7, 8}, \
-    {     19, 9}, {     11, 8}, {     25,10}, {      7, 9}, \
-    {     15, 8}, {     31, 9}, {     19, 8}, {     39, 9}, \
-    {     23,10}, {     15, 9}, {     39,10}, {     23, 9}, \
-    {     47,11}, {     15,10}, {     31, 9}, {     67,10}, \
-    {     39, 9}, {     79, 8}, {    159,10}, {     47,11}, \
-    {     31,10}, {     63, 9}, {    127, 8}, {    255,10}, \
-    {     71, 9}, {    143, 8}, {    287, 7}, {    575, 9}, \
-    {    159, 8}, {    319,11}, {     47, 9}, {    191,12}, \
-    {     31,11}, {     63,10}, {    127, 9}, {    255, 8}, \
-    {    511,10}, {    143, 9}, {    287, 8}, {    575,10}, \
-    {    159, 9}, {    319, 8}, {    639, 9}, {    351, 8}, \
-    {    703,10}, {    191, 9}, {    383,10}, {    207, 9}, \
-    {    415,11}, {    111,10}, {    223,12}, {     63,11}, \
-    {    127,10}, {    255, 9}, {    511,11}, {    143,10}, \
-    {    287, 9}, {    575,11}, {    159,10}, {    319, 9}, \
-    {    639,11}, {    175,10}, {    351, 9}, {    703,11}, \
-    {    191,10}, {    383,11}, {    207,10}, {    415,11}, \
-    {    223,10}, {    447,13}, {     63,12}, {    127,11}, \
-    {    255,10}, {    511,11}, {    287,10}, {    575,12}, \
-    {    159,11}, {    319,10}, {    639,11}, {    351,10}, \
-    {    703,12}, {    191,11}, {    383,10}, {    767,11}, \
-    {    415,12}, {    223,11}, {    447,13}, {    127,12}, \
-    {    255,11}, {    543,12}, {    287,11}, {    575,12}, \
-    {    319,11}, {    639,12}, {    351,13}, {    191,12}, \
-    {    383,11}, {    767,12}, {    415,11}, {    831,12}, \
-    {    479,13}, {    255,10}, {   2047,12}, {    575,13}, \
-    {    319,11}, {   1279,12}, {    703,13}, {    383,12}, \
-    {    831,13}, {    447,12}, {    895,14}, {    255,13}, \
-    {    511,12}, {   1023,13}, {    575,12}, {   1151,13}, \
-    {    703,14}, {    383,13}, {    831,12}, {   1663,13}, \
-    {    895,15}, {    255,14}, {    511,13}, {   1151,14}, \
-    {    639,13}, {   1407,12}, {   2815,14}, {    767,13}, \
-    {   1663,14}, {    895,13}, {   1791,15}, {  32768,16}, \
-    {  65536,17}, { 131072,18}, { 262144,19}, { 524288,20}, \
-    {1048576,21}, {2097152,22}, {4194304,23}, {8388608,24} }
-#define SQR_FFT_TABLE3_SIZE 160
+  { {    188, 5}, {      6, 4}, {     13, 5}, {      7, 4}, \
+    {     15, 5}, {     11, 6}, {      6, 5}, {     13, 6}, \
+    {      7, 5}, {     15, 6}, {     13, 7}, {      7, 6}, \
+    {     15, 7}, {     13, 8}, {      7, 7}, {     17, 8}, \
+    {      9, 7}, {     19, 8}, {     11, 7}, {     23, 8}, \
+    {     13, 9}, {      7, 8}, {     19, 9}, {     11, 8}, \
+    {     25,10}, {      7, 9}, {     15, 8}, {     33, 9}, \
+    {     19, 8}, {     39, 9}, {     23,10}, {     15, 9}, \
+    {     39,10}, {     23, 9}, {     47,11}, {     15,10}, \
+    {     31, 9}, {     67,10}, {     39, 9}, {     79,10}, \
+    {     47, 7}, {    383, 9}, {    103,11}, {     31,10}, \
+    {     63, 9}, {    127, 8}, {    255, 7}, {    511, 8}, \
+    {    287, 7}, {    575,10}, {     79, 9}, {    159, 8}, \
+    {    319,11}, {     47, 9}, {    191,12}, {     31,11}, \
+    {     63,10}, {    127, 9}, {    255,10}, {    143, 7}, \
+    {   1151, 9}, {    351,11}, {     95,10}, {    191,11}, \
+    {    111,10}, {    223,12}, {     63, 9}, {    511,11}, \
+    {    143,10}, {    287, 9}, {    575,11}, {    159,10}, \
+    {    319, 9}, {    639,11}, {    175,10}, {    351, 9}, \
+    {    703,12}, {     95,11}, {    191,10}, {    383,11}, \
+    {    207,10}, {    415,11}, {    223,10}, {    447,13}, \
+    {   8192,14}, {  16384,15}, {  32768,16}, {  65536,17}, \
+    { 131072,18}, { 262144,19}, { 524288,20}, {1048576,21}, \
+    {2097152,22}, {4194304,23}, {8388608,24} }
+#define SQR_FFT_TABLE3_SIZE 95
  #define SQR_FFT_THRESHOLD                 1600
  
-#define MULLO_BASECASE_THRESHOLD             0
-#define MULLO_DC_THRESHOLD                  22
-#define MULLO_MUL_N_THRESHOLD             3176
+#define MULLO_BASECASE_THRESHOLD             0  /* always */
+#define MULLO_DC_THRESHOLD                  33
+#define MULLO_MUL_N_THRESHOLD             4141
  
-#define DC_DIV_QR_THRESHOLD                 26
-#define DC_DIVAPPR_Q_THRESHOLD              93
+#define DC_DIV_QR_THRESHOLD                 27
+#define DC_DIVAPPR_Q_THRESHOLD              94
  #define DC_BDIV_QR_THRESHOLD                27
-#define DC_BDIV_Q_THRESHOLD                 62
+#define DC_BDIV_Q_THRESHOLD                 54
  
  #define INV_MULMOD_BNM1_THRESHOLD           18
  #define INV_NEWTON_THRESHOLD               131
-#define INV_APPR_THRESHOLD                 110
+#define INV_APPR_THRESHOLD                 106
  
  #define BINV_NEWTON_THRESHOLD              165
-#define REDC_1_TO_REDC_2_THRESHOLD          12
+#define REDC_1_TO_REDC_2_THRESHOLD          14
  #define REDC_2_TO_REDC_N_THRESHOLD          36
  
  #define MU_DIV_QR_THRESHOLD                792
  #define MU_DIVAPPR_Q_THRESHOLD             807
  #define MUPI_DIV_QR_THRESHOLD               67
  #define MU_BDIV_QR_THRESHOLD               654
-#define MU_BDIV_Q_THRESHOLD                792
+#define MU_BDIV_Q_THRESHOLD                748
  
-#define MATRIX22_STRASSEN_THRESHOLD         13
-#define HGCD_THRESHOLD                      83
-#define GCD_DC_THRESHOLD                   198
-#define GCDEXT_DC_THRESHOLD                198
+#define POWM_SEC_TABLE  4,32,204,724,1926
+
+#define MATRIX22_STRASSEN_THRESHOLD         15
+#define HGCD_THRESHOLD                      84
+#define HGCD_APPR_THRESHOLD                 87
+#define HGCD_REDUCE_THRESHOLD             1182
+#define GCD_DC_THRESHOLD                   195
+#define GCDEXT_DC_THRESHOLD                180
  #define JACOBI_BASE_METHOD                   2
  
-#define GET_STR_DC_THRESHOLD                15
-#define GET_STR_PRECOMPUTE_THRESHOLD        27
-#define SET_STR_DC_THRESHOLD               254
-#define SET_STR_PRECOMPUTE_THRESHOLD      1122
+#define GET_STR_DC_THRESHOLD                18
+#define GET_STR_PRECOMPUTE_THRESHOLD        32
+#define SET_STR_DC_THRESHOLD               256
+#define SET_STR_PRECOMPUTE_THRESHOLD      1151
+
+#define FAC_DSC_THRESHOLD                 1065
+#define FAC_ODD_THRESHOLD                    0  /* always */
diff --git a/mpn/x86_64/atom/lshift.asm b/mpn/x86_64/atom/lshift.asm

new file mode 100644 (file)

index 0000000..f62d6ce
--- /dev/null
+++ b/mpn/x86_64/atom/lshift.asm
@@ -0,0 +1,112 @@
+dnl  AMD64 mpn_lshift -- mpn left shift, optimised for Atom.
+
+dnl  Contributed to the GNU project by Torbjorn Granlund.
+
+dnl  Copyright 2011, 2012 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C           cycles/limb
+C AMD K8,K9     ?
+C AMD K10       ?
+C Intel P4      ?
+C Intel core2   ?
+C Intel NHM     ?
+C Intel SBR     ?
+C Intel atom    4.5
+C VIA nano      ?
+
+C TODO
+C  * Consider using 4-way unrolling.  We reach 4 c/l, but the code is 2.5 times
+C    larger.
+
+C INPUT PARAMETERS
+define(`rp',   `%rdi')
+define(`up',   `%rsi')
+define(`n',    `%rdx')
+define(`cnt',  `%rcx')
+
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
+
+ASM_START()
+       TEXT
+       ALIGN(16)
+PROLOGUE(mpn_lshift)
+       FUNC_ENTRY(4)
+       lea     -8(up,n,8), up          C up -> highest source limb, up[n-1]
+       lea     -8(rp,n,8), rp          C rp -> highest destination limb, rp[n-1]
+       shr     R32(n)                  C n = n/2 (limb pairs); CF = 1 when n was odd
+       mov     (up), %rax              C rax = highest source limb (mov leaves CF intact)
+       jnc     L(evn)                  C even limb count
+
+       mov     %rax, %r11
+       shl     R8(%rcx), %r11          C r11 = highest limb << cnt
+       neg     R8(%rcx)                C cl = -cnt; a 64-bit shift uses cl mod 64, so this acts as 64-cnt for cnt in 1..63
+       shr     R8(%rcx), %rax          C rax = bits shifted out at the top; rax is not written again before ret
+       test    n, n
+       jnz     L(gt1)
+       mov     %r11, (rp)              C n was 1: store the single result limb
+       FUNC_EXIT()
+       ret
+
+L(gt1):        mov     -8(up), %r8     C next lower source limb
+       mov     %r8, %r10               C unshifted copy, shifted left by cnt later at L(top) or L(end)
+       shr     R8(%rcx), %r8           C cl is still negated here
+       jmp     L(lo1)                  C enter the loop at its midpoint
+
+L(evn):        mov     %rax, %r10
+       neg     R8(%rcx)                C cl = -cnt
+       shr     R8(%rcx), %rax          C rax = bits shifted out at the top; rax is not written again before ret
+       mov     -8(up), %r9
+       mov     %r9, %r11
+       shr     R8(%rcx), %r9
+       neg     R8(%rcx)                C cl = cnt again
+       dec     n                       C sets ZF for the jz below; the two lea do not change flags
+       lea     8(rp), rp
+       lea     -8(up), up
+       jz      L(end)                  C n was 2: go straight to the wind-down
+
+       ALIGN(8)
+L(top):        shl     R8(%rcx), %r10  C each pass through the loop stores two result limbs
+       or      %r10, %r9               C r9 = (higher limb << cnt) | (lower limb >> 64-cnt)
+       shl     R8(%rcx), %r11
+       neg     R8(%rcx)                C cl = -cnt for the right shifts that follow
+       mov     -8(up), %r8
+       mov     %r8, %r10
+       mov     %r9, -8(rp)
+       shr     R8(%rcx), %r8
+       lea     -16(rp), rp
+L(lo1):        mov     -16(up), %r9
+       or      %r11, %r8               C r8 = (higher limb << cnt) | (lower limb >> 64-cnt)
+       mov     %r9, %r11
+       shr     R8(%rcx), %r9
+       lea     -16(up), up
+       neg     R8(%rcx)                C cl = cnt for the left shifts that follow
+       mov     %r8, (rp)
+       dec     n
+       jg      L(top)
+
+L(end):        shl     R8(%rcx), %r10  C wind-down: the last two result limbs
+       or      %r10, %r9
+       shl     R8(%rcx), %r11          C last limb stored: its low cnt bits are zero
+       mov     %r9, -8(rp)
+       mov     %r11, -16(rp)
+       FUNC_EXIT()
+       ret
+EPILOGUE()
diff --git a/mpn/x86_64/atom/lshiftc.asm b/mpn/x86_64/atom/lshiftc.asm

new file mode 100644 (file)

index 0000000..f566835
--- /dev/null
+++ b/mpn/x86_64/atom/lshiftc.asm
@@ -0,0 +1,116 @@
+dnl  AMD64 mpn_lshiftc -- mpn left shift with complement, optimised for Atom.
+
+dnl  Contributed to the GNU project by Torbjorn Granlund.
+
+dnl  Copyright 2011, 2012 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C           cycles/limb
+C AMD K8,K9     ?
+C AMD K10       ?
+C Intel P4      ?
+C Intel core2   ?
+C Intel NHM     ?
+C Intel SBR     ?
+C Intel atom    5
+C VIA nano      ?
+
+C TODO
+C  * Consider using 4-way unrolling.  We reach 4.5 c/l, but the code is 2.5
+C    times larger.
+
+C INPUT PARAMETERS
+define(`rp',   `%rdi')
+define(`up',   `%rsi')
+define(`n',    `%rdx')
+define(`cnt',  `%rcx')
+
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
+
+ASM_START()
+       TEXT
+       ALIGN(16)
+PROLOGUE(mpn_lshiftc)
+       FUNC_ENTRY(4)
+       lea     -8(up,n,8), up          C up -> highest source limb, up[n-1]
+       lea     -8(rp,n,8), rp          C rp -> highest destination limb, rp[n-1]
+       shr     R32(n)                  C n = n/2 (limb pairs); CF = 1 when n was odd
+       mov     (up), %rax              C rax = highest source limb (mov leaves CF intact)
+       jnc     L(evn)                  C even limb count
+
+       mov     %rax, %r11
+       shl     R8(%rcx), %r11          C r11 = highest limb << cnt
+       neg     R8(%rcx)                C cl = -cnt; a 64-bit shift uses cl mod 64, so this acts as 64-cnt for cnt in 1..63
+       shr     R8(%rcx), %rax          C rax = bits shifted out at the top, not complemented; rax is not written again before ret
+       test    n, n
+       jnz     L(gt1)
+       not     %r11                    C n was 1: complement and store the single result limb
+       mov     %r11, (rp)
+       FUNC_EXIT()
+       ret
+
+L(gt1):        mov     -8(up), %r8     C next lower source limb
+       mov     %r8, %r10               C unshifted copy, shifted left by cnt later at L(top) or L(end)
+       shr     R8(%rcx), %r8           C cl is still negated here
+       jmp     L(lo1)                  C enter the loop at its midpoint
+
+L(evn):        mov     %rax, %r10
+       neg     R8(%rcx)                C cl = -cnt
+       shr     R8(%rcx), %rax          C rax = bits shifted out at the top, not complemented
+       mov     -8(up), %r9
+       mov     %r9, %r11
+       shr     R8(%rcx), %r9
+       neg     R8(%rcx)                C cl = cnt again
+       lea     8(rp), rp
+       lea     -8(up), up
+       jmp     L(lo0)                  C join the loop at its counter test
+
+C      ALIGN(16)
+L(top):        shl     R8(%rcx), %r10  C each pass through the loop stores two complemented result limbs
+       or      %r10, %r9               C r9 = (higher limb << cnt) | (lower limb >> 64-cnt)
+       shl     R8(%rcx), %r11
+       not     %r9
+       neg     R8(%rcx)                C cl = -cnt for the right shifts that follow
+       mov     -8(up), %r8
+       lea     -16(rp), rp
+       mov     %r8, %r10
+       shr     R8(%rcx), %r8
+       mov     %r9, 8(rp)              C rp was already stepped down by 16 above
+L(lo1):        or      %r11, %r8       C r8 = (higher limb << cnt) | (lower limb >> 64-cnt)
+       mov     -16(up), %r9
+       mov     %r9, %r11
+       shr     R8(%rcx), %r9
+       lea     -16(up), up
+       neg     R8(%rcx)                C cl = cnt for the left shifts that follow
+       not     %r8
+       mov     %r8, (rp)
+L(lo0):        dec     n
+       jg      L(top)
+
+L(end):        shl     R8(%rcx), %r10  C wind-down: the last two result limbs
+       or      %r10, %r9
+       not     %r9
+       shl     R8(%rcx), %r11
+       not     %r11                    C the low cnt bits were zero after the shift, so they become ones
+       mov     %r9, -8(rp)
+       mov     %r11, -16(rp)
+       FUNC_EXIT()
+       ret
+EPILOGUE()
diff --git a/mpn/x86_64/atom/popcount.asm b/mpn/x86_64/atom/popcount.asm

new file mode 100644 (file)

index 0000000..4753acf
--- /dev/null
+++ b/mpn/x86_64/atom/popcount.asm
@@ -0,0 +1,24 @@
+dnl  x86-64 mpn_popcount.
+
+dnl  Copyright 2007, 2011 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+
+include(`../config.m4')
+
+MULFUNC_PROLOGUE(mpn_popcount)
+include_mpn(`x86/pentium4/sse2/popcount.asm')
diff --git a/mpn/x86_64/atom/rsh1aors_n.asm b/mpn/x86_64/atom/rsh1aors_n.asm

new file mode 100644 (file)

index 0000000..b1db9a6
--- /dev/null
+++ b/mpn/x86_64/atom/rsh1aors_n.asm
@@ -0,0 +1,276 @@
+dnl  x86-64 mpn_rsh1add_n/mpn_rsh1sub_n.
+
+dnl  Contributed to the GNU project by Torbjorn Granlund.
+
+dnl  Copyright 2011, 2012 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C TODO
+C  * Schedule loop less.  It is now almost surely overscheduled, resulting in
+C    large feed-in and wind-down code.
+
+C           cycles/limb
+C AMD K8,K9     ?
+C AMD K10       ?
+C Intel P4      ?
+C Intel core2   ?
+C Intel NHM     ?
+C Intel SBR     ?
+C Intel atom    5.25
+C VIA nano      ?
+
+C INPUT PARAMETERS
+define(`rp',`%rdi')
+define(`up',`%rsi')
+define(`vp',`%rdx')
+define(`n',`%rcx')
+
+ifdef(`OPERATION_rsh1add_n', `
+       define(ADDSUB,        add)
+       define(ADCSBB,        adc)
+       define(func_n,        mpn_rsh1add_n)
+       define(func_nc,       mpn_rsh1add_nc)')
+ifdef(`OPERATION_rsh1sub_n', `
+       define(ADDSUB,        sub)
+       define(ADCSBB,        sbb)
+       define(func_n,        mpn_rsh1sub_n)
+       define(func_nc,       mpn_rsh1sub_nc)')
+
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
+
+MULFUNC_PROLOGUE(mpn_rsh1add_n mpn_rsh1sub_n)
+
+ASM_START()
+       TEXT
+       ALIGN(16)
+PROLOGUE(func_n)
+       FUNC_ENTRY(4)
+       push    %rbx                    C save the six registers that are popped again at L(cj1)
+       push    %rbp
+       push    %r12
+       push    %r13
+       push    %r14
+       push    %r15
+
+       mov     (up), %r15
+       ADDSUB  (vp), %r15              C r15 = up[0] ADDSUB vp[0]; ADDSUB is add or sub per OPERATION_*
+       sbb     R32(%rbx), R32(%rbx)    C ebx = 0 or -1: carry/borrow saved across flag-clobbering code
+       xor     R32(%rax), R32(%rax)
+       shr     %r15                    C CF = bit 0 of the lowest sum limb, shifted out
+       adc     R32(%rax), R32(%rax)    C return value
+
+       mov     R32(n), R32(%rbp)
+       and     $3, R32(%rbp)           C dispatch on n mod 4 into the matching feed-in code
+       jz      L(b0)
+       cmp     $2, R32(%rbp)
+       jae     L(b23)                  C n mod 4 is 2 or 3; ZF from the cmp is tested at L(b23)
+
+L(b1): dec     n
+       jnz     L(gt1)
+       shl     $63, %rbx               C n was 1: move the saved carry to bit 63
+       add     %rbx, %r15              C bit 63 of r15 is zero after the shr, so this just inserts the carry
+       mov     %r15, (rp)
+       jmp     L(cj1)
+L(gt1):        lea     24(up), up
+       lea     24(vp), vp
+       mov     -16(up), %r9
+       add     R32(%rbx), R32(%rbx)    C reload CF from the saved carry (-1 + -1 carries out, 0 + 0 does not)
+       mov     -8(up), %r10
+       lea     24(rp), rp
+       mov     (up), %r11
+       ADCSBB  -16(vp), %r9
+       ADCSBB  -8(vp), %r10
+       mov     %r15, %r12
+       ADCSBB  (vp), %r11
+       mov     %r9, %r13
+       sbb     R32(%rbx), R32(%rbx)    C save carry/borrow again
+       mov     %r11, %r15
+       mov     %r10, %r14
+       shl     $63, %r11               C bit 0 of each sum limb moves to bit 63 ...
+       shl     $63, %r10
+       shl     $63, %r9
+       or      %r9, %r12               C ... and is merged into the already right-shifted limb below it
+       shr     %r13
+       mov     8(up), %r8
+       shr     %r14
+       or      %r10, %r13
+       shr     %r15
+       or      %r11, %r14
+       sub     $4, n
+       jz      L(cj5)
+L(gt5):        mov     16(up), %r9
+       add     R32(%rbx), R32(%rbx)    C reload CF from the saved carry
+       mov     24(up), %r10
+       ADCSBB  8(vp), %r8
+       mov     %r15, %rbp
+       mov     32(up), %r11
+       jmp     L(lo1)
+
+L(b23):        jnz     L(b3)           C ZF comes from the cmp $2 above: clear means n mod 4 is 3
+       mov     8(up), %r8
+       sub     $2, n
+       jnz     L(gt2)
+       add     R32(%rbx), R32(%rbx)    C n was 2: reload CF from the saved carry
+       ADCSBB  8(vp), %r8
+       mov     %r8, %r12
+       jmp     L(cj2)
+L(gt2):        mov     16(up), %r9
+       add     R32(%rbx), R32(%rbx)    C reload CF from the saved carry
+       mov     24(up), %r10
+       ADCSBB  8(vp), %r8
+       mov     %r15, %rbp
+       mov     32(up), %r11
+       ADCSBB  16(vp), %r9
+       lea     32(up), up
+       ADCSBB  24(vp), %r10
+       mov     %r9, %r13
+       ADCSBB  32(vp), %r11
+       mov     %r8, %r12
+       jmp     L(lo2)
+
+L(b3): lea     40(up), up
+       lea     8(vp), vp
+       mov     %r15, %r14
+       add     R32(%rbx), R32(%rbx)    C reload CF from the saved carry
+       mov     -32(up), %r11
+       ADCSBB  0(vp), %r11
+       lea     8(rp), rp
+       sbb     R32(%rbx), R32(%rbx)    C save carry/borrow again
+       mov     %r11, %r15
+       shl     $63, %r11
+       mov     -24(up), %r8
+       shr     %r15
+       or      %r11, %r14
+       sub     $3, n
+       jnz     L(gt3)
+       add     R32(%rbx), R32(%rbx)    C n was 3: reload CF from the saved carry
+       ADCSBB  8(vp), %r8
+       jmp     L(cj3)
+L(gt3):        mov     -16(up), %r9
+       add     R32(%rbx), R32(%rbx)    C reload CF from the saved carry
+       mov     -8(up), %r10
+       ADCSBB  8(vp), %r8
+       mov     %r15, %rbp
+       mov     (up), %r11
+       ADCSBB  16(vp), %r9
+       ADCSBB  24(vp), %r10
+       mov     %r8, %r12
+       jmp     L(lo3)
+
+L(b0): lea     48(up), up
+       lea     16(vp), vp
+       add     R32(%rbx), R32(%rbx)    C reload CF from the saved carry
+       mov     -40(up), %r10
+       lea     16(rp), rp
+       mov     -32(up), %r11
+       ADCSBB  -8(vp), %r10
+       mov     %r15, %r13
+       ADCSBB  (vp), %r11
+       sbb     R32(%rbx), R32(%rbx)    C save carry/borrow again
+       mov     %r11, %r15
+       mov     %r10, %r14
+       shl     $63, %r11
+       shl     $63, %r10
+       mov     -24(up), %r8
+       shr     %r14
+       or      %r10, %r13
+       shr     %r15
+       or      %r11, %r14
+       sub     $4, n
+       jnz     L(gt4)
+       add     R32(%rbx), R32(%rbx)    C n was 4: reload CF from the saved carry
+       ADCSBB  8(vp), %r8
+       jmp     L(cj4)
+L(gt4):        mov     -16(up), %r9
+       add     R32(%rbx), R32(%rbx)    C reload CF from the saved carry
+       mov     -8(up), %r10
+       ADCSBB  8(vp), %r8
+       mov     %r15, %rbp
+       mov     (up), %r11
+       ADCSBB  16(vp), %r9
+       jmp     L(lo0)
+
+       ALIGN(8)
+L(top):        mov     16(up), %r9     C main loop: four stores per pass, n decreases by 4
+       shr     %r14
+       or      %r10, %r13
+       shr     %r15
+       or      %r11, %r14
+       add     R32(%rbx), R32(%rbx)    C reload CF from the saved carry
+       mov     24(up), %r10
+       mov     %rbp, (rp)
+       ADCSBB  8(vp), %r8
+       mov     %r15, %rbp
+       lea     32(rp), rp
+       mov     32(up), %r11
+L(lo1):        ADCSBB  16(vp), %r9
+       lea     32(up), up
+       mov     %r12, -24(rp)
+L(lo0):        ADCSBB  24(vp), %r10
+       mov     %r8, %r12
+       mov     %r13, -16(rp)
+L(lo3):        ADCSBB  32(vp), %r11
+       mov     %r9, %r13
+       mov     %r14, -8(rp)
+L(lo2):        sbb     R32(%rbx), R32(%rbx)    C save carry/borrow; the shifts below clobber CF
+       shl     $63, %r8
+       mov     %r11, %r15
+       shr     %r12
+       mov     %r10, %r14
+       shl     $63, %r9
+       lea     32(vp), vp
+       shl     $63, %r10
+       or      %r8, %rbp
+       shl     $63, %r11
+       or      %r9, %r12
+       shr     %r13
+       mov     8(up), %r8
+       sub     $4, n
+       jg      L(top)
+
+L(end):        shr     %r14            C wind-down: finish the limbs still held in registers
+       or      %r10, %r13
+       shr     %r15
+       or      %r11, %r14
+       mov     %rbp, (rp)
+       lea     32(rp), rp
+L(cj5):        add     R32(%rbx), R32(%rbx)    C reload CF from the saved carry
+       ADCSBB  8(vp), %r8
+       mov     %r12, -24(rp)
+L(cj4):        mov     %r13, -16(rp)
+L(cj3):        mov     %r8, %r12
+       mov     %r14, -8(rp)
+L(cj2):        sbb     R32(%rbx), R32(%rbx)    C ebx = 0 or -1: carry/borrow out of the last ADCSBB
+       shl     $63, %r8
+       shr     %r12
+       or      %r8, %r15
+       shl     $63, %rbx               C move that carry to bit 63
+       add     %rbx, %r12              C bit 63 of r12 is zero after the shr, so this just inserts the carry
+       mov     %r15, (rp)
+       mov     %r12, 8(rp)
+L(cj1):        pop     %r15            C restore registers in reverse push order
+       pop     %r14
+       pop     %r13
+       pop     %r12
+       pop     %rbp
+       pop     %rbx
+       FUNC_EXIT()
+       ret
+EPILOGUE()
diff --git a/mpn/x86_64/atom/rshift.asm b/mpn/x86_64/atom/rshift.asm

new file mode 100644 (file)

index 0000000..a986b83
--- /dev/null
+++ b/mpn/x86_64/atom/rshift.asm
@@ -0,0 +1,110 @@
+dnl  AMD64 mpn_rshift -- mpn right shift, optimised for Atom.
+
+dnl  Contributed to the GNU project by Torbjorn Granlund.
+
+dnl  Copyright 2011, 2012 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C           cycles/limb
+C AMD K8,K9     ?
+C AMD K10       ?
+C Intel P4      ?
+C Intel core2   ?
+C Intel NHM     ?
+C Intel SBR     ?
+C Intel atom    4.5
+C VIA nano      ?
+
+C TODO
+C  * Consider using 4-way unrolling.  We reach 4 c/l, but the code is 2.5 times
+C    larger.
+
+C INPUT PARAMETERS
+define(`rp',   `%rdi')
+define(`up',   `%rsi')
+define(`n',    `%rdx')
+define(`cnt',  `%rcx')
+
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
+
+ASM_START()
+       TEXT
+       ALIGN(16)
+PROLOGUE(mpn_rshift)
+       FUNC_ENTRY(4)
+       shr     R32(n)                  C n = n/2 (limb pairs); CF = 1 when n was odd
+       mov     (up), %rax              C rax = lowest source limb, up[0] (mov leaves CF intact)
+       jnc     L(evn)                  C even limb count
+
+       mov     %rax, %r11
+       shr     R8(cnt), %r11           C r11 = lowest limb >> cnt
+       neg     R8(cnt)                 C cl = -cnt; a 64-bit shift uses cl mod 64, so this acts as 64-cnt for cnt in 1..63
+       shl     R8(cnt), %rax           C rax = bits shifted out at the bottom; rax is not written again before ret
+       test    n, n
+       jnz     L(gt1)
+       mov     %r11, (rp)              C n was 1: store the single result limb
+       FUNC_EXIT()
+       ret
+
+L(gt1):        mov     8(up), %r8      C next higher source limb
+       mov     %r8, %r10               C unshifted copy, shifted right by cnt later at L(top) or L(end)
+       shl     R8(cnt), %r8            C cl is still negated here
+       jmp     L(lo1)                  C enter the loop at its midpoint
+
+L(evn):        mov     %rax, %r10
+       neg     R8(cnt)                 C cl = -cnt
+       shl     R8(cnt), %rax           C rax = bits shifted out at the bottom; rax is not written again before ret
+       mov     8(up), %r9
+       mov     %r9, %r11
+       shl     R8(cnt), %r9
+       neg     R8(cnt)                 C cl = cnt again
+       dec     n                       C sets ZF for the jz below; the two lea do not change flags
+       lea     -8(rp), rp
+       lea     8(up), up
+       jz      L(end)                  C n was 2: go straight to the wind-down
+
+       ALIGN(8)
+L(top):        shr     R8(cnt), %r10   C each pass through the loop stores two result limbs
+       or      %r10, %r9               C r9 = (lower limb >> cnt) | (higher limb << 64-cnt)
+       shr     R8(cnt), %r11
+       neg     R8(cnt)                 C cl = -cnt for the left shifts that follow
+       mov     8(up), %r8
+       mov     %r8, %r10
+       mov     %r9, 8(rp)
+       shl     R8(cnt), %r8
+       lea     16(rp), rp
+L(lo1):        mov     16(up), %r9
+       or      %r11, %r8               C r8 = (lower limb >> cnt) | (higher limb << 64-cnt)
+       mov     %r9, %r11
+       shl     R8(cnt), %r9
+       lea     16(up), up
+       neg     R8(cnt)                 C cl = cnt for the right shifts that follow
+       mov     %r8, (rp)
+       dec     n
+       jg      L(top)
+
+L(end):        shr     R8(cnt), %r10   C wind-down: the last two result limbs
+       or      %r10, %r9
+       shr     R8(cnt), %r11           C last limb stored: its high cnt bits are zero
+       mov     %r9, 8(rp)
+       mov     %r11, 16(rp)
+       FUNC_EXIT()
+       ret
+EPILOGUE()
diff --git a/mpn/x86_64/atom/sublsh1_n.asm b/mpn/x86_64/atom/sublsh1_n.asm

new file mode 100644 (file)

index 0000000..3e19448
--- /dev/null
+++ b/mpn/x86_64/atom/sublsh1_n.asm
@@ -0,0 +1,228 @@
+dnl  AMD64 mpn_sublsh1_n -- rp[] = up[] - (vp[] << 1) optimised for Intel Atom.
+
+dnl  Contributed to the GNU project by Torbjorn Granlund.
+
+dnl  Copyright 2011, 2012 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C TODO
+C  * This code is slightly large at 501 bytes.
+C  * aorrlsh1_n.asm and this file use the same basic pattern.
+
+C           cycles/limb
+C AMD K8,K9     ?
+C AMD K10       ?
+C Intel P4      ?
+C Intel core2   ?
+C Intel NHM     ?
+C Intel SBR     ?
+C Intel atom    5      (4.875 is probably possible)
+C VIA nano      ?
+
+C INPUT PARAMETERS
+define(`rp',       `%rdi')
+define(`up',       `%rsi')
+define(`vp',       `%rdx')
+define(`n',        `%rcx')
+define(`cy',       `%r8')
+
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
+
+ASM_START()
+       TEXT
+       ALIGN(16)
+PROLOGUE(mpn_sublsh1_n)                C rp[] = up[] - (vp[] << 1) over n limbs; eax returns the combined carry-out
+       FUNC_ENTRY(4)
+       push    %rbp
+       push    %r15
+       xor     R32(%rbp), R32(%rbp)    C acy (borrow of the subtraction) = 0
+L(ent):        mov     R32(n), R32(%rax)       C shared body: mpn_sublsh1_nc jumps here with acy preset
+       and     $3, R32(%rax)           C eax = n mod 4; a zero result doubles as scy = 0 at L(b0)
+       jz      L(b0)
+       cmp     $2, R32(%rax)
+       jz      L(b2)
+       jg      L(b3)
+
+L(b1): mov     (vp), %r8               C n mod 4 = 1: one feed-in limb
+       add     %r8, %r8                C r8 = vp[0] << 1, CF = bit shifted out
+       lea     8(vp), vp
+       sbb     R32(%rax), R32(%rax)    C save scy
+       add     R32(%rbp), R32(%rbp)    C restore acy
+       mov     (up), %r15
+       sbb     %r8, %r15               C up[0] - shifted limb - acy
+       mov     %r15, (rp)
+       sbb     R32(%rbp), R32(%rbp)    C save acy
+       lea     8(up), up
+       lea     8(rp), rp
+       jmp     L(b0)
+
+L(b2): mov     (vp), %r8               C n mod 4 = 2: two feed-in limbs
+       add     %r8, %r8
+       mov     8(vp), %r9
+       adc     %r9, %r9                C shift the next limb, pulling in the bit shifted out below
+       lea     16(vp), vp
+       sbb     R32(%rax), R32(%rax)    C save scy
+       add     R32(%rbp), R32(%rbp)    C restore acy
+       mov     (up), %r15
+       sbb     %r8, %r15
+       mov     %r15, (rp)
+       mov     8(up), %r15
+       sbb     %r9, %r15
+       mov     %r15, 8(rp)
+       sbb     R32(%rbp), R32(%rbp)    C save acy
+       lea     16(up), up
+       lea     16(rp), rp
+       jmp     L(b0)
+
+L(b3): mov     (vp), %r8               C n mod 4 = 3: three feed-in limbs
+       add     %r8, %r8
+       mov     8(vp), %r9
+       adc     %r9, %r9
+       mov     16(vp), %r10
+       adc     %r10, %r10
+       lea     24(vp), vp
+       sbb     R32(%rax), R32(%rax)    C save scy
+       add     R32(%rbp), R32(%rbp)    C restore acy
+       mov     (up), %r15
+       sbb     %r8, %r15
+       mov     %r15, (rp)
+       mov     8(up), %r15
+       sbb     %r9, %r15
+       mov     %r15, 8(rp)
+       mov     16(up), %r15
+       sbb     %r10, %r15
+       mov     %r15, 16(rp)
+       sbb     R32(%rbp), R32(%rbp)    C save acy
+       lea     24(up), up
+       lea     24(rp), rp
+
+L(b0): test    $4, R8(n)               C bit 2 of n set: handle four more limbs here
+       jz      L(skp)
+       add     R32(%rax), R32(%rax)    C restore scy
+       mov     (vp), %r8
+       adc     %r8, %r8
+       mov     8(vp), %r9
+       adc     %r9, %r9
+       mov     16(vp), %r10
+       adc     %r10, %r10
+       mov     24(vp), %r11
+       adc     %r11, %r11
+       lea     32(vp), vp
+       sbb     R32(%rax), R32(%rax)    C save scy
+       add     R32(%rbp), R32(%rbp)    C restore acy
+       mov     (up), %r15
+       sbb     %r8, %r15
+       mov     %r15, (rp)
+       mov     8(up), %r15
+       sbb     %r9, %r15
+       mov     %r15, 8(rp)
+       mov     16(up), %r15
+       sbb     %r10, %r15
+       mov     %r15, 16(rp)
+       mov     24(up), %r15
+       sbb     %r11, %r15
+       mov     %r15, 24(rp)
+       lea     32(up), up
+       lea     32(rp), rp
+       sbb     R32(%rbp), R32(%rbp)    C save acy
+
+L(skp):        cmp     $8, n                   C fewer than 8 limbs in total: nothing left for the 8-way loop
+       jl      L(rtn)
+
+       push    %r12
+       push    %r13
+       push    %r14
+       push    %rbx
+       lea     -64(rp), rp             C the loop advances rp by 64 before its first store
+       jmp     L(x)
+
+       ALIGN(16)
+L(top):        mov     (vp), %r8
+       add     R32(%rax), R32(%rax)    C restore scy
+       lea     64(vp), vp
+       adc     %r8, %r8
+       mov     -56(vp), %r9
+       adc     %r9, %r9
+       mov     -48(vp), %r10
+       adc     %r10, %r10
+       mov     -40(vp), %r11
+       adc     %r11, %r11
+       mov     -32(vp), %r12
+       adc     %r12, %r12
+       mov     -24(vp), %r13
+       adc     %r13, %r13
+       mov     -16(vp), %r14
+       adc     %r14, %r14
+       mov     -8(vp), %r15
+       adc     %r15, %r15
+       sbb     R32(%rax), R32(%rax)    C save scy
+       add     R32(%rbp), R32(%rbp)    C restore acy
+       mov     (up), %rbp              C rbp carries data from here until acy is saved again
+       lea     64(rp), rp
+       mov     8(up), %rbx
+       sbb     %r8, %rbp
+       mov     32(up), %r8
+       mov     %rbp, (rp)
+       sbb     %r9, %rbx
+       mov     16(up), %rbp
+       mov     %rbx, 8(rp)
+       sbb     %r10, %rbp
+       mov     24(up), %rbx
+       mov     %rbp, 16(rp)
+       sbb     %r11, %rbx
+       mov     %rbx, 24(rp)
+       sbb     %r12, %r8
+       mov     40(up), %r9
+       mov     %r8, 32(rp)
+       sbb     %r13, %r9
+       mov     48(up), %rbp
+       mov     %r9, 40(rp)
+       sbb     %r14, %rbp
+       mov     56(up), %rbx
+       mov     %rbp, 48(rp)
+       sbb     %r15, %rbx
+       lea     64(up), up
+       mov     %rbx, 56(rp)
+       sbb     R32(%rbp), R32(%rbp)    C save acy
+L(x):  sub     $8, n                   C loop while 8 or more limbs remain; the stale low 3 bits of n stay below 8
+       jge     L(top)
+
+L(end):        pop     %rbx                    C undo the four pushes made before the loop
+       pop     %r14
+       pop     %r13
+       pop     %r12
+L(rtn):
+       add     R32(%rbp), R32(%rax)    C eax = scy + acy, each of which is 0 or -1
+       neg     R32(%rax)               C return value is 0, 1 or 2
+
+       pop     %r15
+       pop     %rbp
+       FUNC_EXIT()
+       ret
+EPILOGUE()
+PROLOGUE(mpn_sublsh1_nc)               C variant of mpn_sublsh1_n that starts from the carry-in cy
+       FUNC_ENTRY(4)
+IFDOS(`        mov     56(%rsp), %r8   ')
+       push    %rbp                    C same two pushes as mpn_sublsh1_n; popped after L(rtn)
+       push    %r15
+       neg     %r8                     C set CF iff cy is nonzero
+       sbb     R32(%rbp), R32(%rbp)    C save acy (ebp = 0 or -1)
+       jmp     L(ent)                  C continue in the body of mpn_sublsh1_n
+EPILOGUE()
diff --git a/mpn/x86_64/bd1/aorsmul_1.asm b/mpn/x86_64/bd1/aorsmul_1.asm

new file mode 100644 (file)

index 0000000..9cf5db9
--- /dev/null
+++ b/mpn/x86_64/bd1/aorsmul_1.asm
@@ -0,0 +1,171 @@
+dnl  AMD64 mpn_addmul_1 and mpn_submul_1 optimised for AMD Bulldozer.
+
+dnl  Copyright 2003, 2004, 2005, 2007, 2008, 2011, 2012 Free Software
+dnl  Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C           cycles/limb
+C AMD K8,K9
+C AMD K10
+C AMD bd1       4.5-4.7
+C AMD bobcat
+C Intel P4
+C Intel core2
+C Intel NHM
+C Intel SBR
+C Intel atom
+C VIA nano
+
+C The loop of this code is the result of running a code generation and
+C optimisation tool suite written by David Harvey and Torbjorn Granlund.
+
+C TODO
+C  * Try to make loop run closer to 4 c/l.
+
+define(`rp',      `%rdi')   C rcx
+define(`up',      `%rsi')   C rdx
+define(`n_param', `%rdx')   C r8
+define(`v0',      `%rcx')   C r9
+
+define(`n',       `%r11')
+
+ifdef(`OPERATION_addmul_1',`
+      define(`ADDSUB',        `add')
+      define(`func',  `mpn_addmul_1')
+')
+ifdef(`OPERATION_submul_1',`
+      define(`ADDSUB',        `sub')
+      define(`func',  `mpn_submul_1')
+')
+
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
+
+MULFUNC_PROLOGUE(mpn_addmul_1 mpn_submul_1)
+
+IFDOS(`        define(`up', ``%rsi'')  ') dnl
+IFDOS(`        define(`rp', ``%rcx'')  ') dnl
+IFDOS(`        define(`v0', ``%r9'')   ') dnl
+IFDOS(`        define(`r9', ``rdi'')   ') dnl
+IFDOS(`        define(`n',  ``%r8'')   ') dnl
+IFDOS(`        define(`r8', ``r11'')   ') dnl
+
+ASM_START()
+       TEXT
+       ALIGN(16)
+PROLOGUE(func)                         C rp[] ADDSUB= up[] * v0 over n limbs; the final carry limb is left in rax
+IFDOS(``push   %rsi            '')
+IFDOS(``push   %rdi            '')
+IFDOS(``mov    %rdx, %rsi      '')
+
+       mov     (up), %rax              C read first u limb early
+       push    %rbx
+IFSTD(`        mov     n_param, %rbx   ')      C move away n from rdx, mul uses it
+IFDOS(`        mov     n, %rbx         ')
+       mul     v0                      C rdx:rax = up[0] * v0
+
+IFSTD(`        mov     %rbx, n         ')
+
+       and     $3, R32(%rbx)           C n mod 4 selects the loop entry point
+       lea     -16(rp,n,8), rp         C rp = rp + 8n - 16; lea keeps the flags set by and
+       jz      L(b0)
+       cmp     $2, R32(%rbx)
+       jb      L(b1)
+       jz      L(b2)
+
+L(b3): mov     $0, R32(%r8)            C n mod 4 = 3
+       mov     %rax, %rbx              C low half of the first product
+       mov     $0, R32(%r9)
+       mov     8(up), %rax             C next u limb
+       mov     %rdx, %r10              C high half of the first product
+       lea     (up,n,8), up            C up = up + 8n, to be indexed with negative n
+       not     n                       C n = -n - 1
+       jmp     L(L3)
+
+L(b0): mov     $0, R32(%r10)           C n mod 4 = 0
+       mov     %rax, %r8
+       mov     %rdx, %rbx
+       mov     8(up), %rax
+       lea     (up,n,8), up
+       neg     n
+       jmp     L(L0)
+
+L(b1): cmp     $1, n                   C n mod 4 = 1
+       jz      L(n1)                   C n = 1: only the first product needs applying
+       mov     %rax, %r9
+       mov     8(up), %rax
+       mov     %rdx, %r8
+       mov     $0, R32(%rbx)
+       lea     (up,n,8), up
+       neg     n
+       inc     n
+       jmp     L(L1)
+
+L(b2): mov     $0, R32(%rbx)           C n mod 4 = 2
+       mov     %rax, %r10
+       mov     %rdx, %r9
+       mov     8(up), %rax
+       mov     $0, R32(%r8)
+       lea     (up,n,8), up
+       neg     n
+       add     $2, n
+       jns     L(end)                  C n = 2: skip the loop
+
+       ALIGN(32)
+L(top):        mul     v0                      C four limbs per iteration; L1, L0 and L3 are mid-loop entry points
+       ADDSUB  %r10, (rp,n,8)
+       adc     %rax, %r9               C fold the carry or borrow of ADDSUB into the next limb
+       mov     (up,n,8), %rax
+       adc     %rdx, %r8
+L(L1): mul     v0
+       mov     $0, R32(%r10)
+       ADDSUB  %r9, 8(rp,n,8)
+       adc     %rax, %r8
+       adc     %rdx, %rbx
+       mov     8(up,n,8), %rax
+L(L0): mul     v0
+       ADDSUB  %r8, 16(rp,n,8)
+       mov     $0, R32(%r8)            C mov leaves CF intact for the adc below
+       adc     %rax, %rbx
+       mov     $0, R32(%r9)
+       mov     16(up,n,8), %rax
+       adc     %rdx, %r10
+L(L3): mul     v0
+       ADDSUB  %rbx, 24(rp,n,8)
+       mov     $0, R32(%rbx)           C mov leaves CF intact for the adc below
+       adc     %rax, %r10
+       adc     %rdx, %r9
+       mov     24(up,n,8), %rax
+       add     $4, n
+       js      L(top)                  C loop while the index is still negative
+
+L(end):        mul     v0                      C product for the last u limb
+       ADDSUB  %r10, (rp)              C (rp) is the second-to-last limb because of the 8n - 16 bias
+       adc     %r9, %rax
+       adc     %r8, %rdx
+L(n1): ADDSUB  %rax, 8(rp)             C 8(rp) is the last limb
+       adc     $0, %rdx
+       mov     %rdx, %rax              C carry limb goes out in rax
+
+       pop     %rbx
+IFDOS(``pop    %rdi            '')
+IFDOS(``pop    %rsi            '')
+       ret
+EPILOGUE()
+ASM_END()
diff --git a/mpn/x86_64/bd1/gcd_1.asm b/mpn/x86_64/bd1/gcd_1.asm

new file mode 100644 (file)

index 0000000..bbbdbcd
--- /dev/null
+++ b/mpn/x86_64/bd1/gcd_1.asm
@@ -0,0 +1,26 @@
+dnl  AMD64 mpn_gcd_1.
+
+dnl  Copyright 2012 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
+
+MULFUNC_PROLOGUE(mpn_gcd_1)
+include_mpn(`x86_64/core2/gcd_1.asm')
diff --git a/mpn/x86_64/bd1/gmp-mparam.h b/mpn/x86_64/bd1/gmp-mparam.h

index 73890febd7f148e75604c5b337dbaa8fe6c3affb..f64c49181d8671cfe7c5c0ccccf6e834884e47fa 100644 (file)
--- a/mpn/x86_64/bd1/gmp-mparam.h
+++ b/mpn/x86_64/bd1/gmp-mparam.h
@@ -23,160 +23,124 @@ along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
  
  #define MOD_1_NORM_THRESHOLD                 0  /* always */
  #define MOD_1_UNNORM_THRESHOLD               0  /* always */
-#define MOD_1N_TO_MOD_1_1_THRESHOLD          7
-#define MOD_1U_TO_MOD_1_1_THRESHOLD          5
-#define MOD_1_1_TO_MOD_1_2_THRESHOLD         0  /* never mpn_mod_1_1p */
-#define MOD_1_2_TO_MOD_1_4_THRESHOLD        12
-#define PREINV_MOD_1_TO_MOD_1_THRESHOLD     14
+#define MOD_1N_TO_MOD_1_1_THRESHOLD          5
+#define MOD_1U_TO_MOD_1_1_THRESHOLD          3
+#define MOD_1_1_TO_MOD_1_2_THRESHOLD        24
+#define MOD_1_2_TO_MOD_1_4_THRESHOLD        34
+#define PREINV_MOD_1_TO_MOD_1_THRESHOLD     11
  #define USE_PREINV_DIVREM_1                  1  /* native */
+#define DIV_QR_2_PI2_THRESHOLD           MP_SIZE_T_MAX  /* never */
  #define DIVEXACT_1_THRESHOLD                 0  /* always (native) */
-#define BMOD_1_TO_MOD_1_THRESHOLD           24
+#define BMOD_1_TO_MOD_1_THRESHOLD           20
  
-#define MUL_TOOM22_THRESHOLD                18
-#define MUL_TOOM33_THRESHOLD                53
+#define MUL_TOOM22_THRESHOLD                16
+#define MUL_TOOM33_THRESHOLD                57
  #define MUL_TOOM44_THRESHOLD               154
-#define MUL_TOOM6H_THRESHOLD               274
-#define MUL_TOOM8H_THRESHOLD               466
+#define MUL_TOOM6H_THRESHOLD               250
+#define MUL_TOOM8H_THRESHOLD               309
  
  #define MUL_TOOM32_TO_TOOM43_THRESHOLD      97
-#define MUL_TOOM32_TO_TOOM53_THRESHOLD     140
+#define MUL_TOOM32_TO_TOOM53_THRESHOLD     108
  #define MUL_TOOM42_TO_TOOM53_THRESHOLD     105
  #define MUL_TOOM42_TO_TOOM63_THRESHOLD     109
+#define MUL_TOOM43_TO_TOOM54_THRESHOLD     143
  
  #define SQR_BASECASE_THRESHOLD               0  /* always (native) */
  #define SQR_TOOM2_THRESHOLD                 24
-#define SQR_TOOM3_THRESHOLD                 85
-#define SQR_TOOM4_THRESHOLD                119
+#define SQR_TOOM3_THRESHOLD                139
+#define SQR_TOOM4_THRESHOLD                218
  #define SQR_TOOM6_THRESHOLD                318
-#define SQR_TOOM8_THRESHOLD                502
+#define SQR_TOOM8_THRESHOLD                434
+
+#define MULMID_TOOM42_THRESHOLD             22
  
  #define MULMOD_BNM1_THRESHOLD               11
-#define SQRMOD_BNM1_THRESHOLD               16
+#define SQRMOD_BNM1_THRESHOLD               13
  
-#define MUL_FFT_MODF_THRESHOLD             412  /* k = 5 */
+#define MUL_FFT_MODF_THRESHOLD             396  /* k = 5 */
  #define MUL_FFT_TABLE3                                      \
-  { {    412, 5}, {     19, 6}, {     10, 5}, {     21, 6}, \
-    {     11, 5}, {     23, 6}, {     21, 7}, {     11, 6}, \
-    {     23, 7}, {     21, 8}, {     11, 7}, {     25, 8}, \
-    {     13, 7}, {     28, 8}, {     15, 7}, {     31, 8}, \
-    {     21, 9}, {     11, 8}, {     27, 9}, {     15, 8}, \
-    {     33, 9}, {     19, 8}, {     41, 9}, {     23, 8}, \
-    {     47, 9}, {     27,10}, {     15, 9}, {     31, 8}, \
-    {     63, 9}, {     39,10}, {     23, 9}, {     51,11}, \
-    {     15,10}, {     31, 9}, {     67,10}, {     39, 9}, \
-    {     79,10}, {     47, 9}, {     95,10}, {     55,11}, \
-    {     31,10}, {     79,11}, {     47,10}, {    103,12}, \
-    {     31,11}, {     63,10}, {    127,11}, {     79,10}, \
-    {    175,11}, {     95,10}, {    191,12}, {     63,11}, \
-    {    127,10}, {    255,11}, {    143,10}, {    287,11}, \
-    {    159,12}, {     95,13}, {     63,12}, {    127,11}, \
-    {    271, 9}, {   1087,11}, {    287,10}, {    575,11}, \
-    {    303,12}, {    159,11}, {    319,10}, {    671,11}, \
-    {    351,12}, {    191,11}, {    383,10}, {    767,11}, \
-    {    415,12}, {    223,11}, {    447,13}, {    127,12}, \
-    {    255,11}, {    543,12}, {    287,11}, {    575,10}, \
-    {   1215,12}, {    319,11}, {    639,12}, {    351,13}, \
-    {    191,12}, {    383,11}, {    767,12}, {    415,11}, \
-    {    831,10}, {   1663,12}, {    447,14}, {    127,13}, \
-    {    255,12}, {    543,11}, {   1087,10}, {   2175,12}, \
-    {    575,11}, {   1151,12}, {    607,11}, {   1215,13}, \
-    {    319,12}, {    639,11}, {   1279,12}, {    671,11}, \
-    {   1343,10}, {   2687,12}, {    703,13}, {    383,12}, \
-    {    767,11}, {   1535,12}, {    831,13}, {    447,12}, \
-    {    959,14}, {    255,13}, {    511,12}, {   1087,11}, \
-    {   2175,13}, {    575,12}, {   1215,11}, {   2431,10}, \
-    {   4863,13}, {    639,12}, {   1343,11}, {   2687,13}, \
-    {    703,12}, {   1407,14}, {    383,13}, {    767,12}, \
-    {   1535,13}, {    831,12}, {   1663,13}, {    959,15}, \
-    {    255,14}, {    511,13}, {   1087,12}, {   2175,13}, \
-    {   1215,12}, {   2431,11}, {   4863,14}, {    639,13}, \
-    {   1343,12}, {   2687,13}, {   1471,12}, {   2943,11}, \
-    {   5887,14}, {    767,13}, {   1599,12}, {   3199,13}, \
-    {   1727,14}, {    895,13}, {   1919,12}, {   3839,15}, \
-    {    511,14}, {   1023,13}, {   2175,14}, {   1151,13}, \
-    {   2431,12}, {   4863,14}, {  16384,15}, {  32768,16}, \
-    {  65536,17}, { 131072,18}, { 262144,19}, { 524288,20}, \
-    {1048576,21}, {2097152,22}, {4194304,23}, {8388608,24} }
-#define MUL_FFT_TABLE3_SIZE 168
+  { {    380, 5}, {     21, 6}, {     11, 5}, {     23, 6}, \
+    {     23, 7}, {     12, 6}, {     25, 7}, {     13, 6}, \
+    {     27, 7}, {     15, 6}, {     31, 7}, {     25, 8}, \
+    {     13, 7}, {     27, 8}, {     15, 7}, {     32, 8}, \
+    {     17, 7}, {     35, 8}, {     21, 9}, {     11, 8}, \
+    {     27, 9}, {     15, 8}, {     35, 9}, {     19, 8}, \
+    {     41, 9}, {     23, 8}, {     47, 9}, {     27,10}, \
+    {     15, 9}, {     31, 8}, {     63, 9}, {     35, 8}, \
+    {     71, 9}, {     39,10}, {     23, 9}, {     51,11}, \
+    {     15,10}, {     31, 9}, {     71,10}, {     39, 9}, \
+    {     87,10}, {     47, 9}, {     99,10}, {     55,11}, \
+    {     31,10}, {     87,11}, {     47,10}, {    103,12}, \
+    {     31,11}, {     63,10}, {    135,11}, {     79,10}, \
+    {    167,11}, {     95,12}, {     63,11}, {    127,10}, \
+    {    255,11}, {    143,10}, {    287, 9}, {    575,10}, \
+    {    303,11}, {    159,12}, {     95,11}, {    191,13}, \
+    {   8192,14}, {  16384,15}, {  32768,16}, {  65536,17}, \
+    { 131072,18}, { 262144,19}, { 524288,20}, {1048576,21}, \
+    {2097152,22}, {4194304,23}, {8388608,24} }
+#define MUL_FFT_TABLE3_SIZE 75
  #define MUL_FFT_THRESHOLD                 4736
  
-#define SQR_FFT_MODF_THRESHOLD             368  /* k = 5 */
+#define SQR_FFT_MODF_THRESHOLD             340  /* k = 5 */
  #define SQR_FFT_TABLE3                                      \
-  { {    368, 5}, {     19, 6}, {     10, 5}, {     21, 6}, \
-    {     21, 7}, {     11, 6}, {     23, 7}, {     21, 8}, \
-    {     11, 7}, {     25, 8}, {     13, 7}, {     28, 8}, \
-    {     15, 7}, {     31, 8}, {     17, 7}, {     35, 8}, \
-    {     19, 7}, {     39, 8}, {     27, 9}, {     15, 8}, \
-    {     35, 9}, {     19, 8}, {     41, 9}, {     23, 8}, \
+  { {    332, 5}, {     19, 6}, {     10, 5}, {     21, 6}, \
+    {     11, 5}, {     23, 6}, {     25, 7}, {     25, 8}, \
+    {     13, 7}, {     27, 8}, {     15, 7}, {     31, 8}, \
+    {     21, 9}, {     11, 8}, {     27, 9}, {     15, 8}, \
+    {     33, 9}, {     19, 8}, {     41, 9}, {     23, 8}, \
      {     47, 9}, {     27,10}, {     15, 9}, {     39,10}, \
-    {     23, 9}, {     51,11}, {     15,10}, {     31, 9}, \
+    {     23, 9}, {     47,11}, {     15,10}, {     31, 9}, \
      {     67,10}, {     39, 9}, {     79,10}, {     47, 9}, \
      {     95,10}, {     55,11}, {     31,10}, {     79,11}, \
      {     47,10}, {     95,12}, {     31,11}, {     63,10}, \
-    {    135,11}, {     79,10}, {    159,11}, {     95,10}, \
-    {    191,11}, {    111,12}, {     63,11}, {    127,10}, \
-    {    255, 9}, {    543,11}, {    143, 9}, {    575,12}, \
-    {     95,11}, {    191,13}, {     63,12}, {    127,11}, \
-    {    255,10}, {    511,11}, {    271,10}, {    543,11}, \
-    {    287,10}, {    575,11}, {    303,12}, {    159,11}, \
-    {    335,12}, {    191,11}, {    415,12}, {    223,11}, \
-    {    447,10}, {    895,13}, {    127,12}, {    255,11}, \
-    {    319,11}, {    639,10}, {   1279,12}, {    351,13}, \
-    {    191,12}, {    383,11}, {    767,12}, {    415,11}, \
-    {    831,10}, {   1663,12}, {    447,11}, {    895,14}, \
-    {    127,13}, {    255,12}, {    511,11}, {   1023,12}, \
-    {    543,11}, {   1087,10}, {   2175,12}, {    575,11}, \
-    {   1151,12}, {    607,13}, {    319,12}, {    639,11}, \
-    {   1279,12}, {    671,11}, {   1343,10}, {   2687,12}, \
-    {    703,13}, {    383,12}, {    767,11}, {   1599,12}, \
-    {    831,13}, {    447,12}, {    959,14}, {    255,13}, \
-    {    511,12}, {   1087,11}, {   2175,13}, {    575,12}, \
-    {   1151,11}, {   2303,12}, {   1215,11}, {   2431,10}, \
-    {   4863,13}, {    639,12}, {   1343,11}, {   2687,13}, \
-    {    703,12}, {   1407,14}, {    383,13}, {    767,12}, \
-    {   1599,13}, {    831,12}, {   1727,13}, {    895,15}, \
-    {    255,14}, {    511,13}, {   1087,12}, {   2175,13}, \
-    {   1215,12}, {   2431,11}, {   4863,14}, {    639,13}, \
-    {   1343,12}, {   2687,13}, {   1471,12}, {   2943,11}, \
-    {   5887,14}, {    767,13}, {   1599,12}, {   3199,13}, \
-    {   1727,14}, {    895,13}, {   1919,12}, {   3839,15}, \
-    {    511,14}, {   1023,13}, {   2175,14}, {   1151,13}, \
-    {   2431,12}, {   4863,14}, {  16384,15}, {  32768,16}, \
-    {  65536,17}, { 131072,18}, { 262144,19}, { 524288,20}, \
-    {1048576,21}, {2097152,22}, {4194304,23}, {8388608,24} }
-#define SQR_FFT_TABLE3_SIZE 172
+    {    127, 9}, {    255,10}, {    135,11}, {     79,10}, \
+    {    159,11}, {     95,10}, {    191,12}, {     63,11}, \
+    {    127,10}, {    255, 9}, {    511,10}, {    271,11}, \
+    {    143,10}, {    303,11}, {    159,10}, {    319,12}, \
+    {     95,11}, {    191,10}, {    383,11}, {    207,13}, \
+    {   8192,14}, {  16384,15}, {  32768,16}, {  65536,17}, \
+    { 131072,18}, { 262144,19}, { 524288,20}, {1048576,21}, \
+    {2097152,22}, {4194304,23}, {8388608,24} }
+#define SQR_FFT_TABLE3_SIZE 71
  #define SQR_FFT_THRESHOLD                 3264
  
-#define MULLO_BASECASE_THRESHOLD             4
-#define MULLO_DC_THRESHOLD                  30
+#define MULLO_BASECASE_THRESHOLD             0  /* always */
+#define MULLO_DC_THRESHOLD                  37
  #define MULLO_MUL_N_THRESHOLD             8648
  
-#define DC_DIV_QR_THRESHOLD                 38
-#define DC_DIVAPPR_Q_THRESHOLD             187
+#define DC_DIV_QR_THRESHOLD                 57
+#define DC_DIVAPPR_Q_THRESHOLD             204
  #define DC_BDIV_QR_THRESHOLD                48
-#define DC_BDIV_Q_THRESHOLD                 92
-
-#define INV_MULMOD_BNM1_THRESHOLD           49
-#define INV_NEWTON_THRESHOLD               202
-#define INV_APPR_THRESHOLD                 197
-
-#define BINV_NEWTON_THRESHOLD              246
-#define REDC_1_TO_REDC_2_THRESHOLD          55
-#define REDC_2_TO_REDC_N_THRESHOLD           0  /* anomaly: never REDC_2 */
-
-#define MU_DIV_QR_THRESHOLD               1470
-#define MU_DIVAPPR_Q_THRESHOLD            1470
-#define MUPI_DIV_QR_THRESHOLD               90
-#define MU_BDIV_QR_THRESHOLD              1187
-#define MU_BDIV_Q_THRESHOLD               1470
-
-#define MATRIX22_STRASSEN_THRESHOLD         15
-#define HGCD_THRESHOLD                      96
-#define GCD_DC_THRESHOLD                   400
-#define GCDEXT_DC_THRESHOLD                288
-#define JACOBI_BASE_METHOD                   1
-
-#define GET_STR_DC_THRESHOLD                12
-#define GET_STR_PRECOMPUTE_THRESHOLD        27
-#define SET_STR_DC_THRESHOLD               172
-#define SET_STR_PRECOMPUTE_THRESHOLD      1341
+#define DC_BDIV_Q_THRESHOLD                107
+
+#define INV_MULMOD_BNM1_THRESHOLD           30
+#define INV_NEWTON_THRESHOLD               228
+#define INV_APPR_THRESHOLD                 214
+
+#define BINV_NEWTON_THRESHOLD              248
+#define REDC_1_TO_REDC_2_THRESHOLD          51
+#define REDC_2_TO_REDC_N_THRESHOLD           0  /* always */
+
+#define MU_DIV_QR_THRESHOLD               1334
+#define MU_DIVAPPR_Q_THRESHOLD            1387
+#define MUPI_DIV_QR_THRESHOLD              108
+#define MU_BDIV_QR_THRESHOLD              1142
+#define MU_BDIV_Q_THRESHOLD               1308
+
+#define POWM_SEC_TABLE  2,44,411,580,2246
+
+#define MATRIX22_STRASSEN_THRESHOLD         17
+#define HGCD_THRESHOLD                     117
+#define HGCD_APPR_THRESHOLD                 50
+#define HGCD_REDUCE_THRESHOLD             2681
+#define GCD_DC_THRESHOLD                   487
+#define GCDEXT_DC_THRESHOLD                318
+#define JACOBI_BASE_METHOD                   4
+
+#define GET_STR_DC_THRESHOLD                13
+#define GET_STR_PRECOMPUTE_THRESHOLD        20
+#define SET_STR_DC_THRESHOLD               418
+#define SET_STR_PRECOMPUTE_THRESHOLD      1340
+
+#define FAC_DSC_THRESHOLD                  462
+#define FAC_ODD_THRESHOLD                    0  /* always */
diff --git a/mpn/x86_64/bd1/hamdist.asm b/mpn/x86_64/bd1/hamdist.asm

new file mode 100644 (file)

index 0000000..9e35f2d
--- /dev/null
+++ b/mpn/x86_64/bd1/hamdist.asm
@@ -0,0 +1,27 @@
+dnl  AMD64 mpn_hamdist -- hamming distance.
+
+dnl  Copyright 2008, 2010, 2011, 2012 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+
+include(`../config.m4')
+
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
+
+MULFUNC_PROLOGUE(mpn_hamdist)
+include_mpn(`x86_64/k10/hamdist.asm')
diff --git a/mpn/x86_64/bd1/mul_1.asm b/mpn/x86_64/bd1/mul_1.asm

new file mode 100644 (file)

index 0000000..bf556c3
--- /dev/null
+++ b/mpn/x86_64/bd1/mul_1.asm
@@ -0,0 +1,174 @@
+dnl  AMD64 mpn_mul_1 optimised for AMD Bulldozer.
+
+dnl  Copyright 2003, 2004, 2005, 2007, 2008, 2011, 2012 Free Software
+dnl  Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C           cycles/limb
+C AMD K8,K9
+C AMD K10
+C AMD bd1       4
+C AMD bobcat
+C Intel P4
+C Intel core2
+C Intel NHM
+C Intel SBR
+C Intel atom
+C VIA nano
+
+C The loop of this code is the result of running a code generation and
+C optimisation tool suite written by David Harvey and Torbjorn Granlund.
+
+C TODO
+C  * Move loop code into feed-in blocks, to save insn for zeroing regs.
+
+define(`rp',      `%rdi')   C rcx
+define(`up',      `%rsi')   C rdx
+define(`n_param', `%rdx')   C r8
+define(`v0',      `%rcx')   C r9
+
+define(`n',       `%rbx')
+
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
+
+IFDOS(`        define(`up', ``%rsi'')  ') dnl
+IFDOS(`        define(`rp', ``%rcx'')  ') dnl
+IFDOS(`        define(`v0', ``%r9'')   ') dnl
+IFDOS(`        define(`r9', ``rdi'')   ') dnl
+IFDOS(`        define(`n',  ``%r8'')   ') dnl
+IFDOS(`        define(`r8', ``rbx'')   ') dnl
+
+ASM_START()
+       TEXT
+       ALIGN(16)
+PROLOGUE(mpn_mul_1c)                   C as mpn_mul_1, with a carry-in limb added to the first product
+IFDOS(``push   %rsi            '')
+IFDOS(``push   %rdi            '')
+IFDOS(``mov    %rdx, %rsi      '')
+
+       mov     (up), %rax              C read first u limb early
+       push    %rbx
+IFSTD(`        mov     n_param, %r11   ')      C move away n from rdx, mul uses it
+IFDOS(`        mov     n, %r11         ')
+       mul     v0                      C rdx:rax = up[0] * v0
+
+IFSTD(` add    %r8, %rax       ')      C add the carry-in limb
+IFDOS(` add    64(%rsp), %rax  ')      C 40 + 3*8  (3 push insns)
+       adc     $0, %rdx                C propagate into the high half
+       jmp     L(common)               C join mpn_mul_1 just after its first mul
+
+EPILOGUE()
+
+       ALIGN(16)
+PROLOGUE(mpn_mul_1)                    C rp[] = up[] * v0 over n limbs; the top carry limb is left in rax
+IFDOS(``push   %rsi            '')
+IFDOS(``push   %rdi            '')
+IFDOS(``mov    %rdx, %rsi      '')
+
+       mov     (up), %rax              C read first u limb early
+       push    %rbx
+IFSTD(`        mov     n_param, %r11   ')      C move away n from rdx, mul uses it
+IFDOS(`        mov     n, %r11         ')
+       mul     v0                      C rdx:rax = up[0] * v0
+
+L(common):                             C mpn_mul_1c joins here with its carry-in already added to rdx:rax
+IFSTD(`        mov     %r11, n         ')
+
+       and     $3, R32(%r11)           C n mod 4 selects the loop entry point
+       lea     -16(rp,n,8), rp         C rp = rp + 8n - 16; lea keeps the flags set by and
+       jz      L(b0)
+       cmp     $2, R32(%r11)
+       jb      L(b1)
+       jz      L(b2)
+
+L(b3): mov     %rax, %r10              C n mod 4 = 3: low half of the first product
+       mov     %rdx, %r11              C high half of the first product
+       mov     8(up), %rax
+       mul     v0                      C second product, consumed at L(L3)
+       lea     (up,n,8), up            C up = up + 8n, to be indexed with negative n
+       not     n                       C n = -n - 1
+       jmp     L(L3)
+
+L(b0): mov     %rax, %r9               C n mod 4 = 0
+       mov     %rdx, %r10
+       mov     8(up), %rax
+       lea     (up,n,8), up
+       neg     n
+       jmp     L(L0)
+
+L(b1): mov     %rax, %r8               C n mod 4 = 1
+       cmp     $1, n
+       jz      L(n1)                   C n = 1: store r8 and return the high half
+       mov     %rdx, %r9
+       lea     (up,n,8), up
+       neg     n
+       mov     %r8, 16(rp,n,8)         C with the bias and negated n this address is the original rp
+       inc     n
+       jmp     L(L1)
+
+L(b2): mov     %rax, %r11              C n mod 4 = 2
+       mov     %rdx, %r8
+       mov     8(up), %rax
+       lea     (up,n,8), up
+       neg     n
+       add     $2, n
+       jns     L(end)                  C n = 2: skip the loop
+
+       ALIGN(16)
+L(top):        mul     v0                      C four limbs per iteration; L1, L0 and L3 are mid-loop entry points
+       mov     %rdx, %r9
+       add     %rax, %r8               C low half joins the previous high half
+       adc     $0, %r9
+       mov     %r8, 8(rp,n,8)
+       mov     %r11, (rp,n,8)
+L(L1): mov     (up,n,8), %rax
+       mul     v0
+       add     %rax, %r9
+       mov     %rdx, %r10
+       mov     8(up,n,8), %rax
+       adc     $0, %r10
+L(L0): mul     v0
+       add     %rax, %r10
+       mov     %rdx, %r11
+       mov     16(up,n,8), %rax
+       adc     $0, %r11
+       mul     v0
+       mov     %r9, 16(rp,n,8)
+L(L3): add     %rax, %r11
+       mov     %r10, 24(rp,n,8)
+       mov     %rdx, %r8
+       adc     $0, %r8
+       add     $4, n
+       mov     -8(up,n,8), %rax        C mov keeps the flags of add for the js below
+       js      L(top)                  C loop while the index is still negative
+
+L(end):        mul     v0                      C product for the last u limb
+       add     %rax, %r8
+       adc     $0, %rdx
+       mov     %r11, (rp)              C (rp) is the second-to-last limb because of the 8n - 16 bias
+L(n1): mov     %r8, 8(rp)              C 8(rp) is the last limb
+       mov     %rdx, %rax              C carry limb goes out in rax
+
+       pop     %rbx
+IFDOS(``pop    %rdi            '')
+IFDOS(``pop    %rsi            '')
+       ret
+EPILOGUE()
+ASM_END()
diff --git a/mpn/x86_64/bd1/popcount.asm b/mpn/x86_64/bd1/popcount.asm

new file mode 100644 (file)

index 0000000..c93c93b
--- /dev/null
+++ b/mpn/x86_64/bd1/popcount.asm
@@ -0,0 +1,27 @@
+dnl  AMD64 mpn_popcount -- population count.
+
+dnl  Copyright 2008, 2010, 2011, 2012 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+
+include(`../config.m4')
+
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
+
+MULFUNC_PROLOGUE(mpn_popcount)
+include_mpn(`x86_64/k10/popcount.asm')
diff --git a/mpn/x86_64/bdiv_dbm1c.asm b/mpn/x86_64/bdiv_dbm1c.asm

index d6775aef96f87eb959d64e311a78b790bc89fff2..f3d7084d1f1d8572e29811e046551c1ea407d216 100644 (file)
--- a/mpn/x86_64/bdiv_dbm1c.asm
+++ b/mpn/x86_64/bdiv_dbm1c.asm
@@ -1,6 +1,6 @@
  dnl  x86_64 mpn_bdiv_dbm1.
  
-dnl  Copyright 2008 Free Software Foundation, Inc.
+dnl  Copyright 2008, 2011, 2012 Free Software Foundation, Inc.
  
  dnl  This file is part of the GNU MP Library.
  
@@ -20,80 +20,76 @@ dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
  include(`../config.m4')
  
  C           cycles/limb
-C K8,K9:        2.25
-C K10:           ?
-C P4:          12.5
-C P6 core2:     4.0
-C P6 corei7:    3.8
-C P6 atom:     20
+C AMD K8,K9     2.25
+C AMD K10       2.25
+C Intel P4     12.5
+C Intel core2   4
+C Intel NHM     3.75
+C Intel SBR     3.6
+C Intel atom   20
+C VIA nano      4
  
  C TODO
-C  * Do proper 4-way feed-in instead of the current epilogue
+C  * Optimise feed-in code.
  
-C INPUT PARAMETERS shared
-define(`qp',   `%rdi')
-define(`up',   `%rsi')
-define(`n',    `%rdx')
-define(`bd',   `%rcx')
-define(`cy',   `%r8')
+C INPUT PARAMETERS
+define(`qp',     `%rdi')
+define(`up',     `%rsi')
+define(`n_param', `%rdx')
+define(`bd',     `%rcx')
+define(`cy',     `%r8')
  
+define(`n',       `%r9')
+
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
  
  ASM_START()
         TEXT
         ALIGN(16)
  PROLOGUE(mpn_bdiv_dbm1c)
-       mov     (%rsi), %rax
-       mov     %rdx, %r9               C n
-
-       mul     %rcx
-       sub     %rax, %r8
-       mov     %r8, (%rdi)
-       sbb     %rdx, %r8
+       FUNC_ENTRY(4)
+IFDOS(`        mov     56(%rsp), %r8   ')
+       mov     (up), %rax
+       mov     n_param, n
+       mov     R32(n_param), R32(%r11)
+       mul     bd
+       lea     (up,n,8), up
+       lea     (qp,n,8), qp
+       neg     n
+       and     $3, R32(%r11)
+       jz      L(lo0)
+       lea     -4(n,%r11), n
+       cmp     $2, R32(%r11)
+       jc      L(lo1)
+       jz      L(lo2)
+       jmp     L(lo3)
  
-       lea     (%rsi,%r9,8), %rsi
-       lea     (%rdi,%r9,8), %rdi
-       neg     %r9
-       add     $4, %r9
-       jns     L(end)
         ALIGN(16)
-L(top):
-       mov     -24(%rsi,%r9,8), %rax
-       mul     %rcx
-       sub     %rax, %r8
-       mov     %r8, -24(%rdi,%r9,8)
+L(top):        mov     (up,n,8), %rax
+       mul     bd
+L(lo0):        sub     %rax, %r8
+       mov     %r8, (qp,n,8)
         sbb     %rdx, %r8
-L(3):
-       mov     -16(%rsi,%r9,8), %rax
-       mul     %rcx
-       sub     %rax, %r8
-       mov     %r8, -16(%rdi,%r9,8)
+       mov     8(up,n,8), %rax
+       mul     bd
+L(lo3):        sub     %rax, %r8
+       mov     %r8, 8(qp,n,8)
         sbb     %rdx, %r8
-L(2):
-       mov     -8(%rsi,%r9,8), %rax
-       mul     %rcx
-       sub     %rax, %r8
-       mov     %r8, -8(%rdi,%r9,8)
+       mov     16(up,n,8), %rax
+       mul     bd
+L(lo2):        sub     %rax, %r8
+       mov     %r8, 16(qp,n,8)
         sbb     %rdx, %r8
-L(1):
-       mov     (%rsi,%r9,8), %rax
-       mul     %rcx
-       sub     %rax, %r8
-       mov     %r8, (%rdi,%r9,8)
+       mov     24(up,n,8), %rax
+       mul     bd
+L(lo1):        sub     %rax, %r8
+       mov     %r8, 24(qp,n,8)
         sbb     %rdx, %r8
+       add     $4, n
+       jnz     L(top)
  
-       add     $4, %r9
-       js      L(top)
-L(end):
-       je      L(3x)
-       cmp     $2, %r9
-       jg      L(ret)
-       mov     $-1, %r9
-       je      L(1)
-       jmp     L(2)
-L(3x):
-       dec     %r9
-       jmp     L(3)
-
-L(ret):        mov     %r8, %rax
+       mov     %r8, %rax
+       FUNC_EXIT()
         ret
  EPILOGUE()
diff --git a/mpn/x86_64/bdiv_q_1.asm b/mpn/x86_64/bdiv_q_1.asm

index 2356f2bc02731f50acf91cb3a449c1babf6b7eee..a814e284356c0c59d6a71b29192d712c75ffb55f 100644 (file)
--- a/mpn/x86_64/bdiv_q_1.asm
+++ b/mpn/x86_64/bdiv_q_1.asm
@@ -1,8 +1,8 @@
  dnl  AMD64 mpn_bdiv_q_1, mpn_pi1_bdiv_q_1 -- schoolbook Hensel division by
  dnl  1-limb divisor, returning quotient only.
  
-dnl  Copyright 2001, 2002, 2004, 2005, 2006, 2009 Free Software Foundation,
-dnl  Inc.
+dnl  Copyright 2001, 2002, 2004, 2005, 2006, 2009, 2011, 2012 Free Software
+dnl  Foundation, Inc.
  
  dnl  This file is part of the GNU MP Library.
  
@@ -11,7 +11,7 @@ dnl  it under the terms of the GNU Lesser General Public License as published
  dnl  by the Free Software Foundation; either version 3 of the License, or (at
  dnl  your option) any later version.
  
-dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
  dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
  dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
  dnl  License for more details.
@@ -23,31 +23,35 @@ include(`../config.m4')
  
  
  C           cycles/limb
-C K8,K9:       10
-C K10:         10
-C P4:          33
-C P6 core2:    13.25
-C P6 corei7:   14
-C P6 atom:     42
+C AMD K8,K9    10
+C AMD K10      10
+C Intel P4     33
+C Intel core2  13.25
+C Intel corei  14
+C Intel atom   42
+C VIA nano      ?
  
  
  C INPUT PARAMETERS
-C rp           rdi
-C up           rsi
-C n            rdx
-C d            rcx
-C di           r8      just mpn_pi1_bdiv_q_1
-C shift                r9      just mpn_pi1_bdiv_q_1
+define(`rp',           `%rdi')
+define(`up',           `%rsi')
+define(`n',            `%rdx')
+define(`d',            `%rcx')
+define(`di',           `%r8')          C       just mpn_pi1_bdiv_q_1
+define(`ncnt',         `%r9')          C       just mpn_pi1_bdiv_q_1
  
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
  
  ASM_START()
         TEXT
         ALIGN(16)
  PROLOGUE(mpn_bdiv_q_1)
+       FUNC_ENTRY(4)
         push    %rbx
  
         mov     %rcx, %rax
+       xor     R32(%rcx), R32(%rcx)    C ncnt = 0
+       xor     R32(%rcx), R32(%rcx)    C ncnt count
         mov     %rdx, %r10
  
         bt      $0, R32(%rax)
@@ -57,11 +61,7 @@ L(odd):      mov     %rax, %rbx
         shr     R32(%rax)
         and     $127, R32(%rax)         C d/2, 7 bits
  
-ifdef(`PIC',`
-       mov     binvert_limb_table@GOTPCREL(%rip), %rdx
-',`
-       movabs  $binvert_limb_table, %rdx
-')
+       LEA(    binvert_limb_table, %rdx)
  
         movzbl  (%rdx,%rax), R32(%rax)  C inv 8 bits
  
@@ -90,20 +90,23 @@ L(evn):     bsf     %rax, %rcx
  EPILOGUE()
  
  PROLOGUE(mpn_pi1_bdiv_q_1)
+       FUNC_ENTRY(4)
+IFDOS(`        mov     56(%rsp), %r8   ')
+IFDOS(`        mov     64(%rsp), %r9   ')
         push    %rbx
  
         mov     %rcx, %r11              C d
         mov     %rdx, %r10              C n
-       mov     %r9, %rcx               C shift
-L(com):
-       mov     (%rsi), %rax            C up[0]
+       mov     %r9, %rcx               C ncnt
+
+L(com):        mov     (up), %rax              C up[0]
  
         dec     %r10
         jz      L(one)
  
-       mov     8(%rsi), %rdx           C up[1]
-       lea     (%rsi,%r10,8), %rsi     C up end
-       lea     (%rdi,%r10,8), %rdi     C rp end
+       mov     8(up), %rdx             C up[1]
+       lea     (up,%r10,8), up         C up end
+       lea     (rp,%r10,8), rp         C rp end
         neg     %r10                    C -n
  
         shrd    R8(%rcx), %rdx, %rax
@@ -115,15 +118,13 @@ L(com):
  L(top):
         C rax   q
         C rbx   carry bit, 0 or 1
-       C rcx   shift
+       C rcx   ncnt
         C rdx
-       C rsi   up end
-       C rdi   rp end
         C r10   counter, limbs, negative
  
         mul     %r11                    C carry limb in rdx
-       mov     (%rsi,%r10,8), %rax
-       mov     8(%rsi,%r10,8), %r9
+       mov     (up,%r10,8), %rax
+       mov     8(up,%r10,8), %r9
         shrd    R8(%rcx), %r9, %rax
         nop
         sub     %rbx, %rax              C apply carry bit
@@ -131,23 +132,25 @@ L(top):
         sub     %rdx, %rax              C apply carry limb
         adc     $0, %rbx
  L(ent):        imul    %r8, %rax
-       mov     %rax, (%rdi,%r10,8)
+       mov     %rax, (rp,%r10,8)
         inc     %r10
         jnz     L(top)
  
         mul     %r11                    C carry limb in rdx
-       mov     (%rsi), %rax            C up high limb
+       mov     (up), %rax              C up high limb
         shr     R8(%rcx), %rax
         sub     %rbx, %rax              C apply carry bit
         sub     %rdx, %rax              C apply carry limb
         imul    %r8, %rax
-       mov     %rax, (%rdi)
+       mov     %rax, (rp)
         pop     %rbx
+       FUNC_EXIT()
         ret
  
  L(one):        shr     R8(%rcx), %rax
         imul    %r8, %rax
-       mov     %rax, (%rdi)
+       mov     %rax, (rp)
         pop     %rbx
+       FUNC_EXIT()
         ret
  EPILOGUE()
diff --git a/mpn/x86_64/bobcat/aorsmul_1.asm b/mpn/x86_64/bobcat/aorsmul_1.asm

new file mode 100644 (file)

index 0000000..412d5e2
--- /dev/null
+++ b/mpn/x86_64/bobcat/aorsmul_1.asm
@@ -0,0 +1,176 @@
+dnl  AMD64 mpn_addmul_1 and mpn_submul_1 optimised for AMD bobcat.
+
+dnl  Copyright 2003, 2004, 2005, 2007, 2008, 2011, 2012 Free Software
+dnl  Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C           cycles/limb
+C AMD K8,K9     4.5
+C AMD K10       4.5
+C AMD bd1       4.75
+C AMD bobcat    5
+C Intel P4     17.7
+C Intel core2   5.5
+C Intel NHM     5.43
+C Intel SBR     3.92
+C Intel atom   23
+C VIA nano      5.63
+
+C The loop of this code is the result of running a code generation and
+C optimisation tool suite written by David Harvey and Torbjorn Granlund.
+
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
+
+ifdef(`OPERATION_addmul_1',`
+      define(`ADDSUB',        `add')
+      define(`func',  `mpn_addmul_1')
+')
+ifdef(`OPERATION_submul_1',`
+      define(`ADDSUB',        `sub')
+      define(`func',  `mpn_submul_1')
+')
+
+MULFUNC_PROLOGUE(mpn_addmul_1 mpn_submul_1)
+
+C Standard parameters
+define(`rp',              `%rdi')
+define(`up',              `%rsi')
+define(`n_param',         `%rdx')
+define(`v0',              `%rcx')
+C Standard allocations
+define(`n',               `%rbx')
+define(`w0',              `%r8')
+define(`w1',              `%r9')
+define(`w2',              `%r10')
+define(`w3',              `%r11')
+
+C DOS64 parameters
+IFDOS(` define(`rp',      `%rcx')    ') dnl
+IFDOS(` define(`up',      `%rsi')    ') dnl
+IFDOS(` define(`n_param', `%r8')     ') dnl
+IFDOS(` define(`v0',      `%r9')     ') dnl
+C DOS64 allocations
+IFDOS(` define(`n',       `%rbx')    ') dnl
+IFDOS(` define(`w0',      `%r8')     ') dnl
+IFDOS(` define(`w1',      `%rdi')    ') dnl
+IFDOS(` define(`w2',      `%r10')    ') dnl
+IFDOS(` define(`w3',      `%r11')    ') dnl
+
+ASM_START()
+       TEXT
+       ALIGN(16)
+PROLOGUE(func)
+IFDOS(`        push    %rsi            ')
+IFDOS(`        push    %rdi            ')
+IFDOS(`        mov     %rdx, %rsi      ')
+
+       push    %rbx                    C n is kept in rbx
+       mov     (up), %rax              C rax = up[0]
+
+       lea     -16(rp,n_param,8), rp   C rp += 8*n_param - 16
+       lea     -16(up,n_param,8), up   C up += 8*n_param - 16
+
+       mov     n_param, n              C keep the full count in n
+       and     $3, R32(n_param)        C n mod 4 selects the feed-in path
+       jz      L(b0)
+       cmp     $2, R32(n_param)
+       ja      L(b3)
+       jz      L(b2)
+
+L(b1): mul     v0                      C n mod 4 = 1
+       cmp     $1, n
+       jz      L(n1)                   C n = 1 needs no loop
+       mov     %rax, w2
+       mov     %rdx, w3
+       neg     n
+       add     $3, n                   C n = 3 - n
+       jmp     L(L1)
+L(n1): ADDSUB  %rax, 8(rp)             C 8(rp) is the single result limb here
+       adc     $0, %rdx                C fold carry or borrow into the high limb
+       mov     %rdx, %rax              C result in rax
+       pop     %rbx
+IFDOS(`        pop     %rdi            ')
+IFDOS(`        pop     %rsi            ')
+       ret
+
+L(b3): mul     v0                      C n mod 4 = 3
+       mov     %rax, w2
+       mov     %rdx, w3
+       neg     n
+       inc     n                       C n = 1 - n
+       jmp     L(L3)
+
+L(b0): mul     v0                      C n mod 4 = 0
+       mov     %rax, w0
+       mov     %rdx, w1
+       neg     n
+       add     $2, n                   C n = 2 - n
+       jmp     L(L0)
+
+L(b2): mul     v0                      C n mod 4 = 2
+       mov     %rax, w0
+       mov     %rdx, w1
+       neg     n                       C n = -n
+       jmp     L(L2)
+
+       ALIGN(16)
+L(top):        ADDSUB  w0, -16(rp,n,8)         C apply low product limb to rp
+       adc     w1, w2                  C chain carry and high limb into next product
+       adc     $0, w3
+L(L1): mov     0(up,n,8), %rax         C loop entry for n mod 4 = 1
+       mul     v0
+       mov     %rax, w0
+       mov     %rdx, w1
+       ADDSUB  w2, -8(rp,n,8)
+       adc     w3, w0
+       adc     $0, w1
+L(L0): mov     8(up,n,8), %rax         C loop entry for n mod 4 = 0
+       mul     v0
+       mov     %rax, w2
+       mov     %rdx, w3
+       ADDSUB  w0, 0(rp,n,8)
+       adc     w1, w2
+       adc     $0, w3
+L(L3): mov     16(up,n,8), %rax        C loop entry for n mod 4 = 3
+       mul     v0
+       mov     %rax, w0
+       mov     %rdx, w1
+       ADDSUB  w2, 8(rp,n,8)
+       adc     w3, w0
+       adc     $0, w1
+L(L2): mov     24(up,n,8), %rax        C loop entry for n mod 4 = 2
+       mul     v0
+       mov     %rax, w2
+       mov     %rdx, w3
+       add     $4, n                   C four limbs per pass
+       js      L(top)                  C loop while n is negative
+
+L(end):        ADDSUB  w0, (rp)                C wind down: last two result limbs
+       adc     w1, w2
+       adc     $0, w3
+       ADDSUB  w2, 8(rp)
+       adc     $0, w3
+       mov     w3, %rax                C result in rax
+
+       pop     %rbx
+IFDOS(`        pop     %rdi            ')
+IFDOS(`        pop     %rsi            ')
+       ret
+EPILOGUE()
diff --git a/mpn/x86_64/bobcat/copyd.asm b/mpn/x86_64/bobcat/copyd.asm

new file mode 100644 (file)

index 0000000..ca47bbd
--- /dev/null
+++ b/mpn/x86_64/bobcat/copyd.asm
@@ -0,0 +1,80 @@
+dnl  AMD64 mpn_copyd optimised for AMD bobcat.
+
+dnl  Copyright 2003, 2005, 2007, 2011, 2012 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C           cycles/limb
+C AMD K8,K9     1
+C AMD K10       1-2  (alignment fluctuations)
+C AMD bd1       ?
+C AMD bobcat    1.5
+C Intel P4      2.8
+C Intel core2   1
+C Intel NHM     1-1.25
+C Intel SBR     1
+C Intel atom    2.87
+C VIA nano      2
+
+C INPUT PARAMETERS
+C rp   rdi
+C up   rsi
+C n    rdx
+
+define(`rp',`%rdi')
+define(`up',`%rsi')
+define(`n',`%rdx')
+
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
+
+ASM_START()
+       TEXT
+       ALIGN(16)
+PROLOGUE(mpn_copyd)
+       FUNC_ENTRY(3)
+       sub     $4, n                   C n -= 4
+       jl      L(end)                  C under 4 limbs: skip the loop
+       ALIGN(16)
+L(top):        mov     24(up,n,8), %r8         C copy limbs n+3 down to n via r8
+       mov     %r8, 24(rp,n,8)
+       mov     16(up,n,8), %r8
+       mov     %r8, 16(rp,n,8)
+       mov     8(up,n,8), %r8
+       mov     %r8, 8(rp,n,8)
+       mov     (up,n,8), %r8
+       mov     %r8, (rp,n,8)
+L(ent):        sub     $4, n                   C four limbs per pass
+       jge     L(top)
+
+L(end):        cmp     $-4, R32(n)             C here n+4 limbs remain (0 to 3)
+       jz      L(ret)                  C none remain
+       mov     24(up,n,8), %r8
+       mov     %r8, 24(rp,n,8)
+       cmp     $-3, R32(n)
+       jz      L(ret)                  C exactly one remained
+       mov     16(up,n,8), %r8
+       mov     %r8, 16(rp,n,8)
+       cmp     $-2, R32(n)
+       jz      L(ret)                  C exactly two remained
+       mov     8(up,n,8), %r8
+       mov     %r8, 8(rp,n,8)
+
+L(ret):        FUNC_EXIT()
+       ret
+EPILOGUE()
diff --git a/mpn/x86_64/bobcat/copyi.asm b/mpn/x86_64/bobcat/copyi.asm

new file mode 100644 (file)

index 0000000..7ce0811
--- /dev/null
+++ b/mpn/x86_64/bobcat/copyi.asm
@@ -0,0 +1,83 @@
+dnl  AMD64 mpn_copyi optimised for AMD bobcat.
+
+dnl  Copyright 2003, 2005, 2007, 2011, 2012 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C           cycles/limb
+C AMD K8,K9     1
+C AMD K10       1-2  (alignment fluctuations)
+C AMD bd1       ?
+C AMD bobcat    1.5
+C Intel P4      2.8
+C Intel core2   1
+C Intel NHM     1-1.25
+C Intel SBR     1
+C Intel atom    2.87
+C VIA nano      2
+
+C INPUT PARAMETERS
+C rp   rdi
+C up   rsi
+C n    rdx
+
+define(`rp',`%rdi')
+define(`up',`%rsi')
+define(`n',`%rdx')
+
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
+
+ASM_START()
+       TEXT
+       ALIGN(16)
+PROLOGUE(mpn_copyi)
+       FUNC_ENTRY(3)
+       lea     -32(up,n,8), up         C up += 8*n - 32
+       lea     -32(rp,n,8), rp         C rp += 8*n - 32
+       neg     n
+       add     $4, n                   C n = 4 - n
+       jg      L(end)                  C under 4 limbs: skip the loop
+       ALIGN(16)
+L(top):        mov     (up,n,8), %r8           C copy limbs n up to n+3 via r8
+       mov     %r8, (rp,n,8)
+       mov     8(up,n,8), %r8
+       mov     %r8, 8(rp,n,8)
+       mov     16(up,n,8), %r8
+       mov     %r8, 16(rp,n,8)
+       mov     24(up,n,8), %r8
+       mov     %r8, 24(rp,n,8)
+L(ent):        add     $4, n                   C four limbs per pass
+       jle     L(top)
+
+L(end):        cmp     $4, R32(n)              C here 4-n limbs remain (0 to 3)
+       jz      L(ret)                  C none remain
+       mov     (up,n,8), %r8
+       mov     %r8, (rp,n,8)
+       cmp     $3, R32(n)
+       jz      L(ret)                  C exactly one remained
+       mov     8(up,n,8), %r8
+       mov     %r8, 8(rp,n,8)
+       cmp     $2, R32(n)
+       jz      L(ret)                  C exactly two remained
+       mov     16(up,n,8), %r8
+       mov     %r8, 16(rp,n,8)
+
+L(ret):        FUNC_EXIT()
+       ret
+EPILOGUE()
diff --git a/mpn/x86_64/bobcat/gmp-mparam.h b/mpn/x86_64/bobcat/gmp-mparam.h

index 02ce93823aa99c1db2848016923b3db614d84f3e..da0936abcd2b4945d973149fb0f3003b38ef7570 100644 (file)
--- a/mpn/x86_64/bobcat/gmp-mparam.h
+++ b/mpn/x86_64/bobcat/gmp-mparam.h
@@ -1,7 +1,7 @@
-/* AMD K8 gmp-mparam.h -- Compiler/machine parameter header file.
+/* AMD Bobcat gmp-mparam.h -- Compiler/machine parameter header file.
  
  Copyright 1991, 1993, 1994, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007,
-2008, 2009, 2010, 2012 Free Software Foundation, Inc.
+2008, 2009, 2010, 2011, 2012 Free Software Foundation, Inc.
  
  This file is part of the GNU MP Library.
  
@@ -21,159 +21,133 @@ along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
  #define GMP_LIMB_BITS 64
  #define BYTES_PER_MP_LIMB 8
  
+/* 1600 MHz AMD Bobcat E-350 */
  
  #define MOD_1_NORM_THRESHOLD                 0  /* always */
  #define MOD_1_UNNORM_THRESHOLD               0  /* always */
-#define MOD_1N_TO_MOD_1_1_THRESHOLD         10
-#define MOD_1U_TO_MOD_1_1_THRESHOLD          4
-#define MOD_1_1_TO_MOD_1_2_THRESHOLD         6
-#define MOD_1_2_TO_MOD_1_4_THRESHOLD        34
-#define PREINV_MOD_1_TO_MOD_1_THRESHOLD     18
+#define MOD_1N_TO_MOD_1_1_THRESHOLD          3
+#define MOD_1U_TO_MOD_1_1_THRESHOLD          2
+#define MOD_1_1_TO_MOD_1_2_THRESHOLD        39
+#define MOD_1_2_TO_MOD_1_4_THRESHOLD         0  /* never mpn_mod_1s_2p */
+#define PREINV_MOD_1_TO_MOD_1_THRESHOLD      7
  #define USE_PREINV_DIVREM_1                  1  /* native */
+#define DIV_QR_2_PI2_THRESHOLD           MP_SIZE_T_MAX  /* never */
  #define DIVEXACT_1_THRESHOLD                 0  /* always (native) */
-#define BMOD_1_TO_MOD_1_THRESHOLD           24
+#define BMOD_1_TO_MOD_1_THRESHOLD           17
  
-#define MUL_TOOM22_THRESHOLD                16
-#define MUL_TOOM33_THRESHOLD                45
-#define MUL_TOOM44_THRESHOLD               336
-#define MUL_TOOM6H_THRESHOLD               426
-#define MUL_TOOM8H_THRESHOLD               446
+#define MUL_TOOM22_THRESHOLD                27
+#define MUL_TOOM33_THRESHOLD                32
+#define MUL_TOOM44_THRESHOLD               272
+#define MUL_TOOM6H_THRESHOLD               357
+#define MUL_TOOM8H_THRESHOLD                 0  /* always */
  
-#define MUL_TOOM32_TO_TOOM43_THRESHOLD      89
-#define MUL_TOOM32_TO_TOOM53_THRESHOLD      98
-#define MUL_TOOM42_TO_TOOM53_THRESHOLD      89
-#define MUL_TOOM42_TO_TOOM63_THRESHOLD     103
+#define MUL_TOOM32_TO_TOOM43_THRESHOLD     115
+#define MUL_TOOM32_TO_TOOM53_THRESHOLD     172
+#define MUL_TOOM42_TO_TOOM53_THRESHOLD     119
+#define MUL_TOOM42_TO_TOOM63_THRESHOLD     128
+#define MUL_TOOM43_TO_TOOM54_THRESHOLD     160
  
  #define SQR_BASECASE_THRESHOLD               0  /* always (native) */
-#define SQR_TOOM2_THRESHOLD                 24
-#define SQR_TOOM3_THRESHOLD                 77
-#define SQR_TOOM4_THRESHOLD                354
-#define SQR_TOOM6_THRESHOLD                366
+#define SQR_TOOM2_THRESHOLD                 28
+#define SQR_TOOM3_THRESHOLD                 93
+#define SQR_TOOM4_THRESHOLD                372
+#define SQR_TOOM6_THRESHOLD                  0  /* always */
  #define SQR_TOOM8_THRESHOLD                430
  
+#define MULMID_TOOM42_THRESHOLD             24
+
  #define MULMOD_BNM1_THRESHOLD               11
  #define SQRMOD_BNM1_THRESHOLD               13
  
-#define MUL_FFT_MODF_THRESHOLD             400  /* k = 5 */
+#define MUL_FFT_MODF_THRESHOLD             460  /* k = 5 */
  #define MUL_FFT_TABLE3                                      \
-  { {    400, 5}, {     19, 6}, {     10, 5}, {     21, 6}, \
-    {     11, 5}, {     23, 6}, {     12, 5}, {     25, 6}, \
-    {     21, 7}, {     11, 6}, {     23, 7}, {     12, 6}, \
-    {     25, 7}, {     15, 6}, {     31, 7}, {     21, 8}, \
+  { {    372, 5}, {     17, 6}, {      9, 5}, {     19, 6}, \
+    {     10, 5}, {     21, 6}, {     11, 5}, {     23, 6}, \
+    {     12, 5}, {     25, 6}, {     25, 7}, {     13, 6}, \
+    {     27, 7}, {     15, 6}, {     31, 7}, {     21, 8}, \
      {     11, 7}, {     25, 8}, {     13, 7}, {     28, 8}, \
      {     15, 7}, {     31, 8}, {     17, 7}, {     35, 8}, \
-    {     21, 9}, {     11, 8}, {     27, 9}, {     15, 8}, \
-    {     35, 9}, {     19, 8}, {     41, 9}, {     23, 8}, \
-    {     49, 9}, {     27,10}, {     15, 9}, {     31, 8}, \
-    {     63, 9}, {     39,10}, {     23, 9}, {     55,11}, \
-    {     15,10}, {     31, 9}, {     71,10}, {     39, 9}, \
+    {     29, 9}, {     15, 8}, {     35, 9}, {     19, 8}, \
+    {     41, 9}, {     23, 8}, {     47, 9}, {     27,10}, \
+    {     15, 9}, {     39,10}, {     23, 9}, {     55,11}, \
+    {     15,10}, {     31, 9}, {     67,10}, {     39, 9}, \
      {     83,10}, {     47, 9}, {     95,10}, {     55,11}, \
-    {     31,10}, {     87,11}, {     47,10}, {    103,12}, \
-    {     31,11}, {     63,10}, {    135,11}, {     79,10}, \
-    {    159, 9}, {    319,10}, {    167,11}, {     95,10}, \
-    {    191,11}, {    111,12}, {     63,11}, {    127,10}, \
-    {    271,11}, {    143,10}, {    303, 9}, {    607,11}, \
-    {    159,10}, {    319,12}, {     95,11}, {    191,10}, \
-    {    383,11}, {    207,13}, {     63,12}, {    127,11}, \
-    {    271,10}, {    543,11}, {    287,10}, {    575,11}, \
-    {    303,10}, {    607,12}, {    159,11}, {    319,10}, \
-    {    639,11}, {    351,10}, {    703,11}, {    367,12}, \
-    {    191,11}, {    415,12}, {    223,11}, {    447,13}, \
-    {    127,12}, {    255,11}, {    543,12}, {    287,11}, \
-    {    607,12}, {    319,11}, {    639,12}, {    351,11}, \
-    {    703,13}, {    191,12}, {    383,11}, {    767,12}, \
-    {    415,11}, {    831,12}, {    447,14}, {    127,13}, \
-    {    255,12}, {    607,13}, {    319,12}, {    703,13}, \
-    {    383,12}, {    831,13}, {    447,12}, {    895,14}, \
-    {    255,13}, {    511,12}, {   1023,13}, {    575,12}, \
-    {   1151,13}, {    703,14}, {    383,13}, {    831,12}, \
-    {   1663,13}, {    895,15}, {    255,14}, {    511,13}, \
-    {   1087,12}, {   2175,13}, {   1151,14}, {    639,13}, \
-    {   1343,12}, {   2687,13}, {   1407,14}, {    767,13}, \
-    {   1663,14}, {    895,15}, {    511,14}, {   1023,13}, \
-    {   2175,14}, {   1151,13}, {   2431,12}, {   4863,14}, \
-    {   1279,13}, {   2687,14}, {   1407,15}, {    767,14}, \
-    {   1535,13}, {   3199,14}, {   1663,13}, {   3455,16}, \
-    {  65536,17}, { 131072,18}, { 262144,19}, { 524288,20}, \
-    {1048576,21}, {2097152,22}, {4194304,23}, {8388608,24} }
-#define MUL_FFT_TABLE3_SIZE 160
-#define MUL_FFT_THRESHOLD                 4736
-
-#define SQR_FFT_MODF_THRESHOLD             340  /* k = 5 */
+    {     31,10}, {     79,11}, {     47,10}, {     95, 8}, \
+    {    383,10}, {    111,12}, {     31,11}, {     63,10}, \
+    {    143,11}, {     79, 9}, {    319,10}, {    167,11}, \
+    {     95,10}, {    191, 9}, {    383,10}, {    207,12}, \
+    {     63,11}, {    127,10}, {    255, 9}, {    511,10}, \
+    {    271,11}, {    143,10}, {    287, 9}, {    575,10}, \
+    {    303,11}, {    159,10}, {    319,12}, {     95,11}, \
+    {    191,10}, {    383,11}, {    207,13}, {   8192,14}, \
+    {  16384,15}, {  32768,16}, {  65536,17}, { 131072,18}, \
+    { 262144,19}, { 524288,20}, {1048576,21}, {2097152,22}, \
+    {4194304,23}, {8388608,24} }
+#define MUL_FFT_TABLE3_SIZE 86
+#define MUL_FFT_THRESHOLD                 5760
+
+#define SQR_FFT_MODF_THRESHOLD             376  /* k = 5 */
  #define SQR_FFT_TABLE3                                      \
    { {    340, 5}, {     19, 6}, {     10, 5}, {     21, 6}, \
-    {     21, 7}, {     11, 6}, {     23, 7}, {     21, 8}, \
-    {     11, 7}, {     25, 8}, {     13, 7}, {     27, 8}, \
-    {     15, 7}, {     31, 8}, {     17, 7}, {     35, 8}, \
-    {     19, 7}, {     39, 8}, {     21, 9}, {     11, 8}, \
-    {     27, 9}, {     15, 8}, {     35, 9}, {     19, 8}, \
-    {     41, 9}, {     23, 8}, {     47, 9}, {     27,10}, \
-    {     15, 9}, {     39,10}, {     23, 9}, {     51,11}, \
-    {     15,10}, {     31, 9}, {     67,10}, {     39, 9}, \
-    {     79,10}, {     47, 9}, {     95,10}, {     55,11}, \
-    {     31,10}, {     79,11}, {     47,10}, {     95,12}, \
-    {     31,11}, {     63,10}, {    127, 9}, {    255,10}, \
-    {    135, 9}, {    271,11}, {     79,10}, {    159, 9}, \
-    {    319,11}, {     95,10}, {    191, 9}, {    383,12}, \
+    {     21, 7}, {     11, 6}, {     23, 7}, {     13, 6}, \
+    {     27, 7}, {     21, 8}, {     11, 7}, {     25, 8}, \
+    {     13, 7}, {     27, 8}, {     15, 7}, {     31, 8}, \
+    {     17, 7}, {     35, 8}, {     19, 7}, {     39, 8}, \
+    {     21, 9}, {     11, 8}, {     27, 9}, {     15, 8}, \
+    {     35, 9}, {     19, 8}, {     41, 9}, {     23, 8}, \
+    {     47, 9}, {     27,10}, {     15, 9}, {     39,10}, \
+    {     23, 9}, {     51,11}, {     15,10}, {     31, 9}, \
+    {     67,10}, {     39, 9}, {     83,10}, {     47, 9}, \
+    {     95,10}, {     55,11}, {     31,10}, {     79,11}, \
+    {     47,10}, {     95,12}, {     31,11}, {     63,10}, \
+    {    127,11}, {     95,10}, {    191, 9}, {    383,12}, \
      {     63,11}, {    127,10}, {    255, 9}, {    511,10}, \
-    {    271, 9}, {    543,11}, {    143,10}, {    287, 9}, \
-    {    575,10}, {    303, 9}, {    607,11}, {    159,10}, \
-    {    319, 9}, {    639,12}, {     95,11}, {    191,10}, \
-    {    383,11}, {    207,10}, {    415,13}, {     63,12}, \
-    {    127,11}, {    255,10}, {    511,11}, {    271,10}, \
-    {    543,11}, {    287,10}, {    575,11}, {    303,12}, \
-    {    159,11}, {    319,10}, {    639,11}, {    351,10}, \
-    {    703,12}, {    191,11}, {    383,10}, {    767,11}, \
-    {    415,12}, {    223,11}, {    479,13}, {    127,12}, \
-    {    255,11}, {    543,12}, {    287,11}, {    607,12}, \
-    {    319,11}, {    639,12}, {    351,11}, {    703,13}, \
-    {    191,12}, {    383,11}, {    767,12}, {    415,11}, \
-    {    831,12}, {    479,14}, {    127,13}, {    255,12}, \
-    {    607,13}, {    319,12}, {    703,13}, {    383,12}, \
-    {    831,13}, {    447,12}, {    895,14}, {    255,13}, \
-    {    511,12}, {   1023,13}, {    575,12}, {   1151,13}, \
-    {    703,14}, {    383,13}, {    895,15}, {    255,14}, \
-    {    511,13}, {   1087,12}, {   2175,13}, {   1151,14}, \
-    {    639,13}, {   1343,12}, {   2687,14}, {    767,13}, \
-    {   1599,12}, {   3199,13}, {   1663,14}, {    895,15}, \
-    {    511,14}, {   1023,13}, {   2175,14}, {   1151,13}, \
-    {   2431,12}, {   4863,14}, {   1279,13}, {   2687,15}, \
-    {    767,14}, {   1535,13}, {   3199,14}, {   1663,16}, \
-    {  65536,17}, { 131072,18}, { 262144,19}, { 524288,20}, \
-    {1048576,21}, {2097152,22}, {4194304,23}, {8388608,24} }
-#define SQR_FFT_TABLE3_SIZE 160
-#define SQR_FFT_THRESHOLD                 3264
-
-#define MULLO_BASECASE_THRESHOLD             3
-#define MULLO_DC_THRESHOLD                  43
-#define MULLO_MUL_N_THRESHOLD             9174
-
-#define DC_DIV_QR_THRESHOLD                 43
-#define DC_DIVAPPR_Q_THRESHOLD             142
-#define DC_BDIV_QR_THRESHOLD                44
-#define DC_BDIV_Q_THRESHOLD                 80
-
-#define INV_MULMOD_BNM1_THRESHOLD           42
-#define INV_NEWTON_THRESHOLD               181
-#define INV_APPR_THRESHOLD                 157
-
-#define BINV_NEWTON_THRESHOLD              230
-#define REDC_1_TO_REDC_2_THRESHOLD          54
-#define REDC_2_TO_REDC_N_THRESHOLD           0  /* anomaly: never REDC_2 */
-
-#define MU_DIV_QR_THRESHOLD               1442
-#define MU_DIVAPPR_Q_THRESHOLD            1442
-#define MUPI_DIV_QR_THRESHOLD               91
-#define MU_BDIV_QR_THRESHOLD              1142
-#define MU_BDIV_Q_THRESHOLD               1334
-
-#define MATRIX22_STRASSEN_THRESHOLD         14
-#define HGCD_THRESHOLD                      95
-#define GCD_DC_THRESHOLD                   298
-#define GCDEXT_DC_THRESHOLD                283
-#define JACOBI_BASE_METHOD                   1
-
-#define GET_STR_DC_THRESHOLD                15
-#define GET_STR_PRECOMPUTE_THRESHOLD        30
-#define SET_STR_DC_THRESHOLD               306
-#define SET_STR_PRECOMPUTE_THRESHOLD      1628
+    {    271, 9}, {    543,10}, {    287, 9}, {    575,10}, \
+    {    319,12}, {     95,11}, {    191,10}, {    383,11}, \
+    {    207,10}, {    415,13}, {   8192,14}, {  16384,15}, \
+    {  32768,16}, {  65536,17}, { 131072,18}, { 262144,19}, \
+    { 524288,20}, {1048576,21}, {2097152,22}, {4194304,23}, \
+    {8388608,24} }
+#define SQR_FFT_TABLE3_SIZE 77
+#define SQR_FFT_THRESHOLD                 3648
+
+#define MULLO_BASECASE_THRESHOLD             0  /* always */
+#define MULLO_DC_THRESHOLD                  52
+#define MULLO_MUL_N_THRESHOLD            11278
+
+#define DC_DIV_QR_THRESHOLD                 71
+#define DC_DIVAPPR_Q_THRESHOLD             202
+#define DC_BDIV_QR_THRESHOLD                76
+#define DC_BDIV_Q_THRESHOLD                151
+
+#define INV_MULMOD_BNM1_THRESHOLD           51
+#define INV_NEWTON_THRESHOLD               248
+#define INV_APPR_THRESHOLD                 204
+
+#define BINV_NEWTON_THRESHOLD              252
+#define REDC_1_TO_REDC_2_THRESHOLD          46
+#define REDC_2_TO_REDC_N_THRESHOLD           0  /* always */
+
+#define MU_DIV_QR_THRESHOLD               1470
+#define MU_DIVAPPR_Q_THRESHOLD            1589
+#define MUPI_DIV_QR_THRESHOLD              122
+#define MU_BDIV_QR_THRESHOLD              1334
+#define MU_BDIV_Q_THRESHOLD               1442
+
+#define POWM_SEC_TABLE  1,41,322,840,1421
+
+#define MATRIX22_STRASSEN_THRESHOLD         15
+#define HGCD_THRESHOLD                      86
+#define HGCD_APPR_THRESHOLD                 50
+#define HGCD_REDUCE_THRESHOLD             3014
+#define GCD_DC_THRESHOLD                   483
+#define GCDEXT_DC_THRESHOLD                303
+#define JACOBI_BASE_METHOD                   4
+
+#define GET_STR_DC_THRESHOLD                16
+#define GET_STR_PRECOMPUTE_THRESHOLD        31
+#define SET_STR_DC_THRESHOLD               266
+#define SET_STR_PRECOMPUTE_THRESHOLD      1424
+
+#define FAC_DSC_THRESHOLD                  906
+#define FAC_ODD_THRESHOLD                   46
diff --git a/mpn/x86_64/bobcat/mul_1.asm b/mpn/x86_64/bobcat/mul_1.asm

new file mode 100644 (file)

index 0000000..dd9560c
--- /dev/null
+++ b/mpn/x86_64/bobcat/mul_1.asm
@@ -0,0 +1,177 @@
+dnl  AMD64 mpn_mul_1 optimised for AMD bobcat.
+
+dnl  Copyright 2003, 2004, 2005, 2007, 2008, 2011, 2012 Free Software
+dnl  Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C           cycles/limb
+C AMD K8,K9     4.5
+C AMD K10       4.5
+C AMD bd1       4.62
+C AMD bobcat    5
+C Intel P4     14
+C Intel core2   4.5
+C Intel NHM     4.23
+C Intel SBR     3.0
+C Intel atom   21
+C VIA nano      4.94
+
+C The loop of this code is the result of running a code generation and
+C optimisation tool suite written by David Harvey and Torbjorn Granlund.
+
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
+
+C Standard (STD64) parameters.  cy is the carry-in, read by mpn_mul_1c only.
+define(`rp',              `%rdi')
+define(`up',              `%rsi')
+define(`n_param',         `%rdx')
+define(`v0',              `%rcx')
+define(`cy',              `%r8')
+C Standard allocations.  w0 shares r8 with cy; cy is copied to w2 first.
+define(`n',               `%rbx')
+define(`w0',              `%r8')
+define(`w1',              `%r9')
+define(`w2',              `%r10')
+define(`w3',              `%r11')
+
+C DOS64 parameters.  cy is read in mpn_mul_1c after 2 pushes: 40 + 2*8 = 56.
+IFDOS(` define(`rp',      `%rcx')    ') dnl
+IFDOS(` define(`up',      `%rsi')    ') dnl
+IFDOS(` define(`n_param', `%r8')     ') dnl
+IFDOS(` define(`v0',      `%r9')     ') dnl
+IFDOS(` define(`cy',      `56(%rsp)')') dnl
+C DOS64 allocations.  The prologues push rsi and rdi, then copy rdx to rsi.
+IFDOS(` define(`n',       `%rbx')    ') dnl
+IFDOS(` define(`w0',      `%r8')     ') dnl
+IFDOS(` define(`w1',      `%rdi')    ') dnl
+IFDOS(` define(`w2',      `%r10')    ') dnl
+IFDOS(` define(`w3',      `%r11')    ') dnl
+
+ASM_START()
+       TEXT
+       ALIGN(16)
+PROLOGUE(mpn_mul_1c)            C entry point that takes a carry-in limb (cy)
+IFDOS(`        push    %rsi            ')
+IFDOS(`        push    %rdi            ')
+IFDOS(`        mov     %rdx, %rsi      ')
+       mov     cy, w2                  C the carry-in seeds the running carry w2
+       jmp     L(com)                  C join mpn_mul_1 just past its clearing of w2
+EPILOGUE()
+
+PROLOGUE(mpn_mul_1)             C multiply the n_param limbs at up by v0, store at rp
+IFDOS(`        push    %rsi            ')
+IFDOS(`        push    %rdi            ')
+IFDOS(`        mov     %rdx, %rsi      ')
+       xor     w2, w2                  C no carry-in on this entry point
+L(com):        push    %rbx            C rbx is used as n below
+       mov     (up), %rax              C lowest source limb
+
+       lea     -16(rp,n_param,8), rp   C rp and up now point 16 bytes before
+       lea     -16(up,n_param,8), up   C the end of their n_param-limb operands
+
+       mov     n_param, n
+       and     $3, R32(n_param)        C pick the loop entry from n mod 4
+       jz      L(b0)
+       cmp     $2, R32(n_param)
+       ja      L(b3)
+       jz      L(b2)
+
+L(b1): mul     v0                      C n mod 4 = 1
+       cmp     $1, n
+       jz      L(n1)                   C exactly one limb
+       neg     n
+       add     $3, n                   C n = 3 - n, a negative index counting up
+       add     %rax, w2                C carry is consumed by the adc at L(L1)
+       mov     %rdx, w3
+       jmp     L(L1)
+L(n1): add     %rax, w2
+       mov     %rdx, %rax
+       mov     w2, 8(rp)               C the only result limb
+       adc     $0, %rax                C return value: high limb plus carry
+       pop     %rbx
+IFDOS(`        pop     %rdi            ')
+IFDOS(`        pop     %rsi            ')
+       ret
+
+L(b3): mul     v0                      C n mod 4 = 3
+       neg     n
+       inc     n                       C n = 1 - n
+       add     %rax, w2                C carry is consumed by the adc at L(L3)
+       mov     %rdx, w3
+       jmp     L(L3)
+
+L(b0): mul     v0                      C n mod 4 = 0
+       mov     %rax, w0
+       mov     %rdx, w1
+       neg     n
+       add     $2, n                   C n = 2 - n
+       add     w2, w0                  C carry is consumed by the adc at L(L0)
+       jmp     L(L0)
+
+L(b2): mul     v0                      C n mod 4 = 2
+       mov     %rax, w0
+       mov     %rdx, w1
+       neg     n
+       add     w2, w0                  C carry is consumed by the adc at L(L2)
+       jmp     L(L2)
+
+       ALIGN(16)
+L(top):        mov     w0, -16(rp,n,8) C 4-way unrolled loop, n counts up to zero
+       add     w1, w2
+L(L1): adc     $0, w3
+       mov     0(up,n,8), %rax
+       mul     v0
+       mov     %rax, w0
+       mov     %rdx, w1
+       mov     w2, -8(rp,n,8)
+       add     w3, w0
+L(L0): adc     $0, w1
+       mov     8(up,n,8), %rax
+       mul     v0
+       mov     %rax, w2
+       mov     %rdx, w3
+       mov     w0, 0(rp,n,8)
+       add     w1, w2
+L(L3): adc     $0, w3
+       mov     16(up,n,8), %rax
+       mul     v0
+       mov     %rax, w0
+       mov     %rdx, w1
+       mov     w2, 8(rp,n,8)
+       add     w3, w0
+L(L2): adc     $0, w1
+       mov     24(up,n,8), %rax
+       mul     v0
+       mov     %rax, w2
+       mov     %rdx, w3
+       add     $4, n
+       js      L(top)                  C loop while n is still negative
+
+L(end):        mov     w0, (rp)
+       add     w1, w2
+       adc     $0, w3
+       mov     w2, 8(rp)               C last result limb
+       mov     w3, %rax                C return value: the carry-out limb
+
+       pop     %rbx
+IFDOS(`        pop     %rdi            ')
+IFDOS(`        pop     %rsi            ')
+       ret
+EPILOGUE()
diff --git a/mpn/x86_64/bobcat/mul_basecase.asm b/mpn/x86_64/bobcat/mul_basecase.asm

new file mode 100644 (file)

index 0000000..7487012
--- /dev/null
+++ b/mpn/x86_64/bobcat/mul_basecase.asm
@@ -0,0 +1,476 @@
+dnl  AMD64 mpn_mul_basecase optimised for AMD bobcat.
+
+dnl  Copyright 2003, 2004, 2005, 2007, 2008, 2011, 2012 Free Software
+dnl  Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C           cycles/limb
+C AMD K8,K9     4.5
+C AMD K10       4.5
+C AMD bd1       4.75
+C AMD bobcat    5
+C Intel P4     17.7
+C Intel core2   5.5
+C Intel NHM     5.43
+C Intel SBR     3.92
+C Intel atom   23
+C VIA nano      5.63
+
+C This mul_basecase is based on mul_1 and addmul_1, since these both run at the
+C multiply insn bandwidth, without any apparent loop branch exit pipeline
+C replays experienced on K8.  The structure is unusual: it falls into mul_1 in
+C the same way for all n, then it splits into 4 different wind-down blocks and
+C 4 separate addmul_1 loops.
+C
+C We have not tried using the same addmul_1 loops with a switch into feed-in
+C code, as we do in other basecase implementations.  Doing that could save
+C substantial code volume, but would also probably add some overhead.
+
+C TODO
+C  * Tune un < 3 code.
+C  * Fix slowdown for un=vn=3 (67->71) compared to default code.
+C  * This is 1263 bytes, compared to 1099 bytes for default code.  Consider
+C    combining addmul loops like that code.  Tolerable slowdown?
+C  * Lots of space could be saved by replacing the "switch" code by gradual
+C    jumps out from mul_1 winddown code, perhaps with no added overhead.
+C  * Are the ALIGN(16) really necessary?  They add about 25 bytes of padding.
+
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
+
+C Standard parameters.  For DOS64 the prologue loads vn from the stack into r8.
+define(`rp',              `%rdi')
+define(`up',              `%rsi')
+define(`un_param',        `%rdx')
+define(`vp',              `%rcx')
+define(`vn',              `%r8')
+C Standard allocations.  rbx, rbp, r12 and r13 are pushed at L(ge3).
+define(`un',              `%rbx')
+define(`w0',              `%r10')
+define(`w1',              `%r11')
+define(`w2',              `%r12')
+define(`w3',              `%r13')
+define(`n',               `%rbp')
+define(`v0',              `%r9')
+
+C Temp macro for allowing control over indexing.  As defined, X(a,b) gives b.
+C Define to return $1 for more conservative ptr handling.
+define(`X',`$2')
+
+
+ASM_START()
+       TEXT
+       ALIGN(16)
+PROLOGUE(mpn_mul_basecase)      C a mul_1 row for v0, then one addmul_1 row per further v limb
+       FUNC_ENTRY(4)
+IFDOS(`        mov     56(%rsp), %r8d  ')
+
+       mov     (up), %rax
+       mov     (vp), v0
+
+       cmp     $2, un_param
+       ja      L(ge3)                  C un > 2
+       jz      L(u2)                   C un = 2
+
+       mul     v0                      C u0 x v0
+       mov     %rax, (rp)              C un below 2: only this one product is computed
+       mov     %rdx, 8(rp)
+       FUNC_EXIT()
+       ret
+
+L(u2): mul     v0                      C u0 x v0
+       mov     %rax, (rp)
+       mov     8(up), %rax
+       mov     %rdx, w0
+       mul     v0                      C u1 x v0
+       add     %rax, w0
+       mov     %rdx, w1
+       adc     $0, w1
+       cmp     $1, R32(vn)
+       jnz     L(u2v2)                 C vn is not 1: one more row is added
+       mov     w0, 8(rp)
+       mov     w1, 16(rp)
+       FUNC_EXIT()
+       ret
+
+L(u2v2):mov    8(vp), v0               C v1
+       mov     (up), %rax
+       mul     v0                      C u0 x v1
+       add     %rax, w0
+       mov     w0, 8(rp)
+       mov     %rdx, %r8               C CAUTION: r8 realloc
+       adc     $0, %r8
+       mov     8(up), %rax
+       mul     v0                      C u1 x v1
+       add     w1, %r8
+       adc     $0, %rdx
+       add     %r8, %rax
+       adc     $0, %rdx
+       mov     %rax, 16(rp)
+       mov     %rdx, 24(rp)
+       FUNC_EXIT()
+       ret
+
+
+L(ge3):        push    %rbx
+       push    %rbp
+       push    %r12
+       push    %r13
+
+       lea     8(vp), vp               C step past the limb already held in v0
+
+       lea     -24(rp,un_param,8), rp  C rp and up now point 24 bytes before
+       lea     -24(up,un_param,8), up  C the end of an un_param-limb operand
+       xor     R32(un), R32(un)
+       mov     $2, R32(n)
+       sub     un_param, un            C un = -un_param
+       sub     un_param, n             C n = 2 - un_param
+
+       mul     v0                      C u0 x v0
+       mov     %rax, w2
+       mov     %rdx, w3
+       jmp     L(L3)
+
+       ALIGN(16)
+L(top):        mov     w0, -16(rp,n,8) C mul_1 loop for the first row, 4-way unrolled
+       add     w1, w2
+       adc     $0, w3
+       mov     (up,n,8), %rax
+       mul     v0
+       mov     %rax, w0
+       mov     %rdx, w1
+       mov     w2, -8(rp,n,8)
+       add     w3, w0
+       adc     $0, w1
+       mov     8(up,n,8), %rax
+       mul     v0
+       mov     %rax, w2
+       mov     %rdx, w3
+       mov     w0, (rp,n,8)
+       add     w1, w2
+       adc     $0, w3
+L(L3): mov     16(up,n,8), %rax
+       mul     v0
+       mov     %rax, w0
+       mov     %rdx, w1
+       mov     w2, 8(rp,n,8)
+       add     w3, w0
+       adc     $0, w1
+       mov     24(up,n,8), %rax
+       mul     v0
+       mov     %rax, w2
+       mov     %rdx, w3
+       add     $4, n
+       js      L(top)                  C loop while n is still negative
+
+       mov     w0, -16(rp,n,8)
+       add     w1, w2
+       adc     $0, w3
+
+C Switch on n into right addmul_1 loop
+       test    n, n
+       jz      L(r2)                   C n = 0
+       cmp     $2, R32(n)
+       ja      L(r3)                   C n = 3
+       jz      L(r0)                   C n = 2
+       jmp     L(r1)                   C n = 1
+
+
+L(r3): mov     w2, X(-8(rp,n,8),16(rp))        C no products left: store the pending limbs
+       mov     w3, X((rp,n,8),24(rp))
+       add     $2, un
+
+C outer loop(3)
+L(to3):        dec     vn
+       jz      L(ret)                  C no v limbs left
+       mov     (vp), v0                C next v limb
+       mov     8(up,un,8), %rax        C restart at the lowest u limb
+       lea     8(vp), vp
+       lea     8(rp), rp               C each row lands one limb higher in rp
+       mov     un, n
+       mul     v0
+       mov     %rax, w2
+       mov     %rdx, w3
+       jmp     L(al3)
+
+       ALIGN(16)
+L(ta3):        add     w0, -16(rp,n,8) C addmul_1 loop: products are added into rp
+       adc     w1, w2
+       adc     $0, w3
+       mov     (up,n,8), %rax
+       mul     v0
+       mov     %rax, w0
+       mov     %rdx, w1
+       add     w2, -8(rp,n,8)
+       adc     w3, w0
+       adc     $0, w1
+       mov     8(up,n,8), %rax
+       mul     v0
+       mov     %rax, w2
+       mov     %rdx, w3
+       add     w0, (rp,n,8)
+       adc     w1, w2
+       adc     $0, w3
+L(al3):        mov     16(up,n,8), %rax
+       mul     v0
+       mov     %rax, w0
+       mov     %rdx, w1
+       add     w2, 8(rp,n,8)
+       adc     w3, w0
+       adc     $0, w1
+       mov     24(up,n,8), %rax
+       mul     v0
+       mov     %rax, w2
+       mov     %rdx, w3
+       add     $4, n
+       js      L(ta3)
+
+       add     w0, X(-16(rp,n,8),8(rp))
+       adc     w1, w2
+       adc     $0, w3
+       add     w2, X(-8(rp,n,8),16(rp))
+       adc     $0, w3
+       mov     w3, X((rp,n,8),24(rp))  C the carry-out limb of the row is stored, not added
+       jmp     L(to3)
+
+
+L(r2): mov     X(0(up,n,8),(up)), %rax         C three more products finish the first row
+       mul     v0
+       mov     %rax, w0
+       mov     %rdx, w1
+       mov     w2, X(-8(rp,n,8),-8(rp))
+       add     w3, w0
+       adc     $0, w1
+       mov     X(8(up,n,8),8(up)), %rax
+       mul     v0
+       mov     %rax, w2
+       mov     %rdx, w3
+       mov     w0, X((rp,n,8),(rp))
+       add     w1, w2
+       adc     $0, w3
+       mov     X(16(up,n,8),16(up)), %rax
+       mul     v0
+       mov     %rax, w0
+       mov     %rdx, w1
+       mov     w2, X(8(rp,n,8),8(rp))
+       add     w3, w0
+       adc     $0, w1
+       mov     w0, X(16(rp,n,8),16(rp))
+       adc     $0, w3                  C NOTE(review): w3 is overwritten at L(al2) before any read; looks redundant
+       mov     w1, X(24(rp,n,8),24(rp))
+       inc     un
+
+C outer loop(2)
+L(to2):        dec     vn
+       jz      L(ret)                  C no v limbs left
+       mov     (vp), v0                C next v limb
+       mov     16(up,un,8), %rax       C restart at the lowest u limb
+       lea     8(vp), vp
+       lea     8(rp), rp               C each row lands one limb higher in rp
+       mov     un, n
+       mul     v0
+       mov     %rax, w0
+       mov     %rdx, w1
+       jmp     L(al2)
+
+       ALIGN(16)
+L(ta2):        add     w0, -16(rp,n,8) C addmul_1 loop: products are added into rp
+       adc     w1, w2
+       adc     $0, w3
+       mov     (up,n,8), %rax
+       mul     v0
+       mov     %rax, w0
+       mov     %rdx, w1
+       add     w2, -8(rp,n,8)
+       adc     w3, w0
+       adc     $0, w1
+       mov     8(up,n,8), %rax
+       mul     v0
+       mov     %rax, w2
+       mov     %rdx, w3
+       add     w0, (rp,n,8)
+       adc     w1, w2
+       adc     $0, w3
+       mov     16(up,n,8), %rax
+       mul     v0
+       mov     %rax, w0
+       mov     %rdx, w1
+       add     w2, 8(rp,n,8)
+       adc     w3, w0
+       adc     $0, w1
+L(al2):        mov     24(up,n,8), %rax
+       mul     v0
+       mov     %rax, w2
+       mov     %rdx, w3
+       add     $4, n
+       js      L(ta2)
+
+       add     w0, X(-16(rp,n,8),8(rp))
+       adc     w1, w2
+       adc     $0, w3
+       add     w2, X(-8(rp,n,8),16(rp))
+       adc     $0, w3
+       mov     w3, X((rp,n,8),24(rp))  C the carry-out limb of the row is stored, not added
+       jmp     L(to2)
+
+
+L(r1): mov     X(0(up,n,8),8(up)), %rax        C two more products finish the first row
+       mul     v0
+       mov     %rax, w0
+       mov     %rdx, w1
+       mov     w2, X(-8(rp,n,8),(rp))
+       add     w3, w0
+       adc     $0, w1
+       mov     X(8(up,n,8),16(up)), %rax
+       mul     v0
+       mov     %rax, w2
+       mov     %rdx, w3
+       mov     w0, X((rp,n,8),8(rp))
+       add     w1, w2
+       adc     $0, w3
+       mov     w2, X(8(rp,n,8),16(rp))
+       mov     w3, X(16(rp,n,8),24(rp))
+       add     $4, un
+
+C outer loop(1)
+L(to1):        dec     vn
+       jz      L(ret)                  C no v limbs left
+       mov     (vp), v0                C next v limb
+       mov     -8(up,un,8), %rax       C restart at the lowest u limb
+       lea     8(vp), vp
+       lea     8(rp), rp               C each row lands one limb higher in rp
+       mov     un, n
+       mul     v0
+       mov     %rax, w2
+       mov     %rdx, w3
+       jmp     L(al1)
+
+       ALIGN(16)
+L(ta1):        add     w0, -16(rp,n,8) C addmul_1 loop: products are added into rp
+       adc     w1, w2
+       adc     $0, w3
+L(al1):        mov     (up,n,8), %rax
+       mul     v0
+       mov     %rax, w0
+       mov     %rdx, w1
+       add     w2, -8(rp,n,8)
+       adc     w3, w0
+       adc     $0, w1
+       mov     8(up,n,8), %rax
+       mul     v0
+       mov     %rax, w2
+       mov     %rdx, w3
+       add     w0, (rp,n,8)
+       adc     w1, w2
+       adc     $0, w3
+       mov     16(up,n,8), %rax
+       mul     v0
+       mov     %rax, w0
+       mov     %rdx, w1
+       add     w2, 8(rp,n,8)
+       adc     w3, w0
+       adc     $0, w1
+       mov     24(up,n,8), %rax
+       mul     v0
+       mov     %rax, w2
+       mov     %rdx, w3
+       add     $4, n
+       js      L(ta1)
+
+       add     w0, X(-16(rp,n,8),8(rp))
+       adc     w1, w2
+       adc     $0, w3
+       add     w2, X(-8(rp,n,8),16(rp))
+       adc     $0, w3
+       mov     w3, X((rp,n,8),24(rp))  C the carry-out limb of the row is stored, not added
+       jmp     L(to1)
+
+
+L(r0): mov     X((up,n,8),16(up)), %rax        C one more product finishes the first row
+       mul     v0
+       mov     %rax, w0
+       mov     %rdx, w1
+       mov     w2, X(-8(rp,n,8),8(rp))
+       add     w3, w0
+       adc     $0, w1
+       mov     w0, X((rp,n,8),16(rp))
+       mov     w1, X(8(rp,n,8),24(rp))
+       add     $3, un
+
+C outer loop(0)
+L(to0):        dec     vn
+       jz      L(ret)                  C no v limbs left
+       mov     (vp), v0                C next v limb
+       mov     (up,un,8), %rax         C restart at the lowest u limb
+       lea     8(vp), vp
+       lea     8(rp), rp               C each row lands one limb higher in rp
+       mov     un, n
+       mul     v0
+       mov     %rax, w0
+       mov     %rdx, w1
+       jmp     L(al0)
+
+       ALIGN(16)
+L(ta0):        add     w0, -16(rp,n,8) C addmul_1 loop: products are added into rp
+       adc     w1, w2
+       adc     $0, w3
+       mov     (up,n,8), %rax
+       mul     v0
+       mov     %rax, w0
+       mov     %rdx, w1
+       add     w2, -8(rp,n,8)
+       adc     w3, w0
+       adc     $0, w1
+L(al0):        mov     8(up,n,8), %rax
+       mul     v0
+       mov     %rax, w2
+       mov     %rdx, w3
+       add     w0, (rp,n,8)
+       adc     w1, w2
+       adc     $0, w3
+       mov     16(up,n,8), %rax
+       mul     v0
+       mov     %rax, w0
+       mov     %rdx, w1
+       add     w2, 8(rp,n,8)
+       adc     w3, w0
+       adc     $0, w1
+       mov     24(up,n,8), %rax
+       mul     v0
+       mov     %rax, w2
+       mov     %rdx, w3
+       add     $4, n
+       js      L(ta0)
+
+       add     w0, X(-16(rp,n,8),8(rp))
+       adc     w1, w2
+       adc     $0, w3
+       add     w2, X(-8(rp,n,8),16(rp))
+       adc     $0, w3
+       mov     w3, X((rp,n,8),24(rp))  C the carry-out limb of the row is stored, not added
+       jmp     L(to0)
+
+
+L(ret):        pop     %r13            C restore the registers pushed at L(ge3)
+       pop     %r12
+       pop     %rbp
+       pop     %rbx
+       FUNC_EXIT()
+       ret
+EPILOGUE()
diff --git a/mpn/x86_64/bobcat/sqr_basecase.asm b/mpn/x86_64/bobcat/sqr_basecase.asm

new file mode 100644 (file)

index 0000000..248ec70
--- /dev/null
+++ b/mpn/x86_64/bobcat/sqr_basecase.asm
@@ -0,0 +1,555 @@
+dnl  AMD64 mpn_sqr_basecase optimised for AMD bobcat.
+
+dnl  Copyright 2003, 2004, 2005, 2007, 2008, 2011, 2012 Free Software
+dnl  Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C           cycles/limb
+C AMD K8,K9     4.5
+C AMD K10       4.5
+C AMD bd1       4.75
+C AMD bobcat    5
+C Intel P4     17.7
+C Intel core2   5.5
+C Intel NHM     5.43
+C Intel SBR     3.92
+C Intel atom   23
+C VIA nano      5.63
+
+C This sqr_basecase is based on mul_1 and addmul_1, since these both run at the
+C multiply insn bandwidth, without any apparent loop branch exit pipeline
+C replays experienced on K8.  The structure is unusual: it falls into mul_1 in
+C the same way for all n, then it splits into 4 different wind-down blocks and
+C 4 separate addmul_1 loops.
+C
+C We have not tried using the same addmul_1 loops with a switch into feed-in
+C code, as we do in other basecase implementations.  Doing that could save
+C substantial code volume, but would also probably add some overhead.
+
+C TODO
+C  * Tune un < 4 code.
+C  * Perhaps implement a larger final corner (it is now 2 x 1).
+C  * Lots of space could be saved by replacing the "switch" code by gradual
+C    jumps out from mul_1 winddown code, perhaps with no added overhead.
+C  * Are the ALIGN(16) really necessary?  They add about 25 bytes of padding.
+
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
+
+C Standard parameters
+define(`rp',              `%rdi')
+define(`up',              `%rsi')
+define(`un_param',        `%rdx')
+C Standard allocations.  Only rbx and rbp are pushed, at L(g3).
+define(`un',              `%rbx')
+define(`w0',              `%r8')
+define(`w1',              `%r9')
+define(`w2',              `%r10')
+define(`w3',              `%r11')
+define(`n',               `%rbp')
+define(`v0',              `%rcx')
+
+C Temp macro for allowing control over indexing.  As defined, X(a,b) gives b.
+C Define to return $1 for more conservative ptr handling.
+define(`X',`$2')
+dnl define(`X',`$1')
+
+
+ASM_START()
+       TEXT
+       ALIGN(64)
+PROLOGUE(mpn_sqr_basecase)      C square the un_param limbs at up, result stored at rp
+       FUNC_ENTRY(3)
+
+       mov     (up), %rax
+
+       cmp     $2, R32(un_param)
+       jae     L(ge2)
+
+       mul     %rax                    C un below 2: square the single limb
+       mov     %rax, (rp)
+       mov     %rdx, 8(rp)
+       FUNC_EXIT()
+       ret
+
+L(ge2):        mov     (up), v0
+       jnz     L(g2)                   C flags are still those of the cmp above: un is not 2
+
+       mul     %rax                    C un = 2: u0 x u0
+       mov     %rax, (rp)
+       mov     8(up), %rax
+       mov     %rdx, w0
+       mul     v0                      C u1 x u0
+       add     %rax, w0
+       mov     %rdx, w1
+       adc     $0, w1
+       mov     8(up), v0
+       mov     (up), %rax
+       mul     v0                      C u0 x u1
+       add     %rax, w0
+       mov     w0, 8(rp)
+       mov     %rdx, w0                C CAUTION: r8 realloc (w0 is r8 here, reused right after being stored)
+       adc     $0, w0
+       mov     8(up), %rax
+       mul     v0                      C u1 x u1
+       add     w1, w0
+       adc     $0, %rdx
+       add     w0, %rax
+       adc     $0, %rdx
+       mov     %rax, 16(rp)
+       mov     %rdx, 24(rp)
+       FUNC_EXIT()
+       ret
+
+L(g2): cmp     $3, R32(un_param)
+       ja      L(g3)                   C un > 3
+       mul     %rax                    C un = 3: u0 x u0
+       mov     %rax, (rp)
+       mov     %rdx, 8(rp)
+       mov     8(up), %rax
+       mul     %rax                    C u1 x u1
+       mov     %rax, 16(rp)
+       mov     %rdx, 24(rp)
+       mov     16(up), %rax
+       mul     %rax                    C u2 x u2
+       mov     %rax, 32(rp)
+       mov     %rdx, 40(rp)
+
+       mov     (up), v0
+       mov     8(up), %rax
+       mul     v0                      C u1 x u0
+       mov     %rax, w0
+       mov     %rdx, w1
+       mov     16(up), %rax
+       mul     v0                      C u2 x u0
+       xor     R32(w2), R32(w2)
+       add     %rax, w1
+       adc     %rdx, w2
+
+       mov     8(up), v0
+       mov     16(up), %rax
+       mul     v0                      C u2 x u1
+       xor     R32(w3), R32(w3)
+       add     %rax, w2
+       adc     %rdx, w3
+       add     w0, w0                  C double the cross products held in w3:w2:w1:w0
+       adc     w1, w1
+       adc     w2, w2
+       adc     w3, w3
+       mov     $0, R32(v0)             C mov leaves the carry flag intact for the adc below
+       adc     v0, v0                  C v0 = carry out of the doubling
+       add     w0, 8(rp)               C add them to the squares stored above
+       adc     w1, 16(rp)
+       adc     w2, 24(rp)
+       adc     w3, 32(rp)
+       adc     v0, 40(rp)
+       FUNC_EXIT()
+       ret
+
+L(g3): push    %rbx
+       push    %rbp
+
+       mov     8(up), %rax             C u1, loaded before up is moved
+       lea     -24(rp,un_param,8), rp  C rp and up now point 24 bytes before
+       lea     -24(up,un_param,8), up  C the end of an un_param-limb operand
+       neg     un_param
+       push    un_param                C for sqr_diag_addlsh1
+       lea     (un_param), un          C un = -un_param (negated above)
+       lea     3(un_param), n          C n = 3 - un_param
+
+       mul     v0                      C u1 x u0, v0 was loaded at L(ge2)
+       mov     %rax, w2
+       mov     %rdx, w3
+       jmp     L(L3)
+
+       ALIGN(16)
+L(top):        mov     w0, -16(rp,n,8) C mul_1 loop for the first row, 4-way unrolled
+       add     w1, w2
+       adc     $0, w3
+       mov     (up,n,8), %rax
+       mul     v0
+       mov     %rax, w0
+       mov     %rdx, w1
+       mov     w2, -8(rp,n,8)
+       add     w3, w0
+       adc     $0, w1
+       mov     8(up,n,8), %rax
+       mul     v0
+       mov     %rax, w2
+       mov     %rdx, w3
+       mov     w0, (rp,n,8)
+       add     w1, w2
+       adc     $0, w3
+L(L3): mov     16(up,n,8), %rax
+       mul     v0
+       mov     %rax, w0
+       mov     %rdx, w1
+       mov     w2, 8(rp,n,8)
+       add     w3, w0
+       adc     $0, w1
+       mov     24(up,n,8), %rax
+       mul     v0
+       mov     %rax, w2
+       mov     %rdx, w3
+       add     $4, n
+       js      L(top)                  C loop while n is still negative
+
+       mov     w0, -16(rp,n,8)
+       add     w1, w2
+       adc     $0, w3
+
+       test    n, n                    C switch on n into the matching wind-down block
+       jz      L(r2)                   C n = 0
+       cmp     $2, R32(n)
+       ja      L(r3)                   C n = 3
+       jz      L(r0)                   C n = 2, otherwise n = 1 falls into L(r1)
+
+
+L(r1): mov     X((up,n,8),8(up)), %rax C two more products finish the first row
+       mul     v0
+       mov     %rax, w0
+       mov     %rdx, w1
+       mov     w2, X(-8(rp,n,8),(rp))
+       add     w3, w0
+       adc     $0, w1
+       mov     X(8(up,n,8),16(up)), %rax
+       mul     v0
+       mov     %rax, w2
+       mov     %rdx, w3
+       mov     w0, X((rp,n,8),8(rp))
+       add     w1, w2
+       adc     $0, w3
+       mov     w2, X(8(rp,n,8),16(rp))
+       mov     w3, X(16(rp,n,8),24(rp))
+       add     $5, un
+       jmp     L(to0)
+
+L(r2): mov     X((up,n,8),(up)), %rax  C three more products finish the first row
+       mul     v0
+       mov     %rax, w0
+       mov     %rdx, w1
+       mov     w2, X(-8(rp,n,8),-8(rp))
+       add     w3, w0
+       adc     $0, w1
+       mov     X(8(up,n,8),8(up)), %rax
+       mul     v0
+       mov     %rax, w2
+       mov     %rdx, w3
+       mov     w0, X((rp,n,8),(rp))
+       add     w1, w2
+       adc     $0, w3
+       mov     X(16(up,n,8),16(up)), %rax
+       mul     v0
+       mov     %rax, w0
+       mov     %rdx, w1
+       mov     w2, X(8(rp,n,8),8(rp))
+       add     w3, w0
+       adc     $0, w1
+       mov     w0, X(16(rp,n,8),16(rp))
+       adc     $0, w3                  C NOTE(review): w3 is overwritten at L(to1) before any read; looks redundant
+       mov     w1, X(24(rp,n,8),24(rp))
+       add     $6, un
+       jmp     L(to1)
+
+L(r3): mov     w2, X(-8(rp,n,8),16(rp))        C no products left: store the pending limbs
+       mov     w3, X((rp,n,8),24(rp))
+       add     $3, un
+       jmp     L(to2)
+
+L(r0): mov     X((up,n,8),16(up)), %rax        C one more product finishes the first row
+       mul     v0
+       mov     %rax, w0
+       mov     %rdx, w1
+       mov     w2, X(-8(rp,n,8),8(rp))
+       add     w3, w0
+       adc     $0, w1
+       mov     w0, X((rp,n,8),16(rp))
+       mov     w1, X(8(rp,n,8),24(rp))
+       add     $4, un
+C      jmp     L(to3)   (no label of that name exists here; the target is L(outer))
+C fall through into main loop
+
+
+L(outer):
+       mov     un, n
+       mov     (up,un,8), v0           C multiplier for this row
+       mov     8(up,un,8), %rax        C first multiplicand: the limb just above it
+       lea     8(rp), rp               C each row lands one limb higher in rp
+       mul     v0
+       mov     %rax, w2
+       mov     %rdx, w3
+       jmp     L(al3)
+
+       ALIGN(16)
+L(ta3):        add     w0, -16(rp,n,8) C addmul_1 loop: products are added into rp
+       adc     w1, w2
+       adc     $0, w3
+       mov     (up,n,8), %rax
+       mul     v0
+       mov     %rax, w0
+       mov     %rdx, w1
+       add     w2, -8(rp,n,8)
+       adc     w3, w0
+       adc     $0, w1
+       mov     8(up,n,8), %rax
+       mul     v0
+       mov     %rax, w2
+       mov     %rdx, w3
+       add     w0, (rp,n,8)
+       adc     w1, w2
+       adc     $0, w3
+L(al3):        mov     16(up,n,8), %rax
+       mul     v0
+       mov     %rax, w0
+       mov     %rdx, w1
+       add     w2, 8(rp,n,8)
+       adc     w3, w0
+       adc     $0, w1
+       mov     24(up,n,8), %rax
+       mul     v0
+       mov     %rax, w2
+       mov     %rdx, w3
+       add     $4, n
+       js      L(ta3)
+
+       add     w0, X(-16(rp,n,8),8(rp))
+       adc     w1, w2
+       adc     $0, w3
+       add     w2, X(-8(rp,n,8),16(rp))
+       adc     $0, w3
+       mov     w3, X((rp,n,8),24(rp))  C the carry-out limb of the row is stored, not added
+
+
+L(to2):        mov     un, n           C also entered from L(r3)
+       cmp     $-4, R32(un)
+       jnc     L(end)                  C unsigned compare: taken once R32(un) is -4 or higher
+       add     $4, un
+       mov     8(up,n,8), v0           C multiplier for this row
+       mov     16(up,n,8), %rax        C first multiplicand: the limb just above it
+       lea     8(rp), rp               C each row lands one limb higher in rp
+       mul     v0
+       mov     %rax, w0
+       mov     %rdx, w1
+       jmp     L(al2)
+
+       ALIGN(16)
+L(ta2):        add     w0, -16(rp,n,8) C addmul_1 loop: products are added into rp
+       adc     w1, w2
+       adc     $0, w3
+       mov     (up,n,8), %rax
+       mul     v0
+       mov     %rax, w0
+       mov     %rdx, w1
+       add     w2, -8(rp,n,8)
+       adc     w3, w0
+       adc     $0, w1
+       mov     8(up,n,8), %rax
+       mul     v0
+       mov     %rax, w2
+       mov     %rdx, w3
+       add     w0, (rp,n,8)
+       adc     w1, w2
+       adc     $0, w3
+       mov     16(up,n,8), %rax
+       mul     v0
+       mov     %rax, w0
+       mov     %rdx, w1
+       add     w2, 8(rp,n,8)
+       adc     w3, w0
+       adc     $0, w1
+L(al2):        mov     24(up,n,8), %rax
+       mul     v0
+       mov     %rax, w2
+       mov     %rdx, w3
+       add     $4, n
+       js      L(ta2)
+
+       add     w0, X(-16(rp,n,8),8(rp))
+       adc     w1, w2
+       adc     $0, w3
+       add     w2, X(-8(rp,n,8),16(rp))
+       adc     $0, w3
+       mov     w3, X((rp,n,8),24(rp))  C the carry-out limb of the row is stored, not added
+
+
+L(to1):        mov     un, n           C also entered from L(r2)
+       mov     -16(up,un,8), v0        C multiplier for this row
+       mov     -8(up,un,8), %rax       C first multiplicand: the limb just above it
+       lea     8(rp), rp               C each row lands one limb higher in rp
+       mul     v0
+       mov     %rax, w2
+       mov     %rdx, w3
+       jmp     L(al1)
+
+       ALIGN(16)
+L(ta1):        add     w0, -16(rp,n,8) C addmul_1 loop: products are added into rp
+       adc     w1, w2
+       adc     $0, w3
+L(al1):        mov     (up,n,8), %rax
+       mul     v0
+       mov     %rax, w0
+       mov     %rdx, w1
+       add     w2, -8(rp,n,8)
+       adc     w3, w0
+       adc     $0, w1
+       mov     8(up,n,8), %rax
+       mul     v0
+       mov     %rax, w2
+       mov     %rdx, w3
+       add     w0, (rp,n,8)
+       adc     w1, w2
+       adc     $0, w3
+       mov     16(up,n,8), %rax
+       mul     v0
+       mov     %rax, w0
+       mov     %rdx, w1
+       add     w2, 8(rp,n,8)
+       adc     w3, w0
+       adc     $0, w1
+       mov     24(up,n,8), %rax
+       mul     v0
+       mov     %rax, w2
+       mov     %rdx, w3
+       add     $4, n
+       js      L(ta1)
+
+       add     w0, X(-16(rp,n,8),8(rp))
+       adc     w1, w2
+       adc     $0, w3
+       add     w2, X(-8(rp,n,8),16(rp))
+       adc     $0, w3
+       mov     w3, X((rp,n,8),24(rp))  C the carry-out limb of the row is stored, not added
+
+
+L(to0):        mov     un, n           C also entered from L(r1)
+       mov     -8(up,un,8), v0         C multiplier for this row
+       mov     (up,un,8), %rax         C first multiplicand: the limb just above it
+       lea     8(rp), rp               C each row lands one limb higher in rp
+       mul     v0
+       mov     %rax, w0
+       mov     %rdx, w1
+       jmp     L(al0)
+
+       ALIGN(16)
+L(ta0):        add     w0, -16(rp,n,8) C addmul_1 loop: products are added into rp
+       adc     w1, w2
+       adc     $0, w3
+       mov     (up,n,8), %rax
+       mul     v0
+       mov     %rax, w0
+       mov     %rdx, w1
+       add     w2, -8(rp,n,8)
+       adc     w3, w0
+       adc     $0, w1
+L(al0):        mov     8(up,n,8), %rax
+       mul     v0
+       mov     %rax, w2
+       mov     %rdx, w3
+       add     w0, (rp,n,8)
+       adc     w1, w2
+       adc     $0, w3
+       mov     16(up,n,8), %rax
+       mul     v0
+       mov     %rax, w0
+       mov     %rdx, w1
+       add     w2, 8(rp,n,8)
+       adc     w3, w0
+       adc     $0, w1
+       mov     24(up,n,8), %rax
+       mul     v0
+       mov     %rax, w2
+       mov     %rdx, w3
+       add     $4, n
+       js      L(ta0)
+
+       add     w0, X(-16(rp,n,8),8(rp))
+       adc     w1, w2
+       adc     $0, w3
+       add     w2, X(-8(rp,n,8),16(rp))
+       adc     $0, w3
+       mov     w3, X((rp,n,8),24(rp))  C the carry-out limb of the row is stored, not added
+       jmp     L(outer)
+
+
+L(end):        mov     X(8(up,un,8),(up)), v0  C final corner: cross products of the last three limbs
+       mov     X(16(up,un,8),8(up)), %rax
+       mul     v0
+       mov     %rax, w0
+       mov     %rdx, w1
+       mov     X(24(up,un,8),16(up)), %rax
+       mul     v0
+       mov     %rax, w2
+       mov     %rdx, w3
+       add     w0, X(24(rp,un,8),16(rp))
+       adc     w1, w2
+       adc     $0, w3
+       add     w2, X(32(rp,un,8),24(rp))
+       adc     $0, w3
+       mov     X(16(up,un,8),8(up)), v0
+       mov     X(24(up,un,8),16(up)), %rax
+       mul     v0
+       add     %rax, w3
+       mov     w3, X(40(rp,un,8),32(rp))
+       adc     $0, %rdx
+       mov     %rdx, X(48(rp,un,8),40(rp))
+
+
+C sqr_diag_addlsh1: double the cross products in rp and add the limb squares
+
+       lea     16(up), up
+       lea     40(rp), rp
+       pop     n                       C the negated un_param pushed at L(g3)
+       lea     2(n,n), n               C n = 2n + 2
+
+       mov     (up,n,4), %rax
+       mul     %rax                    C square of the lowest limb, u0
+       xor     R32(w2), R32(w2)        C w2 = 0, carry flag cleared
+
+       mov     8(rp,n,8), w0
+       mov     %rax, (rp,n,8)          C store the low half of that square
+       jmp     L(lm)
+
+       ALIGN(8)
+L(tsd):        add     %rbx, w0        C rbx (also un) is a temporary in this loop
+       adc     %rax, w1
+       mov     w0, -8(rp,n,8)
+       mov     8(rp,n,8), w0
+       mov     w1, (rp,n,8)
+L(lm): mov     16(rp,n,8), w1
+       adc     w0, w0                  C double two limbs read from rp, carry chained in
+       adc     w1, w1
+       lea     (%rdx,w2), %rbx         C high half of the last square plus the carry saved in w2
+       mov     8(up,n,4), %rax
+       setc    R8(w2)                  C lea and mov keep the flags: this saves the doubling carry
+       mul     %rax                    C square of the next limb
+       add     $2, n
+       js      L(tsd)
+
+L(esd):        add     %rbx, w0
+       adc     %rax, w1
+       mov     w0, X(-8(rp,n,8),-8(rp))
+       mov     w1, X((rp,n,8),(rp))
+       adc     w2, %rdx
+       mov     %rdx, X(8(rp,n,8),8(rp))
+
+       pop     %rbp                    C restore the registers pushed at L(g3)
+       pop     %rbx
+       FUNC_EXIT()
+       ret
+EPILOGUE()
diff --git a/mpn/x86_64/com.asm b/mpn/x86_64/com.asm

index 699da11b69de05fb5eb24236245f5b190b0b743d..9c018cd0862d8ee1dad6b628b98b9d3cd0cc96fa 100644 (file)
--- a/mpn/x86_64/com.asm
+++ b/mpn/x86_64/com.asm
@@ -1,6 +1,6 @@
  dnl  AMD64 mpn_com.
  
-dnl  Copyright 2004, 2005, 2006 Free Software Foundation, Inc.
+dnl  Copyright 2004, 2005, 2006, 2011, 2012 Free Software Foundation, Inc.
  
  dnl  This file is part of the GNU MP Library.
  
@@ -21,29 +21,35 @@ include(`../config.m4')
  
  
  C          cycles/limb
-C K8,K9:       1.25
-C K10:         1.25
-C P4:          2.78
-C P6-15:       1.1
+C AMD K8,K9     1.25
+C AMD K10       1.25
+C Intel P4      2.78
+C Intel core2   1.1
+C Intel corei   1.5
+C Intel atom    ?
+C VIA nano      2
  
  C INPUT PARAMETERS
  define(`rp',`%rdi')
  define(`up',`%rsi')
  define(`n',`%rdx')
  
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
  
  ASM_START()
         TEXT
         ALIGN(32)
  PROLOGUE(mpn_com)
+       FUNC_ENTRY(3)
         movq    (up), %r8
-       movl    %edx, %eax
+       movl    R32(%rdx), R32(%rax)
         leaq    (up,n,8), up
         leaq    (rp,n,8), rp
         negq    n
-       andl    $3, %eax
+       andl    $3, R32(%rax)
         je      L(b00)
-       cmpl    $2, %eax
+       cmpl    $2, R32(%rax)
         jc      L(b01)
         je      L(b10)
  
@@ -73,5 +79,6 @@ L(e10):       movq    24(up,n,8), %r9
         movq    %r9, 24(rp,n,8)
         addq    $4, n
         jnc     L(oop)
-L(ret):        ret
+L(ret):        FUNC_EXIT()
+       ret
  EPILOGUE()
diff --git a/mpn/x86_64/copyd.asm b/mpn/x86_64/copyd.asm

index f5c451cfebd788ec1a20004154250c470aaf2b83..556933f2df6dbc23e916ab57d4807b2a51c7910c 100644 (file)
--- a/mpn/x86_64/copyd.asm
+++ b/mpn/x86_64/copyd.asm
@@ -1,6 +1,6 @@
  dnl  AMD64 mpn_copyd -- copy limb vector, decrementing.
  
-dnl  Copyright 2003, 2005, 2007 Free Software Foundation, Inc.
+dnl  Copyright 2003, 2005, 2007, 2011, 2012 Free Software Foundation, Inc.
  
  dnl  This file is part of the GNU MP Library.
  
@@ -19,57 +19,64 @@ dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
  
  include(`../config.m4')
  
+C           cycles/limb
+C AMD K8,K9     1
+C AMD K10       1
+C AMD bd1       1.36
+C AMD bobcat    1.71
+C Intel P4      2-3
+C Intel core2   1
+C Intel NHM     1
+C Intel SBR     1
+C Intel atom    2
+C VIA nano      2
  
-C          cycles/limb
-C K8,K9:       1
-C K10:         1
-C P4:          2.8
-C P6 core2:    1.2
-C P6 corei7:   1
  
+IFSTD(`define(`rp',`%rdi')')
+IFSTD(`define(`up',`%rsi')')
+IFSTD(`define(`n', `%rdx')')
  
-C INPUT PARAMETERS
-C rp   rdi
-C up   rsi
-C n    rdx
+IFDOS(`define(`rp',`%rcx')')
+IFDOS(`define(`up',`%rdx')')
+IFDOS(`define(`n', `%r8')')
  
-define(`rp',`%rdi')
-define(`up',`%rsi')
-define(`n',`%rdx')
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
  
  ASM_START()
         TEXT
-       ALIGN(16)
+       ALIGN(64)
  PROLOGUE(mpn_copyd)
-       leaq    -8(up,n,8), up
-       leaq    (rp,n,8), rp
-       subq    $4, n
+       lea     -8(up,n,8), up          C up -> highest source limb
+       lea     (rp,n,8), rp            C rp -> one limb past the highest destination limb
+       sub     $4, n                   C borrow (CF) means fewer than 4 limbs in total
         jc      L(end)
-       ALIGN(16)
-L(oop):        movq    (up), %r8
-       movq    -8(up), %r9
-       leaq    -32(rp), rp
-       movq    -16(up), %r10
-       movq    -24(up), %r11
-       leaq    -32(up), up
-       movq    %r8, 24(rp)
-       movq    %r9, 16(rp)
-       subq    $4, n
-       movq    %r10, 8(rp)
-       movq    %r11, (rp)
-       jnc     L(oop)
+       nop                             C one-byte filler; presumably positions L(top) - TODO confirm
  
-L(end):        shrl    %edx                    C edx = lowpart(n)
+L(top):        mov     (up), %rax              C main loop: load 4 limbs, highest address first
+       mov     -8(up), %r9
+       lea     -32(rp), rp             C step rp down 4 limbs before storing
+       mov     -16(up), %r10
+       mov     -24(up), %r11
+       lea     -32(up), up             C step up down 4 limbs
+       mov     %rax, 24(rp)            C highest limb of this group
+       mov     %r9, 16(rp)
+       sub     $4, n                   C CF set once fewer than 4 limbs remain
+       mov     %r10, 8(rp)
+       mov     %r11, (rp)              C lowest limb of this group
+       jnc     L(top)                  C tests CF from the sub; the mov stores leave flags alone
+
+L(end):        shr     R32(n)                  C CF = bit 0 of n, unchanged by the multiples of 4 subtracted
         jnc     1f
-       movq    (up), %r8
-       movq    %r8, -8(rp)
-       leaq    -8(rp), rp
-       leaq    -8(up), up
-1:     shrl    %edx                    C edx = lowpart(n)
+       mov     (up), %rax              C odd count: copy a single limb
+       mov     %rax, -8(rp)
+       lea     -8(rp), rp              C step both pointers down one limb
+       lea     -8(up), up
+1:     shr     R32(n)                  C CF = what was bit 1 of n
         jnc     1f
-       movq    (up), %r8
-       movq    -8(up), %r9
-       movq    %r8, -8(rp)
-       movq    %r9, -16(rp)
+       mov     (up), %rax              C two limbs left: copy both
+       mov     -8(up), %r9
+       mov     %rax, -8(rp)
+       mov     %r9, -16(rp)
  1:     ret
  EPILOGUE()
diff --git a/mpn/x86_64/copyi.asm b/mpn/x86_64/copyi.asm

index 506142be79ad13522ffffe3484fd67a1c5682177..32f9e14c189c3621027dbdbbfdf48a0054d68bb0 100644 (file)
--- a/mpn/x86_64/copyi.asm
+++ b/mpn/x86_64/copyi.asm
@@ -1,6 +1,6 @@
  dnl  AMD64 mpn_copyi -- copy limb vector, incrementing.
  
-dnl  Copyright 2003, 2005, 2007 Free Software Foundation, Inc.
+dnl  Copyright 2003, 2005, 2007, 2011, 2012 Free Software Foundation, Inc.
  
  dnl  This file is part of the GNU MP Library.
  
@@ -19,55 +19,63 @@ dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
  
  include(`../config.m4')
  
+C           cycles/limb
+C AMD K8,K9     1
+C AMD K10       1
+C AMD bd1       1.36
+C AMD bobcat    1.71
+C Intel P4      2-3
+C Intel core2   1
+C Intel NHM     1
+C Intel SBR     1
+C Intel atom    2
+C VIA nano      2
  
-C          cycles/limb
-C K8,K9:       1
-C K10:         1
-C P4:          2.8
-C P6-15:       1.2
  
+IFSTD(`define(`rp',`%rdi')')
+IFSTD(`define(`up',`%rsi')')
+IFSTD(`define(`n', `%rdx')')
  
-C INPUT PARAMETERS
-C rp   rdi
-C up   rsi
-C n    rdx
+IFDOS(`define(`rp',`%rcx')')
+IFDOS(`define(`up',`%rdx')')
+IFDOS(`define(`n', `%r8')')
  
-define(`rp',`%rdi')
-define(`up',`%rsi')
-define(`n',`%rdx')
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
  
  ASM_START()
         TEXT
-       ALIGN(16)
+       ALIGN(64)
+       .byte   0,0,0,0,0,0
  PROLOGUE(mpn_copyi)
-       leaq    -8(rp), rp
-       subq    $4, n
+       lea     -8(rp), rp              C bias rp down one limb so it trails the next limb to write by 8 bytes
+       sub     $4, n                   C borrow (CF) means fewer than 4 limbs in total
         jc      L(end)
-       ALIGN(16)
-L(oop):        movq    (up), %r8
-       movq    8(up), %r9
-       leaq    32(rp), rp
-       movq    16(up), %r10
-       movq    24(up), %r11
-       leaq    32(up), up
-       movq    %r8, -24(rp)
-       movq    %r9, -16(rp)
-       subq    $4, n
-       movq    %r10, -8(rp)
-       movq    %r11, (rp)
-       jnc     L(oop)
  
-L(end):        shrl    %edx                    C edx = lowpart(n)
+L(top):        mov     (up), %rax              C main loop: load 4 limbs, lowest address first
+       mov     8(up), %r9
+       lea     32(rp), rp              C step rp up 4 limbs before storing
+       mov     16(up), %r10
+       mov     24(up), %r11
+       lea     32(up), up              C step up up 4 limbs
+       mov     %rax, -24(rp)           C lowest limb of this group
+       mov     %r9, -16(rp)
+       sub     $4, n                   C CF set once fewer than 4 limbs remain
+       mov     %r10, -8(rp)
+       mov     %r11, (rp)              C highest limb of this group
+       jnc     L(top)                  C tests CF from the sub; the mov stores leave flags alone
+
+L(end):        shr     R32(n)                  C CF = bit 0 of n, unchanged by the multiples of 4 subtracted
         jnc     1f
-       movq    (up), %r8
-       movq    %r8, 8(rp)
-       leaq    8(rp), rp
-       leaq    8(up), up
-1:     shrl    %edx                    C edx = lowpart(n)
+       mov     (up), %rax              C odd count: copy a single limb
+       mov     %rax, 8(rp)
+       lea     8(rp), rp               C step both pointers up one limb
+       lea     8(up), up
+1:     shr     R32(n)                  C CF = what was bit 1 of n
         jnc     1f
-       movq    (up), %r8
-       movq    8(up), %r9
-       movq    %r8, 8(rp)
-       movq    %r9, 16(rp)
+       mov     (up), %rax              C two limbs left: copy both
+       mov     8(up), %r9
+       mov     %rax, 8(rp)
+       mov     %r9, 16(rp)
  1:     ret
  EPILOGUE()
diff --git a/mpn/x86_64/core2/aorrlsh1_n.asm b/mpn/x86_64/core2/aorrlsh1_n.asm

new file mode 100644 (file)

index 0000000..a272ead
--- /dev/null
+++ b/mpn/x86_64/core2/aorrlsh1_n.asm
@@ -0,0 +1,42 @@
+dnl  AMD64 mpn_addlsh1_n -- rp[] = up[] + (vp[] << 1)
+dnl  AMD64 mpn_rsblsh1_n -- rp[] = (vp[] << 1) - up[]
+
+dnl  Contributed to the GNU project by Torbjorn Granlund.
+
+dnl  Copyright 2008, 2010, 2011, 2012 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+define(LSH, 1)                         C left shift count applied to vp, per the header formulas
+define(RSH, 63)                        C 64 - LSH; presumably the complementary right shift in the included body - confirm there
+
+ifdef(`OPERATION_addlsh1_n', `
+       define(ADDSUB,  add)
+       define(ADCSBB,  adc)
+       define(func,    mpn_addlsh1_n)')        C add flavour, chosen when OPERATION_addlsh1_n is defined
+ifdef(`OPERATION_rsblsh1_n', `
+       define(ADDSUB,  sub)
+       define(ADCSBB,  sbb)
+       define(func,    mpn_rsblsh1_n)')        C reverse-subtract flavour, chosen when OPERATION_rsblsh1_n is defined
+
+MULFUNC_PROLOGUE(mpn_addlsh1_n mpn_rsblsh1_n)
+
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
+
+include_mpn(`x86_64/aorrlshC_n.asm')   C no code in this file: the body comes from the shared file, presumably driven by the macros set above
diff --git a/mpn/x86_64/core2/aorrlsh2_n.asm b/mpn/x86_64/core2/aorrlsh2_n.asm

new file mode 100644 (file)

index 0000000..d4e92ba
--- /dev/null
+++ b/mpn/x86_64/core2/aorrlsh2_n.asm
@@ -0,0 +1,42 @@
+dnl  AMD64 mpn_addlsh2_n -- rp[] = up[] + (vp[] << 2)
+dnl  AMD64 mpn_rsblsh2_n -- rp[] = (vp[] << 2) - up[]
+
+dnl  Contributed to the GNU project by Torbjorn Granlund.
+
+dnl  Copyright 2008, 2010, 2011, 2012 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+define(LSH, 2)                         C left shift count applied to vp, per the header formulas
+define(RSH, 62)                        C 64 - LSH; presumably the complementary right shift in the included body - confirm there
+
+ifdef(`OPERATION_addlsh2_n', `
+       define(ADDSUB,  add)
+       define(ADCSBB,  adc)
+       define(func,    mpn_addlsh2_n)')        C add flavour, chosen when OPERATION_addlsh2_n is defined
+ifdef(`OPERATION_rsblsh2_n', `
+       define(ADDSUB,  sub)
+       define(ADCSBB,  sbb)
+       define(func,    mpn_rsblsh2_n)')        C reverse-subtract flavour, chosen when OPERATION_rsblsh2_n is defined
+
+MULFUNC_PROLOGUE(mpn_addlsh2_n mpn_rsblsh2_n)
+
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
+
+include_mpn(`x86_64/aorrlshC_n.asm')   C no code in this file: the body comes from the shared file, presumably driven by the macros set above
diff --git a/mpn/x86_64/core2/aorrlsh_n.asm b/mpn/x86_64/core2/aorrlsh_n.asm

new file mode 100644 (file)

index 0000000..4fc177f
--- /dev/null
+++ b/mpn/x86_64/core2/aorrlsh_n.asm
@@ -0,0 +1,27 @@
+dnl  AMD64 mpn_addlsh_n and mpn_rsblsh_n.  R = V2^k +- U.
+
+dnl  Copyright 2011, 2012 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+MULFUNC_PROLOGUE(mpn_addlsh_n mpn_addlsh_nc mpn_rsblsh_n)
+
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
+
+include_mpn(`x86_64/coreinhm/aorrlsh_n.asm')   C no core2-specific code here: the implementation is pulled in from the coreinhm directory
diff --git a/mpn/x86_64/core2/aors_err1_n.asm b/mpn/x86_64/core2/aors_err1_n.asm

new file mode 100644 (file)

index 0000000..6ebb2cf
--- /dev/null
+++ b/mpn/x86_64/core2/aors_err1_n.asm
@@ -0,0 +1,214 @@
+dnl  Core 2 mpn_add_err1_n, mpn_sub_err1_n
+
+dnl  Contributed by David Harvey.
+
+dnl  Copyright 2011 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C           cycles/limb
+C AMD K8,K9     ?
+C AMD K10       ?
+C Intel P4      ?
+C Intel core2   4.14
+C Intel corei   ?
+C Intel atom    ?
+C VIA nano      ?
+
+
+C INPUT PARAMETERS
+define(`rp',   `%rdi')                 C result limbs are stored through this pointer
+define(`up',   `%rsi')                 C first source operand
+define(`vp',   `%rdx')                 C second source operand, added or subtracted via ADCSBB
+define(`ep',   `%rcx')                 C receives the two-limb accumulator: el at (ep), eh at 8(ep)
+define(`yp',   `%r8')                  C limbs that are conditionally accumulated; read from the top down
+define(`n',    `%r9')                  C limb count
+define(`cy_param',     `8(%rsp)')      C carry-in, read from the stack before any push
+
+define(`el',   `%rbx')                 C accumulator, low limb
+define(`eh',   `%rbp')                 C accumulator, high limb
+define(`t0',   `%r10')                 C t0-t3: y limbs selected by the carry out of each limb
+define(`t1',   `%r11')
+define(`t2',   `%r12')
+define(`t3',   `%r13')                 C also serves as the zero source for cmovnc in the main loop
+define(`w0',   `%r14')                 C w0, w1: working limbs for the add/subtract chain
+define(`w1',   `%r15')
+
+ifdef(`OPERATION_add_err1_n', `
+       define(ADCSBB,        adc)
+       define(func,          mpn_add_err1_n)')
+ifdef(`OPERATION_sub_err1_n', `
+       define(ADCSBB,        sbb)
+       define(func,          mpn_sub_err1_n)')
+
+MULFUNC_PROLOGUE(mpn_add_err1_n mpn_sub_err1_n)
+
+
+ASM_START()
+       TEXT
+       ALIGN(16)
+PROLOGUE(func)
+       mov     cy_param, %rax          C carry-in; must be read before the pushes move rsp
+
+       push    %rbx                    C save the six registers used as el, eh, t2, t3, w0, w1
+       push    %rbp
+       push    %r12
+       push    %r13
+       push    %r14
+       push    %r15
+
+       lea     (up,n,8), up            C point all three limb pointers past their last limb;
+       lea     (vp,n,8), vp            C n is negated below so (ptr,n,8) walks upward
+       lea     (rp,n,8), rp
+
+       mov     R32(n), R32(%r10)
+       and     $3, R32(%r10)           C n mod 4 selects how many limbs are handled before the 4-way loop
+       jz      L(0mod4)
+       cmp     $2, R32(%r10)
+       jc      L(1mod4)                C below 2
+       jz      L(2mod4)                C equal to 2; otherwise fall through with 3
+L(3mod4):
+       xor     R32(el), R32(el)        C clear accumulator and temporaries
+       xor     R32(eh), R32(eh)
+       xor     R32(t0), R32(t0)
+       xor     R32(t1), R32(t1)
+       lea     -24(yp,n,8), yp         C yp -> yp[n-3]; y limbs are consumed from the top down
+       neg     n
+
+        shr     $1, %al            C restore carry
+        mov     (up,n,8), w0
+        mov     8(up,n,8), w1
+        ADCSBB  (vp,n,8), w0            C limb 0: w0 = up limb combined with vp limb and carry
+       mov     w0, (rp,n,8)
+       cmovc   16(yp), el              C take yp[n-1] only if limb 0 produced a carry
+        ADCSBB  8(vp,n,8), w1           C limb 1
+       mov     w1, 8(rp,n,8)
+       cmovc   8(yp), t0               C take yp[n-2] only on carry out of limb 1
+        mov     16(up,n,8), w0
+        ADCSBB  16(vp,n,8), w0          C limb 2
+       mov     w0, 16(rp,n,8)
+       cmovc   (yp), t1                C take yp[n-3] only on carry out of limb 2
+       setc    %al                C save carry
+       add     t0, el                  C the adds below clobber CF, hence the setc above
+       adc     $0, eh
+       add     t1, el
+       adc     $0, eh
+
+       add     $3, n                   C 3 limbs done; zero means nothing is left for the loop
+       jnz     L(loop)
+       jmp     L(end)
+
+       ALIGN(16)
+L(0mod4):
+       xor     R32(el), R32(el)        C clear accumulator
+       xor     R32(eh), R32(eh)
+       lea     (yp,n,8), yp            C yp -> one past yp[n-1]; the loop reads at negative offsets
+       neg     n
+       jmp     L(loop)
+
+       ALIGN(16)
+L(1mod4):
+       xor     R32(el), R32(el)        C clear accumulator
+       xor     R32(eh), R32(eh)
+       lea     -8(yp,n,8), yp          C yp -> yp[n-1]
+       neg     n
+
+        shr     $1, %al            C restore carry
+        mov     (up,n,8), w0
+        ADCSBB  (vp,n,8), w0            C the single leading limb
+        mov     w0, (rp,n,8)
+       cmovc   (yp), el                C take yp[n-1] only on carry
+       setc    %al                C save carry
+
+       add     $1, n                   C zero means nothing is left for the loop
+       jnz     L(loop)
+       jmp     L(end)
+
+       ALIGN(16)
+L(2mod4):
+       xor     R32(el), R32(el)        C clear accumulator and temporary
+       xor     R32(eh), R32(eh)
+       xor     R32(t0), R32(t0)
+       lea     -16(yp,n,8), yp         C yp -> yp[n-2]
+       neg     n
+
+        shr     $1, %al            C restore carry
+        mov     (up,n,8), w0
+        mov     8(up,n,8), w1
+        ADCSBB  (vp,n,8), w0            C limb 0
+        mov     w0, (rp,n,8)
+       cmovc   8(yp), el               C take yp[n-1] only on carry out of limb 0
+        ADCSBB  8(vp,n,8), w1           C limb 1
+        mov     w1, 8(rp,n,8)
+       cmovc   (yp), t0                C take yp[n-2] only on carry out of limb 1
+       setc    %al                C save carry
+       add     t0, el
+       adc     $0, eh
+
+       add     $2, n                   C zero means nothing is left for the loop
+       jnz     L(loop)
+       jmp     L(end)
+
+       ALIGN(32)
+L(loop):
+        mov     (up,n,8), w0
+        shr     $1, %al            C restore carry
+        mov     -8(yp), t0              C candidate y limb for limb 0 of this group
+       mov     $0, R32(t3)             C zero t3 with mov: xor would clobber the CF just restored
+        ADCSBB  (vp,n,8), w0            C limb 0
+        cmovnc  t3, t0                  C no carry: drop the candidate
+        mov     w0, (rp,n,8)
+        mov     8(up,n,8), w1
+        mov     16(up,n,8), w0
+        ADCSBB  8(vp,n,8), w1           C limb 1
+        mov     -16(yp), t1
+        cmovnc  t3, t1                  C mov does not touch flags, so this still sees limb 1 carry
+        mov     -24(yp), t2
+        mov     w1, 8(rp,n,8)
+        ADCSBB  16(vp,n,8), w0          C limb 2
+        cmovnc  t3, t2
+        mov     24(up,n,8), w1
+        ADCSBB  24(vp,n,8), w1          C limb 3
+        cmovc   -32(yp), t3             C t3 is 0 here, so it becomes the y limb only on carry
+       setc    %al                C save carry
+        add     t0, el                  C fold the four selected y limbs into eh:el
+        adc     $0, eh
+        add     t1, el
+        adc     $0, eh
+        add     t2, el
+        adc     $0, eh
+        lea     -32(yp), yp             C step yp down 4 limbs
+        mov     w0, 16(rp,n,8)          C limb 2 result, stored late
+        add     t3, el
+        adc     $0, eh
+        add     $4, n                   C advance; ZF set when all limbs are done
+        mov     w1, -8(rp,n,8)          C limb 3 result; offset is -8 because n has already advanced
+       jnz     L(loop)                 C ZF from the add above, since mov leaves flags alone
+
+L(end):
+       mov     el, (ep)                C write the two-limb accumulator
+       mov     eh, 8(ep)
+
+       pop     %r15                    C restore in reverse push order
+       pop     %r14
+       pop     %r13
+       pop     %r12
+       pop     %rbp
+       pop     %rbx
+       ret                             C al still holds the carry saved by the last setc
+EPILOGUE()
diff --git a/mpn/x86_64/core2/aors_n.asm b/mpn/x86_64/core2/aors_n.asm

index 3dc04d0b736552234dc5c497ccd9496dcc19f33b..020b880b40028c087866d72216055d2d4de8899d 100644 (file)
--- a/mpn/x86_64/core2/aors_n.asm
+++ b/mpn/x86_64/core2/aors_n.asm
@@ -1,6 +1,6 @@
  dnl  Intel P6-15 mpn_add_n/mpn_sub_n -- mpn add or subtract.
  
-dnl  Copyright 2006, 2007 Free Software Foundation, Inc.
+dnl  Copyright 2006, 2007, 2011, 2012 Free Software Foundation, Inc.
  
  dnl  This file is part of the GNU MP Library.
  
@@ -21,11 +21,14 @@ include(`../config.m4')
  
  
  C           cycles/limb
-C K8,K9:        2.25
-C K10:          2
-C P4:          10
-C P6 core2:     2.05
-C P6 corei7:    2.3
+C AMD K8,K9     2.25
+C AMD K10       2
+C Intel P4     10
+C Intel core2   2.05
+C Intel NHM     2.3
+C Intel SBR     1.9
+C Intel atom    ?
+C VIA nano      ?
  
  C INPUT PARAMETERS
  define(`rp',   `%rdi')
@@ -45,16 +48,20 @@ ifdef(`OPERATION_sub_n', `
  
  MULFUNC_PROLOGUE(mpn_add_n mpn_add_nc mpn_sub_n mpn_sub_nc)
  
-ASM_START()
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
  
+ASM_START()
         TEXT
         ALIGN(16)
-
  PROLOGUE(func_nc)
+       FUNC_ENTRY(4)
+IFDOS(`        mov     56(%rsp), %r8   ')
         jmp     L(start)
  EPILOGUE()
  
  PROLOGUE(func)
+       FUNC_ENTRY(4)
         xor     %r8, %r8
  L(start):
         mov     (up), %r10
@@ -63,12 +70,12 @@ L(start):
         lea     -8(up,n,8), up
         lea     -8(vp,n,8), vp
         lea     -16(rp,n,8), rp
-       mov     %ecx, %eax
+       mov     R32(%rcx), R32(%rax)
         neg     n
-       and     $3, %eax
+       and     $3, R32(%rax)
         je      L(b00)
-       add     %rax, n         C clear low rcx bits for jrcxz
-       cmp     $2, %eax
+       add     %rax, n                 C clear low rcx bits for jrcxz
+       cmp     $2, R32(%rax)
         jl      L(b01)
         je      L(b10)
  
@@ -91,8 +98,9 @@ L(b10):       shr     %r8                     C set cy
  
  L(end):        ADCSBB  %r11, %r10
         mov     %r10, 8(rp)
-       mov     %ecx, %eax              C clear eax, ecx contains 0
-       adc     %eax, %eax
+       mov     R32(%rcx), R32(%rax)    C clear eax, ecx contains 0
+       adc     R32(%rax), R32(%rax)
+       FUNC_EXIT()
         ret
  
         ALIGN(16)
diff --git a/mpn/x86_64/core2/aorslsh1_n.asm b/mpn/x86_64/core2/aorslsh1_n.asm

deleted file mode 100644 (file)

index 18db7c9..0000000
--- a/mpn/x86_64/core2/aorslsh1_n.asm
+++ /dev/null
@@ -1,151 +0,0 @@
-dnl  x86-64 mpn_addlsh1_n and mpn_sublsh1_n, optimized for "Core" 2.
-
-dnl  Copyright 2008 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of the GNU Lesser General Public License as published
-dnl  by the Free Software Foundation; either version 3 of the License, or (at
-dnl  your option) any later version.
-
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-dnl  License for more details.
-
-dnl  You should have received a copy of the GNU Lesser General Public License
-dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C           cycles/limb
-C K8,K9:        4.25
-C K10:          ?
-C P4:           ?
-C P6-15:        3
-
-C INPUT PARAMETERS
-define(`rp',`%rdi')
-define(`up',`%rsi')
-define(`vp',`%rdx')
-define(`n', `%rcx')
-
-ifdef(`OPERATION_addlsh1_n', `
-       define(ADDSUB,  add)
-       define(ADCSBB,  adc)
-       define(func,    mpn_addlsh1_n)')
-ifdef(`OPERATION_sublsh1_n', `
-       define(ADDSUB,  sub)
-       define(ADCSBB,  sbb)
-       define(func,    mpn_sublsh1_n)')
-
-MULFUNC_PROLOGUE(mpn_addlsh1_n mpn_sublsh1_n)
-
-ASM_START()
-       TEXT
-       ALIGN(8)
-PROLOGUE(func)
-       push    %rbx
-       push    %r12
-
-       mov     R32(%rcx), R32(%rax)
-       lea     24(up,n,8), up
-       lea     24(vp,n,8), vp
-       lea     24(rp,n,8), rp
-       neg     n
-
-       xor     R32(%r11), R32(%r11)
-
-       mov     -24(vp,n,8), %r8        C do first limb early
-       shrd    $63, %r8, %r11
-
-       and     $3, R32(%rax)
-       je      L(b0)
-       cmp     $2, R32(%rax)
-       jc      L(b1)
-       je      L(b2)
-
-L(b3): mov     -16(vp,n,8), %r9
-       shrd    $63, %r9, %r8
-       mov     -8(vp,n,8), %r10
-       shrd    $63, %r10, %r9
-       mov     -24(up,n,8), %r12
-       ADDSUB  %r11, %r12
-       mov     %r12, -24(rp,n,8)
-       mov     -16(up,n,8), %r12
-       ADCSBB  %r8, %r12
-       mov     %r12, -16(rp,n,8)
-       mov     -8(up,n,8), %r12
-       ADCSBB  %r9, %r12
-       mov     %r12, -8(rp,n,8)
-       mov     %r10, %r11
-       sbb     R32(%rax), R32(%rax)    C save cy
-       add     $3, n
-       js      L(top)
-       jmp     L(end)
-
-L(b1): mov     -24(up,n,8), %r12
-       ADDSUB  %r11, %r12
-       mov     %r12, -24(rp,n,8)
-       mov     %r8, %r11
-       sbb     R32(%rax), R32(%rax)    C save cy
-       inc     n
-       js      L(top)
-       jmp     L(end)
-
-L(b2): mov     -16(vp,n,8), %r9
-       shrd    $63, %r9, %r8
-       mov     -24(up,n,8), %r12
-       ADDSUB  %r11, %r12
-       mov     %r12, -24(rp,n,8)
-       mov     -16(up,n,8), %r12
-       ADCSBB  %r8, %r12
-       mov     %r12, -16(rp,n,8)
-       mov     %r9, %r11
-       sbb     R32(%rax), R32(%rax)    C save cy
-       add     $2, n
-       js      L(top)
-       jmp     L(end)
-
-       ALIGN(16)
-L(top):        mov     -24(vp,n,8), %r8
-       shrd    $63, %r8, %r11
-L(b0): mov     -16(vp,n,8), %r9
-       shrd    $63, %r9, %r8
-       mov     -8(vp,n,8), %r10
-       shrd    $63, %r10, %r9
-       mov     (vp,n,8), %rbx
-       shrd    $63, %rbx, %r10
-
-       add     R32(%rax), R32(%rax)    C restore cy
-
-       mov     -24(up,n,8), %r12
-       ADCSBB  %r11, %r12
-       mov     %r12, -24(rp,n,8)
-
-       mov     -16(up,n,8), %r12
-       ADCSBB  %r8, %r12
-       mov     %r12, -16(rp,n,8)
-
-       mov     -8(up,n,8), %r12
-       ADCSBB  %r9, %r12
-       mov     %r12, -8(rp,n,8)
-
-       mov     (up,n,8), %r12
-       ADCSBB  %r10, %r12
-       mov     %r12, (rp,n,8)
-
-       mov     %rbx, %r11
-       sbb     R32(%rax), R32(%rax)    C save cy
-
-       add     $4, n
-       js      L(top)
-
-L(end):        add     %r11, %r11
-       pop     %r12
-       pop     %rbx
-       sbb     $0, R32(%rax)
-       neg     R32(%rax)
-       ret
-EPILOGUE()
diff --git a/mpn/x86_64/core2/aorsmul_1.asm b/mpn/x86_64/core2/aorsmul_1.asm

index 8dcccd9948277c907f92ff50e95a437c53688f20..4f889cfe91ef62ac03508d1ae123072355d27fe1 100644 (file)
--- a/mpn/x86_64/core2/aorsmul_1.asm
+++ b/mpn/x86_64/core2/aorsmul_1.asm
@@ -1,6 +1,7 @@
  dnl  x86-64 mpn_addmul_1 and mpn_submul_1, optimized for "Core 2".
  
-dnl  Copyright 2003, 2004, 2005, 2007, 2008, 2009 Free Software Foundation, Inc.
+dnl  Copyright 2003, 2004, 2005, 2007, 2008, 2009, 2011, 2012 Free Software
+dnl  Foundation, Inc.
  
  dnl  This file is part of the GNU MP Library.
  
@@ -20,11 +21,16 @@ dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
  include(`../config.m4')
  
  C           cycles/limb
-C K8,K9:        4
-C K10:          4
-C P4:           ?
-C P6 core2:     4.3-4.5 (fluctuating)
-C P6 corei7:    5
+C AMD K8,K9     4
+C AMD K10       4
+C AMD bd1       5.1
+C AMD bobcat
+C Intel P4      ?
+C Intel core2   4.3-4.5 (fluctuating)
+C Intel NHM     5.0
+C Intel SBR     4.1
+C Intel atom    ?
+C VIA nano      5.25
  
  C INPUT PARAMETERS
  define(`rp',   `%rdi')
@@ -34,19 +40,50 @@ define(`v0',        `%rcx')
  
  ifdef(`OPERATION_addmul_1',`
        define(`ADDSUB',        `add')
-      define(`func',  `mpn_addmul_1')
+      define(`func',     `mpn_addmul_1')
+      define(`func_1c',  `mpn_addmul_1c')
  ')
  ifdef(`OPERATION_submul_1',`
        define(`ADDSUB',        `sub')
-      define(`func',  `mpn_submul_1')
+      define(`func',     `mpn_submul_1')
+      define(`func_1c',  `mpn_submul_1c')
  ')
  
-MULFUNC_PROLOGUE(mpn_addmul_1 mpn_submul_1)
+MULFUNC_PROLOGUE(mpn_addmul_1 mpn_addmul_1c mpn_submul_1 mpn_submul_1c)
+
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
+
+       C For DOS, on the stack we have four saved registers, return address,
+       C space for four register arguments, and finally the carry input.
+
+IFDOS(` define(`carry_in', `72(%rsp)')') dnl
+IFSTD(` define(`carry_in', `%r8')') dnl
  
  ASM_START()
         TEXT
+       ALIGN(16)
+PROLOGUE(func_1c)
+       FUNC_ENTRY(4)                   C defined elsewhere; the 72(%rsp) DOS64 carry_in offset implies it pushes two registers there - TODO confirm
+       push    %rbx                    C rbx and rbp are saved here; rbx is loaded just below
+       push    %rbp
+       lea     (%rdx), %rbx            C copy the limb count out of rdx, which mul overwrites
+       neg     %rbx                    C rbx = negated count
+
+       mov     (up), %rax              C first source limb
+       mov     (rp), %r10              C preload first destination limb
+
+       lea     -16(rp,%rdx,8), rp      C move both pointers to the high end of their operands
+       lea     (up,%rdx,8), up
+       mul     %rcx                    C rdx:rax = first source limb times rcx (v0 is defined as rcx)
+       add     carry_in, %rax          C fold the carry-in into the low product limb
+       adc     $0, %rdx                C and propagate into the high limb
+       jmp     L(start_nc)             C join func at L(start_nc), just after its own mul
+EPILOGUE()
+
         ALIGN(16)
  PROLOGUE(func)
+       FUNC_ENTRY(4)
         push    %rbx
         push    %rbp
         lea     (%rdx), %rbx
@@ -59,6 +96,7 @@ PROLOGUE(func)
         lea     (up,%rdx,8), up
         mul     %rcx
  
+L(start_nc):
         bt      $0, R32(%rbx)
         jc      L(odd)
  
@@ -125,5 +163,6 @@ L(n1):      mov     8(rp), %r10
         adc     %rdx, %rax
         pop     %rbp
         pop     %rbx
+       FUNC_EXIT()
         ret
  EPILOGUE()
diff --git a/mpn/x86_64/core2/copyd.asm b/mpn/x86_64/core2/copyd.asm

new file mode 100644 (file)

index 0000000..6c6e9db
--- /dev/null
+++ b/mpn/x86_64/core2/copyd.asm
@@ -0,0 +1,26 @@
+dnl  X86-64 mpn_copyd optimised for Intel Sandy Bridge.
+
+dnl  Copyright 2012 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
+
+MULFUNC_PROLOGUE(mpn_copyd)
+include_mpn(`x86_64/fastsse/copyd-palignr.asm')        C body comes from the fastsse directory. NOTE(review): the header line names Sandy Bridge although this file is under core2 - confirm
diff --git a/mpn/x86_64/core2/copyi.asm b/mpn/x86_64/core2/copyi.asm

new file mode 100644 (file)

index 0000000..4714449
--- /dev/null
+++ b/mpn/x86_64/core2/copyi.asm
@@ -0,0 +1,26 @@
+dnl  X86-64 mpn_copyi optimised for Intel Sandy Bridge.
+
+dnl  Copyright 2012 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
+
+MULFUNC_PROLOGUE(mpn_copyi)
+include_mpn(`x86_64/fastsse/copyi-palignr.asm')        C body comes from the fastsse directory. NOTE(review): the header line names Sandy Bridge although this file is under core2 - confirm
diff --git a/mpn/x86_64/core2/divrem_1.asm b/mpn/x86_64/core2/divrem_1.asm

new file mode 100644 (file)

index 0000000..d5d2ad8
--- /dev/null
+++ b/mpn/x86_64/core2/divrem_1.asm
@@ -0,0 +1,227 @@
+dnl  x86-64 mpn_divrem_1 -- mpn by limb division.
+
+dnl  Copyright 2004, 2005, 2007, 2008, 2009, 2010, 2012 Free Software
+dnl  Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+C              norm    unorm   frac
+C AMD K8,K9    15      15      12
+C AMD K10      15      15      12
+C Intel P4     44      44      43
+C Intel core2  24      24      19.5
+C Intel corei  19      19      18
+C Intel atom   51      51      36
+C VIA nano     46      44      22.5
+
+C mp_limb_t
+C mpn_divrem_1 (mp_ptr qp, mp_size_t fn,
+C               mp_srcptr np, mp_size_t nn, mp_limb_t d)
+
+C mp_limb_t
+C mpn_preinv_divrem_1 (mp_ptr qp, mp_size_t fn,
+C                      mp_srcptr np, mp_size_t nn, mp_limb_t d,
+C                      mp_limb_t dinv, int cnt)
+
+C INPUT PARAMETERS
+define(`qp',           `%rdi')
+define(`fn_param',     `%rsi')
+define(`up_param',     `%rdx')
+define(`un_param',     `%rcx')
+define(`d',            `%r8')
+define(`dinv',         `%r9')          C only for mpn_preinv_divrem_1
+C       shift passed on stack          C only for mpn_preinv_divrem_1
+
+define(`cnt',          `%rcx')
+define(`up',           `%rsi')
+define(`fn',           `%r12')
+define(`un',           `%rbx')
+
+
+C rax rbx rcx rdx rsi rdi rbp r8  r9  r10 r11 r12 r13 r14 r15
+C         cnt         qp      d  dinv
+
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
+
+IFSTD(`define(`CNTOFF',                `40($1)')')
+IFDOS(`define(`CNTOFF',                `104($1)')')
+
+ASM_START()
+       TEXT
+       ALIGN(16)
+PROLOGUE(mpn_preinv_divrem_1)
+       FUNC_ENTRY(4)                   C entry used when the caller supplies dinv and the shift count
+IFDOS(`        mov     56(%rsp), %r8   ')      C DOS64 only: d is fetched from the stack
+IFDOS(`        mov     64(%rsp), %r9   ')      C DOS64 only: dinv is fetched from the stack
+       xor     R32(%rax), R32(%rax)    C rax = 0; L(ent) folds it in as the incoming remainder
+       push    %r13
+       push    %r12
+       push    %rbp
+       push    %rbx                    C four pushes, undone again at L(ret)
+
+       mov     fn_param, fn            C fn = fraction limb count, kept in r12
+       mov     un_param, un            C un = dividend limb count, kept in rbx
+       add     fn_param, un_param      C un_param = un + fn, the number of quotient limbs
+       mov     up_param, up
+
+       lea     -8(qp,un_param,8), qp   C qp -> highest quotient limb; limbs are stored downwards
+
+       mov     CNTOFF(%rsp), R8(cnt)   C shift count is read from the stack (offset per ABI, see CNTOFF)
+       shl     R8(cnt), d              C shift d left by cnt
+       jmp     L(ent)                  C continue in the loop shared with mpn_divrem_1
+EPILOGUE()
+
+       ALIGN(16)
+PROLOGUE(mpn_divrem_1)
+       FUNC_ENTRY(4)                   C entry that derives the shift count and dinv itself, see L(44)
+IFDOS(`        mov     56(%rsp), %r8   ')      C DOS64 only: d is fetched from the stack
+       xor     R32(%rax), R32(%rax)    C rax = 0, also the value returned by the early exit below
+       push    %r13
+       push    %r12
+       push    %rbp
+       push    %rbx                    C four pushes, undone again at L(ret)
+
+       mov     fn_param, fn            C fn = fraction limb count, kept in r12
+       mov     un_param, un            C un = dividend limb count, kept in rbx
+       add     fn_param, un_param      C un_param = un + fn, the number of quotient limbs
+       mov     up_param, up
+       je      L(ret)                  C un + fn == 0 (flags from the add; mov leaves them alone)
+
+       lea     -8(qp,un_param,8), qp   C qp -> highest quotient limb; limbs are stored downwards
+       xor     R32(%rbp), R32(%rbp)    C rbp = 0, the initial remainder
+
+L(unnormalized):
+       test    un, un
+       je      L(44)                   C no integer limbs: nothing to peel off
+       mov     -8(up,un,8), %rax       C rax = most significant dividend limb
+       cmp     d, %rax
+       jae     L(44)                   C top limb >= d: it must go through the division loop
+       mov     %rbp, (qp)              C top limb < d: the top quotient limb is 0 ...
+       mov     %rax, %rbp              C ... and the top limb becomes the initial remainder
+       lea     -8(qp), qp
+       je      L(ret)                  C NOTE(review): flags are still those of the cmp; ZF is clear after the jae falls through, so this looks never taken - confirm
+       dec     un                      C one dividend limb consumed
+L(44):
+       bsr     d, %rcx                 C rcx = index of the highest set bit of d
+       not     R32(%rcx)               C low 6 bits are now 63 - index, the leading zero count of d
+       sal     R8(%rcx), d             C shift d so that its top bit is set
+       sal     R8(%rcx), %rbp          C shift the pending remainder by the same count
+
+       push    %rcx                    C keep live registers across the call
+IFSTD(`        push    %rdi            ')
+IFSTD(`        push    %rsi            ')
+       push    %r8
+IFSTD(`        mov     d, %rdi         ')
+IFDOS(`        mov     d, %rcx         ')
+       CALL(   mpn_invert_limb)        C argument is the shifted d; result arrives in rax
+       pop     %r8
+IFSTD(`        pop     %rsi            ')
+IFSTD(`        pop     %rdi            ')
+       pop     %rcx
+
+       mov     %rax, dinv              C dinv = value returned by mpn_invert_limb
+       mov     %rbp, %rax              C rax = shifted pending remainder
+       test    un, un
+       je      L(frac)                 C no integer limbs left: only fraction limbs remain
+L(ent):        mov     -8(up,un,8), %rbp       C rbp = highest remaining dividend limb
+       shr     R8(%rcx), %rax          C undo the shift of rax ...
+       shld    R8(%rcx), %rbp, %rax    C ... and redo it, pulling in the top cnt bits of rbp
+       sub     $2, un
+       js      L(end)                  C fewer than two limbs left: skip the loop
+
+       ALIGN(16)
+L(top):        lea     1(%rax), %r11           C r11 = running remainder + 1
+       mul     dinv                    C rdx:rax = running remainder * dinv
+       mov     (up,un,8), %r10         C fetch the next lower dividend limb
+       shld    R8(%rcx), %r10, %rbp    C rbp = current limb shifted by cnt, low bits filled from r10
+       mov     %rbp, %r13
+       add     %rax, %r13              C r13 = low product limb + shifted limb
+       adc     %r11, %rdx              C rdx = quotient limb candidate
+       mov     %rdx, %r11
+       imul    d, %rdx
+       sub     %rdx, %rbp              C rbp = shifted limb - candidate * d (mod 2^64)
+       lea     (d,%rbp), %rax          C rax = rbp + d, the alternative remainder
+       sub     $8, qp
+       cmp     %r13, %rbp
+       cmovc   %rbp, %rax              C rbp < r13 (unsigned): rbp is the remainder
+       adc     $-1, %r11               C same carry keeps the candidate; otherwise candidate - 1
+       cmp     d, %rax
+       jae     L(ufx)                  C remainder still >= d: one more correction at L(ufx)
+L(uok):        dec     un
+       mov     %r11, 8(qp)             C store the quotient limb (qp was already stepped down)
+       mov     %r10, %rbp              C the limb just fetched becomes the current limb
+       jns     L(top)                  C flags from dec un; the two mov leave them alone
+
+L(end):        lea     1(%rax), %r11           C last integer limb, same step as L(top)
+       sal     R8(%rcx), %rbp          C no lower limb exists, so zeros are shifted in
+       mul     dinv
+       add     %rbp, %rax
+       adc     %r11, %rdx
+       mov     %rax, %r11
+       mov     %rdx, %r13              C r13 = quotient limb candidate
+       imul    d, %rdx
+       sub     %rdx, %rbp
+       mov     d, %rax
+       add     %rbp, %rax
+       cmp     %r11, %rbp
+       cmovc   %rbp, %rax
+       adc     $-1, %r13
+       cmp     d, %rax
+       jae     L(efx)                  C remainder still >= d: correction at L(efx)
+L(eok):        mov     %r13, (qp)
+       sub     $8, qp
+       jmp     L(frac)
+
+L(ufx):        sub     d, %rax                 C loop fix-up: remainder -= d, quotient limb += 1
+       inc     %r11
+       jmp     L(uok)
+L(efx):        sub     d, %rax                 C same fix-up for the L(end) step
+       inc     %r13
+       jmp     L(eok)
+
+L(frac):mov    d, %rbp
+       neg     %rbp                    C rbp = -d, the multiplier used by the fraction loop
+       jmp     L(fent)
+
+       ALIGN(16)                       C           K8-K10  P6-CNR P6-NHM  P4
+L(ftop):mul    dinv                    C             0,12   0,17   0,17
+       add     %r11, %rdx              C             5      8     10
+       mov     %rax, %r11              C             4      8      3
+       mov     %rdx, %r13              C             6      9     11
+       imul    %rbp, %rdx              C             6      9     11
+       mov     d, %rax                 C
+       add     %rdx, %rax              C            10     14     14
+       cmp     %r11, %rdx              C            10     14     14
+       cmovc   %rdx, %rax              C            11     15     15
+       adc     $-1, %r13               C
+       mov     %r13, (qp)              C
+       sub     $8, qp                  C
+L(fent):lea    1(%rax), %r11           C r11 = running remainder + 1
+       dec     fn                      C one fraction limb per iteration
+       jns     L(ftop)                 C loop until fn goes negative
+
+       shr     R8(%rcx), %rax          C shift the remainder back down by cnt; it is returned in rax
+L(ret):        pop     %rbx                    C undo the four pushes from the prologue
+       pop     %rbp
+       pop     %r12
+       pop     %r13
+       FUNC_EXIT()
+       ret
+EPILOGUE()
diff --git a/mpn/x86_64/core2/gcd_1.asm b/mpn/x86_64/core2/gcd_1.asm

new file mode 100644 (file)

index 0000000..2116555
--- /dev/null
+++ b/mpn/x86_64/core2/gcd_1.asm
@@ -0,0 +1,134 @@
+dnl  AMD64 mpn_gcd_1 optimised for Intel C2, NHM, SBR and AMD K10, BD.
+
+dnl  Based on the K7 gcd_1.asm, by Kevin Ryde.  Rehacked for AMD64 by Torbjorn
+dnl  Granlund.
+
+dnl  Copyright 2000, 2001, 2002, 2005, 2009, 2011, 2012 Free Software
+dnl  Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+C           cycles/bit (approx)
+C AMD K8,K9     8.50
+C AMD K10       4.30
+C AMD bd1       5.00
+C AMD bobcat   10.0
+C Intel P4     18.6
+C Intel core2   3.83
+C Intel NHM     5.17
+C Intel SBR     4.69
+C Intel atom   17.0
+C VIA nano      5.44
+C Numbers measured with: speed -CD -s16-64 -t48 mpn_gcd_1
+
+C TODO
+C  * Optimise inner-loop for specific CPUs.
+C  * Use DIV for 1-by-1 reductions, at least for some CPUs.
+
+C Threshold of when to call bmod when U is one limb.  Should be about
+C (time_in_cycles(bmod_1,1) + call_overhead) / (cycles/bit).
+define(`BMOD_THRES_LOG2', 6)
+
+C INPUT PARAMETERS
+define(`up',    `%rdi')
+define(`n',     `%rsi')
+define(`v0',    `%rdx')
+
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
+
+IFDOS(`define(`STACK_ALLOC', 40)')
+IFSTD(`define(`STACK_ALLOC', 8)')
+
+C Undo some configure cleverness.
+C The problem is that C only defines the '1c' variant, and that configure
+C therefore considers modexact_1c to be the base function.  It then adds a
+C special fat rule for mpn_modexact_1_odd, messing up things when a cpudep
+C gcd_1 exists without a corresponding cpudep mode1o.
+ifdef(`WANT_FAT_BINARY', `
+  define(`mpn_modexact_1_odd', `MPN_PREFIX`modexact_1_odd_x86_64'')')
+
+
+ASM_START()
+       TEXT
+       ALIGN(16)
+PROLOGUE(mpn_gcd_1)
+       FUNC_ENTRY(3)
+       mov     (up), %rax      C U low limb
+       or      v0, %rax
+       bsf     %rax, %rax      C min(ctz(u0),ctz(v0))
+
+       bsf     v0, %rcx        C rcx = ctz(v0)
+       shr     R8(%rcx), v0    C strip the trailing zeros from v0
+
+       push    %rax            C preserve common twos over call
+       push    v0              C preserve v0 argument over call
+       sub     $STACK_ALLOC, %rsp      C maintain ABI required rsp alignment
+
+       cmp     $1, n
+       jnz     L(reduce_nby1)  C n != 1: reduce the multi-limb U first
+
+C Both U and V are single limbs, reduce with bmod if u0 >> v0.
+       mov     (up), %r8
+       mov     %r8, %rax       C rax = u0, used as is when no reduction is done
+       shr     $BMOD_THRES_LOG2, %r8
+       cmp     %r8, v0
+       ja      L(reduced)      C v0 > u0 >> BMOD_THRES_LOG2: skip the reduction
+       jmp     L(bmod)
+
+L(reduce_nby1):
+       cmp     $BMOD_1_TO_MOD_1_THRESHOLD, n
+       jl      L(bmod)         C n below the threshold: use mpn_modexact_1_odd
+IFDOS(`        mov     %rdx, %r8       ')      C DOS64 only: move the three arguments to rcx, rdx, r8
+IFDOS(`        mov     %rsi, %rdx      ')
+IFDOS(`        mov     %rdi, %rcx      ')
+       CALL(   mpn_mod_1)      C called with up, n, v0; result in rax
+       jmp     L(reduced)
+L(bmod):
+IFDOS(`        mov     %rdx, %r8       ')      C DOS64 only: move the three arguments to rcx, rdx, r8
+IFDOS(`        mov     %rsi, %rdx      ')
+IFDOS(`        mov     %rdi, %rcx      ')
+       CALL(   mpn_modexact_1_odd)     C called with up, n, v0; result in rax
+L(reduced):
+
+       add     $STACK_ALLOC, %rsp      C release the alignment area
+       pop     %rdx            C rdx = v0 as pushed above (trailing zeros removed)
+
+       bsf     %rax, %rcx      C rcx = ctz of the reduced U; ZF is set when it is zero
+C      test    %rax, %rax      C FIXME: does this lower latency?
+       jnz     L(mid)
+       jmp     L(end)          C reduced U is zero: the result is built from rdx alone
+
+       ALIGN(16)               C               K10   BD    C2    NHM   SBR
+L(top):        cmovc   %r10, %rax      C if x-y < 0    0,3   0,3   0,6   0,5   0,5
+       cmovc   %r9, %rdx       C use x,y-x     0,3   0,3   2,8   1,7   1,7
+L(mid):        shr     R8(%rcx), %rax  C               1,7   1,6   2,8   2,8   2,8
+       mov     %rdx, %r10      C               1     1     4     3     3
+       sub     %rax, %r10      C               2     2     5     4     4
+       bsf     %r10, %rcx      C               3     3     6     5     5
+       mov     %rax, %r9       C               2     2     3     3     4
+       sub     %rdx, %rax      C               2     2     4     3     4
+       jnz     L(top)          C
+
+L(end):        pop     %rcx            C rcx = count of common twos saved at entry
+       mov     %rdx, %rax
+       shl     R8(%rcx), %rax  C put the common twos back; rax is the return value
+       FUNC_EXIT()
+       ret
+EPILOGUE()
diff --git a/mpn/x86_64/core2/gmp-mparam.h b/mpn/x86_64/core2/gmp-mparam.h

index 3c78e2963c4c9f21879a0ac4833b811c81b60ff5..f5593ea4165eaff7387793b9485f1c3d7facfe65 100644 (file)
--- a/mpn/x86_64/core2/gmp-mparam.h
+++ b/mpn/x86_64/core2/gmp-mparam.h
@@ -1,7 +1,7 @@
  /* Core 2 gmp-mparam.h -- Compiler/machine parameter header file.
  
  Copyright 1991, 1993, 1994, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007,
-2008, 2009, 2010 Free Software Foundation, Inc.
+2008, 2009, 2010, 2012 Free Software Foundation, Inc.
  
  This file is part of the GNU MP Library.
  
@@ -25,121 +25,127 @@ along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
  
  #define MOD_1_NORM_THRESHOLD                 0  /* always */
  #define MOD_1_UNNORM_THRESHOLD               0  /* always */
-#define MOD_1N_TO_MOD_1_1_THRESHOLD          5
-#define MOD_1U_TO_MOD_1_1_THRESHOLD          4
-#define MOD_1_1_TO_MOD_1_2_THRESHOLD         5
-#define MOD_1_2_TO_MOD_1_4_THRESHOLD         8
-#define PREINV_MOD_1_TO_MOD_1_THRESHOLD     10
+#define MOD_1N_TO_MOD_1_1_THRESHOLD          3
+#define MOD_1U_TO_MOD_1_1_THRESHOLD          2
+#define MOD_1_1_TO_MOD_1_2_THRESHOLD        11
+#define MOD_1_2_TO_MOD_1_4_THRESHOLD        15
+#define PREINV_MOD_1_TO_MOD_1_THRESHOLD      6
  #define USE_PREINV_DIVREM_1                  1  /* native */
+#define DIV_QR_2_PI2_THRESHOLD           MP_SIZE_T_MAX  /* never */
  #define DIVEXACT_1_THRESHOLD                 0  /* always (native) */
-#define BMOD_1_TO_MOD_1_THRESHOLD           26
+#define BMOD_1_TO_MOD_1_THRESHOLD           23
  
  #define MUL_TOOM22_THRESHOLD                23
  #define MUL_TOOM33_THRESHOLD                65
-#define MUL_TOOM44_THRESHOLD               183
-#define MUL_TOOM6H_THRESHOLD               254
-#define MUL_TOOM8H_THRESHOLD               381
+#define MUL_TOOM44_THRESHOLD               106
+#define MUL_TOOM6H_THRESHOLD               224
+#define MUL_TOOM8H_THRESHOLD                 0  /* always */
  
  #define MUL_TOOM32_TO_TOOM43_THRESHOLD      69
-#define MUL_TOOM32_TO_TOOM53_THRESHOLD     122
+#define MUL_TOOM32_TO_TOOM53_THRESHOLD      76
  #define MUL_TOOM42_TO_TOOM53_THRESHOLD      73
-#define MUL_TOOM42_TO_TOOM63_THRESHOLD      74
+#define MUL_TOOM42_TO_TOOM63_THRESHOLD      72
+#define MUL_TOOM43_TO_TOOM54_THRESHOLD     100
  
  #define SQR_BASECASE_THRESHOLD               0  /* always (native) */
  #define SQR_TOOM2_THRESHOLD                 28
-#define SQR_TOOM3_THRESHOLD                 97
+#define SQR_TOOM3_THRESHOLD                101
  #define SQR_TOOM4_THRESHOLD                148
-#define SQR_TOOM6_THRESHOLD                254
+#define SQR_TOOM6_THRESHOLD                206
  #define SQR_TOOM8_THRESHOLD                296
  
-#define MULMOD_BNM1_THRESHOLD               12
-#define SQRMOD_BNM1_THRESHOLD               14
+#define MULMID_TOOM42_THRESHOLD             24
+
+#define MULMOD_BNM1_THRESHOLD               18
+#define SQRMOD_BNM1_THRESHOLD               17
  
  #define MUL_FFT_MODF_THRESHOLD             380  /* k = 5 */
  #define MUL_FFT_TABLE3                                      \
    { {    380, 5}, {     15, 6}, {      8, 5}, {     17, 6}, \
-    {      9, 5}, {     19, 6}, {     11, 5}, {     23, 6}, \
-    {     19, 7}, {     10, 6}, {     21, 7}, {     11, 6}, \
-    {     23, 7}, {     13, 6}, {     27, 7}, {     24, 8}, \
+    {      9, 5}, {     19, 6}, {     19, 7}, {     10, 6}, \
+    {     21, 7}, {     11, 6}, {     23, 7}, {     13, 6}, \
+    {     27, 7}, {     21, 8}, {     11, 7}, {     23, 8}, \
      {     13, 7}, {     27, 8}, {     15, 7}, {     31, 8}, \
      {     17, 7}, {     35, 8}, {     19, 7}, {     39, 8}, \
      {     21, 9}, {     11, 8}, {     27, 9}, {     15, 8}, \
      {     35, 9}, {     19, 8}, {     41, 9}, {     23, 8}, \
-    {     47, 9}, {     27,10}, {     15, 9}, {     39,10}, \
-    {     23, 9}, {     51,11}, {     15,10}, {     31, 9}, \
-    {     63,10}, {     39, 9}, {     79,10}, {     47, 9}, \
-    {     95,10}, {     55,11}, {     31,10}, {     87,11}, \
-    {     47,12}, {     31,11}, {     63,10}, {    127, 9}, \
-    {    255,10}, {    143,11}, {     79, 9}, {    319,11}, \
-    {     95,10}, {    207,11}, {    111,12}, {     63,11}, \
-    {    143,10}, {    287,11}, {    159,10}, {    319,11}, \
-    {    175,12}, {     95,11}, {    191,10}, {    383,11}, \
+    {     47, 9}, {     27,10}, {     15, 9}, {     43,10}, \
+    {     23, 9}, {     55,11}, {     15,10}, {     31, 9}, \
+    {     67,10}, {     39, 9}, {     79,10}, {     55,11}, \
+    {     31, 9}, {    127,10}, {     71, 8}, {    287,10}, \
+    {     79,11}, {     47,12}, {     31,11}, {     63, 9}, \
+    {    255,10}, {    135, 9}, {    271,11}, {     79, 9}, \
+    {    319,10}, {    175,11}, {     95,10}, {    191, 9}, \
+    {    383,11}, {    111,12}, {     63,11}, {    127,10}, \
+    {    271, 9}, {    543,11}, {    143,10}, {    287,11}, \
+    {    159,10}, {    319, 9}, {    639,11}, {    175,10}, \
+    {    351,12}, {     95,11}, {    191,10}, {    383,11}, \
      {    207,10}, {    415,13}, {     63,12}, {    127,11}, \
-    {    287,10}, {    575,12}, {    159,11}, {    319,10}, \
-    {    639,11}, {    351,10}, {    703,11}, {    367,12}, \
-    {    191,11}, {    415,10}, {    831,12}, {    223,11}, \
-    {    447,10}, {    895,11}, {    479,13}, {    127,12}, \
-    {    255,11}, {    543,12}, {    287,11}, {    607,12}, \
-    {    319,11}, {    639,12}, {    351,11}, {    703,13}, \
-    {    191,12}, {    415,11}, {    831,12}, {    447,11}, \
-    {    895,12}, {    479,14}, {    127,13}, {    255,12}, \
-    {    543,11}, {   1087,12}, {    607,13}, {    319,12}, \
-    {    735,13}, {    383,12}, {    831,13}, {    447,12}, \
-    {    959,14}, {    255,13}, {    511,12}, {   1087,13}, \
-    {    575,12}, {   1215,13}, {    639,12}, {   1279,13}, \
-    {    703,14}, {    383,13}, {    831,12}, {   1663,13}, \
-    {    959,15}, {    255,14}, {    511,13}, {   1087,12}, \
-    {   2175,13}, {   1215,14}, {    639,13}, {   1343,12}, \
-    {   2687,13}, {   1407,12}, {   2815,13}, {   1471,14}, \
-    {    767,13}, {   1535,12}, {   3071,13}, {   1663,14}, \
-    {    895,13}, {   1791,12}, {   3583,13}, {   1919,15}, \
+    {    271,10}, {    543,11}, {    287,12}, {    159,11}, \
+    {    319,10}, {    671,11}, {    351,12}, {    191,11}, \
+    {    415,12}, {    223,11}, {    447,10}, {    895,11}, \
+    {    479,13}, {    127,12}, {    287,11}, {    607,12}, \
+    {    319,11}, {    671,12}, {    351,13}, {    191,12}, \
+    {    415,11}, {    831,10}, {   1663,12}, {    479,14}, \
+    {    127,13}, {    255,12}, {    543,11}, {   1087,12}, \
+    {    607,13}, {    319,12}, {    703,13}, {    383,12}, \
+    {    767,10}, {   3071,12}, {    831,13}, {    447,12}, \
+    {    959,14}, {    255,13}, {    511,12}, {   1023,13}, \
+    {    575,12}, {   1151,11}, {   2303,13}, {    639,12}, \
+    {   1343,13}, {    703,14}, {    383,13}, {    831,12}, \
+    {   1727,13}, {    959,15}, {    255,14}, {    511,13}, \
+    {   1087,12}, {   2175,13}, {   1215,14}, {    639,13}, \
+    {   1343,12}, {   2687,13}, {   1407,12}, {   2815,14}, \
+    {    767,13}, {   1663,14}, {    895,13}, {   1919,15}, \
      {    511,14}, {   1023,13}, {   2175,14}, {   1151,13}, \
-    {   2303,12}, {   4607,13}, {   2431,12}, {   4863,14}, \
-    {   1279,13}, {   2559,14}, {   1407,15}, {    767,14}, \
-    {   1535,13}, {   3199,14}, {   1663,13}, {   3455,12}, \
-    {   6911,14}, {   1791,13}, {   3583,14}, {  16384,15}, \
-    {  32768,16}, {  65536,17}, { 131072,18}, { 262144,19}, \
-    { 524288,20}, {1048576,21}, {2097152,22}, {4194304,23}, \
-    {8388608,24} }
-#define MUL_FFT_TABLE3_SIZE 173
+    {   2431,12}, {   4863,13}, {   2495,14}, {   1279,13}, \
+    {   2687,14}, {   1407,15}, {    767,14}, {   1663,13}, \
+    {   3327,12}, {   6655,13}, {   3455,12}, {   6911,14}, \
+    {  16384,15}, {  32768,16}, {  65536,17}, { 131072,18}, \
+    { 262144,19}, { 524288,20}, {1048576,21}, {2097152,22}, \
+    {4194304,23}, {8388608,24} }
+#define MUL_FFT_TABLE3_SIZE 170
  #define MUL_FFT_THRESHOLD                 4736
  
-#define SQR_FFT_MODF_THRESHOLD             256  /* k = 5 */
+#define SQR_FFT_MODF_THRESHOLD             308  /* k = 5 */
  #define SQR_FFT_TABLE3                                      \
-  { {    256, 5}, {      8, 4}, {     17, 5}, {      9, 4}, \
-    {     19, 5}, {     17, 6}, {      9, 5}, {     19, 6}, \
+  { {    308, 5}, {     17, 6}, {      9, 5}, {     19, 6}, \
      {     21, 7}, {     11, 6}, {     23, 7}, {     12, 6}, \
-    {     25, 7}, {     21, 8}, {     11, 7}, {     25, 8}, \
+    {     25, 7}, {     21, 8}, {     11, 7}, {     24, 8}, \
      {     13, 7}, {     27, 8}, {     15, 7}, {     31, 8}, \
      {     21, 9}, {     11, 8}, {     27, 9}, {     15, 8}, \
      {     35, 9}, {     19, 8}, {     41, 9}, {     23, 8}, \
      {     47, 9}, {     27,10}, {     15, 9}, {     39,10}, \
-    {     23, 9}, {     47,11}, {     15,10}, {     31, 9}, \
-    {     63,10}, {     39, 9}, {     79,10}, {     55,11}, \
-    {     31,10}, {     79,11}, {     47,10}, {     95,12}, \
-    {     31,11}, {     63, 8}, {    511,10}, {    135, 9}, \
-    {    271,10}, {    143,11}, {     79,10}, {    159, 9}, \
-    {    319,10}, {    175,11}, {     95,10}, {    191, 9}, \
-    {    383,10}, {    207,11}, {    111,12}, {     63,11}, \
-    {    127,10}, {    271,11}, {    143,10}, {    287, 9}, \
-    {    575,10}, {    303,11}, {    159,10}, {    319, 9}, \
-    {    639,12}, {     95,11}, {    191,10}, {    383,11}, \
-    {    207,13}, {     63,12}, {    127,11}, {    271,10}, \
-    {    543,11}, {    287,10}, {    575,12}, {    159,11}, \
-    {    351,12}, {    191,11}, {    415,12}, {    223,11}, \
-    {    447,10}, {    895,11}, {    479,13}, {    127,12}, \
-    {    255,11}, {    543,12}, {    287,11}, {    607,12}, \
-    {    319,11}, {    639,12}, {    351,13}, {    191,12}, \
-    {    415,11}, {    831,12}, {    479,11}, {    959,14}, \
-    {    127,13}, {    255,12}, {    607,13}, {    319,12}, \
-    {    703,13}, {    383,12}, {    831,13}, {    447,12}, \
-    {    895,14}, {    255,13}, {    511,12}, {   1023,13}, \
-    {    575,12}, {   1215,13}, {    639,12}, {   1279,13}, \
-    {    703,14}, {    383,13}, {    767,12}, {   1535,13}, \
-    {    831,12}, {   1663,13}, {    959,15}, {    255,14}, \
-    {    511,13}, {   1087,12}, {   2175,13}, {   1215,14}, \
-    {    639,13}, {   1343,12}, {   2687,13}, {   1407,12}, \
-    {   2815,14}, {    767,13}, {   1663,14}, {    895,13}, \
+    {     23, 9}, {     59,11}, {     15,10}, {     31, 8}, \
+    {    125, 9}, {     67,10}, {     39, 9}, {     79,10}, \
+    {     47, 9}, {    103,10}, {     79, 9}, {    159,10}, \
+    {     87, 9}, {    175, 8}, {    351,11}, {     47,10}, \
+    {     95,11}, {     63,10}, {    127, 8}, {    511, 9}, \
+    {    271, 8}, {    543,11}, {     79,10}, {    175,11}, \
+    {     95,10}, {    191, 9}, {    415,12}, {     63,11}, \
+    {    127,10}, {    255,11}, {    143,10}, {    287, 9}, \
+    {    575,11}, {    159,10}, {    319,11}, {    175,10}, \
+    {    351,12}, {     95,11}, {    191,10}, {    383,11}, \
+    {    207,10}, {    415,13}, {     63,12}, {    127,11}, \
+    {    255,10}, {    511, 9}, {   1023,10}, {    543,11}, \
+    {    287,10}, {    575,11}, {    303,12}, {    159,11}, \
+    {    319,10}, {    639,11}, {    351,12}, {    191,11}, \
+    {    383,10}, {    767,11}, {    415,10}, {    831,12}, \
+    {    223,11}, {    479,13}, {    127,12}, {    255,11}, \
+    {    543,12}, {    287,11}, {    575,12}, {    319,11}, \
+    {    639,12}, {    351,13}, {    191,12}, {    383,11}, \
+    {    767,12}, {    415,11}, {    831,12}, {    447,11}, \
+    {    895,12}, {    479,14}, {    127,13}, {    255,12}, \
+    {    543,11}, {   1087,12}, {    607,13}, {    319,12}, \
+    {    639,11}, {   1279,12}, {    703,13}, {    383,12}, \
+    {    831,13}, {    447,12}, {    959,14}, {    255,13}, \
+    {    511,12}, {   1087,13}, {    575,12}, {   1215,13}, \
+    {    639,12}, {   1279,13}, {    703,14}, {    383,13}, \
+    {    767,12}, {   1535,13}, {    831,12}, {   1663,13}, \
+    {    959,15}, {    255,14}, {    511,13}, {   1087,12}, \
+    {   2175,13}, {   1215,14}, {    639,13}, {   1343,12}, \
+    {   2687,13}, {   1407,12}, {   2815,14}, {    767,13}, \
+    {   1535,12}, {   3071,13}, {   1663,14}, {    895,13}, \
      {   1791,15}, {    511,14}, {   1023,13}, {   2175,14}, \
      {   1151,13}, {   2303,12}, {   4607,13}, {   2431,12}, \
      {   4863,14}, {   1279,13}, {   2687,14}, {   1407,13}, \
@@ -148,39 +154,47 @@ along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
      {   6911,14}, {   1791,16}, {  65536,17}, { 131072,18}, \
      { 262144,19}, { 524288,20}, {1048576,21}, {2097152,22}, \
      {4194304,23}, {8388608,24} }
-#define SQR_FFT_TABLE3_SIZE 166
-#define SQR_FFT_THRESHOLD                 3200
+#define SQR_FFT_TABLE3_SIZE 178
+#define SQR_FFT_THRESHOLD                 3520
  
-#define MULLO_BASECASE_THRESHOLD             3
-#define MULLO_DC_THRESHOLD                  20
-#define MULLO_MUL_N_THRESHOLD             8648
+#define MULLO_BASECASE_THRESHOLD             0  /* always */
+#define MULLO_DC_THRESHOLD                  55
+#define MULLO_MUL_N_THRESHOLD             9174
  
-#define DC_DIV_QR_THRESHOLD                 46
-#define DC_DIVAPPR_Q_THRESHOLD             190
-#define DC_BDIV_QR_THRESHOLD                57
-#define DC_BDIV_Q_THRESHOLD                156
+#define DC_DIV_QR_THRESHOLD                 54
+#define DC_DIVAPPR_Q_THRESHOLD             179
+#define DC_BDIV_QR_THRESHOLD                53
+#define DC_BDIV_Q_THRESHOLD                125
  
-#define INV_MULMOD_BNM1_THRESHOLD           50
-#define INV_NEWTON_THRESHOLD               172
+#define INV_MULMOD_BNM1_THRESHOLD           62
+#define INV_NEWTON_THRESHOLD               173
  #define INV_APPR_THRESHOLD                 172
  
-#define BINV_NEWTON_THRESHOLD              240
+#define BINV_NEWTON_THRESHOLD              230
  #define REDC_1_TO_REDC_2_THRESHOLD          10
  #define REDC_2_TO_REDC_N_THRESHOLD          63
  
  #define MU_DIV_QR_THRESHOLD               1334
-#define MU_DIVAPPR_Q_THRESHOLD            1334
-#define MUPI_DIV_QR_THRESHOLD               81
-#define MU_BDIV_QR_THRESHOLD              1037
-#define MU_BDIV_Q_THRESHOLD               1334
-
-#define MATRIX22_STRASSEN_THRESHOLD         18
-#define HGCD_THRESHOLD                     138
-#define GCD_DC_THRESHOLD                   465
-#define GCDEXT_DC_THRESHOLD                365
-#define JACOBI_BASE_METHOD                   1
-
-#define GET_STR_DC_THRESHOLD                 9
-#define GET_STR_PRECOMPUTE_THRESHOLD        20
+#define MU_DIVAPPR_Q_THRESHOLD            1210
+#define MUPI_DIV_QR_THRESHOLD               79
+#define MU_BDIV_QR_THRESHOLD              1057
+#define MU_BDIV_Q_THRESHOLD               1187
+
+#define POWM_SEC_TABLE  2,65,322,780
+
+
+#define MATRIX22_STRASSEN_THRESHOLD         17
+#define HGCD_THRESHOLD                     135
+#define HGCD_APPR_THRESHOLD                178
+#define HGCD_REDUCE_THRESHOLD             2121
+#define GCD_DC_THRESHOLD                   416
+#define GCDEXT_DC_THRESHOLD                361
+#define JACOBI_BASE_METHOD                   4
+
+#define GET_STR_DC_THRESHOLD                12
+#define GET_STR_PRECOMPUTE_THRESHOLD        21
  #define SET_STR_DC_THRESHOLD               552
-#define SET_STR_PRECOMPUTE_THRESHOLD      1790
+#define SET_STR_PRECOMPUTE_THRESHOLD      1815
+
+#define FAC_DSC_THRESHOLD                  608
+#define FAC_ODD_THRESHOLD                   28
diff --git a/mpn/x86_64/core2/lshift.asm b/mpn/x86_64/core2/lshift.asm

index e3e4008746969f182f24e169c2726bf1f9a07f30..f076eb8316bba64fe9fcbb1fd6b06d10b07656d4 100644 (file)
--- a/mpn/x86_64/core2/lshift.asm
+++ b/mpn/x86_64/core2/lshift.asm
@@ -1,19 +1,19 @@
  dnl  x86-64 mpn_lshift optimized for "Core 2".
  
-dnl  Copyright 2007, 2009 Free Software Foundation, Inc.
-dnl
+dnl  Copyright 2007, 2009, 2011, 2012 Free Software Foundation, Inc.
+
  dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or
-dnl  modify it under the terms of the GNU Lesser General Public License as
-dnl  published by the Free Software Foundation; either version 3 of the
-dnl  License, or (at your option) any later version.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful,
-dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
-dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-dnl  Lesser General Public License for more details.
-dnl
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
  dnl  You should have received a copy of the GNU Lesser General Public License
  dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
  
@@ -21,45 +21,52 @@ include(`../config.m4')
  
  
  C           cycles/limb
-C K8,K9:        4.25
-C K10:          4.25
-C P4:          14.7
-C P6 core2:     1.27
-C P6 corei7:    1.5
+C AMD K8,K9     4.25
+C AMD K10       4.25
+C Intel P4     14.7
+C Intel core2   1.27
+C Intel NHM     1.375  (up to about n = 260, then 1.5)
+C Intel SBR     1.87
+C Intel atom    ?
+C VIA nano      ?
  
  
  C INPUT PARAMETERS
  define(`rp',   `%rdi')
  define(`up',   `%rsi')
  define(`n',    `%rdx')
-define(`cnt',  `%cl')
+define(`cnt',  `%rcx')
+
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
  
  ASM_START()
         TEXT
         ALIGN(16)
  PROLOGUE(mpn_lshift)
+       FUNC_ENTRY(4)
         lea     -8(rp,n,8), rp
         lea     -8(up,n,8), up
  
-       mov     %edx, %eax
-       and     $3, %eax
+       mov     R32(%rdx), R32(%rax)
+       and     $3, R32(%rax)
         jne     L(nb00)
  L(b00):        C n = 4, 8, 12, ...
         mov     (up), %r10
         mov     -8(up), %r11
-       xor     %eax, %eax
-       shld    %cl, %r10, %rax
+       xor     R32(%rax), R32(%rax)
+       shld    R8(cnt), %r10, %rax
         mov     -16(up), %r8
         lea     24(rp), rp
         sub     $4, n
         jmp     L(00)
  
  L(nb00):C n = 1, 5, 9, ...
-       cmp     $2, %eax
+       cmp     $2, R32(%rax)
         jae     L(nb01)
  L(b01):        mov     (up), %r9
-       xor     %eax, %eax
-       shld    %cl, %r9, %rax
+       xor     R32(%rax), R32(%rax)
+       shld    R8(cnt), %r9, %rax
         sub     $2, n
         jb      L(le1)
         mov     -8(up), %r10
@@ -67,50 +74,52 @@ L(b01):     mov     (up), %r9
         lea     -8(up), up
         lea     16(rp), rp
         jmp     L(01)
-L(le1):        shl     %cl, %r9
+L(le1):        shl     R8(cnt), %r9
         mov     %r9, (rp)
+       FUNC_EXIT()
         ret
  
  L(nb01):C n = 2, 6, 10, ...
         jne     L(b11)
  L(b10):        mov     (up), %r8
         mov     -8(up), %r9
-       xor     %eax, %eax
-       shld    %cl, %r8, %rax
+       xor     R32(%rax), R32(%rax)
+       shld    R8(cnt), %r8, %rax
         sub     $3, n
         jb      L(le2)
         mov     -16(up), %r10
         lea     -16(up), up
         lea     8(rp), rp
         jmp     L(10)
-L(le2):        shld    %cl, %r9, %r8
+L(le2):        shld    R8(cnt), %r9, %r8
         mov     %r8, (rp)
-       shl     %cl, %r9
+       shl     R8(cnt), %r9
         mov     %r9, -8(rp)
+       FUNC_EXIT()
         ret
  
         ALIGN(16)                       C performance critical!
  L(b11):        C n = 3, 7, 11, ...
         mov     (up), %r11
         mov     -8(up), %r8
-       xor     %eax, %eax
-       shld    %cl, %r11, %rax
+       xor     R32(%rax), R32(%rax)
+       shld    R8(cnt), %r11, %rax
         mov     -16(up), %r9
         lea     -24(up), up
         sub     $4, n
         jb      L(end)
  
         ALIGN(16)
-L(top):        shld    %cl, %r8, %r11
+L(top):        shld    R8(cnt), %r8, %r11
         mov     (up), %r10
         mov     %r11, (rp)
-L(10): shld    %cl, %r9, %r8
+L(10): shld    R8(cnt), %r9, %r8
         mov     -8(up), %r11
         mov     %r8, -8(rp)
-L(01): shld    %cl, %r10, %r9
+L(01): shld    R8(cnt), %r10, %r9
         mov     -16(up), %r8
         mov     %r9, -16(rp)
-L(00): shld    %cl, %r11, %r10
+L(00): shld    R8(cnt), %r11, %r10
         mov     -24(up), %r9
         mov     %r10, -24(rp)
         add     $-32, up
@@ -118,11 +127,12 @@ L(00):    shld    %cl, %r11, %r10
         sub     $4, n
         jnc     L(top)
  
-L(end):        shld    %cl, %r8, %r11
+L(end):        shld    R8(cnt), %r8, %r11
         mov     %r11, (rp)
-       shld    %cl, %r9, %r8
+       shld    R8(cnt), %r9, %r8
         mov     %r8, -8(rp)
-       shl     %cl, %r9
+       shl     R8(cnt), %r9
         mov     %r9, -16(rp)
+       FUNC_EXIT()
         ret
  EPILOGUE()
diff --git a/mpn/x86_64/core2/lshiftc.asm b/mpn/x86_64/core2/lshiftc.asm

index bc014c8558d6bd27438141c417a0d829b0b4891d..fb46f966cdc0a4a0d88ed3f0939dc2b31256310f 100644 (file)
--- a/mpn/x86_64/core2/lshiftc.asm
+++ b/mpn/x86_64/core2/lshiftc.asm
@@ -1,19 +1,19 @@
  dnl  x86-64 mpn_lshiftc optimized for "Core 2".
  
-dnl  Copyright 2007, 2009 Free Software Foundation, Inc.
-dnl
+dnl  Copyright 2007, 2009, 2011, 2012 Free Software Foundation, Inc.
+
  dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or
-dnl  modify it under the terms of the GNU Lesser General Public License as
-dnl  published by the Free Software Foundation; either version 3 of the
-dnl  License, or (at your option) any later version.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful,
-dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
-dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-dnl  Lesser General Public License for more details.
-dnl
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
  dnl  You should have received a copy of the GNU Lesser General Public License
  dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
  
@@ -21,45 +21,52 @@ include(`../config.m4')
  
  
  C           cycles/limb
-C K8,K9:        ?
-C K10:          ?
-C P4:           ?
-C P6 core2:     1.5
-C P6 corei7:    1.75
+C AMD K8,K9     ?
+C AMD K10       ?
+C Intel P4      ?
+C Intel core2   1.5
+C Intel NHM     2.25   (up to about n = 260, then 1.875)
+C Intel SBR     2.25
+C Intel atom    ?
+C VIA nano      ?
  
  
  C INPUT PARAMETERS
  define(`rp',   `%rdi')
  define(`up',   `%rsi')
  define(`n',    `%rdx')
-define(`cnt',  `%cl')
+define(`cnt',  `%rcx')
+
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
  
  ASM_START()
         TEXT
         ALIGN(16)
  PROLOGUE(mpn_lshiftc)
+       FUNC_ENTRY(4)
         lea     -8(rp,n,8), rp
         lea     -8(up,n,8), up
  
-       mov     %edx, %eax
-       and     $3, %eax
+       mov     R32(%rdx), R32(%rax)
+       and     $3, R32(%rax)
         jne     L(nb00)
  L(b00):        C n = 4, 8, 12, ...
         mov     (up), %r10
         mov     -8(up), %r11
-       xor     %eax, %eax
-       shld    %cl, %r10, %rax
+       xor     R32(%rax), R32(%rax)
+       shld    R8(cnt), %r10, %rax
         mov     -16(up), %r8
         lea     24(rp), rp
         sub     $4, n
         jmp     L(00)
  
  L(nb00):C n = 1, 5, 9, ...
-       cmp     $2, %eax
+       cmp     $2, R32(%rax)
         jae     L(nb01)
  L(b01):        mov     (up), %r9
-       xor     %eax, %eax
-       shld    %cl, %r9, %rax
+       xor     R32(%rax), R32(%rax)
+       shld    R8(cnt), %r9, %rax
         sub     $2, n
         jb      L(le1)
         mov     -8(up), %r10
@@ -67,56 +74,58 @@ L(b01):     mov     (up), %r9
         lea     -8(up), up
         lea     16(rp), rp
         jmp     L(01)
-L(le1):        shl     %cl, %r9
+L(le1):        shl     R8(cnt), %r9
         not     %r9
         mov     %r9, (rp)
+       FUNC_EXIT()
         ret
  
  L(nb01):C n = 2, 6, 10, ...
         jne     L(b11)
  L(b10):        mov     (up), %r8
         mov     -8(up), %r9
-       xor     %eax, %eax
-       shld    %cl, %r8, %rax
+       xor     R32(%rax), R32(%rax)
+       shld    R8(cnt), %r8, %rax
         sub     $3, n
         jb      L(le2)
         mov     -16(up), %r10
         lea     -16(up), up
         lea     8(rp), rp
         jmp     L(10)
-L(le2):        shld    %cl, %r9, %r8
+L(le2):        shld    R8(cnt), %r9, %r8
         not     %r8
         mov     %r8, (rp)
-       shl     %cl, %r9
+       shl     R8(cnt), %r9
         not     %r9
         mov     %r9, -8(rp)
+       FUNC_EXIT()
         ret
  
         ALIGN(16)                       C performance critical!
  L(b11):        C n = 3, 7, 11, ...
         mov     (up), %r11
         mov     -8(up), %r8
-       xor     %eax, %eax
-       shld    %cl, %r11, %rax
+       xor     R32(%rax), R32(%rax)
+       shld    R8(cnt), %r11, %rax
         mov     -16(up), %r9
         lea     -24(up), up
         sub     $4, n
         jb      L(end)
  
         ALIGN(16)
-L(top):        shld    %cl, %r8, %r11
+L(top):        shld    R8(cnt), %r8, %r11
         mov     (up), %r10
         not     %r11
         mov     %r11, (rp)
-L(10): shld    %cl, %r9, %r8
+L(10): shld    R8(cnt), %r9, %r8
         mov     -8(up), %r11
         not     %r8
         mov     %r8, -8(rp)
-L(01): shld    %cl, %r10, %r9
+L(01): shld    R8(cnt), %r10, %r9
         mov     -16(up), %r8
         not     %r9
         mov     %r9, -16(rp)
-L(00): shld    %cl, %r11, %r10
+L(00): shld    R8(cnt), %r11, %r10
         mov     -24(up), %r9
         not     %r10
         mov     %r10, -24(rp)
@@ -125,14 +134,15 @@ L(00):    shld    %cl, %r11, %r10
         sub     $4, n
         jnc     L(top)
  
-L(end):        shld    %cl, %r8, %r11
+L(end):        shld    R8(cnt), %r8, %r11
         not     %r11
         mov     %r11, (rp)
-       shld    %cl, %r9, %r8
+       shld    R8(cnt), %r9, %r8
         not     %r8
         mov     %r8, -8(rp)
-       shl     %cl, %r9
+       shl     R8(cnt), %r9
         not     %r9
         mov     %r9, -16(rp)
+       FUNC_EXIT()
         ret
  EPILOGUE()
diff --git a/mpn/x86_64/core2/rsh1aors_n.asm b/mpn/x86_64/core2/rsh1aors_n.asm

new file mode 100644 (file)

index 0000000..bbac0f0
--- /dev/null
+++ b/mpn/x86_64/core2/rsh1aors_n.asm
@@ -0,0 +1,158 @@
+dnl  X86-64 mpn_rsh1add_n, mpn_rsh1sub_n optimised for Intel Conroe/Penryn.
+
+dnl  Copyright 2003, 2005, 2009, 2011, 2012 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C           cycles/limb
+C AMD K8,K9     ?
+C AMD K10       ?
+C Intel P4      ?
+C Intel core2   3.05
+C Intel NHM     3.3
+C Intel SBR     2.5
+C Intel atom    ?
+C VIA nano      ?
+
+C TODO
+C  * Loopmix to approach 2.5 c/l on NHM.
+
+C INPUT PARAMETERS
+define(`rp', `%rdi')
+define(`up', `%rsi')
+define(`vp', `%rdx')
+define(`n',  `%rcx')
+
+ifdef(`OPERATION_rsh1add_n', `
+       define(ADDSUB,        add)
+       define(ADCSBB,        adc)
+       define(func_n,        mpn_rsh1add_n)
+       define(func_nc,       mpn_rsh1add_nc)')
+ifdef(`OPERATION_rsh1sub_n', `
+       define(ADDSUB,        sub)
+       define(ADCSBB,        sbb)
+       define(func_n,        mpn_rsh1sub_n)
+       define(func_nc,       mpn_rsh1sub_nc)')
+
+MULFUNC_PROLOGUE(mpn_rsh1add_n mpn_rsh1add_nc mpn_rsh1sub_n mpn_rsh1sub_nc)
+
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
+
+ASM_START()
+       TEXT
+       ALIGN(16)
+PROLOGUE(func_nc)
+       FUNC_ENTRY(4)
+IFDOS(`        mov     56(%rsp), %r8   ')
+       push    %rbx
+       push    %rbp
+
+       neg     %r8                     C set C flag from parameter
+       mov     (up), %r8
+       ADCSBB  (vp), %r8
+       jmp     L(ent)
+EPILOGUE()
+
+       ALIGN(16)
+PROLOGUE(func_n)
+       FUNC_ENTRY(4)
+       push    %rbx
+       push    %rbp
+
+       mov     (up), %r8
+       ADDSUB  (vp), %r8
+L(ent):        sbb     R32(%rbx), R32(%rbx)    C save cy
+       mov     %r8, %rax
+       and     $1, R32(%rax)           C return value
+
+       lea     (up,n,8), up
+       lea     (vp,n,8), vp
+       lea     (rp,n,8), rp
+       mov     R32(n), R32(%rbp)
+       neg     n
+       and     $3, R32(%rbp)
+       jz      L(b0)
+       cmp     $2, R32(%rbp)
+       jae     L(n1)
+
+L(b1): mov     %r8, %rbp
+       inc     n
+       js      L(top)
+       jmp     L(end)
+
+L(n1): jnz     L(b3)
+       add     R32(%rbx), R32(%rbx)    C restore cy
+       mov     8(up,n,8), %r11
+       ADCSBB  8(vp,n,8), %r11
+       sbb     R32(%rbx), R32(%rbx)    C save cy
+       mov     %r8, %r10
+       add     $-2, n
+       jmp     L(2)
+
+L(b3): add     R32(%rbx), R32(%rbx)    C restore cy
+       mov     8(up,n,8), %r10
+       mov     16(up,n,8), %r11
+       ADCSBB  8(vp,n,8), %r10
+       ADCSBB  16(vp,n,8), %r11
+       sbb     R32(%rbx), R32(%rbx)    C save cy
+       mov     %r8, %r9
+       dec     n
+       jmp     L(3)
+
+L(b0): add     R32(%rbx), R32(%rbx)    C restore cy
+       mov     8(up,n,8), %r9
+       mov     16(up,n,8), %r10
+       mov     24(up,n,8), %r11
+       ADCSBB  8(vp,n,8), %r9
+       ADCSBB  16(vp,n,8), %r10
+       ADCSBB  24(vp,n,8), %r11
+       sbb     R32(%rbx), R32(%rbx)    C save cy
+       jmp     L(4)
+
+       ALIGN(16)
+
+L(top):        add     R32(%rbx), R32(%rbx)    C restore cy
+       mov     (up,n,8), %r8
+       mov     8(up,n,8), %r9
+       mov     16(up,n,8), %r10
+       mov     24(up,n,8), %r11
+       ADCSBB  (vp,n,8), %r8
+       ADCSBB  8(vp,n,8), %r9
+       ADCSBB  16(vp,n,8), %r10
+       ADCSBB  24(vp,n,8), %r11
+       sbb     R32(%rbx), R32(%rbx)    C save cy
+       shrd    $1, %r8, %rbp
+       mov     %rbp, -8(rp,n,8)
+L(4):  shrd    $1, %r9, %r8
+       mov     %r8, (rp,n,8)
+L(3):  shrd    $1, %r10, %r9
+       mov     %r9, 8(rp,n,8)
+L(2):  shrd    $1, %r11, %r10
+       mov     %r10, 16(rp,n,8)
+L(1):  add     $4, n
+       mov     %r11, %rbp
+       js      L(top)
+
+L(end):        shrd    $1, %rbx, %rbp
+       mov     %rbp, -8(rp)
+       pop     %rbp
+       pop     %rbx
+       FUNC_EXIT()
+       ret
+EPILOGUE()
diff --git a/mpn/x86_64/core2/rshift.asm b/mpn/x86_64/core2/rshift.asm

index 485fd4b0d95099ad4a15b5706cb5966c9a0da2bc..e93e8a4b2b4b5d9f0fa5a4e1eb7b63b8e423deb0 100644 (file)
--- a/mpn/x86_64/core2/rshift.asm
+++ b/mpn/x86_64/core2/rshift.asm
@@ -1,19 +1,19 @@
  dnl  x86-64 mpn_rshift optimized for "Core 2".
  
-dnl  Copyright 2007, 2009 Free Software Foundation, Inc.
-dnl
+dnl  Copyright 2007, 2009, 2011, 2012 Free Software Foundation, Inc.
+
  dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or
-dnl  modify it under the terms of the GNU Lesser General Public License as
-dnl  published by the Free Software Foundation; either version 3 of the
-dnl  License, or (at your option) any later version.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful,
-dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
-dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-dnl  Lesser General Public License for more details.
-dnl
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
  dnl  You should have received a copy of the GNU Lesser General Public License
  dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
  
@@ -21,31 +21,38 @@ include(`../config.m4')
  
  
  C           cycles/limb
-C K8,K9:        4.25
-C K10:          4.25
-C P4:          14.7
-C P6 core2:     1.27
-C P6 corei7:    1.5
+C AMD K8,K9     4.25
+C AMD K10       4.25
+C Intel P4     14.7
+C Intel core2   1.27
+C Intel NHM     1.375  (up to about n = 260, then 1.5)
+C Intel SBR     1.77
+C Intel atom    ?
+C VIA nano      ?
  
  
  C INPUT PARAMETERS
  define(`rp',   `%rdi')
  define(`up',   `%rsi')
  define(`n',    `%rdx')
-define(`cnt',  `%cl')
+define(`cnt',  `%rcx')
+
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
  
  ASM_START()
         TEXT
         ALIGN(16)
  PROLOGUE(mpn_rshift)
-       mov     %edx, %eax
-       and     $3, %eax
+       FUNC_ENTRY(4)
+       mov     R32(%rdx), R32(%rax)
+       and     $3, R32(%rax)
         jne     L(nb00)
  L(b00):        C n = 4, 8, 12, ...
         mov     (up), %r10
         mov     8(up), %r11
-       xor     %eax, %eax
-       shrd    %cl, %r10, %rax
+       xor     R32(%rax), R32(%rax)
+       shrd    R8(cnt), %r10, %rax
         mov     16(up), %r8
         lea     8(up), up
         lea     -24(rp), rp
@@ -53,11 +60,11 @@ L(b00):     C n = 4, 8, 12, ...
         jmp     L(00)
  
  L(nb00):C n = 1, 5, 9, ...
-       cmp     $2, %eax
+       cmp     $2, R32(%rax)
         jae     L(nb01)
  L(b01):        mov     (up), %r9
-       xor     %eax, %eax
-       shrd    %cl, %r9, %rax
+       xor     R32(%rax), R32(%rax)
+       shrd    R8(cnt), %r9, %rax
         sub     $2, n
         jb      L(le1)
         mov     8(up), %r10
@@ -65,50 +72,52 @@ L(b01):     mov     (up), %r9
         lea     16(up), up
         lea     -16(rp), rp
         jmp     L(01)
-L(le1):        shr     %cl, %r9
+L(le1):        shr     R8(cnt), %r9
         mov     %r9, (rp)
+       FUNC_EXIT()
         ret
  
  L(nb01):C n = 2, 6, 10, ...
         jne     L(b11)
  L(b10):        mov     (up), %r8
         mov     8(up), %r9
-       xor     %eax, %eax
-       shrd    %cl, %r8, %rax
+       xor     R32(%rax), R32(%rax)
+       shrd    R8(cnt), %r8, %rax
         sub     $3, n
         jb      L(le2)
         mov     16(up), %r10
         lea     24(up), up
         lea     -8(rp), rp
         jmp     L(10)
-L(le2):        shrd    %cl, %r9, %r8
+L(le2):        shrd    R8(cnt), %r9, %r8
         mov     %r8, (rp)
-       shr     %cl, %r9
+       shr     R8(cnt), %r9
         mov     %r9, 8(rp)
+       FUNC_EXIT()
         ret
  
         ALIGN(16)
  L(b11):        C n = 3, 7, 11, ...
         mov     (up), %r11
         mov     8(up), %r8
-       xor     %eax, %eax
-       shrd    %cl, %r11, %rax
+       xor     R32(%rax), R32(%rax)
+       shrd    R8(cnt), %r11, %rax
         mov     16(up), %r9
         lea     32(up), up
         sub     $4, n
         jb      L(end)
  
         ALIGN(16)
-L(top):        shrd    %cl, %r8, %r11
+L(top):        shrd    R8(cnt), %r8, %r11
         mov     -8(up), %r10
         mov     %r11, (rp)
-L(10): shrd    %cl, %r9, %r8
+L(10): shrd    R8(cnt), %r9, %r8
         mov     (up), %r11
         mov     %r8, 8(rp)
-L(01): shrd    %cl, %r10, %r9
+L(01): shrd    R8(cnt), %r10, %r9
         mov     8(up), %r8
         mov     %r9, 16(rp)
-L(00): shrd    %cl, %r11, %r10
+L(00): shrd    R8(cnt), %r11, %r10
         mov     16(up), %r9
         mov     %r10, 24(rp)
         add     $32, up
@@ -116,11 +125,12 @@ L(00):    shrd    %cl, %r11, %r10
         sub     $4, n
         jnc     L(top)
  
-L(end):        shrd    %cl, %r8, %r11
+L(end):        shrd    R8(cnt), %r8, %r11
         mov     %r11, (rp)
-       shrd    %cl, %r9, %r8
+       shrd    R8(cnt), %r9, %r8
         mov     %r8, 8(rp)
-       shr     %cl, %r9
+       shr     R8(cnt), %r9
         mov     %r9, 16(rp)
+       FUNC_EXIT()
         ret
  EPILOGUE()
diff --git a/mpn/x86_64/core2/sublsh1_n.asm b/mpn/x86_64/core2/sublsh1_n.asm

new file mode 100644 (file)

index 0000000..c37ac2f
--- /dev/null
+++ b/mpn/x86_64/core2/sublsh1_n.asm
@@ -0,0 +1,36 @@
+dnl  AMD64 mpn_sublsh1_n optimised for Core 2 and Core iN.
+
+dnl  Contributed to the GNU project by Torbjorn Granlund.
+
+dnl  Copyright 2008, 2010, 2011, 2012 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+define(LSH, 1)
+define(RSH, 63)
+
+define(ADDSUB, sub)
+define(ADCSBB, sbb)
+define(func,   mpn_sublsh1_n)
+
+MULFUNC_PROLOGUE(mpn_sublsh1_n)
+
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
+
+include_mpn(`x86_64/core2/sublshC_n.asm')
diff --git a/mpn/x86_64/core2/sublsh2_n.asm b/mpn/x86_64/core2/sublsh2_n.asm

new file mode 100644 (file)

index 0000000..9ad9ad4
--- /dev/null
+++ b/mpn/x86_64/core2/sublsh2_n.asm
@@ -0,0 +1,36 @@
+dnl  AMD64 mpn_sublsh2_n optimised for Core 2 and Core iN.
+
+dnl  Contributed to the GNU project by Torbjorn Granlund.
+
+dnl  Copyright 2008, 2010, 2011, 2012 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+define(LSH, 2)
+define(RSH, 62)
+
+define(ADDSUB, sub)
+define(ADCSBB, sbb)
+define(func,   mpn_sublsh2_n)
+
+MULFUNC_PROLOGUE(mpn_sublsh2_n)
+
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
+
+include_mpn(`x86_64/core2/sublshC_n.asm')
diff --git a/mpn/x86_64/core2/sublshC_n.asm b/mpn/x86_64/core2/sublshC_n.asm

new file mode 100644 (file)

index 0000000..3430a06
--- /dev/null
+++ b/mpn/x86_64/core2/sublshC_n.asm
@@ -0,0 +1,147 @@
+dnl  AMD64 mpn_sublshC_n -- rp[] = up[] - (vp[] << C), optimised for Core 2 and
+dnl  Core iN.
+
+dnl  Contributed to the GNU project by Torbjorn Granlund.
+
+dnl  Copyright 2008, 2010, 2011, 2012 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+C           cycles/limb
+C AMD K8,K9     4.25
+C AMD K10       ?
+C Intel P4      ?
+C Intel core2   3
+C Intel NHM     3.1
+C Intel SBR     2.47
+C Intel atom    ?
+C VIA nano      ?
+
+C INPUT PARAMETERS
+define(`rp',`%rdi')
+define(`up',`%rsi')
+define(`vp',`%rdx')
+define(`n', `%rcx')
+
+ASM_START()
+       TEXT
+       ALIGN(8)
+PROLOGUE(func)
+       FUNC_ENTRY(4)
+       push    %rbx
+       push    %r12
+
+       mov     R32(%rcx), R32(%rax)
+       lea     24(up,n,8), up
+       lea     24(vp,n,8), vp
+       lea     24(rp,n,8), rp
+       neg     n
+
+       xor     R32(%r11), R32(%r11)
+
+       mov     -24(vp,n,8), %r8        C do first limb early
+       shrd    $RSH, %r8, %r11
+
+       and     $3, R32(%rax)
+       je      L(b0)
+       cmp     $2, R32(%rax)
+       jc      L(b1)
+       je      L(b2)
+
+L(b3): mov     -16(vp,n,8), %r9
+       shrd    $RSH, %r9, %r8
+       mov     -8(vp,n,8), %r10
+       shrd    $RSH, %r10, %r9
+       mov     -24(up,n,8), %r12
+       ADDSUB  %r11, %r12
+       mov     %r12, -24(rp,n,8)
+       mov     -16(up,n,8), %r12
+       ADCSBB  %r8, %r12
+       mov     %r12, -16(rp,n,8)
+       mov     -8(up,n,8), %r12
+       ADCSBB  %r9, %r12
+       mov     %r12, -8(rp,n,8)
+       mov     %r10, %r11
+       sbb     R32(%rax), R32(%rax)    C save cy
+       add     $3, n
+       js      L(top)
+       jmp     L(end)
+
+L(b1): mov     -24(up,n,8), %r12
+       ADDSUB  %r11, %r12
+       mov     %r12, -24(rp,n,8)
+       mov     %r8, %r11
+       sbb     R32(%rax), R32(%rax)    C save cy
+       inc     n
+       js      L(top)
+       jmp     L(end)
+
+L(b2): mov     -16(vp,n,8), %r9
+       shrd    $RSH, %r9, %r8
+       mov     -24(up,n,8), %r12
+       ADDSUB  %r11, %r12
+       mov     %r12, -24(rp,n,8)
+       mov     -16(up,n,8), %r12
+       ADCSBB  %r8, %r12
+       mov     %r12, -16(rp,n,8)
+       mov     %r9, %r11
+       sbb     R32(%rax), R32(%rax)    C save cy
+       add     $2, n
+       js      L(top)
+       jmp     L(end)
+
+       ALIGN(16)
+L(top):        mov     -24(vp,n,8), %r8
+       shrd    $RSH, %r8, %r11
+L(b0): mov     -16(vp,n,8), %r9
+       shrd    $RSH, %r9, %r8
+       mov     -8(vp,n,8), %r10
+       shrd    $RSH, %r10, %r9
+       mov     (vp,n,8), %rbx
+       shrd    $RSH, %rbx, %r10
+
+       add     R32(%rax), R32(%rax)    C restore cy
+
+       mov     -24(up,n,8), %r12
+       ADCSBB  %r11, %r12
+       mov     %r12, -24(rp,n,8)
+
+       mov     -16(up,n,8), %r12
+       ADCSBB  %r8, %r12
+       mov     %r12, -16(rp,n,8)
+
+       mov     -8(up,n,8), %r12
+       ADCSBB  %r9, %r12
+       mov     %r12, -8(rp,n,8)
+
+       mov     (up,n,8), %r12
+       ADCSBB  %r10, %r12
+       mov     %r12, (rp,n,8)
+
+       mov     %rbx, %r11
+       sbb     R32(%rax), R32(%rax)    C save cy
+
+       add     $4, n
+       js      L(top)
+
+L(end):        shr     $RSH, %r11
+       pop     %r12
+       pop     %rbx
+       sub     R32(%r11), R32(%rax)
+       neg     R32(%rax)
+       FUNC_EXIT()
+       ret
+EPILOGUE()
diff --git a/mpn/x86_64/coreinhm/aorrlsh_n.asm b/mpn/x86_64/coreinhm/aorrlsh_n.asm

new file mode 100644 (file)

index 0000000..8854a24
--- /dev/null
+++ b/mpn/x86_64/coreinhm/aorrlsh_n.asm
@@ -0,0 +1,189 @@
+dnl  AMD64 mpn_addlsh_n -- rp[] = up[] + (vp[] << k)
+dnl  AMD64 mpn_rsblsh_n -- rp[] = (vp[] << k) - up[]
+dnl  Optimised for Nehalem.
+
+dnl  Contributed to the GNU project by Torbjorn Granlund.
+
+dnl  Copyright 2011, 2012 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C           cycles/limb
+C AMD K8,K9     ?
+C AMD K10       4.75
+C Intel P4      ?
+C Intel core2   2.8-3
+C Intel NHM     2.8
+C Intel SBR     3.55
+C Intel atom    ?
+C VIA nano      ?
+
+C The inner-loop probably runs close to optimally on Nehalem (using 4-way
+C unrolling).  The rest of the code is quite crude, and could perhaps be made
+C both smaller and faster.
+
+C INPUT PARAMETERS
+define(`rp',   `%rdi')
+define(`up',   `%rsi')
+define(`vp',   `%rdx')
+define(`n',    `%rcx')
+define(`cnt',  `%r8')
+define(`cy',   `%r9')                  C for _nc variant
+
+ifdef(`OPERATION_addlsh_n', `
+       define(ADDSUB,  add)
+       define(ADCSBB,  adc)
+       define(IFRSB,   )
+       define(func_n,  mpn_addlsh_n)
+       define(func_nc, mpn_addlsh_nc)')
+ifdef(`OPERATION_rsblsh_n', `
+       define(ADDSUB,  sub)
+       define(ADCSBB,  sbb)
+       define(IFRSB,   `$1')
+       define(func_n,  mpn_rsblsh_n)
+       define(func_nc, mpn_rsblsh_nc)')
+
+C mpn_rsblsh_nc is omitted from the list below because its idea of carry-in is
+C inconsistent with refmpn_rsblsh_nc.
+MULFUNC_PROLOGUE(mpn_addlsh_n mpn_addlsh_nc mpn_rsblsh_n)
+
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
+
+ASM_START()
+       TEXT
+       ALIGN(32)
+PROLOGUE(func_n)
+       FUNC_ENTRY(4)
+IFDOS(`        mov     56(%rsp), %r8d  ')      C cnt
+       push    %rbx
+       xor     R32(%rbx), R32(%rbx)    C clear CF save register
+L(ent):        push    %rbp
+       mov     R32(n), R32(%rbp)
+       mov     n, %rax
+
+       mov     R32(cnt), R32(%rcx)
+       neg     R32(%rcx)
+
+       lea     -8(up,%rax,8), up
+       lea     -8(vp,%rax,8), vp
+       lea     -40(rp,%rax,8), rp
+       neg     %rax
+
+       and     $3, R32(%rbp)
+       jz      L(b0)
+       cmp     $2, R32(%rbp)
+       jc      L(b1)
+       jz      L(b2)
+
+L(b3): xor     R32(%r9), R32(%r9)
+       mov     8(vp,%rax,8), %r10
+       mov     16(vp,%rax,8), %r11
+       shrd    %cl, %r10, %r9
+       shrd    %cl, %r11, %r10
+       add     R32(%rbx), R32(%rbx)
+       ADCSBB  8(up,%rax,8), %r9
+       mov     24(vp,%rax,8), %r8
+       ADCSBB  16(up,%rax,8), %r10
+       sbb     R32(%rbx), R32(%rbx)
+       add     $3, %rax
+       jmp     L(lo3)
+
+L(b0): mov     8(vp,%rax,8), %r9
+       xor     R32(%r8), R32(%r8)
+       shrd    %cl, %r9, %r8
+       mov     16(vp,%rax,8), %r10
+       mov     24(vp,%rax,8), %r11
+       shrd    %cl, %r10, %r9
+       shrd    %cl, %r11, %r10
+       add     R32(%rbx), R32(%rbx)
+       ADCSBB  8(up,%rax,8), %r8
+       mov     %r8, 40(rp,%rax,8)      C offset 40
+       ADCSBB  16(up,%rax,8), %r9
+       mov     32(vp,%rax,8), %r8
+       ADCSBB  24(up,%rax,8), %r10
+       sbb     R32(%rbx), R32(%rbx)
+       add     $4, %rax
+       jmp     L(lo0)
+
+L(b1): mov     8(vp,%rax,8), %r8
+       add     $1, %rax
+       jz      L(1)
+       mov     8(vp,%rax,8), %r9
+       xor     R32(%rbp), R32(%rbp)
+       jmp     L(lo1)
+L(1):  xor     R32(%r11), R32(%r11)
+       jmp     L(wd1)
+
+L(b2): xor     %r10, %r10
+       mov     8(vp,%rax,8), %r11
+       shrd    %cl, %r11, %r10
+       add     R32(%rbx), R32(%rbx)
+       mov     16(vp,%rax,8), %r8
+       ADCSBB  8(up,%rax,8), %r10
+       sbb     R32(%rbx), R32(%rbx)
+       add     $2, %rax
+       jz      L(end)
+
+       ALIGN(16)
+L(top):        mov     8(vp,%rax,8), %r9
+       mov     %r11, %rbp
+L(lo2):        mov     %r10, 24(rp,%rax,8)     C offset 24
+L(lo1):        shrd    %cl, %r8, %rbp
+       shrd    %cl, %r9, %r8
+       mov     16(vp,%rax,8), %r10
+       mov     24(vp,%rax,8), %r11
+       shrd    %cl, %r10, %r9
+       shrd    %cl, %r11, %r10
+       add     R32(%rbx), R32(%rbx)
+       ADCSBB  (up,%rax,8), %rbp
+       ADCSBB  8(up,%rax,8), %r8
+       mov     %r8, 40(rp,%rax,8)      C offset 40
+       ADCSBB  16(up,%rax,8), %r9
+       mov     32(vp,%rax,8), %r8
+       ADCSBB  24(up,%rax,8), %r10
+       sbb     R32(%rbx), R32(%rbx)
+       add     $4, %rax
+       mov     %rbp, (rp,%rax,8)       C offset 32
+L(lo0):
+L(lo3):        mov     %r9, 16(rp,%rax,8)      C offset 48
+       jnz     L(top)
+
+L(end):        mov     %r10, 24(rp,%rax,8)
+L(wd1):        shrd    %cl, %r8, %r11
+       add     R32(%rbx), R32(%rbx)
+       ADCSBB  (up,%rax,8), %r11
+       mov     %r11, 32(rp,%rax,8)     C offset 32
+       adc     R32(%rax), R32(%rax)    C rax is zero after loop
+       shr     R8(%rcx), %r8
+       ADDSUB  %r8, %rax
+IFRSB( neg     %rax)
+       pop     %rbp
+       pop     %rbx
+       FUNC_EXIT()
+       ret
+EPILOGUE()
+PROLOGUE(func_nc)
+       FUNC_ENTRY(4)
+IFDOS(`        mov     56(%rsp), %r8d  ')      C cnt
+IFDOS(`        mov     64(%rsp), %r9   ')      C cy
+       push    %rbx
+       neg     cy
+       sbb     R32(%rbx), R32(%rbx)    C initialise CF save register
+       jmp     L(ent)
+EPILOGUE()
diff --git a/mpn/x86_64/coreinhm/gmp-mparam.h b/mpn/x86_64/coreinhm/gmp-mparam.h

index 69c1c31976811e6668a80ea2bec1b915a760f5bb..4763d5751673546a205a37d92c8efb4632d553e3 100644 (file)
--- a/mpn/x86_64/coreinhm/gmp-mparam.h
+++ b/mpn/x86_64/coreinhm/gmp-mparam.h
@@ -1,7 +1,7 @@
  /* Nehalem gmp-mparam.h -- Compiler/machine parameter header file.
  
  Copyright 1991, 1993, 1994, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007,
-2008, 2009, 2010, 2011 Free Software Foundation, Inc.
+2008, 2009, 2010, 2011, 2012 Free Software Foundation, Inc.
  
  This file is part of the GNU MP Library.
  
@@ -25,114 +25,157 @@ along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
  
  #define MOD_1_NORM_THRESHOLD                 0  /* always */
  #define MOD_1_UNNORM_THRESHOLD               0  /* always */
-#define MOD_1N_TO_MOD_1_1_THRESHOLD          6
-#define MOD_1U_TO_MOD_1_1_THRESHOLD          6
-#define MOD_1_1_TO_MOD_1_2_THRESHOLD         0
-#define MOD_1_2_TO_MOD_1_4_THRESHOLD        14
-#define PREINV_MOD_1_TO_MOD_1_THRESHOLD     19
+#define MOD_1N_TO_MOD_1_1_THRESHOLD          3
+#define MOD_1U_TO_MOD_1_1_THRESHOLD          3
+#define MOD_1_1_TO_MOD_1_2_THRESHOLD        11
+#define MOD_1_2_TO_MOD_1_4_THRESHOLD        16
+#define PREINV_MOD_1_TO_MOD_1_THRESHOLD      9
  #define USE_PREINV_DIVREM_1                  1  /* native */
+#define DIV_QR_2_PI2_THRESHOLD           MP_SIZE_T_MAX  /* never */
  #define DIVEXACT_1_THRESHOLD                 0  /* always (native) */
-#define BMOD_1_TO_MOD_1_THRESHOLD           18
+#define BMOD_1_TO_MOD_1_THRESHOLD           16
  
  #define MUL_TOOM22_THRESHOLD                18
-#define MUL_TOOM33_THRESHOLD                65
-#define MUL_TOOM44_THRESHOLD               166
-#define MUL_TOOM6H_THRESHOLD               254
-#define MUL_TOOM8H_THRESHOLD               333
+#define MUL_TOOM33_THRESHOLD                57
+#define MUL_TOOM44_THRESHOLD               169
+#define MUL_TOOM6H_THRESHOLD               222
+#define MUL_TOOM8H_THRESHOLD               288
  
-#define MUL_TOOM32_TO_TOOM43_THRESHOLD      69
-#define MUL_TOOM32_TO_TOOM53_THRESHOLD      91
-#define MUL_TOOM42_TO_TOOM53_THRESHOLD      97
-#define MUL_TOOM42_TO_TOOM63_THRESHOLD      96
+#define MUL_TOOM32_TO_TOOM43_THRESHOLD      65
+#define MUL_TOOM32_TO_TOOM53_THRESHOLD     108
+#define MUL_TOOM42_TO_TOOM53_THRESHOLD      99
+#define MUL_TOOM42_TO_TOOM63_THRESHOLD     105
+#define MUL_TOOM43_TO_TOOM54_THRESHOLD      82
  
  #define SQR_BASECASE_THRESHOLD               0  /* always (native) */
-#define SQR_TOOM2_THRESHOLD                 28
-#define SQR_TOOM3_THRESHOLD                105
+#define SQR_TOOM2_THRESHOLD                 30
+#define SQR_TOOM3_THRESHOLD                101
  #define SQR_TOOM4_THRESHOLD                250
-#define SQR_TOOM6_THRESHOLD                366
-#define SQR_TOOM8_THRESHOLD                478
+#define SQR_TOOM6_THRESHOLD                306
+#define SQR_TOOM8_THRESHOLD                454
  
-#define MULMOD_BNM1_THRESHOLD               13
+#define MULMID_TOOM42_THRESHOLD             22
+
+#define MULMOD_BNM1_THRESHOLD               11
  #define SQRMOD_BNM1_THRESHOLD               13
  
  #define MUL_FFT_MODF_THRESHOLD             380  /* k = 5 */
  #define MUL_FFT_TABLE3                                      \
    { {    380, 5}, {     17, 6}, {      9, 5}, {     19, 6}, \
-    {     11, 5}, {     23, 6}, {     21, 7}, {     11, 6}, \
-    {     23, 7}, {     24, 8}, {     13, 7}, {     27, 8}, \
+    {     10, 5}, {     21, 6}, {     11, 5}, {     23, 6}, \
+    {     21, 7}, {     11, 6}, {     23, 7}, {     21, 8}, \
+    {     11, 7}, {     24, 8}, {     13, 7}, {     27, 8}, \
      {     15, 7}, {     31, 8}, {     21, 9}, {     11, 8}, \
      {     27, 9}, {     15, 8}, {     33, 9}, {     19, 8}, \
      {     39, 9}, {     23, 8}, {     47, 9}, {     27,10}, \
-    {     15, 9}, {     39,10}, {     23, 9}, {     51,11}, \
+    {     15, 9}, {     39,10}, {     23, 9}, {     47,11}, \
      {     15,10}, {     31, 9}, {     67,10}, {     39, 9}, \
      {     79,10}, {     47, 9}, {     95,10}, {     55,11}, \
      {     31,10}, {     79,11}, {     47,10}, {     95,12}, \
      {     31,11}, {     63,10}, {    135,11}, {     79,10}, \
-    {    159, 9}, {    319, 8}, {    639,10}, {    167,11}, \
-    {     95,10}, {    191,12}, {     63,11}, {    127,10}, \
-    {    255, 9}, {    511,11}, {    143,10}, {    287, 9}, \
-    {    575,11}, {    159,10}, {    319,12}, {     95,11}, \
-    {    191,10}, {    383,11}, {    207,13}, {   8192,14}, \
-    {  16384,15}, {  32768,16}, {  65536,17}, { 131072,18}, \
-    { 262144,19}, { 524288,20}, {1048576,21}, {2097152,22}, \
-    {4194304,23}, {8388608,24} }
-#define MUL_FFT_TABLE3_SIZE 74
+    {    159,11}, {     95,10}, {    191, 9}, {    383,12}, \
+    {     63,11}, {    127,10}, {    255, 9}, {    511,11}, \
+    {    143,10}, {    287, 9}, {    575,10}, {    303,11}, \
+    {    159,10}, {    319,12}, {     95,11}, {    191,10}, \
+    {    383,11}, {    207,13}, {     63,12}, {    127,11}, \
+    {    255,10}, {    511,11}, {    271,10}, {    543,11}, \
+    {    287,10}, {    575,11}, {    303,12}, {    159,11}, \
+    {    319,10}, {    639,11}, {    351,10}, {    703,12}, \
+    {    191,11}, {    383,10}, {    767,11}, {    415,10}, \
+    {    831,12}, {    223,11}, {    447,10}, {    895,13}, \
+    {    127,12}, {    255,11}, {    511,10}, {   1023,11}, \
+    {    543,12}, {    287,11}, {    607,12}, {    319,11}, \
+    {    639,12}, {    351,11}, {    703,10}, {   1407,13}, \
+    {    191,12}, {    383,11}, {    767,12}, {    415,11}, \
+    {    831,10}, {   1663,12}, {    447,11}, {    895,12}, \
+    {    479,14}, {    127,13}, {    255,12}, {    511,11}, \
+    {   1023,12}, {    543,11}, {   1087,12}, {    575,11}, \
+    {   1151,12}, {    607,13}, {    319,12}, {    703,11}, \
+    {   1407,13}, {    383,12}, {    831,11}, {   1663,13}, \
+    {    447,12}, {    959,11}, {   1919,14}, {  16384,15}, \
+    {  32768,16}, {  65536,17}, { 131072,18}, { 262144,19}, \
+    { 524288,20}, {1048576,21}, {2097152,22}, {4194304,23}, \
+    {8388608,24} }
+#define MUL_FFT_TABLE3_SIZE 137
  #define MUL_FFT_THRESHOLD                 3712
  
-#define SQR_FFT_MODF_THRESHOLD             308  /* k = 5 */
+#define SQR_FFT_MODF_THRESHOLD             304  /* k = 5 */
  #define SQR_FFT_TABLE3                                      \
-  { {    308, 5}, {     17, 6}, {      9, 5}, {     19, 6}, \
+  { {    304, 5}, {     17, 6}, {      9, 5}, {     19, 6}, \
      {     21, 7}, {     11, 6}, {     23, 7}, {     21, 8}, \
      {     11, 7}, {     24, 8}, {     13, 7}, {     27, 8}, \
      {     15, 7}, {     31, 8}, {     21, 9}, {     11, 8}, \
      {     27, 9}, {     15, 8}, {     33, 9}, {     19, 8}, \
      {     41, 9}, {     23, 8}, {     47, 9}, {     27,10}, \
-    {     15, 9}, {     43,10}, {     23, 9}, {     47,11}, \
+    {     15, 9}, {     39,10}, {     23, 9}, {     47,11}, \
      {     15,10}, {     31, 9}, {     67,10}, {     39, 9}, \
-    {     79,10}, {     47, 9}, {     95,10}, {     55,11}, \
-    {     31,10}, {     79,11}, {     47,10}, {     95,12}, \
-    {     31,11}, {     63,10}, {    127, 9}, {    255, 8}, \
-    {    511,10}, {    135,11}, {     79,10}, {    159, 9}, \
-    {    319,11}, {     95,10}, {    191, 9}, {    383, 8}, \
-    {    767,12}, {     63,10}, {    255,11}, {    143, 9}, \
-    {    575, 8}, {   1151,11}, {    159,10}, {    319, 9}, \
-    {    639,11}, {    175,12}, {     95,11}, {    191,10}, \
-    {    383,13}, {   8192,14}, {  16384,15}, {  32768,16}, \
-    {  65536,17}, { 131072,18}, { 262144,19}, { 524288,20}, \
-    {1048576,21}, {2097152,22}, {4194304,23}, {8388608,24} }
-#define SQR_FFT_TABLE3_SIZE 76
+    {     79,10}, {     47,11}, {     31,10}, {     79,11}, \
+    {     47,12}, {     31,11}, {     63,10}, {    127, 9}, \
+    {    255,11}, {     79,10}, {    159, 9}, {    319,11}, \
+    {     95,10}, {    191, 9}, {    383,12}, {     63,11}, \
+    {    127,10}, {    255, 9}, {    511,10}, {    271, 9}, \
+    {    543,11}, {    143,10}, {    287, 9}, {    575,11}, \
+    {    159,10}, {    319,11}, {    175,12}, {     95,11}, \
+    {    191,10}, {    383,11}, {    207,13}, {     63,12}, \
+    {    127,11}, {    255,10}, {    511,11}, {    271,10}, \
+    {    543,11}, {    287,10}, {    575,12}, {    159,11}, \
+    {    319,10}, {    639,11}, {    351,10}, {    703,12}, \
+    {    191,11}, {    383,10}, {    767,11}, {    415,10}, \
+    {    831,12}, {    223,11}, {    447,10}, {    895,11}, \
+    {    479,13}, {    127,12}, {    255,11}, {    511,10}, \
+    {   1023,11}, {    543,12}, {    287,11}, {    575,10}, \
+    {   1151,12}, {    319,11}, {    639,12}, {    351,11}, \
+    {    703,13}, {    191,12}, {    383,11}, {    767,12}, \
+    {    415,11}, {    831,12}, {    447,11}, {    895,12}, \
+    {    479,11}, {    959,14}, {    127,13}, {    255,12}, \
+    {    511,11}, {   1023,12}, {    543,11}, {   1087,12}, \
+    {    575,11}, {   1151,12}, {    607,13}, {    319,12}, \
+    {    639,11}, {   1279,12}, {    703,11}, {   1407,13}, \
+    {    383,12}, {    767,11}, {   1535,12}, {    831,13}, \
+    {    447,12}, {    959,11}, {   1919,14}, {  16384,15}, \
+    {  32768,16}, {  65536,17}, { 131072,18}, { 262144,19}, \
+    { 524288,20}, {1048576,21}, {2097152,22}, {4194304,23}, \
+    {8388608,24} }
+#define SQR_FFT_TABLE3_SIZE 137
  #define SQR_FFT_THRESHOLD                 3200
  
  #define MULLO_BASECASE_THRESHOLD             0  /* always */
-#define MULLO_DC_THRESHOLD                  21
+#define MULLO_DC_THRESHOLD                  45
  #define MULLO_MUL_N_THRESHOLD             6633
  
  #define DC_DIV_QR_THRESHOLD                 38
-#define DC_DIVAPPR_Q_THRESHOLD             133
-#define DC_BDIV_QR_THRESHOLD                32
-#define DC_BDIV_Q_THRESHOLD                 70
+#define DC_DIVAPPR_Q_THRESHOLD             123
+#define DC_BDIV_QR_THRESHOLD                36
+#define DC_BDIV_Q_THRESHOLD                 26
  
-#define INV_MULMOD_BNM1_THRESHOLD           46
-#define INV_NEWTON_THRESHOLD               195
+#define INV_MULMOD_BNM1_THRESHOLD           35
+#define INV_NEWTON_THRESHOLD               163
  #define INV_APPR_THRESHOLD                 147
  
  #define BINV_NEWTON_THRESHOLD              230
-#define REDC_1_TO_REDC_2_THRESHOLD          12
-#define REDC_2_TO_REDC_N_THRESHOLD          59
-
-#define MU_DIV_QR_THRESHOLD               1334
-#define MU_DIVAPPR_Q_THRESHOLD            1360
-#define MUPI_DIV_QR_THRESHOLD               74
-#define MU_BDIV_QR_THRESHOLD              1142
-#define MU_BDIV_Q_THRESHOLD               1308
-
-#define MATRIX22_STRASSEN_THRESHOLD         17
-#define HGCD_THRESHOLD                     125
-#define GCD_DC_THRESHOLD                   330
-#define GCDEXT_DC_THRESHOLD                382
-#define JACOBI_BASE_METHOD                   1
-
-#define GET_STR_DC_THRESHOLD                13
-#define GET_STR_PRECOMPUTE_THRESHOLD        24
-#define SET_STR_DC_THRESHOLD               230
-#define SET_STR_PRECOMPUTE_THRESHOLD      1660
+#define REDC_1_TO_REDC_2_THRESHOLD          10
+#define REDC_2_TO_REDC_N_THRESHOLD          54
+
+#define MU_DIV_QR_THRESHOLD               1187
+#define MU_DIVAPPR_Q_THRESHOLD            1187
+#define MUPI_DIV_QR_THRESHOLD               75
+#define MU_BDIV_QR_THRESHOLD              1078
+#define MU_BDIV_Q_THRESHOLD               1142
+
+#define POWM_SEC_TABLE  2,65,322,1036,2699
+
+#define MATRIX22_STRASSEN_THRESHOLD         16
+#define HGCD_THRESHOLD                     142
+#define HGCD_APPR_THRESHOLD                177
+#define HGCD_REDUCE_THRESHOLD             2121
+#define GCD_DC_THRESHOLD                   345
+#define GCDEXT_DC_THRESHOLD                372
+#define JACOBI_BASE_METHOD                   4
+
+#define GET_STR_DC_THRESHOLD                12
+#define GET_STR_PRECOMPUTE_THRESHOLD        20
+#define SET_STR_DC_THRESHOLD               378
+#define SET_STR_PRECOMPUTE_THRESHOLD      1585
+
+#define FAC_DSC_THRESHOLD                  351
+#define FAC_ODD_THRESHOLD                   43
diff --git a/mpn/x86_64/coreinhm/hamdist.asm b/mpn/x86_64/coreinhm/hamdist.asm

new file mode 100644 (file)

index 0000000..9e35f2d
--- /dev/null
+++ b/mpn/x86_64/coreinhm/hamdist.asm
@@ -0,0 +1,27 @@
+dnl  AMD64 mpn_hamdist -- hamming distance.
+
+dnl  Copyright 2008, 2010, 2011, 2012 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+
+include(`../config.m4')
+
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
+
+MULFUNC_PROLOGUE(mpn_hamdist)
+include_mpn(`x86_64/k10/hamdist.asm')
diff --git a/mpn/x86_64/coreinhm/popcount.asm b/mpn/x86_64/coreinhm/popcount.asm

new file mode 100644 (file)

index 0000000..c93c93b
--- /dev/null
+++ b/mpn/x86_64/coreinhm/popcount.asm
@@ -0,0 +1,27 @@
+dnl  AMD64 mpn_popcount -- population count.
+
+dnl  Copyright 2008, 2010, 2011, 2012 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+
+include(`../config.m4')
+
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
+
+MULFUNC_PROLOGUE(mpn_popcount)
+include_mpn(`x86_64/k10/popcount.asm')
diff --git a/mpn/x86_64/coreisbr/addmul_2.asm b/mpn/x86_64/coreisbr/addmul_2.asm

new file mode 100644 (file)

index 0000000..8173e3d
--- /dev/null
+++ b/mpn/x86_64/coreisbr/addmul_2.asm
@@ -0,0 +1,206 @@
+dnl  X86-64 mpn_addmul_2 optimised for Intel Sandy Bridge.
+
+dnl  Copyright 2008, 2011, 2012 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C           cycles/limb
+C AMD K8,K9
+C AMD K10       4.07
+C AMD bd1
+C AMD bobcat    5.25
+C Intel P4     16.1
+C Intel core2
+C Intel NHM
+C Intel SBR     3.2
+C Intel atom
+C VIA nano      5.23
+
+C This code is the result of running a code generation and optimisation tool
+C suite written by David Harvey and Torbjorn Granlund.
+
+C TODO
+C  * Tune feed-in and wind-down code.
+
+C INPUT PARAMETERS
+define(`rp',     `%rdi')
+define(`up',     `%rsi')
+define(`n_param',`%rdx')
+define(`vp',     `%rcx')
+
+define(`v0', `%r12')
+define(`v1', `%r13')
+define(`n',  `%r11')
+
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
+
+ASM_START()
+       TEXT
+       ALIGN(16)
+PROLOGUE(mpn_addmul_2)
+       FUNC_ENTRY(4)
+       push    %rbx                    C saved here and restored before ret
+       push    %r12                    C r12 becomes v0
+       push    %r13                    C r13 becomes v1
+       push    %r14
+
+       mov     (up), %rax              C rax = up[0], first multiplicand limb
+
+       mov     n_param, n              C copy n out of rdx, which mul overwrites
+       mov     0(vp), v0               C v0 = vp[0]
+       mov     8(vp), v1               C v1 = vp[1]
+       shr     $2, n                   C n = n_param / 4
+       and     $3, R32(n_param)        C n_param mod 4 selects the entry point
+       jz      L(b0)                   C n mod 4 = 0
+       cmp     $2, R32(n_param)
+       jb      L(b1)                   C n mod 4 = 1
+       jz      L(b2)                   C n mod 4 = 2, else fall into b3
+
+L(b3): mov     (rp), %r10
+       mov     $0, R32(%rcx)           C rcx, formerly vp, now accumulates carries
+       mul     v0                      C rdx:rax = up[0] * v0
+       add     %rax, %r10
+       mov     %rdx, %r14
+       adc     $0, %r14
+       lea     -16(rp), rp             C bias rp and up for entry at L(L3)
+       lea     -16(up), up
+       mov     $0, R32(%r9)
+       mov     $0, R32(%rbx)
+       inc     n                       C entry is mid-loop, count one more pass
+       jmp     L(L3)
+
+L(b0): mov     (rp), %r8
+       mul     v0                      C rdx:rax = up[0] * v0
+       add     %rax, %r8
+       mov     %rdx, %r9
+       adc     $0, %r9
+       mov     $0, R32(%rbx)
+       lea     -8(rp), rp              C bias rp and up for entry at L(L0)
+       lea     -8(up), up
+       jmp     L(L0)
+
+L(b1): mov     (rp), %r10
+       mov     $0, R32(%rcx)           C rcx, formerly vp, now accumulates carries
+       mul     v0                      C rdx:rax = up[0] * v0
+       add     %rax, %r10
+       mov     %rdx, %r14
+       adc     $0, %r14
+       mov     %r10, 0(rp)             C store the lowest result limb
+       jmp     L(L1)
+
+L(b2): mov     (rp), %r8
+       mul     v0                      C rdx:rax = up[0] * v0
+       add     %rax, %r8
+       mov     $0, R32(%rbx)
+       mov     %rdx, %r9
+       adc     $0, %r9
+       lea     -24(rp), rp             C bias rp and up for entry at L(L2)
+       lea     -24(up), up
+       inc     n                       C entry is mid-loop, count one more pass
+       jmp     L(L2)
+
+       ALIGN(32)
+L(top):        mov     %r10, 32(rp)
+       adc     %rbx, %r14              C 10
+       lea     32(rp), rp              C advance rp by four limbs
+L(L1): mov     0(up), %rax
+       adc     $0, R32(%rcx)
+       mul     v1
+       mov     $0, R32(%rbx)
+       mov     8(rp), %r8
+       add     %rax, %r8
+       mov     %rdx, %r9
+       mov     8(up), %rax
+       adc     $0, %r9
+       mul     v0
+       add     %rax, %r8
+       adc     %rdx, %r9
+       adc     $0, R32(%rbx)
+       add     %r14, %r8               C 0 12
+       adc     %rcx, %r9               C 1
+L(L0): mov     8(up), %rax
+       adc     $0, R32(%rbx)
+       mov     16(rp), %r10
+       mul     v1
+       add     %rax, %r10
+       mov     %rdx, %r14
+       mov     16(up), %rax
+       mov     $0, R32(%rcx)
+       adc     $0, %r14
+       mul     v0
+       add     %rax, %r10
+       adc     %rdx, %r14
+       adc     $0, R32(%rcx)
+       mov     %r8, 8(rp)
+L(L3): mov     24(rp), %r8
+       mov     16(up), %rax
+       mul     v1
+       add     %r9, %r10               C 3
+       adc     %rbx, %r14              C 4
+       adc     $0, R32(%rcx)
+       add     %rax, %r8
+       mov     %rdx, %r9
+       adc     $0, %r9
+       mov     24(up), %rax
+       mul     v0
+       add     %rax, %r8
+       mov     $0, R32(%rbx)
+       adc     %rdx, %r9
+       adc     $0, R32(%rbx)
+       add     %r14, %r8               C 6
+       adc     %rcx, %r9               C 7
+       mov     %r10, 16(rp)
+L(L2): mov     24(up), %rax
+       adc     $0, R32(%rbx)
+       mov     32(rp), %r10
+       mul     v1
+       add     %rax, %r10
+       mov     32(up), %rax
+       lea     32(up), up              C advance up by four limbs
+       mov     %rdx, %r14
+       adc     $0, %r14
+       mov     %r8, 24(rp)
+       mov     $0, R32(%rcx)
+       mul     v0
+       add     %rax, %r10
+       adc     %rdx, %r14
+       adc     $0, R32(%rcx)
+       add     %r9, %r10               C 9
+       dec     n                       C dec leaves CF intact for the adc at L(top)
+       jnz     L(top)
+
+       mov     %r10, 32(rp)
+       adc     %rbx, %r14
+       mov     0(up), %rax             C last multiplicand limb
+       adc     $0, R32(%rcx)
+       mul     v1                      C final product, only v1 remains
+       mov     %rax, %r8
+       mov     %rdx, %rax              C start forming the return value in rax
+       add     %r14, %r8
+       adc     %rcx, %rax              C rax = high limb plus pending carries
+       mov     %r8, 40(rp)             C store the last limb written to rp
+
+       pop     %r14                    C restore in reverse push order
+       pop     %r13
+       pop     %r12
+       pop     %rbx
+       FUNC_EXIT()
+       ret
+EPILOGUE()
+ASM_END()
diff --git a/mpn/x86_64/coreisbr/aorrlsh1_n.asm b/mpn/x86_64/coreisbr/aorrlsh1_n.asm

new file mode 100644 (file)

index 0000000..00a0c33
--- /dev/null
+++ b/mpn/x86_64/coreisbr/aorrlsh1_n.asm
@@ -0,0 +1,43 @@
+dnl  AMD64 mpn_addlsh1_n -- rp[] = up[] + (vp[] << 1)
+dnl  AMD64 mpn_rsblsh1_n -- rp[] = (vp[] << 1) - up[]
+
+dnl  Contributed to the GNU project by Torbjorn Granlund.
+
+dnl  Copyright 2008, 2010, 2011, 2012 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+define(LSH, 1)
+define(RSH, 63)
+
+ifdef(`OPERATION_addlsh1_n', `
+       define(ADDSUB,  add)
+       define(ADCSBB,  adc)
+       define(func_n,  mpn_addlsh1_n)
+       define(func_nc, mpn_addlsh1_nc)')
+ifdef(`OPERATION_rsblsh1_n', `
+       define(ADDSUB,  sub)
+       define(ADCSBB,  sbb)
+       define(func_n,  mpn_rsblsh1_n)
+       define(func_nc, mpn_rsblsh1_nc)')
+
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
+
+MULFUNC_PROLOGUE(mpn_addlsh1_n mpn_addlsh1_nc mpn_rsblsh1_n mpn_rsblsh1_nc)
+include_mpn(`x86_64/coreisbr/aorrlshC_n.asm')
diff --git a/mpn/x86_64/coreisbr/aorrlsh2_n.asm b/mpn/x86_64/coreisbr/aorrlsh2_n.asm

new file mode 100644 (file)

index 0000000..cf907ec
--- /dev/null
+++ b/mpn/x86_64/coreisbr/aorrlsh2_n.asm
@@ -0,0 +1,45 @@
+dnl  AMD64 mpn_addlsh2_n -- rp[] = up[] + (vp[] << 2)
+dnl  AMD64 mpn_rsblsh2_n -- rp[] = (vp[] << 2) - up[]
+
+dnl  Contributed to the GNU project by Torbjorn Granlund.
+
+dnl  Copyright 2008, 2010, 2011, 2012 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+define(LSH, 2)
+define(RSH, 62)
+
+ifdef(`OPERATION_addlsh2_n', `
+       define(ADDSUB,  add)
+       define(ADCSBB,  adc)
+       define(func_n,  mpn_addlsh2_n)
+       define(func_nc, mpn_addlsh2_nc)')
+ifdef(`OPERATION_rsblsh2_n', `
+       define(ADDSUB,  sub)
+       define(ADCSBB,  sbb)
+       define(func_n,  mpn_rsblsh2_n)
+       define(func_nc, mpn_rsblsh2_nc)')
+
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
+
+C mpn_rsblsh2_nc removed below, its idea of carry-in is inconsistent with
+C refmpn_rsblsh2_nc
+MULFUNC_PROLOGUE(mpn_addlsh2_n mpn_addlsh2_nc mpn_rsblsh2_n)
+include_mpn(`x86_64/coreisbr/aorrlshC_n.asm')
diff --git a/mpn/x86_64/coreisbr/aorrlshC_n.asm b/mpn/x86_64/coreisbr/aorrlshC_n.asm

new file mode 100644 (file)

index 0000000..e3c8bb5
--- /dev/null
+++ b/mpn/x86_64/coreisbr/aorrlshC_n.asm
@@ -0,0 +1,162 @@
+dnl  AMD64 mpn_addlshC_n -- rp[] = up[] + (vp[] << C)
+dnl  AMD64 mpn_rsblshC_n -- rp[] = (vp[] << C) - up[]
+
+dnl  Copyright 2009, 2010, 2011, 2012 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+
+C           cycles/limb
+C AMD K8,K9     ?
+C AMD K10       ?
+C Intel P4      ?
+C Intel core2   3.25
+C Intel NHM     4
+C Intel SBR     2  C (or 1.95 when L(top)'s alignment = 16 (mod 32))
+C Intel atom    ?
+C VIA nano      ?
+
+C This code probably runs close to optimally on Sandy Bridge (using 4-way
+C unrolling).  It also runs reasonably well on Core 2, but it runs poorly on
+C all other processors, including Nehalem.
+
+C INPUT PARAMETERS
+define(`rp',   `%rdi')
+define(`up',   `%rsi')
+define(`vp',   `%rdx')
+define(`n',    `%rcx')
+define(`cy',   `%r8')
+
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
+
+ASM_START()
+       TEXT
+       ALIGN(16)
+PROLOGUE(func_nc)
+       FUNC_ENTRY(4)
+IFDOS(`        mov     56(%rsp), %r8   ')      C cy
+       push    %rbp
+       mov     cy, %rax                C copy cy, r8 is overwritten below
+       neg     %rax                    C set msb on carry
+       xor     R32(%rbp), R32(%rbp)    C limb carry
+       mov     (vp), %r8               C r8 = vp[0]
+       shrd    $RSH, %r8, %rbp         C rbp = low bits of vp[0] moved up by 64-RSH
+       mov     R32(n), R32(%r9)        C use r9 here so rax keeps the carry-in
+       and     $3, R32(%r9)            C n mod 4 selects the entry point
+       je      L(b00)                  C n mod 4 = 0
+       cmp     $2, R32(%r9)
+       jc      L(b01)                  C n mod 4 = 1
+       je      L(b10)                  C n mod 4 = 2
+       jmp     L(b11)                  C n mod 4 = 3
+EPILOGUE()
+
+       ALIGN(16)
+PROLOGUE(func_n)
+       FUNC_ENTRY(4)
+       push    %rbp                    C rbp holds the limb carried between rounds
+       xor     R32(%rbp), R32(%rbp)    C limb carry
+       mov     (vp), %r8               C r8 = vp[0]
+       shrd    $RSH, %r8, %rbp         C rbp = low bits of vp[0] moved up by 64-RSH
+       mov     R32(n), R32(%rax)
+       and     $3, R32(%rax)           C n mod 4 selects the entry point
+       je      L(b00)                  C n mod 4 = 0
+       cmp     $2, R32(%rax)
+       jc      L(b01)                  C n mod 4 = 1
+       je      L(b10)                  C n mod 4 = 2, else fall into b11
+
+L(b11):        mov     8(vp), %r9
+       shrd    $RSH, %r9, %r8
+       mov     16(vp), %r10
+       shrd    $RSH, %r10, %r9
+       add     R32(%rax), R32(%rax)    C init carry flag
+       ADCSBB  (up), %rbp
+       ADCSBB  8(up), %r8
+       ADCSBB  16(up), %r9
+       mov     %rbp, (rp)
+       mov     %r8, 8(rp)
+       mov     %r9, 16(rp)
+       mov     %r10, %rbp              C keep unshifted vp[2] for the next shrd
+       lea     24(up), up              C step all pointers past three limbs
+       lea     24(vp), vp
+       lea     24(rp), rp
+       sbb     R32(%rax), R32(%rax)    C save carry flag
+       sub     $3, n
+       ja      L(top)                  C limbs remain, enter the main loop
+       jmp     L(end)
+
+L(b01):        add     R32(%rax), R32(%rax)    C init carry flag
+       ADCSBB  (up), %rbp
+       mov     %rbp, (rp)
+       mov     %r8, %rbp               C keep unshifted vp[0] for the next shrd
+       lea     8(up), up               C step all pointers past one limb
+       lea     8(vp), vp
+       lea     8(rp), rp
+       sbb     R32(%rax), R32(%rax)    C save carry flag
+       sub     $1, n
+       ja      L(top)                  C limbs remain, enter the main loop
+       jmp     L(end)
+
+L(b10):        mov     8(vp), %r9
+       shrd    $RSH, %r9, %r8
+       add     R32(%rax), R32(%rax)    C init carry flag
+       ADCSBB  (up), %rbp
+       ADCSBB  8(up), %r8
+       mov     %rbp, (rp)
+       mov     %r8, 8(rp)
+       mov     %r9, %rbp               C keep unshifted vp[1] for the next shrd
+       lea     16(up), up              C step all pointers past two limbs
+       lea     16(vp), vp
+       lea     16(rp), rp
+       sbb     R32(%rax), R32(%rax)    C save carry flag
+       sub     $2, n
+       ja      L(top)                  C limbs remain, enter the main loop
+       jmp     L(end)
+
+       ALIGN(16)
+L(top):        mov     (vp), %r8
+       shrd    $RSH, %r8, %rbp         C merge previous limb with this one
+L(b00):        mov     8(vp), %r9
+       shrd    $RSH, %r9, %r8
+       mov     16(vp), %r10
+       shrd    $RSH, %r10, %r9
+       mov     24(vp), %r11
+       shrd    $RSH, %r11, %r10
+       lea     32(vp), vp
+       add     R32(%rax), R32(%rax)    C restore carry flag
+       ADCSBB  (up), %rbp
+       ADCSBB  8(up), %r8
+       ADCSBB  16(up), %r9
+       ADCSBB  24(up), %r10
+       lea     32(up), up
+       mov     %rbp, (rp)
+       mov     %r8, 8(rp)
+       mov     %r9, 16(rp)
+       mov     %r10, 24(rp)
+       mov     %r11, %rbp              C keep unshifted top limb for the next shrd
+       lea     32(rp), rp
+       sbb     R32(%rax), R32(%rax)    C save carry flag
+       sub     $4, n                   C four limbs done per iteration
+       jnz     L(top)
+
+L(end):        shr     $RSH, %rbp              C bits shifted out of the top limb
+       add     R32(%rax), R32(%rax)    C restore carry flag
+       ADCSBB  $0, %rbp                C fold the saved carry or borrow in
+       mov     %rbp, %rax              C return value
+       pop     %rbp
+       FUNC_EXIT()
+       ret
+EPILOGUE()
diff --git a/mpn/x86_64/coreisbr/aorrlsh_n.asm b/mpn/x86_64/coreisbr/aorrlsh_n.asm

new file mode 100644 (file)

index 0000000..a8b5b5d
--- /dev/null
+++ b/mpn/x86_64/coreisbr/aorrlsh_n.asm
@@ -0,0 +1,204 @@
+dnl  AMD64 mpn_addlsh_n -- rp[] = up[] + (vp[] << k)
+dnl  AMD64 mpn_rsblsh_n -- rp[] = (vp[] << k) - up[]
+dnl  Optimised for Sandy Bridge.
+
+dnl  Contributed to the GNU project by Torbjorn Granlund.
+
+dnl  Copyright 2011, 2012 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C           cycles/limb
+C AMD K8,K9     ?
+C AMD K10       5.25
+C Intel P4      ?
+C Intel core2   3.1
+C Intel NHM     3.95
+C Intel SBR     2.75
+C Intel atom    ?
+C VIA nano      ?
+
+C The inner-loop probably runs close to optimally on Sandy Bridge (using 4-way
+C unrolling).  The rest of the code is quite crude, and could perhaps be made
+C both smaller and faster.
+
+C INPUT PARAMETERS
+define(`rp',   `%rdi')
+define(`up',   `%rsi')
+define(`vp',   `%rdx')
+define(`n',    `%rcx')
+define(`cnt',  `%r8')
+define(`cy',   `%r9')                  C for _nc variant
+
+ifdef(`OPERATION_addlsh_n', `
+       define(ADDSUB,  add)
+       define(ADCSBB,  adc)
+       define(IFRSB,   )
+       define(func_n,  mpn_addlsh_n)
+       define(func_nc, mpn_addlsh_nc)')
+ifdef(`OPERATION_rsblsh_n', `
+       define(ADDSUB,  sub)
+       define(ADCSBB,  sbb)
+       define(IFRSB,   `$1')
+       define(func_n,  mpn_rsblsh_n)
+       define(func_nc, mpn_rsblsh_nc)')
+
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
+
+C mpn_rsblsh_nc removed below, its idea of carry-in is inconsistent with
+C refmpn_rsblsh_nc
+MULFUNC_PROLOGUE(mpn_addlsh_n mpn_addlsh_nc mpn_rsblsh_n)
+
+ASM_START()
+       TEXT
+       ALIGN(32)
+PROLOGUE(func_n)
+       FUNC_ENTRY(4)
+IFDOS(`        mov     56(%rsp), %r8d  ')      C cnt
+       push    %rbx                    C rbx is the CF save register
+       xor     R32(%rbx), R32(%rbx)    C clear CF save register
+L(ent):        push    %rbp
+       mov     R32(n), R32(%rbp)
+       mov     n, %rax                 C rax counts limbs, rcx is reused below
+       mov     R32(cnt), R32(%rcx)
+       neg     R32(%rcx)               C cl = -cnt, used as the shrd count
+       and     $3, R32(%rbp)           C n mod 4 selects the entry point
+       jz      L(b0)                   C n mod 4 = 0
+       lea     -32(vp,%rbp,8), vp      C bias pointers so offsets match the loop
+       lea     -32(up,%rbp,8), up
+       lea     -32(rp,%rbp,8), rp
+       cmp     $2, R32(%rbp)
+       jc      L(b1)                   C n mod 4 = 1
+       jz      L(b2)                   C n mod 4 = 2, else fall into b3
+
+L(b3): xor     %r8, %r8                C zero bits shift in below the first limb
+       mov     8(vp), %r9
+       mov     16(vp), %r10
+       shrd    R8(%rcx), %r9, %r8
+       shrd    R8(%rcx), %r10, %r9
+       mov     24(vp), %r11
+       shrd    R8(%rcx), %r11, %r10
+       sub     $3, %rax
+       jz      L(3)                    C n = 3, no loop iterations needed
+       add     R32(%rbx), R32(%rbx)    C restore CF from the save register
+       lea     32(vp), vp
+       ADCSBB  8(up), %r8
+       ADCSBB  16(up), %r9
+       ADCSBB  24(up), %r10
+       lea     32(up), up
+       jmp     L(lo3)
+L(3):  add     R32(%rbx), R32(%rbx)    C restore CF from the save register
+       lea     32(vp), vp
+       ADCSBB  8(up), %r8
+       ADCSBB  16(up), %r9
+       ADCSBB  24(up), %r10
+       jmp     L(wd3)
+
+L(b0): mov     (vp), %r8
+       mov     8(vp), %r9
+       xor     R32(%rbp), R32(%rbp)    C zero bits shift in below the first limb
+       jmp     L(lo0)
+
+L(b1): xor     %r10, %r10              C zero bits shift in below the first limb
+       mov     24(vp), %r11
+       shrd    R8(%rcx), %r11, %r10
+       sub     $1, %rax
+       jz      L(1)                    C n = 1, no loop iterations needed
+       add     R32(%rbx), R32(%rbx)    C restore CF from the save register
+       lea     32(vp), vp
+       ADCSBB  24(up), %r10
+       lea     32(up), up
+       mov     (vp), %r8
+       jmp     L(lo1)
+L(1):  add     R32(%rbx), R32(%rbx)    C restore CF from the save register
+       ADCSBB  24(up), %r10
+       jmp     L(wd1)
+
+L(b2): xor     %r9, %r9                C zero bits shift in below the first limb
+       mov     16(vp), %r10
+       shrd    R8(%rcx), %r10, %r9
+       mov     24(vp), %r11
+       shrd    R8(%rcx), %r11, %r10
+       sub     $2, %rax
+       jz      L(2)                    C n = 2, no loop iterations needed
+       add     R32(%rbx), R32(%rbx)    C restore CF from the save register
+       lea     32(vp), vp
+       ADCSBB  16(up), %r9
+       ADCSBB  24(up), %r10
+       lea     32(up), up
+       jmp     L(lo2)
+L(2):  add     R32(%rbx), R32(%rbx)    C restore CF from the save register
+       ADCSBB  16(up), %r9
+       ADCSBB  24(up), %r10
+       jmp     L(wd2)
+
+       ALIGN(32)                       C 16-byte alignment is not enough!
+L(top):        shrd    R8(%rcx), %r11, %r10
+       add     R32(%rbx), R32(%rbx)    C restore CF from the save register
+       lea     32(vp), vp
+       ADCSBB  (up), %rbp
+       ADCSBB  8(up), %r8
+       ADCSBB  16(up), %r9
+       ADCSBB  24(up), %r10
+       mov     %rbp, (rp)
+       lea     32(up), up
+L(lo3):        mov     %r8, 8(rp)
+L(lo2):        mov     %r9, 16(rp)
+       mov     (vp), %r8
+L(lo1):        mov     %r10, 24(rp)
+       mov     8(vp), %r9
+       mov     %r11, %rbp              C keep unshifted top limb for the next shrd
+       lea     32(rp), rp
+       sbb     R32(%rbx), R32(%rbx)    C save CF, ebx = -CF
+L(lo0):        shrd    R8(%rcx), %r8, %rbp
+       mov     16(vp), %r10
+       shrd    R8(%rcx), %r9, %r8
+       shrd    R8(%rcx), %r10, %r9
+       mov     24(vp), %r11
+       sub     $4, %rax                C four limbs consumed per iteration
+       jg      L(top)
+
+       shrd    R8(%rcx), %r11, %r10
+       add     R32(%rbx), R32(%rbx)    C restore CF from the save register
+       ADCSBB  (up), %rbp
+       ADCSBB  8(up), %r8
+       ADCSBB  16(up), %r9
+       ADCSBB  24(up), %r10
+       mov     %rbp, (rp)
+L(wd3):        mov     %r8, 8(rp)
+L(wd2):        mov     %r9, 16(rp)
+L(wd1):        mov     %r10, 24(rp)
+       adc     R32(%rax), R32(%rax)    C rax is zero after loop
+       shr     R8(%rcx), %r11          C bits shifted out of the top limb
+       ADDSUB  %r11, %rax              C combine them with the final carry
+IFRSB( neg     %rax)
+       pop     %rbp
+       pop     %rbx
+       FUNC_EXIT()
+       ret
+EPILOGUE()
+PROLOGUE(func_nc)
+       FUNC_ENTRY(4)
+IFDOS(`        mov     56(%rsp), %r8d  ')      C cnt
+IFDOS(`        mov     64(%rsp), %r9   ')      C cy
+       push    %rbx                    C rbx is the CF save register
+       neg     cy                      C CF = 1 iff cy is nonzero
+       sbb     R32(%rbx), R32(%rbx)    C initialise CF save register: ebx = -CF
+       jmp     L(ent)                  C join func_n after its own rbx setup
+EPILOGUE()
diff --git a/mpn/x86_64/coreisbr/aors_n.asm b/mpn/x86_64/coreisbr/aors_n.asm

new file mode 100644 (file)

index 0000000..1b22763
--- /dev/null
+++ b/mpn/x86_64/coreisbr/aors_n.asm
@@ -0,0 +1,156 @@
+dnl  X86-64 mpn_add_n, mpn_sub_n, optimized for Intel Sandy Bridge.
+
+dnl  Copyright 2003, 2004, 2005, 2007, 2008, 2010, 2011, 2012 Free Software
+dnl  Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+C           cycles/limb
+C AMD K8,K9     1.85
+C AMD K10       ?
+C Intel P4      ?
+C Intel core2   5
+C Intel NHM     5.5
+C Intel SBR     1.61
+C Intel atom    3
+C VIA nano      3
+
+C INPUT PARAMETERS
+define(`rp',   `%rdi')
+define(`up',   `%rsi')
+define(`vp',   `%rdx')
+define(`n',    `%rcx')
+define(`cy',   `%r8')          C (only for mpn_add_nc and mpn_sub_nc)
+
+ifdef(`OPERATION_add_n', `
+       define(ADCSBB,        adc)
+       define(func,          mpn_add_n)
+       define(func_nc,       mpn_add_nc)')
+ifdef(`OPERATION_sub_n', `
+       define(ADCSBB,        sbb)
+       define(func,          mpn_sub_n)
+       define(func_nc,       mpn_sub_nc)')
+
+MULFUNC_PROLOGUE(mpn_add_n mpn_add_nc mpn_sub_n mpn_sub_nc)
+
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
+
+ASM_START()
+       TEXT
+       ALIGN(16)
+PROLOGUE(func)                         C func is mpn_add_n or mpn_sub_n, selected by OPERATION_*
+       FUNC_ENTRY(4)
+       xor     %r8, %r8                C plain entry point: carry-in is zero
+L(ent):        mov     R32(n), R32(%rax)       C shared entry, func_nc jumps here with the carry-in in r8
+       shr     $2, n                   C n = number of whole 4-limb groups
+       and     $3, R32(%rax)           C eax = limb count mod 4, selects the entry point into the loop
+       jz      L(b0)                   C remainder 0
+       cmp     $2, R32(%rax)
+       jz      L(b2)                   C remainder 2
+       jg      L(b3)                   C remainder 3, remainder 1 falls through
+
+L(b1): mov     (up), %r10              C r10 = up[0]
+       test    n, n                    C any whole group?  test clears CF, hence CF is loaded only afterwards
+       jnz     L(gt1)
+       neg     R32(%r8)                C set CF from argument
+       ADCSBB  (vp), %r10
+       mov     %r10, (rp)              C the operand is a single limb, store it and finish
+       mov     R32(n), R32(%rax)       C zero rax (n is 0 here), mov leaves CF intact
+       adc     R32(%rax), R32(%rax)    C return value = carry or borrow out
+       FUNC_EXIT()
+       ret
+L(gt1):        neg     R32(%r8)        C set CF from argument
+       ADCSBB  (vp), %r10              C limb 0
+       mov     8(up), %r11             C r11 = up[1]
+       lea     16(up), up              C bias the pointers for entry at m1, lea leaves CF intact
+       lea     -16(vp), vp
+       lea     -16(rp), rp
+       jmp     L(m1)
+
+L(b3): mov     (up), %rax              C rax, r9, r10 = up[0], up[1], up[2]
+       mov     8(up), %r9
+       mov     16(up), %r10
+       test    n, n
+       jnz     L(gt3)
+       neg     R32(%r8)                C set CF from argument
+       lea     -32(rp), rp             C bias rp so the tail stores at 32, 40, 48(rp) hit rp[0..2]
+       jmp     L(e3)
+L(gt3):        neg     R32(%r8)        C set CF from argument
+       ADCSBB  (vp), %rax              C limb 0, stored at m3
+       jmp     L(m3)
+
+       nop                             C alignment
+       nop                             C alignment
+L(b0): mov     (up), %r11              C r11 = up[0]
+       neg     R32(%r8)                C set CF from argument
+       lea     -24(vp), vp             C bias so that 24(vp) at m0 addresses vp[0]
+       lea     -24(rp), rp             C bias so that the next 24(rp) store addresses rp[0]
+       lea     8(up), up
+       jmp     L(m0)
+
+L(b2): mov     (up), %r9               C r9, r10 = up[0], up[1]
+       mov     8(up), %r10
+       lea     -8(vp), vp              C bias so that 8(vp) and 16(vp) address vp[0] and vp[1]
+       test    n, n
+       jnz     L(gt2)
+       neg     R32(%r8)                C set CF from argument
+       lea     -40(rp), rp             C bias rp so the tail stores at 40, 48(rp) hit rp[0..1]
+       jmp     L(e2)
+L(gt2):        neg     R32(%r8)        C set CF from argument
+       lea     -8(up), up              C bias the pointers for entry at m2
+       lea     -8(rp), rp
+       jmp     L(m2)
+
+       ALIGN(8)
+L(top):        mov     %r11, 24(rp)    C store the limb completed at m0 in the previous pass
+       ADCSBB  (vp), %rax
+       lea     32(rp), rp
+L(m3): mov     %rax, (rp)
+L(m2): ADCSBB  8(vp), %r9
+       mov     24(up), %r11
+       mov     %r9, 8(rp)
+       ADCSBB  16(vp), %r10
+       lea     32(up), up
+L(m1): mov     %r10, 16(rp)
+L(m0): ADCSBB  24(vp), %r11
+       mov     (up), %rax              C preload the next three limbs, used by the next pass or by the tail
+       mov     8(up), %r9
+       lea     32(vp), vp
+       dec     n                       C dec leaves CF intact, so the carry chain survives the loop control
+       mov     16(up), %r10
+       jnz     L(top)
+
+       mov     %r11, 24(rp)
+L(e3): ADCSBB  (vp), %rax              C tail: the last three limbs, e3 and e2 are also entered for n = 3 and n = 2
+       mov     %rax, 32(rp)
+L(e2): ADCSBB  8(vp), %r9
+       mov     %r9, 40(rp)
+L(e1): ADCSBB  16(vp), %r10
+       mov     %r10, 48(rp)
+       mov     R32(n), R32(%rax)       C zero rax (n is 0 on every path reaching here)
+       adc     R32(%rax), R32(%rax)    C return value = carry or borrow out
+       FUNC_EXIT()
+       ret
+EPILOGUE()
+PROLOGUE(func_nc)                      C func_nc is mpn_add_nc or mpn_sub_nc: same operation with a carry-in argument
+       FUNC_ENTRY(4)
+IFDOS(`        mov     56(%rsp), %r8   ')      C DOS64 only: load the carry-in from the stack into r8
+       jmp     L(ent)                  C join func after its xor, so the carry-in in r8 is kept
+EPILOGUE()
diff --git a/mpn/x86_64/coreisbr/aorsmul_1.asm b/mpn/x86_64/coreisbr/aorsmul_1.asm

new file mode 100644 (file)

index 0000000..4df5939
--- /dev/null
+++ b/mpn/x86_64/coreisbr/aorsmul_1.asm
@@ -0,0 +1,183 @@
+dnl  X86-64 mpn_addmul_1 and mpn_submul_1 optimised for Intel Sandy Bridge.
+
+dnl  Copyright 2003, 2004, 2005, 2007, 2008, 2011, 2012 Free Software
+dnl  Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C           cycles/limb
+C AMD K8,K9     4.77
+C AMD K10       4.77
+C AMD bd1       ?
+C AMD bobcat    5.78
+C Intel P4     15-17
+C Intel core2   5.4
+C Intel NHM     5.23
+C Intel SBR     3.25
+C Intel atom    ?
+C VIA nano      5.5
+
+C The loop of this code is the result of running a code generation and
+C optimisation tool suite written by David Harvey and Torbjorn Granlund.
+
+C TODO
+C  * The loop is great, but the prologue code was quickly written.  Tune it!
+
+define(`rp',      `%rdi')   C rcx
+define(`up',      `%rsi')   C rdx
+define(`n_param', `%rdx')   C r8
+define(`v0',      `%rcx')   C r9
+
+define(`n',      `%rbx')
+
+ifdef(`OPERATION_addmul_1',`
+      define(`ADDSUB',        `add')
+      define(`func',  `mpn_addmul_1')
+')
+ifdef(`OPERATION_submul_1',`
+      define(`ADDSUB',        `sub')
+      define(`func',  `mpn_submul_1')
+')
+
+dnl Disable until tested ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
+
+MULFUNC_PROLOGUE(mpn_addmul_1 mpn_submul_1)
+
+IFDOS(`        define(`up', ``%rsi'')  ') dnl
+IFDOS(`        define(`rp', ``%rcx'')  ') dnl
+IFDOS(`        define(`v0', ``%r9'')   ') dnl
+IFDOS(`        define(`r9', ``rdi'')   ') dnl
+IFDOS(`        define(`n',  ``%r8'')   ') dnl
+IFDOS(`        define(`r8', ``r11'')   ') dnl
+
+ASM_START()
+       TEXT
+       ALIGN(16)
+PROLOGUE(func)                         C func is mpn_addmul_1 or mpn_submul_1, selected by OPERATION_*
+
+IFDOS(``push   %rsi            '')
+IFDOS(``push   %rdi            '')
+IFDOS(``mov    %rdx, %rsi      '')
+
+       mov     (up), %rax              C rax = up[0], multiplicand of the first mul
+       push    %rbx                    C rbx is callee-saved, it serves as n in the STD64 register assignment
+IFSTD(`        mov     R32(n_param), R32(%rdx) ')
+IFDOS(`        mov     n, %rdx                 ')
+IFSTD(`        mov     R32(n_param), R32(n)    ')      C NOTE(review): 32-bit moves, so only the low half of the count seems to be used -- confirm
+
+       lea     -8(up,n,8), up          C up -> up[n-1], limbs are addressed below as disp(up,n,8) with a negated n
+       and     $3, R32(%rdx)           C edx = limb count mod 4, selects the entry point into the loop
+       jz      L(b0)                   C remainder 0
+       cmp     $2, R32(%rdx)
+       jz      L(b2)                   C remainder 2
+       jnc     L(b3)                   C remainder 3, remainder 1 falls through
+
+L(b1): mov     (rp), %r8               C r8 = rp[0]
+       lea     -8(rp,n,8), rp          C rp -> rp[n-1]
+       neg     n
+       mov     $0, R32(%r11)           C no pending carry limb yet
+       add     $4, n                   C n = 4 - count, which carries only when count is 1
+       jc      L(end)                  C single limb: only the wind-down code is needed
+       jmp     L(top)
+
+L(b2): mov     (rp), %r10              C r10 = rp[0]
+       lea     -8(rp,n,8), rp          C rp -> rp[n-1]
+       neg     n
+       add     $1, n                   C n = 1 - count
+       mul     v0                      C rdx:rax = up[0] * v0
+       ADDSUB  %rax, %r10
+       mov     8(up,n,8), %rax         C rax = up[1]
+       mov     %rdx, %r11
+       mov     $0, R32(%r9)            C mov does not touch flags: CF from ADDSUB stays live for the adc at L2
+       jmp     L(L2)
+
+L(b3): mov     (rp), %r8               C r8 = rp[0]
+       lea     -8(rp,n,8), rp          C rp -> rp[n-1]
+       neg     n
+       add     $2, n                   C n = 2 - count
+       mul     v0                      C rdx:rax = up[0] * v0
+       mov     %rdx, %r9
+       mov     $0, R32(%r11)           C no pending carry limb yet
+       jmp     L(L3)
+
+L(b0): mov     (rp), %r10              C r10 = rp[0]
+       lea     -8(rp,n,8), rp          C rp -> rp[n-1]
+       neg     n
+       add     $3, n                   C n = 3 - count
+       mul     v0                      C rdx:rax = up[0] * v0
+       ADDSUB  %rax, %r10
+       mov     %rdx, %r11
+       mov     -8(up,n,8), %rax        C rax = up[1]
+       adc     $0, %r11                C fold the carry or borrow into the high product limb
+       mov     $0, R32(%r9)            C no pending carry limb yet
+       jmp     L(L0)
+
+       ALIGN(16)
+L(top):        mul     v0              C four limbs per pass: r8 and r10 carry rp limbs, r9 and r11 carry high limbs
+       ADDSUB  %rax, %r8
+       mov     %rdx, %r9
+       adc     $0, %r9
+       mov     -16(up,n,8), %rax
+       ADDSUB  %r11, %r8               C apply the carry limb left by the previous step
+       mov     -16(rp,n,8), %r10
+       adc     $0, %r9
+       mul     v0
+       ADDSUB  %rax, %r10
+       mov     %rdx, %r11
+       mov     -8(up,n,8), %rax
+       adc     $0, %r11
+       mov     %r8, -24(rp,n,8)
+L(L0): mul     v0
+       ADDSUB  %r9, %r10
+       mov     -8(rp,n,8), %r8
+       adc     $0, %r11
+       mov     %rdx, %r9
+       mov     %r10, -16(rp,n,8)
+L(L3): ADDSUB  %rax, %r8
+       adc     $0, %r9
+       mov     (up,n,8), %rax
+       ADDSUB  %r11, %r8
+       adc     $0, %r9
+       mov     (rp,n,8), %r10
+       mul     v0
+       ADDSUB  %rax, %r10
+       mov     8(up,n,8), %rax
+       mov     %rdx, %r11
+       mov     %r8, -8(rp,n,8)
+L(L2): adc     $0, %r11
+       mov     8(rp,n,8), %r8
+       ADDSUB  %r9, %r10
+       adc     $0, %r11
+       mov     %r10, (rp,n,8)
+       add     $4, n                   C step the negative index, it carries once the last group is reached
+       jnc     L(top)
+
+L(end):        mul     v0              C wind-down: product for the last limb, whose rp limb is already in r8
+       ADDSUB  %rax, %r8
+       mov     %rdx, %rax              C the return value starts as the high product limb
+       adc     $0, %rax
+       ADDSUB  %r11, %r8
+       adc     $0, %rax
+       mov     %r8, (rp)               C rp was moved to the last limb at entry
+
+       pop     %rbx
+IFDOS(``pop    %rdi            '')
+IFDOS(``pop    %rsi            '')
+       ret                             C rax = carry-out limb
+EPILOGUE()
diff --git a/mpn/x86_64/coreisbr/gmp-mparam.h b/mpn/x86_64/coreisbr/gmp-mparam.h

index f43388dd46db11d7689dc3e5a248cf938f257c96..2f1cb85ec96a3626f1682c7a0aaab8cd626f0f19 100644 (file)
--- a/mpn/x86_64/coreisbr/gmp-mparam.h
+++ b/mpn/x86_64/coreisbr/gmp-mparam.h
@@ -1,7 +1,7 @@
  /* Sandy Bridge gmp-mparam.h -- Compiler/machine parameter header file.
  
  Copyright 1991, 1993, 1994, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007,
-2008, 2009, 2010, 2011 Free Software Foundation, Inc.
+2008, 2009, 2010, 2011, 2012 Free Software Foundation, Inc.
  
  This file is part of the GNU MP Library.
  
@@ -25,167 +25,188 @@ along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
  
  #define MOD_1_NORM_THRESHOLD                 0  /* always */
  #define MOD_1_UNNORM_THRESHOLD               0  /* always */
-#define MOD_1N_TO_MOD_1_1_THRESHOLD          6
-#define MOD_1U_TO_MOD_1_1_THRESHOLD          6
-#define MOD_1_1_TO_MOD_1_2_THRESHOLD         0  /* never mpn_mod_1_1p */
-#define MOD_1_2_TO_MOD_1_4_THRESHOLD        22
-#define PREINV_MOD_1_TO_MOD_1_THRESHOLD     15
+#define MOD_1N_TO_MOD_1_1_THRESHOLD          3
+#define MOD_1U_TO_MOD_1_1_THRESHOLD          3
+#define MOD_1_1_TO_MOD_1_2_THRESHOLD         9
+#define MOD_1_2_TO_MOD_1_4_THRESHOLD        20
+#define PREINV_MOD_1_TO_MOD_1_THRESHOLD     10
  #define USE_PREINV_DIVREM_1                  1  /* native */
+#define DIV_QR_2_PI2_THRESHOLD           MP_SIZE_T_MAX  /* never */
  #define DIVEXACT_1_THRESHOLD                 0  /* always (native) */
-#define BMOD_1_TO_MOD_1_THRESHOLD           34
+#define BMOD_1_TO_MOD_1_THRESHOLD           30
  
-#define MUL_TOOM22_THRESHOLD                20
+#define MUL_TOOM22_THRESHOLD                18
  #define MUL_TOOM33_THRESHOLD                57
-#define MUL_TOOM44_THRESHOLD               166
-#define MUL_TOOM6H_THRESHOLD               387
-#define MUL_TOOM8H_THRESHOLD               527
+#define MUL_TOOM44_THRESHOLD               154
+#define MUL_TOOM6H_THRESHOLD               226
+#define MUL_TOOM8H_THRESHOLD               333
  
-#define MUL_TOOM32_TO_TOOM43_THRESHOLD     105
-#define MUL_TOOM32_TO_TOOM53_THRESHOLD     114
-#define MUL_TOOM42_TO_TOOM53_THRESHOLD     113
+#define MUL_TOOM32_TO_TOOM43_THRESHOLD      97
+#define MUL_TOOM32_TO_TOOM53_THRESHOLD     108
+#define MUL_TOOM42_TO_TOOM53_THRESHOLD     105
  #define MUL_TOOM42_TO_TOOM63_THRESHOLD     114
+#define MUL_TOOM43_TO_TOOM54_THRESHOLD     138
  
  #define SQR_BASECASE_THRESHOLD               0  /* always (native) */
-#define SQR_TOOM2_THRESHOLD                 30
-#define SQR_TOOM3_THRESHOLD                 93
-#define SQR_TOOM4_THRESHOLD                278
-#define SQR_TOOM6_THRESHOLD                369
-#define SQR_TOOM8_THRESHOLD                557
+#define SQR_TOOM2_THRESHOLD                 26
+#define SQR_TOOM3_THRESHOLD                 81
+#define SQR_TOOM4_THRESHOLD                250
+#define SQR_TOOM6_THRESHOLD                345
+#define SQR_TOOM8_THRESHOLD                381
  
-#define MULMOD_BNM1_THRESHOLD               13
-#define SQRMOD_BNM1_THRESHOLD               18
+#define MULMID_TOOM42_THRESHOLD             24
  
-#define MUL_FFT_MODF_THRESHOLD             376  /* k = 5 */
+#define MULMOD_BNM1_THRESHOLD               14
+#define SQRMOD_BNM1_THRESHOLD               14
+
+#define POWM_SEC_TABLE  4,35,516,1036,1222
+
+#define MUL_FFT_MODF_THRESHOLD             380  /* k = 5 */
  #define MUL_FFT_TABLE3                                      \
-  { {    376, 5}, {     17, 6}, {      9, 5}, {     21, 6}, \
+  { {    380, 5}, {     17, 6}, {      9, 5}, {     19, 6}, \
      {     11, 5}, {     23, 6}, {     21, 7}, {     11, 6}, \
-    {     23, 7}, {     12, 6}, {     25, 7}, {     13, 6}, \
-    {     27, 7}, {     21, 8}, {     11, 7}, {     25, 8}, \
+    {     23, 7}, {     21, 8}, {     11, 7}, {     24, 8}, \
      {     13, 7}, {     27, 8}, {     15, 7}, {     31, 8}, \
      {     17, 7}, {     35, 8}, {     19, 7}, {     39, 8}, \
      {     21, 9}, {     11, 8}, {     27, 9}, {     15, 8}, \
      {     35, 9}, {     19, 8}, {     41, 9}, {     23, 8}, \
-    {     47, 9}, {     27,10}, {     15, 9}, {     39,10}, \
+    {     49, 9}, {     27,10}, {     15, 9}, {     39,10}, \
      {     23, 9}, {     51,11}, {     15,10}, {     31, 9}, \
-    {     67,10}, {     39, 9}, {     79,10}, {     47, 9}, \
+    {     67,10}, {     39, 9}, {     83,10}, {     47, 9}, \
      {     95,10}, {     55,11}, {     31,10}, {     79,11}, \
      {     47,10}, {     95,12}, {     31,11}, {     63,10}, \
-    {    135,11}, {     79,10}, {    159,11}, {     95,10}, \
-    {    191, 8}, {    767,12}, {     63,11}, {    127,10}, \
-    {    255, 9}, {    511,11}, {    143,10}, {    287,11}, \
-    {    159, 9}, {    639,12}, {     95,11}, {    191,13}, \
-    {     63,12}, {    127,10}, {    511,11}, {    271,10}, \
-    {    543, 9}, {   1087,10}, {    607,12}, {    159,11}, \
-    {    319,10}, {    639,11}, {    335,10}, {    671,11}, \
-    {    351,10}, {    703, 9}, {   1407,10}, {    735,12}, \
-    {    191,11}, {    415,10}, {    831,12}, {    223,11}, \
-    {    447,13}, {    127,12}, {    255,11}, {    543,12}, \
-    {    287,11}, {    607,12}, {    319,11}, {    639,12}, \
-    {    351,11}, {    703,13}, {    191,12}, {    383,11}, \
-    {    767,12}, {    415,11}, {    831,12}, {    447,11}, \
-    {    895,12}, {    479,14}, {    127,13}, {    255,12}, \
-    {    543,11}, {   1087,12}, {    607,13}, {    319,12}, \
-    {    735,13}, {    383,12}, {    831,11}, {   1663,13}, \
-    {    447,12}, {    959,11}, {   1919,13}, {    511,12}, \
-    {   1087,11}, {   2175,13}, {    575,12}, {   1215,11}, \
+    {    135,11}, {     79,10}, {    159, 9}, {    319,10}, \
+    {    167,11}, {     95,10}, {    191, 9}, {    383,12}, \
+    {     63,11}, {    127,10}, {    255, 9}, {    511,10}, \
+    {    271,11}, {    143,10}, {    287, 9}, {    575,10}, \
+    {    303,11}, {    159,10}, {    319,12}, {     95,11}, \
+    {    191,10}, {    383,11}, {    207,10}, {    415,13}, \
+    {     63,12}, {    127,11}, {    255,10}, {    511,11}, \
+    {    271,10}, {    543,11}, {    287,10}, {    575,11}, \
+    {    303,10}, {    607,12}, {    159,11}, {    319,10}, \
+    {    639,11}, {    351,10}, {    703, 9}, {   1407,11}, \
+    {    367,12}, {    191,11}, {    383,10}, {    767,11}, \
+    {    415,10}, {    831,12}, {    223,11}, {    447,10}, \
+    {    895,13}, {    127,12}, {    255,11}, {    543,10}, \
+    {   1087,12}, {    287,11}, {    575,10}, {   1151,11}, \
+    {    607,12}, {    319,11}, {    639,12}, {    351,11}, \
+    {    703,10}, {   1407,11}, {    735,13}, {    191,12}, \
+    {    383,11}, {    767,12}, {    415,11}, {    831,10}, \
+    {   1663,12}, {    447,11}, {    895,14}, {    127,13}, \
+    {    255,12}, {    511,11}, {   1023,12}, {    543,11}, \
+    {   1087,12}, {    575,11}, {   1151,12}, {    607,11}, \
+    {   1215,13}, {    319,12}, {    639,11}, {   1279,12}, \
+    {    703,11}, {   1407,13}, {    383,12}, {    767,11}, \
+    {   1535,12}, {    831,11}, {   1663,13}, {    447,12}, \
+    {    959,11}, {   1919,14}, {    255,13}, {    511,12}, \
+    {   1087,13}, {    575,12}, {   1215,11}, {   2431,13}, \
+    {    639,12}, {   1279,13}, {    703,12}, {   1407,14}, \
+    {    383,13}, {    831,12}, {   1663,13}, {    959,12}, \
+    {   1919,14}, {    511,13}, {   1087,12}, {   2175,13}, \
+    {   1215,12}, {   2431,14}, {    639,13}, {   1343,12}, \
+    {   2687,13}, {   1407,12}, {   2815,13}, {   1471,14}, \
+    {    767,13}, {   1663,14}, {    895,13}, {   1919,15}, \
+    {    511,14}, {   1023,13}, {   2175,14}, {   1151,13}, \
+    {   2431,12}, {   4863,14}, {   1279,13}, {   2687,14}, \
+    {   1407,13}, {   2815,15}, {    767,14}, {   1663,13}, \
+    {   3455,14}, {   1919,13}, {   3839,16}, {    511,15}, \
+    {   1023,14}, {   2431,13}, {   4863,15}, {   1279,14}, \
+    {   2943,13}, {   5887,15}, {  32768,16}, {  65536,17}, \
+    { 131072,18}, { 262144,19}, { 524288,20}, {1048576,21}, \
+    {2097152,22}, {4194304,23}, {8388608,24} }
+#define MUL_FFT_TABLE3_SIZE 203
+#define MUL_FFT_THRESHOLD                 4736
+
+#define SQR_FFT_MODF_THRESHOLD             304  /* k = 5 */
+#define SQR_FFT_TABLE3                                      \
+  { {    304, 5}, {     17, 6}, {      9, 5}, {     19, 6}, \
+    {     21, 7}, {     11, 6}, {     23, 7}, {     21, 8}, \
+    {     11, 7}, {     24, 8}, {     13, 7}, {     27, 8}, \
+    {     15, 7}, {     31, 8}, {     21, 9}, {     11, 8}, \
+    {     27, 9}, {     15, 8}, {     33, 9}, {     19, 8}, \
+    {     41, 9}, {     23, 8}, {     47, 9}, {     27,10}, \
+    {     15, 9}, {     39,10}, {     23, 9}, {     47,11}, \
+    {     15,10}, {     31, 9}, {     63,10}, {     39, 9}, \
+    {     79,10}, {     47,11}, {     31,10}, {     79,11}, \
+    {     47,12}, {     31,11}, {     63,10}, {    127, 9}, \
+    {    255, 8}, {    511,10}, {    135,11}, {     79,10}, \
+    {    159, 9}, {    319,11}, {     95,10}, {    191, 9}, \
+    {    383,12}, {     63,11}, {    127,10}, {    255, 9}, \
+    {    511,10}, {    271, 9}, {    543,11}, {    143,10}, \
+    {    287, 9}, {    575,11}, {    159,10}, {    319, 9}, \
+    {    639,12}, {     95,11}, {    191,10}, {    383, 9}, \
+    {    767,11}, {    207,13}, {     63,12}, {    127,11}, \
+    {    255,10}, {    511,11}, {    271,10}, {    543,11}, \
+    {    287,10}, {    575,11}, {    303,12}, {    159,11}, \
+    {    319,10}, {    639,11}, {    351,10}, {    703,12}, \
+    {    191,11}, {    383,10}, {    767,11}, {    415,10}, \
+    {    831,12}, {    223,11}, {    447,10}, {    895,11}, \
+    {    479,10}, {    959,13}, {    127,12}, {    255,11}, \
+    {    511,10}, {   1023,11}, {    543,12}, {    287,11}, \
+    {    575,10}, {   1151,11}, {    607,12}, {    319,11}, \
+    {    639,10}, {   1279,12}, {    351,11}, {    703,13}, \
+    {    191,12}, {    383,11}, {    767,12}, {    415,11}, \
+    {    831,12}, {    447,11}, {    895,12}, {    479,11}, \
+    {    959,10}, {   1919,14}, {    127,13}, {    255,12}, \
+    {    511,11}, {   1023,12}, {    543,11}, {   1087,12}, \
+    {    575,11}, {   1151,12}, {    607,13}, {    319,12}, \
+    {    639,11}, {   1279,12}, {    703,11}, {   1407,13}, \
+    {    383,12}, {    767,11}, {   1535,12}, {    831,13}, \
+    {    447,12}, {    959,11}, {   1919,14}, {    255,13}, \
+    {    511,12}, {   1087,13}, {    575,12}, {   1215,11}, \
      {   2431,13}, {    639,12}, {   1279,13}, {    703,12}, \
      {   1407,14}, {    383,13}, {    767,12}, {   1535,13}, \
-    {    831,12}, {   1727,13}, {    959,12}, {   1919,14}, \
-    {    511,13}, {   1087,12}, {   2175,13}, {   1215,12}, \
-    {   2431,14}, {    639,13}, {   1343,12}, {   2687,13}, \
-    {   1471,12}, {   2943,14}, {    767,13}, {   1663,14}, \
-    {    895,13}, {   1919,15}, {    511,14}, {   1023,13}, \
-    {   2175,14}, {   1151,13}, {   2431,12}, {   4863,14}, \
-    {   1279,13}, {   2687,14}, {   1407,13}, {   2943,15}, \
-    {    767,14}, {   1535,13}, {   3199,14}, {   1663,13}, \
+    {    831,12}, {   1663,13}, {    959,12}, {   1919,15}, \
+    {    255,14}, {    511,13}, {   1087,12}, {   2175,13}, \
+    {   1215,12}, {   2431,14}, {    639,13}, {   1343,12}, \
+    {   2687,13}, {   1407,12}, {   2815,13}, {   1471,14}, \
+    {    767,13}, {   1663,14}, {    895,13}, {   1919,15}, \
+    {    511,14}, {   1023,13}, {   2175,14}, {   1151,13}, \
+    {   2431,12}, {   4863,14}, {   1279,13}, {   2687,14}, \
+    {   1407,13}, {   2815,15}, {    767,14}, {   1663,13}, \
      {   3455,14}, {   1919,16}, {    511,15}, {   1023,14}, \
      {   2431,13}, {   4863,15}, {   1279,14}, {   2943,13}, \
-    {   5887,15}, {   1535,14}, {  16384,15}, {  32768,16}, \
-    {  65536,17}, { 131072,18}, { 262144,19}, { 524288,20}, \
-    {1048576,21}, {2097152,22}, {4194304,23}, {8388608,24} }
-#define MUL_FFT_TABLE3_SIZE 184
-#define MUL_FFT_THRESHOLD                 3712
+    {   5887,15}, {  32768,16}, {  65536,17}, { 131072,18}, \
+    { 262144,19}, { 524288,20}, {1048576,21}, {2097152,22}, \
+    {4194304,23}, {8388608,24} }
+#define SQR_FFT_TABLE3_SIZE 198
+#define SQR_FFT_THRESHOLD                 2752
  
-#define SQR_FFT_MODF_THRESHOLD             336  /* k = 5 */
-#define SQR_FFT_TABLE3                                      \
-  { {    336, 5}, {     19, 6}, {     10, 5}, {     21, 6}, \
-    {     21, 7}, {     11, 6}, {     23, 7}, {     12, 6}, \
-    {     25, 7}, {     25, 8}, {     13, 7}, {     27, 8}, \
-    {     15, 7}, {     31, 8}, {     17, 7}, {     35, 8}, \
-    {     21, 9}, {     11, 8}, {     27, 9}, {     15, 8}, \
-    {     35, 9}, {     19, 8}, {     41, 9}, {     23, 8}, \
-    {     47, 9}, {     27,10}, {     15, 9}, {     39,10}, \
-    {     23, 9}, {     51,11}, {     15,10}, {     31, 9}, \
-    {     63,10}, {     39, 9}, {     79,10}, {     47,11}, \
-    {     31,10}, {     79,11}, {     47,10}, {     95,12}, \
-    {     31,11}, {     63,10}, {    127, 9}, {    255,10}, \
-    {    135,11}, {     79, 8}, {    639,11}, {     95,10}, \
-    {    191, 9}, {    383,12}, {     63, 9}, {    511,10}, \
-    {    271,11}, {    143,10}, {    287, 9}, {    575,11}, \
-    {    159,10}, {    319,12}, {     95,11}, {    191,10}, \
-    {    383,11}, {    207,10}, {    415,13}, {     63,12}, \
-    {    127,11}, {    255,10}, {    575,11}, {    303,10}, \
-    {    639,11}, {    351,10}, {    703,12}, {    191,11}, \
-    {    383,10}, {    767,11}, {    415,10}, {    831,12}, \
-    {    223,11}, {    447,10}, {    959,13}, {    127,11}, \
-    {    511,10}, {   1023,11}, {    607,10}, {   1215,12}, \
-    {    319,11}, {    671,12}, {    351,11}, {    703,13}, \
-    {    191,12}, {    383,11}, {    767,12}, {    415,11}, \
-    {    831,12}, {    447,11}, {    895,12}, {    479,11}, \
-    {    959,14}, {    127,13}, {    255,12}, {    543,11}, \
-    {   1087,12}, {    575,11}, {   1151,12}, {    607,13}, \
-    {    319,12}, {    671,11}, {   1343,12}, {    703,13}, \
-    {    383,12}, {    831,13}, {    447,12}, {    959,11}, \
-    {   1919,13}, {    511,12}, {   1023,13}, {    575,12}, \
-    {   1215,13}, {    639,12}, {   1343,13}, {    703,14}, \
-    {    383,13}, {    767,12}, {   1535,13}, {    831,12}, \
-    {   1663,13}, {    959,12}, {   1919,14}, {    511,13}, \
-    {   1087,12}, {   2175,13}, {   1215,14}, {    639,13}, \
-    {   1343,12}, {   2687,13}, {   1407,12}, {   2815,13}, \
-    {   1471,14}, {    767,13}, {   1599,12}, {   3199,13}, \
-    {   1663,14}, {    895,13}, {   1919,15}, {    511,14}, \
-    {   1023,13}, {   2175,14}, {   1151,13}, {   2431,12}, \
-    {   4863,14}, {   1279,13}, {   2687,14}, {   1407,13}, \
-    {   2815,15}, {    767,14}, {   1535,13}, {   3199,14}, \
-    {   1663,13}, {   3455,14}, {   1919,16}, {    511,15}, \
-    {   1023,14}, {   2431,13}, {   4863,15}, {   1279,14}, \
-    {   2943,13}, {   5887,15}, {   1535,14}, {  16384,15}, \
-    {  32768,16}, {  65536,17}, { 131072,18}, { 262144,19}, \
-    { 524288,20}, {1048576,21}, {2097152,22}, {4194304,23}, \
-    {8388608,24} }
-#define SQR_FFT_TABLE3_SIZE 177
-#define SQR_FFT_THRESHOLD                 3264
-
-#define MULLO_BASECASE_THRESHOLD             5
-#define MULLO_DC_THRESHOLD                  33
-#define MULLO_MUL_N_THRESHOLD             6633
-
-#define DC_DIV_QR_THRESHOLD                 39
-#define DC_DIVAPPR_Q_THRESHOLD             119
-#define DC_BDIV_QR_THRESHOLD                31
-#define DC_BDIV_Q_THRESHOLD                 78
+#define MULLO_BASECASE_THRESHOLD             0  /* always */
+#define MULLO_DC_THRESHOLD                  51
+#define MULLO_MUL_N_THRESHOLD             8648
+
+#define DC_DIV_QR_THRESHOLD                 63
+#define DC_DIVAPPR_Q_THRESHOLD             196
+#define DC_BDIV_QR_THRESHOLD                59
+#define DC_BDIV_Q_THRESHOLD                134
  
  #define INV_MULMOD_BNM1_THRESHOLD           46
-#define INV_NEWTON_THRESHOLD               139
-#define INV_APPR_THRESHOLD                 131
+#define INV_NEWTON_THRESHOLD               202
+#define INV_APPR_THRESHOLD                 190
  
-#define BINV_NEWTON_THRESHOLD              198
-#define REDC_1_TO_REDC_2_THRESHOLD          23
-#define REDC_2_TO_REDC_N_THRESHOLD          59
+#define BINV_NEWTON_THRESHOLD              224
+#define REDC_1_TO_REDC_2_THRESHOLD          16
+#define REDC_2_TO_REDC_N_THRESHOLD          55
  
-#define MU_DIV_QR_THRESHOLD               1334
-#define MU_DIVAPPR_Q_THRESHOLD            1442
-#define MUPI_DIV_QR_THRESHOLD               66
-#define MU_BDIV_QR_THRESHOLD              1017
-#define MU_BDIV_Q_THRESHOLD               1442
+#define MU_DIV_QR_THRESHOLD               1442
+#define MU_DIVAPPR_Q_THRESHOLD            1528
+#define MUPI_DIV_QR_THRESHOLD               85
+#define MU_BDIV_QR_THRESHOLD              1187
+#define MU_BDIV_Q_THRESHOLD               1387
  
  #define MATRIX22_STRASSEN_THRESHOLD         15
-#define HGCD_THRESHOLD                     125 /* hardwired, tuneup crashes */
-#define GCD_DC_THRESHOLD                   396
-#define GCDEXT_DC_THRESHOLD                368
-#define JACOBI_BASE_METHOD                   1
+#define HGCD_THRESHOLD                     113
+#define HGCD_APPR_THRESHOLD                 84
+#define HGCD_REDUCE_THRESHOLD             2681
+#define GCD_DC_THRESHOLD                   555
+#define GCDEXT_DC_THRESHOLD                396
+#define JACOBI_BASE_METHOD                   4
  
  #define GET_STR_DC_THRESHOLD                12
-#define GET_STR_PRECOMPUTE_THRESHOLD        21
-#define SET_STR_DC_THRESHOLD               650
-#define SET_STR_PRECOMPUTE_THRESHOLD      1585
+#define GET_STR_PRECOMPUTE_THRESHOLD        20
+#define SET_STR_DC_THRESHOLD              1204
+#define SET_STR_PRECOMPUTE_THRESHOLD      2251
+
+#define FAC_DSC_THRESHOLD                  800
+#define FAC_ODD_THRESHOLD                   28
diff --git a/mpn/x86_64/coreisbr/lshift.asm b/mpn/x86_64/coreisbr/lshift.asm

new file mode 100644 (file)

index 0000000..72f02e4
--- /dev/null
+++ b/mpn/x86_64/coreisbr/lshift.asm
@@ -0,0 +1,26 @@
+dnl  X86-64 mpn_lshift optimised for Intel Sandy Bridge.
+
+dnl  Copyright 2012 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
+
+MULFUNC_PROLOGUE(mpn_lshift)
+include_mpn(`x86_64/fastsse/lshift-movdqu2.asm')
diff --git a/mpn/x86_64/coreisbr/lshiftc.asm b/mpn/x86_64/coreisbr/lshiftc.asm

new file mode 100644 (file)

index 0000000..7e96f49
--- /dev/null
+++ b/mpn/x86_64/coreisbr/lshiftc.asm
@@ -0,0 +1,26 @@
+dnl  X86-64 mpn_lshiftc optimised for Intel Sandy Bridge.
+
+dnl  Copyright 2012 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
+
+MULFUNC_PROLOGUE(mpn_lshiftc)
+include_mpn(`x86_64/fastsse/lshiftc-movdqu2.asm')
diff --git a/mpn/x86_64/coreisbr/mul_1.asm b/mpn/x86_64/coreisbr/mul_1.asm

new file mode 100644 (file)

index 0000000..c094803
--- /dev/null
+++ b/mpn/x86_64/coreisbr/mul_1.asm
@@ -0,0 +1,144 @@
+dnl  X86-64 mpn_mul_1 optimised for Intel Sandy Bridge.
+
+dnl  Copyright 2003, 2004, 2005, 2007, 2008, 2011, 2012 Free Software
+dnl  Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C           cycles/limb
+C AMD K8,K9
+C AMD K10
+C AMD bd1
+C AMD bobcat
+C Intel P4
+C Intel core2
+C Intel NHM
+C Intel SBR
+C Intel atom
+C VIA nano
+
+C The loop of this code is the result of running a code generation and
+C optimisation tool suite written by David Harvey and Torbjorn Granlund.
+
+C TODO
+C  * The loop is great, but the prologue code was quickly written.  Tune it!
+C  * Add mul_1c entry point.
+
+define(`rp',      `%rdi')   C rcx
+define(`up',      `%rsi')   C rdx
+define(`n_param', `%rdx')   C r8
+define(`v0',      `%rcx')   C r9
+
+define(`n',      `%r11')
+
+dnl Disable until tested ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
+
+IFDOS(`        define(`up', ``%rsi'')  ') dnl
+IFDOS(`        define(`rp', ``%rcx'')  ') dnl
+IFDOS(`        define(`v0', ``%r9'')   ') dnl
+IFDOS(`        define(`r9', ``rdi'')   ') dnl
+IFDOS(`        define(`n',  ``%r8'')   ') dnl
+IFDOS(`        define(`r8', ``r11'')   ') dnl
+
+ASM_START()
+       TEXT
+       ALIGN(16)
+
+PROLOGUE(mpn_mul_1)
+IFDOS(``push   %rsi            '')
+IFDOS(``push   %rdi            '')
+IFDOS(``mov    %rdx, %rsi      '')
+
+       mov     (up), %rax              C load first source limb
+IFSTD(`        mov     R32(n_param), R32(%r10) ')
+IFDOS(`        mov     n, %r10                 ')
+IFSTD(`        mov     R32(n_param), R32(n)    ')
+
+       lea     (up,n_param,8), up      C up = one past the last source limb
+       lea     -8(rp,n_param,8), rp    C rp = address of the last result limb
+       neg     n                       C negative index, stepped up toward zero
+       mul     v0                      C rdx:rax = first limb times v0
+       and     $3, R32(%r10)           C pick loop entry from limb count mod 4
+       jz      L(b0)                   C count mod 4 = 0
+       cmp     $2, R32(%r10)
+       jb      L(b1)                   C count mod 4 = 1
+       jz      L(b2)                   C count mod 4 = 2
+
+L(b3): add     $-1, n                  C count mod 4 = 3
+       mov     %rax, %r9
+       mov     %rdx, %r8
+       mov     16(up,n,8), %rax        C preload the limb that L(L3) multiplies
+       jmp     L(L3)
+
+L(b1): mov     %rax, %r9
+       mov     %rdx, %r8
+       add     $1, n
+       jnc     L(L1)                   C no carry: more than one limb, enter loop
+       mov     %rax, (rp)              C single limb: store low half of product
+       mov     %rdx, %rax              C return high half of product
+       ret                             C NOTE(review): skips the IFDOS pops done at the normal exit - confirm before enabling DOS64
+
+L(b2): add     $-2, n
+       mov     %rax, %r8
+       mov     %rdx, %r9
+       mov     24(up,n,8), %rax        C preload the limb that L(L2) multiplies
+       jmp     L(L2)
+
+L(b0): mov     %rax, %r8
+       mov     %rdx, %r9
+       mov     8(up,n,8), %rax         C preload the limb that L(L0) multiplies
+       jmp     L(L0)
+
+       ALIGN(8)
+L(top):        mov     %rdx, %r8
+       add     %rax, %r9
+L(L1): mov     0(up,n,8), %rax         C next source limb
+       adc     $0, %r8
+       mul     v0
+       add     %rax, %r8
+       mov     %r9, 0(rp,n,8)          C store a finished result limb
+       mov     8(up,n,8), %rax
+       mov     %rdx, %r9
+       adc     $0, %r9
+L(L0): mul     v0
+       mov     %r8, 8(rp,n,8)
+       add     %rax, %r9
+       mov     %rdx, %r8
+       mov     16(up,n,8), %rax
+       adc     $0, %r8
+L(L3): mul     v0
+       mov     %r9, 16(rp,n,8)
+       mov     %rdx, %r9
+       add     %rax, %r8
+       mov     24(up,n,8), %rax
+       adc     $0, %r9
+L(L2): mul     v0
+       mov     %r8, 24(rp,n,8)
+       add     $4, n                   C four limbs per trip through the loop
+       jnc     L(top)                  C loop until the add carries out
+
+L(end):        add     %rax, %r9               C fold in low half of the last product
+       mov     %rdx, %rax
+       adc     $0, %rax                C return value: high half plus carry
+       mov     %r9, (rp)               C store the last result limb
+
+IFDOS(``pop    %rdi            '')
+IFDOS(``pop    %rsi            '')
+       ret
+EPILOGUE()
diff --git a/mpn/x86_64/coreisbr/rsh1aors_n.asm b/mpn/x86_64/coreisbr/rsh1aors_n.asm

new file mode 100644 (file)

index 0000000..daad0cc
--- /dev/null
+++ b/mpn/x86_64/coreisbr/rsh1aors_n.asm
@@ -0,0 +1,183 @@
+dnl  X86-64 mpn_rsh1add_n, mpn_rsh1sub_n optimised for Intel Sandy Bridge.
+
+dnl  Copyright 2003, 2005, 2009, 2010, 2011, 2012 Free Software Foundation,
+dnl  Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+C           cycles/limb
+C AMD K8,K9     ?
+C AMD K10       4.25
+C Intel P4      21.5
+C Intel core2   3.2
+C Intel NHM     3.87
+C Intel SBR     2.05
+C Intel atom    ?
+C VIA nano      44.9
+
+C INPUT PARAMETERS
+define(`rp', `%rdi')
+define(`up', `%rsi')
+define(`vp', `%rdx')
+define(`n',  `%rcx')
+
+ifdef(`OPERATION_rsh1add_n', `
+       define(ADDSUB,        add)
+       define(ADCSBB,        adc)
+       define(func_n,        mpn_rsh1add_n)
+       define(func_nc,       mpn_rsh1add_nc)')
+ifdef(`OPERATION_rsh1sub_n', `
+       define(ADDSUB,        sub)
+       define(ADCSBB,        sbb)
+       define(func_n,        mpn_rsh1sub_n)
+       define(func_nc,       mpn_rsh1sub_nc)')
+
+MULFUNC_PROLOGUE(mpn_rsh1add_n mpn_rsh1add_nc mpn_rsh1sub_n mpn_rsh1sub_nc)
+
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
+
+ASM_START()
+       TEXT
+
+       ALIGN(16)
+PROLOGUE(func_nc)
+       FUNC_ENTRY(4)
+IFDOS(`        mov     56(%rsp), %r8   ')
+       push    %rbx
+       push    %rbp
+
+       neg     %r8                     C set C flag from parameter
+       mov     (up), %rbp
+       ADCSBB  (vp), %rbp              C first limb, including the carry-in
+
+       jmp     L(ent)                  C continue in the shared body of func_n
+EPILOGUE()
+
+       ALIGN(16)
+PROLOGUE(func_n)
+       FUNC_ENTRY(4)
+       push    %rbx
+       push    %rbp
+
+       mov     (up), %rbp
+       ADDSUB  (vp), %rbp              C first limb of the sum or difference
+L(ent):
+       sbb     R32(%rbx), R32(%rbx)    C save cy
+       mov     R32(%rbp), R32(%rax)
+       and     $1, R32(%rax)           C return value
+
+       mov     R32(n), R32(%r11)
+       and     $3, R32(%r11)           C n mod 4 selects the feed-in path
+
+       cmp     $1, R32(%r11)
+       je      L(do)                   C jump if n = 1 5 9 ...
+
+L(n1): cmp     $2, R32(%r11)
+       jne     L(n2)                   C jump unless n = 2 6 10 ...
+       add     R32(%rbx), R32(%rbx)    C restore cy
+       mov     8(up), %r10
+       ADCSBB  8(vp), %r10
+       lea     8(up), up
+       lea     8(vp), vp
+       lea     8(rp), rp
+       sbb     R32(%rbx), R32(%rbx)    C save cy
+
+       shrd    $1, %r10, %rbp          C shift in the low bit of the next limb
+       mov     %rbp, -8(rp)
+       jmp     L(cj1)
+
+L(n2): cmp     $3, R32(%r11)
+       jne     L(n3)                   C jump unless n = 3 7 11 ...
+       add     R32(%rbx), R32(%rbx)    C restore cy
+       mov     8(up), %r9
+       mov     16(up), %r10
+       ADCSBB  8(vp), %r9
+       ADCSBB  16(vp), %r10
+       lea     16(up), up
+       lea     16(vp), vp
+       lea     16(rp), rp
+       sbb     R32(%rbx), R32(%rbx)    C save cy
+
+       shrd    $1, %r9, %rbp
+       mov     %rbp, -16(rp)
+       jmp     L(cj2)
+
+L(n3): dec     n                       C come here for n = 4 8 12 ...
+       add     R32(%rbx), R32(%rbx)    C restore cy
+       mov     8(up), %r8
+       mov     16(up), %r9
+       ADCSBB  8(vp), %r8
+       ADCSBB  16(vp), %r9
+       mov     24(up), %r10
+       ADCSBB  24(vp), %r10
+       lea     24(up), up
+       lea     24(vp), vp
+       lea     24(rp), rp
+       sbb     R32(%rbx), R32(%rbx)    C save cy
+
+       shrd    $1, %r8, %rbp
+       mov     %rbp, -24(rp)
+       shrd    $1, %r9, %r8
+       mov     %r8, -16(rp)
+L(cj2):        shrd    $1, %r10, %r9
+       mov     %r9, -8(rp)
+L(cj1):        mov     %r10, %rbp              C newest limb becomes the pending limb
+
+L(do):
+       shr     $2, n                   C                               4
+       je      L(end)                  C                               2
+       ALIGN(16)
+L(top):        add     R32(%rbx), R32(%rbx)            C restore cy
+
+       mov     8(up), %r8
+       mov     16(up), %r9
+       ADCSBB  8(vp), %r8
+       ADCSBB  16(vp), %r9
+       mov     24(up), %r10
+       mov     32(up), %r11
+       ADCSBB  24(vp), %r10
+       ADCSBB  32(vp), %r11
+
+       lea     32(up), up
+       lea     32(vp), vp
+
+       sbb     R32(%rbx), R32(%rbx)    C save cy
+
+       shrd    $1, %r8, %rbp
+       mov     %rbp, (rp)
+       shrd    $1, %r9, %r8
+       mov     %r8, 8(rp)
+       shrd    $1, %r10, %r9
+       mov     %r9, 16(rp)
+       shrd    $1, %r11, %r10
+       mov     %r10, 24(rp)
+
+       dec     n
+       mov     %r11, %rbp              C carry the top limb into the next round
+       lea     32(rp), rp
+       jne     L(top)
+
+L(end):        shrd    $1, %rbx, %rbp          C top bit comes from the saved carry
+       mov     %rbp, (rp)              C store the final result limb
+       pop     %rbp
+       pop     %rbx
+       FUNC_EXIT()
+       ret
+EPILOGUE()
diff --git a/mpn/x86_64/coreisbr/rshift.asm b/mpn/x86_64/coreisbr/rshift.asm

new file mode 100644 (file)

index 0000000..5d16361
--- /dev/null
+++ b/mpn/x86_64/coreisbr/rshift.asm
@@ -0,0 +1,26 @@
+dnl  X86-64 mpn_rshift optimised for Intel Sandy Bridge.
+
+dnl  Copyright 2012 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
+
+MULFUNC_PROLOGUE(mpn_rshift)
+include_mpn(`x86_64/fastsse/rshift-movdqu2.asm')
diff --git a/mpn/x86_64/darwin.m4 b/mpn/x86_64/darwin.m4

index 247b7a6302e6002e4e06fe1435fb19131f0441b1..0530650cfa996967e5a32f4100049ff1c56f9954 100644 (file)
--- a/mpn/x86_64/darwin.m4
+++ b/mpn/x86_64/darwin.m4
@@ -1,25 +1,28 @@
  divert(-1)
-dnl  Copyright 2008 Free Software Foundation, Inc.
+dnl  Copyright 2008, 2011, 2012 Free Software Foundation, Inc.
  dnl
  dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or
-dnl  modify it under the terms of the GNU Lesser General Public License as
-dnl  published by the Free Software Foundation; either version 3 of the
-dnl  License, or (at your option) any later version.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful,
-dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
-dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-dnl  Lesser General Public License for more details.
-dnl
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
  dnl  You should have received a copy of the GNU Lesser General Public License
  dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
  
  define(`DARWIN')
  
-define(`LEA',`
-       lea     $1(%rip), $2
+define(`LEA',`dnl
+ifdef(`PIC',
+       `lea    $1(%rip), $2'
+,
+       `movabs `$'$1, $2')
  ')
  
  dnl  Usage: CALL(funcname)
@@ -29,6 +32,39 @@ dnl  Simply override the definition in x86_64-defs.m4.
  define(`CALL',`call    GSYM_PREFIX`'$1')
  
  
-define(`JUMPTABSECT', `DATA')
+dnl  Usage: JUMPTABSECT
+dnl
+dnl  CAUTION: Do not put anything sensible here, like RODATA.  That works with
+dnl  some Darwin tool chains, but silently breaks with others.  (Note that
+dnl  putting jump tables in the text segment is a really poor idea for many PC
+dnl  processors, since they cannot cache the same thing in both L1D and L1I.)
+
+define(`JUMPTABSECT', `.text')
+
+
+dnl  Usage: JMPENT(targlabel,tablabel)
+
+define(`JMPENT',`dnl
+ifdef(`PIC',
+       `.set   $1_tmp, $1-$2
+       .long   $1_tmp'
+,
+       `.quad  $1'
+)')
+
+dnl  Target ABI macros.  For Darwin we override IFELF (and leave default for
+dnl  IFDOS and IFSTD).
+
+define(`IFELF',   `')
+
+
+dnl  Usage: PROTECT(symbol)
+dnl
+dnl  Used for private GMP symbols that should never be overridden by users.
+dnl  This can save reloc entries and improve shlib sharing as well as
+dnl  application startup times
+
+define(`PROTECT',  `.private_extern $1')
+
  
  divert`'dnl
diff --git a/mpn/x86_64/div_qr_2n_pi1.asm b/mpn/x86_64/div_qr_2n_pi1.asm

new file mode 100644 (file)

index 0000000..aef6938
--- /dev/null
+++ b/mpn/x86_64/div_qr_2n_pi1.asm
@@ -0,0 +1,147 @@
+dnl  x86-64 mpn_div_qr_2n_pi1
+dnl  -- Divide an mpn number by a normalized 2-limb number,
+dnl     using a single-limb inverse.
+
+dnl  Copyright 2007, 2008, 2010, 2011, 2012 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+C              c/l
+C INPUT PARAMETERS
+define(`qp',           `%rdi')
+define(`rp',           `%rsi')
+define(`up_param',     `%rdx')
+define(`un',           `%rcx')
+define(`d1',           `%r8')
+define(`d0',           `%r9')
+define(`di_param',     `8(%rsp)')
+
+define(`di',           `%r10')
+define(`up',           `%r11')
+define(`u2',           `%rbx')
+define(`u1',           `%r12')
+define(`t1',           `%r13')
+define(`t0',           `%r14')
+define(`md1',          `%r15')
+
+C TODO
+C * Store qh in the same stack slot as di_param, instead of pushing
+C   it. (we could put it in register %rbp, but then we would need to
+C   save and restore that instead, which doesn't seem like a win).
+
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
+
+ASM_START()
+       TEXT
+       ALIGN(16)
+PROLOGUE(mpn_div_qr_2n_pi1)
+       FUNC_ENTRY(4)
+IFDOS(`        mov     56(%rsp), %r8   ')
+IFDOS(`        mov     64(%rsp), %r9   ')
+IFDOS(`define(`di_param', `72(%rsp)')')
+       mov     di_param, di            C fetch the stack-passed parameter
+       mov     up_param, up            C free rdx for use by mul
+       push    %r15
+       push    %r14
+       push    %r13
+       push    %r12
+       push    %rbx
+
+       mov     -16(up, un, 8), u1      C u1 = up[un-2]
+       mov     -8(up, un, 8), u2       C u2 = up[un-1]
+
+       mov     u1, t0
+       mov     u2, t1
+       sub     d0, t0
+       sbb     d1, t1                  C t1:t0 = u2:u1 - d1:d0
+       cmovnc  t0, u1
+       cmovnc  t1, u2                  C keep the difference if there was no borrow
+       C push qh which is !carry
+       sbb     %rax, %rax
+       inc     %rax
+       push    %rax
+       lea     -2(un), un              C un = un - 2
+       mov     d1, md1
+       neg     md1                     C md1 = -d1
+
+       jmp     L(next)
+
+       ALIGN(16)
+L(loop):
+       C udiv_qr_3by2 (q,u2,u1,u2,u1,n0, d1,d0,di)
+       C Based on the optimized divrem_2.asm code.
+
+       mov     di, %rax
+       mul     u2
+       mov     u1, t0
+       add     %rax, t0        C q0 in t0
+       adc     u2, %rdx
+       mov     %rdx, t1        C q in t1
+       imul    md1, %rdx
+       mov     d0, %rax
+       lea     (%rdx, u1), u2
+       mul     t1
+       mov     (up, un, 8), u1         C bring in the next lower limb up[un]
+       sub     d0, u1
+       sbb     d1, u2
+       sub     %rax, u1
+       sbb     %rdx, u2
+       xor     R32(%rax), R32(%rax)
+       xor     R32(%rdx), R32(%rdx)
+       cmp     t0, u2
+       cmovnc  d0, %rax
+       cmovnc  d1, %rdx                C rdx:rax = d1:d0 if no carry from cmp, else 0
+       adc     $0, t1
+       nop
+       add     %rax, u1
+       adc     %rdx, u2
+       cmp     d1, u2
+       jae     L(fix)
+L(bck):
+       mov     t1, (qp, un, 8)         C store quotient limb
+L(next):
+       sub     $1, un
+       jnc     L(loop)                 C loop until un goes below zero
+L(end):
+       mov     u2, 8(rp)               C write final u2 to rp[1]
+       mov     u1, (rp)                C write final u1 to rp[0]
+
+       C qh on stack
+       pop     %rax
+
+       pop     %rbx
+       pop     %r12
+       pop     %r13
+       pop     %r14
+       pop     %r15
+       FUNC_EXIT()
+       ret
+
+L(fix):        C Unlikely update. u2 >= d1
+       seta    %dl
+       cmp     d0, u1
+       setae   %al
+       orb     %dl, %al                C "orb" form to placate Sun tools
+       je      L(bck)
+       inc     t1
+       sub     d0, u1
+       sbb     d1, u2
+       jmp     L(bck)
+EPILOGUE()
diff --git a/mpn/x86_64/div_qr_2u_pi1.asm b/mpn/x86_64/div_qr_2u_pi1.asm

new file mode 100644 (file)

index 0000000..bdb64c1
--- /dev/null
+++ b/mpn/x86_64/div_qr_2u_pi1.asm
@@ -0,0 +1,189 @@
+dnl  x86-64 mpn_div_qr_2u_pi1
+dnl  -- Divide an mpn number by an unnormalized 2-limb number,
+dnl     using a single-limb inverse and shifting the dividend on the fly.
+
+dnl  Copyright 2007, 2008, 2010, 2011 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+C              c/l
+C INPUT PARAMETERS
+define(`qp',           `%rdi')
+define(`rp',           `%rsi')
+define(`up_param',     `%rdx')
+define(`un_param',     `%rcx') dnl %rcx needed for shift count
+define(`d1',           `%r8')
+define(`d0',           `%r9')
+define(`shift_param',  `FRAME+8(%rsp)')
+define(`di_param',     `FRAME+16(%rsp)')
+
+define(`di',           `%r10')
+define(`up',           `%r11')
+define(`un',           `%rbp')
+define(`u2',           `%rbx')
+define(`u1',           `%r12')
+define(`u0',           `%rsi') dnl Same as rp, which is saved and restored.
+define(`t1',           `%r13')
+define(`t0',           `%r14')
+define(`md1',          `%r15')
+
+ASM_START()
+       TEXT
+       ALIGN(16)
+deflit(`FRAME', 0)
+PROLOGUE(mpn_div_qr_2u_pi1)
+       mov     di_param, di            C fetch stack parameter while FRAME is 0
+       mov     up_param, up            C free rdx for use by mul
+       push    %r15
+       push    %r14
+       push    %r13
+       push    %r12
+       push    %rbx
+       push    %rbp
+       push    rp                      C saved because its register doubles as u0
+deflit(`FRAME', 56)
+       lea     -2(un_param), un        C un = un_param - 2
+       mov     d1, md1
+       neg     md1                     C md1 = -d1
+
+       C int parameter, 32 bits only
+       movl    shift_param, R32(%rcx)
+
+       C FIXME: Different code for SHLD_SLOW
+
+       xor     R32(u2), R32(u2)
+       mov     8(up, un, 8), u1
+       shld    %cl, u1, u2             C u2 = bits shifted out of the top limb
+       C Remains to read (up, un, 8) and shift u1, u0
+       C udiv_qr_3by2 (qh,u2,u1,u2,u1,n0, d1,d0,di)
+       mov     di, %rax
+       mul     u2
+       mov     (up, un, 8), u0
+       shld    %cl, u0, u1
+       mov     u1, t0
+       add     %rax, t0        C q0 in t0
+       adc     u2, %rdx
+       mov     %rdx, t1        C q in t1
+       imul    md1, %rdx
+       mov     d0, %rax
+       lea     (%rdx, u1), u2
+       mul     t1
+       mov     u0, u1
+       shl     %cl, u1
+       sub     d0, u1
+       sbb     d1, u2
+       sub     %rax, u1
+       sbb     %rdx, u2
+       xor     R32(%rax), R32(%rax)
+       xor     R32(%rdx), R32(%rdx)
+       cmp     t0, u2
+       cmovnc  d0, %rax
+       cmovnc  d1, %rdx
+       adc     $0, t1
+       nop
+       add     %rax, u1
+       adc     %rdx, u2
+       cmp     d1, u2
+       jae     L(fix_qh)
+L(bck_qh):
+       push    t1      C push qh on stack
+
+       jmp     L(next)
+
+       ALIGN(16)
+L(loop):
+       C udiv_qr_3by2 (q,u2,u1,u2,u1,n0, d1,d0,di)
+       C Based on the optimized divrem_2.asm code.
+
+       mov     di, %rax
+       mul     u2
+       mov     (up, un, 8), u0         C next lower limb up[un]
+       xor     R32(t1), R32(t1)
+       shld    %cl, u0, t1             C t1 = high bits of u0 shifted out
+       or      t1, u1                  C merge them into the low end of u1
+       mov     u1, t0
+       add     %rax, t0        C q0 in t0
+       adc     u2, %rdx
+       mov     %rdx, t1        C q in t1
+       imul    md1, %rdx
+       mov     d0, %rax
+       lea     (%rdx, u1), u2
+       mul     t1
+       mov     u0, u1
+       shl     %cl, u1
+       sub     d0, u1
+       sbb     d1, u2
+       sub     %rax, u1
+       sbb     %rdx, u2
+       xor     R32(%rax), R32(%rax)
+       xor     R32(%rdx), R32(%rdx)
+       cmp     t0, u2
+       cmovnc  d0, %rax
+       cmovnc  d1, %rdx
+       adc     $0, t1
+       nop
+       add     %rax, u1
+       adc     %rdx, u2
+       cmp     d1, u2
+       jae     L(fix)
+L(bck):
+       mov     t1, (qp, un, 8)         C store quotient limb
+L(next):
+       sub     $1, un
+       jnc     L(loop)                 C loop until un goes below zero
+L(end):
+       C qh on stack
+       pop     %rax
+       pop     rp
+       shrd    %cl, u2, u1
+       shr     %cl, u2                 C shift u2:u1 right by the same count
+       mov     u2, 8(rp)               C write final u2 to rp[1]
+       mov     u1, (rp)                C write final u1 to rp[0]
+
+       pop     %rbp
+       pop     %rbx
+       pop     %r12
+       pop     %r13
+       pop     %r14
+       pop     %r15
+       ret
+
+L(fix):        C Unlikely update. u2 >= d1
+       seta    %dl
+       cmp     d0, u1
+       setae   %al
+       orb     %dl, %al                C "orb" form to placate Sun tools
+       je      L(bck)
+       inc     t1
+       sub     d0, u1
+       sbb     d1, u2
+       jmp     L(bck)
+
+C Duplicated, just jumping back to a different address.
+L(fix_qh):     C Unlikely update. u2 >= d1
+       seta    %dl
+       cmp     d0, u1
+       setae   %al
+       orb     %dl, %al                C "orb" form to placate Sun tools
+       je      L(bck_qh)
+       inc     t1
+       sub     d0, u1
+       sbb     d1, u2
+       jmp     L(bck_qh)
+EPILOGUE()
diff --git a/mpn/x86_64/dive_1.asm b/mpn/x86_64/dive_1.asm

index f3b6ac8faf08f550ac6904d4a74920ade59e7c08..45cf63b81f13ba3723f401ec1e71748dc76f4416 100644 (file)
--- a/mpn/x86_64/dive_1.asm
+++ b/mpn/x86_64/dive_1.asm
@@ -1,6 +1,7 @@
  dnl  AMD64 mpn_divexact_1 -- mpn by limb exact division.
  
-dnl  Copyright 2001, 2002, 2004, 2005, 2006 Free Software Foundation, Inc.
+dnl  Copyright 2001, 2002, 2004, 2005, 2006, 2011, 2012 Free Software
+dnl  Foundation, Inc.
  
  dnl  This file is part of the GNU MP Library.
  
@@ -21,12 +22,13 @@ include(`../config.m4')
  
  
  C           cycles/limb
-C K8,K9:       10
-C K10:         10
-C P4:          33
-C P6 core2:    13.25
-C P6 corei7:   14
-C P6 atom:     42
+C AMD K8,K9    10
+C AMD K10      10
+C Intel P4     33
+C Intel core2  13.25
+C Intel corei  14
+C Intel atom   42
+C VIA nano     43
  
  C A quick adoption of the 32-bit K7 code.
  
@@ -37,10 +39,14 @@ C up                rsi
  C n            rdx
  C divisor      rcx
  
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
+
  ASM_START()
         TEXT
         ALIGN(16)
  PROLOGUE(mpn_divexact_1)
+       FUNC_ENTRY(4)
         push    %rbx
  
         mov     %rcx, %rax
@@ -54,11 +60,7 @@ L(odd):      mov     %rax, %rbx
         shr     R32(%rax)
         and     $127, R32(%rax)         C d/2, 7 bits
  
-ifdef(`PIC',`
-       mov     binvert_limb_table@GOTPCREL(%rip), %rdx
-',`
-       movabs  $binvert_limb_table, %rdx
-')
+       LEA(    binvert_limb_table, %rdx)
  
         movzbl  (%rdx,%rax), R32(%rax)  C inv 8 bits
  
@@ -133,12 +135,14 @@ L(ent):   imul    %r10, %rax              C                       6
         imul    %r10, %rax
         mov     %rax, (%rdi)
         pop     %rbx
+       FUNC_EXIT()
         ret
  
  L(one):        shr     R8(%rcx), %rax
         imul    %r10, %rax
         mov     %rax, (%rdi)
         pop     %rbx
+       FUNC_EXIT()
         ret
  
  EPILOGUE()
diff --git a/mpn/x86_64/divrem_1.asm b/mpn/x86_64/divrem_1.asm

index da0a211743bcbc9d2caed3d809bec8cee00b90d5..64602d165921fbb7b1397a01ffb6f3c7a1000848 100644 (file)
--- a/mpn/x86_64/divrem_1.asm
+++ b/mpn/x86_64/divrem_1.asm
@@ -1,6 +1,7 @@
  dnl  x86-64 mpn_divrem_1 -- mpn by limb division.
  
-dnl  Copyright 2004, 2005, 2007, 2008, 2009 Free Software Foundation, Inc.
+dnl  Copyright 2004, 2005, 2007, 2008, 2009, 2010, 2011, 2012 Free Software
+dnl  Foundation, Inc.
  
  dnl  This file is part of the GNU MP Library.
  
@@ -21,23 +22,13 @@ include(`../config.m4')
  
  
  C              norm    unorm   frac
-C K8           13      13      12
-C P4           44.2    44.2    42.3
-C P6 core2     25      24.5    19.3
-C P6 corei7    21.5    20.7    18
-C P6 atom      42      52      37
-
-C TODO
-C  * Compute the inverse without relying on the div instruction.
-C    Newton's method and mulq, or perhaps the faster fdiv.
-C  * Tune prologue.
-C  * Optimize for Core 2.
-
-C The code for unnormalized divisors works also for normalized divisors, but
-C for some reason it runs really slowly (on K8) for that case.  Use special
-C code until we can address this.  The Intel Atom is also affected, but
-C understandably (shld slowness).
-define(`SPECIAL_CODE_FOR_NORMALIZED_DIVISOR',1)
+C AMD K8,K9    13      13      12
+C AMD K10      13      13      12
+C Intel P4     43      44      43
+C Intel core2  24.5    24.5    19.5
+C Intel corei  20.5    19.5    18
+C Intel atom   43      46      36
+C VIA nano     25.5    25.5    24
  
  C mp_limb_t
  C mpn_divrem_1 (mp_ptr qp, mp_size_t fn,
@@ -66,11 +57,20 @@ define(`un',                `%rbx')
  C rax rbx rcx rdx rsi rdi rbp r8  r9  r10 r11 r12 r13 r14 r15
  C         cnt         qp      d  dinv
  
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
+
+IFSTD(`define(`CNTOFF',                `40($1)')')
+IFDOS(`define(`CNTOFF',                `104($1)')')
+
  ASM_START()
         TEXT
         ALIGN(16)
  PROLOGUE(mpn_preinv_divrem_1)
-       xor     %eax, %eax
+       FUNC_ENTRY(4)
+IFDOS(`        mov     56(%rsp), %r8   ')
+IFDOS(`        mov     64(%rsp), %r9   ')
+       xor     R32(%rax), R32(%rax)
         push    %r13
         push    %r12
         push    %rbp
@@ -85,14 +85,17 @@ PROLOGUE(mpn_preinv_divrem_1)
  
         test    d, d
         js      L(nent)
-       mov     40(%rsp), R8(cnt)
+
+       mov     CNTOFF(%rsp), R8(cnt)
         shl     R8(cnt), d
         jmp     L(uent)
  EPILOGUE()
  
         ALIGN(16)
  PROLOGUE(mpn_divrem_1)
-       xor     %eax, %eax
+       FUNC_ENTRY(4)
+IFDOS(`        mov     56(%rsp), %r8   ')
+       xor     R32(%rax), R32(%rax)
         push    %r13
         push    %r12
         push    %rbp
@@ -107,8 +110,6 @@ PROLOGUE(mpn_divrem_1)
         lea     -8(qp,un_param,8), qp
         xor     R32(%rbp), R32(%rbp)
  
-
-ifdef(`SPECIAL_CODE_FOR_NORMALIZED_DIVISOR',`
         test    d, d
         jns     L(unnormalized)
  
@@ -119,50 +120,55 @@ L(normalized):
         dec     un
         mov     %rbp, %rax
         sub     d, %rbp
-       cmovb   %rax, %rbp
-       sbb     %eax, %eax
-       inc     %eax
+       cmovc   %rax, %rbp
+       sbb     R32(%rax), R32(%rax)
+       inc     R32(%rax)
         mov     %rax, (qp)
         lea     -8(qp), qp
  L(8):
-       mov     d, %rdx
-       mov     $-1, %rax
-       not     %rdx
-       div     d                       C FREE rax rdx rcx r9 r10 r11
+IFSTD(`        push    %rdi            ')
+IFSTD(`        push    %rsi            ')
+       push    %r8
+IFSTD(`        mov     d, %rdi         ')
+IFDOS(`        mov     d, %rcx         ')
+       CALL(   mpn_invert_limb)
+       pop     %r8
+IFSTD(`        pop     %rsi            ')
+IFSTD(`        pop     %rdi            ')
+
         mov     %rax, dinv
         mov     %rbp, %rax
         jmp     L(nent)
  
         ALIGN(16)
-L(nloop):                              C                   cycK8  cycP6  cycP4
+L(ntop):                               C           K8-K10  P6-CNR P6-NHM  P4
         mov     (up,un,8), %r10         C
-       lea     1(%rax), %rbp           C
-       mul     dinv                    C                    0,13   0,19  0,45
-       add     %r10, %rax              C                    4      8     12
-       adc     %rbp, %rdx              C                    5      9     13
-       mov     %rax, %rbp              C                    5      9     13
-       mov     %rdx, %r13              C                    6      11    23
-       imul    d, %rdx                 C                    6      11    23
-       sub     %rdx, %r10              C                    10     16    33
+       mul     dinv                    C             0,13   0,20   0,18   0,45
+       add     %r10, %rax              C             4      8      3     12
+       adc     %rbp, %rdx              C             5      9     10     13
+       mov     %rax, %rbp              C             5      9      4     13
+       mov     %rdx, %r13              C             6     11     12     23
+       imul    d, %rdx                 C             6     11     11     23
+       sub     %rdx, %r10              C            10     16     14     33
         mov     d, %rax                 C
-       add     %r10, %rax              C                    11     17    34
-       cmp     %rbp, %r10              C                    11     17    34
-       cmovb   %r10, %rax              C                    12     18    35
+       add     %r10, %rax              C            11     17     15     34
+       cmp     %rbp, %r10              C            11     17     15     34
+       cmovc   %r10, %rax              C            12     18     16     35
         adc     $-1, %r13               C
         cmp     d, %rax                 C
         jae     L(nfx)                  C
  L(nok):        mov     %r13, (qp)              C
         sub     $8, qp                  C
-L(nent):dec    un                      C
-       jns     L(nloop)                C
+L(nent):lea    1(%rax), %rbp           C
+       dec     un                      C
+       jns     L(ntop)                 C
  
-       xor     %ecx, %ecx
+       xor     R32(%rcx), R32(%rcx)
         jmp     L(87)
  
  L(nfx):        sub     d, %rax
         inc     %r13
         jmp     L(nok)
-')
  
  L(unnormalized):
         test    un, un
@@ -177,30 +183,42 @@ L(unnormalized):
         dec     un
  L(44):
         bsr     d, %rcx
-       not     %ecx
-       sal     %cl, d
-       sal     %cl, %rbp
-       mov     d, %rdx
-       mov     $-1, %rax
-       not     %rdx
-       div     d                       C FREE rax rdx r9 r10 r11
-       test    un, un
+       not     R32(%rcx)
+       shl     R8(%rcx), d
+       shl     R8(%rcx), %rbp
+
+       push    %rcx
+IFSTD(`        push    %rdi            ')
+IFSTD(`        push    %rsi            ')
+       push    %r8
+IFSTD(`        mov     d, %rdi         ')
+IFDOS(`        mov     d, %rcx         ')
+       CALL(   mpn_invert_limb)
+       pop     %r8
+IFSTD(`        pop     %rsi            ')
+IFSTD(`        pop     %rdi            ')
+       pop     %rcx
+
         mov     %rax, dinv
         mov     %rbp, %rax
+       test    un, un
         je      L(87)
-L(uent):
-       mov     -8(up,un,8), %rbp
-       shr     %cl, %rax
-       shld    %cl, %rbp, %rax
-       sub     $2, un
-       js      L(ulast)
+
+L(uent):dec    un
+       mov     (up,un,8), %rbp
+       neg     R32(%rcx)
+       shr     R8(%rcx), %rbp
+       neg     R32(%rcx)
+       or      %rbp, %rax
+       jmp     L(ent)
  
         ALIGN(16)
-L(uloop):
-       nop
-       mov     (up,un,8), %r10
-       lea     1(%rax), %r11
-       shld    %cl, %r10, %rbp
+L(utop):mov    (up,un,8), %r10
+       shl     R8(%rcx), %rbp
+       neg     R32(%rcx)
+       shr     R8(%rcx), %r10
+       neg     R32(%rcx)
+       or      %r10, %rbp
         mul     dinv
         add     %rbp, %rax
         adc     %r11, %rdx
@@ -211,18 +229,18 @@ L(uloop):
         mov     d, %rax
         add     %rbp, %rax
         cmp     %r11, %rbp
-       cmovb   %rbp, %rax
+       cmovc   %rbp, %rax
         adc     $-1, %r13
         cmp     d, %rax
         jae     L(ufx)
  L(uok):        mov     %r13, (qp)
         sub     $8, qp
+L(ent):        mov     (up,un,8), %rbp
         dec     un
-       mov     %r10, %rbp
-       jns     L(uloop)
-L(ulast):
         lea     1(%rax), %r11
-       sal     %cl, %rbp
+       jns     L(utop)
+
+L(uend):shl    R8(%rcx), %rbp
         mul     dinv
         add     %rbp, %rax
         adc     %r11, %rdx
@@ -233,48 +251,47 @@ L(ulast):
         mov     d, %rax
         add     %rbp, %rax
         cmp     %r11, %rbp
-       cmovb   %rbp, %rax
+       cmovc   %rbp, %rax
         adc     $-1, %r13
         cmp     d, %rax
-       jae     L(93)
-L(69): mov     %r13, (qp)
+       jae     L(efx)
+L(eok):        mov     %r13, (qp)
         sub     $8, qp
         jmp     L(87)
  
  L(ufx):        sub     d, %rax
         inc     %r13
         jmp     L(uok)
-
-L(93): sub     d, %rax
+L(efx):        sub     d, %rax
         inc     %r13
-       jmp     L(69)
+       jmp     L(eok)
  
  L(87): mov     d, %rbp
         neg     %rbp
-       jmp     L(87b)
-
-       ALIGN(16)
-L(floop):                              C                   cycK8  cycP6  cycP4
-       lea     1(%rax), %r11           C
-       mul     dinv                    C                    0,12
-       add     %r11, %rdx              C                    5
-       mov     %rax, %r11              C                    4
-       mov     %rdx, %r13              C                    6
-       imul    %rbp, %rdx              C                    6
+       jmp     L(fent)
+
+       ALIGN(16)                       C           K8-K10  P6-CNR P6-NHM  P4
+L(ftop):mul    dinv                    C             0,12   0,17   0,17
+       add     %r11, %rdx              C             5      8     10
+       mov     %rax, %r11              C             4      8      3
+       mov     %rdx, %r13              C             6      9     11
+       imul    %rbp, %rdx              C             6      9     11
         mov     d, %rax                 C
-       add     %rdx, %rax              C                    10
-       cmp     %r11, %rdx              C                    10
-       cmovb   %rdx, %rax              C                    11
+       add     %rdx, %rax              C            10     14     14
+       cmp     %r11, %rdx              C            10     14     14
+       cmovc   %rdx, %rax              C            11     15     15
         adc     $-1, %r13               C
         mov     %r13, (qp)              C
         sub     $8, qp                  C
-L(87b):        dec     fn                      C
-       jns     L(floop)                C
+L(fent):lea    1(%rax), %r11           C
+       dec     fn                      C
+       jns     L(ftop)                 C
  
-       shr     %cl, %rax
+       shr     R8(%rcx), %rax
  L(ret):        pop     %rbx
         pop     %rbp
         pop     %r12
         pop     %r13
+       FUNC_EXIT()
         ret
  EPILOGUE()
diff --git a/mpn/x86_64/divrem_2.asm b/mpn/x86_64/divrem_2.asm

index 2b3a34c48cde238ce7e6d6565e67ce787cbb639a..15914804f47caca9b62b3b0fd275757239a38cfe 100644 (file)
--- a/mpn/x86_64/divrem_2.asm
+++ b/mpn/x86_64/divrem_2.asm
@@ -20,19 +20,14 @@ dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
  include(`../config.m4')
  
  
-C              norm    frac
-C K8           20      20
-C P4           73      73
-C P6 core2     37      37
-C P6 corei7    33      33
-
-C TODO
-C  * Perhaps compute the inverse without relying on divq?  Could either use
-C    Newton's method and mulq, or perhaps the faster fdiv.
-C  * The loop has not been carefully tuned, nor analysed for critical path
-C    length.  It seems that 20 c/l is a bit long, compared to the 13 c/l for
-C    mpn_divrem_1.
-C  * Clean up.  This code is really crude.
+C              c/l
+C AMD K8,K9    18
+C AMD K10      18
+C Intel P4     68
+C Intel core2  34
+C Intel corei  30.5
+C Intel atom   73
+C VIA nano     33
  
  
  C INPUT PARAMETERS
@@ -42,168 +37,117 @@ define(`up_param',        `%rdx')
  define(`un_param',     `%rcx')
  define(`dp',           `%r8')
  
-define(`dinv',         `%r9')
-
-
-C rax rbx rcx rdx rsi rdi rbp r8  r9  r10 r11 r12 r13 r14 r15
-C         cnt         qp      d  dinv
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
  
  ASM_START()
         TEXT
         ALIGN(16)
  PROLOGUE(mpn_divrem_2)
-
+       FUNC_ENTRY(4)
+IFDOS(`        mov     56(%rsp), %r8   ')
         push    %r15
-       lea     (%rdx,%rcx,8), %rax
         push    %r14
         push    %r13
-       mov     %rsi, %r13
         push    %r12
-       lea     -24(%rax), %r12
+       lea     -24(%rdx,%rcx,8), %r12  C r12 = &up[un-1]
+       mov     %rsi, %r13
         push    %rbp
         mov     %rdi, %rbp
         push    %rbx
-       mov     8(%r8), %r11
-       mov     -8(%rax), %r9
-       mov     (%r8), %r8
-       mov     -16(%rax), %r10
+       mov     8(%r8), %r11            C d1
+       mov     16(%r12), %rbx
+       mov     (%r8), %r8              C d0
+       mov     8(%r12), %r10
+
         xor     R32(%r15), R32(%r15)
-       cmp     %r9, %r11
+       cmp     %rbx, %r11
         ja      L(2)
         setb    %dl
         cmp     %r10, %r8
         setbe   %al
-       orb     %al, %dl
-       jne     L(23)
+       orb     %al, %dl                C "orb" form to placate Sun tools
+       je      L(2)
+       inc     R32(%r15)
+       sub     %r8, %r10
+       sbb     %r11, %rbx
  L(2):
-       lea     -3(%rcx,%r13), %rbx     C un + fn - 3
-       test    %rbx, %rbx
-       js      L(6)
-       mov     %r11, %rdx
-       mov     $-1, %rax
-       not     %rdx
-       div     %r11
+       lea     -3(%rcx,%r13), %r14     C un + fn - 3
+       test    %r14, %r14
+       js      L(end)
+
+       push    %r8
+       push    %r10
+       push    %r11
+IFSTD(`        mov     %r11, %rdi      ')
+IFDOS(`        mov     %r11, %rcx      ')
+       CALL(   mpn_invert_limb)
+       pop     %r11
+       pop     %r10
+       pop     %r8
+
         mov     %r11, %rdx
         mov     %rax, %rdi
         imul    %rax, %rdx
-       mov     %rdx, %r14
+       mov     %rdx, %r9
         mul     %r8
-       mov     %rdx, %rcx
-       mov     $-1, %rdx
-       add     %r8, %r14
-       adc     $0, %rdx
-       add     %rcx, %r14
-       adc     $0, %rdx
-       js      L(8)
-L(18):
-       dec     %rdi
-       sub     %r11, %r14
-       sbb     $0, %rdx
-       jns     L(18)
-L(8):
-
-C rax rbx rcx rdx rsi rdi rbp r8 r9 r10 r11 r12 r13 r14 r15
-C n2      un      n1 dinv qp  d0        d1  up  fn      msl
-C     n2  un     -d1      n1    dinv XX              XX
-
-ifdef(`NEW',`
-       lea     (%rbp,%rbx,8), %rbp
-       mov     %rbx, %rcx              C un
-       mov     %r9, %rbx
-       mov     %rdi, %r9               C di
-       mov     %r10, %r14
+       xor     R32(%rcx), R32(%rcx)
+       add     %r8, %r9
+       adc     $-1, %rcx
+       add     %rdx, %r9
+       adc     $0, %rcx
+       js      2f
+1:     dec     %rdi
+       sub     %r11, %r9
+       sbb     $0, %rcx
+       jns     1b
+2:
+
+       lea     (%rbp,%r14,8), %rbp
         mov     %r11, %rsi
         neg     %rsi                    C -d1
+
+C rax rbx rcx rdx rsi rdi  rbp r8 r9 r10 r11 r12 r13 r14 r15
+C     n2  un      -d1 dinv qp  d0 q0     d1  up  fn      msl
+
         ALIGN(16)
-L(loop):
-       mov     %r9, %rax               C di            ncp
-       mul     %rbx                    C               0, 18
-       add     %r14, %rax              C               4
-       mov     %rax, %r10              C q0            5
+L(top):        mov     %rdi, %rax              C di            ncp
+       mul     %rbx                    C               0, 17
+       mov     %r10, %rcx              C
+       add     %rax, %rcx              C               4
         adc     %rbx, %rdx              C               5
-       mov     %rdx, %rdi              C q             6
+       mov     %rdx, %r9               C q             6
         imul    %rsi, %rdx              C               6
         mov     %r8, %rax               C               ncp
-       lea     (%rdx, %r14), %rbx      C n1 -= ...     7
-       mul     %rdi                    C               7
-       xor     R32(%r14), R32(%r14)    C
-       cmp     %rcx, %r13              C
+       lea     (%rdx, %r10), %rbx      C n1 -= ...     10
+       xor     R32(%r10), R32(%r10)    C
+       mul     %r9                     C               7
+       cmp     %r14, %r13              C
         jg      L(19)                   C
-       mov     (%r12), %r14            C
+       mov     (%r12), %r10            C
         sub     $8, %r12                C
-L(19): sub     %r8, %r14               C               ncp
-       sbb     %r11, %rbx              C               9
-       sub     %rax, %r14              C               11
+L(19): sub     %r8, %r10               C               ncp
+       sbb     %r11, %rbx              C               11
+       sub     %rax, %r10              C               11
         sbb     %rdx, %rbx              C               12
-       inc     %rdi                    C               7
+       xor     R32(%rax), R32(%rax)    C
         xor     R32(%rdx), R32(%rdx)    C
-       cmp     %r10, %rbx              C               13
-       mov     %r8, %rax               C d0            ncp
-       adc     $-1, %rdx               C mask          14
-       add     %rdx, %rdi              C q--           15
-       and     %rdx, %rax              C d0 or 0       15
-       and     %r11, %rdx              C d1 or 0       15
-       add     %rax, %r14              C               16
+       cmp     %rcx, %rbx              C               13
+       cmovnc  %r8, %rax               C               14
+       cmovnc  %r11, %rdx              C               14
+       adc     $0, %r9                 C adjust q      14
+       nop
+       add     %rax, %r10              C               15
         adc     %rdx, %rbx              C               16
-       cmp     %r11, %rbx              C               17
+       cmp     %r11, %rbx              C
         jae     L(fix)                  C
-L(bck):        mov     %rdi, (%rbp)            C
-       sub     $8, %rbp                C
-       dec     %rcx
-       jns     L(loop)
-
-       mov     %r14, %r10
-       mov     %rbx, %r9
-',`
-       lea     (%rbp,%rbx,8), %rbp
-       mov     %rbx, %rcx
-       mov     %r9, %rax
-       mov     %r10, %rsi
-       ALIGN(16)
-L(loop):
-       mov     %rax, %r14              C               0, 19
-       mul     %rdi                    C               0
-       mov     %r11, %r9               C               1
-       add     %rsi, %rax              C               4
-       mov     %rax, %rbx              C q0            5
-       adc     %r14, %rdx              C q             5
-       lea     1(%rdx), %r10           C               6
-       mov     %rdx, %rax              C               6
-       imul    %rdx, %r9               C               6
-       sub     %r9, %rsi               C               10
-       xor     R32(%r9), R32(%r9)      C
-       mul     %r8                     C               7
-       cmp     %rcx, %r13              C
-       jg      L(13)                   C
-       mov     (%r12), %r9             C
-       sub     $8, %r12                C
-L(13): sub     %r8, %r9                C               ncp
-       sbb     %r11, %rsi              C               11
-       sub     %rax, %r9               C               11
-       sbb     %rdx, %rsi              C               12
-       cmp     %rbx, %rsi              C               13
-       sbb     %rax, %rax              C               14
-       not     %rax                    C               15
-       add     %rax, %r10              C               16
-       mov     %r8, %rbx               C               ncp
-       and     %rax, %rbx              C               16
-       and     %r11, %rax              C               16
-       add     %rbx, %r9               C               17
-       adc     %rsi, %rax              C               18
-       cmp     %rax, %r11              C               19
-       jbe     L(fix)                  C
-L(bck):        mov     %r10, (%rbp)            C
+L(bck):        mov     %r9, (%rbp)             C
         sub     $8, %rbp                C
-       mov     %r9, %rsi               C               18
-       dec     %rcx
-       jns     L(loop)
-
-       mov     %rsi, %r10
-       mov     %rax, %r9
-')
-L(6):
-       mov     %r10, 8(%r12)
-       mov     %r9, 16(%r12)
+       dec     %r14
+       jns     L(top)
+
+L(end):        mov     %r10, 8(%r12)
+       mov     %rbx, 16(%r12)
         pop     %rbx
         pop     %rbp
         pop     %r12
@@ -211,30 +155,16 @@ L(6):
         pop     %r14
         mov     %r15, %rax
         pop     %r15
+       FUNC_EXIT()
         ret
  
-L(23): inc     R32(%r15)
-       sub     %r8, %r10
-       sbb     %r11, %r9
-       jmp     L(2)
-
-ifdef(`NEW',`
  L(fix):        seta    %dl
-       cmp     %r8, %r14
+       cmp     %r8, %r10
         setae   %al
-       orb     %dl, %al
+       orb     %dl, %al                C "orb" form to placate Sun tools
         je      L(bck)
-       inc     %rdi
-       sub     %r8, %r14
+       inc     %r9
+       sub     %r8, %r10
         sbb     %r11, %rbx
         jmp     L(bck)
-',`
-L(fix):        jb      L(88)
-       cmp     %r8, %r9
-       jb      L(bck)
-L(88): inc     %r10
-       sub     %r8, %r9
-       sbb     %r11, %rax
-       jmp     L(bck)
-')
  EPILOGUE()
diff --git a/mpn/x86_64/dos64.m4 b/mpn/x86_64/dos64.m4

new file mode 100644 (file)

index 0000000..6263f85
--- /dev/null
+++ b/mpn/x86_64/dos64.m4
@@ -0,0 +1,82 @@
+divert(-1)
+dnl  Copyright 2011, 2012 Free Software Foundation, Inc.
+dnl
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
+dnl
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+define(`HOST_DOS64')
+
+
+dnl  On DOS64 we always generate position-independent-code
+dnl
+
+define(`PIC')
+
+
+define(`LEA',`
+       lea     $1(%rip), $2
+')
+
+
+dnl  Usage: JUMPTABSECT
+
+define(`JUMPTABSECT', `RODATA')
+
+
+dnl  Usage: JMPENT(targlabel,tablabel)
+
+define(`JMPENT', `.long        $1-$2')
+
+
+dnl  Usage: FUNC_ENTRY(nregparams)
+dnl  Usage: FUNC_EXIT()
+dnl
+dnl  FUNC_ENTRY and FUNC_EXIT provide an easy path for adapting standard ABI
+dnl  assembly to the DOS64 ABI.  FUNC_ENTRY pushes rdi and rsi, then moves the
+dnl  first nregparams (1 to 4) arguments from rcx,rdx,r8,r9 to rdi,rsi,rdx,rcx.
+define(`FUNC_ENTRY',
+       `push   %rdi
+       push    %rsi
+       mov     %rcx, %rdi
+ifelse(eval($1>=2),1,`dnl
+       mov     %rdx, %rsi
+ifelse(eval($1>=3),1,`dnl
+       mov     %r8, %rdx
+ifelse(eval($1>=4),1,`dnl
+       mov     %r9, %rcx
+')')')')
+dnl  FUNC_EXIT pops rsi and rdi again, undoing the pushes made by FUNC_ENTRY.
+define(`FUNC_EXIT',
+       `pop    %rsi
+       pop     %rdi')
+
+
+dnl  Target ABI macros.  For DOS64 we override the defaults.
+
+define(`IFDOS',   `$1')
+define(`IFSTD',   `')
+define(`IFELF',   `')
+
+
+dnl  Usage: PROTECT(symbol)
+dnl
+dnl  Used for private GMP symbols that should never be overridden by users.
+dnl  This can save reloc entries and improve shlib sharing as well as
+dnl  application startup times
+
+define(`PROTECT',  `')
+
+
+divert`'dnl
diff --git a/mpn/x86_64/fastsse/README b/mpn/x86_64/fastsse/README

new file mode 100644 (file)

index 0000000..8399efd
--- /dev/null
+++ b/mpn/x86_64/fastsse/README
@@ -0,0 +1,20 @@
+This directory contains code for x86-64 processors with fast
+implementations of SSE operations, hence the name "fastsse".
+
+Current processors that might benefit from this code are:
+
+  AMD K10
+  AMD Bulldozer
+  Intel Nocona
+  Intel Nehalem/Westmere
+  Intel Sandybridge/Ivybridge
+  VIA Nano
+
+Current processors that do not benefit from this code are:
+
+  AMD K8
+  AMD Bobcat
+  Intel Atom
+
+Intel Conroe/Penryn is a borderline case; its handling of non-aligned
+128-bit memory operands is poor.
diff --git a/mpn/x86_64/fastsse/com.asm b/mpn/x86_64/fastsse/com.asm

new file mode 100644 (file)

index 0000000..775b1c9
--- /dev/null
+++ b/mpn/x86_64/fastsse/com.asm
@@ -0,0 +1,150 @@
+dnl  AMD64 mpn_com optimised for CPUs with fast SSE.
+
+dnl  Copyright 2003, 2005, 2007, 2011, 2012 Free Software Foundation, Inc.
+
+dnl  Contributed to the GNU project by Torbjorn Granlund.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C           cycles/limb     cycles/limb     cycles/limb      good
+C              aligned       unaligned       best seen      for cpu?
+C AMD K8,K9     2.0             2.0                            N
+C AMD K10       0.85            1.3                            Y/N
+C AMD bd1       1.40            1.40                           Y
+C AMD bobcat    3.1             3.1                            N
+C Intel P4      2.28            illop                          Y
+C Intel core2   1.02            1.02                           N
+C Intel NHM     0.53            0.68                           Y
+C Intel SBR     0.51            0.75                           Y
+C Intel atom    3.68            3.68                           N
+C VIA nano      1.17            5.09                           Y/N
+
+C We try to do as many 16-byte operations as possible.  The top-most and
+C bottom-most writes might need 8-byte operations.  We can always write using
+C aligned 16-byte operations, we read with both aligned and unaligned 16-byte
+C operations.
+
+C Instead of having separate loops for reading aligned and unaligned, we read
+C using MOVDQU.  This seems to work great except for core2; there performance
+C doubles when reading using MOVDQA (for aligned source).  It is unclear how to
+C best handle the unaligned case there.
+
+C INPUT PARAMETERS
+define(`rp', `%rdi')
+define(`up', `%rsi')
+define(`n',  `%rdx')
+
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
+
+ASM_START()
+       TEXT
+       ALIGN(16)
+PROLOGUE(mpn_com)
+       FUNC_ENTRY(3)
+
+       test    n, n                    C n = 0?
+       jz      L(don)                  C yes, nothing to complement
+
+       pcmpeqb %xmm7, %xmm7            C set to 111...111
+
+       test    $8, R8(rp)              C is rp 16-byte aligned?
+       jz      L(ali)                  C jump if rp aligned
+       mov     (up), %rax              C rp unaligned: do one limb with 64-bit ops
+       lea     8(up), up
+       not     %rax                    C complement the limb
+       mov     %rax, (rp)
+       lea     8(rp), rp               C step rp past that limb
+       dec     n
+
+       sub     $14, n                  C at least 14 limbs left?
+       jc      L(sma)                  C no, finish in the tail code
+
+       ALIGN(16)
+L(top):        movdqu  (up), %xmm0     C load 14 limbs as 7 x 16 bytes, any alignment
+       movdqu  16(up), %xmm1
+       movdqu  32(up), %xmm2
+       movdqu  48(up), %xmm3
+       movdqu  64(up), %xmm4
+       movdqu  80(up), %xmm5
+       movdqu  96(up), %xmm6
+       lea     112(up), up
+       pxor    %xmm7, %xmm0            C complement by xor with the all-ones xmm7
+       pxor    %xmm7, %xmm1
+       pxor    %xmm7, %xmm2
+       pxor    %xmm7, %xmm3
+       pxor    %xmm7, %xmm4
+       pxor    %xmm7, %xmm5
+       pxor    %xmm7, %xmm6
+       movdqa  %xmm0, (rp)             C store 14 limbs using aligned 16-byte writes
+       movdqa  %xmm1, 16(rp)
+       movdqa  %xmm2, 32(rp)
+       movdqa  %xmm3, 48(rp)
+       movdqa  %xmm4, 64(rp)
+       movdqa  %xmm5, 80(rp)
+       movdqa  %xmm6, 96(rp)
+       lea     112(rp), rp
+L(ali):        sub     $14, n          C another 14 limbs available?
+       jnc     L(top)                  C yes, run the main loop
+
+L(sma):        add     $14, n          C undo the subtraction, n = limbs left (0 to 13)
+       test    $8, R8(n)               C 8 limbs (4 x 16 bytes) to do?
+       jz      1f
+       movdqu  (up), %xmm0
+       movdqu  16(up), %xmm1
+       movdqu  32(up), %xmm2
+       movdqu  48(up), %xmm3
+       lea     64(up), up
+       pxor    %xmm7, %xmm0
+       pxor    %xmm7, %xmm1
+       pxor    %xmm7, %xmm2
+       pxor    %xmm7, %xmm3
+       movdqa  %xmm0, (rp)
+       movdqa  %xmm1, 16(rp)
+       movdqa  %xmm2, 32(rp)
+       movdqa  %xmm3, 48(rp)
+       lea     64(rp), rp
+1:
+       test    $4, R8(n)               C 4 limbs (2 x 16 bytes) to do?
+       jz      1f
+       movdqu  (up), %xmm0
+       movdqu  16(up), %xmm1
+       lea     32(up), up
+       pxor    %xmm7, %xmm0
+       pxor    %xmm7, %xmm1
+       movdqa  %xmm0, (rp)
+       movdqa  %xmm1, 16(rp)
+       lea     32(rp), rp
+1:
+       test    $2, R8(n)               C 2 limbs (16 bytes) to do?
+       jz      1f
+       movdqu  (up), %xmm0
+       lea     16(up), up
+       pxor    %xmm7, %xmm0
+       movdqa  %xmm0, (rp)
+       lea     16(rp), rp
+1:
+       test    $1, R8(n)               C one final limb to do?
+       jz      1f
+       mov     (up), %rax              C last limb is handled with 64-bit ops
+       not     %rax
+       mov     %rax, (rp)
+1:
+L(don):        FUNC_EXIT()
+       ret
+EPILOGUE()
diff --git a/mpn/x86_64/fastsse/copyd-palignr.asm b/mpn/x86_64/fastsse/copyd-palignr.asm

new file mode 100644 (file)

index 0000000..0b058ad
--- /dev/null
+++ b/mpn/x86_64/fastsse/copyd-palignr.asm
@@ -0,0 +1,235 @@
+dnl  AMD64 mpn_copyd optimised for CPUs with fast SSE copying and SSSE3.
+
+dnl  Copyright 2012 Free Software Foundation, Inc.
+
+dnl  Contributed to the GNU project by Torbjorn Granlund.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C           cycles/limb     cycles/limb     cycles/limb      good
+C              aligned       unaligned       best seen      for cpu?
+C AMD K8,K9     2.0             illop          1.0/1.0         N
+C AMD K10       0.85            illop                          Y/N
+C AMD bd1       1.39            1.40                           Y
+C AMD bobcat    1.97            8.35           1.5/1.5         N
+C Intel P4      2.26            illop                          Y/N
+C Intel core2   0.52           0.68-0.80       opt/0.68        Y
+C Intel NHM     0.52            0.64           opt/opt         Y
+C Intel SBR     0.51            0.54           opt/0.51        Y
+C Intel atom    1.16            1.66           opt/opt         Y
+C VIA nano      1.09            1.07           opt/opt         Y
+
+C We use only 16-byte operations, except for unaligned top-most and bottom-most
+C limbs.  We use the SSSE3 palignr instruction when rp - up = 8 (mod 16).
+C
+C For operands of < COPYD_SSE_THRESHOLD limbs, we use a plain 64-bit loop,
+C taken from the x86_64 default code.
+
+C INPUT PARAMETERS
+define(`rp', `%rdi')
+define(`up', `%rsi')
+define(`n',  `%rdx')
+
+C There are three instructions for loading an aligned 128-bit quantity.  We use
+C movaps, since it has the shortest coding.
+define(`movdqa', ``movaps'')
+
+ifdef(`COPYD_SSE_THRESHOLD',`',`define(`COPYD_SSE_THRESHOLD', 7)')
+
+ASM_START()
+       TEXT
+       ALIGN(64)
+PROLOGUE(mpn_copyd)
+       FUNC_ENTRY(3)
+
+       lea     -8(up,n,8), up          C point up at the highest-address source limb
+       lea     -8(rp,n,8), rp          C point rp at the highest-address dest limb
+
+       cmp     $COPYD_SSE_THRESHOLD, n
+       jbe     L(bc)                   C small operand, use the 64-bit basecase
+
+       bt      $3, R32(rp)             C is rp 16-byte aligned?
+       jc      L(rp_aligned)           C jump if rp aligned
+
+       mov     (up), %rax              C copy one limb
+       mov     %rax, (rp)
+       lea     -8(up), up
+       lea     -8(rp), rp
+       dec     n
+
+L(rp_aligned):
+       bt      $3, R32(up)             C same address bit test, now on up
+       jnc     L(uent)                 C rp - up = 8 (mod 16), use the palignr code
+
+ifelse(eval(COPYD_SSE_THRESHOLD >= 8),1,
+`      sub     $8, n',
+`      jmp     L(am)')
+
+       ALIGN(16)
+L(atop):movdqa -8(up), %xmm0           C copy 8 limbs as 4 x 16 bytes, going downwards
+       movdqa  -24(up), %xmm1
+       movdqa  -40(up), %xmm2
+       movdqa  -56(up), %xmm3
+       lea     -64(up), up
+       movdqa  %xmm0, -8(rp)
+       movdqa  %xmm1, -24(rp)
+       movdqa  %xmm2, -40(rp)
+       movdqa  %xmm3, -56(rp)
+       lea     -64(rp), rp
+L(am): sub     $8, n                   C another 8 limbs available?
+       jnc     L(atop)
+
+       bt      $2, R32(n)              C low bits of n give the leftover: 4 limbs?
+       jnc     1f
+       movdqa  -8(up), %xmm0
+       movdqa  -24(up), %xmm1
+       lea     -32(up), up
+       movdqa  %xmm0, -8(rp)
+       movdqa  %xmm1, -24(rp)
+       lea     -32(rp), rp
+
+1:     bt      $1, R32(n)              C 2 limbs?
+       jnc     1f
+       movdqa  -8(up), %xmm0
+       lea     -16(up), up
+       movdqa  %xmm0, -8(rp)
+       lea     -16(rp), rp
+
+1:     bt      $0, n                   C one final limb?
+       jnc     1f
+       mov     (up), %r8
+       mov     %r8, (rp)
+
+1:     FUNC_EXIT()
+       ret
+
+L(uent):sub    $16, n                  C at least 16 limbs left?  flags used by jc below
+       movdqa  (up), %xmm0             C preload 16 bytes at up for the first palignr
+       jc      L(uend)                 C no, go to the tail code
+
+       ALIGN(16)
+L(utop):sub    $16, n                  C 16 limbs per iteration; flags used by jnc below
+       movdqa  -16(up), %xmm1
+       palignr($8, %xmm1, %xmm0)       C join two adjacent loads, shifted by 8 bytes
+       movdqa  %xmm0, -8(rp)
+       movdqa  -32(up), %xmm2
+       palignr($8, %xmm2, %xmm1)
+       movdqa  %xmm1, -24(rp)
+       movdqa  -48(up), %xmm3
+       palignr($8, %xmm3, %xmm2)
+       movdqa  %xmm2, -40(rp)
+       movdqa  -64(up), %xmm0
+       palignr($8, %xmm0, %xmm3)
+       movdqa  %xmm3, -56(rp)
+       movdqa  -80(up), %xmm1
+       palignr($8, %xmm1, %xmm0)
+       movdqa  %xmm0, -72(rp)
+       movdqa  -96(up), %xmm2
+       palignr($8, %xmm2, %xmm1)
+       movdqa  %xmm1, -88(rp)
+       movdqa  -112(up), %xmm3
+       palignr($8, %xmm3, %xmm2)
+       movdqa  %xmm2, -104(rp)
+       movdqa  -128(up), %xmm0         C xmm0 carries over into the next iteration
+       palignr($8, %xmm0, %xmm3)
+       movdqa  %xmm3, -120(rp)
+       lea     -128(up), up
+       lea     -128(rp), rp
+       jnc     L(utop)                 C tests the sub at L(utop); nothing since wrote flags
+
+L(uend):bt     $3, R32(n)              C low bits of n give the leftover: 8 limbs?
+       jnc     1f
+       movdqa  -16(up), %xmm1
+       palignr($8, %xmm1, %xmm0)
+       movdqa  %xmm0, -8(rp)
+       movdqa  -32(up), %xmm0
+       palignr($8, %xmm0, %xmm1)
+       movdqa  %xmm1, -24(rp)
+       movdqa  -48(up), %xmm1
+       palignr($8, %xmm1, %xmm0)
+       movdqa  %xmm0, -40(rp)
+       movdqa  -64(up), %xmm0
+       palignr($8, %xmm0, %xmm1)
+       movdqa  %xmm1, -56(rp)
+       lea     -64(up), up
+       lea     -64(rp), rp
+
+1:     bt      $2, R32(n)              C 4 limbs?
+       jnc     1f
+       movdqa  -16(up), %xmm1
+       palignr($8, %xmm1, %xmm0)
+       movdqa  %xmm0, -8(rp)
+       movdqa  -32(up), %xmm0
+       palignr($8, %xmm0, %xmm1)
+       movdqa  %xmm1, -24(rp)
+       lea     -32(up), up
+       lea     -32(rp), rp
+
+1:     bt      $1, R32(n)              C 2 limbs?
+       jnc     1f
+       movdqa  -16(up), %xmm1
+       palignr($8, %xmm1, %xmm0)
+       movdqa  %xmm0, -8(rp)
+       lea     -16(up), up
+       lea     -16(rp), rp
+
+1:     bt      $0, n                   C one final limb?
+       jnc     1f
+       mov     (up), %r8
+       mov     %r8, (rp)
+
+1:     FUNC_EXIT()
+       ret
+
+C Basecase code.  Needed for good speed on small operands, not for
+C correctness as the above code is currently written.
+
+L(bc): sub     $4, R32(n)              C at least 4 limbs?
+       jc      L(end)                  C no, only the 0 to 3 limb tail remains
+
+       ALIGN(16)
+L(top):        mov     (up), %r8       C copy 4 limbs with plain 64-bit moves
+       mov     -8(up), %r9
+       lea     -32(rp), rp
+       mov     -16(up), %r10
+       mov     -24(up), %r11
+       lea     -32(up), up
+       mov     %r8, 32(rp)             C offsets allow for rp already moved down by 32
+       mov     %r9, 24(rp)
+ifelse(eval(COPYD_SSE_THRESHOLD >= 8),1,
+`      sub     $4, R32(n)')
+       mov     %r10, 16(rp)
+       mov     %r11, 8(rp)
+ifelse(eval(COPYD_SSE_THRESHOLD >= 8),1,
+`      jnc     L(top)')
+
+L(end):        bt      $0, R32(n)      C one limb left over?
+       jnc     1f
+       mov     (up), %r8
+       mov     %r8, (rp)
+       lea     -8(rp), rp
+       lea     -8(up), up
+1:     bt      $1, R32(n)              C two limbs left over?
+       jnc     1f
+       mov     (up), %r8
+       mov     -8(up), %r9
+       mov     %r8, (rp)
+       mov     %r9, -8(rp)
+1:     FUNC_EXIT()
+       ret
+EPILOGUE()
diff --git a/mpn/x86_64/fastsse/copyd.asm b/mpn/x86_64/fastsse/copyd.asm

new file mode 100644 (file)

index 0000000..c5fd7b3
--- /dev/null
+++ b/mpn/x86_64/fastsse/copyd.asm
@@ -0,0 +1,134 @@
+dnl  AMD64 mpn_copyd optimised for CPUs with fast SSE.
+
+dnl  Copyright 2003, 2005, 2007, 2011, 2012 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+C          cycles/limb           good for cpu?
+C AMD K8,K9
+C AMD K10       0.85                   Y
+C AMD bd1       0.8                    Y
+C AMD bobcat
+C Intel P4      2.28                   Y
+C Intel core2   1
+C Intel NHM     0.5                    Y
+C Intel SBR     0.5                    Y
+C Intel atom
+C VIA nano      1.1                    Y
+
+C We try to do as many 16-byte operations as possible.  The top-most and
+C bottom-most writes might need 8-byte operations.  We can always write using
+C aligned 16-byte operations, we read with both aligned and unaligned 16-byte
+C operations.
+
+C Instead of having separate loops for reading aligned and unaligned, we read
+C using MOVDQU.  This seems to work great except for core2; there performance
+C doubles when reading using MOVDQA (for aligned source).  It is unclear how to
+C best handle the unaligned case there.
+
+C INPUT PARAMETERS: rp = destination, up = source, n = number of 8-byte limbs
+define(`rp', `%rdi')
+define(`up', `%rsi')
+define(`n',  `%rdx')
+
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
+
+ASM_START()
+       TEXT
+       ALIGN(16)
+PROLOGUE(mpn_copyd)
+       FUNC_ENTRY(3)
+
+       test    n, n
+       jz      L(don)                  C nothing to copy when n = 0
+
+       lea     -16(rp,n,8), rp         C rp -> top-most 16 bytes of destination
+       lea     -16(up,n,8), up         C up -> top-most 16 bytes of source
+
+       test    $8, R8(rp)              C is rp 16-byte aligned?
+       jz      L(ali)                  C jump if rp aligned
+       mov     8(up), %rax             C copy the top-most limb by itself
+       lea     -8(up), up
+       mov     %rax, 8(rp)
+       lea     -8(rp), rp              C bit 3 of rp is now clear
+       dec     n
+
+       sub     $16, n
+       jc      L(sma)                  C fewer than 16 limbs remain
+
+       ALIGN(16)
+L(top):        movdqu  (up), %xmm0     C main loop: 16 limbs (128 bytes) per pass
+       movdqu  -16(up), %xmm1
+       movdqu  -32(up), %xmm2
+       movdqu  -48(up), %xmm3
+       movdqu  -64(up), %xmm4
+       movdqu  -80(up), %xmm5
+       movdqu  -96(up), %xmm6
+       movdqu  -112(up), %xmm7
+       lea     -128(up), up
+       movdqa  %xmm0, (rp)             C all 8 loads are issued before the first store
+       movdqa  %xmm1, -16(rp)
+       movdqa  %xmm2, -32(rp)
+       movdqa  %xmm3, -48(rp)
+       movdqa  %xmm4, -64(rp)
+       movdqa  %xmm5, -80(rp)
+       movdqa  %xmm6, -96(rp)
+       movdqa  %xmm7, -112(rp)
+       lea     -128(rp), rp
+L(ali):        sub     $16, n
+       jnc     L(top)
+
+L(sma):        test    $8, R8(n)       C low 4 bits of n = limbs still to copy
+       jz      1f
+       movdqu  (up), %xmm0             C bit 3 set: copy 8 limbs
+       movdqu  -16(up), %xmm1
+       movdqu  -32(up), %xmm2
+       movdqu  -48(up), %xmm3
+       lea     -64(up), up
+       movdqa  %xmm0, (rp)
+       movdqa  %xmm1, -16(rp)
+       movdqa  %xmm2, -32(rp)
+       movdqa  %xmm3, -48(rp)
+       lea     -64(rp), rp
+1:
+       test    $4, R8(n)
+       jz      1f
+       movdqu  (up), %xmm0             C bit 2 set: copy 4 limbs
+       movdqu  -16(up), %xmm1
+       lea     -32(up), up
+       movdqa  %xmm0, (rp)
+       movdqa  %xmm1, -16(rp)
+       lea     -32(rp), rp
+1:
+       test    $2, R8(n)
+       jz      1f
+       movdqu  (up), %xmm0             C bit 1 set: copy 2 limbs
+       lea     -16(up), up
+       movdqa  %xmm0, (rp)
+       lea     -16(rp), rp
+1:
+       test    $1, R8(n)
+       jz      1f
+       mov     8(up), %r8              C bit 0 set: last limb, upper half of the slot
+       mov     %r8, 8(rp)
+1:
+L(don):        FUNC_EXIT()
+       ret
+EPILOGUE()
diff --git a/mpn/x86_64/fastsse/copyi-palignr.asm b/mpn/x86_64/fastsse/copyi-palignr.asm

new file mode 100644 (file)

index 0000000..d968b5a
--- /dev/null
+++ b/mpn/x86_64/fastsse/copyi-palignr.asm
@@ -0,0 +1,252 @@
+dnl  AMD64 mpn_copyi optimised for CPUs with fast SSE copying and SSSE3.
+
+dnl  Copyright 2012 Free Software Foundation, Inc.
+
+dnl  Contributed to the GNU project by Torbjorn Granlund.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C           cycles/limb     cycles/limb     cycles/limb      good
+C              aligned       unaligned       best seen      for cpu?
+C AMD K8,K9     2.0             illop          1.0/1.0         N
+C AMD K10       0.85            illop                          Y/N
+C AMD bd1       1.39            ? 1.45                         Y/N
+C AMD bobcat    1.97            ? 8.17         1.5/1.5         N
+C Intel P4      2.26            illop                          Y/N
+C Intel core2   0.52            0.82           opt/0.74        Y
+C Intel NHM     0.52            0.65           opt/opt         Y
+C Intel SBR     0.51            0.55           opt/0.51        Y
+C Intel atom    1.16            1.70           opt/opt         Y
+C VIA nano      1.09            1.10           opt/opt         Y
+
+C We use only 16-byte operations, except for unaligned top-most and bottom-most
+C limbs.  We use the SSSE3 palignr instruction when rp - up = 8 (mod 16).  That
+C instruction is better adapted to mpn_copyd's needs; we need to contort the
+C code to use it here.
+C
+C For operands of < COPYI_SSE_THRESHOLD limbs, we use a plain 64-bit loop,
+C taken from the x86_64 default code.
+
+C INPUT PARAMETERS: rp = destination, up = source, n = number of 8-byte limbs
+define(`rp', `%rdi')
+define(`up', `%rsi')
+define(`n',  `%rdx')
+
+C There are three instructions for loading an aligned 128-bit quantity.  We use
+C movaps, since it has the shortest coding.
+define(`movdqa', ``movaps'')
+
+ifdef(`COPYI_SSE_THRESHOLD',`',`define(`COPYI_SSE_THRESHOLD', 7)')
+
+ASM_START()
+       TEXT
+       ALIGN(64)
+PROLOGUE(mpn_copyi)
+       FUNC_ENTRY(3)
+
+       cmp     $COPYI_SSE_THRESHOLD, n
+       jbe     L(bc)                   C n <= threshold: plain 64-bit loop
+
+       bt      $3, R32(rp)             C is rp 16-byte aligned?
+       jnc     L(rp_aligned)           C jump if rp aligned
+
+       movsq                           C copy one limb
+       dec     n
+
+L(rp_aligned):
+       bt      $3, R32(up)
+       jc      L(uent)                 C bit 3 of up set: take the palignr path
+
+ifelse(eval(COPYI_SSE_THRESHOLD >= 8),1,
+`      sub     $8, n',
+`      jmp     L(am)')
+
+       ALIGN(16)
+L(atop):movdqa 0(up), %xmm0            C aligned loop: 8 limbs (64 bytes) per pass
+       movdqa  16(up), %xmm1
+       movdqa  32(up), %xmm2
+       movdqa  48(up), %xmm3
+       lea     64(up), up
+       movdqa  %xmm0, (rp)
+       movdqa  %xmm1, 16(rp)
+       movdqa  %xmm2, 32(rp)
+       movdqa  %xmm3, 48(rp)
+       lea     64(rp), rp
+L(am): sub     $8, n
+       jnc     L(atop)
+
+       bt      $2, R32(n)              C low 3 bits of n = limbs still to copy
+       jnc     1f
+       movdqa  (up), %xmm0             C bit 2 set: copy 4 limbs
+       movdqa  16(up), %xmm1
+       lea     32(up), up
+       movdqa  %xmm0, (rp)
+       movdqa  %xmm1, 16(rp)
+       lea     32(rp), rp
+
+1:     bt      $1, R32(n)
+       jnc     1f
+       movdqa  (up), %xmm0             C bit 1 set: copy 2 limbs
+       lea     16(up), up
+       movdqa  %xmm0, (rp)
+       lea     16(rp), rp
+
+1:     bt      $0, n
+       jnc     1f
+       mov     (up), %r8               C bit 0 set: copy the final limb
+       mov     %r8, (rp)
+
+1:     FUNC_EXIT()
+       ret
+
+L(uent):
+C Code handling up - rp = 8 (mod 16)
+
+C FIXME: The code below only handles overlap if it is close to complete, or
+C quite separate: up-rp < 5 or up-rp > 15 limbs
+       lea     -40(up), %rax           C 40 = 5 * GMP_LIMB_BYTES
+       sub     rp, %rax
+       cmp     $80, %rax               C 80 = (15-5) * GMP_LIMB_BYTES
+       jbe     L(bc)                   C deflect to plain loop
+
+       sub     $16, n
+       jc      L(uend)                 C fewer than 16 limbs: wind-down code only
+
+       movdqa  120(up), %xmm3          C bit 3 of up is set, so up+120 has bit 3 clear
+
+       sub     $16, n                  C carry from this sub is tested by jnc L(utop)
+       jmp     L(um)
+
+       ALIGN(16)
+L(utop):movdqa 120(up), %xmm3
+       movdqa  %xmm0, -128(rp)         C lowest chunk of the previous pass
+       sub     $16, n
+L(um): movdqa  104(up), %xmm2          C 16 limbs per pass, highest chunk first
+       palignr($8, %xmm2, %xmm3)       C combine two adjacent loads into one store chunk
+       movdqa  88(up), %xmm1
+       movdqa  %xmm3, 112(rp)
+       palignr($8, %xmm1, %xmm2)
+       movdqa  72(up), %xmm0
+       movdqa  %xmm2, 96(rp)
+       palignr($8, %xmm0, %xmm1)
+       movdqa  56(up), %xmm3
+       movdqa  %xmm1, 80(rp)
+       palignr($8, %xmm3, %xmm0)
+       movdqa  40(up), %xmm2
+       movdqa  %xmm0, 64(rp)
+       palignr($8, %xmm2, %xmm3)
+       movdqa  24(up), %xmm1
+       movdqa  %xmm3, 48(rp)
+       palignr($8, %xmm1, %xmm2)
+       movdqa  8(up), %xmm0
+       movdqa  %xmm2, 32(rp)
+       palignr($8, %xmm0, %xmm1)
+       movdqa  -8(up), %xmm3           C this load starts one limb below up
+       movdqa  %xmm1, 16(rp)
+       palignr($8, %xmm3, %xmm0)
+       lea     128(up), up
+       lea     128(rp), rp
+       jnc     L(utop)                 C flags are still those of the last sub $16, n
+
+       movdqa  %xmm0, -128(rp)         C store the chunk left pending by the loop
+
+L(uend):bt     $3, R32(n)              C low 4 bits of n = limbs still to copy
+       jnc     1f
+       movdqa  56(up), %xmm3           C bit 3 set: copy 8 limbs
+       movdqa  40(up), %xmm2
+       palignr($8, %xmm2, %xmm3)
+       movdqa  24(up), %xmm1
+       movdqa  %xmm3, 48(rp)
+       palignr($8, %xmm1, %xmm2)
+       movdqa  8(up), %xmm0
+       movdqa  %xmm2, 32(rp)
+       palignr($8, %xmm0, %xmm1)
+       movdqa  -8(up), %xmm3
+       movdqa  %xmm1, 16(rp)
+       palignr($8, %xmm3, %xmm0)
+       lea     64(up), up
+       movdqa  %xmm0, (rp)
+       lea     64(rp), rp
+
+1:     bt      $2, R32(n)
+       jnc     1f
+       movdqa  24(up), %xmm1           C bit 2 set: copy 4 limbs
+       movdqa  8(up), %xmm0
+       palignr($8, %xmm0, %xmm1)
+       movdqa  -8(up), %xmm3
+       movdqa  %xmm1, 16(rp)
+       palignr($8, %xmm3, %xmm0)
+       lea     32(up), up
+       movdqa  %xmm0, (rp)
+       lea     32(rp), rp
+
+1:     bt      $1, R32(n)
+       jnc     1f
+       movdqa  8(up), %xmm0            C bit 1 set: copy 2 limbs
+       movdqa  -8(up), %xmm3
+       palignr($8, %xmm3, %xmm0)
+       lea     16(up), up
+       movdqa  %xmm0, (rp)
+       lea     16(rp), rp
+
+1:     bt      $0, n
+       jnc     1f
+       mov     (up), %r8               C bit 0 set: copy the final limb
+       mov     %r8, (rp)
+
+1:     FUNC_EXIT()
+       ret
+
+C Basecase code.  Needed for good small operands speed, not for
+C correctness as the above code is currently written.
+
+L(bc): lea     -8(rp), rp              C bias rp by one limb; store offsets below account for it
+       sub     $4, R32(n)
+       jc      L(end)                  C fewer than 4 limbs
+
+       ALIGN(16)
+L(top):        mov     (up), %r8       C 4 limbs per pass through general registers
+       mov     8(up), %r9
+       lea     32(rp), rp
+       mov     16(up), %r10
+       mov     24(up), %r11
+       lea     32(up), up
+       mov     %r8, -24(rp)
+       mov     %r9, -16(rp)
+ifelse(eval(1 || COPYI_SSE_THRESHOLD >= 8),1,
+`      sub     $4, R32(n)')
+       mov     %r10, -8(rp)
+       mov     %r11, (rp)
+ifelse(eval(1 || COPYI_SSE_THRESHOLD >= 8),1,
+`      jnc     L(top)')
+
+L(end):        bt      $0, R32(n)      C low 2 bits of n = limbs still to copy
+       jnc     1f
+       mov     (up), %r8               C bit 0 set: copy 1 limb
+       mov     %r8, 8(rp)
+       lea     8(rp), rp
+       lea     8(up), up
+1:     bt      $1, R32(n)
+       jnc     1f
+       mov     (up), %r8               C bit 1 set: copy 2 limbs
+       mov     8(up), %r9
+       mov     %r8, 8(rp)
+       mov     %r9, 16(rp)
+1:     FUNC_EXIT()
+       ret
+EPILOGUE()
diff --git a/mpn/x86_64/fastsse/copyi.asm b/mpn/x86_64/fastsse/copyi.asm

new file mode 100644 (file)

index 0000000..60c5f9a
--- /dev/null
+++ b/mpn/x86_64/fastsse/copyi.asm
@@ -0,0 +1,153 @@
+dnl  AMD64 mpn_copyi optimised for CPUs with fast SSE.
+
+dnl  Copyright 2003, 2005, 2007, 2011, 2012 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+C          cycles/limb           good for cpu?
+C AMD K8,K9
+C AMD K10       0.85    1.64           Y/N
+C AMD bd1       1.4     1.4            Y
+C AMD bobcat
+C Intel P4      2.3     2.3            Y
+C Intel core2   1.0     1.0
+C Intel NHM     0.5     0.67           Y
+C Intel SBR     0.5     0.75           Y
+C Intel atom
+C VIA nano      1.16    5.16           Y/N
+
+C We try to do as many 16-byte operations as possible.  The top-most and
+C bottom-most writes might need 8-byte operations.  We can always write using
+C aligned 16-byte operations, we read with both aligned and unaligned 16-byte
+C operations.
+
+C Instead of having separate loops for reading aligned and unaligned, we read
+C using MOVDQU.  This seems to work great except for core2; there performance
+C doubles when reading using MOVDQA (for aligned source).  It is unclear how to
+C best handle the unaligned case there.
+
+C INPUT PARAMETERS: rp = destination, up = source, n = number of 8-byte limbs
+define(`rp', `%rdi')
+define(`up', `%rsi')
+define(`n',  `%rdx')
+
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
+
+dnl define(`movdqu', lddqu)
+
+ASM_START()
+       TEXT
+       ALIGN(64)
+PROLOGUE(mpn_copyi)
+       FUNC_ENTRY(3)
+
+       cmp     $3, n
+       jc      L(bc)                   C n < 3: use the basecase code
+
+       test    $8, R8(rp)              C is rp 16-byte aligned?
+       jz      L(ali)                  C jump if rp aligned
+       movsq                           C copy single limb
+       dec     n
+
+       sub     $16, n
+       jc      L(sma)                  C fewer than 16 limbs remain
+
+       ALIGN(16)
+L(top):        movdqu  (up), %xmm0     C main loop: 16 limbs (128 bytes) per pass
+       movdqu  16(up), %xmm1
+       movdqu  32(up), %xmm2
+       movdqu  48(up), %xmm3
+       movdqu  64(up), %xmm4
+       movdqu  80(up), %xmm5
+       movdqu  96(up), %xmm6
+       movdqu  112(up), %xmm7
+       lea     128(up), up
+       movdqa  %xmm0, (rp)             C all 8 loads are issued before the first store
+       movdqa  %xmm1, 16(rp)
+       movdqa  %xmm2, 32(rp)
+       movdqa  %xmm3, 48(rp)
+       movdqa  %xmm4, 64(rp)
+       movdqa  %xmm5, 80(rp)
+       movdqa  %xmm6, 96(rp)
+       movdqa  %xmm7, 112(rp)
+       lea     128(rp), rp
+L(ali):        sub     $16, n
+       jnc     L(top)
+
+L(sma):        test    $8, R8(n)       C low 4 bits of n = limbs still to copy
+       jz      1f
+       movdqu  (up), %xmm0             C bit 3 set: copy 8 limbs
+       movdqu  16(up), %xmm1
+       movdqu  32(up), %xmm2
+       movdqu  48(up), %xmm3
+       lea     64(up), up
+       movdqa  %xmm0, (rp)
+       movdqa  %xmm1, 16(rp)
+       movdqa  %xmm2, 32(rp)
+       movdqa  %xmm3, 48(rp)
+       lea     64(rp), rp
+1:
+       test    $4, R8(n)
+       jz      1f
+       movdqu  (up), %xmm0             C bit 2 set: copy 4 limbs
+       movdqu  16(up), %xmm1
+       lea     32(up), up
+       movdqa  %xmm0, (rp)
+       movdqa  %xmm1, 16(rp)
+       lea     32(rp), rp
+1:
+       test    $2, R8(n)
+       jz      1f
+       movdqu  (up), %xmm0             C bit 1 set: copy 2 limbs
+       lea     16(up), up
+       movdqa  %xmm0, (rp)
+       lea     16(rp), rp
+       ALIGN(16)
+1:
+L(end):        bt      $0, n           C also entered from L(bc) when n < 2
+       jnc     1f
+       mov     (up), %r8               C bit 0 set: copy the final limb
+       mov     %r8, (rp)
+1:
+       FUNC_EXIT()
+       ret
+
+C Basecase code.  Needed for good small operands speed, not for
+C correctness as the above code is currently written.
+
+L(bc): sub     $2, n
+       jc      L(end)                  C n < 2: at most one limb, handled at L(end)
+       ALIGN(16)
+1:     mov     (up), %rax              C 2 limbs per pass through general registers
+       mov     8(up), %rcx
+       lea     16(up), up
+       mov     %rax, (rp)
+       mov     %rcx, 8(rp)
+       lea     16(rp), rp
+       sub     $2, n
+       jnc     1b
+
+       bt      $0, n
+       jnc     L(ret)
+       mov     (up), %rax              C odd limb left over
+       mov     %rax, (rp)
+L(ret):        FUNC_EXIT()
+       ret
+EPILOGUE()
diff --git a/mpn/x86_64/fastsse/lshift-movdqu2.asm b/mpn/x86_64/fastsse/lshift-movdqu2.asm

new file mode 100644 (file)

index 0000000..ea34b39
--- /dev/null
+++ b/mpn/x86_64/fastsse/lshift-movdqu2.asm
@@ -0,0 +1,171 @@
+dnl  AMD64 mpn_lshift optimised for CPUs with fast SSE including fast movdqu.
+
+dnl  Contributed to the GNU project by Torbjorn Granlund.
+
+dnl  Copyright 2010, 2011, 2012 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+C           cycles/limb     cycles/limb     cycles/limb    good
+C              aligned       unaligned       best seen    for cpu?
+C AMD K8,K9     3               3               2.35     no, use shl/shr
+C AMD K10       1.5-1.8         1.5-1.8         1.33     yes
+C AMD bd1       1.7-1.9         1.7-1.9         1.33     yes
+C AMD bobcat    3.17            3.17                     yes, bad for n < 20
+C Intel P4      4.67            4.67            2.7      no, slow movdqu
+C Intel core2   2.15            2.15            1.25     no, use shld/shrd
+C Intel NHM     1.66            1.66            1.25     no, use shld/shrd
+C Intel SBR     1.3             1.3             1.25     yes, bad for n = 4-6
+C Intel atom   11.7            11.7             4.5      no
+C VIA nano      5.7             5.95            2.0      no, slow movdqu
+
+C We try to do as many aligned 16-byte operations as possible.  The top-most
+C and bottom-most writes might need 8-byte operations.
+C
+C This variant relies on fast load movdqu, and uses it even for aligned operands,
+C in order to avoid the need for two separate loops.
+C
+C TODO
+C  * Could 2-limb wind-down code be simplified?
+C  * Improve basecase code, using shld/shrd for SBR, discrete integer shifts
+C    for other affected CPUs.
+
+C INPUT PARAMETERS: rp = destination, ap = source, n = limb count, cnt = shift count
+define(`rp',  `%rdi')
+define(`ap',  `%rsi')
+define(`n',   `%rdx')
+define(`cnt', `%rcx')
+
+ASM_START()
+       TEXT
+       ALIGN(64)
+PROLOGUE(mpn_lshift)
+       FUNC_ENTRY(4)
+       movd    R32(%rcx), %xmm4        C xmm4 = cnt, the left-shift count
+       mov     $64, R32(%rax)
+       sub     R32(%rcx), R32(%rax)
+       movd    R32(%rax), %xmm5        C xmm5 = 64 - cnt, the right-shift count
+
+       neg     R32(%rcx)
+       mov     -8(ap,n,8), %rax        C top source limb
+       shr     R8(%rcx), %rax          C rax = bits shifted out of the top limb; not written again below
+
+       cmp     $3, n
+       jle     L(bc)                   C n <= 3: basecase
+
+       lea     (rp,n,8), R32(%rcx)     C rcx is reused; only low address bits are needed
+       bt      $3, R32(%rcx)           C is the end of the destination 16-byte aligned?
+       jnc     L(rp_aligned)
+
+C Do one initial limb in order to make rp aligned
+       movq    -8(ap,n,8), %xmm0       C top limb
+       movq    -16(ap,n,8), %xmm1      C limb below it
+       psllq   %xmm4, %xmm0
+       psrlq   %xmm5, %xmm1
+       por     %xmm1, %xmm0
+       movq    %xmm0, -8(rp,n,8)
+       dec     n
+
+L(rp_aligned):
+       lea     1(n), %r8d
+
+       and     $6, R32(%r8)            C (n+1) & 6 selects the entry point into the loop
+       jz      L(ba0)
+       cmp     $4, R32(%r8)
+       jz      L(ba4)
+       jc      L(ba2)                  C below 4, i.e. the value is 2
+L(ba6):        add     $-4, n
+       jmp     L(i56)
+L(ba0):        add     $-6, n
+       jmp     L(i70)
+L(ba4):        add     $-2, n
+       jmp     L(i34)
+L(ba2):        add     $-8, n
+       jle     L(end)
+
+       ALIGN(16)
+L(top):        movdqu  40(ap,n,8), %xmm1       C source pair one limb below the store position
+       movdqu  48(ap,n,8), %xmm0       C source pair at the store position
+       psllq   %xmm4, %xmm0            C << cnt
+       psrlq   %xmm5, %xmm1            C >> (64 - cnt)
+       por     %xmm1, %xmm0
+       movdqa  %xmm0, 48(rp,n,8)       C aligned store of 2 result limbs
+L(i70):
+       movdqu  24(ap,n,8), %xmm1
+       movdqu  32(ap,n,8), %xmm0
+       psllq   %xmm4, %xmm0
+       psrlq   %xmm5, %xmm1
+       por     %xmm1, %xmm0
+       movdqa  %xmm0, 32(rp,n,8)
+L(i56):
+       movdqu  8(ap,n,8), %xmm1
+       movdqu  16(ap,n,8), %xmm0
+       psllq   %xmm4, %xmm0
+       psrlq   %xmm5, %xmm1
+       por     %xmm1, %xmm0
+       movdqa  %xmm0, 16(rp,n,8)
+L(i34):
+       movdqu  -8(ap,n,8), %xmm1
+       movdqu  (ap,n,8), %xmm0
+       psllq   %xmm4, %xmm0
+       psrlq   %xmm5, %xmm1
+       por     %xmm1, %xmm0
+       movdqa  %xmm0, (rp,n,8)
+       sub     $8, n                   C 8 limbs handled per full pass
+       jg      L(top)
+
+L(end):        bt      $0, R32(n)
+       jc      L(end8)                 C bit 0 of n set: only limb 0 is left
+
+       movdqu  (ap), %xmm1             C otherwise limbs 0 and 1 are left
+       pxor    %xmm0, %xmm0
+       punpcklqdq  %xmm1, %xmm0        C xmm0 = B*ap[0] + 0
+       psllq   %xmm4, %xmm1
+       psrlq   %xmm5, %xmm0
+       por     %xmm1, %xmm0
+       movdqa  %xmm0, (rp)
+       FUNC_EXIT()
+       ret
+
+C Basecase
+       ALIGN(16)
+L(bc): dec     R32(n)
+       jz      L(end8)                 C n was 1: just the low limb
+
+       movq    (ap,n,8), %xmm1         C top limb
+       movq    -8(ap,n,8), %xmm0       C limb below it
+       psllq   %xmm4, %xmm1
+       psrlq   %xmm5, %xmm0
+       por     %xmm1, %xmm0
+       movq    %xmm0, (rp,n,8)
+       sub     $2, R32(n)
+       jl      L(end8)                 C n was 2
+       movq    8(ap), %xmm1            C n was 3: do limb 1 as well
+       movq    (ap), %xmm0
+       psllq   %xmm4, %xmm1
+       psrlq   %xmm5, %xmm0
+       por     %xmm1, %xmm0
+       movq    %xmm0, 8(rp)
+
+L(end8):movq   (ap), %xmm0             C low limb: nothing shifts in from below
+       psllq   %xmm4, %xmm0
+       movq    %xmm0, (rp)
+       FUNC_EXIT()
+       ret
+EPILOGUE()
diff --git a/mpn/x86_64/fastsse/lshift.asm b/mpn/x86_64/fastsse/lshift.asm

new file mode 100644 (file)

index 0000000..d76241e
--- /dev/null
+++ b/mpn/x86_64/fastsse/lshift.asm
@@ -0,0 +1,158 @@
+dnl  AMD64 mpn_lshift optimised for CPUs with fast SSE.
+
+dnl  Contributed to the GNU project by David Harvey and Torbjorn Granlund.
+
+dnl  Copyright 2010, 2011, 2012 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+C           cycles/limb             cycles/limb              good
+C          16-byte aligned         16-byte unaligned       for cpu?
+C AMD K8,K9     ?                       ?
+C AMD K10       1.68  (1.45)            1.75  (1.49)           Y
+C AMD bd1       1.82  (1.75)            1.82  (1.75)           Y
+C AMD bobcat    4                       4
+C Intel P4      3     (2.7)             3     (2.7)            Y
+C Intel core2   2.05  (1.67)            2.55  (1.75)
+C Intel NHM     2.05  (1.75)            2.09  (2)
+C Intel SBR     1.5   (1.3125)          1.5   (1.4375)         Y
+C Intel atom    ?                       ?
+C VIA nano      2.25  (2)               2.5   (2)              Y
+
+C We try to do as many 16-byte operations as possible.  The top-most and
+C bottom-most writes might need 8-byte operations.
+
+C There are two inner-loops, one for when rp = ap (mod 16) and one when this is
+C not true.  The aligned case reads 16+8 bytes, the unaligned case reads
+C 16+8+X bytes, where X is 8 or 16 depending on how punpcklqdq is implemented.
+
+C This is not yet great code:
+C   (1) The unaligned case makes many reads.
+C   (2) We should do some unrolling, at least 2-way.
+C With 2-way unrolling but no scheduling we reach 1.5 c/l on K10 and 2 c/l on
+C Nano.
+
+C INPUT PARAMETERS: rp = destination, ap = source, n = limb count, cnt = shift count
+define(`rp',  `%rdi')
+define(`ap',  `%rsi')
+define(`n',   `%rdx')
+define(`cnt', `%rcx')
+
+ASM_START()
+       TEXT
+       ALIGN(64)
+PROLOGUE(mpn_lshift)
+       movd    R32(%rcx), %xmm4        C xmm4 = cnt, the left-shift count
+       mov     $64, R32(%rax)
+       sub     R32(%rcx), R32(%rax)
+       movd    R32(%rax), %xmm5        C xmm5 = 64 - cnt, the right-shift count
+
+       neg     R32(%rcx)
+       mov     -8(ap,n,8), %rax        C top source limb
+       shr     R8(%rcx), %rax          C rax = bits shifted out of the top limb; not written again below
+
+       cmp     $2, n
+       jle     L(le2)                  C n <= 2 is handled separately
+
+       lea     (rp,n,8), R32(%rcx)     C rcx is reused; only low address bits are needed
+       test    $8, R8(%rcx)            C is the end of the destination 16-byte aligned?
+       je      L(rp_aligned)
+
+C Do one initial limb in order to make rp aligned
+       movq    -8(ap,n,8), %xmm0       C top limb
+       movq    -16(ap,n,8), %xmm1      C limb below it
+       psllq   %xmm4, %xmm0
+       psrlq   %xmm5, %xmm1
+       por     %xmm1, %xmm0
+       movq    %xmm0, -8(rp,n,8)
+       dec     n
+
+L(rp_aligned):
+       lea     (ap,n,8), R32(%rcx)
+       test    $8, R8(%rcx)            C does ap + 8n have bit 3 clear as well?
+       je      L(aent)                 C yes: loop with aligned source loads
+       jmp     L(uent)
+C *****************************************************************************
+
+C Handle the case when ap != rp (mod 16).
+
+       ALIGN(16)
+L(utop):movdqa -8(ap,n,8), %xmm0       C aligned load of the pair one limb lower
+       movq    (ap,n,8), %xmm1         C low limb of the pair at the store position
+       punpcklqdq  8(ap,n,8), %xmm1    C high limb; 16-byte memory operand, low half used
+       psllq   %xmm4, %xmm1
+       psrlq   %xmm5, %xmm0
+       por     %xmm1, %xmm0
+       movdqa  %xmm0, (rp,n,8)
+L(uent):sub    $2, n
+       ja      L(utop)                 C more than 2 limbs were left
+
+       jne     L(end8)                 C exactly 1 limb was left
+
+       movq    (ap), %xmm1             C exactly 2 limbs were left
+       pxor    %xmm0, %xmm0
+       punpcklqdq  %xmm1, %xmm0        C xmm0 = B*ap[0] + 0
+       punpcklqdq  8(ap), %xmm1        C xmm1 = B*ap[1] + ap[0]
+       psllq   %xmm4, %xmm1
+       psrlq   %xmm5, %xmm0
+       por     %xmm1, %xmm0
+       movdqa  %xmm0, (rp)
+       ret
+C *****************************************************************************
+
+C Handle the case when ap = rp (mod 16).
+
+       ALIGN(16)
+L(atop):movdqa (ap,n,8), %xmm0         C xmm0 = B*ap[n-1] + ap[n-2]
+       movq    -8(ap,n,8), %xmm1       C xmm1 = ap[n-3]
+       punpcklqdq  %xmm0, %xmm1        C xmm1 = B*ap[n-2] + ap[n-3]
+       psllq   %xmm4, %xmm0
+       psrlq   %xmm5, %xmm1
+       por     %xmm1, %xmm0
+       movdqa  %xmm0, (rp,n,8)
+L(aent):
+       sub     $2, n
+       ja      L(atop)                 C more than 2 limbs were left
+       jne     L(end8)                 C exactly 1 limb was left
+
+       movdqa  (ap), %xmm1             C exactly 2 limbs were left
+       pxor    %xmm0, %xmm0
+       punpcklqdq  %xmm1, %xmm0        C xmm0 = B*ap[0] + 0
+       psllq   %xmm4, %xmm1
+       psrlq   %xmm5, %xmm0
+       por     %xmm1, %xmm0
+       movdqa  %xmm0, (rp)
+       ret
+C *****************************************************************************
+
+       ALIGN(16)
+L(le2):        jne     L(end8)         C flags are still those of cmp $2, n: taken when n != 2
+
+       movq    8(ap), %xmm0            C n = 2: compute the upper result limb
+       movq    (ap), %xmm1
+       psllq   %xmm4, %xmm0
+       psrlq   %xmm5, %xmm1
+       por     %xmm1, %xmm0
+       movq    %xmm0, 8(rp)
+
+L(end8):movq   (ap), %xmm0             C low limb: nothing shifts in from below
+       psllq   %xmm4, %xmm0
+       movq    %xmm0, (rp)
+       ret
+EPILOGUE()
diff --git a/mpn/x86_64/fastsse/lshiftc-movdqu2.asm b/mpn/x86_64/fastsse/lshiftc-movdqu2.asm

new file mode 100644 (file)

index 0000000..7e816ac
--- /dev/null
+++ b/mpn/x86_64/fastsse/lshiftc-movdqu2.asm
@@ -0,0 +1,182 @@
+dnl  AMD64 mpn_lshiftc optimised for CPUs with fast SSE including fast movdqu.
+
+dnl  Contributed to the GNU project by Torbjorn Granlund.
+
+dnl  Copyright 2010, 2011, 2012 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+C           cycles/limb     cycles/limb     cycles/limb    good
+C              aligned       unaligned       best seen    for cpu?
+C AMD K8,K9     3               3               ?        no, use shl/shr
+C AMD K10       1.8-2.0         1.8-2.0         ?        yes
+C AMD bd1       1.9             1.9             ?        yes
+C AMD bobcat    3.67            3.67                     yes, bad for n < 20
+C Intel P4      4.75            4.75            ?        no, slow movdqu
+C Intel core2   2.27            2.27            ?        no, use shld/shrd
+C Intel NHM     2.15            2.15            ?        no, use shld/shrd
+C Intel SBR     1.45            1.45            ?        yes, bad for n = 4-6
+C Intel atom   12.9            12.9             ?        no
+C VIA nano      6.18            6.44            ?        no, slow movdqu
+
+C We try to do as many aligned 16-byte operations as possible.  The top-most
+C and bottom-most writes might need 8-byte operations.
+C
+C This variant relies on fast load movdqu, and uses it even for aligned operands,
+C in order to avoid the need for two separate loops.
+C
+C TODO
+C  * Could 2-limb wind-down code be simplified?
+C  * Improve basecase code, using shld/shrd for SBR, discrete integer shifts
+C    for other affected CPUs.
+
+C INPUT PARAMETERS: rp = destination, ap = source, n = limb count, cnt = shift count
+define(`rp',  `%rdi')
+define(`ap',  `%rsi')
+define(`n',   `%rdx')
+define(`cnt', `%rcx')
+
+ASM_START()
+       TEXT
+       ALIGN(64)
+PROLOGUE(mpn_lshiftc)
+       FUNC_ENTRY(4)
+       movd    R32(%rcx), %xmm4        C xmm4 = cnt, the left-shift count
+       mov     $64, R32(%rax)
+       sub     R32(%rcx), R32(%rax)
+       movd    R32(%rax), %xmm5        C xmm5 = 64 - cnt, the right-shift count
+
+       neg     R32(%rcx)
+       mov     -8(ap,n,8), %rax        C top source limb
+       shr     R8(%rcx), %rax          C rax = bits shifted out of the top limb (not complemented)
+
+       pcmpeqb %xmm3, %xmm3            C set to 111...111
+
+       cmp     $3, n
+       jle     L(bc)                   C n <= 3: basecase
+
+       lea     (rp,n,8), R32(%rcx)     C rcx is reused; only low address bits are needed
+       bt      $3, R32(%rcx)           C is the end of the destination 16-byte aligned?
+       jnc     L(rp_aligned)
+
+C Do one initial limb in order to make rp aligned
+       movq    -8(ap,n,8), %xmm0       C top limb
+       movq    -16(ap,n,8), %xmm1      C limb below it
+       psllq   %xmm4, %xmm0
+       psrlq   %xmm5, %xmm1
+       por     %xmm1, %xmm0
+       pxor    %xmm3, %xmm0            C complement the result
+       movq    %xmm0, -8(rp,n,8)
+       dec     n
+
+L(rp_aligned):
+       lea     1(n), %r8d
+
+       and     $6, R32(%r8)            C (n+1) & 6 selects the entry point into the loop
+       jz      L(ba0)
+       cmp     $4, R32(%r8)
+       jz      L(ba4)
+       jc      L(ba2)                  C below 4, i.e. the value is 2
+L(ba6):        add     $-4, n
+       jmp     L(i56)
+L(ba0):        add     $-6, n
+       jmp     L(i70)
+L(ba4):        add     $-2, n
+       jmp     L(i34)
+L(ba2):        add     $-8, n
+       jle     L(end)
+
+       ALIGN(16)
+L(top):        movdqu  40(ap,n,8), %xmm1       C source pair one limb below the store position
+       movdqu  48(ap,n,8), %xmm0       C source pair at the store position
+       psllq   %xmm4, %xmm0            C << cnt
+       psrlq   %xmm5, %xmm1            C >> (64 - cnt)
+       por     %xmm1, %xmm0
+       pxor    %xmm3, %xmm0            C complement
+       movdqa  %xmm0, 48(rp,n,8)       C aligned store of 2 result limbs
+L(i70):
+       movdqu  24(ap,n,8), %xmm1
+       movdqu  32(ap,n,8), %xmm0
+       psllq   %xmm4, %xmm0
+       psrlq   %xmm5, %xmm1
+       por     %xmm1, %xmm0
+       pxor    %xmm3, %xmm0
+       movdqa  %xmm0, 32(rp,n,8)
+L(i56):
+       movdqu  8(ap,n,8), %xmm1
+       movdqu  16(ap,n,8), %xmm0
+       psllq   %xmm4, %xmm0
+       psrlq   %xmm5, %xmm1
+       por     %xmm1, %xmm0
+       pxor    %xmm3, %xmm0
+       movdqa  %xmm0, 16(rp,n,8)
+L(i34):
+       movdqu  -8(ap,n,8), %xmm1
+       movdqu  (ap,n,8), %xmm0
+       psllq   %xmm4, %xmm0
+       psrlq   %xmm5, %xmm1
+       por     %xmm1, %xmm0
+       pxor    %xmm3, %xmm0
+       movdqa  %xmm0, (rp,n,8)
+       sub     $8, n                   C 8 limbs handled per full pass
+       jg      L(top)
+
+L(end):        bt      $0, R32(n)
+       jc      L(end8)                 C bit 0 of n set: only limb 0 is left
+
+       movdqu  (ap), %xmm1             C otherwise limbs 0 and 1 are left
+       pxor    %xmm0, %xmm0
+       punpcklqdq  %xmm1, %xmm0        C xmm0 = B*ap[0] + 0
+       psllq   %xmm4, %xmm1
+       psrlq   %xmm5, %xmm0
+       por     %xmm1, %xmm0
+       pxor    %xmm3, %xmm0            C complement
+       movdqa  %xmm0, (rp)
+       FUNC_EXIT()
+       ret
+
+C Basecase
+       ALIGN(16)
+L(bc): dec     R32(n)
+       jz      L(end8)                 C n was 1: just the low limb
+
+       movq    (ap,n,8), %xmm1         C top limb
+       movq    -8(ap,n,8), %xmm0       C limb below it
+       psllq   %xmm4, %xmm1
+       psrlq   %xmm5, %xmm0
+       por     %xmm1, %xmm0
+       pxor    %xmm3, %xmm0
+       movq    %xmm0, (rp,n,8)
+       sub     $2, R32(n)
+       jl      L(end8)                 C n was 2
+       movq    8(ap), %xmm1            C n was 3: do limb 1 as well
+       movq    (ap), %xmm0
+       psllq   %xmm4, %xmm1
+       psrlq   %xmm5, %xmm0
+       por     %xmm1, %xmm0
+       pxor    %xmm3, %xmm0
+       movq    %xmm0, 8(rp)
+
+L(end8):movq   (ap), %xmm0             C low limb: nothing shifts in from below
+       psllq   %xmm4, %xmm0
+       pxor    %xmm3, %xmm0            C complement, so the vacated low bits become ones
+       movq    %xmm0, (rp)
+       FUNC_EXIT()
+       ret
+EPILOGUE()
diff --git a/mpn/x86_64/fastsse/lshiftc.asm b/mpn/x86_64/fastsse/lshiftc.asm

new file mode 100644 (file)

index 0000000..6d9ec7b
--- /dev/null
+++ b/mpn/x86_64/fastsse/lshiftc.asm
@@ -0,0 +1,168 @@
+dnl  AMD64 mpn_lshiftc optimised for CPUs with fast SSE.
+
+dnl  Contributed to the GNU project by David Harvey and Torbjorn Granlund.
+
+dnl  Copyright 2010, 2011, 2012 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+C           cycles/limb             cycles/limb              good
+C          16-byte aligned         16-byte unaligned       for cpu?
+C AMD K8,K9     ?                       ?
+C AMD K10       1.85  (1.635)           1.9   (1.67)           Y
+C AMD bd1       1.82  (1.75)            1.82  (1.75)           Y
+C AMD bobcat    4.5                     4.5
+C Intel P4      3.6   (3.125)           3.6   (3.125)          Y
+C Intel core2   2.05  (1.67)            2.55  (1.75)
+C Intel NHM     2.05  (1.875)           2.6   (2.25)
+C Intel SBR     1.55  (1.44)            2     (1.57)           Y
+C Intel atom    ?                       ?
+C VIA nano      2.5   (2.5)             2.5   (2.5)            Y
+
+C We try to do as many 16-byte operations as possible.  The top-most and
+C bottom-most writes might need 8-byte operations.  We always write using
+C 16-byte operations, we read with both 8-byte and 16-byte operations.
+
+C There are two inner-loops, one for when rp = ap (mod 16) and one when this is
+C not true.  The aligned case reads 16+8 bytes, the unaligned case reads
+C 16+8+X bytes, where X is 8 or 16 depending on how punpcklqdq is implemented.
+
+C This is not yet great code:
+C   (1) The unaligned case makes too many reads.
+C   (2) We should do some unrolling, at least 2-way.
+C With 2-way unrolling but no scheduling we reach 1.5 c/l on K10 and 2 c/l on
+C Nano.
+
+C INPUT PARAMETERS
+define(`rp',  `%rdi')
+define(`ap',  `%rsi')
+define(`n',   `%rdx')
+define(`cnt', `%rcx')
+
+ASM_START()
+       TEXT
+       ALIGN(16)
+PROLOGUE(mpn_lshiftc)
+       movd    R32(%rcx), %xmm4        C xmm4 = cnt, the psllq count
+       mov     $64, R32(%rax)
+       sub     R32(%rcx), R32(%rax)
+       movd    R32(%rax), %xmm5        C xmm5 = 64-cnt, the psrlq count
+
+       neg     R32(%rcx)               C cl = -cnt; shr uses it mod 64
+       mov     -8(ap,n,8), %rax        C rax = ap[n-1]
+       shr     R8(%rcx), %rax          C bits shifted out at the top; rax is not written again
+
+       pcmpeqb %xmm7, %xmm7            C set to 111...111
+
+       cmp     $2, n
+       jle     L(le2)                  C n <= 2 is handled with 8-byte operations
+
+       lea     (rp,n,8), R32(%rcx)     C only the low address bits are needed
+       test    $8, R8(%rcx)            C bit 3 of rp+8n
+       je      L(rp_aligned)
+
+C Do one initial limb in order to make rp aligned
+       movq    -8(ap,n,8), %xmm0       C ap[n-1]
+       movq    -16(ap,n,8), %xmm1      C ap[n-2]
+       psllq   %xmm4, %xmm0
+       psrlq   %xmm5, %xmm1
+       por     %xmm1, %xmm0
+       pxor    %xmm7, %xmm0            C complement the shifted limb
+       movq    %xmm0, -8(rp,n,8)
+       dec     n
+
+L(rp_aligned):
+       lea     (ap,n,8), R32(%rcx)
+       test    $8, R8(%rcx)            C bit 3 of ap+8n selects the loop
+       je      L(aent)
+       jmp     L(uent)
+C *****************************************************************************
+
+C Handle the case when ap != rp (mod 16).
+
+       ALIGN(16)
+L(utop):movq   (ap,n,8), %xmm1         C low half of xmm1 = ap[n]
+       punpcklqdq  8(ap,n,8), %xmm1    C high half of xmm1 = ap[n+1]
+       movdqa  -8(ap,n,8), %xmm0       C xmm0 = ap[n-1] (low), ap[n] (high)
+       psllq   %xmm4, %xmm1
+       psrlq   %xmm5, %xmm0
+       por     %xmm1, %xmm0
+       pxor    %xmm7, %xmm0
+       movdqa  %xmm0, (rp,n,8)         C stores rp[n] and rp[n+1]
+L(uent):sub    $2, n
+       ja      L(utop)                 C loop while more than two limbs remained
+
+       jne     L(end8)                 C n went below zero: one limb left
+
+       movq    (ap), %xmm1             C n is zero: finish the two lowest limbs
+       pxor    %xmm0, %xmm0
+       punpcklqdq  %xmm1, %xmm0        C xmm0 = 0 (low), ap[0] (high)
+       punpcklqdq  8(ap), %xmm1        C xmm1 = ap[0] (low), ap[1] (high)
+       psllq   %xmm4, %xmm1
+       psrlq   %xmm5, %xmm0
+       por     %xmm1, %xmm0
+       pxor    %xmm7, %xmm0
+       movdqa  %xmm0, (rp)
+       ret
+C *****************************************************************************
+
+C Handle the case when ap = rp (mod 16).
+
+       ALIGN(16)
+L(atop):movdqa (ap,n,8), %xmm0         C xmm0 = B*ap[n-1] + ap[n-2]
+       movq    -8(ap,n,8), %xmm1       C xmm1 = ap[n-3]
+       punpcklqdq  %xmm0, %xmm1        C xmm1 = B*ap[n-2] + ap[n-3]
+       psllq   %xmm4, %xmm0
+       psrlq   %xmm5, %xmm1
+       por     %xmm1, %xmm0
+       pxor    %xmm7, %xmm0
+       movdqa  %xmm0, (rp,n,8)
+L(aent):sub    $2, n
+       ja      L(atop)                 C loop while more than two limbs remained
+
+       jne     L(end8)                 C n went below zero: one limb left
+
+       movdqa  (ap), %xmm0             C xmm0 = ap[0] (low), ap[1] (high)
+       pxor    %xmm1, %xmm1
+       punpcklqdq  %xmm0, %xmm1        C xmm1 = 0 (low), ap[0] (high)
+       psllq   %xmm4, %xmm0
+       psrlq   %xmm5, %xmm1
+       por     %xmm1, %xmm0
+       pxor    %xmm7, %xmm0
+       movdqa  %xmm0, (rp)
+       ret
+C *****************************************************************************
+
+       ALIGN(16)
+L(le2):        jne     L(end8)         C flags from cmp $2, n: fall through only if n = 2
+
+       movq    8(ap), %xmm0
+       movq    (ap), %xmm1
+       psllq   %xmm4, %xmm0
+       psrlq   %xmm5, %xmm1
+       por     %xmm1, %xmm0
+       pxor    %xmm7, %xmm0
+       movq    %xmm0, 8(rp)            C rp[1] from ap[1] and ap[0]
+
+L(end8):movq   (ap), %xmm0             C lowest limb: zeros are shifted in
+       psllq   %xmm4, %xmm0
+       pxor    %xmm7, %xmm0
+       movq    %xmm0, (rp)
+       ret
+EPILOGUE()
diff --git a/mpn/x86_64/fastsse/rshift-movdqu2.asm b/mpn/x86_64/fastsse/rshift-movdqu2.asm

new file mode 100644 (file)

index 0000000..158124e
--- /dev/null
+++ b/mpn/x86_64/fastsse/rshift-movdqu2.asm
@@ -0,0 +1,190 @@
+dnl  AMD64 mpn_rshift optimised for CPUs with fast SSE including fast movdqu.
+
+dnl  Contributed to the GNU project by Torbjorn Granlund.
+
+dnl  Copyright 2010, 2011, 2012 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+C           cycles/limb     cycles/limb     cycles/limb    good
+C              aligned       unaligned       best seen    for cpu?
+C AMD K8,K9     3               3               2.35     no, use shl/shr
+C AMD K10       1.5-1.8         1.5-1.8         1.33     yes
+C AMD bd1       1.7-1.9         1.7-1.9         1.33     yes
+C AMD bobcat    3.17            3.17                     yes, bad for n < 20
+C Intel P4      4.67            4.67            2.7      no, slow movdqu
+C Intel core2   2.15            2.15            1.25     no, use shld/shrd
+C Intel NHM     1.66            1.66            1.25     no, use shld/shrd
+C Intel SBR     1.3             1.3             1.25     yes, bad for n = 4-6
+C Intel atom   11.7            11.7             4.5      no
+C VIA nano      5.7             5.95            2.0      no, slow movdqu
+
+C We try to do as many aligned 16-byte operations as possible.  The top-most
+C and bottom-most writes might need 8-byte operations.
+C
+C This variant relies on fast load movdqu, and uses it even for aligned operands,
+C in order to avoid the need for two separate loops.
+C
+C TODO
+C  * Could 2-limb wind-down code be simplified?
+C  * Improve basecase code, using shld/shrd for SBR, discrete integer shifts
+C    for other affected CPUs.
+
+C INPUT PARAMETERS
+define(`rp',  `%rdi')
+define(`ap',  `%rsi')
+define(`n',   `%rdx')
+define(`cnt', `%rcx')
+
+ASM_START()
+       TEXT
+       ALIGN(64)
+PROLOGUE(mpn_rshift)
+       FUNC_ENTRY(4)
+       movd    R32(%rcx), %xmm4        C xmm4 = cnt, the psrlq count
+       mov     $64, R32(%rax)
+       sub     R32(%rcx), R32(%rax)
+       movd    R32(%rax), %xmm5        C xmm5 = 64-cnt, the psllq count
+
+       neg     R32(%rcx)               C cl = -cnt; shl uses it mod 64
+       mov     (ap), %rax              C rax = ap[0]
+       shl     R8(%rcx), %rax          C bits shifted out at the bottom; rax is not written again below
+
+       cmp     $3, n
+       jle     L(bc)                   C n <= 3 goes to the basecase
+
+       bt      $3, R32(rp)             C bit 3 of rp
+       jnc     L(rp_aligned)
+
+C Do one initial limb in order to make rp aligned
+       movq    (ap), %xmm0
+       movq    8(ap), %xmm1
+       psrlq   %xmm4, %xmm0
+       psllq   %xmm5, %xmm1
+       por     %xmm1, %xmm0
+       movq    %xmm0, (rp)             C rp[0] from ap[0] and ap[1]
+       lea     8(ap), ap               C step both pointers past that limb
+       lea     8(rp), rp
+       dec     n
+
+L(rp_aligned):
+       lea     1(n), %r8d              C r8 = n+1, used to pick the loop entry
+       lea     (ap,n,8), ap            C point ap and rp just past the operands
+       lea     (rp,n,8), rp
+       neg     n                       C index with a negative offset from the end
+
+       and     $6, R32(%r8)            C (n+1) & 6 is 0, 2, 4 or 6
+       jz      L(bu0)
+       cmp     $4, R32(%r8)
+       jz      L(bu4)
+       jc      L(bu2)                  C below 4, i.e. the value is 2
+L(bu6):        add     $4, n
+       jmp     L(i56)
+L(bu0):        add     $6, n
+       jmp     L(i70)
+L(bu4):        add     $2, n
+       jmp     L(i34)
+L(bu2):        add     $8, n
+       jge     L(end)                  C nothing left for the unrolled loop
+
+       ALIGN(16)
+L(top):        movdqu  -64(ap,n,8), %xmm1      C two limbs at this offset
+       movdqu  -56(ap,n,8), %xmm0      C the same window moved up one limb
+       psllq   %xmm5, %xmm0
+       psrlq   %xmm4, %xmm1
+       por     %xmm1, %xmm0
+       movdqa  %xmm0, -64(rp,n,8)      C aligned 16-byte store of two result limbs
+L(i70):
+       movdqu  -48(ap,n,8), %xmm1      C same step, next two limbs
+       movdqu  -40(ap,n,8), %xmm0
+       psllq   %xmm5, %xmm0
+       psrlq   %xmm4, %xmm1
+       por     %xmm1, %xmm0
+       movdqa  %xmm0, -48(rp,n,8)
+L(i56):
+       movdqu  -32(ap,n,8), %xmm1      C same step, next two limbs
+       movdqu  -24(ap,n,8), %xmm0
+       psllq   %xmm5, %xmm0
+       psrlq   %xmm4, %xmm1
+       por     %xmm1, %xmm0
+       movdqa  %xmm0, -32(rp,n,8)
+L(i34):
+       movdqu  -16(ap,n,8), %xmm1      C same step, next two limbs
+       movdqu  -8(ap,n,8), %xmm0
+       psllq   %xmm5, %xmm0
+       psrlq   %xmm4, %xmm1
+       por     %xmm1, %xmm0
+       movdqa  %xmm0, -16(rp,n,8)
+       add     $8, n                   C eight limbs per full iteration
+       jl      L(top)
+
+L(end):        bt      $0, R32(n)
+       jc      L(e1)                   C bit 0 of n set: a single limb remains
+
+       movdqu  -16(ap), %xmm1          C the two highest limbs
+       movq    -8(ap), %xmm0           C highest limb in the low half, zero above
+       psrlq   %xmm4, %xmm1
+       psllq   %xmm5, %xmm0
+       por     %xmm1, %xmm0
+       movdqa  %xmm0, -16(rp)
+       FUNC_EXIT()
+       ret
+
+L(e1): movq    -8(ap), %xmm0           C highest limb: zeros are shifted in
+       psrlq   %xmm4, %xmm0
+       movq    %xmm0, -8(rp)
+       FUNC_EXIT()
+       ret
+
+C Basecase
+       ALIGN(16)
+L(bc): dec     R32(n)
+       jnz     1f                      C fall through only when n = 1
+       movq    (ap), %xmm0
+       psrlq   %xmm4, %xmm0
+       movq    %xmm0, (rp)
+       FUNC_EXIT()
+       ret
+
+1:     movq    (ap), %xmm1
+       movq    8(ap), %xmm0
+       psrlq   %xmm4, %xmm1
+       psllq   %xmm5, %xmm0
+       por     %xmm1, %xmm0
+       movq    %xmm0, (rp)             C rp[0] from ap[0] and ap[1]
+       dec     R32(n)
+       jnz     1f                      C fall through only when n = 2
+       movq    8(ap), %xmm0
+       psrlq   %xmm4, %xmm0
+       movq    %xmm0, 8(rp)
+       FUNC_EXIT()
+       ret
+
+1:     movq    8(ap), %xmm1            C remaining case, n = 3
+       movq    16(ap), %xmm0
+       psrlq   %xmm4, %xmm1
+       psllq   %xmm5, %xmm0
+       por     %xmm1, %xmm0
+       movq    %xmm0,  8(rp)           C rp[1] from ap[1] and ap[2]
+       movq    16(ap), %xmm0
+       psrlq   %xmm4, %xmm0
+       movq    %xmm0, 16(rp)           C top limb: zeros are shifted in
+       FUNC_EXIT()
+       ret
+EPILOGUE()
diff --git a/mpn/x86_64/fat/diveby3.c b/mpn/x86_64/fat/diveby3.c

deleted file mode 100644 (file)

index 7ea0161..0000000
--- a/mpn/x86_64/fat/diveby3.c
+++ /dev/null
@@ -1,21 +0,0 @@
-/* Fat binary fallback mpn_divexact_by3c.
-
-Copyright 2003, 2009 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-License for more details.
-
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
-
-
-#include "mpn/generic/diveby3.c"
diff --git a/mpn/x86_64/fat/fat.c b/mpn/x86_64/fat/fat.c

index ec0f353caaf382075577b8bc93fb3645a77da3de..1e268f1643928eccaae8fef41eb4c1beda9992a4 100644 (file)
--- a/mpn/x86_64/fat/fat.c
+++ b/mpn/x86_64/fat/fat.c
@@ -1,4 +1,4 @@
-/* x86 fat binary initializers.
+/* x86_64 fat binary initializers.
  
     Contributed to the GNU project by Kevin Ryde (original x86_32 code) and
     Torbjorn Granlund (port to x86_64)
@@ -7,7 +7,7 @@
     THEY'RE ALMOST CERTAIN TO BE SUBJECT TO INCOMPATIBLE CHANGES OR DISAPPEAR
     COMPLETELY IN FUTURE GNU MP RELEASES.
  
-Copyright 2003, 2004, 2009, 2011 Free Software Foundation, Inc.
+Copyright 2003, 2004, 2009, 2011, 2012 Free Software Foundation, Inc.
  
  This file is part of the GNU MP Library.
  
@@ -39,7 +39,7 @@ along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
  
  
  /* fat_entry.asm */
-long __gmpn_cpuid __GMP_PROTO ((char dst[12], int id));
+long __gmpn_cpuid (char [12], int);
  
  
  #if WANT_FAKE_CPUID
@@ -69,6 +69,7 @@ static struct {
    { "k8",         "AuthenticAMD", MAKE_FMS (15, 0) },
    { "k10",        "AuthenticAMD", MAKE_FMS (16, 0) },
    { "bobcat",     "AuthenticAMD", MAKE_FMS (20, 1) },
+  { "bulldozer",  "AuthenticAMD", MAKE_FMS (21, 1) },
  
    { "nano",       "CentaurHauls", MAKE_FMS (6, 15) },
  };
@@ -124,28 +125,44 @@ typedef DECL_preinv_mod_1    ((*preinv_mod_1_t));
  
  struct cpuvec_t __gmpn_cpuvec = {
    __MPN(add_n_init),
+  __MPN(addlsh1_n_init),
+  __MPN(addlsh2_n_init),
    __MPN(addmul_1_init),
+  __MPN(addmul_2_init),
+  __MPN(bdiv_dbm1c_init),
+  __MPN(com_init),
    __MPN(copyd_init),
    __MPN(copyi_init),
    __MPN(divexact_1_init),
-  __MPN(divexact_by3c_init),
    __MPN(divrem_1_init),
    __MPN(gcd_1_init),
    __MPN(lshift_init),
+  __MPN(lshiftc_init),
    __MPN(mod_1_init),
+  __MPN(mod_1_1p_init),
+  __MPN(mod_1_1p_cps_init),
+  __MPN(mod_1s_2p_init),
+  __MPN(mod_1s_2p_cps_init),
+  __MPN(mod_1s_4p_init),
+  __MPN(mod_1s_4p_cps_init),
    __MPN(mod_34lsub1_init),
    __MPN(modexact_1c_odd_init),
    __MPN(mul_1_init),
    __MPN(mul_basecase_init),
+  __MPN(mullo_basecase_init),
    __MPN(preinv_divrem_1_init),
    __MPN(preinv_mod_1_init),
+  __MPN(redc_1_init),
+  __MPN(redc_2_init),
    __MPN(rshift_init),
    __MPN(sqr_basecase_init),
    __MPN(sub_n_init),
+  __MPN(sublsh1_n_init),
    __MPN(submul_1_init),
    0
  };
  
+int __gmpn_cpuvec_initialized = 0;
  
  /* The following setups start with generic x86, then overwrite with
     specifics for a chip, and higher versions of that chip.
@@ -187,6 +204,11 @@ __gmpn_cpuvec_init (void)
    family = ((fms >> 8) & 0xf) + ((fms >> 20) & 0xff);
    model = ((fms >> 4) & 0xf) + ((fms >> 12) & 0xf0);
  
+  /* Check extended feature flags */
+  __gmpn_cpuid (dummy_string, 0x80000001);
+  if ((dummy_string[4 + 29 / 8] & (1 << (29 % 8))) == 0)
+    abort (); /* longmode-capable-bit turned off! */
+
    /*********************************************************/
    /*** WARNING: keep this list in sync with config.guess ***/
    /*********************************************************/
@@ -194,30 +216,9 @@ __gmpn_cpuvec_init (void)
      {
        switch (family)
         {
-       case 4:
-       case 5:
-         abort ();             /* 32-bit processors */
-
         case 6:
           switch (model)
             {
-           case 0x00:
-           case 0x01:
-           case 0x02:
-           case 0x03:
-           case 0x04:
-           case 0x05:
-           case 0x06:
-           case 0x07:
-           case 0x08:
-           case 0x09:          /* Banias */
-           case 0x0a:
-           case 0x0b:
-           case 0x0c:
-           case 0x0d:          /* Dothan */
-           case 0x0e:          /* Yonah */
-             abort ();         /* 32-bit processors */
-
             case 0x0f:          /* Conroe Merom Kentsfield Allendale */
             case 0x10:
             case 0x11:
@@ -233,9 +234,10 @@ __gmpn_cpuvec_init (void)
               CPUVEC_SETUP_core2;
               break;
  
-           case 0x1c:          /* Silverthorne */
-           case 0x26:          /* Lincroft */
-           case 0x27:          /* Saltwell */
+           case 0x1c:          /* Atom Silverthorne */
+           case 0x26:          /* Atom Lincroft */
+           case 0x27:          /* Atom Saltwell? */
+           case 0x36:          /* Atom Cedarview/Saltwell */
               CPUVEC_SETUP_atom;
               break;
  
@@ -261,7 +263,10 @@ __gmpn_cpuvec_init (void)
  
             case 0x2a:          /* SB */
             case 0x2d:          /* SBC-EP */
+           case 0x3a:          /* IBR */
+           case 0x3c:          /* Haswell */
               CPUVEC_SETUP_core2;
+             CPUVEC_SETUP_coreinhm;
               CPUVEC_SETUP_coreisbr;
               break;
             }
@@ -276,28 +281,39 @@ __gmpn_cpuvec_init (void)
      {
        switch (family)
         {
-       case 5:
-       case 6:
-         abort ();
+       case 0x0f:              /* k8 */
+       case 0x11:              /* "fam 11h", mix of k8 and k10 */
+       case 0x13:
+       case 0x16:
+       case 0x17:
+         CPUVEC_SETUP_k8;
+         break;
  
-       case 15:                /* k8 */
-       case 16:                /* k10 */
-         /* CPUVEC_SETUP_athlon */
+       case 0x10:              /* k10 */
+       case 0x12:              /* k10 (llano) */
+         CPUVEC_SETUP_k8;
+         CPUVEC_SETUP_k10;
           break;
+
+       case 0x14:              /* bobcat */
+         CPUVEC_SETUP_k8;
+         CPUVEC_SETUP_k10;
+         CPUVEC_SETUP_bobcat;
+         break;
+
+       case 0x15:              /* bulldozer */
+         CPUVEC_SETUP_k8;
+         CPUVEC_SETUP_k10;
+         CPUVEC_SETUP_bd1;
         }
      }
    else if (strcmp (vendor_string, "CentaurHauls") == 0)
      {
        switch (family)
         {
-       case 5:
-         abort ();             /* 32-bit processors */
-
         case 6:
-         if (model < 15)
-           abort ();           /* 32-bit processors */
-
-         CPUVEC_SETUP_nano;
+         if (model >= 15)
+           CPUVEC_SETUP_nano;
           break;
         }
      }
@@ -315,5 +331,5 @@ __gmpn_cpuvec_init (void)
  
    /* Set this once the threshold fields are ready.
       Use volatile to prevent it getting moved.  */
-  ((volatile struct cpuvec_t *) &__gmpn_cpuvec)->initialized = 1;
+  *((volatile int *) &__gmpn_cpuvec_initialized) = 1;
  }
diff --git a/mpn/x86_64/fat/fat_entry.asm b/mpn/x86_64/fat/fat_entry.asm

index db644401e73b40307ca5016c7919d99624aff07f..82849de53cc27cb79a1cf5dadca63ad338c73ff9 100644 (file)
--- a/mpn/x86_64/fat/fat_entry.asm
+++ b/mpn/x86_64/fat/fat_entry.asm
@@ -3,20 +3,20 @@ dnl  x86 fat binary entrypoints.
  dnl  Contributed to the GNU project by Kevin Ryde (original x86_32 code) and
  dnl  Torbjorn Granlund (port to x86_64)
  
-dnl  Copyright 2003, 2009, 2011 Free Software Foundation, Inc.
-dnl
+dnl  Copyright 2003, 2009, 2011, 2012 Free Software Foundation, Inc.
+
  dnl  This file is part of the GNU MP Library.
-dnl
+
  dnl  The GNU MP Library is free software; you can redistribute it and/or
  dnl  modify it under the terms of the GNU Lesser General Public License as
  dnl  published by the Free Software Foundation; either version 3 of the
  dnl  License, or (at your option) any later version.
-dnl
+
  dnl  The GNU MP Library is distributed in the hope that it will be useful,
  dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
  dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  dnl  Lesser General Public License for more details.
-dnl
+
  dnl  You should have received a copy of the GNU Lesser General Public License
  dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
  
@@ -43,10 +43,11 @@ ifdef(`DARWIN',
  ifdef(`PIC',
  `define(`PIC_OR_DARWIN')')
  
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
  
         TEXT
  
-
  dnl  Usage: FAT_ENTRY(name, offset)
  dnl
  dnl  Emit a fat binary entrypoint function of the given name.  This is the
@@ -58,16 +59,26 @@ dnl
  dnl  For non-PIC, the jumps are 5 bytes each, aligning them to 8 should be
  dnl  fine for all x86s.
  dnl
-dnl  For PIC, the jumps are 20 bytes each, and are best aligned to 16 to
-dnl  ensure at least the first two instructions don't cross a cache line
+dnl  For ELF/DARWIN PIC, the jumps are 20 bytes each, and are best aligned to
+dnl  16 to ensure at least the first two instructions don't cross a cache line
  dnl  boundary.
  dnl
+dnl  For DOS64, the jumps are 6 bytes.  The same form works also for GNU/Linux
+dnl  (at least with certain assembler/linkers) but FreeBSD 8.2 crashes.  Not
+dnl  tested on Darwin, Slowaris, NetBSD, etc.
+dnl
  dnl  Note the extra `' ahead of PROLOGUE obscures it from the HAVE_NATIVE
  dnl  grepping in configure, stopping that code trying to eval something with
  dnl  $1 in it.
  
  define(FAT_ENTRY,
  m4_assert_numargs(2)
+`ifdef(`HOST_DOS64',
+`      ALIGN(8)
+`'PROLOGUE($1)
+       jmp     *$2+GSYM_PREFIX`'__gmpn_cpuvec(%rip)
+EPILOGUE()
+',
  `      ALIGN(ifdef(`PIC',16,8))
  `'PROLOGUE($1)
  ifdef(`PIC_OR_DARWIN',
@@ -77,7 +88,7 @@ ifdef(`PIC_OR_DARWIN',
         jmp     *GSYM_PREFIX`'__gmpn_cpuvec+$2
  ')
  EPILOGUE()
-')
+')')
  
  
  dnl  FAT_ENTRY for each CPUVEC_FUNCS_LIST
@@ -121,12 +132,21 @@ m4_assert_numargs(2)
  EPILOGUE()
  ')
  
+dnl  FAT_INIT for each CPUVEC_FUNCS_LIST
+dnl
+
+define(`CPUVEC_offset',0)
+foreach(i,
+`FAT_INIT(MPN(i`'_init),CPUVEC_offset)
+define(`CPUVEC_offset',eval(CPUVEC_offset + 1))',
+CPUVEC_FUNCS_LIST)
+
  L(fat_init):
         C al    __gmpn_cpuvec byte offset
  
         movzbl  %al, %eax
-       push    %rdi
-       push    %rsi
+IFSTD(`        push    %rdi    ')
+IFSTD(`        push    %rsi    ')
         push    %rdx
         push    %rcx
         push    %r8
@@ -138,38 +158,33 @@ L(fat_init):
         pop     %r8
         pop     %rcx
         pop     %rdx
-       pop     %rsi
-       pop     %rdi
+IFSTD(`        pop     %rsi    ')
+IFSTD(`        pop     %rdi    ')
  ifdef(`PIC_OR_DARWIN',`
         LEA(    GSYM_PREFIX`'__gmpn_cpuvec, %r10)
-       jmp     *(%r10,%rax)
+       jmp     *(%r10,%rax,8)
  ',`dnl non-PIC
-       jmp     *GSYM_PREFIX`'__gmpn_cpuvec(%rax)
+       jmp     *GSYM_PREFIX`'__gmpn_cpuvec(,%rax,8)
  ')
  
-dnl  FAT_INIT for each CPUVEC_FUNCS_LIST
-dnl
-
-define(`CPUVEC_offset',0)
-foreach(i,
-`FAT_INIT(MPN(i`'_init),CPUVEC_offset)
-define(`CPUVEC_offset',eval(CPUVEC_offset + 8))',
-CPUVEC_FUNCS_LIST)
-
-
  
  C long __gmpn_cpuid (char dst[12], int id);
  C
-C This is called only once, so just something simple and compact is fine.
+C This is called only 3 times, so just something simple and compact is fine.
+
  
+define(`rp',  `%rdi')
+define(`idx', `%rsi')
  
  PROLOGUE(__gmpn_cpuid)
+       FUNC_ENTRY(2)
         mov     %rbx, %r8
-       mov     %esi, %eax
+       mov     R32(idx), R32(%rax)
         cpuid
-       mov     %ebx, (%rdi)
-       mov     %edx, 4(%rdi)
-       mov     %ecx, 8(%rdi)
+       mov     %ebx, (rp)
+       mov     %edx, 4(rp)
+       mov     %ecx, 8(rp)
         mov     %r8, %rbx
+       FUNC_EXIT()
         ret
  EPILOGUE()
diff --git a/mpn/x86_64/fat/gcd_1.c b/mpn/x86_64/fat/gcd_1.c

deleted file mode 100644 (file)

index 5bd0006..0000000
--- a/mpn/x86_64/fat/gcd_1.c
+++ /dev/null
@@ -1,21 +0,0 @@
-/* Fat binary fallback mpn_gcd_1.
-
-Copyright 2003 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-License for more details.
-
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
-
-
-#include "mpn/generic/gcd_1.c"
diff --git a/mpn/x86_64/fat/gmp-mparam.h b/mpn/x86_64/fat/gmp-mparam.h

index 6e744c0641ff373c33c59f5ce8faa800f01a9fe3..c71614d4a884531a18673ff3e4b10811006a25fe 100644 (file)
--- a/mpn/x86_64/fat/gmp-mparam.h
+++ b/mpn/x86_64/fat/gmp-mparam.h
@@ -1,6 +1,6 @@
  /* Fat binary x86_64 gmp-mparam.h -- Compiler/machine parameter header file.
  
-Copyright 1991, 1993, 1994, 2000, 2001, 2002, 2003, 2009 Free Software
+Copyright 1991, 1993, 1994, 2000, 2001, 2002, 2003, 2009, 2011 Free Software
  Foundation, Inc.
  
  This file is part of the GNU MP Library.
@@ -34,6 +34,8 @@ along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
     preinv.  */
  #define USE_PREINV_DIVREM_1   1
  
+#define BMOD_1_TO_MOD_1_THRESHOLD           20
+
  /* mpn_sqr_basecase is faster than mpn_mul_basecase at all sizes, no need
     for mpn_sqr to call the latter.  */
  #define SQR_BASECASE_THRESHOLD 0
diff --git a/mpn/x86_64/fat/mode1o.c b/mpn/x86_64/fat/mode1o.c

deleted file mode 100644 (file)

index a5244ca..0000000
--- a/mpn/x86_64/fat/mode1o.c
+++ /dev/null
@@ -1,21 +0,0 @@
-/* Fat binary fallback mpn_modexact_1c_odd.
-
-Copyright 2003 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-License for more details.
-
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
-
-
-#include "mpn/generic/mode1o.c"
diff --git a/mpn/x86_64/fat/redc_2.c b/mpn/x86_64/fat/redc_2.c

new file mode 100644 (file)

index 0000000..f29d658
--- /dev/null
+++ b/mpn/x86_64/fat/redc_2.c
@@ -0,0 +1,21 @@
+/* Fat binary fallback mpn_redc_2.
+
+Copyright 2012 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+
+#include "mpn/generic/redc_2.c"
diff --git a/mpn/x86_64/gcd_1.asm b/mpn/x86_64/gcd_1.asm

index 4fe9e1757b18a629e1d48b1d3f04555636b84c8f..a1fc3d93a551779cb05cc6abc85f736a7e2872e0 100644 (file)
--- a/mpn/x86_64/gcd_1.asm
+++ b/mpn/x86_64/gcd_1.asm
@@ -3,7 +3,8 @@ dnl  AMD64 mpn_gcd_1 -- mpn by 1 gcd.
  dnl  Based on the K7 gcd_1.asm, by Kevin Ryde.  Rehacked for AMD64 by Torbjorn
  dnl  Granlund.
  
-dnl  Copyright 2000, 2001, 2002, 2005, 2009 Free Software Foundation, Inc.
+dnl  Copyright 2000, 2001, 2002, 2005, 2009, 2011, 2012 Free Software
+dnl  Foundation, Inc.
  
  dnl  This file is part of the GNU MP Library.
  
@@ -23,23 +24,22 @@ dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
  include(`../config.m4')
  
  
-C K8: 6.75 cycles/bit (approx)  1x1 gcd
-C     10.0 cycles/limb          Nx1 reduction (modexact_1_odd)
-
-
-dnl  Reduce using x%y if x is more than DIV_THRESHOLD bits bigger than y,
-dnl  where x is the larger of the two.  See tune/README for more.
-dnl
-dnl  div at 80 cycles compared to the gcd at about 7 cycles/bitpair
-dnl  suggests 80/7*2=23
-
-deflit(DIV_THRESHOLD, 23)
-
+C           cycles/bit (approx)
+C AMD K8,K9     5.21                 (4.95)
+C AMD K10       5.15                 (5.00)
+C AMD bd1       5.42                 (5.14)
+C AMD bobcat    6.71                 (6.56)
+C Intel P4     13.5                 (12.75)
+C Intel core2   6.20                 (6.16)
+C Intel NHM     6.49                 (6.25)
+C Intel SBR     7.75                 (7.57)
+C Intel atom    8.77                 (8.54)
+C VIA nano      6.60                 (6.20)
+C Numbers measured with: speed -CD -s16-64 -t48 mpn_gcd_1
  
  C ctz_table[n] is the number of trailing zeros on n, or MAXSHIFT if n==0.
  
-
-deflit(MAXSHIFT, 6)
+deflit(MAXSHIFT, 7)
  deflit(MASK, eval((m4_lshift(1,MAXSHIFT))-1))
  
  DEF_OBJECT(ctz_table,64)
@@ -49,82 +49,105 @@ forloop(i,1,MASK,
  ')
  END_OBJECT(ctz_table)
  
-C mp_limb_t mpn_gcd_1 (mp_srcptr up, mp_size_t n, mp_limb_t vlimb);
-
+C Threshold of when to call bmod when U is one limb.  Should be about
+C (time_in_cycles(bmod_1,1) + call_overhead) / (cycles/bit).
+define(`BMOD_THRES_LOG2', 8)
  
  C INPUT PARAMETERS
  define(`up',    `%rdi')
  define(`n',     `%rsi')
-define(`vlimb', `%rdx')
+define(`v0',    `%rdx')
  
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
+
+IFDOS(`define(`STACK_ALLOC', 40)')
+IFSTD(`define(`STACK_ALLOC', 8)')
+
+ASM_START()
         TEXT
         ALIGN(16)
-
  PROLOGUE(mpn_gcd_1)
-       mov     (%rdi), %r8             C src low limb
-       or      %rdx, %r8               C x | y
+       FUNC_ENTRY(3)
+       mov     (up), %rax              C U low limb
         mov     $-1, R32(%rcx)
+       or      v0, %rax                C x | y
  
  L(twos):
         inc     R32(%rcx)
-       shr     %r8
+       shr     %rax
         jnc     L(twos)
  
-       shr     R8(%rcx), %rdx
-       mov     R32(%rcx), R32(%r8)     C common twos
+       shr     R8(%rcx), v0
+       push    %rcx                    C common twos
  
  L(divide_strip_y):
-       shr     %rdx
+       shr     v0
         jnc     L(divide_strip_y)
-       adc     %rdx, %rdx
-
-       push    %r8
-       push    %rdx
-       sub     $8, %rsp                C maintain ABI required rsp alignment
-
+       adc     v0, v0
+
+       cmp     $1, n
+       jnz     L(reduce_nby1)
+
+C Both U and V are single limbs, reduce with bmod if u0 >> v0.
+       mov     (up), %r8
+       mov     %r8, %rax
+       shr     $BMOD_THRES_LOG2, %r8
+       cmp     %r8, v0
+       ja      L(noreduce)
+       push    v0
+       sub     $STACK_ALLOC, %rsp      C maintain ABI required rsp alignment
+
+L(bmod):
+IFDOS(`        mov     %rdx, %r8       ')
+IFDOS(`        mov     %rsi, %rdx      ')
+IFDOS(`        mov     %rdi, %rcx      ')
         CALL(   mpn_modexact_1_odd)
  
-       add     $8, %rsp
+L(reduced):
+       add     $STACK_ALLOC, %rsp
         pop     %rdx
-       pop     %r8
  
+L(noreduce):
+       LEA(    ctz_table, %rsi)
         test    %rax, %rax
-
         mov     %rax, %rcx
-       jnz     L(strip_x)
-
+       jnz     L(mid)
+       jmp     L(end)
+
+L(reduce_nby1):
+       push    v0
+       sub     $STACK_ALLOC, %rsp      C maintain ABI required rsp alignment
+
+       cmp     $BMOD_1_TO_MOD_1_THRESHOLD, n
+       jl      L(bmod)
+IFDOS(`        mov     %rdx, %r8       ')
+IFDOS(`        mov     %rsi, %rdx      ')
+IFDOS(`        mov     %rdi, %rcx      ')
+       CALL(   mpn_mod_1)
+       jmp     L(reduced)
+
+       ALIGN(16)                       C               K8    BC    P4    NHM   SBR
+L(top):        cmovc   %rcx, %rax              C if x-y < 0    0
+       cmovc   %rdi, %rdx              C use x,y-x     0
+L(mid):        and     $MASK, R32(%rcx)        C               0
+       movzbl  (%rsi,%rcx), R32(%rcx)  C               1
+       jz      L(shift_alot)           C               1
+       shr     R8(%rcx), %rax          C               3
+       mov     %rax, %rdi              C               4
+       mov     %rdx, %rcx              C               3
+       sub     %rax, %rcx              C               4
+       sub     %rdx, %rax              C               4
+       jnz     L(top)                  C               5
+
+L(end):        pop     %rcx
         mov     %rdx, %rax
-       jmp     L(done)
-
-L(strip_x):
-       LEA(    ctz_table, %r9)
-       jmp     L(strip_x_top)
-
-       ALIGN(16)
-L(top):
-       cmovc   %r10, %rcx              C if x-y gave carry, use x,y-x  0
-       cmovc   %rax, %rdx              C                               0
-
-L(strip_x_top):
-       mov     %rcx, %rax              C                               1
-       and     $MASK, R32(%rcx)        C                               1
-
-       mov     (%r9,%rcx), R8(%rcx)    C                               1
-
-       shr     R8(%rcx), %rax          C                               4
-       cmp     $MAXSHIFT, R8(%rcx)     C                               4
-
-       mov     %rax, %rcx              C                               5
-       mov     %rdx, %r10              C                               5
-       je      L(strip_x_top)          C                               5
-
-       sub     %rax, %r10              C                               6
-       sub     %rdx, %rcx              C                               6
-       jnz     L(top)                  C                               6
-
-L(done):
-       mov     %r8, %rcx
         shl     R8(%rcx), %rax
+       FUNC_EXIT()
         ret
  
+L(shift_alot):
+       shr     $MAXSHIFT, %rax
+       mov     %rax, %rcx
+       jmp     L(mid)
  EPILOGUE()
diff --git a/mpn/x86_64/gmp-mparam.h b/mpn/x86_64/gmp-mparam.h

index 1c6988cc1c31939680b7b639c5653c1ef63754c8..8171c04d3a6e9d5aa9d7047a44fa7c156384502d 100644 (file)
--- a/mpn/x86_64/gmp-mparam.h
+++ b/mpn/x86_64/gmp-mparam.h
@@ -1,7 +1,7 @@
-/* AMD K8 gmp-mparam.h -- Compiler/machine parameter header file.
+/* AMD K8-K10 gmp-mparam.h -- Compiler/machine parameter header file.
  
  Copyright 1991, 1993, 1994, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007,
-2008, 2009, 2010 Free Software Foundation, Inc.
+2008, 2009, 2010, 2012 Free Software Foundation, Inc.
  
  This file is part of the GNU MP Library.
  
@@ -24,36 +24,41 @@ along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
  
  #define MOD_1_NORM_THRESHOLD                 0  /* always */
  #define MOD_1_UNNORM_THRESHOLD               0  /* always */
-#define MOD_1N_TO_MOD_1_1_THRESHOLD          7
-#define MOD_1U_TO_MOD_1_1_THRESHOLD          5
-#define MOD_1_1_TO_MOD_1_2_THRESHOLD         6
-#define MOD_1_2_TO_MOD_1_4_THRESHOLD        11
-#define PREINV_MOD_1_TO_MOD_1_THRESHOLD     13
+#define MOD_1N_TO_MOD_1_1_THRESHOLD          4
+#define MOD_1U_TO_MOD_1_1_THRESHOLD          3
+#define MOD_1_1_TO_MOD_1_2_THRESHOLD        14
+#define MOD_1_2_TO_MOD_1_4_THRESHOLD        28
+#define PREINV_MOD_1_TO_MOD_1_THRESHOLD      7
  #define USE_PREINV_DIVREM_1                  1  /* native */
+#define DIV_QR_2_PI2_THRESHOLD           MP_SIZE_T_MAX  /* never */
  #define DIVEXACT_1_THRESHOLD                 0  /* always (native) */
-#define BMOD_1_TO_MOD_1_THRESHOLD           19
+#define BMOD_1_TO_MOD_1_THRESHOLD           15
  
-#define MUL_TOOM22_THRESHOLD                28
+#define MUL_TOOM22_THRESHOLD                27
  #define MUL_TOOM33_THRESHOLD                81
-#define MUL_TOOM44_THRESHOLD               232
-#define MUL_TOOM6H_THRESHOLD               369
-#define MUL_TOOM8H_THRESHOLD               478
+#define MUL_TOOM44_THRESHOLD               234
+#define MUL_TOOM6H_THRESHOLD               418
+#define MUL_TOOM8H_THRESHOLD               466
  
  #define MUL_TOOM32_TO_TOOM43_THRESHOLD      97
  #define MUL_TOOM32_TO_TOOM53_THRESHOLD     160
-#define MUL_TOOM42_TO_TOOM53_THRESHOLD     160
-#define MUL_TOOM42_TO_TOOM63_THRESHOLD     187
+#define MUL_TOOM42_TO_TOOM53_THRESHOLD     145
+#define MUL_TOOM42_TO_TOOM63_THRESHOLD     175
  
  #define SQR_BASECASE_THRESHOLD               0  /* always (native) */
-#define SQR_TOOM2_THRESHOLD                 32
-#define SQR_TOOM3_THRESHOLD                113
+#define SQR_TOOM2_THRESHOLD                 36
+#define SQR_TOOM3_THRESHOLD                117
  #define SQR_TOOM4_THRESHOLD                327
  #define SQR_TOOM6_THRESHOLD                446
-#define SQR_TOOM8_THRESHOLD                597
+#define SQR_TOOM8_THRESHOLD                547
+
+#define MULMID_TOOM42_THRESHOLD             36
  
  #define MULMOD_BNM1_THRESHOLD               17
  #define SQRMOD_BNM1_THRESHOLD               17
  
+#define POWM_SEC_TABLE  2,67,322,991
+
  #define MUL_FFT_MODF_THRESHOLD             570  /* k = 5 */
  #define MUL_FFT_TABLE3                                      \
    { {    570, 5}, {     21, 6}, {     11, 5}, {     23, 6}, \
@@ -162,36 +167,42 @@ along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
  #define SQR_FFT_TABLE3_SIZE 203
  #define SQR_FFT_THRESHOLD                 5248
  
-#define MULLO_BASECASE_THRESHOLD             0
+#define MULLO_BASECASE_THRESHOLD             0  /* always */
  #define MULLO_DC_THRESHOLD                  35
-#define MULLO_MUL_N_THRESHOLD            14709
+#define MULLO_MUL_N_THRESHOLD            15604
  
  #define DC_DIV_QR_THRESHOLD                 56
  #define DC_DIVAPPR_Q_THRESHOLD             220
  #define DC_BDIV_QR_THRESHOLD                52
  #define DC_BDIV_Q_THRESHOLD                152
  
-#define INV_MULMOD_BNM1_THRESHOLD           74
-#define INV_NEWTON_THRESHOLD               260
-#define INV_APPR_THRESHOLD                 220
+#define INV_MULMOD_BNM1_THRESHOLD           54
+#define INV_NEWTON_THRESHOLD               226
+#define INV_APPR_THRESHOLD                 214
  
-#define BINV_NEWTON_THRESHOLD              345
-#define REDC_1_TO_REDC_2_THRESHOLD           6
+#define BINV_NEWTON_THRESHOLD              327
+#define REDC_1_TO_REDC_2_THRESHOLD           4
  #define REDC_2_TO_REDC_N_THRESHOLD          79
  
-#define MU_DIV_QR_THRESHOLD               1787
-#define MU_DIVAPPR_Q_THRESHOLD            1787
-#define MUPI_DIV_QR_THRESHOLD              126
-#define MU_BDIV_QR_THRESHOLD              1620
-#define MU_BDIV_Q_THRESHOLD               1787
-
-#define MATRIX22_STRASSEN_THRESHOLD         17
-#define HGCD_THRESHOLD                     139
-#define GCD_DC_THRESHOLD                   501
-#define GCDEXT_DC_THRESHOLD                474
-#define JACOBI_BASE_METHOD                   1
-
-#define GET_STR_DC_THRESHOLD                17
-#define GET_STR_PRECOMPUTE_THRESHOLD        23
-#define SET_STR_DC_THRESHOLD               266
+#define MU_DIV_QR_THRESHOLD               1895
+#define MU_DIVAPPR_Q_THRESHOLD            1895
+#define MUPI_DIV_QR_THRESHOLD              106
+#define MU_BDIV_QR_THRESHOLD              1589
+#define MU_BDIV_Q_THRESHOLD               1718
+
+#define MATRIX22_STRASSEN_THRESHOLD         16
+#define HGCD_THRESHOLD                     125
+#define HGCD_APPR_THRESHOLD                173
+#define HGCD_REDUCE_THRESHOLD             3524
+#define GCD_DC_THRESHOLD                   555
+#define GCDEXT_DC_THRESHOLD                478
+#define JACOBI_BASE_METHOD                   4
+
+#define GET_STR_DC_THRESHOLD                12
+#define GET_STR_PRECOMPUTE_THRESHOLD        28
+#define SET_STR_DC_THRESHOLD               248
  #define SET_STR_PRECOMPUTE_THRESHOLD      1648
+
+#define FAC_DSC_THRESHOLD                 1075
+#define FAC_ODD_THRESHOLD                    0  /* always */
+
diff --git a/mpn/x86_64/invert_limb.asm b/mpn/x86_64/invert_limb.asm

index 8dcfae0b02505273ac699fb3892466d5108814fc..60b861a933184b37572b162ae419775066ee1615 100644 (file)
--- a/mpn/x86_64/invert_limb.asm
+++ b/mpn/x86_64/invert_limb.asm
@@ -2,7 +2,8 @@ dnl  AMD64 mpn_invert_limb -- Invert a normalized limb.
  
  dnl  Contributed to the GNU project by Torbjorn Granlund and Niels Möller.
  
-dnl  Copyright 2004, 2007, 2008, 2009 Free Software Foundation, Inc.
+dnl  Copyright 2004, 2007, 2008, 2009, 2011, 2012 Free Software Foundation,
+dnl  Inc.
  
  dnl  This file is part of the GNU MP Library.
  
@@ -23,30 +24,36 @@ include(`../config.m4')
  
  
  C           cycles/limb (approx)       div
-C K8,K9:        48                      71
-C K10:          48                      77
-C P4:          135                     161
-C P6 core2:     69                     116
-C P6 corei7:    55                      89
-C P6 atom:     129                     191
+C AMD K8,K9     48                      71
+C AMD K10       48                      77
+C Intel P4     135                     161
+C Intel core2   69                     116
+C Intel corei   55                      89
+C Intel atom   129                     191
+C VIA nano      79                     157
  
  C rax rcx rdx rdi rsi r8
  
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
+
+PROTECT(`mpn_invert_limb_table')
  
  ASM_START()
         TEXT
         ALIGN(16)
  PROLOGUE(mpn_invert_limb)              C                       Kn      C2      Ci
+       FUNC_ENTRY(1)
         mov     %rdi, %rax              C                        0       0       0
         shr     $55, %rax               C                        1       1       1
  ifdef(`PIC',`
  ifdef(`DARWIN',`
-       mov     approx_tab@GOTPCREL(%rip), %r8
+       mov     mpn_invert_limb_table@GOTPCREL(%rip), %r8
         add     $-512, %r8
  ',`
-       lea     -512+approx_tab(%rip), %r8
+       lea     -512+mpn_invert_limb_table(%rip), %r8
  ')',`
-       movabs  $-512+approx_tab, %r8
+       movabs  $-512+mpn_invert_limb_table, %r8
  ')
         movzwl  (%r8,%rax,2), R32(%rcx) C       %rcx = v0
  
@@ -62,7 +69,7 @@ ifdef(`DARWIN',`
         dec     R32(%rax)
         sub     R32(%rcx), R32(%rax)    C       %rax = v1
  
-       C v2 = (v1 << 13) + (v1 * (2^60 - v1*d40) >> 47
+       C v2 = (v1 << 13) + (v1 * (2^60 - v1*d40) >> 47)
         mov     $0x1000000000000000, %rcx
         imul    %rax, %rsi              C                       14      17      13
         sub     %rsi, %rcx
@@ -71,18 +78,18 @@ ifdef(`DARWIN',`
         shr     $47, %rcx
         add     %rax, %rcx              C       %rcx = v2
  
-       C v3 = (v2 << 31) + (v2 * (2^96 - v2 * d63 + (v2>>1) & mask) >> 65
+       C v3 = (v2 << 31) + (v2 * (2^96 - v2 * d63 + ((v2 >> 1) & mask)) >> 65)
         mov     %rdi, %rsi              C                        0       0       0
-       shr     $1, %rsi                C d/2
+       shr     %rsi                    C d/2
         sbb     %rax, %rax              C -d0 = -(d mod 2)
         sub     %rax, %rsi              C d63 = ceil(d/2)
         imul    %rcx, %rsi              C v2 * d63
         and     %rcx, %rax              C v2 * d0
-       shr     $1, %rax                C (v2>>1) * d0
+       shr     %rax                    C (v2>>1) * d0
         sub     %rsi, %rax              C (v2>>1) * d0 - v2 * d63
         mul     %rcx
         sal     $31, %rcx
-       shr     $1, %rdx
+       shr     %rdx
         add     %rdx, %rcx              C       %rcx = v3
  
         mov     %rdi, %rax
@@ -92,42 +99,7 @@ ifdef(`DARWIN',`
         adc     %rdi, %rdx
         sub     %rdx, %rax
  
+       FUNC_EXIT()
         ret
  EPILOGUE()
-
-       RODATA
-       ALIGN(2)
-approx_tab:
-       .value  0x7fd,0x7f5,0x7ed,0x7e5,0x7dd,0x7d5,0x7ce,0x7c6
-       .value  0x7bf,0x7b7,0x7b0,0x7a8,0x7a1,0x79a,0x792,0x78b
-       .value  0x784,0x77d,0x776,0x76f,0x768,0x761,0x75b,0x754
-       .value  0x74d,0x747,0x740,0x739,0x733,0x72c,0x726,0x720
-       .value  0x719,0x713,0x70d,0x707,0x700,0x6fa,0x6f4,0x6ee
-       .value  0x6e8,0x6e2,0x6dc,0x6d6,0x6d1,0x6cb,0x6c5,0x6bf
-       .value  0x6ba,0x6b4,0x6ae,0x6a9,0x6a3,0x69e,0x698,0x693
-       .value  0x68d,0x688,0x683,0x67d,0x678,0x673,0x66e,0x669
-       .value  0x664,0x65e,0x659,0x654,0x64f,0x64a,0x645,0x640
-       .value  0x63c,0x637,0x632,0x62d,0x628,0x624,0x61f,0x61a
-       .value  0x616,0x611,0x60c,0x608,0x603,0x5ff,0x5fa,0x5f6
-       .value  0x5f1,0x5ed,0x5e9,0x5e4,0x5e0,0x5dc,0x5d7,0x5d3
-       .value  0x5cf,0x5cb,0x5c6,0x5c2,0x5be,0x5ba,0x5b6,0x5b2
-       .value  0x5ae,0x5aa,0x5a6,0x5a2,0x59e,0x59a,0x596,0x592
-       .value  0x58e,0x58a,0x586,0x583,0x57f,0x57b,0x577,0x574
-       .value  0x570,0x56c,0x568,0x565,0x561,0x55e,0x55a,0x556
-       .value  0x553,0x54f,0x54c,0x548,0x545,0x541,0x53e,0x53a
-       .value  0x537,0x534,0x530,0x52d,0x52a,0x526,0x523,0x520
-       .value  0x51c,0x519,0x516,0x513,0x50f,0x50c,0x509,0x506
-       .value  0x503,0x500,0x4fc,0x4f9,0x4f6,0x4f3,0x4f0,0x4ed
-       .value  0x4ea,0x4e7,0x4e4,0x4e1,0x4de,0x4db,0x4d8,0x4d5
-       .value  0x4d2,0x4cf,0x4cc,0x4ca,0x4c7,0x4c4,0x4c1,0x4be
-       .value  0x4bb,0x4b9,0x4b6,0x4b3,0x4b0,0x4ad,0x4ab,0x4a8
-       .value  0x4a5,0x4a3,0x4a0,0x49d,0x49b,0x498,0x495,0x493
-       .value  0x490,0x48d,0x48b,0x488,0x486,0x483,0x481,0x47e
-       .value  0x47c,0x479,0x477,0x474,0x472,0x46f,0x46d,0x46a
-       .value  0x468,0x465,0x463,0x461,0x45e,0x45c,0x459,0x457
-       .value  0x455,0x452,0x450,0x44e,0x44b,0x449,0x447,0x444
-       .value  0x442,0x440,0x43e,0x43b,0x439,0x437,0x435,0x432
-       .value  0x430,0x42e,0x42c,0x42a,0x428,0x425,0x423,0x421
-       .value  0x41f,0x41d,0x41b,0x419,0x417,0x414,0x412,0x410
-       .value  0x40e,0x40c,0x40a,0x408,0x406,0x404,0x402,0x400
  ASM_END()
diff --git a/mpn/x86_64/invert_limb_table.asm b/mpn/x86_64/invert_limb_table.asm

new file mode 100644 (file)

index 0000000..f3f8573
--- /dev/null
+++ b/mpn/x86_64/invert_limb_table.asm
@@ -0,0 +1,69 @@
+dnl  Table used for mpn_invert_limb
+
+dnl  Contributed to the GNU project by Torbjorn Granlund and Niels Möller.
+
+dnl  Copyright 2004, 2007, 2008, 2009, 2011, 2012 Free Software Foundation,
+dnl  Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
+
+PROTECT(`mpn_invert_limb_table')
+
+ASM_START()
+C Table entry X contains floor (0x7fd00 / (0x100 + X))
+
+       RODATA
+       ALIGN(2)
+       GLOBL mpn_invert_limb_table
+mpn_invert_limb_table:
+       .value  0x7fd,0x7f5,0x7ed,0x7e5,0x7dd,0x7d5,0x7ce,0x7c6
+       .value  0x7bf,0x7b7,0x7b0,0x7a8,0x7a1,0x79a,0x792,0x78b
+       .value  0x784,0x77d,0x776,0x76f,0x768,0x761,0x75b,0x754
+       .value  0x74d,0x747,0x740,0x739,0x733,0x72c,0x726,0x720
+       .value  0x719,0x713,0x70d,0x707,0x700,0x6fa,0x6f4,0x6ee
+       .value  0x6e8,0x6e2,0x6dc,0x6d6,0x6d1,0x6cb,0x6c5,0x6bf
+       .value  0x6ba,0x6b4,0x6ae,0x6a9,0x6a3,0x69e,0x698,0x693
+       .value  0x68d,0x688,0x683,0x67d,0x678,0x673,0x66e,0x669
+       .value  0x664,0x65e,0x659,0x654,0x64f,0x64a,0x645,0x640
+       .value  0x63c,0x637,0x632,0x62d,0x628,0x624,0x61f,0x61a
+       .value  0x616,0x611,0x60c,0x608,0x603,0x5ff,0x5fa,0x5f6
+       .value  0x5f1,0x5ed,0x5e9,0x5e4,0x5e0,0x5dc,0x5d7,0x5d3
+       .value  0x5cf,0x5cb,0x5c6,0x5c2,0x5be,0x5ba,0x5b6,0x5b2
+       .value  0x5ae,0x5aa,0x5a6,0x5a2,0x59e,0x59a,0x596,0x592
+       .value  0x58e,0x58a,0x586,0x583,0x57f,0x57b,0x577,0x574
+       .value  0x570,0x56c,0x568,0x565,0x561,0x55e,0x55a,0x556
+       .value  0x553,0x54f,0x54c,0x548,0x545,0x541,0x53e,0x53a
+       .value  0x537,0x534,0x530,0x52d,0x52a,0x526,0x523,0x520
+       .value  0x51c,0x519,0x516,0x513,0x50f,0x50c,0x509,0x506
+       .value  0x503,0x500,0x4fc,0x4f9,0x4f6,0x4f3,0x4f0,0x4ed
+       .value  0x4ea,0x4e7,0x4e4,0x4e1,0x4de,0x4db,0x4d8,0x4d5
+       .value  0x4d2,0x4cf,0x4cc,0x4ca,0x4c7,0x4c4,0x4c1,0x4be
+       .value  0x4bb,0x4b9,0x4b6,0x4b3,0x4b0,0x4ad,0x4ab,0x4a8
+       .value  0x4a5,0x4a3,0x4a0,0x49d,0x49b,0x498,0x495,0x493
+       .value  0x490,0x48d,0x48b,0x488,0x486,0x483,0x481,0x47e
+       .value  0x47c,0x479,0x477,0x474,0x472,0x46f,0x46d,0x46a
+       .value  0x468,0x465,0x463,0x461,0x45e,0x45c,0x459,0x457
+       .value  0x455,0x452,0x450,0x44e,0x44b,0x449,0x447,0x444
+       .value  0x442,0x440,0x43e,0x43b,0x439,0x437,0x435,0x432
+       .value  0x430,0x42e,0x42c,0x42a,0x428,0x425,0x423,0x421
+       .value  0x41f,0x41d,0x41b,0x419,0x417,0x414,0x412,0x410
+       .value  0x40e,0x40c,0x40a,0x408,0x406,0x404,0x402,0x400
+ASM_END()
diff --git a/mpn/x86_64/k10/gcd_1.asm b/mpn/x86_64/k10/gcd_1.asm

new file mode 100644 (file)

index 0000000..bbbdbcd
--- /dev/null
+++ b/mpn/x86_64/k10/gcd_1.asm
@@ -0,0 +1,26 @@
+dnl  AMD64 mpn_gcd_1.
+
+dnl  Copyright 2012 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
+
+MULFUNC_PROLOGUE(mpn_gcd_1)
+include_mpn(`x86_64/core2/gcd_1.asm')
diff --git a/mpn/x86_64/k10/gmp-mparam.h b/mpn/x86_64/k10/gmp-mparam.h

new file mode 100644 (file)

index 0000000..d75cd25
--- /dev/null
+++ b/mpn/x86_64/k10/gmp-mparam.h
@@ -0,0 +1,214 @@
+/* AMD K10 gmp-mparam.h -- Compiler/machine parameter header file.
+
+Copyright 1991, 1993, 1994, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007,
+2008, 2009, 2010, 2011, 2012 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#define GMP_LIMB_BITS 64
+#define BYTES_PER_MP_LIMB 8
+
+#if 0
+#undef mpn_sublsh_n
+#define mpn_sublsh_n(rp,up,vp,n,c)                                     \
+  (((rp) == (up)) ? mpn_submul_1 (rp, vp, n, CNST_LIMB(1) << (c))      \
+   : MPN(mpn_sublsh_n)(rp,up,vp,n,c))
+#endif
+
+#define MOD_1_NORM_THRESHOLD                 0  /* always */
+#define MOD_1_UNNORM_THRESHOLD               0  /* always */
+#define MOD_1N_TO_MOD_1_1_THRESHOLD          3
+#define MOD_1U_TO_MOD_1_1_THRESHOLD          2
+#define MOD_1_1_TO_MOD_1_2_THRESHOLD        17
+#define MOD_1_2_TO_MOD_1_4_THRESHOLD        28
+#define PREINV_MOD_1_TO_MOD_1_THRESHOLD      7
+#define USE_PREINV_DIVREM_1                  1  /* native */
+#define DIV_QR_2_PI2_THRESHOLD           MP_SIZE_T_MAX  /* never */
+#define DIVEXACT_1_THRESHOLD                 0  /* always (native) */
+#define BMOD_1_TO_MOD_1_THRESHOLD           15
+
+#define MUL_TOOM22_THRESHOLD                28
+#define MUL_TOOM33_THRESHOLD                81
+#define MUL_TOOM44_THRESHOLD               242
+#define MUL_TOOM6H_THRESHOLD               418
+#define MUL_TOOM8H_THRESHOLD               478
+
+#define MUL_TOOM32_TO_TOOM43_THRESHOLD      97
+#define MUL_TOOM32_TO_TOOM53_THRESHOLD     155
+#define MUL_TOOM42_TO_TOOM53_THRESHOLD     149
+#define MUL_TOOM42_TO_TOOM63_THRESHOLD     163
+#define MUL_TOOM43_TO_TOOM54_THRESHOLD     226
+
+#define SQR_BASECASE_THRESHOLD               0  /* always (native) */
+#define SQR_TOOM2_THRESHOLD                 34
+#define SQR_TOOM3_THRESHOLD                113
+#define SQR_TOOM4_THRESHOLD                336
+#define SQR_TOOM6_THRESHOLD                557
+#define SQR_TOOM8_THRESHOLD                  0  /* always */
+
+#define MULMID_TOOM42_THRESHOLD             36
+
+#define MULMOD_BNM1_THRESHOLD               15
+#define SQRMOD_BNM1_THRESHOLD               18
+
+#define MUL_FFT_MODF_THRESHOLD             525  /* k = 5 */
+#define MUL_FFT_TABLE3                                      \
+  { {    570, 5}, {     21, 6}, {     11, 5}, {     23, 6}, \
+    {     25, 7}, {     13, 6}, {     27, 7}, {     15, 6}, \
+    {     31, 7}, {     25, 8}, {     13, 7}, {     29, 8}, \
+    {     15, 7}, {     31, 8}, {     17, 7}, {     35, 8}, \
+    {     19, 7}, {     39, 8}, {     21, 7}, {     43, 8}, \
+    {     23, 7}, {     47, 8}, {     25, 7}, {     51, 8}, \
+    {     29, 9}, {     15, 8}, {     37, 9}, {     19, 8}, \
+    {     43, 9}, {     23, 8}, {     51, 9}, {     27, 8}, \
+    {     55,10}, {     15, 9}, {     43,10}, {     23, 9}, \
+    {     55,10}, {     31, 9}, {     63, 5}, {   1023, 4}, \
+    {   2431, 5}, {   1279, 6}, {    671, 7}, {    367, 8}, \
+    {    189, 9}, {     95, 8}, {    195, 9}, {    111,11}, \
+    {     31, 9}, {    131,10}, {     71, 9}, {    155,10}, \
+    {     79, 9}, {    159,10}, {     87,11}, {     47,10}, \
+    {    111,11}, {     63,10}, {    135,11}, {     79,10}, \
+    {    167,11}, {     95,10}, {    191,11}, {    111,12}, \
+    {     63,11}, {    143,10}, {    287,11}, {    159,10}, \
+    {    319,11}, {    175,12}, {     95,11}, {    207,13}, \
+    {     63,12}, {    127,11}, {    255,10}, {    543,11}, \
+    {    287,12}, {    159,11}, {    319,10}, {    639,11}, \
+    {    335,10}, {    671,11}, {    351,10}, {    703,12}, \
+    {    191,11}, {    383,10}, {    767,11}, {    415,12}, \
+    {    223,13}, {    127,12}, {    255,11}, {    543,12}, \
+    {    287,11}, {    575,10}, {   1151,11}, {    607,12}, \
+    {    319,11}, {    639,10}, {   1279,11}, {    671,12}, \
+    {    351,11}, {    703,13}, {    191,12}, {    383,11}, \
+    {    767,12}, {    415,11}, {    831,12}, {    447,14}, \
+    {    127,13}, {    255,12}, {    543,11}, {   1087,12}, \
+    {    607,11}, {   1215,13}, {    319,12}, {    671,11}, \
+    {   1343,12}, {    735,13}, {    383,12}, {    767,11}, \
+    {   1535,12}, {    799,11}, {   1599,12}, {    831,13}, \
+    {    447,12}, {    895,11}, {   1791,12}, {    959,14}, \
+    {    255,13}, {    511,12}, {   1087,13}, {    575,12}, \
+    {   1215,13}, {    639,12}, {   1343,13}, {    703,12}, \
+    {   1407,14}, {    383,13}, {    767,12}, {   1599,13}, \
+    {    831,12}, {   1663,13}, {    895,12}, {   1791,13}, \
+    {    959,15}, {    255,14}, {    511,13}, {   1087,12}, \
+    {   2175,13}, {   1215,14}, {    639,13}, {   1471,14}, \
+    {    767,13}, {   1663,14}, {    895,13}, {   1855,15}, \
+    {    511,14}, {   1023,13}, {   2175,14}, {   1151,13}, \
+    {   2431,14}, {   1279,13}, {   2687,14}, {   1407,15}, \
+    {    767,14}, {   1535,13}, {   3071,14}, {   1791,16}, \
+    {    511,15}, {   1023,14}, {   2431,15}, {   1279,14}, \
+    {   2815,15}, {   1535,14}, {   3199,15}, {   1791,14}, \
+    {   3583,16}, {  65536,17}, { 131072,18}, { 262144,19}, \
+    { 524288,20}, {1048576,21}, {2097152,22}, {4194304,23}, \
+    {8388608,24} }
+#define MUL_FFT_TABLE3_SIZE 185
+#define MUL_FFT_THRESHOLD                 7552
+
+#define SQR_FFT_MODF_THRESHOLD             444  /* k = 5 */
+#define SQR_FFT_TABLE3                                      \
+  { {    460, 5}, {     21, 6}, {     11, 5}, {     23, 6}, \
+    {     12, 5}, {     25, 6}, {     27, 7}, {     14, 6}, \
+    {     29, 7}, {     15, 6}, {     31, 7}, {     29, 8}, \
+    {     15, 7}, {     32, 8}, {     17, 7}, {     35, 8}, \
+    {     19, 7}, {     39, 8}, {     21, 7}, {     43, 8}, \
+    {     25, 7}, {     51, 8}, {     29, 9}, {     15, 8}, \
+    {     35, 9}, {     19, 8}, {     43, 9}, {     23, 8}, \
+    {     51, 9}, {     27, 8}, {     55,10}, {     15, 9}, \
+    {     31, 8}, {     63, 9}, {     43,10}, {     23, 9}, \
+    {     55,11}, {     15,10}, {     31, 9}, {     71,10}, \
+    {     39, 9}, {     83,10}, {     47, 6}, {    767, 4}, \
+    {   3263, 5}, {   1727, 4}, {   3455, 5}, {   1791, 6}, \
+    {    927, 7}, {    479, 6}, {    959, 7}, {    511, 8}, \
+    {    271, 9}, {    147,10}, {     87,11}, {     47,10}, \
+    {     95,12}, {     31,11}, {     63,10}, {    135,11}, \
+    {     79,10}, {    167,11}, {     95,10}, {    191,11}, \
+    {    111,12}, {     63,11}, {    127,10}, {    255,11}, \
+    {    143,10}, {    287, 9}, {    575,10}, {    303,11}, \
+    {    159,12}, {     95,11}, {    191,10}, {    383, 9}, \
+    {    767,10}, {    399,11}, {    207,13}, {     63,12}, \
+    {    127,11}, {    255,10}, {    511,11}, {    271,10}, \
+    {    543,11}, {    287,10}, {    575,12}, {    159,11}, \
+    {    319,10}, {    639,11}, {    335,10}, {    671,11}, \
+    {    351,10}, {    703,12}, {    191,11}, {    383,10}, \
+    {    767,11}, {    415,10}, {    831,11}, {    447,13}, \
+    {    127,12}, {    255,11}, {    511,10}, {   1023,11}, \
+    {    543,12}, {    287,11}, {    575,10}, {   1151,11}, \
+    {    607,10}, {   1215,12}, {    319,11}, {    639,10}, \
+    {   1279,11}, {    671,12}, {    351,11}, {    703,13}, \
+    {    191,12}, {    383,11}, {    767,12}, {    415,11}, \
+    {    831,12}, {    447,14}, {    127,13}, {    255,12}, \
+    {    511,11}, {   1023,12}, {    543,11}, {   1087,12}, \
+    {    575,11}, {   1151,12}, {    607,13}, {    319,12}, \
+    {    639,11}, {   1279,12}, {    671,11}, {   1343,12}, \
+    {    703,11}, {   1407,12}, {    735,13}, {    383,12}, \
+    {    767,11}, {   1535,12}, {    799,11}, {   1599,12}, \
+    {    831,13}, {    447,12}, {    959,14}, {    255,13}, \
+    {    511,12}, {   1087,13}, {    575,12}, {   1215,13}, \
+    {    639,12}, {   1343,13}, {    703,12}, {   1407,14}, \
+    {    383,13}, {    767,12}, {   1599,13}, {    831,12}, \
+    {   1663,13}, {    895,12}, {   1791,13}, {    959,15}, \
+    {    255,14}, {    511,13}, {   1087,12}, {   2175,13}, \
+    {   1215,14}, {    639,13}, {   1471,14}, {    767,13}, \
+    {   1663,14}, {    895,13}, {   1855,15}, {    511,14}, \
+    {   1023,13}, {   2175,14}, {   1151,13}, {   2303,14}, \
+    {   1279,13}, {   2559,14}, {   1407,15}, {    767,14}, \
+    {   1535,13}, {   3071,14}, {   1791,16}, {    511,15}, \
+    {   1023,14}, {   2303,15}, {   1279,14}, {   2687,15}, \
+    {   1535,14}, {   3199,15}, {   1791,16}, {  65536,17}, \
+    { 131072,18}, { 262144,19}, { 524288,20}, {1048576,21}, \
+    {2097152,22}, {4194304,23}, {8388608,24} }
+#define SQR_FFT_TABLE3_SIZE 203
+#define SQR_FFT_THRESHOLD                 5248
+
+#define MULLO_BASECASE_THRESHOLD             0  /* always */
+#define MULLO_DC_THRESHOLD                  61
+#define MULLO_MUL_N_THRESHOLD            15150
+
+#define DC_DIV_QR_THRESHOLD                 56
+#define DC_DIVAPPR_Q_THRESHOLD             220
+#define DC_BDIV_QR_THRESHOLD                52
+#define DC_BDIV_Q_THRESHOLD                 44
+
+#define INV_MULMOD_BNM1_THRESHOLD           54
+#define INV_NEWTON_THRESHOLD               222
+#define INV_APPR_THRESHOLD                 214
+
+#define BINV_NEWTON_THRESHOLD              324
+#define REDC_1_TO_REDC_2_THRESHOLD          19
+#define REDC_2_TO_REDC_N_THRESHOLD          71
+
+#define MU_DIV_QR_THRESHOLD               1718
+#define MU_DIVAPPR_Q_THRESHOLD            1652
+#define MUPI_DIV_QR_THRESHOLD              102
+#define MU_BDIV_QR_THRESHOLD              1528
+#define MU_BDIV_Q_THRESHOLD               1589
+
+#define POWM_SEC_TABLE  2,23,322,2080
+
+#define MATRIX22_STRASSEN_THRESHOLD         17
+#define HGCD_THRESHOLD                     144
+#define HGCD_APPR_THRESHOLD                175
+#define HGCD_REDUCE_THRESHOLD             3389
+#define GCD_DC_THRESHOLD                   501
+#define GCDEXT_DC_THRESHOLD                465
+#define JACOBI_BASE_METHOD                   4
+
+#define GET_STR_DC_THRESHOLD                14
+#define GET_STR_PRECOMPUTE_THRESHOLD        29
+#define SET_STR_DC_THRESHOLD               248
+#define SET_STR_PRECOMPUTE_THRESHOLD      1648
+
+#define FAC_DSC_THRESHOLD                 1105
+#define FAC_ODD_THRESHOLD                   30
diff --git a/mpn/x86_64/k10/hamdist.asm b/mpn/x86_64/k10/hamdist.asm

new file mode 100644 (file)

index 0000000..ebfa70a
--- /dev/null
+++ b/mpn/x86_64/k10/hamdist.asm
@@ -0,0 +1,92 @@
+dnl  AMD64 mpn_hamdist -- hamming distance.
+
+dnl  Copyright 2008, 2010, 2011, 2012 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C                  cycles/limb
+C AMD K8,K9             n/a
+C AMD K10               2
+C Intel P4              n/a
+C Intel core2           n/a
+C Intel corei           2.05
+C Intel atom            n/a
+C VIA nano              n/a
+
+C This is very straightforward 2-way unrolled code.
+
+C TODO
+C  * Write something less basic.  It should not be hard to reach 1.5 c/l with
+C    4-way unrolling.
+
+define(`ap',           `%rdi')
+define(`bp',           `%rsi')
+define(`n',            `%rdx')
+
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
+
+ASM_START()
+       TEXT
+       ALIGN(32)
+PROLOGUE(mpn_hamdist)                  C rax = sum of popcnt(ap[i] xor bp[i]) over n limbs
+       FUNC_ENTRY(3)
+       mov     (ap), %r8                       C r8 = first limb of A
+       xor     (bp), %r8                       C r8 = ap[0] xor bp[0]
+
+       lea     (ap,n,8), ap                    C point at A operand end
+       lea     (bp,n,8), bp                    C point at B operand end
+       neg     n                               C n counts up from -n towards zero
+
+       bt      $0, R32(n)                      C CF = low bit of n (negation keeps parity)
+       jnc     L(2)                            C even limb count: start with two limbs
+
+L(1):  .byte   0xf3,0x49,0x0f,0xb8,0xc0        C popcnt %r8, %rax
+       xor     R32(%r10), R32(%r10)            C no pending second count for L(top) to add
+       add     $1, n                           C one limb consumed
+       js      L(top)                          C n still negative: more limbs remain
+       FUNC_EXIT()
+       ret
+
+       ALIGN(16)
+L(2):  mov     8(ap,n,8), %r9                  C r9 = second limb of A
+       .byte   0xf3,0x49,0x0f,0xb8,0xc0        C popcnt %r8, %rax
+       xor     8(bp,n,8), %r9                  C r9 = second limb of A xor second limb of B
+       .byte   0xf3,0x4d,0x0f,0xb8,0xd1        C popcnt %r9, %r10
+       add     $2, n                           C two limbs consumed
+       js      L(top)                          C n still negative: more limbs remain
+       lea     (%r10, %rax), %rax              C fold pending count into the result
+       FUNC_EXIT()
+       ret
+
+       ALIGN(16)
+L(top):        mov     (ap,n,8), %r8
+       lea     (%r10, %rax), %rax              C add count left pending in r10 by previous step
+       mov     8(ap,n,8), %r9
+       xor     (bp,n,8), %r8
+       xor     8(bp,n,8), %r9
+       .byte   0xf3,0x49,0x0f,0xb8,0xc8        C popcnt %r8, %rcx
+       lea     (%rcx, %rax), %rax              C add count of the first limb of this pair
+       .byte   0xf3,0x4d,0x0f,0xb8,0xd1        C popcnt %r9, %r10
+       add     $2, n                           C two limbs per iteration
+       js      L(top)                          C loop until n reaches zero
+
+       lea     (%r10, %rax), %rax              C add the last pending count
+       FUNC_EXIT()
+       ret
+EPILOGUE()
diff --git a/mpn/x86_64/k10/lshift.asm b/mpn/x86_64/k10/lshift.asm

new file mode 100644 (file)

index 0000000..72f02e4
--- /dev/null
+++ b/mpn/x86_64/k10/lshift.asm
@@ -0,0 +1,26 @@
+dnl  X86-64 mpn_lshift for AMD K10 -- wrapper including fastsse/lshift-movdqu2.asm.
+
+dnl  Copyright 2012 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
+
+MULFUNC_PROLOGUE(mpn_lshift)
+include_mpn(`x86_64/fastsse/lshift-movdqu2.asm')
diff --git a/mpn/x86_64/k10/lshiftc.asm b/mpn/x86_64/k10/lshiftc.asm

new file mode 100644 (file)

index 0000000..7e96f49
--- /dev/null
+++ b/mpn/x86_64/k10/lshiftc.asm
@@ -0,0 +1,26 @@
+dnl  X86-64 mpn_lshiftc optimised for AMD K10.
+
+dnl  Copyright 2012 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
+
+MULFUNC_PROLOGUE(mpn_lshiftc)
+include_mpn(`x86_64/fastsse/lshiftc-movdqu2.asm')
diff --git a/mpn/x86_64/k10/popcount.asm b/mpn/x86_64/k10/popcount.asm

new file mode 100644 (file)

index 0000000..d266bd7
--- /dev/null
+++ b/mpn/x86_64/k10/popcount.asm
@@ -0,0 +1,127 @@
+dnl  AMD64 mpn_popcount -- population count.
+
+dnl  Copyright 2008, 2010, 2011, 2012 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C                  cycles/limb
+C AMD K8,K9             n/a
+C AMD K10               1.125
+C Intel P4              n/a
+C Intel core2           n/a
+C Intel corei           1.25
+C Intel atom            n/a
+C VIA nano              n/a
+
+C * The zero-offset of popcount is misassembled to the offset-less form, which
+C   is one byte shorter and therefore will mess up the switching code.
+C * The outdated gas used in FreeBSD and NetBSD cannot handle the POPCNT insn,
+C   which is the main reason for our usage of '.byte'.
+
+C TODO
+C  * Improve switching code, the current code sucks.
+
+define(`up',           `%rdi')
+define(`n',            `%rsi')
+
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
+
+ASM_START()
+       TEXT
+       ALIGN(32)
+PROLOGUE(mpn_popcount)
+       FUNC_ENTRY(2)
+
+ifelse(1,1,`
+       lea     (up,n,8), up            C up = address just past the last limb
+
+C      mov     R32(n), R32(%rcx)
+C      neg     R32(%rcx)
+       imul    $-1, R32(n), R32(%rcx)  C ecx = -n, stands in for the mov+neg pair above
+       and     $8-1, R32(%rcx)         C ecx = (-n) mod 8 = unrolled slots to skip
+
+       neg     n                       C n = -n, a negative index that counts up
+
+       mov     R32(%rcx), R32(%rax)
+       neg     %rax                    C rax = -(slots to skip)
+       lea     (up,%rax,8),up          C move up back by 8 bytes per skipped slot
+
+       xor     R32(%rax), R32(%rax)    C clear the bit count accumulator
+
+       lea     (%rcx,%rcx,4), %rcx     C 5 * (slots to skip)
+
+       lea     L(top)(%rip), %rdx
+       lea     (%rdx,%rcx,2), %rdx     C L(top) + 10 bytes per skipped slot
+       jmp     *%rdx                   C enter the unrolled loop at that slot
+',`
+       lea     (up,n,8), up            C alternative setup, not selected while ifelse(1,1) holds
+
+       mov     R32(n), R32(%rcx)
+       neg     R32(%rcx)
+       and     $8-1, R32(%rcx)         C ecx = (-n) mod 8 = unrolled slots to skip
+
+       neg     n
+
+       mov     R32(%rcx), R32(%rax)
+       shl     $3, R32(%rax)           C byte offset of the skipped slots
+       sub     %rax, up
+
+       xor     R32(%rax), R32(%rax)    C clear the bit count accumulator
+
+C      add     R32(%rcx), R32(%rcx)    C 2x
+C      lea     (%rcx,%rcx,4), %rcx     C 10x
+       imul    $10, R32(%rcx)          C 10 bytes per skipped slot
+
+       lea     L(top)(%rip), %rdx
+       add     %rcx, %rdx
+       jmp     *%rdx
+')
+
+       ALIGN(32)
+L(top):
+C 0 = n mod 8; each popcnt/add slot must stay 10 bytes (7 + 3) for the computed jump
+       .byte   0xf3,0x4c,0x0f,0xb8,0x44,0xf7,0x00      C popcnt 0(up,n,8), %r8
+       add     %r8, %rax
+C 7 = n mod 8
+       .byte   0xf3,0x4c,0x0f,0xb8,0x4c,0xf7,0x08      C popcnt 8(up,n,8), %r9
+       add     %r9, %rax
+C 6 = n mod 8
+       .byte   0xf3,0x4c,0x0f,0xb8,0x44,0xf7,0x10      C popcnt 16(up,n,8), %r8
+       add     %r8, %rax
+C 5 = n mod 8
+       .byte   0xf3,0x4c,0x0f,0xb8,0x4c,0xf7,0x18      C popcnt 24(up,n,8), %r9
+       add     %r9, %rax
+C 4 = n mod 8
+       .byte   0xf3,0x4c,0x0f,0xb8,0x44,0xf7,0x20      C popcnt 32(up,n,8), %r8
+       add     %r8, %rax
+C 3 = n mod 8
+       .byte   0xf3,0x4c,0x0f,0xb8,0x4c,0xf7,0x28      C popcnt 40(up,n,8), %r9
+       add     %r9, %rax
+C 2 = n mod 8
+       .byte   0xf3,0x4c,0x0f,0xb8,0x44,0xf7,0x30      C popcnt 48(up,n,8), %r8
+       add     %r8, %rax
+C 1 = n mod 8
+       .byte   0xf3,0x4c,0x0f,0xb8,0x4c,0xf7,0x38      C popcnt 56(up,n,8), %r9
+       add     %r9, %rax
+
+       add     $8, n                   C step the index to the next group of 8 limbs
+       js      L(top)                  C loop while the index is still negative
+       FUNC_EXIT()
+       ret                             C accumulated bit count is in rax
+EPILOGUE()
diff --git a/mpn/x86_64/k10/rshift.asm b/mpn/x86_64/k10/rshift.asm

new file mode 100644 (file)

index 0000000..5d16361
--- /dev/null
+++ b/mpn/x86_64/k10/rshift.asm
@@ -0,0 +1,26 @@
+dnl  X86-64 mpn_rshift optimised for AMD K10.
+
+dnl  Copyright 2012 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
+
+MULFUNC_PROLOGUE(mpn_rshift)
+include_mpn(`x86_64/fastsse/rshift-movdqu2.asm')
diff --git a/mpn/x86_64/k8/aorrlsh_n.asm b/mpn/x86_64/k8/aorrlsh_n.asm

new file mode 100644 (file)

index 0000000..395f699
--- /dev/null
+++ b/mpn/x86_64/k8/aorrlsh_n.asm
@@ -0,0 +1,206 @@
+dnl  AMD64 mpn_addlsh_n and mpn_rsblsh_n.  R = V2^k +- U.
+
+dnl  Copyright 2006, 2010, 2011, 2012 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C           cycles/limb
+C AMD K8,K9     2.87   < 3.85 for lshift + add_n
+C AMD K10       2.75   < 3.85 for lshift + add_n
+C Intel P4     22      > 7.33 for lshift + add_n
+C Intel core2   4.1    > 3.27 for lshift + add_n
+C Intel NHM     4.4    > 3.75 for lshift + add_n
+C Intel SBR     3.17   < 3.46 for lshift + add_n
+C Intel atom    ?      ? 8.75 for lshift + add_n
+C VIA nano      4.7    < 6.25 for lshift + add_n
+
+C TODO
+C  * Can we propagate carry into rdx instead of using a special carry register?
+C    That could save enough insns to get to 10 cycles/iteration.
+
+define(`rp',       `%rdi')
+define(`up',       `%rsi')
+define(`vp_param', `%rdx')
+define(`n_param',  `%rcx')
+define(`cnt',      `%r8')
+
+define(`vp',    `%r12')
+define(`n',     `%rbp')
+
+ifdef(`OPERATION_addlsh_n',`
+  define(ADDSUB,       `add')
+  define(ADCSBB,       `adc')
+  define(func, mpn_addlsh_n)
+')
+ifdef(`OPERATION_rsblsh_n',`
+  define(ADDSUB,       `sub')
+  define(ADCSBB,       `sbb')
+  define(func, mpn_rsblsh_n)
+')
+
+MULFUNC_PROLOGUE(mpn_addlsh_n mpn_rsblsh_n)
+
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
+
+ASM_START()
+       TEXT
+       ALIGN(16)
+PROLOGUE(func)
+       FUNC_ENTRY(4)
+IFDOS(`        mov     56(%rsp), %r8d  ')
+       push    %r12                    C saved because vp lives in r12
+       push    %rbp                    C saved because n lives in rbp
+       push    %rbx                    C saved because rbx holds a result limb
+
+       mov     (vp_param), %rax        C load first V limb early
+
+       mov     $0, R32(n)
+       sub     n_param, n              C n = -n_param, a negative index that counts up
+
+       lea     -16(up,n_param,8), up   C bias the pointers so that the negative
+       lea     -16(rp,n_param,8), rp   C index n addresses limbs from the low end
+       lea     16(vp_param,n_param,8), vp
+
+       mov     n_param, %r9            C copy of the size for the mod 4 dispatch
+
+       mov     %r8, %rcx               C shift count to cl, n_param is dead from here
+       mov     $1, R32(%r8)
+       shl     R8(%rcx), %r8           C r8 = 2^cnt, the multiplier that does the shift
+
+       mul     %r8                     C initial multiply
+
+       and     $3, R32(%r9)            C size mod 4 selects the loop entry point
+       jz      L(b0)
+       cmp     $2, R32(%r9)
+       jc      L(b1)
+       jz      L(b2)
+
+L(b3): mov     %rax, %r11
+       ADDSUB  16(up,n,8), %r11
+       mov     -8(vp,n,8), %rax
+       sbb     R32(%rcx), R32(%rcx)    C save carry flag as 0 or -1
+       mov     %rdx, %rbx
+       mul     %r8
+       or      %rax, %rbx              C merge high bits of previous limb with low product
+       mov     (vp,n,8), %rax
+       mov     %rdx, %r9
+       mul     %r8
+       or      %rax, %r9
+       add     $3, n
+       jnz     L(lo3)
+       jmp     L(cj3)
+
+L(b2): mov     %rax, %rbx
+       mov     -8(vp,n,8), %rax
+       mov     %rdx, %r9
+       mul     %r8
+       or      %rax, %r9
+       add     $2, n
+       jz      L(cj2)
+       mov     %rdx, %r10
+       mov     -16(vp,n,8), %rax
+       mul     %r8
+       or      %rax, %r10
+       xor     R32(%rcx), R32(%rcx)    C clear carry register
+       jmp     L(lo2)
+
+L(b1): mov     %rax, %r9
+       mov     %rdx, %r10
+       add     $1, n
+       jnz     L(gt1)
+       ADDSUB  8(up,n,8), %r9          C single limb case, carry flag stays live to cj1
+       jmp     L(cj1)
+L(gt1):        mov     -16(vp,n,8), %rax
+       mul     %r8
+       or      %rax, %r10
+       mov     %rdx, %r11
+       mov     -8(vp,n,8), %rax
+       mul     %r8
+       or      %rax, %r11
+       ADDSUB  8(up,n,8), %r9
+       ADCSBB  16(up,n,8), %r10
+       ADCSBB  24(up,n,8), %r11
+       mov     (vp,n,8), %rax
+       sbb     R32(%rcx), R32(%rcx)    C save carry flag as 0 or -1
+       jmp     L(lo1)
+
+L(b0): mov     %rax, %r10
+       mov     %rdx, %r11
+       mov     -8(vp,n,8), %rax
+       mul     %r8
+       or      %rax, %r11
+       ADDSUB  16(up,n,8), %r10
+       ADCSBB  24(up,n,8), %r11
+       mov     (vp,n,8), %rax
+       sbb     R32(%rcx), R32(%rcx)    C save carry flag as 0 or -1
+       mov     %rdx, %rbx
+       mul     %r8
+       or      %rax, %rbx
+       mov     8(vp,n,8), %rax
+       add     $4, n
+       jz      L(end)
+
+       ALIGN(8)
+L(top):        mov     %rdx, %r9
+       mul     %r8
+       or      %rax, %r9
+       mov     %r10, -16(rp,n,8)
+L(lo3):        mov     %rdx, %r10
+       mov     -16(vp,n,8), %rax
+       mul     %r8
+       or      %rax, %r10
+       mov     %r11, -8(rp,n,8)
+L(lo2):        mov     %rdx, %r11
+       mov     -8(vp,n,8), %rax
+       mul     %r8
+       or      %rax, %r11
+       add     R32(%rcx), R32(%rcx)    C restore the saved carry into the carry flag
+       ADCSBB  (up,n,8), %rbx
+       ADCSBB  8(up,n,8), %r9
+       ADCSBB  16(up,n,8), %r10
+       ADCSBB  24(up,n,8), %r11
+       mov     (vp,n,8), %rax
+       sbb     R32(%rcx), R32(%rcx)    C save carry flag as 0 or -1
+       mov     %rbx, (rp,n,8)
+L(lo1):        mov     %rdx, %rbx
+       mul     %r8
+       or      %rax, %rbx
+       mov     %r9, 8(rp,n,8)
+L(lo0):        mov     8(vp,n,8), %rax         C preload the next V limb
+       add     $4, n                   C four limbs per iteration
+       jnz     L(top)
+
+L(end):        mov     %rdx, %r9
+       mul     %r8
+       or      %rax, %r9
+       mov     %r10, -16(rp,n,8)
+L(cj3):        mov     %r11, -8(rp,n,8)
+L(cj2):        add     R32(%rcx), R32(%rcx)    C restore the saved carry into the carry flag
+       ADCSBB  (up,n,8), %rbx
+       ADCSBB  8(up,n,8), %r9
+       mov     %rbx, (rp,n,8)
+L(cj1):        mov     %r9, 8(rp,n,8)
+       mov     %rdx, %rax              C high part of the last product
+       ADCSBB  $0, %rax                C fold in the final carry or borrow; rax is left as the result
+       pop     %rbx                    C restore saved registers in reverse push order
+       pop     %rbp
+       pop     %r12
+       FUNC_EXIT()
+       ret
+EPILOGUE()
diff --git a/mpn/x86_64/k8/gmp-mparam.h b/mpn/x86_64/k8/gmp-mparam.h

new file mode 100644 (file)

index 0000000..fb15705
--- /dev/null
+++ b/mpn/x86_64/k8/gmp-mparam.h
@@ -0,0 +1,212 @@
+/* AMD K8 gmp-mparam.h -- Compiler/machine parameter header file.
+
+Copyright 1991, 1993, 1994, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007,
+2008, 2009, 2010, 2011, 2012 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#define GMP_LIMB_BITS 64
+#define BYTES_PER_MP_LIMB 8
+
+/* #undef mpn_sublsh_n */
+/* #define mpn_sublsh_n(rp,up,vp,n,c)                                  \ */
+/*    (((rp) == (up)) ? mpn_submul_1 (rp, vp, n, CNST_LIMB(1) << (c))  \ */
+/*      : MPN(mpn_sublsh_n)(rp,up,vp,n,c)) */
+
+#define MOD_1_NORM_THRESHOLD                 0  /* always */
+#define MOD_1_UNNORM_THRESHOLD               0  /* always */
+#define MOD_1N_TO_MOD_1_1_THRESHOLD          4
+#define MOD_1U_TO_MOD_1_1_THRESHOLD          2
+#define MOD_1_1_TO_MOD_1_2_THRESHOLD        13
+#define MOD_1_2_TO_MOD_1_4_THRESHOLD        35
+#define PREINV_MOD_1_TO_MOD_1_THRESHOLD      7
+#define USE_PREINV_DIVREM_1                  1  /* native */
+#define DIV_QR_2_PI2_THRESHOLD           MP_SIZE_T_MAX  /* never */
+#define DIVEXACT_1_THRESHOLD                 0  /* always (native) */
+#define BMOD_1_TO_MOD_1_THRESHOLD           17
+
+#define MUL_TOOM22_THRESHOLD                27
+#define MUL_TOOM33_THRESHOLD                81
+#define MUL_TOOM44_THRESHOLD               242
+#define MUL_TOOM6H_THRESHOLD               369
+#define MUL_TOOM8H_THRESHOLD               482
+
+#define MUL_TOOM32_TO_TOOM43_THRESHOLD      97
+#define MUL_TOOM32_TO_TOOM53_THRESHOLD     153
+#define MUL_TOOM42_TO_TOOM53_THRESHOLD     113
+#define MUL_TOOM42_TO_TOOM63_THRESHOLD     187
+#define MUL_TOOM43_TO_TOOM54_THRESHOLD     166
+
+#define SQR_BASECASE_THRESHOLD               0  /* always (native) */
+#define SQR_TOOM2_THRESHOLD                 34
+#define SQR_TOOM3_THRESHOLD                115
+#define SQR_TOOM4_THRESHOLD                527
+#define SQR_TOOM6_THRESHOLD                587
+#define SQR_TOOM8_THRESHOLD                  0  /* always */
+
+#define MULMID_TOOM42_THRESHOLD             36
+
+#define MULMOD_BNM1_THRESHOLD               18
+#define SQRMOD_BNM1_THRESHOLD               19
+
+#define MUL_FFT_MODF_THRESHOLD             642  /* k = 5 */
+#define MUL_FFT_TABLE3                                      \
+  { {    570, 5}, {     21, 6}, {     11, 5}, {     23, 6}, \
+    {     25, 7}, {     13, 6}, {     27, 7}, {     15, 6}, \
+    {     31, 7}, {     25, 8}, {     13, 7}, {     29, 8}, \
+    {     15, 7}, {     31, 8}, {     17, 7}, {     35, 8}, \
+    {     19, 7}, {     39, 8}, {     21, 7}, {     43, 8}, \
+    {     23, 7}, {     47, 8}, {     25, 7}, {     51, 8}, \
+    {     29, 9}, {     15, 8}, {     37, 9}, {     19, 8}, \
+    {     43, 9}, {     23, 8}, {     51, 9}, {     27, 8}, \
+    {     55,10}, {     15, 9}, {     43,10}, {     23, 9}, \
+    {     55,10}, {     31, 9}, {     63, 5}, {   1023, 4}, \
+    {   2431, 5}, {   1279, 6}, {    671, 7}, {    367, 8}, \
+    {    189, 9}, {     95, 8}, {    195, 9}, {    111,11}, \
+    {     31, 9}, {    131,10}, {     71, 9}, {    155,10}, \
+    {     79, 9}, {    159,10}, {     87,11}, {     47,10}, \
+    {    111,11}, {     63,10}, {    135,11}, {     79,10}, \
+    {    167,11}, {     95,10}, {    191,11}, {    111,12}, \
+    {     63,11}, {    143,10}, {    287,11}, {    159,10}, \
+    {    319,11}, {    175,12}, {     95,11}, {    207,13}, \
+    {     63,12}, {    127,11}, {    255,10}, {    543,11}, \
+    {    287,12}, {    159,11}, {    319,10}, {    639,11}, \
+    {    335,10}, {    671,11}, {    351,10}, {    703,12}, \
+    {    191,11}, {    383,10}, {    767,11}, {    415,12}, \
+    {    223,13}, {    127,12}, {    255,11}, {    543,12}, \
+    {    287,11}, {    575,10}, {   1151,11}, {    607,12}, \
+    {    319,11}, {    639,10}, {   1279,11}, {    671,12}, \
+    {    351,11}, {    703,13}, {    191,12}, {    383,11}, \
+    {    767,12}, {    415,11}, {    831,12}, {    447,14}, \
+    {    127,13}, {    255,12}, {    543,11}, {   1087,12}, \
+    {    607,11}, {   1215,13}, {    319,12}, {    671,11}, \
+    {   1343,12}, {    735,13}, {    383,12}, {    767,11}, \
+    {   1535,12}, {    799,11}, {   1599,12}, {    831,13}, \
+    {    447,12}, {    895,11}, {   1791,12}, {    959,14}, \
+    {    255,13}, {    511,12}, {   1087,13}, {    575,12}, \
+    {   1215,13}, {    639,12}, {   1343,13}, {    703,12}, \
+    {   1407,14}, {    383,13}, {    767,12}, {   1599,13}, \
+    {    831,12}, {   1663,13}, {    895,12}, {   1791,13}, \
+    {    959,15}, {    255,14}, {    511,13}, {   1087,12}, \
+    {   2175,13}, {   1215,14}, {    639,13}, {   1471,14}, \
+    {    767,13}, {   1663,14}, {    895,13}, {   1855,15}, \
+    {    511,14}, {   1023,13}, {   2175,14}, {   1151,13}, \
+    {   2431,14}, {   1279,13}, {   2687,14}, {   1407,15}, \
+    {    767,14}, {   1535,13}, {   3071,14}, {   1791,16}, \
+    {    511,15}, {   1023,14}, {   2431,15}, {   1279,14}, \
+    {   2815,15}, {   1535,14}, {   3199,15}, {   1791,14}, \
+    {   3583,16}, {  65536,17}, { 131072,18}, { 262144,19}, \
+    { 524288,20}, {1048576,21}, {2097152,22}, {4194304,23}, \
+    {8388608,24} }
+#define MUL_FFT_TABLE3_SIZE 185
+#define MUL_FFT_THRESHOLD                11520
+
+#define SQR_FFT_MODF_THRESHOLD             565  /* k = 5 */
+#define SQR_FFT_TABLE3                                      \
+  { {    460, 5}, {     21, 6}, {     11, 5}, {     23, 6}, \
+    {     12, 5}, {     25, 6}, {     27, 7}, {     14, 6}, \
+    {     29, 7}, {     15, 6}, {     31, 7}, {     29, 8}, \
+    {     15, 7}, {     32, 8}, {     17, 7}, {     35, 8}, \
+    {     19, 7}, {     39, 8}, {     21, 7}, {     43, 8}, \
+    {     25, 7}, {     51, 8}, {     29, 9}, {     15, 8}, \
+    {     35, 9}, {     19, 8}, {     43, 9}, {     23, 8}, \
+    {     51, 9}, {     27, 8}, {     55,10}, {     15, 9}, \
+    {     31, 8}, {     63, 9}, {     43,10}, {     23, 9}, \
+    {     55,11}, {     15,10}, {     31, 9}, {     71,10}, \
+    {     39, 9}, {     83,10}, {     47, 6}, {    767, 4}, \
+    {   3263, 5}, {   1727, 4}, {   3455, 5}, {   1791, 6}, \
+    {    927, 7}, {    479, 6}, {    959, 7}, {    511, 8}, \
+    {    271, 9}, {    147,10}, {     87,11}, {     47,10}, \
+    {     95,12}, {     31,11}, {     63,10}, {    135,11}, \
+    {     79,10}, {    167,11}, {     95,10}, {    191,11}, \
+    {    111,12}, {     63,11}, {    127,10}, {    255,11}, \
+    {    143,10}, {    287, 9}, {    575,10}, {    303,11}, \
+    {    159,12}, {     95,11}, {    191,10}, {    383, 9}, \
+    {    767,10}, {    399,11}, {    207,13}, {     63,12}, \
+    {    127,11}, {    255,10}, {    511,11}, {    271,10}, \
+    {    543,11}, {    287,10}, {    575,12}, {    159,11}, \
+    {    319,10}, {    639,11}, {    335,10}, {    671,11}, \
+    {    351,10}, {    703,12}, {    191,11}, {    383,10}, \
+    {    767,11}, {    415,10}, {    831,11}, {    447,13}, \
+    {    127,12}, {    255,11}, {    511,10}, {   1023,11}, \
+    {    543,12}, {    287,11}, {    575,10}, {   1151,11}, \
+    {    607,10}, {   1215,12}, {    319,11}, {    639,10}, \
+    {   1279,11}, {    671,12}, {    351,11}, {    703,13}, \
+    {    191,12}, {    383,11}, {    767,12}, {    415,11}, \
+    {    831,12}, {    447,14}, {    127,13}, {    255,12}, \
+    {    511,11}, {   1023,12}, {    543,11}, {   1087,12}, \
+    {    575,11}, {   1151,12}, {    607,13}, {    319,12}, \
+    {    639,11}, {   1279,12}, {    671,11}, {   1343,12}, \
+    {    703,11}, {   1407,12}, {    735,13}, {    383,12}, \
+    {    767,11}, {   1535,12}, {    799,11}, {   1599,12}, \
+    {    831,13}, {    447,12}, {    959,14}, {    255,13}, \
+    {    511,12}, {   1087,13}, {    575,12}, {   1215,13}, \
+    {    639,12}, {   1343,13}, {    703,12}, {   1407,14}, \
+    {    383,13}, {    767,12}, {   1599,13}, {    831,12}, \
+    {   1663,13}, {    895,12}, {   1791,13}, {    959,15}, \
+    {    255,14}, {    511,13}, {   1087,12}, {   2175,13}, \
+    {   1215,14}, {    639,13}, {   1471,14}, {    767,13}, \
+    {   1663,14}, {    895,13}, {   1855,15}, {    511,14}, \
+    {   1023,13}, {   2175,14}, {   1151,13}, {   2303,14}, \
+    {   1279,13}, {   2559,14}, {   1407,15}, {    767,14}, \
+    {   1535,13}, {   3071,14}, {   1791,16}, {    511,15}, \
+    {   1023,14}, {   2303,15}, {   1279,14}, {   2687,15}, \
+    {   1535,14}, {   3199,15}, {   1791,16}, {  65536,17}, \
+    { 131072,18}, { 262144,19}, { 524288,20}, {1048576,21}, \
+    {2097152,22}, {4194304,23}, {8388608,24} }
+#define SQR_FFT_TABLE3_SIZE 203
+#define SQR_FFT_THRESHOLD                 5568
+
+#define MULLO_BASECASE_THRESHOLD             0  /* always */
+#define MULLO_DC_THRESHOLD                  63
+#define MULLO_MUL_N_THRESHOLD            22239
+
+#define DC_DIV_QR_THRESHOLD                 40
+#define DC_DIVAPPR_Q_THRESHOLD             252
+#define DC_BDIV_QR_THRESHOLD                38
+#define DC_BDIV_Q_THRESHOLD                168
+
+#define INV_MULMOD_BNM1_THRESHOLD           67
+#define INV_NEWTON_THRESHOLD               246
+#define INV_APPR_THRESHOLD                 236
+
+#define BINV_NEWTON_THRESHOLD              252
+#define REDC_1_TO_REDC_2_THRESHOLD          11
+#define REDC_2_TO_REDC_N_THRESHOLD          84
+
+#define MU_DIV_QR_THRESHOLD               1932
+#define MU_DIVAPPR_Q_THRESHOLD            1895
+#define MUPI_DIV_QR_THRESHOLD               99
+#define MU_BDIV_QR_THRESHOLD              1528
+#define MU_BDIV_Q_THRESHOLD               1787
+
+#define POWM_SEC_TABLE  3,35,322,1926
+
+#define MATRIX22_STRASSEN_THRESHOLD         21
+#define HGCD_THRESHOLD                     140
+#define HGCD_APPR_THRESHOLD                190
+#define HGCD_REDUCE_THRESHOLD             4120
+#define GCD_DC_THRESHOLD                   606
+#define GCDEXT_DC_THRESHOLD                492
+#define JACOBI_BASE_METHOD                   4
+
+#define GET_STR_DC_THRESHOLD                18
+#define GET_STR_PRECOMPUTE_THRESHOLD        32
+#define SET_STR_DC_THRESHOLD               266
+#define SET_STR_PRECOMPUTE_THRESHOLD      2105
+
+#define FAC_DSC_THRESHOLD                 1474
+#define FAC_ODD_THRESHOLD                   24
diff --git a/mpn/x86_64/logops_n.asm b/mpn/x86_64/logops_n.asm

index 1022b61376dc6887eb7dd27205c2b4847f3459c4..c6a7b1cba09966e40cbd8fa02a13e21bd9266b14 100644 (file)
--- a/mpn/x86_64/logops_n.asm
+++ b/mpn/x86_64/logops_n.asm
@@ -1,6 +1,6 @@
  dnl  AMD64 logops.
  
-dnl  Copyright 2004, 2005, 2006 Free Software Foundation, Inc.
+dnl  Copyright 2004, 2005, 2006, 2011, 2012 Free Software Foundation, Inc.
  
  dnl  This file is part of the GNU MP Library.
  
@@ -21,10 +21,14 @@ include(`../config.m4')
  
  
  C           cycles/limb
-C K8,K9:        1.5
-C K10:          1.75-2 (fluctuating)
-C P4:           2.8/3.35/3.60 (variant1/variant2/variant3)
-C P6-15:        2.0
+C AMD K8,K9     1.5    with fluctuations for variant 2 and 3
+C AMD K10       1.5    with fluctuations for all variants
+C Intel P4      2.8/3.35/3.60 (variant1/variant2/variant3)
+C Intel core2   2
+C Intel NHM     2
+C Intel SBR     1.5/1.75/1.75
+C Intel atom    3.75
+C VIA nano      3.25
  
  ifdef(`OPERATION_and_n',`
    define(`func',`mpn_and_n')
@@ -68,6 +72,8 @@ define(`up',`%rsi')
  define(`vp',`%rdx')
  define(`n',`%rcx')
  
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
  
  ASM_START()
  
@@ -75,15 +81,16 @@ ifdef(`VARIANT_1',`
         TEXT
         ALIGN(32)
  PROLOGUE(func)
+       FUNC_ENTRY(4)
         movq    (vp), %r8
-       movl    %ecx, %eax
+       movl    R32(%rcx), R32(%rax)
         leaq    (vp,n,8), vp
         leaq    (up,n,8), up
         leaq    (rp,n,8), rp
         negq    n
-       andl    $3, %eax
+       andl    $3, R32(%rax)
         je      L(b00)
-       cmpl    $2, %eax
+       cmpl    $2, R32(%rax)
         jc      L(b01)
         je      L(b10)
  
@@ -113,7 +120,8 @@ L(e10):     movq    24(vp,n,8), %r9
         movq    %r9, 24(rp,n,8)
         addq    $4, n
         jnc     L(oop)
-L(ret):        ret
+L(ret):        FUNC_EXIT()
+       ret
  EPILOGUE()
  ')
  
@@ -121,16 +129,17 @@ ifdef(`VARIANT_2',`
         TEXT
         ALIGN(32)
  PROLOGUE(func)
+       FUNC_ENTRY(4)
         movq    (vp), %r8
         notq    %r8
-       movl    %ecx, %eax
+       movl    R32(%rcx), R32(%rax)
         leaq    (vp,n,8), vp
         leaq    (up,n,8), up
         leaq    (rp,n,8), rp
         negq    n
-       andl    $3, %eax
+       andl    $3, R32(%rax)
         je      L(b00)
-       cmpl    $2, %eax
+       cmpl    $2, R32(%rax)
         jc      L(b01)
         je      L(b10)
  
@@ -164,7 +173,8 @@ L(e10):     movq    24(vp,n,8), %r9
         movq    %r9, 24(rp,n,8)
         addq    $4, n
         jnc     L(oop)
-L(ret):        ret
+L(ret):        FUNC_EXIT()
+       ret
  EPILOGUE()
  ')
  
@@ -172,15 +182,16 @@ ifdef(`VARIANT_3',`
         TEXT
         ALIGN(32)
  PROLOGUE(func)
+       FUNC_ENTRY(4)
         movq    (vp), %r8
-       movl    %ecx, %eax
+       movl    R32(%rcx), R32(%rax)
         leaq    (vp,n,8), vp
         leaq    (up,n,8), up
         leaq    (rp,n,8), rp
         negq    n
-       andl    $3, %eax
+       andl    $3, R32(%rax)
         je      L(b00)
-       cmpl    $2, %eax
+       cmpl    $2, R32(%rax)
         jc      L(b01)
         je      L(b10)
  
@@ -216,6 +227,7 @@ L(e10):     movq    24(vp,n,8), %r9
         movq    %r9, 24(rp,n,8)
         addq    $4, n
         jnc     L(oop)
-L(ret):        ret
+L(ret):        FUNC_EXIT()
+       ret
  EPILOGUE()
  ')
diff --git a/mpn/x86_64/lshift.asm b/mpn/x86_64/lshift.asm

index d59d8250a9c31b3d6bc4716ffc0eb282c8772959..d4c40a6073cfbc76e7c53203fed245f988225095 100644 (file)
--- a/mpn/x86_64/lshift.asm
+++ b/mpn/x86_64/lshift.asm
@@ -1,19 +1,20 @@
  dnl  AMD64 mpn_lshift -- mpn left shift.
  
-dnl  Copyright 2003, 2005, 2007, 2009 Free Software Foundation, Inc.
+dnl  Copyright 2003, 2005, 2007, 2009, 2011, 2012 Free Software Foundation,
+dnl  Inc.
  dnl
  dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or
-dnl  modify it under the terms of the GNU Lesser General Public License as
-dnl  published by the Free Software Foundation; either version 3 of the
-dnl  License, or (at your option) any later version.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful,
-dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
-dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-dnl  Lesser General Public License for more details.
-dnl
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
  dnl  You should have received a copy of the GNU Lesser General Public License
  dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
  
@@ -21,11 +22,13 @@ include(`../config.m4')
  
  
  C           cycles/limb   cycles/limb cnt=1
-C K8,K9:        2.375           1.375
-C K10:          2.375           1.375
-C P4:           8              10.5
-C P6-15 (Core2): 2.11           4.28
-C P6-28 (Atom):         5.75            3.5
+C AMD K8,K9     2.375           1.375
+C AMD K10       2.375           1.375
+C Intel P4      8              10.5
+C Intel core2   2.11            4.28
+C Intel corei   ?               ?
+C Intel atom    5.75            3.5
+C VIA nano      3.5             2.25
  
  
  C INPUT PARAMETERS
@@ -34,15 +37,19 @@ define(`up',        `%rsi')
  define(`n',    `%rdx')
  define(`cnt',  `%rcx')
  
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
+
  ASM_START()
         TEXT
         ALIGN(32)
  PROLOGUE(mpn_lshift)
+       FUNC_ENTRY(4)
         cmp     $1, R8(%rcx)
         jne     L(gen)
  
  C For cnt=1 we want to work from lowest limb towards higher limbs.
-C Check for bad overlap (up=rp is OK!) up=1..rp+n-1 is bad.
+C Check for bad overlap (up=rp is OK!) up=rp+1..rp+n-1 is bad.
  C FIXME: this could surely be done more cleverly.
  
         mov    rp, %rax
@@ -81,6 +88,7 @@ L(t1):        mov     (up), %r8
         dec     R32(%rax)
         jne     L(n00)
         adc     R32(%rax), R32(%rax)
+       FUNC_EXIT()
         ret
  L(e1): test    R32(%rax), R32(%rax)    C clear cy
  L(n00):        mov     (up), %r8
@@ -89,6 +97,7 @@ L(n00):       mov     (up), %r8
         adc     %r8, %r8
         mov     %r8, (rp)
  L(ret):        adc     R32(%rax), R32(%rax)
+       FUNC_EXIT()
         ret
  L(n01):        dec     R32(%rax)
         mov     8(up), %r9
@@ -98,6 +107,7 @@ L(n01):      dec     R32(%rax)
         mov     %r8, (rp)
         mov     %r9, 8(rp)
         adc     R32(%rax), R32(%rax)
+       FUNC_EXIT()
         ret
  L(n10):        mov     16(up), %r10
         adc     %r8, %r8
@@ -107,6 +117,7 @@ L(n10):     mov     16(up), %r10
         mov     %r9, 8(rp)
         mov     %r10, 16(rp)
         adc     $-1, R32(%rax)
+       FUNC_EXIT()
         ret
  
  L(gen):        neg     R32(%rcx)               C put rsh count in cl
@@ -220,5 +231,6 @@ L(end):
  L(ast):        mov     (up), %r10
         shl     R8(%rcx), %r10
         mov     %r10, (rp)
+       FUNC_EXIT()
         ret
  EPILOGUE()
diff --git a/mpn/x86_64/lshiftc.asm b/mpn/x86_64/lshiftc.asm

index 2423529c4a5356e7123c429e53f354a7518828ac..03ae40a27b88d354a481cf19d2486b6d1bee0e07 100644 (file)
--- a/mpn/x86_64/lshiftc.asm
+++ b/mpn/x86_64/lshiftc.asm
@@ -1,6 +1,6 @@
  dnl  AMD64 mpn_lshiftc -- mpn left shift with complement.
  
-dnl  Copyright 2003, 2005, 2006, 2009 Free Software Foundation, Inc.
+dnl  Copyright 2003, 2005, 2006, 2009, 2011, 2012 Free Software Foundation, Inc.
  dnl
  dnl  This file is part of the GNU MP Library.
  dnl
@@ -21,11 +21,13 @@ include(`../config.m4')
  
  
  C           cycles/limb
-C K8,K9:        2.75
-C K10:          2.75
-C P4:           ?
-C P6-15 (Core2): ?
-C P6-28 (Atom):         ?
+C AMD K8,K9     2.75
+C AMD K10       2.75
+C Intel P4      ?
+C Intel core2   ?
+C Intel corei   ?
+C Intel atom    ?
+C VIA nano      3.75
  
  
  C INPUT PARAMETERS
@@ -34,10 +36,14 @@ define(`up',        `%rsi')
  define(`n',    `%rdx')
  define(`cnt',  `%rcx')
  
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
+
  ASM_START()
         TEXT
         ALIGN(32)
  PROLOGUE(mpn_lshiftc)
+       FUNC_ENTRY(4)
         neg     R32(%rcx)               C put rsh count in cl
         mov     -8(up,n,8), %rax
         shr     R8(%rcx), %rax          C function return value
@@ -144,21 +150,22 @@ L(top):
         jae     L(top)                  C                                     2
  L(end):
         neg     R32(%rcx)               C put rsh count in cl
-       mov     16(up,n,8), %r8
+       mov     8(up), %r8
         shr     R8(%rcx), %r8
         or      %r8, %r10
-       mov     8(up,n,8), %r9
+       mov     (up), %r9
         shr     R8(%rcx), %r9
         or      %r9, %r11
         not     %r10
         not     %r11
-       mov     %r10, 24(rp,n,8)
-       mov     %r11, 16(rp,n,8)
+       mov     %r10, 16(rp)
+       mov     %r11, 8(rp)
  
         neg     R32(%rcx)               C put lsh count in cl
  L(ast):        mov     (up), %r10
         shl     R8(%rcx), %r10
         not     %r10
         mov     %r10, (rp)
+       FUNC_EXIT()
         ret
  EPILOGUE()
diff --git a/mpn/x86_64/lshsub_n.asm b/mpn/x86_64/lshsub_n.asm

index 6ae7c364027ce63765e4167494e25451bc4d0ac9..2938b913a43d0b797f8c662468c8e2c05d932668 100644 (file)
--- a/mpn/x86_64/lshsub_n.asm
+++ b/mpn/x86_64/lshsub_n.asm
@@ -1,6 +1,6 @@
  dnl  AMD64 mpn_lshsub_n.  R = 2^k(U - V).
  
-dnl  Copyright 2006 Free Software Foundation, Inc.
+dnl  Copyright 2006, 2011, 2012 Free Software Foundation, Inc.
  
  dnl  This file is part of the GNU MP Library.
  
@@ -21,10 +21,13 @@ include(`../config.m4')
  
  
  C           cycles/limb
-C K8,K9:        3.15   (mpn_sub_n + mpn_lshift costs about 4 c/l)
-C K10:          3.15   (mpn_sub_n + mpn_lshift costs about 4 c/l)
-C P4:          16.5
-C P6-15:        4.35
+C AMD K8,K9     3.15   (mpn_sub_n + mpn_lshift costs about 4 c/l)
+C AMD K10       3.15   (mpn_sub_n + mpn_lshift costs about 4 c/l)
+C Intel P4     16.5
+C Intel core2   4.35
+C Intel corei   ?
+C Intel atom    ?
+C VIA nano      ?
  
  C This was written quickly and not optimized at all, but it runs very well on
  C K8.  But perhaps one could get under 3 c/l.  Ideas:
@@ -41,10 +44,15 @@ define(`vp',        `%rdx')
  define(`n',    `%rcx')
  define(`cnt',  `%r8')
  
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
+
  ASM_START()
         TEXT
         ALIGN(16)
  PROLOGUE(mpn_lshsub_n)
+       FUNC_ENTRY(4)
+IFDOS(`        mov     56(%rsp), %r8d  ')
  
         push    %r12
         push    %r13
@@ -53,32 +61,32 @@ PROLOGUE(mpn_lshsub_n)
         push    %rbx
  
         mov     n, %rax
-       xor     %ebx, %ebx              C clear carry save register
-       mov     %r8d, %ecx              C shift count
-       xor     %r15d, %r15d            C limb carry
+       xor     R32(%rbx), R32(%rbx)    C clear carry save register
+       mov     R32(%r8), R32(%rcx)     C shift count
+       xor     R32(%r15), R32(%r15)    C limb carry
  
-       mov     %eax, %r11d
-       and     $3, %r11d
+       mov     R32(%rax), R32(%r11)
+       and     $3, R32(%r11)
         je      L(4)
-       sub     $1, %r11d
+       sub     $1, R32(%r11)
  
  L(oopette):
-       add     %ebx, %ebx              C restore carry flag
+       add     R32(%rbx), R32(%rbx)    C restore carry flag
         mov     0(up), %r8
         lea     8(up), up
         sbb     0(vp), %r8
         mov     %r8, %r12
-       sbb     %ebx, %ebx              C save carry flag
-       shl     %cl, %r8
+       sbb     R32(%rbx), R32(%rbx)    C save carry flag
+       shl     R8(%rcx), %r8
         or      %r15, %r8
         mov     %r12, %r15
         lea     8(vp), vp
-       neg     %cl
-       shr     %cl, %r15
-       neg     %cl
+       neg     R8(%rcx)
+       shr     R8(%rcx), %r15
+       neg     R8(%rcx)
         mov     %r8, 0(rp)
         lea     8(rp), rp
-       sub     $1, %r11d
+       sub     $1, R32(%r11)
         jnc     L(oopette)
  
  L(4):
@@ -87,7 +95,7 @@ L(4):
  
         ALIGN(16)
  L(oop):
-       add     %ebx, %ebx              C restore carry flag
+       add     R32(%rbx), R32(%rbx)    C restore carry flag
  
         mov     0(up), %r8
         mov     8(up), %r9
@@ -104,29 +112,29 @@ L(oop):
         mov     %r10, %r14
         sbb     24(vp), %r11
  
-       sbb     %ebx, %ebx              C save carry flag
+       sbb     R32(%rbx), R32(%rbx)    C save carry flag
  
-       shl     %cl, %r8
-       shl     %cl, %r9
-       shl     %cl, %r10
+       shl     R8(%rcx), %r8
+       shl     R8(%rcx), %r9
+       shl     R8(%rcx), %r10
         or      %r15, %r8
         mov     %r11, %r15
-       shl     %cl, %r11
+       shl     R8(%rcx), %r11
  
         lea     32(vp), vp
  
-       neg     %cl
+       neg     R8(%rcx)
  
-       shr     %cl, %r12
-       shr     %cl, %r13
-       shr     %cl, %r14
-       shr     %cl, %r15               C used next loop
+       shr     R8(%rcx), %r12
+       shr     R8(%rcx), %r13
+       shr     R8(%rcx), %r14
+       shr     R8(%rcx), %r15          C used next loop
  
         or      %r12, %r9
         or      %r13, %r10
         or      %r14, %r11
  
-       neg     %cl
+       neg     R8(%rcx)
  
         mov     %r8, 0(rp)
         mov     %r9, 8(rp)
@@ -138,8 +146,8 @@ L(oop):
         sub     $4, %rax
         jnc     L(oop)
  L(end):
-       neg     %ebx
-       shl     %cl, %rbx
+       neg     R32(%rbx)
+       shl     R8(%rcx), %rbx
         adc     %r15, %rbx
         mov     %rbx, %rax
         pop     %rbx
@@ -148,5 +156,6 @@ L(end):
         pop     %r13
         pop     %r12
  
+       FUNC_EXIT()
         ret
  EPILOGUE()
diff --git a/mpn/x86_64/mod_1_1.asm b/mpn/x86_64/mod_1_1.asm

new file mode 100644 (file)

index 0000000..42a8c4c
--- /dev/null
+++ b/mpn/x86_64/mod_1_1.asm
@@ -0,0 +1,224 @@
+dnl  AMD64 mpn_mod_1_1p
+
+dnl  Contributed to the GNU project by Torbjörn Granlund and Niels Möller.
+
+dnl  Copyright 2009, 2010, 2011, 2012 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C           cycles/limb
+C AMD K8,K9     6
+C AMD K10       6
+C Intel P4     26
+C Intel core2  12.5
+C Intel NHM    11.3
+C Intel SBR     8.4    (slowdown, old code took 8.0)
+C Intel atom   26
+C VIA nano     13
+
+define(`B2mb',   `%r10')
+define(`B2modb', `%r11')
+define(`ap',     `%rdi')
+define(`n',      `%rsi')
+define(`pre',    `%r8')
+define(`b',      `%rbx')
+
+define(`r0',     `%rbp') C r1 kept in %rax
+define(`r2',    `%rcx')  C kept negated. Also used as shift count
+define(`t0',     `%r9')
+
+C mp_limb_t
+C mpn_mod_1_1p (mp_srcptr ap, mp_size_t n, mp_limb_t b, mp_limb_t bmodb[4])
+C                       %rdi         %rsi         %rdx                %rcx
+C The pre array contains bi, cnt, B1modb, B2modb
+C Note: This implementation needs B1modb only when cnt > 0
+
+C The iteration is almost as follows,
+C
+C   r_2 B^3 + r_1 B^2 + r_0 B + u = r_1 B2modb + (r_0 + r_2 B2modb) B + u
+C
+C where r2 is a single bit represented as a mask. But to make sure that the
+C result fits in two limbs and a bit, carry from the addition
+C
+C   r_0 + r_2 B2modb
+C
+C is handled specially. On carry, we subtract b to cancel the carry,
+C and we use instead the value
+C
+C   r_0 + B2mb (mod B)
+C
+C This addition can be issued early since it doesn't depend on r2, and it is
+C the source of the cmov in the loop.
+C
+C We have the invariant that r_2 B^2 + r_1 B + r_0 < B^2 + B b
+
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
+
+ASM_START()
+       TEXT
+       ALIGN(16)
+PROLOGUE(mpn_mod_1_1p)
+       FUNC_ENTRY(4)                   C reduce {ap,n} modulo b using the pre table, result in %rax
+       push    %rbp                    C callee-saved, used as r0
+       push    %rbx                    C callee-saved, used as b
+       mov     %rdx, b
+       mov     %rcx, pre               C move the table pointer away, %rcx becomes r2 and shift count
+
+       mov     -8(ap, n, 8), %rax      C r1 = ap[n-1], the top limb
+       cmp     $3, n
+       jnc     L(first)                C n >= 3, fold limbs first
+       mov     -16(ap, n, 8), r0       C r0 = ap[n-2]; NOTE(review): presumably callers ensure n >= 2, confirm
+       jmp     L(reduce_two)
+
+L(first):
+       C First iteration, no r2
+       mov     24(pre), B2modb         C pre[3]
+       mul     B2modb                  C %rdx:%rax = ap[n-1] * B2modb
+       mov     -24(ap, n, 8), r0       C ap[n-3]
+       add     %rax, r0
+       mov     -16(ap, n, 8), %rax     C ap[n-2]
+       adc     %rdx, %rax              C r1 = ap[n-2] + high product + carry
+       sbb     r2, r2                  C r2 = 0 or all ones, from the carry out
+       sub     $4, n                   C n now indexes the next limb to fold in
+       jc      L(reduce_three)         C n was 3, no limbs left
+
+       mov     B2modb, B2mb
+       sub     b, B2mb                 C B2mb = B2modb - b
+
+       ALIGN(16)
+L(top):        and     B2modb, r2       C r2 mask selects B2modb or 0
+       lea     (B2mb, r0), t0          C t0 = r0 + B2mb, used if the add below carries
+       mul     B2modb                  C %rdx:%rax = r1 * B2modb
+       add     r0, r2                  C r0 + r2 B2modb, sets carry for the cmovc
+       mov     (ap, n, 8), r0          C next lower limb u; mov leaves the flags intact
+       cmovc   t0, r2                  C on carry take r0 + B2mb instead
+       add     %rax, r0                C new r0 = u + low product
+       mov     r2, %rax
+       adc     %rdx, %rax              C new r1
+       sbb     r2, r2                  C new r2 mask from the carry out
+       sub     $1, n
+       jnc     L(top)                  C loop until n goes below zero
+
+L(reduce_three):
+       C Eliminate r2
+       and     b, r2                   C b if r2 is set, else 0
+       sub     r2, %rax
+
+L(reduce_two):
+       mov     8(pre), R32(%rcx)       C cnt = pre[1]
+       test    R32(%rcx), R32(%rcx)
+       jz      L(normalized)           C cnt = 0, B1modb is not used
+
+       C Unnormalized, use B1modb to reduce to size < B (b+1)
+       mulq    16(pre)                 C %rdx:%rax = r1 * B1modb (pre[2])
+       xor     t0, t0
+       add     %rax, r0
+       adc     %rdx, t0                C t0 = high product + carry
+       mov     t0, %rax                C value is now %rax:r0
+
+       C Left-shift to normalize
+ifdef(`SHLD_SLOW',`
+       shl     R8(%rcx), %rax
+       mov     r0, t0
+       neg     R32(%rcx)               C shift count becomes -cnt
+       shr     R8(%rcx), t0            C bits of r0 that move into %rax
+       or      t0, %rax
+       neg     R32(%rcx)               C back to cnt
+',`
+       shld    R8(%rcx), r0, %rax
+')
+       shl     R8(%rcx), r0
+       jmp     L(udiv)
+
+L(normalized):
+       mov     %rax, t0
+       sub     b, t0
+       cmovnc  t0, %rax                C r1 -= b when r1 >= b
+
+L(udiv):
+       lea     1(%rax), t0             C r1 + 1
+       mulq    (pre)                   C %rdx:%rax = r1 * bi (pre[0])
+       add     r0, %rax
+       adc     t0, %rdx                C %rdx = candidate quotient limb
+       imul    b, %rdx
+       sub     %rdx, r0                C candidate remainder, r0 - q b (mod B)
+       cmp     r0, %rax
+       lea     (b, r0), %rax           C candidate + b; lea leaves the cmp flags intact
+       cmovnc  r0, %rax                C no borrow from the cmp: take the candidate without + b
+       cmp     b, %rax
+       jnc     L(fix)                  C result >= b, one more subtraction
+L(ok): shr     R8(%rcx), %rax          C shift right by cnt (0 on the normalized path)
+
+       pop     %rbx
+       pop     %rbp
+       FUNC_EXIT()
+       ret
+L(fix):        sub     b, %rax
+       jmp     L(ok)
+EPILOGUE()
+
+       ALIGN(16)
+PROLOGUE(mpn_mod_1_1p_cps)
+       FUNC_ENTRY(2)                   C fill the 4-limb table at %rdi for the divisor in %rsi
+       push    %rbp
+       bsr     %rsi, %rcx              C index of the highest set bit of b
+       push    %rbx
+       mov     %rdi, %rbx              C table pointer
+       push    %r12
+       xor     $63, R32(%rcx)          C cnt = 63 - bit index
+       mov     %rsi, %r12
+       mov     R32(%rcx), R32(%rbp)    C keep cnt in %rbp over the call
+       sal     R8(%rcx), %r12          C b << cnt
+IFSTD(`        mov     %r12, %rdi      ')      C pass parameter
+IFDOS(`        mov     %r12, %rcx      ')      C pass parameter
+       CALL(   mpn_invert_limb)
+       neg     %r12                    C -(b << cnt)
+       mov     %r12, %r8               C copy for the B1modb computation
+       mov     %rax, (%rbx)            C store bi
+       mov     %rbp, 8(%rbx)           C store cnt
+       imul    %rax, %r12              C -(b << cnt) * bi
+       mov     %r12, 24(%rbx)          C store B2modb
+       mov     R32(%rbp), R32(%rcx)    C reload cnt as shift count
+       test    R32(%rcx), R32(%rcx)
+       jz      L(z)                    C cnt = 0: the B1modb slot is left unwritten
+
+       mov     $1, R32(%rdx)
+ifdef(`SHLD_SLOW',`
+       C Destroys %rax, unlike shld. Otherwise, we could do B1modb
+       C before B2modb, and get rid of the move %r12, %r8 above.
+
+       shl     R8(%rcx), %rdx          C 1 << cnt
+       neg     R32(%rcx)               C shift count becomes -cnt
+       shr     R8(%rcx), %rax          C top cnt bits of bi
+       or      %rax, %rdx
+       neg     R32(%rcx)               C back to cnt
+',`
+       shld    R8(%rcx), %rax, %rdx    C %rdx = (1 << cnt) with the top cnt bits of bi shifted in
+')
+       imul    %rdx, %r8               C -(b << cnt) * %rdx
+       shr     R8(%rcx), %r8
+       mov     %r8, 16(%rbx)           C store B1modb
+L(z):
+       pop     %r12
+       pop     %rbx
+       pop     %rbp
+       FUNC_EXIT()
+       ret
+EPILOGUE()
+ASM_END()
diff --git a/mpn/x86_64/mod_1_2.asm b/mpn/x86_64/mod_1_2.asm

new file mode 100644 (file)

index 0000000..6310e45
--- /dev/null
+++ b/mpn/x86_64/mod_1_2.asm
@@ -0,0 +1,227 @@
+dnl  AMD64 mpn_mod_1s_2p
+
+dnl  Contributed to the GNU project by Torbjorn Granlund.
+
+dnl  Copyright 2009, 2010, 2011, 2012 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C           cycles/limb
+C AMD K8,K9     4
+C AMD K10       4
+C Intel P4     19
+C Intel core2   8
+C Intel NHM     6.5
+C Intel SBR     4.5
+C Intel atom   28
+C VIA nano      8
+
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
+
+ASM_START()
+       TEXT
+       ALIGN(16)
+PROLOGUE(mpn_mod_1s_2p)
+       FUNC_ENTRY(4)                   C limbs at %rdi, count in %rsi, b in %rdx, table in %rcx
+       push    %r14
+       test    $1, R8(%rsi)            C parity of n, consumed by the je L(b0) below
+       mov     %rdx, %r14              C b
+       push    %r13
+       mov     %rcx, %r13              C table pointer
+       push    %r12
+       push    %rbp
+       push    %rbx
+       mov     16(%rcx), %r10          C B1modb
+       mov     24(%rcx), %rbx          C B2modb
+       mov     32(%rcx), %rbp          C B3modb
+       je      L(b0)                   C n even
+       dec     %rsi
+       je      L(one)                  C n was 1
+       mov     -8(%rdi,%rsi,8), %rax   C ap[n-2]
+       mul     %r10
+       mov     %rax, %r9
+       mov     %rdx, %r8               C %r8:%r9 = ap[n-2] * B1modb
+       mov     (%rdi,%rsi,8), %rax     C ap[n-1]
+       add     -16(%rdi,%rsi,8), %r9   C + ap[n-3]
+       adc     $0, %r8
+       mul     %rbx
+       add     %rax, %r9
+       adc     %rdx, %r8               C + ap[n-1] * B2modb
+       jmp     L(11)
+
+L(b0): mov     -8(%rdi,%rsi,8), %r8    C high word = ap[n-1]
+       mov     -16(%rdi,%rsi,8), %r9   C low word = ap[n-2]
+
+L(11): sub     $4, %rsi
+       jb      L(ed2)                  C no further limb pair to fold in
+       lea     40(%rdi,%rsi,8), %rdi   C repoint %rdi so the next pair sits at -40 and -32
+       mov     -40(%rdi), %r11         C low limb of the next pair
+       mov     -32(%rdi), %rax         C high limb of the next pair
+       jmp     L(m0)
+
+       ALIGN(16)
+L(top):        mov     -24(%rdi), %r9   C low limb of the next pair
+       add     %rax, %r11
+       mov     -16(%rdi), %rax         C high limb of the next pair
+       adc     %rdx, %r12              C completes the %r12:%r11 sum started at L(m0)
+       mul     %r10                    C high limb * B1modb
+       add     %rax, %r9
+       mov     %r11, %rax
+       mov     %rdx, %r8
+       adc     $0, %r8
+       mul     %rbx                    C previous low word * B2modb
+       add     %rax, %r9
+       mov     %r12, %rax
+       adc     %rdx, %r8
+       mul     %rbp                    C previous high word * B3modb
+       sub     $2, %rsi
+       jb      L(ed1)                  C done, the last product is still in %rdx:%rax
+       mov     -40(%rdi), %r11         C low limb of the next pair
+       add     %rax, %r9
+       mov     -32(%rdi), %rax         C high limb of the next pair
+       adc     %rdx, %r8
+L(m0): mul     %r10                    C high limb * B1modb
+       add     %rax, %r11
+       mov     %r9, %rax
+       mov     %rdx, %r12
+       adc     $0, %r12
+       mul     %rbx                    C previous low word * B2modb
+       add     %rax, %r11
+       lea     -32(%rdi), %rdi         C ap -= 4
+       mov     %r8, %rax
+       adc     %rdx, %r12
+       mul     %rbp                    C previous high word * B3modb
+       sub     $2, %rsi
+       jae     L(top)
+
+L(ed0):        mov     %r11, %r9        C move the %r12:%r11 sum into %r8:%r9
+       mov     %r12, %r8
+L(ed1):        add     %rax, %r9        C add the pending B3modb product
+       adc     %rdx, %r8
+L(ed2):        mov     8(%r13), R32(%rdi)              C cnt
+       mov     %r8, %rax
+       mov     %r9, %r8
+       mul     %r10                    C high word * B1modb
+       add     %rax, %r8
+       adc     $0, %rdx                C value is now %rdx:%r8
+L(1):  xor     R32(%rcx), R32(%rcx)
+       mov     %r8, %r9
+       sub     R32(%rdi), R32(%rcx)    C %ecx = -cnt
+       shr     R8(%rcx), %r9           C NOTE(review): with cnt = 0 this shifts by 0; presumably callers ensure cnt >= 1, confirm
+       mov     R32(%rdi), R32(%rcx)    C %ecx = cnt
+       sal     R8(%rcx), %rdx
+       or      %rdx, %r9               C %r9 = high limb of the value shifted left by cnt
+       sal     R8(%rcx), %r8           C %r8 = low limb of the shifted value
+       mov     %r9, %rax
+       mulq    (%r13)                  C %rdx:%rax = high limb * bi (table[0])
+       mov     %rax, %rsi
+       inc     %r9
+       add     %r8, %rsi
+       adc     %r9, %rdx               C candidate quotient limb
+       imul    %r14, %rdx
+       sub     %rdx, %r8               C candidate remainder
+       lea     (%r8,%r14), %rax        C candidate + b
+       cmp     %r8, %rsi
+       cmovc   %rax, %r8               C add b back when the cmp borrows
+       mov     %r8, %rax
+       sub     %r14, %rax
+       cmovc   %r8, %rax               C subtract b only when %r8 >= b
+       mov     R32(%rdi), R32(%rcx)
+       shr     R8(%rcx), %rax          C shift the result right by cnt
+       pop     %rbx
+       pop     %rbp
+       pop     %r12
+       pop     %r13
+       pop     %r14
+       FUNC_EXIT()
+       ret
+L(one):
+       mov     (%rdi), %r8             C the single limb ap[0]
+       mov     8(%rcx), R32(%rdi)      C cnt; %rcx still holds the table pointer here
+       xor     %rdx, %rdx              C high limb is zero
+       jmp     L(1)
+EPILOGUE()
+
+       ALIGN(16)
+PROLOGUE(mpn_mod_1s_2p_cps)
+       FUNC_ENTRY(2)                   C fill the 5-limb table at %rdi for the divisor in %rsi
+       push    %rbp
+       bsr     %rsi, %rcx              C index of the highest set bit of b
+       push    %rbx
+       mov     %rdi, %rbx              C table pointer
+       push    %r12
+       xor     $63, R32(%rcx)          C cnt = 63 - bit index
+       mov     %rsi, %r12
+       mov     R32(%rcx), R32(%rbp)    C preserve cnt over call
+       sal     R8(%rcx), %r12          C b << cnt
+IFSTD(`        mov     %r12, %rdi      ')      C pass parameter
+IFDOS(`        mov     %r12, %rcx      ')      C pass parameter
+       CALL(   mpn_invert_limb)
+       mov     %r12, %r8
+       mov     %rax, %r11              C keep bi for the later multiplications
+       mov     %rax, (%rbx)            C store bi
+       mov     %rbp, 8(%rbx)           C store cnt
+       neg     %r8                     C -(b << cnt)
+       mov     R32(%rbp), R32(%rcx)    C reload cnt as shift count
+       mov     $1, R32(%rsi)
+ifdef(`SHLD_SLOW',`
+       shl     R8(%rcx), %rsi          C 1 << cnt
+       neg     R32(%rcx)               C shift count becomes -cnt
+       mov     %rax, %rbp              C save bi, the shr below destroys %rax
+       shr     R8(%rcx), %rax          C top cnt bits of bi
+       or      %rax, %rsi
+       mov     %rbp, %rax              C restore bi
+       neg     R32(%rcx)               C back to cnt
+',`
+       shld    R8(%rcx), %rax, %rsi    C FIXME: Slow on Atom and Nano
+')
+       imul    %r8, %rsi               C B1modb before the right shift by cnt
+       mul     %rsi                    C %rdx:%rax = bi * that value
+
+       add     %rsi, %rdx
+       shr     R8(%rcx), %rsi
+       mov     %rsi, 16(%rbx)          C store B1modb
+
+       not     %rdx
+       imul    %r12, %rdx              C candidate for the next value
+       lea     (%rdx,%r12), %rsi       C candidate + (b << cnt)
+       cmp     %rdx, %rax
+       cmovnc  %rdx, %rsi              C take the plain candidate unless the cmp borrows
+       mov     %r11, %rax
+       mul     %rsi                    C same step again, starting from the new value
+
+       add     %rsi, %rdx
+       shr     R8(%rcx), %rsi
+       mov     %rsi, 24(%rbx)          C store B2modb
+
+       not     %rdx
+       imul    %r12, %rdx              C candidate for the next value
+       add     %rdx, %r12              C candidate + (b << cnt)
+       cmp     %rdx, %rax
+       cmovnc  %rdx, %r12              C take the plain candidate unless the cmp borrows
+
+       shr     R8(%rcx), %r12
+       mov     %r12, 32(%rbx)          C store B3modb
+
+       pop     %r12
+       pop     %rbx
+       pop     %rbp
+       FUNC_EXIT()
+       ret
+EPILOGUE()
diff --git a/mpn/x86_64/mod_1_4.asm b/mpn/x86_64/mod_1_4.asm

index bb8a6b221b498a7593792b9300f0ad48b619b602..91a372c89e708f6067664662925c70bab6969ce2 100644 (file)
--- a/mpn/x86_64/mod_1_4.asm
+++ b/mpn/x86_64/mod_1_4.asm
@@ -2,7 +2,7 @@ dnl  AMD64 mpn_mod_1s_4p
  
  dnl  Contributed to the GNU project by Torbjorn Granlund.
  
-dnl  Copyright 2009 Free Software Foundation, Inc.
+dnl  Copyright 2009, 2010, 2011, 2012 Free Software Foundation, Inc.
  
  dnl  This file is part of the GNU MP Library.
  
@@ -22,30 +22,36 @@ dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
  include(`../config.m4')
  
  C           cycles/limb
-C K8,K9:        3.0
-C K10:          3.0
-C P4:          14.5
-C P6 core2:     5.0
-C P6 corei7:    4.3
-C P6 atom:     25.0
+C AMD K8,K9     3
+C AMD K10       3
+C Intel P4     15.5
+C Intel core2   5
+C Intel corei   4
+C Intel atom   23
+C VIA nano      4.75
+
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
  
  ASM_START()
         TEXT
         ALIGN(16)
  PROLOGUE(mpn_mod_1s_4p)
+       FUNC_ENTRY(4)
+       push    %r15
         push    %r14
         push    %r13
         push    %r12
         push    %rbp
         push    %rbx
  
-       mov     %rdx, -16(%rsp)
+       mov     %rdx, %r15
         mov     %rcx, %r14
-       mov     16(%rcx), %r11
-       mov     24(%rcx), %rbx
-       mov     32(%rcx), %rbp
-       mov     40(%rcx), %r13
-       mov     48(%rcx), %r12
+       mov     16(%rcx), %r11          C B1modb
+       mov     24(%rcx), %rbx          C B2modb
+       mov     32(%rcx), %rbp          C B3modb
+       mov     40(%rcx), %r13          C B4modb
+       mov     48(%rcx), %r12          C B5modb
         xor     R32(%r8), R32(%r8)
         mov     R32(%rsi), R32(%rdx)
         and     $3, R32(%rdx)
@@ -86,33 +92,32 @@ L(b1):      lea     -8(%rdi,%rsi,8), %rdi
  
         ALIGN(8)
  L(b2): lea     -16(%rdi,%rsi,8), %rdi
-       mov     8(%rdi), %rax
-       mul     %r11
+       mov     8(%rdi), %r8
         mov     (%rdi), %r9
-       jmp     L(m0)
+       jmp     L(m1)
  
         ALIGN(16)
  L(top):        mov     -24(%rdi), %rax
         mov     -32(%rdi), %r10
-       mul     %r11
+       mul     %r11                    C up[1] * B1modb
         add     %rax, %r10
         mov     -16(%rdi), %rax
-       mov     %rdx, %rcx
-       adc     $0, %rcx
-       mul     %rbx
+       mov     $0, R32(%rcx)
+       adc     %rdx, %rcx
+       mul     %rbx                    C up[2] * B2modb
         add     %rax, %r10
         mov     -8(%rdi), %rax
         adc     %rdx, %rcx
         sub     $32, %rdi
-       mul     %rbp
+       mul     %rbp                    C up[3] * B3modb
         add     %rax, %r10
-       mov     %r9, %rax
+       mov     %r13, %rax
         adc     %rdx, %rcx
-       mul     %r13
+       mul     %r9                     C rl * B4modb
         add     %rax, %r10
-       mov     %r8, %rax
+       mov     %r12, %rax
         adc     %rdx, %rcx
-       mul     %r12
+       mul     %r8                     C rh * B5modb
         mov     %r10, %r9
         mov     %rcx, %r8
  L(m0): add     %rax, %r9
@@ -135,7 +140,7 @@ L(end):     mov     8(%r14), R32(%rsi)
         or      %rdx, %rdi
         mov     %rdi, %rax
         mulq    (%r14)
-       mov     -16(%rsp), %rbx
+       mov     %r15, %rbx
         mov     %rax, %r9
         sal     R8(%rcx), %r8
         inc     %rdi
@@ -145,91 +150,109 @@ L(end):  mov     8(%r14), R32(%rsi)
         sub     %rdx, %r8
         lea     (%r8,%rbx), %rax
         cmp     %r8, %r9
-       cmovb   %rax, %r8
+       cmovc   %rax, %r8
         mov     %r8, %rax
         sub     %rbx, %rax
-       cmovb   %r8, %rax
+       cmovc   %r8, %rax
         shr     R8(%rcx), %rax
         pop     %rbx
         pop     %rbp
         pop     %r12
         pop     %r13
         pop     %r14
+       pop     %r15
+       FUNC_EXIT()
         ret
  EPILOGUE()
  
         ALIGN(16)
  PROLOGUE(mpn_mod_1s_4p_cps)
-       push    %r12
-       bsr     %rsi, %rcx
+       FUNC_ENTRY(2)
         push    %rbp
-       xor     $63, R32(%rcx)
-       mov     %rsi, %rbp
-       mov     R32(%rcx), R32(%r12)
-       sal     R8(%rcx), %rbp
+       bsr     %rsi, %rcx
         push    %rbx
         mov     %rdi, %rbx
-       mov     %rbp, %rdi
+       push    %r12
+       xor     $63, R32(%rcx)
+       mov     %rsi, %r12
+       mov     R32(%rcx), R32(%rbp)    C preserve cnt over call
+       sal     R8(%rcx), %r12          C b << cnt
+IFSTD(`        mov     %r12, %rdi      ')      C pass parameter
+IFDOS(`        mov     %r12, %rcx      ')      C pass parameter
         CALL(   mpn_invert_limb)
-       mov     R32(%r12), R32(%rcx)
-       mov     $1, R32(%r10)
-       sal     R8(%rcx), %r10
-       mov     $64, R32(%rcx)
-       mov     %rax, %r9
-       sub     R32(%r12), R32(%rcx)
-       mov     %r9, (%rbx)
+       mov     %r12, %r8
+       mov     %rax, %r11
+       mov     %rax, (%rbx)            C store bi
+       mov     %rbp, 8(%rbx)           C store cnt
+       neg     %r8
+       mov     R32(%rbp), R32(%rcx)
+       mov     $1, R32(%rsi)
+ifdef(`SHLD_SLOW',`
+       shl     R8(%rcx), %rsi
+       neg     R32(%rcx)
+       mov     %rax, %rbp
         shr     R8(%rcx), %rax
-       mov     R32(%r12), R32(%rcx)
-       or      %rax, %r10
+       or      %rax, %rsi
         mov     %rbp, %rax
-       neg     %rax
-       imul    %rax, %r10
-       mov     %r10, %rax
-       mul     %r9
-       lea     1(%r10,%rdx), %r8
-       neg     %r8
-       imul    %rbp, %r8
-       cmp     %r8, %rax
-       lea     (%r8,%rbp), %rdx
-       cmovb   %rdx, %r8
-       mov     %r8, %rax
-       mul     %r9
-       lea     1(%r8,%rdx), %rdi
-       neg     %rdi
-       imul    %rbp, %rdi
-       cmp     %rdi, %rax
-       lea     (%rdi,%rbp), %rdx
-       cmovb   %rdx, %rdi
-       mov     %rdi, %rax
-       mul     %r9
-       lea     1(%rdi,%rdx), %rsi
-       neg     %rsi
-       imul    %rbp, %rsi
-       cmp     %rsi, %rax
-       lea     (%rsi,%rbp), %rdx
-       cmovb   %rdx, %rsi
-       mov     %rsi, %rax
-       mul     %r9
-       lea     1(%rsi,%rdx), %rdx
-       neg     %rdx
-       imul    %rbp, %rdx
+       neg     R32(%rcx)
+',`
+       shld    R8(%rcx), %rax, %rsi    C FIXME: Slow on Atom and Nano
+')
+       imul    %r8, %rsi
+       mul     %rsi
+
+       add     %rsi, %rdx
+       shr     R8(%rcx), %rsi
+       mov     %rsi, 16(%rbx)          C store B1modb
+
+       not     %rdx
+       imul    %r12, %rdx
+       lea     (%rdx,%r12), %rsi
         cmp     %rdx, %rax
-       lea     (%rdx,%rbp), %rbp
-       movslq  R32(%r12), %rax
-       cmovae  %rdx, %rbp
-       shr     R8(%rcx), %r10
-       shr     R8(%rcx), %r8
-       shr     R8(%rcx), %rbp
-       shr     R8(%rcx), %rdi
+       cmovnc  %rdx, %rsi
+       mov     %r11, %rax
+       mul     %rsi
+
+       add     %rsi, %rdx
+       shr     R8(%rcx), %rsi
+       mov     %rsi, 24(%rbx)          C store B2modb
+
+       not     %rdx
+       imul    %r12, %rdx
+       lea     (%rdx,%r12), %rsi
+       cmp     %rdx, %rax
+       cmovnc  %rdx, %rsi
+       mov     %r11, %rax
+       mul     %rsi
+
+       add     %rsi, %rdx
         shr     R8(%rcx), %rsi
-       mov     %rbp, 48(%rbx)
-       mov     %rax, 8(%rbx)
-       mov     %r10, 16(%rbx)
-       mov     %r8, 24(%rbx)
-       mov     %rdi, 32(%rbx)
-       mov     %rsi, 40(%rbx)
+       mov     %rsi, 32(%rbx)          C store B3modb
+
+       not     %rdx
+       imul    %r12, %rdx
+       lea     (%rdx,%r12), %rsi
+       cmp     %rdx, %rax
+       cmovnc  %rdx, %rsi
+       mov     %r11, %rax
+       mul     %rsi
+
+       add     %rsi, %rdx
+       shr     R8(%rcx), %rsi
+       mov     %rsi, 40(%rbx)          C store B4modb
+
+       not     %rdx
+       imul    %r12, %rdx
+       add     %rdx, %r12
+       cmp     %rdx, %rax
+       cmovnc  %rdx, %r12
+
+       shr     R8(%rcx), %r12
+       mov     %r12, 48(%rbx)          C store B5modb
+
+       pop     %r12
         pop     %rbx
         pop     %rbp
-       pop     %r12
+       FUNC_EXIT()
         ret
  EPILOGUE()
diff --git a/mpn/x86_64/mod_34lsub1.asm b/mpn/x86_64/mod_34lsub1.asm

index 318fb96d619727b4aa9c38d74962029c96c71107..d088381bbaa1c762d9534cdb19647bcd1f7486bd 100644 (file)
--- a/mpn/x86_64/mod_34lsub1.asm
+++ b/mpn/x86_64/mod_34lsub1.asm
@@ -1,155 +1,183 @@
  dnl  AMD64 mpn_mod_34lsub1 -- remainder modulo 2^48-1.
  
-dnl  Copyright 2000, 2001, 2002, 2004, 2005, 2007 Free Software Foundation,
-dnl  Inc.
-dnl
+dnl  Copyright 2000, 2001, 2002, 2004, 2005, 2007, 2009, 2010, 2011, 2012 Free
+dnl  Software Foundation, Inc.
+
  dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or
-dnl  modify it under the terms of the GNU Lesser General Public License as
-dnl  published by the Free Software Foundation; either version 3 of the
-dnl  License, or (at your option) any later version.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful,
-dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
-dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-dnl  Lesser General Public License for more details.
-dnl
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
  dnl  You should have received a copy of the GNU Lesser General Public License
  dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
  
  include(`../config.m4')
  
  
-C           cycles/limb
-C K8,K9:        1.0
-C K10:          1.12
-C P4:           3.25
-C P6-15 (Core2): 1.5
-C P6-28 (Atom):         2.5
-
+C          cycles/limb
+C AMD K8,K9     0.67      0.583 is possible with zero-reg instead of $0, 4-way
+C AMD K10       0.67      this seems hard to beat
+C AMD bd1       1
+C AMD bobcat    1.07
+C Intel P4      7.35      terrible, use old code
+C Intel core2   1.25      1+epsilon with huge unrolling
+C Intel NHM     1.15      this seems hard to beat
+C Intel SBR     0.93
+C Intel atom    2.5
+C VIA nano      1.25      this seems hard to beat
  
  C INPUT PARAMETERS
-C up   rdi
-C n    rsi
+define(`ap',   %rdi)
+define(`n',    %rsi)
  
  C mp_limb_t mpn_mod_34lsub1 (mp_srcptr up, mp_size_t n)
  
  C TODO
-C  * Apply the movzwl tricks to the x86/k7 code
-C  * Review feed-in and wind-down code.  In particular, try to avoid adc and
-C    sbb to placate Pentium4.
-C  * More unrolling and/or index addressing could bring time to under 1 c/l
-C    for Athlon64, approaching 0.67 c/l seems possible.
-C  * There are recurrencies on the carry registers (r8, r9, r10) that might
-C    be the limiting factor for the Pentium4 speed.  Splitting these into 6
-C    registers would help.
-C  * For ultimate Athlon64 performance, a sequence like this might be best.
-C    It should reach 0.5 c/l (limited by L1 cache bandwidth).
-C
-C      add     (%rdi), %rax
-C      adc     8(%rdi), %rcx
-C      adc     16(%rdi), %rdx
-C      adc     $0, %r8
-C      add     24(%rdi), %rax
-C      adc     32(%rdi), %rcx
-C      adc     40(%rdi), %rdx
-C      adc     $0, %r8
-C      ...
+C  * Review feed-in and wind-down code.
  
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
  
  ASM_START()
         TEXT
         ALIGN(32)
  PROLOGUE(mpn_mod_34lsub1)
+       FUNC_ENTRY(2)
  
         mov     $0x0000FFFFFFFFFFFF, %r11
  
-       sub     $2, %rsi
+       mov     (ap), %rax
+
+       cmp     $2, %rsi
         ja      L(gt2)
  
-       mov     (%rdi), %rax
-       nop
-       jb      L(1)
+       jb      L(one)
  
-       mov     8(%rdi), %rsi
+       mov     8(ap), %rsi
         mov     %rax, %rdx
         shr     $48, %rax               C src[0] low
  
         and     %r11, %rdx              C src[0] high
         add     %rdx, %rax
-       mov     %esi, %edx
+       mov     R32(%rsi), R32(%rdx)
  
         shr     $32, %rsi               C src[1] high
         add     %rsi, %rax
  
         shl     $16, %rdx               C src[1] low
         add     %rdx, %rax
+L(one):        FUNC_EXIT()
+       ret
  
-L(1):  ret
  
+C Don't change this, the wind-down code is not able to handle greater values
+define(UNROLL,3)
  
-       ALIGN(16)
-L(gt2):        xor     %eax, %eax
-       xor     %ecx, %ecx
-       xor     %edx, %edx
-       xor     %r8, %r8
+L(gt2):        mov     8(ap), %rcx
+       mov     16(ap), %rdx
         xor     %r9, %r9
-       xor     %r10, %r10
-
-L(top):        add     (%rdi), %rax
-       adc     $0, %r10
-       add     8(%rdi), %rcx
-       adc     $0, %r8
-       add     16(%rdi), %rdx
+       add     $24, ap
+       sub     $eval(UNROLL*3+3), %rsi
+       jc      L(end)
+       ALIGN(16)
+L(top):
+       add     (ap), %rax
+       adc     8(ap), %rcx
+       adc     16(ap), %rdx
         adc     $0, %r9
-
-       sub     $3,%rsi
-       jng     L(end)
-
-       add     24(%rdi), %rax
-       adc     $0, %r10
-       add     32(%rdi), %rcx
-       adc     $0, %r8
-       add     40(%rdi), %rdx
-       lea     48(%rdi), %rdi
+forloop(i,1,UNROLL-1,`dnl
+       add     eval(i*24)(ap), %rax
+       adc     eval(i*24+8)(ap), %rcx
+       adc     eval(i*24+16)(ap), %rdx
         adc     $0, %r9
+')dnl
+       add     $eval(UNROLL*24), ap
+       sub     $eval(UNROLL*3), %rsi
+       jnc     L(top)
+
+L(end):
+       lea     L(tab)(%rip), %r8
+ifdef(`PIC',
+`      movslq  36(%r8,%rsi,4), %r10
+       add     %r10, %r8
+       jmp     *%r8
+',`
+       jmp     *72(%r8,%rsi,8)
+')
+       JUMPTABSECT
+       ALIGN(8)
+L(tab):        JMPENT( L(0), L(tab))
+       JMPENT( L(1), L(tab))
+       JMPENT( L(2), L(tab))
+       JMPENT( L(3), L(tab))
+       JMPENT( L(4), L(tab))
+       JMPENT( L(5), L(tab))
+       JMPENT( L(6), L(tab))
+       JMPENT( L(7), L(tab))
+       JMPENT( L(8), L(tab))
+       TEXT
  
-       sub     $3,%rsi
-       jg      L(top)
-
-
-       add     $-24, %rdi
-L(end):        add     %r9, %rax
-       adc     %r10, %rcx
-       adc     %r8, %rdx
-
-       inc     %rsi
-       mov     $0x1, %r10d
-       js      L(combine)
-
-       mov     $0x10000, %r10d
-       adc     24(%rdi), %rax
-       dec     %rsi
-       js      L(combine)
+L(6):  add     (ap), %rax
+       adc     8(ap), %rcx
+       adc     16(ap), %rdx
+       adc     $0, %r9
+       add     $24, ap
+L(3):  add     (ap), %rax
+       adc     8(ap), %rcx
+       adc     16(ap), %rdx
+       jmp     L(cj1)
+
+L(7):  add     (ap), %rax
+       adc     8(ap), %rcx
+       adc     16(ap), %rdx
+       adc     $0, %r9
+       add     $24, ap
+L(4):  add     (ap), %rax
+       adc     8(ap), %rcx
+       adc     16(ap), %rdx
+       adc     $0, %r9
+       add     $24, ap
+L(1):  add     (ap), %rax
+       adc     $0, %rcx
+       jmp     L(cj2)
+
+L(8):  add     (ap), %rax
+       adc     8(ap), %rcx
+       adc     16(ap), %rdx
+       adc     $0, %r9
+       add     $24, ap
+L(5):  add     (ap), %rax
+       adc     8(ap), %rcx
+       adc     16(ap), %rdx
+       adc     $0, %r9
+       add     $24, ap
+L(2):  add     (ap), %rax
+       adc     8(ap), %rcx
  
-       adc     32(%rdi), %rcx
-       mov     $0x100000000, %r10
+L(cj2):        adc     $0, %rdx
+L(cj1):        adc     $0, %r9
+L(0):  add     %r9, %rax
+       adc     $0, %rcx
+       adc     $0, %rdx
+       adc     $0, %rax
  
-L(combine):
-       sbb     %rsi, %rsi              C carry
         mov     %rax, %rdi              C 0mod3
         shr     $48, %rax               C 0mod3 high
  
-       and     %r10, %rsi              C carry masked
         and     %r11, %rdi              C 0mod3 low
-       mov     %ecx, %r10d             C 1mod3
+       mov     R32(%rcx), R32(%r10)    C 1mod3
  
-       add     %rsi, %rax              C apply carry
         shr     $32, %rcx               C 1mod3 high
  
         add     %rdi, %rax              C apply 0mod3 low
-       movzwl  %dx, %edi               C 2mod3
+       movzwl  %dx, R32(%rdi)          C 2mod3
         shl     $16, %r10               C 1mod3 low
  
         add     %rcx, %rax              C apply 1mod3 high
@@ -161,5 +189,6 @@ L(combine):
         add     %rdx, %rax              C apply 2mod3 high
         add     %rdi, %rax              C apply 2mod3 low
  
+       FUNC_EXIT()
         ret
  EPILOGUE()
diff --git a/mpn/x86_64/mode1o.asm b/mpn/x86_64/mode1o.asm

index ae5f83c29b259fe738ca9a3da5da514a49f2174e..0d95bec5f65d5ecd016a21dfe37a43b8fcc8270c 100644 (file)
--- a/mpn/x86_64/mode1o.asm
+++ b/mpn/x86_64/mode1o.asm
@@ -1,20 +1,20 @@
-dnl  AMD64 mpn_modexact_1_odd -- exact division style remainder.
+dnl  AMD64 mpn_modexact_1_odd -- Hensel norm remainder.
+
+dnl  Copyright 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2011, 2012 Free
+dnl  Software Foundation, Inc.
  
-dnl  Copyright 2000, 2001, 2002, 2003, 2004, 2005, 2006 Free Software
-dnl  Foundation, Inc.
-dnl
  dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or
-dnl  modify it under the terms of the GNU Lesser General Public License as
-dnl  published by the Free Software Foundation; either version 3 of the
-dnl  License, or (at your option) any later version.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful,
-dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
-dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-dnl  Lesser General Public License for more details.
-dnl
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
  dnl  You should have received a copy of the GNU Lesser General Public License
  dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
  
@@ -22,119 +22,100 @@ include(`../config.m4')
  
  
  C           cycles/limb
-C K8,K9:       10
-C K10:         10
-C P4:          33
-C P6 core2:    13
-C P6 corei7:   14.5
-C P6 Atom:     35
-
-
-C mp_limb_t mpn_modexact_1_odd (mp_srcptr src, mp_size_t size,
-C                               mp_limb_t divisor);
-C mp_limb_t mpn_modexact_1c_odd (mp_srcptr src, mp_size_t size,
-C                                mp_limb_t divisor, mp_limb_t carry);
-C
-C
+C AMD K8,K9    10
+C AMD K10      10
+C Intel P4     33
+C Intel core2  13
+C Intel corei  14.5
+C Intel atom   35
+C VIA nano      ?
+
+
  C The dependent chain in the main loop is
  C
  C                            cycles
-C      subq    %rdx, %rax      1
-C      imulq   %r9, %rax       4
-C      mulq    %r8             5
+C      sub     %rdx, %rax      1
+C      imul    %r9, %rax       4
+C      mul     %r8             5
  C                            ----
  C       total                 10
  C
-C The movq load from src seems to need to be scheduled back before the jz to
-C achieve this speed, out-of-order execution apparently can't completely
-C hide the latency otherwise.
+C The mov load from src seems to need to be scheduled back before the jz to
+C achieve this speed, out-of-order execution apparently can't completely hide
+C the latency otherwise.
  C
-C The l=src[i]-cbit step is rotated back too, since that allows us to avoid
-C it for the first iteration (where there's no cbit).
+C The l=src[i]-cbit step is rotated back too, since that allows us to avoid it
+C for the first iteration (where there's no cbit).
  C
-C The code alignment used (32-byte) for the loop also seems necessary.
-C Without that the non-PIC case has adcq crossing the 0x60 offset,
-C apparently making it run at 11 cycles instead of 10.
-C
-C Not done:
-C
-C divq for size==1 was measured at about 79 cycles, compared to the inverse
-C at about 25 cycles (both including function call overheads), so that's not
-C used.
-C
-C Enhancements:
-C
-C For PIC, we shouldn't really need the GOT fetch for binvert_limb_table,
-C it'll be in rodata or text in libgmp.so and can be accessed directly %rip
-C relative.  This would be for small model only (something we don't
-C presently detect, but which is all that gcc 3.3.3 supports), since 8-byte
-C PC-relative relocations are apparently not available.  Some rough
-C experiments with binutils 2.13 looked worrylingly like it might come out
-C with an unwanted text segment relocation though, even with ".protected".
+C The code alignment used (32-byte) for the loop also seems necessary.  Without
+C that the non-PIC case has adc crossing the 0x60 offset, apparently making it
+C run at 11 cycles instead of 10.
  
  
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
+
  ASM_START()
         TEXT
         ALIGN(32)
  PROLOGUE(mpn_modexact_1_odd)
-
-       movl    $0, %ecx
+       FUNC_ENTRY(3)
+       mov     $0, R32(%rcx)
+IFDOS(`        jmp     L(ent)          ')
  
  PROLOGUE(mpn_modexact_1c_odd)
-
+       FUNC_ENTRY(4)
+L(ent):
         C rdi   src
         C rsi   size
         C rdx   divisor
         C rcx   carry
  
-       movq    %rdx, %r8               C d
-       shrl    %edx                    C d/2
-ifdef(`PIC',`
-       movq    binvert_limb_table@GOTPCREL(%rip), %r9
-',`
-       movabsq $binvert_limb_table, %r9
-')
+       mov     %rdx, %r8               C d
+       shr     R32(%rdx)               C d/2
+
+       LEA(    binvert_limb_table, %r9)
  
-       andl    $127, %edx
-       movq    %rcx, %r10              C initial carry
+       and     $127, R32(%rdx)
+       mov     %rcx, %r10              C initial carry
  
-       movzbl  (%r9,%rdx), %edx        C inv 8 bits
+       movzbl  (%r9,%rdx), R32(%rdx)   C inv 8 bits
  
-       movq    (%rdi), %rax            C src[0]
-       leaq    (%rdi,%rsi,8), %r11     C src end
-       movq    %r8, %rdi               C d, made available to imull
+       mov     (%rdi), %rax            C src[0]
+       lea     (%rdi,%rsi,8), %r11     C src end
+       mov     %r8, %rdi               C d, made available to imull
  
-       leal    (%rdx,%rdx), %ecx       C 2*inv
-       imull   %edx, %edx              C inv*inv
+       lea     (%rdx,%rdx), R32(%rcx)  C 2*inv
+       imul    R32(%rdx), R32(%rdx)    C inv*inv
  
-       negq    %rsi                    C -size
+       neg     %rsi                    C -size
  
-       imull   %edi, %edx              C inv*inv*d
+       imul    R32(%rdi), R32(%rdx)    C inv*inv*d
  
-       subl    %edx, %ecx              C inv = 2*inv - inv*inv*d, 16 bits
+       sub     R32(%rdx), R32(%rcx)    C inv = 2*inv - inv*inv*d, 16 bits
  
-       leal    (%rcx,%rcx), %edx       C 2*inv
-       imull   %ecx, %ecx              C inv*inv
+       lea     (%rcx,%rcx), R32(%rdx)  C 2*inv
+       imul    R32(%rcx), R32(%rcx)    C inv*inv
  
-       imull   %edi, %ecx              C inv*inv*d
+       imul    R32(%rdi), R32(%rcx)    C inv*inv*d
  
-       subl    %ecx, %edx              C inv = 2*inv - inv*inv*d, 32 bits
-       xorl    %ecx, %ecx              C initial cbit
+       sub     R32(%rcx), R32(%rdx)    C inv = 2*inv - inv*inv*d, 32 bits
+       xor     R32(%rcx), R32(%rcx)    C initial cbit
  
-       leaq    (%rdx,%rdx), %r9        C 2*inv
-       imulq   %rdx, %rdx              C inv*inv
+       lea     (%rdx,%rdx), %r9        C 2*inv
+       imul    %rdx, %rdx              C inv*inv
  
-       imulq   %r8, %rdx               C inv*inv*d
+       imul    %r8, %rdx               C inv*inv*d
  
-       subq    %rdx, %r9               C inv = 2*inv - inv*inv*d, 64 bits
-       movq    %r10, %rdx              C initial climb
+       sub     %rdx, %r9               C inv = 2*inv - inv*inv*d, 64 bits
+       mov     %r10, %rdx              C initial climb
  
         ASSERT(e,`      C d*inv == 1 mod 2^64
-       movq    %r8, %r10
-       imulq   %r9, %r10
-       cmpq    $1, %r10')
+       mov     %r8, %r10
+       imul    %r9, %r10
+       cmp     $1, %r10')
  
-       incq    %rsi
+       inc     %rsi
         jz      L(one)
  
  
@@ -149,30 +130,31 @@ L(top):
         C r9    inverse
         C r11   src end ptr
  
-       subq    %rdx, %rax              C l = src[i]-cbit - climb
+       sub     %rdx, %rax              C l = src[i]-cbit - climb
  
-       adcq    $0, %rcx                C more cbit
-       imulq   %r9, %rax               C q = l * inverse
+       adc     $0, %rcx                C more cbit
+       imul    %r9, %rax               C q = l * inverse
  
-       mulq    %r8                     C climb = high (q * d)
+       mul     %r8                     C climb = high (q * d)
  
-       movq    (%r11,%rsi,8), %rax     C src[i+1]
-       subq    %rcx, %rax              C next l = src[i+1] - cbit
-       setc    %cl                     C new cbit
+       mov     (%r11,%rsi,8), %rax     C src[i+1]
+       sub     %rcx, %rax              C next l = src[i+1] - cbit
+       setc    R8(%rcx)                C new cbit
  
-       incq    %rsi
+       inc     %rsi
         jnz     L(top)
  
  
  L(one):
-       subq    %rdx, %rax              C l = src[i]-cbit - climb
+       sub     %rdx, %rax              C l = src[i]-cbit - climb
  
-       adcq    $0, %rcx                C more cbit
-       imulq   %r9, %rax               C q = l * inverse
+       adc     $0, %rcx                C more cbit
+       imul    %r9, %rax               C q = l * inverse
  
-       mulq    %r8                     C climb = high (q * d)
+       mul     %r8                     C climb = high (q * d)
  
-       leaq    (%rcx,%rdx), %rax       C climb+cbit
+       lea     (%rcx,%rdx), %rax       C climb+cbit
+       FUNC_EXIT()
         ret
  
  EPILOGUE(mpn_modexact_1c_odd)
diff --git a/mpn/x86_64/mul_1.asm b/mpn/x86_64/mul_1.asm

index a0c45990e967204e9c7f9483eeb929c95536f0ea..ee435db9c683b0134b89f2a44cdafddc1f67f1c2 100644 (file)
--- a/mpn/x86_64/mul_1.asm
+++ b/mpn/x86_64/mul_1.asm
@@ -1,6 +1,6 @@
  dnl  AMD64 mpn_mul_1.
  
-dnl  Copyright 2003, 2004, 2005, 2007, 2008 Free Software Foundation, Inc.
+dnl  Copyright 2003, 2004, 2005, 2007, 2008, 2012 Free Software Foundation, Inc.
  
  dnl  This file is part of the GNU MP Library.
  
@@ -20,45 +20,68 @@ dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
  include(`../config.m4')
  
  C           cycles/limb
-C K8,K9:        2.5
-C K10:          2.5
-C P4:           12.3
-C P6 core2:     4.0
-C P6 corei7:    3.8
-C Atom:                19.8
-
-C The inner loop of this code is the result of running a code generation and
+C AMD K8,K9     2.5
+C AMD K10       2.5
+C AMD bd1       5.0
+C AMD bobcat    5.5
+C Intel P4     12.3
+C Intel core2   4.0
+C Intel NHM     3.75
+C Intel SBR     2.95
+C Intel atom   19.8
+C VIA nano      4.25
+
+C The loop of this code is the result of running a code generation and
  C optimization tool suite written by David Harvey and Torbjorn Granlund.
  
-C TODO:
-C  * The inner loop is great, but the prologue and epilogue code was
-C    quickly written.  Tune it!
+C TODO
+C  * The loop is great, but the prologue and epilogue code was quickly written.
+C    Tune it!
  
-C INPUT PARAMETERS
-define(`rp',    `%rdi')
-define(`up',    `%rsi')
-define(`n_param',`%rdx')
-define(`vl',    `%rcx')
+define(`rp',      `%rdi')   C rcx
+define(`up',      `%rsi')   C rdx
+define(`n_param', `%rdx')   C r8
+define(`vl',      `%rcx')   C r9
  
-define(`n',    `%r11')
+define(`n',       `%r11')
+
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
+
+IFDOS(`        define(`up', ``%rsi'')  ') dnl
+IFDOS(`        define(`rp', ``%rcx'')  ') dnl
+IFDOS(`        define(`vl', ``%r9'')   ') dnl
+IFDOS(`        define(`r9', ``rdi'')   ') dnl
+IFDOS(`        define(`n',  ``%r8'')   ') dnl
+IFDOS(`        define(`r8', ``r11'')   ') dnl
  
  ASM_START()
         TEXT
         ALIGN(16)
  PROLOGUE(mpn_mul_1c)
+IFDOS(``push   %rsi            '')
+IFDOS(``push   %rdi            '')
+IFDOS(``mov    %rdx, %rsi      '')
         push    %rbx
-       mov     %r8, %r10
+IFSTD(`        mov     %r8, %r10')
+IFDOS(`        mov     64(%rsp), %r10')        C 40 + 3*8  (3 push insns)
         jmp     L(common)
  EPILOGUE()
  
  PROLOGUE(mpn_mul_1)
+
+IFDOS(``push   %rsi            '')
+IFDOS(``push   %rdi            '')
+IFDOS(``mov    %rdx, %rsi      '')
+
         push    %rbx
         xor     %r10, %r10
  L(common):
         mov     (up), %rax              C read first u limb early
-       mov     n_param, %rbx           C move away n from rdx, mul uses it
+IFSTD(`        mov     n_param, %rbx   ')      C move away n from rdx, mul uses it
+IFDOS(`        mov     n, %rbx         ')
         mul     vl
-       mov     %rbx, %r11
+IFSTD(`        mov     %rbx, n         ')
  
         add     %r10, %rax
         adc     $0, %rdx
@@ -113,7 +136,7 @@ L(top):     mov     %r10, (rp,n,8)
         add     %rax, %r9
         mov     (up,n,8), %rax
         adc     %rdx, %r8
-       mov     $0, %r10d
+       mov     $0, R32(%r10)
  L(L1): mul     vl
         mov     %r9, 8(rp,n,8)
         add     %rax, %r8
@@ -126,11 +149,11 @@ L(L0):    mov     8(up,n,8), %rax
  L(L3): mov     16(up,n,8), %rax
         mul     vl
         mov     %rbx, 24(rp,n,8)
-       mov     $0, %r8d                # zero
-       mov     %r8, %rbx               # zero
+       mov     $0, R32(%r8)            C zero
+       mov     %r8, %rbx               C zero
         add     %rax, %r10
         mov     24(up,n,8), %rax
-       mov     %r8, %r9                # zero
+       mov     %r8, %r9                C zero
         adc     %rdx, %r9
  L(L2): mul     vl
         add     $4, n
@@ -144,5 +167,7 @@ L(L2):      mul     vl
  L(ret):        mov     %rdx, %rax
  
         pop     %rbx
+IFDOS(``pop    %rdi            '')
+IFDOS(``pop    %rsi            '')
         ret
  EPILOGUE()
diff --git a/mpn/x86_64/mul_2.asm b/mpn/x86_64/mul_2.asm

index ab87aaf21ea16496a58be5ff0763821aab367c7f..73b346fa9e7db45dd7491331f05de31b264551c9 100644 (file)
--- a/mpn/x86_64/mul_2.asm
+++ b/mpn/x86_64/mul_2.asm
@@ -1,7 +1,7 @@
  dnl  AMD64 mpn_mul_2 -- Multiply an n-limb vector with a 2-limb vector and
  dnl  store the result in a third limb vector.
  
-dnl  Copyright 2008 Free Software Foundation, Inc.
+dnl  Copyright 2008, 2011, 2012 Free Software Foundation, Inc.
  
  dnl  This file is part of the GNU MP Library.
  
@@ -21,11 +21,13 @@ dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
  include(`../config.m4')
  
  C           cycles/limb
-C K8,K9:        2.275
-C K10:          2.275
-C P4:           ?
-C P6 core2:     4.0
-C P6 corei7:    3.8
+C AMD K8,K9     2.275
+C AMD K10       2.275
+C Intel P4     13.5
+C Intel core2   4.0
+C Intel corei   3.8
+C Intel atom    ?
+C VIA nano      ?
  
  C This code is the result of running a code generation and optimization tool
  C suite written by David Harvey and Torbjorn Granlund.
@@ -51,10 +53,14 @@ define(`w2', `%rbp')
  define(`w3', `%r10')
  define(`n',  `%r11')
  
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
+
  ASM_START()
         TEXT
         ALIGN(16)
  PROLOGUE(mpn_mul_2)
+       FUNC_ENTRY(4)
         push    %rbx
         push    %rbp
  
@@ -170,5 +176,6 @@ L(m22):     mul     v1
  
         pop     %rbp
         pop     %rbx
+       FUNC_EXIT()
         ret
  EPILOGUE()
diff --git a/mpn/x86_64/mul_basecase.asm b/mpn/x86_64/mul_basecase.asm

index 53207662991556a50457e9c93b5f60899a7b96da..be9e2760a2d96ed3f984e116057c244a71b9a79b 100644 (file)
--- a/mpn/x86_64/mul_basecase.asm
+++ b/mpn/x86_64/mul_basecase.asm
@@ -2,7 +2,7 @@ dnl  AMD64 mpn_mul_basecase.
  
  dnl  Contributed to the GNU project by Torbjorn Granlund and David Harvey.
  
-dnl  Copyright 2008 Free Software Foundation, Inc.
+dnl  Copyright 2008, 2012 Free Software Foundation, Inc.
  
  dnl  This file is part of the GNU MP Library.
  
@@ -22,10 +22,13 @@ dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
  include(`../config.m4')
  
  C           cycles/limb
-C K8,K9:        2.375
-C K10:          2.375
-C P4:           ?
-C P6-15:        4.45
+C AMD K8,K9     2.375
+C AMD K10       2.375
+C Intel P4     15-16
+C Intel core2   4.45
+C Intel corei   4.35
+C Intel atom    ?
+C VIA nano      4.5
  
  C The inner loops of this code are the result of running a code generation and
  C optimization tool suite written by David Harvey and Torbjorn Granlund.
@@ -56,10 +59,15 @@ define(`n',  `%r11')
  define(`outer_addr', `%r14')
  define(`un',  `%r13')
  
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
+
  ASM_START()
         TEXT
         ALIGN(16)
  PROLOGUE(mpn_mul_basecase)
+       FUNC_ENTRY(4)
+IFDOS(`        mov     56(%rsp), %r8d  ')
         push    %rbx
         push    %rbp
         push    %r12
@@ -92,7 +100,13 @@ L(mul_1):
         cmp     $2, R32(w0)
         jc      L(mul_1_prologue_1)
         jz      L(mul_1_prologue_2)
-       jmp     L(mul_1_prologue_3)
+
+L(mul_1_prologue_3):
+       add     $-1, n
+       lea     L(addmul_outer_3)(%rip), outer_addr
+       mov     %rax, w3
+       mov     %rdx, w0
+       jmp     L(mul_1_entry_3)
  
  L(mul_1_prologue_0):
         mov     %rax, w2
@@ -124,13 +138,6 @@ L(mul_1_prologue_2):
         xor     R32(w3), R32(w3)
         jmp     L(mul_1_entry_2)
  
-L(mul_1_prologue_3):
-       add     $-1, n
-       lea     L(addmul_outer_3)(%rip), outer_addr
-       mov     %rax, w3
-       mov     %rdx, w0
-       jmp     L(mul_1_entry_3)
-
  
         C this loop is 10 c/loop = 2.5 c/l on K8, for all up/rp alignments
  
@@ -291,7 +298,7 @@ L(mul_2_entry_1):
         mov     w3, -32(rp,n,8)
         js      L(mul_2_top)
  
-       mov     -32(up,n,8), %rax
+       mov     -32(up,n,8), %rax       C FIXME: n is constant
         mul     v1
         add     %rax, w0
         mov     w0, (rp)
@@ -445,6 +452,7 @@ L(ret):     pop     %r15
         pop     %r12
         pop     %rbp
         pop     %rbx
+       FUNC_EXIT()
         ret
  
  EPILOGUE()
diff --git a/mpn/x86_64/mullo_basecase.asm b/mpn/x86_64/mullo_basecase.asm

new file mode 100644 (file)

index 0000000..a066b82
--- /dev/null
+++ b/mpn/x86_64/mullo_basecase.asm
@@ -0,0 +1,425 @@
+dnl  AMD64 mpn_mullo_basecase.
+
+dnl  Contributed to the GNU project by Torbjorn Granlund.
+
+dnl  Copyright 2008, 2009, 2011, 2012 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C The inner loops of this code are the result of running a code generation and
+C optimisation tool suite written by David Harvey and Torbjorn Granlund.
+
+C NOTES
+C   * There is a major stupidity in that we call mpn_mul_1 initially, for a
+C     large trip count.  Instead, we should start with mul_2 for any operand
+C     size congruence class.
+C   * Stop iterating addmul_2 earlier, falling into straight-line triangle code
+C     for the last 2-3 iterations.
+C   * Perhaps implement n=4 special code.
+C   * The reload of the outer loop jump address hurts branch prediction.
+C   * The addmul_2 loop ends with an MUL whose high part is not used upon loop
+C     exit.
+
+C INPUT PARAMETERS
+define(`rp',      `%rdi')
+define(`up',      `%rsi')
+define(`vp_param', `%rdx')
+define(`n',       `%rcx')
+
+define(`vp',   `%r11')
+define(`outer_addr', `%r8')
+define(`j',    `%r9')
+define(`v0',   `%r13')
+define(`v1',   `%r14')
+define(`w0',   `%rbx')
+define(`w1',   `%r15')
+define(`w2',   `%rbp')
+define(`w3',   `%r10')
+
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
+
+ASM_START()
+       TEXT
+       ALIGN(16)
+PROLOGUE(mpn_mullo_basecase)
+       FUNC_ENTRY(4)
+       cmp     $4, n
+       jge     L(gen)
+       mov     (up), %rax              C u0
+       mov     (vp_param), %r8         C v0
+
+       lea     L(tab)(%rip), %r9
+ifdef(`PIC',
+`      movslq  (%r9,%rcx,4), %r10
+       add     %r10, %r9
+       jmp     *%r9
+',`
+       jmp     *(%r9,n,8)
+')
+       JUMPTABSECT
+       ALIGN(8)
+L(tab):        JMPENT( L(tab), L(tab))                 C not allowed
+       JMPENT( L(1), L(tab))                   C 1
+       JMPENT( L(2), L(tab))                   C 2
+       JMPENT( L(3), L(tab))                   C 3
+dnl    JMPENT( L(0m4), L(tab))                 C 4
+dnl    JMPENT( L(1m4), L(tab))                 C 5
+dnl    JMPENT( L(2m4), L(tab))                 C 6
+dnl    JMPENT( L(3m4), L(tab))                 C 7
+dnl    JMPENT( L(0m4), L(tab))                 C 8
+dnl    JMPENT( L(1m4), L(tab))                 C 9
+dnl    JMPENT( L(2m4), L(tab))                 C 10
+dnl    JMPENT( L(3m4), L(tab))                 C 11
+       TEXT
+
+L(1):  imul    %r8, %rax               C n = 1: low limb of u0 x v0
+       mov     %rax, (rp)              C store r0, the only result limb
+       FUNC_EXIT()
+       ret
+
+L(2):  mov     8(vp_param), %r11       C n = 2: load v1 before mul overwrites rdx (vp_param)
+       imul    %rax, %r11              C u0 x v1
+       mul     %r8                     C u0 x v0
+       mov     %rax, (rp)              C store r0 = low(u0 x v0)
+       imul    8(up), %r8              C u1 x v0
+       lea     (%r11, %rdx), %rax      C low(u0 x v1) + high(u0 x v0)
+       add     %r8, %rax               C ... + low(u1 x v0)
+       mov     %rax, 8(rp)             C store r1
+       FUNC_EXIT()
+       ret
+
+L(3):  mov     8(vp_param), %r9        C n = 3: v1
+       mov     16(vp_param), %r11      C v2, loaded before mul overwrites rdx (vp_param)
+       mul     %r8                     C u0 x v0 -> <r1,r0>
+       mov     %rax, (rp)              C r0
+       mov     (up), %rax              C u0
+       mov     %rdx, %rcx              C r1
+       mul     %r9                     C u0 x v1 -> <r2,r1>
+       imul    8(up), %r9              C u1 x v1 -> r2
+       mov     16(up), %r10            C u2
+       imul    %r8, %r10               C u2 x v0 -> r2
+       add     %rax, %rcx              C r1 += low(u0 x v1)
+       adc     %rdx, %r9               C r2 += high(u0 x v1) + carry out of r1
+       add     %r10, %r9               C r2 += low(u2 x v0)
+       mov     8(up), %rax             C u1
+       mul     %r8                     C u1 x v0 -> <r2,r1>
+       add     %rax, %rcx              C r1 += low(u1 x v0)
+       adc     %rdx, %r9               C r2 += high(u1 x v0) + carry out of r1
+       mov     %r11, %rax              C v2
+       imul    (up), %rax              C u0 x v2 -> r2
+       add     %rax, %r9               C r2 += low(u0 x v2)
+       mov     %rcx, 8(rp)             C store r1
+       mov     %r9, 16(rp)             C store r2
+       FUNC_EXIT()
+       ret
+
+L(0m4):
+L(1m4):
+L(2m4):
+L(3m4):
+L(gen):        push    %rbx
+       push    %rbp
+       push    %r13
+       push    %r14
+       push    %r15
+
+       mov     (up), %rax
+       mov     (vp_param), v0
+       mov     vp_param, vp
+
+       lea     (rp,n,8), rp
+       lea     (up,n,8), up
+       neg     n
+
+       mul     v0
+
+       test    $1, R8(n)
+       jz      L(mul_2)
+
+L(mul_1):
+       lea     -8(rp), rp
+       lea     -8(up), up
+       test    $2, R8(n)
+       jnz     L(mul_1_prologue_3)
+
+L(mul_1_prologue_2):           C n = 7, 11, 15, ...
+       lea     -1(n), j
+       lea     L(addmul_outer_1)(%rip), outer_addr
+       mov     %rax, w0
+       mov     %rdx, w1
+       xor     R32(w2), R32(w2)
+       xor     R32(w3), R32(w3)
+       mov     16(up,n,8), %rax
+       jmp     L(mul_1_entry_2)
+
+L(mul_1_prologue_3):           C n = 5, 9, 13, ...
+       lea     1(n), j
+       lea     L(addmul_outer_3)(%rip), outer_addr
+       mov     %rax, w2
+       mov     %rdx, w3
+       xor     R32(w0), R32(w0)
+       jmp     L(mul_1_entry_0)
+
+       ALIGN(16)
+L(mul_1_top):
+       mov     w0, -16(rp,j,8)
+       add     %rax, w1
+       mov     (up,j,8), %rax
+       adc     %rdx, w2
+       xor     R32(w0), R32(w0)
+       mul     v0
+       mov     w1, -8(rp,j,8)
+       add     %rax, w2
+       adc     %rdx, w3
+L(mul_1_entry_0):
+       mov     8(up,j,8), %rax
+       mul     v0
+       mov     w2, (rp,j,8)
+       add     %rax, w3
+       adc     %rdx, w0
+       mov     16(up,j,8), %rax
+       mul     v0
+       mov     w3, 8(rp,j,8)
+       xor     R32(w2), R32(w2)        C zero
+       mov     w2, w3                  C zero
+       add     %rax, w0
+       mov     24(up,j,8), %rax
+       mov     w2, w1                  C zero
+       adc     %rdx, w1
+L(mul_1_entry_2):
+       mul     v0
+       add     $4, j
+       js      L(mul_1_top)
+
+       mov     w0, -16(rp)
+       add     %rax, w1
+       mov     w1, -8(rp)
+       adc     %rdx, w2
+
+       imul    (up), v0
+       add     v0, w2
+       mov     w2, (rp)
+
+       add     $1, n
+       jz      L(ret)
+
+       mov     8(vp), v0
+       mov     16(vp), v1
+
+       lea     16(up), up
+       lea     8(vp), vp
+       lea     24(rp), rp
+
+       jmp     *outer_addr
+
+
+L(mul_2):
+       mov     8(vp), v1
+       test    $2, R8(n)
+       jz      L(mul_2_prologue_3)
+
+       ALIGN(16)
+L(mul_2_prologue_1):
+       lea     0(n), j
+       mov     %rax, w3
+       mov     %rdx, w0
+       xor     R32(w1), R32(w1)
+       mov     (up,n,8), %rax
+       lea     L(addmul_outer_3)(%rip), outer_addr
+       jmp     L(mul_2_entry_1)
+
+       ALIGN(16)
+L(mul_2_prologue_3):
+       lea     2(n), j
+       mov     $0, R32(w3)
+       mov     %rax, w1
+       mov     (up,n,8), %rax
+       mov     %rdx, w2
+       lea     L(addmul_outer_1)(%rip), outer_addr
+       jmp     L(mul_2_entry_3)
+
+       ALIGN(16)
+L(mul_2_top):
+       mov     -32(up,j,8), %rax
+       mul     v1
+       add     %rax, w0
+       adc     %rdx, w1
+       mov     -24(up,j,8), %rax
+       xor     R32(w2), R32(w2)
+       mul     v0
+       add     %rax, w0
+       mov     -24(up,j,8), %rax
+       adc     %rdx, w1
+       adc     $0, R32(w2)
+       mul     v1
+       add     %rax, w1
+       mov     w0, -24(rp,j,8)
+       adc     %rdx, w2
+       mov     -16(up,j,8), %rax
+       mul     v0
+       mov     $0, R32(w3)
+       add     %rax, w1
+       adc     %rdx, w2
+       mov     -16(up,j,8), %rax
+       adc     $0, R32(w3)
+L(mul_2_entry_3):
+       mov     $0, R32(w0)
+       mov     w1, -16(rp,j,8)
+       mul     v1
+       add     %rax, w2
+       mov     -8(up,j,8), %rax
+       adc     %rdx, w3
+       mov     $0, R32(w1)
+       mul     v0
+       add     %rax, w2
+       mov     -8(up,j,8), %rax
+       adc     %rdx, w3
+       adc     R32(w1), R32(w0)
+       mul     v1
+       add     %rax, w3
+       mov     w2, -8(rp,j,8)
+       adc     %rdx, w0
+       mov     (up,j,8), %rax
+       mul     v0
+       add     %rax, w3
+       adc     %rdx, w0
+       adc     $0, R32(w1)
+L(mul_2_entry_1):
+       add     $4, j
+       mov     w3, -32(rp,j,8)
+       js      L(mul_2_top)
+
+       imul    -16(up), v1
+       add     v1, w0
+       imul    -8(up), v0
+       add     v0, w0
+       mov     w0, -8(rp)
+
+       add     $2, n
+       jz      L(ret)
+
+       mov     16(vp), v0
+       mov     24(vp), v1
+
+       lea     16(vp), vp
+       lea     16(rp), rp
+
+       jmp     *outer_addr
+
+
+L(addmul_outer_1):
+       lea     -2(n), j
+       mov     -16(up,n,8), %rax
+       mul     v0
+       mov     %rax, w3
+       mov     -16(up,n,8), %rax
+       mov     %rdx, w0
+       xor     R32(w1), R32(w1)
+       lea     L(addmul_outer_3)(%rip), outer_addr
+       jmp     L(addmul_entry_1)
+
+L(addmul_outer_3):
+       lea     0(n), j
+       mov     -16(up,n,8), %rax
+       xor     R32(w3), R32(w3)
+       mul     v0
+       mov     %rax, w1
+       mov     -16(up,n,8), %rax
+       mov     %rdx, w2
+       lea     L(addmul_outer_1)(%rip), outer_addr
+       jmp     L(addmul_entry_3)
+
+       ALIGN(16)
+L(addmul_top):
+       add     w3, -32(rp,j,8)
+       adc     %rax, w0
+       mov     -24(up,j,8), %rax
+       adc     %rdx, w1
+       xor     R32(w2), R32(w2)
+       mul     v0
+       add     %rax, w0
+       mov     -24(up,j,8), %rax
+       adc     %rdx, w1
+       adc     R32(w2), R32(w2)
+       mul     v1
+       xor     R32(w3), R32(w3)
+       add     w0, -24(rp,j,8)
+       adc     %rax, w1
+       mov     -16(up,j,8), %rax
+       adc     %rdx, w2
+       mul     v0
+       add     %rax, w1
+       mov     -16(up,j,8), %rax
+       adc     %rdx, w2
+       adc     $0, R32(w3)
+L(addmul_entry_3):
+       mul     v1
+       add     w1, -16(rp,j,8)
+       adc     %rax, w2
+       mov     -8(up,j,8), %rax
+       adc     %rdx, w3
+       mul     v0
+       xor     R32(w0), R32(w0)
+       add     %rax, w2
+       adc     %rdx, w3
+       mov     $0, R32(w1)
+       mov     -8(up,j,8), %rax
+       adc     R32(w1), R32(w0)
+       mul     v1
+       add     w2, -8(rp,j,8)
+       adc     %rax, w3
+       adc     %rdx, w0
+       mov     (up,j,8), %rax
+       mul     v0
+       add     %rax, w3
+       mov     (up,j,8), %rax
+       adc     %rdx, w0
+       adc     $0, R32(w1)
+L(addmul_entry_1):
+       mul     v1
+       add     $4, j
+       js      L(addmul_top)
+
+       add     w3, -32(rp)
+       adc     %rax, w0
+
+       imul    -24(up), v0
+       add     v0, w0
+       add     w0, -24(rp)
+
+       add     $2, n
+       jns     L(ret)
+
+       lea     16(vp), vp
+
+       mov     (vp), v0
+       mov     8(vp), v1
+
+       lea     -16(up), up
+
+       jmp     *outer_addr
+
+L(ret):        pop     %r15
+       pop     %r14
+       pop     %r13
+       pop     %rbp
+       pop     %rbx
+       FUNC_EXIT()
+       ret
+EPILOGUE()
diff --git a/mpn/x86_64/mulmid_basecase.asm b/mpn/x86_64/mulmid_basecase.asm

new file mode 100644 (file)

index 0000000..a4802bf
--- /dev/null
+++ b/mpn/x86_64/mulmid_basecase.asm
@@ -0,0 +1,548 @@
+dnl  AMD64 mpn_mulmid_basecase
+
+dnl  Contributed by David Harvey.
+
+dnl  Copyright 2011, 2012 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+
+include(`../config.m4')
+
+C           cycles/limb
+C K8,K9:        2.375  (2.5 when un - vn is "small")
+C K10:          ?
+C P4:           ?
+C P6-15:        ?
+
+C INPUT PARAMETERS
+define(`rp',      `%rdi')
+define(`up',      `%rsi')
+define(`un_param',`%rdx')     C only used on entry, to derive un and to advance up
+define(`vp_param',`%rcx')     C copied to vp (r15) on entry, since rcx is reused as w1
+define(`vn',      `%r8')
+
+define(`v0', `%r12')          C loaded from (vp)
+define(`v1', `%r9')           C loaded from 8(vp)
+
+define(`w0', `%rbx')          C w0..w3: rotating product and carry registers
+define(`w1', `%rcx')          C same register as vp_param
+define(`w2', `%rbp')
+define(`w3', `%r10')
+
+define(`n',  `%r11')          C inner loop counter
+define(`outer_addr', `%r14')  C target of the indirect jump that starts the next outer pass
+define(`un',  `%r13')         C row length (= un_param - vn + 1), negated by each code path
+define(`vp',  `%r15')         C working copy of vp_param
+
+define(`vp_inner', `%r10')    C same register as w3; referenced only by the diagonal code
+
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
+
+ASM_START()
+       TEXT
+       ALIGN(16)
+PROLOGUE(mpn_mulmid_basecase)
+       FUNC_ENTRY(4)
+IFDOS(`        mov     56(%rsp), %r8d  ')
+       push    %rbx                    C save the six registers popped again at L(ret)
+       push    %rbp
+       push    %r12
+       push    %r13
+       push    %r14
+       push    %r15
+
+       mov     vp_param, vp            C free rcx, which doubles as w1
+
+       C use un for row length (= un_param - vn + 1)
+       lea     1(un_param), un
+       sub     vn, un
+
+       lea     (rp,un,8), rp           C rp += un limbs; all stores below are relative to this
+
+       cmp     $4, un          C TODO: needs tuning
+       jc      L(diagonal)             C row length below 4: accumulate along diagonals
+
+       lea     (up,un_param,8), up     C up += un_param limbs
+
+       test    $1, vn
+       jz      L(mul_2)                C vn even: start with mul_2, else fall into mul_1
+
+C ===========================================================
+C     mul_1 for vp[0] if vn is odd
+
+L(mul_1):
+       mov     R32(un), R32(w0)        C keep the row length for the mod 4 dispatch
+
+       neg     un                      C un = -(row length)
+       mov     (up,un,8), %rax         C first limb of up used by this pass
+       mov     (vp), v0
+       mul     v0                      C first product is started before the dispatch
+
+       and     $-4, un         C round down to multiple of 4
+       mov     un, n
+
+       and     $3, R32(w0)             C w0 = row length mod 4
+       jz      L(mul_1_prologue_0)
+       cmp     $2, R32(w0)
+       jc      L(mul_1_prologue_1)     C remainder 1
+       jz      L(mul_1_prologue_2)     C remainder 2; remainder 3 falls through
+
+L(mul_1_prologue_3):
+       mov     %rax, w3
+       mov     %rdx, w0
+       lea     L(addmul_prologue_3)(%rip), outer_addr  C later passes use the addmul prologue with the same remainder
+       jmp     L(mul_1_entry_3)
+
+       ALIGN(16)
+L(mul_1_prologue_0):
+       mov     %rax, w2
+       mov     %rdx, w3                C note already w0 == 0
+       lea     L(addmul_prologue_0)(%rip), outer_addr
+       jmp     L(mul_1_entry_0)
+
+       ALIGN(16)
+L(mul_1_prologue_1):
+       add     $4, n
+       mov     %rax, w1
+       mov     %rdx, w2
+       mov     $0, R32(w3)
+       mov     (up,n,8), %rax          C next limb of up, multiplied at L(mul_1_entry_1)
+       lea     L(addmul_prologue_1)(%rip), outer_addr
+       jmp     L(mul_1_entry_1)
+
+       ALIGN(16)
+L(mul_1_prologue_2):
+       mov     %rax, w0
+       mov     %rdx, w1
+       mov     24(up,n,8), %rax        C next limb of up, multiplied at L(mul_1_entry_2)
+       mov     $0, R32(w2)
+       mov     $0, R32(w3)
+       lea     L(addmul_prologue_2)(%rip), outer_addr
+       jmp     L(mul_1_entry_2)
+
+
+       C this loop is 10 c/loop = 2.5 c/l on K8
+
+       ALIGN(16)
+L(mul_1_top):
+       mov     w0, -16(rp,n,8)
+       add     %rax, w1
+       mov     (up,n,8), %rax
+       adc     %rdx, w2
+L(mul_1_entry_1):
+       mov     $0, R32(w0)
+       mul     v0
+       mov     w1, -8(rp,n,8)
+       add     %rax, w2
+       adc     %rdx, w3
+L(mul_1_entry_0):
+       mov     8(up,n,8), %rax
+       mul     v0
+       mov     w2, (rp,n,8)
+       add     %rax, w3
+       adc     %rdx, w0
+L(mul_1_entry_3):
+       mov     16(up,n,8), %rax
+       mul     v0
+       mov     w3, 8(rp,n,8)
+       mov     $0, R32(w2)             C zero
+       mov     w2, w3                  C zero
+       add     %rax, w0
+       mov     24(up,n,8), %rax
+       mov     w2, w1                  C zero
+       adc     %rdx, w1
+L(mul_1_entry_2):
+       mul     v0
+       add     $4, n                   C four limbs per iteration
+       js      L(mul_1_top)            C loop while n is still negative
+
+       mov     w0, -16(rp)
+       add     %rax, w1                C fold in the last product
+       mov     w1, -8(rp)
+       mov     w2, 8(rp)               C zero last limb of output
+       adc     %rdx, w2
+       mov     w2, (rp)
+
+       dec     vn                      C one limb of vp has been handled
+       jz      L(ret)
+
+       lea     -8(up), up              C next pass: up moves back one limb,
+       lea     8(vp), vp               C vp moves forward one limb
+
+       mov     un, n                   C reload the counter (negated, rounded row length)
+       mov     (vp), v0
+       mov     8(vp), v1
+
+       jmp     *outer_addr             C continue in the addmul prologue selected above
+
+C ===========================================================
+C     mul_2 for vp[0], vp[1] if vn is even
+
+       ALIGN(16)
+L(mul_2):
+       mov     R32(un), R32(w0)        C keep the row length for the mod 4 dispatch
+
+       neg     un                      C un = -(row length)
+       mov     -8(up,un,8), %rax       C limb one below the one paired with v0 first
+       mov     (vp), v0
+       mov     8(vp), v1
+       mul     v1                      C first product is started before the dispatch
+
+       and     $-4, un         C round down to multiple of 4
+       mov     un, n
+
+       and     $3, R32(w0)             C w0 = row length mod 4
+       jz      L(mul_2_prologue_0)
+       cmp     $2, R32(w0)
+       jc      L(mul_2_prologue_1)     C remainder 1
+       jz      L(mul_2_prologue_2)     C remainder 2; remainder 3 falls through
+
+L(mul_2_prologue_3):
+       mov     %rax, w1
+       mov     %rdx, w2
+       lea     L(addmul_prologue_3)(%rip), outer_addr  C later passes use the addmul prologue with the same remainder
+       jmp     L(mul_2_entry_3)
+
+       ALIGN(16)
+L(mul_2_prologue_0):
+       mov     %rax, w0
+       mov     %rdx, w1
+       lea     L(addmul_prologue_0)(%rip), outer_addr
+       jmp     L(mul_2_entry_0)
+
+       ALIGN(16)
+L(mul_2_prologue_1):
+       mov     %rax, w3
+       mov     %rdx, w0
+       mov     $0, R32(w1)
+       lea     L(addmul_prologue_1)(%rip), outer_addr
+       jmp     L(mul_2_entry_1)
+
+       ALIGN(16)
+L(mul_2_prologue_2):
+       mov     %rax, w2
+       mov     %rdx, w3
+       mov     $0, R32(w0)
+       mov     16(up,n,8), %rax        C next limb of up, multiplied at L(mul_2_entry_2)
+       lea     L(addmul_prologue_2)(%rip), outer_addr
+       jmp     L(mul_2_entry_2)
+
+
+       C this loop is 18 c/loop = 2.25 c/l on K8
+
+       ALIGN(16)
+L(mul_2_top):
+       mov     -8(up,n,8), %rax
+       mul     v1
+       add     %rax, w0
+       adc     %rdx, w1
+L(mul_2_entry_0):
+       mov     $0, R32(w2)
+       mov     (up,n,8), %rax
+       mul     v0
+       add     %rax, w0
+       mov     (up,n,8), %rax          C same limb again, this time for v1
+       adc     %rdx, w1
+       adc     $0, R32(w2)
+       mul     v1
+       add     %rax, w1
+       mov     w0, (rp,n,8)
+       adc     %rdx, w2
+L(mul_2_entry_3):
+       mov     8(up,n,8), %rax
+       mul     v0
+       mov     $0, R32(w3)
+       add     %rax, w1
+       adc     %rdx, w2
+       mov     $0, R32(w0)
+       adc     $0, R32(w3)
+       mov     8(up,n,8), %rax
+       mov     w1, 8(rp,n,8)
+       mul     v1
+       add     %rax, w2
+       mov     16(up,n,8), %rax
+       adc     %rdx, w3
+L(mul_2_entry_2):
+       mov     $0, R32(w1)
+       mul     v0
+       add     %rax, w2
+       mov     16(up,n,8), %rax
+       adc     %rdx, w3
+       adc     $0, R32(w0)
+       mul     v1
+       add     %rax, w3
+       mov     w2, 16(rp,n,8)
+       adc     %rdx, w0
+L(mul_2_entry_1):
+       mov     24(up,n,8), %rax
+       mul     v0
+       add     %rax, w3
+       adc     %rdx, w0
+       adc     $0, R32(w1)
+       add     $4, n                   C four limbs per iteration
+       mov     w3, -8(rp,n,8)          C mov leaves the flags from add intact
+       jnz     L(mul_2_top)            C loop until n reaches zero
+
+       mov     w0, (rp)                C two remaining limbs of this pass
+       mov     w1, 8(rp)
+
+       sub     $2, vn                  C two limbs of vp have been handled
+       jz      L(ret)
+
+       lea     16(vp), vp              C next pass: vp moves forward two limbs,
+       lea     -16(up), up             C up moves back two limbs
+
+       mov     un, n                   C reload the counter (negated, rounded row length)
+       mov     (vp), v0
+       mov     8(vp), v1
+
+       jmp     *outer_addr             C continue in the addmul prologue selected above
+
+C ===========================================================
+C     addmul_2 for remaining vp's
+
+       ALIGN(16)
+L(addmul_prologue_0):
+       mov     -8(up,n,8), %rax
+       mul     v1                      C each prologue forms one product with v1 and clears the spare carry registers
+       mov     %rax, w1
+       mov     %rdx, w2
+       mov     $0, R32(w3)
+       jmp     L(addmul_entry_0)
+
+       ALIGN(16)
+L(addmul_prologue_1):
+       mov     16(up,n,8), %rax
+       mul     v1
+       mov     %rax, w0
+       mov     %rdx, w1
+       mov     $0, R32(w2)
+       mov     24(up,n,8), %rax        C limb multiplied by v0 at L(addmul_entry_1)
+       jmp     L(addmul_entry_1)
+
+       ALIGN(16)
+L(addmul_prologue_2):
+       mov     8(up,n,8), %rax
+       mul     v1
+       mov     %rax, w3
+       mov     %rdx, w0
+       mov     $0, R32(w1)
+       jmp     L(addmul_entry_2)
+
+       ALIGN(16)
+L(addmul_prologue_3):
+       mov     (up,n,8), %rax
+       mul     v1
+       mov     %rax, w2
+       mov     %rdx, w3
+       mov     $0, R32(w0)
+       mov     $0, R32(w1)
+       jmp     L(addmul_entry_3)
+
+       C this loop is 19 c/loop = 2.375 c/l on K8
+
+       ALIGN(16)
+L(addmul_top):
+       mov     $0, R32(w3)
+       add     %rax, w0
+       mov     -8(up,n,8), %rax
+       adc     %rdx, w1
+       adc     $0, R32(w2)
+       mul     v1
+       add     w0, -8(rp,n,8)          C accumulate into the existing result limb
+       adc     %rax, w1
+       adc     %rdx, w2
+L(addmul_entry_0):
+       mov     (up,n,8), %rax
+       mul     v0
+       add     %rax, w1
+       mov     (up,n,8), %rax          C same limb again, this time for v1
+       adc     %rdx, w2
+       adc     $0, R32(w3)
+       mul     v1
+       add     w1, (rp,n,8)
+       mov     $0, R32(w1)             C mov leaves the carry flag intact
+       adc     %rax, w2
+       mov     $0, R32(w0)
+       adc     %rdx, w3
+L(addmul_entry_3):
+       mov     8(up,n,8), %rax
+       mul     v0
+       add     %rax, w2
+       mov     8(up,n,8), %rax
+       adc     %rdx, w3
+       adc     $0, R32(w0)
+       mul     v1
+       add     w2, 8(rp,n,8)
+       adc     %rax, w3
+       adc     %rdx, w0
+L(addmul_entry_2):
+       mov     16(up,n,8), %rax
+       mul     v0
+       add     %rax, w3
+       mov     16(up,n,8), %rax
+       adc     %rdx, w0
+       adc     $0, R32(w1)
+       mul     v1
+       add     w3, 16(rp,n,8)
+       nop                     C don't ask...
+       adc     %rax, w0
+       mov     $0, R32(w2)
+       mov     24(up,n,8), %rax
+       adc     %rdx, w1
+L(addmul_entry_1):
+       mul     v0
+       add     $4, n                   C four limbs per iteration
+       jnz     L(addmul_top)           C loop until n reaches zero
+
+       add     %rax, w0                C fold in the last product
+       adc     %rdx, w1
+       adc     $0, R32(w2)
+
+       add     w0, -8(rp)              C add the final three limbs into the result
+       adc     w1, (rp)
+       adc     w2, 8(rp)
+
+       sub     $2, vn                  C two limbs of vp have been handled
+       jz      L(ret)
+
+       lea     16(vp), vp              C next pass: vp moves forward two limbs,
+       lea     -16(up), up             C up moves back two limbs
+
+       mov     un, n                   C reload the counter (negated, rounded row length)
+       mov     (vp), v0
+       mov     8(vp), v1
+
+       jmp     *outer_addr             C re-enter through the same addmul prologue
+
+C ===========================================================
+C     accumulate along diagonals if un - vn is small
+
+       ALIGN(16)
+L(diagonal):
+       xor     R32(w0), R32(w0)        C w0, w1, w2 form a three limb accumulator
+       xor     R32(w1), R32(w1)
+       xor     R32(w2), R32(w2)
+
+       neg     un                      C un counts up to zero, one output limb per pass
+
+       mov     R32(vn), %eax
+       and     $3, %eax                C dispatch on vn mod 4
+       jz      L(diag_prologue_0)
+       cmp     $2, %eax
+       jc      L(diag_prologue_1)      C remainder 1
+       jz      L(diag_prologue_2)      C remainder 2; remainder 3 falls through
+
+L(diag_prologue_3):
+       lea     -8(vp), vp              C bias vp and vn so that entering the unrolled loop
+       mov     vp, vp_inner            C at L(diag_entry_3) starts with the limb at the original vp
+       add     $1, vn
+       mov     vn, n
+       lea     L(diag_entry_3)(%rip), outer_addr       C later passes restart at the entry point itself
+       jmp     L(diag_entry_3)
+
+L(diag_prologue_0):
+       mov     vp, vp_inner
+       mov     vn, n
+       lea     0(%rip), outer_addr     C address of the next instruction, so later passes redo the load below
+       mov     -8(up,n,8), %rax
+       jmp     L(diag_entry_0)
+
+L(diag_prologue_1):
+       lea     8(vp), vp               C bias vp and vn for entry at L(diag_entry_1)
+       mov     vp, vp_inner
+       add     $3, vn
+       mov     vn, n
+       lea     0(%rip), outer_addr     C address of the next instruction, so later passes redo the load below
+       mov     -8(vp_inner), %rax
+       jmp     L(diag_entry_1)
+
+L(diag_prologue_2):
+       lea     -16(vp), vp             C bias vp and vn for entry at L(diag_entry_2)
+       mov     vp, vp_inner
+       add     $2, vn
+       mov     vn, n
+       lea     0(%rip), outer_addr     C address of the next instruction, so later passes redo the load below
+       mov     16(vp_inner), %rax
+       jmp     L(diag_entry_2)
+
+
+       C this loop is 10 c/loop = 2.5 c/l on K8
+
+       ALIGN(16)
+L(diag_top):
+       add     %rax, w0
+       adc     %rdx, w1
+       mov     -8(up,n,8), %rax
+       adc     $0, w2
+L(diag_entry_0):
+       mulq    (vp_inner)
+       add     %rax, w0
+       adc     %rdx, w1
+       adc     $0, w2
+L(diag_entry_3):
+       mov     -16(up,n,8), %rax
+       mulq    8(vp_inner)
+       add     %rax, w0
+       mov     16(vp_inner), %rax
+       adc     %rdx, w1
+       adc     $0, w2
+L(diag_entry_2):
+       mulq    -24(up,n,8)
+       add     %rax, w0
+       mov     24(vp_inner), %rax
+       adc     %rdx, w1
+       lea     32(vp_inner), vp_inner  C lea advances the pointer without touching the carry
+       adc     $0, w2
+L(diag_entry_1):
+       mulq    -32(up,n,8)
+       sub     $4, n                   C four products per iteration
+       jnz     L(diag_top)
+
+       add     %rax, w0                C fold in the last product
+       adc     %rdx, w1
+       adc     $0, w2
+
+       mov     w0, (rp,un,8)           C low accumulator limb is a finished output limb
+
+       inc     un
+       jz      L(diag_end)
+
+       mov     vn, n                   C restart the inner loop with the biased vn and vp
+       mov     vp, vp_inner
+
+       lea     8(up), up               C next pass starts one limb further along up
+       mov     w1, w0                  C shift the accumulator down by one limb
+       mov     w2, w1
+       xor     R32(w2), R32(w2)
+
+       jmp     *outer_addr
+
+L(diag_end):
+       mov     w1, (rp)                C the two remaining accumulator limbs end the output
+       mov     w2, 8(rp)
+
+L(ret):        pop     %r15
+       pop     %r14
+       pop     %r13
+       pop     %r12
+       pop     %rbp
+       pop     %rbx
+       FUNC_EXIT()
+       ret
+EPILOGUE()
diff --git a/mpn/x86_64/nano/copyd.asm b/mpn/x86_64/nano/copyd.asm

new file mode 100644 (file)

index 0000000..6c6e9db
--- /dev/null
+++ b/mpn/x86_64/nano/copyd.asm
@@ -0,0 +1,26 @@
+dnl  X86-64 mpn_copyd for VIA Nano, using the fastsse palignr code.
+
+dnl  Copyright 2012 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
+
+MULFUNC_PROLOGUE(mpn_copyd)
+include_mpn(`x86_64/fastsse/copyd-palignr.asm')
diff --git a/mpn/x86_64/nano/copyi.asm b/mpn/x86_64/nano/copyi.asm

new file mode 100644 (file)

index 0000000..4714449
--- /dev/null
+++ b/mpn/x86_64/nano/copyi.asm
@@ -0,0 +1,26 @@
+dnl  X86-64 mpn_copyi for VIA Nano, using the fastsse palignr code.
+
+dnl  Copyright 2012 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
+
+MULFUNC_PROLOGUE(mpn_copyi)
+include_mpn(`x86_64/fastsse/copyi-palignr.asm')
diff --git a/mpn/x86_64/nano/dive_1.asm b/mpn/x86_64/nano/dive_1.asm

new file mode 100644 (file)

index 0000000..2edbcd9
--- /dev/null
+++ b/mpn/x86_64/nano/dive_1.asm
@@ -0,0 +1,156 @@
+dnl  AMD64 mpn_divexact_1 -- mpn by limb exact division.
+
+dnl  Copyright 2001, 2002, 2004, 2005, 2006, 2010, 2011, 2012 Free Software
+dnl  Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+C           cycles/limb
+C             norm            unorm
+C AMD K8,K9    11              11
+C AMD K10      11              11
+C Intel P4      ?
+C Intel core2  13.5            13.25
+C Intel corei  14.25
+C Intel atom   34              36
+C VIA nano     19.25           19.25
+
+
+C INPUT PARAMETERS
+C rp           rdi
+C up           rsi
+C n            rdx
+C divisor      rcx
+
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
+
+ASM_START()
+       TEXT
+       ALIGN(16)
+PROLOGUE(mpn_divexact_1)
+       FUNC_ENTRY(4)
+       push    %rbx
+
+       mov     %rcx, %rax              C divisor
+       xor     R32(%rcx), R32(%rcx)    C shift count
+       mov     %rdx, %r8               C n
+
+       bt      $0, R32(%rax)
+       jc      L(odd)                  C skip bsfq unless divisor is even
+       bsf     %rax, %rcx              C count = index of lowest set bit
+       shr     R8(%rcx), %rax          C strip the low zero bits from d
+L(odd):        mov     %rax, %rbx
+       shr     R32(%rax)
+       and     $127, R32(%rax)         C d/2, 7 bits
+
+       LEA(    binvert_limb_table, %rdx)
+
+       movzbl  (%rdx,%rax), R32(%rax)  C inv 8 bits
+
+       mov     %rbx, %r11              C d without twos
+
+       lea     (%rax,%rax), R32(%rdx)  C 2*inv
+       imul    R32(%rax), R32(%rax)    C inv*inv
+       imul    R32(%rbx), R32(%rax)    C inv*inv*d
+       sub     R32(%rax), R32(%rdx)    C inv = 2*inv - inv*inv*d, 16 bits
+
+       lea     (%rdx,%rdx), R32(%rax)  C 2*inv
+       imul    R32(%rdx), R32(%rdx)    C inv*inv
+       imul    R32(%rbx), R32(%rdx)    C inv*inv*d
+       sub     R32(%rdx), R32(%rax)    C inv = 2*inv - inv*inv*d, 32 bits
+
+       lea     (%rax,%rax), %r10       C 2*inv
+       imul    %rax, %rax              C inv*inv
+       imul    %rbx, %rax              C inv*inv*d
+       sub     %rax, %r10              C inv = 2*inv - inv*inv*d, 64 bits
+
+       lea     (%rsi,%r8,8), %rsi      C up end
+       lea     -8(%rdi,%r8,8), %rdi    C rp end
+       neg     %r8                     C -n
+
+       mov     (%rsi,%r8,8), %rax      C up[0]
+
+       inc     %r8
+       jz      L(one)                  C n == 1
+
+       test    R32(%rcx), R32(%rcx)
+       jnz     L(unorm)                C branch if count != 0
+       xor     R32(%rbx), R32(%rbx)    C carry bit starts at zero
+       jmp     L(nent)
+
+       ALIGN(8)
+L(ntop):mul    %r11                    C carry limb in rdx     0 10
+       mov     -8(%rsi,%r8,8), %rax    C
+       sub     %rbx, %rax              C apply carry bit
+       setc    %bl                     C
+       sub     %rdx, %rax              C apply carry limb      5
+       adc     $0, %rbx                C                       6
+L(nent):imul   %r10, %rax              C                       6
+       mov     %rax, (%rdi,%r8,8)      C
+       inc     %r8                     C
+       jnz     L(ntop)
+
+       mov     -8(%rsi), %r9           C up high limb
+       jmp     L(com)
+
+L(unorm):
+       mov     (%rsi,%r8,8), %r9       C up[1]
+       shr     R8(%rcx), %rax          C
+       neg     R32(%rcx)               C cl = -count; 64-bit shifts use cl mod 64, i.e. 64-count
+       shl     R8(%rcx), %r9           C
+       neg     R32(%rcx)               C back to cl = count
+       or      %r9, %rax               C (up[1] << (64-count)) | (up[0] >> count)
+       xor     R32(%rbx), R32(%rbx)    C carry bit starts at zero
+       jmp     L(uent)
+
+       ALIGN(8)
+L(utop):mul    %r11                    C carry limb in rdx     0 10
+       mov     (%rsi,%r8,8), %rax      C
+       shl     R8(%rcx), %rax          C
+       neg     R32(%rcx)               C back to cl = count
+       or      %r9, %rax               C combine with the bits saved in r9 at L(uent)
+       sub     %rbx, %rax              C apply carry bit
+       setc    %bl                     C
+       sub     %rdx, %rax              C apply carry limb      5
+       adc     $0, %rbx                C                       6
+L(uent):imul   %r10, %rax              C                       6
+       mov     (%rsi,%r8,8), %r9       C
+       shr     R8(%rcx), %r9           C
+       neg     R32(%rcx)               C cl = -count for the shl at L(utop)
+       mov     %rax, (%rdi,%r8,8)      C
+       inc     %r8                     C
+       jnz     L(utop)
+
+L(com):        mul     %r11                    C carry limb in rdx
+       sub     %rbx, %r9               C apply carry bit
+       sub     %rdx, %r9               C apply carry limb
+       imul    %r10, %r9
+       mov     %r9, (%rdi)             C store the high limb of the result
+       pop     %rbx
+       FUNC_EXIT()
+       ret
+
+L(one):        shr     R8(%rcx), %rax          C single limb: shift out the twos, then multiply by inv
+       imul    %r10, %rax
+       mov     %rax, (%rdi)
+       pop     %rbx
+       FUNC_EXIT()
+       ret
+EPILOGUE()
diff --git a/mpn/x86_64/nano/gcd_1.asm b/mpn/x86_64/nano/gcd_1.asm

new file mode 100644 (file)

index 0000000..bbbdbcd
--- /dev/null
+++ b/mpn/x86_64/nano/gcd_1.asm
@@ -0,0 +1,26 @@
+dnl  AMD64 mpn_gcd_1.
+
+dnl  Copyright 2012 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
+
+MULFUNC_PROLOGUE(mpn_gcd_1)
+include_mpn(`x86_64/core2/gcd_1.asm')
diff --git a/mpn/x86_64/nano/gmp-mparam.h b/mpn/x86_64/nano/gmp-mparam.h

index e029a3ab3b6a02f4411ca155e9dcfcf0ae105317..d93548faef232141b0bdd08e04635fb9779acb9a 100644 (file)
--- a/mpn/x86_64/nano/gmp-mparam.h
+++ b/mpn/x86_64/nano/gmp-mparam.h
@@ -1,7 +1,7 @@
  /* VIA Nano gmp-mparam.h -- Compiler/machine parameter header file.
  
  Copyright 1991, 1993, 1994, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007,
-2008, 2009, 2010 Free Software Foundation, Inc.
+2008, 2009, 2010, 2012 Free Software Foundation, Inc.
  
  This file is part of the GNU MP Library.
  
@@ -21,43 +21,50 @@ along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
  #define GMP_LIMB_BITS 64
  #define BYTES_PER_MP_LIMB 8
  
+#define SHLD_SLOW 1
+#define SHRD_SLOW 1
+
  /* 1600 MHz Nano 2xxx */
  
  #define MOD_1_NORM_THRESHOLD                 0  /* always */
  #define MOD_1_UNNORM_THRESHOLD               0  /* always */
-#define MOD_1N_TO_MOD_1_1_THRESHOLD          7
-#define MOD_1U_TO_MOD_1_1_THRESHOLD          5
-#define MOD_1_1_TO_MOD_1_2_THRESHOLD         7
-#define MOD_1_2_TO_MOD_1_4_THRESHOLD        14
-#define PREINV_MOD_1_TO_MOD_1_THRESHOLD     13
+#define MOD_1N_TO_MOD_1_1_THRESHOLD          4
+#define MOD_1U_TO_MOD_1_1_THRESHOLD          3
+#define MOD_1_1_TO_MOD_1_2_THRESHOLD        18
+#define MOD_1_2_TO_MOD_1_4_THRESHOLD        20
+#define PREINV_MOD_1_TO_MOD_1_THRESHOLD      8
  #define USE_PREINV_DIVREM_1                  1  /* native */
+#define DIV_QR_2_PI2_THRESHOLD           MP_SIZE_T_MAX  /* never */
  #define DIVEXACT_1_THRESHOLD                 0  /* always (native) */
-#define BMOD_1_TO_MOD_1_THRESHOLD           24
+#define BMOD_1_TO_MOD_1_THRESHOLD           22
  
-#define MUL_TOOM22_THRESHOLD                28
+#define MUL_TOOM22_THRESHOLD                27
  #define MUL_TOOM33_THRESHOLD                33
-#define MUL_TOOM44_THRESHOLD               292
-#define MUL_TOOM6H_THRESHOLD               746
-#define MUL_TOOM8H_THRESHOLD               866
+#define MUL_TOOM44_THRESHOLD               290
+#define MUL_TOOM6H_THRESHOLD               718
+#define MUL_TOOM8H_THRESHOLD               915
  
-#define MUL_TOOM32_TO_TOOM43_THRESHOLD      73
-#define MUL_TOOM32_TO_TOOM53_THRESHOLD     201
-#define MUL_TOOM42_TO_TOOM53_THRESHOLD     211
-#define MUL_TOOM42_TO_TOOM63_THRESHOLD     219
+#define MUL_TOOM32_TO_TOOM43_THRESHOLD      67
+#define MUL_TOOM32_TO_TOOM53_THRESHOLD     184
+#define MUL_TOOM42_TO_TOOM53_THRESHOLD     193
+#define MUL_TOOM42_TO_TOOM63_THRESHOLD     193
+#define MUL_TOOM43_TO_TOOM54_THRESHOLD     287
  
  #define SQR_BASECASE_THRESHOLD               0  /* always (native) */
-#define SQR_TOOM2_THRESHOLD                 38
-#define SQR_TOOM3_THRESHOLD                 77
-#define SQR_TOOM4_THRESHOLD                620
-#define SQR_TOOM6_THRESHOLD                996
-#define SQR_TOOM8_THRESHOLD               1138
+#define SQR_TOOM2_THRESHOLD                 34
+#define SQR_TOOM3_THRESHOLD                 93
+#define SQR_TOOM4_THRESHOLD                587
+#define SQR_TOOM6_THRESHOLD               1095
+#define SQR_TOOM8_THRESHOLD                  0  /* always */
+
+#define MULMID_TOOM42_THRESHOLD             28
  
-#define MULMOD_BNM1_THRESHOLD               15
+#define MULMOD_BNM1_THRESHOLD               13
  #define SQRMOD_BNM1_THRESHOLD               17
  
-#define MUL_FFT_MODF_THRESHOLD             468  /* k = 5 */
+#define MUL_FFT_MODF_THRESHOLD             376  /* k = 5 */
  #define MUL_FFT_TABLE3                                      \
-  { {    468, 5}, {     21, 6}, {     11, 5}, {     23, 6}, \
+  { {    376, 5}, {     21, 6}, {     11, 5}, {     23, 6}, \
      {     12, 5}, {     25, 6}, {     13, 5}, {     27, 6}, \
      {     15, 5}, {     31, 6}, {     21, 7}, {     11, 6}, \
      {     24, 7}, {     13, 6}, {     27, 7}, {     15, 6}, \
@@ -112,11 +119,11 @@ along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
      { 131072,18}, { 262144,19}, { 524288,20}, {1048576,21}, \
      {2097152,22}, {4194304,23}, {8388608,24} }
  #define MUL_FFT_TABLE3_SIZE 215
-#define MUL_FFT_THRESHOLD                 3712
+#define MUL_FFT_THRESHOLD                 3200
  
-#define SQR_FFT_MODF_THRESHOLD             432  /* k = 5 */
+#define SQR_FFT_MODF_THRESHOLD             400  /* k = 5 */
  #define SQR_FFT_TABLE3                                      \
-  { {    432, 5}, {     21, 6}, {     11, 5}, {     23, 6}, \
+  { {    400, 5}, {     21, 6}, {     11, 5}, {     23, 6}, \
      {     12, 5}, {     25, 6}, {     21, 7}, {     11, 6}, \
      {     25, 7}, {     13, 6}, {     27, 7}, {     25, 8}, \
      {     13, 7}, {     28, 8}, {     15, 7}, {     32, 8}, \
@@ -171,38 +178,45 @@ along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
      { 131072,18}, { 262144,19}, { 524288,20}, {1048576,21}, \
      {2097152,22}, {4194304,23}, {8388608,24} }
  #define SQR_FFT_TABLE3_SIZE 215
-#define SQR_FFT_THRESHOLD                 3264
+#define SQR_FFT_THRESHOLD                 2880
  
-#define MULLO_BASECASE_THRESHOLD            11
-#define MULLO_DC_THRESHOLD                   0  /* never mpn_mullo_basecase */
+#define MULLO_BASECASE_THRESHOLD             0  /* always */
+#define MULLO_DC_THRESHOLD                  79
  #define MULLO_MUL_N_THRESHOLD             6253
  
-#define DC_DIV_QR_THRESHOLD                 53
-#define DC_DIVAPPR_Q_THRESHOLD             151
+#define DC_DIV_QR_THRESHOLD                 54
+#define DC_DIVAPPR_Q_THRESHOLD             153
  #define DC_BDIV_QR_THRESHOLD                51
-#define DC_BDIV_Q_THRESHOLD                 79
+#define DC_BDIV_Q_THRESHOLD                 52
  
-#define INV_MULMOD_BNM1_THRESHOLD           82
-#define INV_NEWTON_THRESHOLD               149
-#define INV_APPR_THRESHOLD                 155
+#define INV_MULMOD_BNM1_THRESHOLD           52
+#define INV_NEWTON_THRESHOLD               150
+#define INV_APPR_THRESHOLD                 151
  
-#define BINV_NEWTON_THRESHOLD              228
-#define REDC_1_TO_REDC_2_THRESHOLD          12
-#define REDC_2_TO_REDC_N_THRESHOLD          77
+#define BINV_NEWTON_THRESHOLD              232
+#define REDC_1_TO_REDC_2_THRESHOLD          13
+#define REDC_2_TO_REDC_N_THRESHOLD          55
  
-#define MU_DIV_QR_THRESHOLD               1787
-#define MU_DIVAPPR_Q_THRESHOLD            1970
-#define MUPI_DIV_QR_THRESHOLD               74
-#define MU_BDIV_QR_THRESHOLD              1334
-#define MU_BDIV_Q_THRESHOLD               1652
+#define MU_DIV_QR_THRESHOLD               1499
+#define MU_DIVAPPR_Q_THRESHOLD            1620
+#define MUPI_DIV_QR_THRESHOLD               75
+#define MU_BDIV_QR_THRESHOLD              1142
+#define MU_BDIV_Q_THRESHOLD               1499
+
+#define POWM_SEC_TABLE  4,29,387,1421
  
  #define MATRIX22_STRASSEN_THRESHOLD         17
-#define HGCD_THRESHOLD                      93
-#define GCD_DC_THRESHOLD                   245
-#define GCDEXT_DC_THRESHOLD                456
-#define JACOBI_BASE_METHOD                   1
+#define HGCD_THRESHOLD                     112
+#define HGCD_APPR_THRESHOLD                185
+#define HGCD_REDUCE_THRESHOLD             3134
+#define GCD_DC_THRESHOLD                   492
+#define GCDEXT_DC_THRESHOLD                465
+#define JACOBI_BASE_METHOD                   4
  
  #define GET_STR_DC_THRESHOLD                11
-#define GET_STR_PRECOMPUTE_THRESHOLD        24
-#define SET_STR_DC_THRESHOLD               552
-#define SET_STR_PRECOMPUTE_THRESHOLD      1898
+#define GET_STR_PRECOMPUTE_THRESHOLD        25
+#define SET_STR_DC_THRESHOLD               414
+#define SET_STR_PRECOMPUTE_THRESHOLD      1945
+
+#define FAC_DSC_THRESHOLD                 1517
+#define FAC_ODD_THRESHOLD                   44
diff --git a/mpn/x86_64/nano/popcount.asm b/mpn/x86_64/nano/popcount.asm

new file mode 100644 (file)

index 0000000..4753acf
--- /dev/null
+++ b/mpn/x86_64/nano/popcount.asm
@@ -0,0 +1,24 @@
+dnl  x86-64 mpn_popcount.
+
+dnl  Copyright 2007, 2011 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+
+include(`../config.m4')
+
+MULFUNC_PROLOGUE(mpn_popcount)
+include_mpn(`x86/pentium4/sse2/popcount.asm')
diff --git a/mpn/x86_64/pentium4/aors_n.asm b/mpn/x86_64/pentium4/aors_n.asm

index 90f5a219b97e44b6cdbcf0065e7ab363d99cd3bf..f238d03b01a991d273f01564f1e5af4afba7b271 100644 (file)
--- a/mpn/x86_64/pentium4/aors_n.asm
+++ b/mpn/x86_64/pentium4/aors_n.asm
@@ -1,6 +1,8 @@
  dnl  x86-64 mpn_add_n/mpn_sub_n optimized for Pentium 4.
  
-dnl  Copyright 2007, 2008 Free Software Foundation, Inc.
+dnl  Contributed to the GNU project by Torbjorn Granlund.
+
+dnl  Copyright 2007, 2008, 2010, 2011, 2012 Free Software Foundation, Inc.
  
  dnl  This file is part of the GNU MP Library.
  
@@ -21,10 +23,13 @@ include(`../config.m4')
  
  
  C           cycles/limb
-C K8,K9:        2.8
-C K10:          2.8
-C P4:           4
-C P6-15:        3.6-5  (fluctuating)
+C AMD K8,K9     2.8
+C AMD K10       2.8
+C Intel P4      4
+C Intel core2   3.6-5  (fluctuating)
+C Intel corei   ?
+C Intel atom    ?
+C VIA nano      ?
  
  
  C INPUT PARAMETERS
@@ -43,19 +48,20 @@ ifdef(`OPERATION_sub_n', `
         define(func,          mpn_sub_n)
         define(func_nc,       mpn_sub_nc)')
  
-MULFUNC_PROLOGUE(mpn_add_n mpn_add_nc mpn_sub_n mpn_sub_nc)
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
  
+MULFUNC_PROLOGUE(mpn_add_n mpn_add_nc mpn_sub_n mpn_sub_nc)
  ASM_START()
-
         TEXT
-       ALIGN(16)
-
-PROLOGUE(func_nc)
-       jmp     L(ent)
-EPILOGUE()
-
  PROLOGUE(func)
+       FUNC_ENTRY(4)
         xor     %r8, %r8
+IFDOS(`        jmp     L(ent)          ')
+EPILOGUE()
+PROLOGUE(func_nc)
+       FUNC_ENTRY(4)
+IFDOS(`        mov     56(%rsp), %r8   ')
  L(ent):        push    %rbx
         push    %r12
  
@@ -174,5 +180,6 @@ L(1):       mov     %r11, 8(rp)
  L(ret):        mov     R32(%rbx), R32(%rax)
         pop     %r12
         pop     %rbx
+       FUNC_EXIT()
         ret
  EPILOGUE()
diff --git a/mpn/x86_64/pentium4/aorslsh1_n.asm b/mpn/x86_64/pentium4/aorslsh1_n.asm

index 0723f3e6cad16310a6101c7e6711c12e4eb20320..24aecff6186e208a562297733bd6dcfa5e1345c9 100644 (file)
--- a/mpn/x86_64/pentium4/aorslsh1_n.asm
+++ b/mpn/x86_64/pentium4/aorslsh1_n.asm
@@ -1,7 +1,9 @@
  dnl  AMD64 mpn_addlsh1_n, mpn_sublsh1_n -- rp[] = up[] +- (vp[] << 1),
-dnl  optimized for Pentium 4.
+dnl  optimised for Pentium 4.
  
-dnl  Copyright 2008 Free Software Foundation, Inc.
+dnl  Contributed to the GNU project by Torbjorn Granlund.
+
+dnl  Copyright 2008, 2010, 2011, 2012 Free Software Foundation, Inc.
  
  dnl  This file is part of the GNU MP Library.
  
@@ -20,173 +22,18 @@ dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
  
  include(`../config.m4')
  
-C           cycles/limb
-C K8,K9:        3.8
-C K10:          4.8
-C P4:           5.8
-C P6-15:        ?
-
-
-C INPUT PARAMETERS
-define(`rp',`%rdi')
-define(`up',`%rsi')
-define(`vp',`%rdx')
-define(`n', `%rcx')
+define(LSH, 1)
+define(RSH, 31)                        C 31, not 63, since we use 32-bit ops
  
  ifdef(`OPERATION_addlsh1_n', `
-       define(ADDSUB,        add)
-       define(func,          mpn_addlsh1_n)')
+  define(ADDSUB,       add)
+  define(func,         mpn_addlsh1_n)')
  ifdef(`OPERATION_sublsh1_n', `
-       define(ADDSUB,        sub)
-       define(func,          mpn_sublsh1_n)')
-
-MULFUNC_PROLOGUE(mpn_addlsh1_n mpn_sublsh1_n)
-
-ASM_START()
-       TEXT
-       ALIGN(16)
-PROLOGUE(func)
-       push    %rbx
-       push    %r12
-       push    %rbp
-
-       mov     (vp), %r9
-       shl     %r9
-       mov     4(vp), R32(%rbp)
-
-       xor     R32(%rbx), R32(%rbx)
+  define(ADDSUB,       sub)
+  define(func,         mpn_sublsh1_n)')
  
-       mov     R32(n), R32(%rax)
-       and     $3, R32(%rax)
-       jne     L(n00)          C n = 0, 4, 8, ...
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
  
-       mov     (up), %r8
-       mov     8(up), %r10
-       shr     $31, R32(%rbp)
-       ADDSUB  %r9, %r8
-       mov     8(vp), %r9
-       lea     (%rbp,%r9,2), %r9
-       setc    R8(%rax)
-       mov     12(vp), R32(%rbp)
-       lea     -16(rp), rp
-       jmp     L(L00)
-
-L(n00):        cmp     $2, R32(%rax)
-       jnc     L(n01)          C n = 1, 5, 9, ...
-       mov     (up), %r11
-       lea     -8(rp), rp
-       shr     $31, R32(%rbp)
-       ADDSUB  %r9, %r11
-       setc    R8(%rbx)
-       dec     n
-       jz      L(1)            C jump for n = 1
-       mov     8(up), %r8
-       mov     8(vp), %r9
-       lea     (%rbp,%r9,2), %r9
-       mov     12(vp), R32(%rbp)
-       lea     8(up), up
-       lea     8(vp), vp
-       jmp     L(L01)
-
-L(n01):        jne     L(n10)          C n = 2, 6, 10, ...
-       mov     (up), %r12
-       mov     8(up), %r11
-       shr     $31, R32(%rbp)
-       ADDSUB  %r9, %r12
-       mov     8(vp), %r9
-       lea     (%rbp,%r9,2), %r9
-       setc    R8(%rax)
-       mov     12(vp), R32(%rbp)
-       lea     16(up), up
-       lea     16(vp), vp
-       jmp     L(L10)
-
-L(n10):        mov     (up), %r10
-       mov     8(up), %r12
-       shr     $31, R32(%rbp)
-       ADDSUB  %r9, %r10
-       mov     8(vp), %r9
-       lea     (%rbp,%r9,2), %r9
-       setc    R8(%rbx)
-       mov     12(vp), R32(%rbp)
-       lea     -24(rp), rp
-       lea     -8(up), up
-       lea     -8(vp), vp
-       jmp     L(L11)
-
-L(c0): mov     $1, R8(%rbx)
-       jmp     L(rc0)
-L(c1): mov     $1, R8(%rax)
-       jmp     L(rc1)
-L(c2): mov     $1, R8(%rbx)
-       jmp     L(rc2)
-
-       ALIGN(16)
-L(top):        mov     (up), %r8       C not on critical path
-       shr     $31, R32(%rbp)
-       ADDSUB  %r9, %r11       C not on critical path
-       mov     (vp), %r9
-       lea     (%rbp,%r9,2), %r9
-       setc    R8(%rbx)        C save carry out
-       mov     4(vp), R32(%rbp)
-       mov     %r12, (rp)
-       ADDSUB  %rax, %r11      C apply previous carry out
-       jc      L(c0)           C jump if ripple
-L(rc0):
-L(L01):        mov     8(up), %r10
-       shr     $31, R32(%rbp)
-       ADDSUB  %r9, %r8
-       mov     8(vp), %r9
-       lea     (%rbp,%r9,2), %r9
-       setc    R8(%rax)
-       mov     12(vp), R32(%rbp)
-       mov     %r11, 8(rp)
-       ADDSUB  %rbx, %r8
-       jc      L(c1)
-L(rc1):
-L(L00):        mov     16(up), %r12
-       shr     $31, R32(%rbp)
-       ADDSUB  %r9, %r10
-       mov     16(vp), %r9
-       lea     (%rbp,%r9,2), %r9
-       setc    R8(%rbx)
-       mov     20(vp), R32(%rbp)
-       mov     %r8, 16(rp)
-       ADDSUB  %rax, %r10
-       jc      L(c2)
-L(rc2):
-L(L11):        mov     24(up), %r11
-       shr     $31, R32(%rbp)
-       ADDSUB  %r9, %r12
-       mov     24(vp), %r9
-       lea     (%rbp,%r9,2), %r9
-       lea     32(up), up
-       lea     32(vp), vp
-       setc    R8(%rax)
-       mov     -4(vp), R32(%rbp)
-       mov     %r10, 24(rp)
-       ADDSUB  %rbx, %r12
-       jc      L(c3)
-L(rc3):        lea     32(rp), rp
-L(L10):        sub     $4, n
-       ja      L(top)
-
-L(end):
-       shr     $31, R32(%rbp)
-       ADDSUB  %r9, %r11
-       setc    R8(%rbx)
-       mov     %r12, (rp)
-       ADDSUB  %rax, %r11
-       jnc     L(1)
-       mov     $1, R8(%rbx)
-L(1):  mov     %r11, 8(rp)
-       lea     (%rbx,%rbp), R32(%rax)
-       pop     %rbp
-       pop     %r12
-       pop     %rbx
-       emms
-       ret
-L(c3): mov     $1, R8(%rax)
-       jmp     L(rc3)
-EPILOGUE()
-ASM_END()
+MULFUNC_PROLOGUE(mpn_addlsh1_n mpn_sublsh1_n)
+include_mpn(`x86_64/pentium4/aorslshC_n.asm')
diff --git a/mpn/x86_64/pentium4/aorslsh2_n.asm b/mpn/x86_64/pentium4/aorslsh2_n.asm

new file mode 100644 (file)

index 0000000..4f2d52b
--- /dev/null
+++ b/mpn/x86_64/pentium4/aorslsh2_n.asm
@@ -0,0 +1,39 @@
+dnl  AMD64 mpn_addlsh2_n, mpn_sublsh2_n -- rp[] = up[] +- (vp[] << 2),
+dnl  optimised for Pentium 4.
+
+dnl  Contributed to the GNU project by Torbjorn Granlund.
+
+dnl  Copyright 2008, 2010, 2011, 2012 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+define(LSH, 2)                 C left-shift count applied to vp[]; RSH below is 32-LSH
+define(RSH, 30)                        C 30, not 62, since we use 32-bit ops
+
+ifdef(`OPERATION_addlsh2_n', `
+  define(ADDSUB,       add)
+  define(func,         mpn_addlsh2_n)')
+ifdef(`OPERATION_sublsh2_n', `
+  define(ADDSUB,       sub)
+  define(func,         mpn_sublsh2_n)')
+
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
+
+MULFUNC_PROLOGUE(mpn_addlsh2_n mpn_sublsh2_n)
+include_mpn(`x86_64/pentium4/aorslshC_n.asm')
diff --git a/mpn/x86_64/pentium4/aorslshC_n.asm b/mpn/x86_64/pentium4/aorslshC_n.asm

new file mode 100644 (file)

index 0000000..c61f6f9
--- /dev/null
+++ b/mpn/x86_64/pentium4/aorslshC_n.asm
@@ -0,0 +1,193 @@
+dnl  AMD64 mpn_addlshC_n, mpn_sublshC_n -- rp[] = up[] +- (vp[] << C), where
+dnl  C is 1, 2, 3.  Optimized for Pentium 4.
+
+dnl  Contributed to the GNU project by Torbjorn Granlund.
+
+dnl  Copyright 2008, 2010, 2011, 2012 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+C           cycles/limb
+C AMD K8,K9     3.8
+C AMD K10       3.8
+C Intel P4      5.8
+C Intel core2   4.75
+C Intel corei   4.75
+C Intel atom    ?
+C VIA nano      4.75
+
+
+C INPUT PARAMETERS
+define(`rp',`%rdi')
+define(`up',`%rsi')
+define(`vp',`%rdx')
+define(`n', `%rcx')
+
+define(M, eval(m4_lshift(1,LSH)))
+
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
+
+ASM_START()
+       TEXT
+       ALIGN(16)
+PROLOGUE(func)
+       FUNC_ENTRY(4)
+       push    %rbx                    C rbx, r12, rbp are saved here and restored before ret
+       push    %r12
+       push    %rbp
+
+       mov     (vp), %r9               C r9 = vp[0]
+       shl     $LSH, %r9               C r9 = vp[0] << LSH (lowest limb, nothing shifted in)
+       mov     4(vp), R32(%rbp)        C ebp = high 32 bits of vp[0]
+
+       xor     R32(%rbx), R32(%rbx)    C rbx = 0; its low byte later receives saved carries
+
+       mov     R32(n), R32(%rax)
+       and     $3, R32(%rax)           C eax = n mod 4, selects the entry into the 4-way unrolled loop
+       jne     L(n00)          C n = 0, 4, 8, ...
+
+       mov     (up), %r8
+       mov     8(up), %r10
+       shr     $RSH, R32(%rbp)         C ebp = the LSH bits of vp[0] pushed out by the left shift
+       ADDSUB  %r9, %r8                C r8 = up[0] +- (vp[0] << LSH)
+       mov     8(vp), %r9
+       lea     (%rbp,%r9,M), %r9       C r9 = vp[1]*M + ebp, i.e. (vp[1] << LSH) plus the bits carried in
+       setc    R8(%rax)                C al = carry/borrow of the ADDSUB above (mov/lea leave flags alone)
+       mov     12(vp), R32(%rbp)       C ebp = high 32 bits of vp[1]
+       lea     -16(rp), rp             C bias rp so the 16(rp) store at L(L00) lands on the first result limb
+       jmp     L(L00)
+
+L(n00):        cmp     $2, R32(%rax)
+       jnc     L(n01)          C n = 1, 5, 9, ...
+       mov     (up), %r11
+       lea     -8(rp), rp              C bias rp so the 8(rp) stores at L(L01) and L(1) land on the first result limb
+       shr     $RSH, R32(%rbp)
+       ADDSUB  %r9, %r11
+       setc    R8(%rbx)
+       dec     n
+       jz      L(1)            C jump for n = 1
+       mov     8(up), %r8
+       mov     8(vp), %r9
+       lea     (%rbp,%r9,M), %r9
+       mov     12(vp), R32(%rbp)
+       lea     8(up), up
+       lea     8(vp), vp
+       jmp     L(L01)
+
+L(n01):        jne     L(n10)          C n = 2, 6, 10, ...
+       mov     (up), %r12
+       mov     8(up), %r11
+       shr     $RSH, R32(%rbp)
+       ADDSUB  %r9, %r12
+       mov     8(vp), %r9
+       lea     (%rbp,%r9,M), %r9
+       setc    R8(%rax)
+       mov     12(vp), R32(%rbp)
+       lea     16(up), up
+       lea     16(vp), vp
+       jmp     L(L10)
+
+L(n10):        mov     (up), %r10
+       mov     8(up), %r12
+       shr     $RSH, R32(%rbp)
+       ADDSUB  %r9, %r10
+       mov     8(vp), %r9
+       lea     (%rbp,%r9,M), %r9
+       setc    R8(%rbx)
+       mov     12(vp), R32(%rbp)
+       lea     -24(rp), rp             C bias rp, up, vp for entering the loop at L(L11)
+       lea     -8(up), up
+       lea     -8(vp), vp
+       jmp     L(L11)
+
+L(c0): mov     $1, R8(%rbx)            C ripple fixups: applying the previous carry carried out, so force the saved carry to 1
+       jmp     L(rc0)
+L(c1): mov     $1, R8(%rax)
+       jmp     L(rc1)
+L(c2): mov     $1, R8(%rbx)
+       jmp     L(rc2)
+
+       ALIGN(16)
+L(top):        mov     (up), %r8       C not on critical path
+       shr     $RSH, R32(%rbp)
+       ADDSUB  %r9, %r11       C not on critical path
+       mov     (vp), %r9
+       lea     (%rbp,%r9,M), %r9
+       setc    R8(%rbx)        C save carry out
+       mov     4(vp), R32(%rbp)
+       mov     %r12, (rp)
+       ADDSUB  %rax, %r11      C apply previous carry out
+       jc      L(c0)           C jump if ripple
+L(rc0):
+L(L01):        mov     8(up), %r10
+       shr     $RSH, R32(%rbp)
+       ADDSUB  %r9, %r8
+       mov     8(vp), %r9
+       lea     (%rbp,%r9,M), %r9
+       setc    R8(%rax)
+       mov     12(vp), R32(%rbp)
+       mov     %r11, 8(rp)
+       ADDSUB  %rbx, %r8
+       jc      L(c1)
+L(rc1):
+L(L00):        mov     16(up), %r12
+       shr     $RSH, R32(%rbp)
+       ADDSUB  %r9, %r10
+       mov     16(vp), %r9
+       lea     (%rbp,%r9,M), %r9
+       setc    R8(%rbx)
+       mov     20(vp), R32(%rbp)
+       mov     %r8, 16(rp)
+       ADDSUB  %rax, %r10
+       jc      L(c2)
+L(rc2):
+L(L11):        mov     24(up), %r11
+       shr     $RSH, R32(%rbp)
+       ADDSUB  %r9, %r12
+       mov     24(vp), %r9
+       lea     (%rbp,%r9,M), %r9
+       lea     32(up), up
+       lea     32(vp), vp
+       setc    R8(%rax)
+       mov     -4(vp), R32(%rbp)       C vp was already advanced by 32, so this is the high half of the limb just loaded
+       mov     %r10, 24(rp)
+       ADDSUB  %rbx, %r12
+       jc      L(c3)
+L(rc3):        lea     32(rp), rp
+L(L10):        sub     $4, n
+       ja      L(top)
+
+L(end):
+       shr     $RSH, R32(%rbp)         C ebp = bits shifted out of the last vp limb loaded
+       ADDSUB  %r9, %r11
+       setc    R8(%rbx)
+       mov     %r12, (rp)
+       ADDSUB  %rax, %r11
+       jnc     L(1)
+       mov     $1, R8(%rbx)
+L(1):  mov     %r11, 8(rp)
+       lea     (%rbx,%rbp), R32(%rax)  C eax = saved carry/borrow + bits shifted out of the last vp limb
+       pop     %rbp
+       pop     %r12
+       pop     %rbx
+       emms                            C NOTE(review): no %mm register is used in this routine; emms looks unnecessary -- confirm
+       FUNC_EXIT()
+       ret
+L(c3): mov     $1, R8(%rax)
+       jmp     L(rc3)
+EPILOGUE()
+ASM_END()
diff --git a/mpn/x86_64/pentium4/gmp-mparam.h b/mpn/x86_64/pentium4/gmp-mparam.h

index 899a45588f55f73909c7edc257ecf7486f186e3b..37c94dc46f33cba0a18d2ab43b880bd73b8c3aa6 100644 (file)
--- a/mpn/x86_64/pentium4/gmp-mparam.h
+++ b/mpn/x86_64/pentium4/gmp-mparam.h
@@ -1,7 +1,7 @@
  /* Pentium 4-64 gmp-mparam.h -- Compiler/machine parameter header file.
  
  Copyright 1991, 1993, 1994, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007,
-2008, 2009, 2012 Free Software Foundation, Inc.
+2008, 2009, 2010 Free Software Foundation, Inc.
  
  This file is part of the GNU MP Library.
  
@@ -23,196 +23,143 @@ along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
  
  /* These routines exist for all x86_64 chips, but they are slower on Pentium4
     than separate add/sub and shift.  Make sure they are not really used.  */
-#undef HAVE_NATIVE_mpn_rsh1add_n
-#undef HAVE_NATIVE_mpn_rsh1sub_n
+#undef HAVE_NATIVE_mpn_rsblsh1_n
+#undef HAVE_NATIVE_mpn_rsblsh2_n
+#undef HAVE_NATIVE_mpn_addlsh_n
+#undef HAVE_NATIVE_mpn_rsblsh_n
  
-/* 3200 MHz Pentium / 2048 Kibyte cache / socket 775 */
+/* 3400 MHz Pentium / 1024 Kibyte cache */
  
  #define MOD_1_NORM_THRESHOLD                 0  /* always */
  #define MOD_1_UNNORM_THRESHOLD               0  /* always */
-#define MOD_1N_TO_MOD_1_1_THRESHOLD          6
-#define MOD_1U_TO_MOD_1_1_THRESHOLD          4
-#define MOD_1_1_TO_MOD_1_2_THRESHOLD         9
-#define MOD_1_2_TO_MOD_1_4_THRESHOLD        16
-#define PREINV_MOD_1_TO_MOD_1_THRESHOLD     11
+#define MOD_1N_TO_MOD_1_1_THRESHOLD          4
+#define MOD_1U_TO_MOD_1_1_THRESHOLD          2
+#define MOD_1_1_TO_MOD_1_2_THRESHOLD        14
+#define MOD_1_2_TO_MOD_1_4_THRESHOLD        36
+#define PREINV_MOD_1_TO_MOD_1_THRESHOLD      8
  #define USE_PREINV_DIVREM_1                  1  /* native */
+#define DIV_QR_2_PI2_THRESHOLD           MP_SIZE_T_MAX  /* never */
  #define DIVEXACT_1_THRESHOLD                 0  /* always (native) */
-#define BMOD_1_TO_MOD_1_THRESHOLD           24
+#define BMOD_1_TO_MOD_1_THRESHOLD           20
  
-#define MUL_TOOM22_THRESHOLD                12
-#define MUL_TOOM33_THRESHOLD                81
-#define MUL_TOOM44_THRESHOLD               121
-#define MUL_TOOM6H_THRESHOLD               270
-#define MUL_TOOM8H_THRESHOLD               430
+#define MUL_TOOM22_THRESHOLD                11
+#define MUL_TOOM33_THRESHOLD                68
+#define MUL_TOOM44_THRESHOLD               120
+#define MUL_TOOM6H_THRESHOLD               157
+#define MUL_TOOM8H_THRESHOLD               236
  
  #define MUL_TOOM32_TO_TOOM43_THRESHOLD      81
-#define MUL_TOOM32_TO_TOOM53_THRESHOLD     138
-#define MUL_TOOM42_TO_TOOM53_THRESHOLD     144
-#define MUL_TOOM42_TO_TOOM63_THRESHOLD      88
+#define MUL_TOOM32_TO_TOOM53_THRESHOLD     131
+#define MUL_TOOM42_TO_TOOM53_THRESHOLD     122
+#define MUL_TOOM42_TO_TOOM63_THRESHOLD      80
+#define MUL_TOOM43_TO_TOOM54_THRESHOLD     106
  
  #define SQR_BASECASE_THRESHOLD               0  /* always (native) */
-#define SQR_TOOM2_THRESHOLD                 20
+#define SQR_TOOM2_THRESHOLD                 18
  #define SQR_TOOM3_THRESHOLD                 81
-#define SQR_TOOM4_THRESHOLD                226
-#define SQR_TOOM6_THRESHOLD                303
-#define SQR_TOOM8_THRESHOLD                454
+#define SQR_TOOM4_THRESHOLD                214
+#define SQR_TOOM6_THRESHOLD                238
+#define SQR_TOOM8_THRESHOLD                430
  
-#define MULMOD_BNM1_THRESHOLD                9
-#define SQRMOD_BNM1_THRESHOLD               11
+#define MULMID_TOOM42_THRESHOLD             16
  
+#define MULMOD_BNM1_THRESHOLD                9
+#define SQRMOD_BNM1_THRESHOLD                9
  
-#define MUL_FFT_MODF_THRESHOLD             240  /* k = 5 */
+#define MUL_FFT_MODF_THRESHOLD             236  /* k = 5 */
  #define MUL_FFT_TABLE3                                      \
-  { {    240, 5}, {      9, 4}, {     19, 5}, {     11, 6}, \
-    {      6, 5}, {     13, 6}, {      7, 5}, {     15, 6}, \
-    {      8, 5}, {     17, 6}, {      9, 5}, {     19, 6}, \
-    {     13, 7}, {      7, 6}, {     15, 7}, {      8, 6}, \
-    {     17, 7}, {      9, 6}, {     19, 7}, {     11, 6}, \
-    {     23, 7}, {     13, 8}, {      7, 7}, {     17, 8}, \
-    {      9, 7}, {     21, 8}, {     11, 7}, {     23, 8}, \
-    {     13, 9}, {      7, 8}, {     15, 7}, {     31, 8}, \
+  { {    236, 5}, {      9, 4}, {     19, 5}, {     13, 6}, \
+    {      9, 5}, {     19, 6}, {     13, 7}, {      7, 6}, \
+    {     15, 7}, {      8, 6}, {     17, 7}, {      9, 6}, \
+    {     19, 7}, {     11, 6}, {     23, 7}, {     13, 8}, \
+    {      7, 7}, {     17, 8}, {      9, 7}, {     21, 8}, \
+    {     11, 7}, {     23, 8}, {     13, 9}, {      7, 8}, \
      {     21, 9}, {     11, 8}, {     25,10}, {      7, 9}, \
      {     15, 8}, {     33, 9}, {     19, 8}, {     39, 9}, \
      {     23, 8}, {     47, 9}, {     27,10}, {     15, 9}, \
      {     39,10}, {     23, 9}, {     51,11}, {     15,10}, \
-    {     31, 9}, {     63,10}, {     39, 9}, {     79,10}, \
-    {     47,11}, {     31,10}, {     63, 9}, {    127, 8}, \
-    {    255,10}, {     79, 9}, {    159,11}, {     47,10}, \
-    {     95,12}, {     31,11}, {     63,10}, {    127, 9}, \
+    {     31, 9}, {     67,10}, {     39, 9}, {     79,10}, \
+    {     47, 9}, {     95,10}, {     55,11}, {     31,10}, \
+    {     79,11}, {     47, 9}, {    191,12}, {     31,11}, \
+    {     63,10}, {    127, 9}, {    255,10}, {    143, 9}, \
      {    287,11}, {     79,10}, {    159, 9}, {    319,10}, \
-    {    175,11}, {     95,10}, {    191, 9}, {    383,10}, \
-    {    207, 9}, {    415,11}, {    111,10}, {    223,12}, \
-    {     63,11}, {    127,10}, {    255,11}, {    143,10}, \
-    {    287,11}, {    159,10}, {    319,11}, {    175,12}, \
-    {     95,11}, {    223,13}, {     63,12}, {    127,11}, \
-    {    287,10}, {    575,12}, {    159,11}, {    319,10}, \
-    {    639,11}, {    351,12}, {    191,11}, {    383,12}, \
-    {    223,11}, {    447,13}, {    127,12}, {    255,11}, \
-    {    511,12}, {    287,11}, {    575,12}, {    319,11}, \
-    {    639,12}, {    351,13}, {    191,12}, {    415,11}, \
-    {    831,12}, {    447,14}, {    127,13}, {    255,12}, \
-    {    511,11}, {   1023,12}, {    543,11}, {   1087,10}, \
-    {   2175,12}, {    575,13}, {    319,12}, {    639,11}, \
-    {   1279,12}, {    703,11}, {   1407,13}, {    383,12}, \
-    {    767,11}, {   1535,12}, {    831,11}, {   1663,13}, \
-    {    447,14}, {    255,13}, {    511,12}, {   1023,11}, \
-    {   2047,12}, {   1087,11}, {   2175,13}, {    575,12}, \
-    {   1151,11}, {   2303,12}, {   1215,11}, {   2431,10}, \
-    {   4863,13}, {    639,12}, {   1279,11}, {   2559,13}, \
-    {    703,12}, {   1407,14}, {    383,13}, {    767,12}, \
-    {   1535,13}, {    831,12}, {   1663,13}, {    895,15}, \
-    {    255,14}, {    511,13}, {   1023,12}, {   2047,13}, \
-    {   1087,12}, {   2175,13}, {   1215,12}, {   2431,11}, \
-    {   4863,14}, {    639,13}, {   1407,12}, {   2815,13}, \
-    {   1471,14}, {    767,13}, {   1663,14}, {    895,13}, \
-    {   1791,12}, {   3583,13}, {   1919,12}, {   3839,15}, \
-    {    511,14}, {   1023,13}, {   2175,14}, {   1151,13}, \
-    {   2303,12}, {   4607,13}, {   2431,12}, {   4863,14}, \
-    {   1279,13}, {   2687,14}, {   1407,13}, {   2815,15}, \
-    {    767,14}, {   1791,13}, {   3583,14}, {   1919,13}, \
-    {   3839,12}, {   7679,16}, {    511,15}, {   1023,14}, \
-    {   2303,13}, {   4607,14}, {   2431,13}, {   4863,15}, \
-    {   1279,14}, {   2943,13}, {   5887,15}, {   1535,14}, \
-    {   3199,15}, {   1791,14}, {   3839,13}, {   7679,16}, \
-    {   1023,15}, {   2047,14}, {   4351,15}, {   2303,14}, \
-    {   4863,15}, {   2815,14}, {   5887,13}, {  11775,16}, \
-    {   1535,15}, {   3071,14}, {   6655,15}, {  32768,16}, \
-    {  65536,17}, { 131072,18}, { 262144,19}, { 524288,20}, \
-    {1048576,21}, {2097152,22}, {4194304,23}, {8388608,24} }
-#define MUL_FFT_TABLE3_SIZE 224
-#define MUL_FFT_THRESHOLD                 2752
-
-#define SQR_FFT_MODF_THRESHOLD             240  /* k = 5 */
+    {    175, 9}, {    351,11}, {     95,10}, {    191, 9}, \
+    {    383,10}, {    223,12}, {     63,11}, {    127,10}, \
+    {    255,11}, {    143,10}, {    287, 9}, {    575,10}, \
+    {    303,11}, {    159,10}, {    319,11}, {    175,12}, \
+    {     95,11}, {    191,10}, {    383,11}, {    223,13}, \
+    {   8192,14}, {  16384,15}, {  32768,16}, {  65536,17}, \
+    { 131072,18}, { 262144,19}, { 524288,20}, {1048576,21}, \
+    {2097152,22}, {4194304,23}, {8388608,24} }
+#define MUL_FFT_TABLE3_SIZE 91
+#define MUL_FFT_THRESHOLD                 2240
+
+#define SQR_FFT_MODF_THRESHOLD             216  /* k = 5 */
  #define SQR_FFT_TABLE3                                      \
-  { {    240, 5}, {     11, 6}, {      6, 5}, {     13, 6}, \
-    {     15, 7}, {      8, 6}, {     19, 7}, {     10, 6}, \
-    {     21, 7}, {     13, 8}, {      7, 7}, {     21, 8}, \
-    {     11, 7}, {     25, 8}, {     13, 9}, {      7, 8}, \
-    {     15, 7}, {     31, 8}, {     21, 9}, {     11, 8}, \
-    {     25,10}, {      7, 9}, {     15, 8}, {     33, 9}, \
-    {     19, 8}, {     39, 9}, {     23, 8}, {     47, 9}, \
-    {     27,10}, {     15, 9}, {     39,10}, {     23, 9}, \
-    {     51,11}, {     15,10}, {     31, 9}, {     63, 8}, \
-    {    127,10}, {     39, 9}, {     79,10}, {     47,11}, \
-    {     31,10}, {     63, 9}, {    127, 8}, {    255,10}, \
-    {     71, 9}, {    143, 7}, {    575,10}, {     79,11}, \
-    {     47,12}, {     31,11}, {     63,10}, {    127, 9}, \
-    {    255,10}, {    143, 9}, {    287,11}, {     79, 9}, \
-    {    319,10}, {    191, 9}, {    383,10}, {    207,12}, \
+  { {    216, 5}, {     13, 6}, {      9, 5}, {     19, 6}, \
+    {     15, 7}, {      8, 6}, {     17, 7}, {      9, 6}, \
+    {     19, 7}, {     10, 6}, {     21, 7}, {     17, 8}, \
+    {      9, 7}, {     21, 8}, {     11, 7}, {     24, 8}, \
+    {     13, 9}, {      7, 8}, {     15, 7}, {     31, 8}, \
+    {     21, 9}, {     11, 8}, {     27,10}, {      7, 9}, \
+    {     15, 8}, {     33, 9}, {     19, 8}, {     39, 9}, \
+    {     23, 8}, {     47, 9}, {     27,10}, {     15, 9}, \
+    {     39,10}, {     23, 9}, {     47,11}, {     15,10}, \
+    {     31, 9}, {     63, 8}, {    127,10}, {     39, 9}, \
+    {     79,10}, {     55,11}, {     31,10}, {     63, 9}, \
+    {    127, 8}, {    255,10}, {     71, 9}, {    143, 8}, \
+    {    287,10}, {     79,11}, {     47,10}, {     95, 9}, \
+    {    191,12}, {     31,11}, {     63, 9}, {    255,10}, \
+    {    143, 9}, {    287,11}, {     79,10}, {    159, 9}, \
+    {    319,10}, {    175, 9}, {    351,11}, {     95,10}, \
+    {    191, 9}, {    383,10}, {    207,11}, {    111,12}, \
      {     63,11}, {    127,10}, {    255,11}, {    143,10}, \
-    {    287, 9}, {    575,11}, {    159,10}, {    319,11}, \
-    {    175,10}, {    351,12}, {     95,11}, {    191,10}, \
-    {    383,11}, {    223,13}, {     63,12}, {    127,11}, \
-    {    287,12}, {    159,11}, {    351,12}, {    191,11}, \
-    {    383,10}, {    767,11}, {    415,12}, {    223,11}, \
-    {    447,13}, {    127,12}, {    255,11}, {    511,12}, \
-    {    287,11}, {    575,12}, {    319,11}, {    639,12}, \
-    {    351,13}, {    191,12}, {    383,11}, {    767,12}, \
-    {    415,11}, {    831,12}, {    447,14}, {    127,13}, \
-    {    255,12}, {    511,11}, {   1023,12}, {    543,11}, \
-    {   1087,10}, {   2175,12}, {    575,13}, {    319,12}, \
-    {    639,11}, {   1279,12}, {    671,11}, {   1343,12}, \
-    {    703,13}, {    383,12}, {    767,11}, {   1535,12}, \
-    {    831,13}, {    447,14}, {    255,13}, {    511,12}, \
-    {   1023,11}, {   2047,12}, {   1087,11}, {   2175,13}, \
-    {    575,12}, {   1151,11}, {   2303,12}, {   1215,13}, \
-    {    639,12}, {   1279,11}, {   2559,12}, {   1343,13}, \
-    {    703,14}, {    383,13}, {    767,12}, {   1535,13}, \
-    {    831,12}, {   1663,15}, {    255,14}, {    511,13}, \
-    {   1023,12}, {   2047,13}, {   1087,12}, {   2175,13}, \
-    {   1151,12}, {   2303,13}, {   1215,14}, {    639,13}, \
-    {   1343,12}, {   2687,13}, {   1407,12}, {   2815,14}, \
-    {    767,13}, {   1663,14}, {    895,13}, {   1791,12}, \
-    {   3583,13}, {   1919,12}, {   3839,15}, {    511,14}, \
-    {   1023,13}, {   2175,14}, {   1151,13}, {   2303,12}, \
-    {   4607,13}, {   2431,12}, {   4863,14}, {   1279,13}, \
-    {   2687,14}, {   1407,13}, {   2815,15}, {    767,14}, \
-    {   1535,13}, {   3071,14}, {   1791,13}, {   3583,14}, \
-    {   1919,13}, {   3839,12}, {   7679,16}, {    511,15}, \
-    {   1023,14}, {   2175,13}, {   4351,14}, {   2303,13}, \
-    {   4607,14}, {   2431,13}, {   4863,15}, {   1279,14}, \
-    {   2815,13}, {   5631,14}, {   2943,13}, {   5887,12}, \
-    {  11775,15}, {   1535,14}, {   3199,15}, {   1791,14}, \
-    {   3583,13}, {   7167,14}, {   3839,13}, {   7679,16}, \
-    {   1023,15}, {   2047,14}, {   4351,15}, {   2303,14}, \
-    {   4863,15}, {   2815,14}, {   5887,13}, {  11775,16}, \
-    {   1535,15}, {   3071,14}, {   6655,15}, {   3583,14}, \
-    {   7167,15}, {  32768,16}, {  65536,17}, { 131072,18}, \
-    { 262144,19}, { 524288,20}, {1048576,21}, {2097152,22}, \
-    {4194304,23}, {8388608,24} }
-#define SQR_FFT_TABLE3_SIZE 222
-#define SQR_FFT_THRESHOLD                 2240
+    {    287,11}, {    159,10}, {    319, 9}, {    639,11}, \
+    {    175,10}, {    351,12}, {     95,11}, {    207,10}, \
+    {    415,11}, {    223,13}, {   8192,14}, {  16384,15}, \
+    {  32768,16}, {  65536,17}, { 131072,18}, { 262144,19}, \
+    { 524288,20}, {1048576,21}, {2097152,22}, {4194304,23}, \
+    {8388608,24} }
+#define SQR_FFT_TABLE3_SIZE 93
+#define SQR_FFT_THRESHOLD                 1984
  
  #define MULLO_BASECASE_THRESHOLD             0  /* always */
-#define MULLO_DC_THRESHOLD                  27
-#define MULLO_MUL_N_THRESHOLD             5240
+#define MULLO_DC_THRESHOLD                  33
+#define MULLO_MUL_N_THRESHOLD             4392
  
-#define DC_DIV_QR_THRESHOLD                 28
+#define DC_DIV_QR_THRESHOLD                 27
  #define DC_DIVAPPR_Q_THRESHOLD              60
-#define DC_BDIV_QR_THRESHOLD                31
-#define DC_BDIV_Q_THRESHOLD                 49
+#define DC_BDIV_QR_THRESHOLD                27
+#define DC_BDIV_Q_THRESHOLD                 38
  
-#define INV_MULMOD_BNM1_THRESHOLD           22
-#define INV_NEWTON_THRESHOLD               226
-#define INV_APPR_THRESHOLD                 108
+#define INV_MULMOD_BNM1_THRESHOLD           20
+#define INV_NEWTON_THRESHOLD               202
+#define INV_APPR_THRESHOLD                 106
  
-#define BINV_NEWTON_THRESHOLD              262
-#define REDC_1_TO_REDC_2_THRESHOLD          11
-#define REDC_2_TO_REDC_N_THRESHOLD          44
+#define BINV_NEWTON_THRESHOLD              198
+#define REDC_1_TO_REDC_2_THRESHOLD          16
+#define REDC_2_TO_REDC_N_THRESHOLD          43
  
  #define MU_DIV_QR_THRESHOLD                979
-#define MU_DIVAPPR_Q_THRESHOLD            1078
-#define MUPI_DIV_QR_THRESHOLD               91
-#define MU_BDIV_QR_THRESHOLD               792
+#define MU_DIVAPPR_Q_THRESHOLD             979
+#define MUPI_DIV_QR_THRESHOLD               92
+#define MU_BDIV_QR_THRESHOLD               807
  #define MU_BDIV_Q_THRESHOLD                942
  
-#define MATRIX22_STRASSEN_THRESHOLD         21
-#define HGCD_THRESHOLD                      97
-#define GCD_DC_THRESHOLD                   217
-#define GCDEXT_DC_THRESHOLD                237
-#define JACOBI_BASE_METHOD                   1
+#define POWM_SEC_TABLE  6,65,192,792,2578
+
+#define MATRIX22_STRASSEN_THRESHOLD         17
+#define HGCD_THRESHOLD                      99
+#define HGCD_APPR_THRESHOLD                121
+#define HGCD_REDUCE_THRESHOLD             1679
+#define GCD_DC_THRESHOLD                   205
+#define GCDEXT_DC_THRESHOLD                225
+#define JACOBI_BASE_METHOD                   4
+
+#define GET_STR_DC_THRESHOLD                13
+#define GET_STR_PRECOMPUTE_THRESHOLD        25
+#define SET_STR_DC_THRESHOLD               232
+#define SET_STR_PRECOMPUTE_THRESHOLD      1585
  
-#define GET_STR_DC_THRESHOLD                12
-#define GET_STR_PRECOMPUTE_THRESHOLD        23
-#define SET_STR_DC_THRESHOLD               572
-#define SET_STR_PRECOMPUTE_THRESHOLD      1588
+#define FAC_DSC_THRESHOLD                 1127
+#define FAC_ODD_THRESHOLD                    0  /* always */
diff --git a/mpn/x86_64/pentium4/lshift.asm b/mpn/x86_64/pentium4/lshift.asm

index 7596d9c5c0c6a2e4fb71d5847b3177bf66e3bea6..19c641bd9290e5d5b59eceaa8365ab5e3ebddc7f 100644 (file)
--- a/mpn/x86_64/pentium4/lshift.asm
+++ b/mpn/x86_64/pentium4/lshift.asm
@@ -1,19 +1,19 @@
  dnl  x86-64 mpn_lshift optimized for Pentium 4.
  
-dnl  Copyright 2003, 2005, 2007, 2008 Free Software Foundation, Inc.
-dnl
+dnl  Copyright 2003, 2005, 2007, 2008, 2012 Free Software Foundation, Inc.
+
  dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or
-dnl  modify it under the terms of the GNU Lesser General Public License as
-dnl  published by the Free Software Foundation; either version 3 of the
-dnl  License, or (at your option) any later version.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful,
-dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
-dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-dnl  Lesser General Public License for more details.
-dnl
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
  dnl  You should have received a copy of the GNU Lesser General Public License
  dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
  
@@ -21,11 +21,13 @@ include(`../config.m4')
  
  
  C           cycles/limb
-C K8,K9:        2.5
-C K10:          ?
-C P4:           3.29
-C P6-15 (Core2): 2.1 (fluctuates, presumably cache related)
-C P6-28 (Atom):        14.3
+C AMD K8,K9     2.5
+C AMD K10       ?
+C Intel P4      3.29
+C Intel core2   2.1 (fluctuates, presumably cache related)
+C Intel corei   ?
+C Intel atom   14.3
+C VIA nano      ?
  
  C INPUT PARAMETERS
  define(`rp',`%rdi')
@@ -33,24 +35,28 @@ define(`up',`%rsi')
  define(`n',`%rdx')
  define(`cnt',`%cl')
  
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
+
  ASM_START()
         TEXT
         ALIGN(32)
  PROLOGUE(mpn_lshift)
+       FUNC_ENTRY(4)
         mov     -8(up,n,8), %rax
-       movd    %ecx, %mm4
-       neg     %ecx                    C put rsh count in cl
-       and     $63, %ecx
-       movd    %ecx, %mm5
+       movd    R32(%rcx), %mm4
+       neg     R32(%rcx)               C put rsh count in cl
+       and     $63, R32(%rcx)
+       movd    R32(%rcx), %mm5
  
-       lea     1(n), %r8d
+       lea     1(n), R32(%r8)
  
-       shr     %cl, %rax               C function return value
+       shr     R8(%rcx), %rax          C function return value
  
-       and     $3, %r8d
+       and     $3, R32(%r8)
         je      L(rol)                  C jump for n = 3, 7, 11, ...
  
-       dec     %r8d
+       dec     R32(%r8)
         jne     L(1)
  C      n = 4, 8, 12, ...
         movq    -8(up,n,8), %mm2
@@ -62,7 +68,7 @@ C     n = 4, 8, 12, ...
         dec     n
         jmp     L(rol)
  
-L(1):  dec     %r8d
+L(1):  dec     R32(%r8)
         je      L(1x)                   C jump for n = 1, 5, 9, 13, ...
  C      n = 2, 6, 10, 16, ...
         movq    -8(up,n,8), %mm2
@@ -131,18 +137,19 @@ L(top):
  
         jae     L(top)                  C                                     2
  L(end):
-       movq    16(up,n,8), %mm0
+       movq    8(up), %mm0
         psrlq   %mm5, %mm0
         por     %mm0, %mm2
-       movq    8(up,n,8), %mm1
+       movq    (up), %mm1
         psrlq   %mm5, %mm1
         por     %mm1, %mm3
-       movq    %mm2, 24(rp,n,8)
-       movq    %mm3, 16(rp,n,8)
+       movq    %mm2, 16(rp)
+       movq    %mm3, 8(rp)
  
  L(ast):        movq    (up), %mm2
         psllq   %mm4, %mm2
         movq    %mm2, (rp)
         emms
+       FUNC_EXIT()
         ret
  EPILOGUE()
diff --git a/mpn/x86_64/pentium4/lshiftc.asm b/mpn/x86_64/pentium4/lshiftc.asm

new file mode 100644 (file)

index 0000000..3989490
--- /dev/null
+++ b/mpn/x86_64/pentium4/lshiftc.asm
@@ -0,0 +1,168 @@
+dnl  x86-64 mpn_lshiftc optimized for Pentium 4.
+
+dnl  Copyright 2003, 2005, 2007, 2008, 2010, 2012 Free Software Foundation,
+dnl  Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+C           cycles/limb
+C AMD K8,K9     ?
+C AMD K10       ?
+C Intel P4      4.15
+C Intel core2   ?
+C Intel corei   ?
+C Intel atom    ?
+C VIA nano      ?
+
+C INPUT PARAMETERS
+define(`rp',`%rdi')
+define(`up',`%rsi')
+define(`n',`%rdx')
+define(`cnt',`%cl')
+
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
+
+ASM_START()
+       TEXT
+       ALIGN(32)
+PROLOGUE(mpn_lshiftc)
+       FUNC_ENTRY(4)
+       mov     -8(up,n,8), %rax
+       pcmpeqd %mm6, %mm6              C 0xffff...fff
+       movd    R32(%rcx), %mm4
+       neg     R32(%rcx)               C put rsh count in cl
+       and     $63, R32(%rcx)
+       movd    R32(%rcx), %mm5
+
+       lea     1(n), R32(%r8)
+
+       shr     R8(%rcx), %rax          C function return value
+
+       and     $3, R32(%r8)
+       je      L(rol)                  C jump for n = 3, 7, 11, ...
+
+       dec     R32(%r8)
+       jne     L(1)
+C      n = 4, 8, 12, ...
+       movq    -8(up,n,8), %mm2
+       psllq   %mm4, %mm2
+       movq    -16(up,n,8), %mm0
+       pxor    %mm6, %mm2
+       psrlq   %mm5, %mm0
+       pandn   %mm2, %mm0
+       movq    %mm0, -8(rp,n,8)
+       dec     n
+       jmp     L(rol)
+
+L(1):  dec     R32(%r8)
+       je      L(1x)                   C jump for n = 1, 5, 9, 13, ...
+C      n = 2, 6, 10, 14, ...
+       movq    -8(up,n,8), %mm2
+       psllq   %mm4, %mm2
+       movq    -16(up,n,8), %mm0
+       pxor    %mm6, %mm2
+       psrlq   %mm5, %mm0
+       pandn   %mm2, %mm0
+       movq    %mm0, -8(rp,n,8)
+       dec     n
+L(1x):
+       cmp     $1, n
+       je      L(ast)
+       movq    -8(up,n,8), %mm2
+       psllq   %mm4, %mm2
+       movq    -16(up,n,8), %mm3
+       psllq   %mm4, %mm3
+       movq    -16(up,n,8), %mm0
+       movq    -24(up,n,8), %mm1
+       pxor    %mm6, %mm2
+       psrlq   %mm5, %mm0
+       pandn   %mm2, %mm0
+       pxor    %mm6, %mm3
+       psrlq   %mm5, %mm1
+       pandn   %mm3, %mm1
+       movq    %mm0, -8(rp,n,8)
+       movq    %mm1, -16(rp,n,8)
+       sub     $2, n
+
+L(rol):        movq    -8(up,n,8), %mm2
+       psllq   %mm4, %mm2
+       movq    -16(up,n,8), %mm3
+       psllq   %mm4, %mm3
+
+       sub     $4, n
+       jb      L(end)
+       ALIGN(32)
+L(top):
+       C finish stuff from lsh block
+       movq    16(up,n,8), %mm0
+       pxor    %mm6, %mm2
+       movq    8(up,n,8), %mm1
+       psrlq   %mm5, %mm0
+       psrlq   %mm5, %mm1
+       pandn   %mm2, %mm0
+       pxor    %mm6, %mm3
+       movq    %mm0, 24(rp,n,8)
+       movq    (up,n,8), %mm0
+       pandn   %mm3, %mm1
+       movq    %mm1, 16(rp,n,8)
+       movq    -8(up,n,8), %mm1
+       C start two new rsh
+       psrlq   %mm5, %mm0
+       psrlq   %mm5, %mm1
+
+       C finish stuff from rsh block
+       movq    8(up,n,8), %mm2
+       pxor    %mm6, %mm0
+       movq    (up,n,8), %mm3
+       psllq   %mm4, %mm2
+       psllq   %mm4, %mm3
+       pandn   %mm0, %mm2
+       pxor    %mm6, %mm1
+       movq    %mm2, 8(rp,n,8)
+       movq    -8(up,n,8), %mm2
+       pandn   %mm1, %mm3
+       movq    %mm3, (rp,n,8)
+       movq    -16(up,n,8), %mm3
+       C start two new lsh
+       sub     $4, n
+       psllq   %mm4, %mm2
+       psllq   %mm4, %mm3
+
+       jae     L(top)
+
+L(end):        pxor    %mm6, %mm2
+       movq    8(up), %mm0
+       psrlq   %mm5, %mm0
+       pandn   %mm2, %mm0
+       pxor    %mm6, %mm3
+       movq    (up), %mm1
+       psrlq   %mm5, %mm1
+       pandn   %mm3, %mm1
+       movq    %mm0, 16(rp)
+       movq    %mm1, 8(rp)
+
+L(ast):        movq    (up), %mm2
+       psllq   %mm4, %mm2
+       pxor    %mm6, %mm2
+       movq    %mm2, (rp)
+       emms
+       FUNC_EXIT()
+       ret
+EPILOGUE()
diff --git a/mpn/x86_64/pentium4/mod_34lsub1.asm b/mpn/x86_64/pentium4/mod_34lsub1.asm

new file mode 100644 (file)

index 0000000..2974fcc
--- /dev/null
+++ b/mpn/x86_64/pentium4/mod_34lsub1.asm
@@ -0,0 +1,156 @@
+dnl  AMD64 mpn_mod_34lsub1 -- remainder modulo 2^48-1.
+
+dnl  Copyright 2000, 2001, 2002, 2004, 2005, 2007, 2010, 2011, 2012 Free
+dnl  Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+C           cycles/limb
+C AMD K8,K9     1.0
+C AMD K10       1.12
+C Intel P4      3.25
+C Intel core2   1.5
+C Intel corei   1.5
+C Intel atom    2.5
+C VIA nano      1.75
+
+
+C INPUT PARAMETERS
+define(`ap',   %rdi)
+define(`n',    %rsi)
+
+C mp_limb_t mpn_mod_34lsub1 (mp_srcptr ap, mp_size_t n)
+
+C TODO
+C  * Review feed-in and wind-down code.  In particular, try to avoid adc and
+C    sbb to placate Pentium4.
+C  * It seems possible to reach 2.67 c/l by using a cleaner 6-way unrolling,
+C    without the dual loop exits.
+
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
+
+ASM_START()
+       TEXT
+       ALIGN(32)
+PROLOGUE(mpn_mod_34lsub1)
+       FUNC_ENTRY(2)
+
+       mov     $0x0000FFFFFFFFFFFF, %r11
+
+       sub     $2, %rsi
+       ja      L(gt2)
+
+       mov     (ap), %rax
+       nop
+       jb      L(1)
+
+       mov     8(ap), %rsi
+       mov     %rax, %rdx
+       shr     $48, %rax               C src[0] high
+
+       and     %r11, %rdx              C src[0] low
+       add     %rdx, %rax
+       mov     R32(%rsi), R32(%rdx)
+
+       shr     $32, %rsi               C src[1] high
+       add     %rsi, %rax
+
+       shl     $16, %rdx               C src[1] low
+       add     %rdx, %rax
+
+L(1):  FUNC_EXIT()
+       ret
+
+
+       ALIGN(16)
+L(gt2):        xor     R32(%rax), R32(%rax)
+       xor     R32(%rcx), R32(%rcx)
+       xor     R32(%rdx), R32(%rdx)
+       xor     %r8, %r8
+       xor     %r9, %r9
+       xor     %r10, %r10
+
+L(top):        add     (ap), %rax
+       adc     $0, %r10
+       add     8(ap), %rcx
+       adc     $0, %r8
+       add     16(ap), %rdx
+       adc     $0, %r9
+
+       sub     $3, %rsi
+       jng     L(end)
+
+       add     24(ap), %rax
+       adc     $0, %r10
+       add     32(ap), %rcx
+       adc     $0, %r8
+       add     40(ap), %rdx
+       lea     48(ap), ap
+       adc     $0, %r9
+
+       sub     $3, %rsi
+       jg      L(top)
+
+
+       add     $-24, ap
+L(end):        add     %r9, %rax
+       adc     %r10, %rcx
+       adc     %r8, %rdx
+
+       inc     %rsi
+       mov     $0x1, R32(%r10)
+       js      L(combine)
+
+       mov     $0x10000, R32(%r10)
+       adc     24(ap), %rax
+       dec     %rsi
+       js      L(combine)
+
+       adc     32(ap), %rcx
+       mov     $0x100000000, %r10
+
+L(combine):
+       sbb     %rsi, %rsi              C carry
+       mov     %rax, %rdi              C 0mod3
+       shr     $48, %rax               C 0mod3 high
+
+       and     %r10, %rsi              C carry masked
+       and     %r11, %rdi              C 0mod3 low
+       mov     R32(%rcx), R32(%r10)    C 1mod3
+
+       add     %rsi, %rax              C apply carry
+       shr     $32, %rcx               C 1mod3 high
+
+       add     %rdi, %rax              C apply 0mod3 low
+       movzwl  %dx, R32(%rdi)          C 2mod3
+       shl     $16, %r10               C 1mod3 low
+
+       add     %rcx, %rax              C apply 1mod3 high
+       shr     $16, %rdx               C 2mod3 high
+
+       add     %r10, %rax              C apply 1mod3 low
+       shl     $32, %rdi               C 2mod3 low
+
+       add     %rdx, %rax              C apply 2mod3 high
+       add     %rdi, %rax              C apply 2mod3 low
+
+       FUNC_EXIT()
+       ret
+EPILOGUE()
diff --git a/mpn/x86_64/pentium4/rsh1aors_n.asm b/mpn/x86_64/pentium4/rsh1aors_n.asm

new file mode 100644 (file)

index 0000000..ca09a73
--- /dev/null
+++ b/mpn/x86_64/pentium4/rsh1aors_n.asm
@@ -0,0 +1,323 @@
+dnl  x86-64 mpn_rsh1add_n/mpn_rsh1sub_n optimized for Pentium 4.
+
+dnl  Contributed to the GNU project by Torbjorn Granlund.
+
+dnl  Copyright 2007, 2008, 2010, 2011, 2012 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+C           cycles/limb
+C AMD K8,K9     4.13
+C AMD K10       4.13
+C Intel P4      5.70
+C Intel core2   4.75
+C Intel corei   5
+C Intel atom    8.75
+C VIA nano      5.25
+
+C TODO
+C  * Try to make this smaller, 746 bytes seem excessive for this 2nd class
+C    function.  Less sw pipelining would help, and since we now probably
+C    pipeline somewhat too deeply, it might not affect performance too much.
+C  * A separate small-n loop might speed things as well as make things smaller.
+C    That loop should be selected before pushing registers.
+
+C INPUT PARAMETERS
+define(`rp',   `%rdi')
+define(`up',   `%rsi')
+define(`vp',   `%rdx')
+define(`n',    `%rcx')
+define(`cy',   `%r8')
+
+ifdef(`OPERATION_rsh1add_n', `
+       define(ADDSUB,        add)
+       define(func,          mpn_rsh1add_n)
+       define(func_nc,       mpn_rsh1add_nc)')
+ifdef(`OPERATION_rsh1sub_n', `
+       define(ADDSUB,        sub)
+       define(func,          mpn_rsh1sub_n)
+       define(func_nc,       mpn_rsh1sub_nc)')
+
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
+
+MULFUNC_PROLOGUE(mpn_rsh1add_n mpn_rsh1add_nc mpn_rsh1sub_n mpn_rsh1sub_nc)
+
+ASM_START()
+       TEXT
+PROLOGUE(func)
+       FUNC_ENTRY(4)
+       xor     %r8, %r8
+IFDOS(`        jmp     L(ent)          ')
+EPILOGUE()
+PROLOGUE(func_nc)
+       FUNC_ENTRY(4)
+IFDOS(`        mov     56(%rsp), %r8   ')
+L(ent):        push    %rbx
+       push    %r12
+       push    %r13
+       push    %r14
+       push    %r15
+
+       mov     (vp), %r9
+       mov     (up), %r15
+
+       mov     R32(n), R32(%rax)
+       and     $3, R32(%rax)
+       jne     L(n00)
+
+       mov     R32(%r8), R32(%rbx)     C n = 0, 4, 8, ...
+       mov     8(up), %r10
+       ADDSUB  %r9, %r15
+       mov     8(vp), %r9
+       setc    R8(%rax)
+       ADDSUB  %rbx, %r15              C return bit
+       jnc     1f
+       mov     $1, R8(%rax)
+1:     mov     16(up), %r12
+       ADDSUB  %r9, %r10
+       mov     16(vp), %r9
+       setc    R8(%rbx)
+       mov     %r15, %r13
+       ADDSUB  %rax, %r10
+       jnc     1f
+       mov     $1, R8(%rbx)
+1:     mov     24(up), %r11
+       ADDSUB  %r9, %r12
+       lea     32(up), up
+       mov     24(vp), %r9
+       lea     32(vp), vp
+       setc    R8(%rax)
+       mov     %r10, %r14
+       shl     $63, %r10
+       shr     %r13
+       jmp     L(L00)
+
+L(n00):        cmp     $2, R32(%rax)
+       jnc     L(n01)
+       xor     R32(%rbx), R32(%rbx)    C n = 1, 5, 9, ...
+       lea     -24(rp), rp
+       mov     R32(%r8), R32(%rax)
+       dec     n
+       jnz     L(gt1)
+       ADDSUB  %r9, %r15
+       setc    R8(%rbx)
+       ADDSUB  %rax, %r15
+       jnc     1f
+       mov     $1, R8(%rbx)
+1:     mov     %r15, %r14
+       shl     $63, %rbx
+       shr     %r14
+       jmp     L(cj1)
+L(gt1):        mov     8(up), %r8
+       ADDSUB  %r9, %r15
+       mov     8(vp), %r9
+       setc    R8(%rbx)
+       ADDSUB  %rax, %r15
+       jnc     1f
+       mov     $1, R8(%rbx)
+1:     mov     16(up), %r10
+       ADDSUB  %r9, %r8
+       mov     16(vp), %r9
+       setc    R8(%rax)
+       mov     %r15, %r14
+       ADDSUB  %rbx, %r8
+       jnc     1f
+       mov     $1, R8(%rax)
+1:     mov     24(up), %r12
+       ADDSUB  %r9, %r10
+       mov     24(vp), %r9
+       setc    R8(%rbx)
+       mov     %r8, %r13
+       shl     $63, %r8
+       shr     %r14
+       lea     8(up), up
+       lea     8(vp), vp
+       jmp     L(L01)
+
+L(n01):        jne     L(n10)
+       lea     -16(rp), rp             C n = 2, 6, 10, ...
+       mov     R32(%r8), R32(%rbx)
+       mov     8(up), %r11
+       ADDSUB  %r9, %r15
+       mov     8(vp), %r9
+       setc    R8(%rax)
+       ADDSUB  %rbx, %r15
+       jnc     1f
+       mov     $1, R8(%rax)
+1:     sub     $2, n
+       jnz     L(gt2)
+       ADDSUB  %r9, %r11
+       setc    R8(%rbx)
+       mov     %r15, %r13
+       ADDSUB  %rax, %r11
+       jnc     1f
+       mov     $1, R8(%rbx)
+1:     mov     %r11, %r14
+       shl     $63, %r11
+       shr     %r13
+       jmp     L(cj2)
+L(gt2):        mov     16(up), %r8
+       ADDSUB  %r9, %r11
+       mov     16(vp), %r9
+       setc    R8(%rbx)
+       mov     %r15, %r13
+       ADDSUB  %rax, %r11
+       jnc     1f
+       mov     $1, R8(%rbx)
+1:     mov     24(up), %r10
+       ADDSUB  %r9, %r8
+       mov     24(vp), %r9
+       setc    R8(%rax)
+       mov     %r11, %r14
+       shl     $63, %r11
+       shr     %r13
+       lea     16(up), up
+       lea     16(vp), vp
+       jmp     L(L10)
+
+L(n10):        xor     R32(%rbx), R32(%rbx)    C n = 3, 7, 11, ...
+       lea     -8(rp), rp
+       mov     R32(%r8), R32(%rax)
+       mov     8(up), %r12
+       ADDSUB  %r9, %r15
+       mov     8(vp), %r9
+       setc    R8(%rbx)
+       ADDSUB  %rax, %r15
+       jnc     1f
+       mov     $1, R8(%rbx)
+1:     mov     16(up), %r11
+       ADDSUB  %r9, %r12
+       mov     16(vp), %r9
+       setc    R8(%rax)
+       mov     %r15, %r14
+       ADDSUB  %rbx, %r12
+       jnc     1f
+       mov     $1, R8(%rax)
+1:     sub     $3, n
+       jnz     L(gt3)
+       ADDSUB  %r9, %r11
+       setc    R8(%rbx)
+       mov     %r12, %r13
+       shl     $63, %r12
+       shr     %r14
+       jmp     L(cj3)
+L(gt3):        mov     24(up), %r8
+       ADDSUB  %r9, %r11
+       mov     24(vp), %r9
+       setc    R8(%rbx)
+       mov     %r12, %r13
+       shl     $63, %r12
+       shr     %r14
+       lea     24(up), up
+       lea     24(vp), vp
+       jmp     L(L11)
+
+L(c0): mov     $1, R8(%rbx)
+       jmp     L(rc0)
+L(c1): mov     $1, R8(%rax)
+       jmp     L(rc1)
+L(c2): mov     $1, R8(%rbx)
+       jmp     L(rc2)
+
+       ALIGN(16)
+L(top):        mov     (up), %r8       C not on critical path
+       or      %r13, %r10
+       ADDSUB  %r9, %r11       C not on critical path
+       mov     (vp), %r9       C not on critical path
+       setc    R8(%rbx)        C save carry out
+       mov     %r12, %r13      C new for later
+       shl     $63, %r12       C shift new left
+       shr     %r14            C shift old right
+       mov     %r10, (rp)
+L(L11):        ADDSUB  %rax, %r11      C apply previous carry out
+       jc      L(c0)           C jump if ripple
+L(rc0):        mov     8(up), %r10
+       or      %r14, %r12
+       ADDSUB  %r9, %r8
+       mov     8(vp), %r9
+       setc    R8(%rax)
+       mov     %r11, %r14
+       shl     $63, %r11
+       shr     %r13
+       mov     %r12, 8(rp)
+L(L10):        ADDSUB  %rbx, %r8
+       jc      L(c1)
+L(rc1):        mov     16(up), %r12
+       or      %r13, %r11
+       ADDSUB  %r9, %r10
+       mov     16(vp), %r9
+       setc    R8(%rbx)
+       mov     %r8, %r13
+       shl     $63, %r8
+       shr     %r14
+       mov     %r11, 16(rp)
+L(L01):        ADDSUB  %rax, %r10
+       jc      L(c2)
+L(rc2):        mov     24(up), %r11
+       or      %r14, %r8
+       ADDSUB  %r9, %r12
+       lea     32(up), up
+       mov     24(vp), %r9
+       lea     32(vp), vp
+       setc    R8(%rax)
+       mov     %r10, %r14
+       shl     $63, %r10
+       shr     %r13
+       mov     %r8, 24(rp)
+       lea     32(rp), rp
+L(L00):        ADDSUB  %rbx, %r12
+       jc      L(c3)
+L(rc3):        sub     $4, n
+       ja      L(top)
+
+L(end):        or      %r13, %r10
+       ADDSUB  %r9, %r11
+       setc    R8(%rbx)
+       mov     %r12, %r13
+       shl     $63, %r12
+       shr     %r14
+       mov     %r10, (rp)
+L(cj3):        ADDSUB  %rax, %r11
+       jnc     1f
+       mov     $1, R8(%rbx)
+1:     or      %r14, %r12
+       mov     %r11, %r14
+       shl     $63, %r11
+       shr     %r13
+       mov     %r12, 8(rp)
+L(cj2):        or      %r13, %r11
+       shl     $63, %rbx
+       shr     %r14
+       mov     %r11, 16(rp)
+L(cj1):        or      %r14, %rbx
+       mov     %rbx, 24(rp)
+
+       mov     R32(%r15), R32(%rax)
+       and     $1, R32(%rax)
+       pop     %r15
+       pop     %r14
+       pop     %r13
+       pop     %r12
+       pop     %rbx
+       FUNC_EXIT()
+       ret
+L(c3): mov     $1, R8(%rax)
+       jmp     L(rc3)
+EPILOGUE()
diff --git a/mpn/x86_64/pentium4/rshift.asm b/mpn/x86_64/pentium4/rshift.asm

index 61899c5ecfefc950ddcb2154d9eb49c81a020cb5..c491b4f77b53d64ccbdbfd1c836b2fbe07a3912a 100644 (file)
--- a/mpn/x86_64/pentium4/rshift.asm
+++ b/mpn/x86_64/pentium4/rshift.asm
@@ -1,19 +1,19 @@
  dnl  x86-64 mpn_rshift optimized for Pentium 4.
  
-dnl  Copyright 2003, 2005, 2007, 2008 Free Software Foundation, Inc.
-dnl
+dnl  Copyright 2003, 2005, 2007, 2008, 2012 Free Software Foundation, Inc.
+
  dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or
-dnl  modify it under the terms of the GNU Lesser General Public License as
-dnl  published by the Free Software Foundation; either version 3 of the
-dnl  License, or (at your option) any later version.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful,
-dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
-dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-dnl  Lesser General Public License for more details.
-dnl
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
  dnl  You should have received a copy of the GNU Lesser General Public License
  dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
  
@@ -21,11 +21,13 @@ include(`../config.m4')
  
  
  C           cycles/limb
-C K8,K9:        2.5
-C K10:          ?
-C P4:           3.29
-C P6-15 (Core2): 2.1 (fluctuates, presumably cache related)
-C P6-28 (Atom):        14.3
+C AMD K8,K9     2.5
+C AMD K10       ?
+C Intel P4      3.29
+C Intel core2   2.1 (fluctuates, presumably cache related)
+C Intel corei   ?
+C Intel atom   14.3
+C VIA nano      ?
  
  C INPUT PARAMETERS
  define(`rp',`%rdi')
@@ -33,27 +35,31 @@ define(`up',`%rsi')
  define(`n',`%rdx')
  define(`cnt',`%cl')
  
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
+
  ASM_START()
         TEXT
         ALIGN(32)
  PROLOGUE(mpn_rshift)
+       FUNC_ENTRY(4)
         mov     (up), %rax
-       movd    %ecx, %mm4
-       neg     %ecx                    C put lsh count in cl
-       and     $63, %ecx
-       movd    %ecx, %mm5
+       movd    R32(%rcx), %mm4
+       neg     R32(%rcx)                       C put lsh count in cl
+       and     $63, R32(%rcx)
+       movd    R32(%rcx), %mm5
  
         lea     -8(up,n,8), up
         lea     -8(rp,n,8), rp
-       lea     1(n), %r8d
+       lea     1(n), R32(%r8)
         neg     n
  
-       shl     %cl, %rax               C function return value
+       shl     R8(%rcx), %rax          C function return value
  
-       and     $3, %r8d
+       and     $3, R32(%r8)
         je      L(rol)                  C jump for n = 3, 7, 11, ...
  
-       dec     %r8d
+       dec     R32(%r8)
         jne     L(1)
  C      n = 4, 8, 12, ...
         movq    8(up,n,8), %mm2
@@ -65,7 +71,7 @@ C     n = 4, 8, 12, ...
         inc     n
         jmp     L(rol)
  
-L(1):  dec     %r8d
+L(1):  dec     R32(%r8)
         je      L(1x)                   C jump for n = 1, 5, 9, 13, ...
  C      n = 2, 6, 10, 16, ...
         movq    8(up,n,8), %mm2
@@ -134,18 +140,19 @@ L(top):
  
         jae     L(top)                  C                                     2
  L(end):
-       movq    -16(up,n,8), %mm0
+       movq    -8(up), %mm0
         psllq   %mm5, %mm0
         por     %mm0, %mm2
-       movq    -8(up,n,8), %mm1
+       movq    (up), %mm1
         psllq   %mm5, %mm1
         por     %mm1, %mm3
-       movq    %mm2, -24(rp,n,8)
-       movq    %mm3, -16(rp,n,8)
+       movq    %mm2, -16(rp)
+       movq    %mm3, -8(rp)
  
  L(ast):        movq    (up), %mm2
         psrlq   %mm4, %mm2
         movq    %mm2, (rp)
         emms
+       FUNC_EXIT()
         ret
  EPILOGUE()
diff --git a/mpn/x86_64/popham.asm b/mpn/x86_64/popham.asm

index e2bdb1a0b882b28b68b2535789ffd5d705b14c2f..a1f53da120ba9fa8dee1bc27fae73d47354b4039 100644 (file)
--- a/mpn/x86_64/popham.asm
+++ b/mpn/x86_64/popham.asm
@@ -1,6 +1,6 @@
  dnl  AMD64 mpn_popcount, mpn_hamdist -- population count and hamming distance.
  
-dnl  Copyright 2004, 2005, 2007 Free Software Foundation, Inc.
+dnl  Copyright 2004, 2005, 2007, 2010, 2011, 2012 Free Software Foundation, Inc.
  
  dnl  This file is part of the GNU MP Library.
  
@@ -23,10 +23,13 @@ include(`../config.m4')
  
  C                   popcount         hamdist
  C                  cycles/limb     cycles/limb
-C K8,K9:                6               7
-C K10:                  6               7
-C P4:                  12              14.3
-C P6-15:                7               8
+C AMD K8,K9             6               7
+C AMD K10               6               7
+C Intel P4             12              14.3
+C Intel core2           7               8
+C Intel corei           ?               7.3
+C Intel atom           16.5            17.5
+C VIA nano              8.75           10.4
  
  C TODO
  C  * Tune.  It should be possible to reach 5 c/l for popcount and 6 c/l for
@@ -41,6 +44,7 @@ ifdef(`OPERATION_popcount',`
    define(`h33333333',  `%r11')
    define(`h0f0f0f0f',  `%rcx')
    define(`h01010101',  `%rdx')
+  define(`POP',                `$1')
    define(`HAM',                `dnl')
  ')
  ifdef(`OPERATION_hamdist',`
@@ -52,106 +56,111 @@ ifdef(`OPERATION_hamdist',`
    define(`h33333333',  `%r11')
    define(`h0f0f0f0f',  `%rcx')
    define(`h01010101',  `%r14')
+  define(`POP',                `dnl')
    define(`HAM',                `$1')
  ')
  
  
  MULFUNC_PROLOGUE(mpn_popcount mpn_hamdist)
  
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
+
  ASM_START()
         TEXT
         ALIGN(32)
  PROLOGUE(func)
+ POP(` FUNC_ENTRY(2)           ')
+ HAM(` FUNC_ENTRY(3)           ')
+       push    %r12
+       push    %r13
+ HAM(` push    %r14            ')
  
-       pushq   %r12
-       pushq   %r13
- HAM(` pushq   %r14            ')
-
-       movq    $0x5555555555555555, h55555555
-       movq    $0x3333333333333333, h33333333
-       movq    $0x0f0f0f0f0f0f0f0f, h0f0f0f0f
-       movq    $0x0101010101010101, h01010101
+       mov     $0x5555555555555555, h55555555
+       mov     $0x3333333333333333, h33333333
+       mov     $0x0f0f0f0f0f0f0f0f, h0f0f0f0f
+       mov     $0x0101010101010101, h01010101
  
-       leaq    (up,n,8), up
- HAM(` leaq    (vp,n,8), vp    ')
-       negq    n
+       lea     (up,n,8), up
+ HAM(` lea     (vp,n,8), vp    ')
+       neg     n
  
-       xorl    %eax, %eax
+       xor     R32(%rax), R32(%rax)
  
-       btq     $0, n
-       jnc     L(oop)
+       bt      $0, R32(n)
+       jnc     L(top)
  
-       movq    (up,n,8), %r8
- HAM(` xorq    (vp,n,8), %r8   ')
+       mov     (up,n,8), %r8
+ HAM(` xor     (vp,n,8), %r8   ')
  
-       movq    %r8, %r9
-       shrq    %r8
-       andq    h55555555, %r8
-       subq    %r8, %r9
+       mov     %r8, %r9
+       shr     %r8
+       and     h55555555, %r8
+       sub     %r8, %r9
  
-       movq    %r9, %r8
-       shrq    $2, %r9
-       andq    h33333333, %r8
-       andq    h33333333, %r9
-       addq    %r8, %r9                C 16 4-bit fields (0..4)
+       mov     %r9, %r8
+       shr     $2, %r9
+       and     h33333333, %r8
+       and     h33333333, %r9
+       add     %r8, %r9                C 16 4-bit fields (0..4)
  
-       movq    %r9, %r8
-       shrq    $4, %r9
-       andq    h0f0f0f0f, %r8
-       andq    h0f0f0f0f, %r9
-       addq    %r8, %r9                C 8 8-bit fields (0..16)
+       mov     %r9, %r8
+       shr     $4, %r9
+       and     h0f0f0f0f, %r8
+       and     h0f0f0f0f, %r9
+       add     %r8, %r9                C 8 8-bit fields (0..16)
  
-       imulq   h01010101, %r9          C sum the 8 fields in high 8 bits
-       shrq    $56, %r9
+       imul    h01010101, %r9          C sum the 8 fields in high 8 bits
+       shr     $56, %r9
  
-       addq    %r9, %rax               C add to total
-       addq    $1, n
-       jz      L(done)
+       mov     %r9, %rax               C initialize total
+       add     $1, n
+       jz      L(end)
  
         ALIGN(16)
-L(oop):        movq    (up,n,8), %r8
-       movq    8(up,n,8), %r12
- HAM(` xorq    (vp,n,8), %r8   ')
- HAM(` xorq    8(vp,n,8), %r12 ')
-
-       movq    %r8, %r9
-       movq    %r12, %r13
-       shrq    %r8
-       shrq    %r12
-       andq    h55555555, %r8
-       andq    h55555555, %r12
-       subq    %r8, %r9
-       subq    %r12, %r13
-
-       movq    %r9, %r8
-       movq    %r13, %r12
-       shrq    $2, %r9
-       shrq    $2, %r13
-       andq    h33333333, %r8
-       andq    h33333333, %r9
-       andq    h33333333, %r12
-       andq    h33333333, %r13
-       addq    %r8, %r9                C 16 4-bit fields (0..4)
-       addq    %r12, %r13              C 16 4-bit fields (0..4)
-
-       addq    %r13, %r9               C 16 4-bit fields (0..8)
-       movq    %r9, %r8
-       shrq    $4, %r9
-       andq    h0f0f0f0f, %r8
-       andq    h0f0f0f0f, %r9
-       addq    %r8, %r9                C 8 8-bit fields (0..16)
-
-       imulq   h01010101, %r9          C sum the 8 fields in high 8 bits
-       shrq    $56, %r9
-
-       addq    %r9, %rax               C add to total
-       addq    $2, n
-       jnc     L(oop)
-
-L(done):
- HAM(` popq    %r14            ')
-       popq    %r13
-       popq    %r12
+L(top):        mov     (up,n,8), %r8
+       mov     8(up,n,8), %r12
+ HAM(` xor     (vp,n,8), %r8   ')
+ HAM(` xor     8(vp,n,8), %r12 ')
+
+       mov     %r8, %r9
+       mov     %r12, %r13
+       shr     %r8
+       shr     %r12
+       and     h55555555, %r8
+       and     h55555555, %r12
+       sub     %r8, %r9
+       sub     %r12, %r13
+
+       mov     %r9, %r8
+       mov     %r13, %r12
+       shr     $2, %r9
+       shr     $2, %r13
+       and     h33333333, %r8
+       and     h33333333, %r9
+       and     h33333333, %r12
+       and     h33333333, %r13
+       add     %r8, %r9                C 16 4-bit fields (0..4)
+       add     %r12, %r13              C 16 4-bit fields (0..4)
+
+       add     %r13, %r9               C 16 4-bit fields (0..8)
+       mov     %r9, %r8
+       shr     $4, %r9
+       and     h0f0f0f0f, %r8
+       and     h0f0f0f0f, %r9
+       add     %r8, %r9                C 8 8-bit fields (0..16)
+
+       imul    h01010101, %r9          C sum the 8 fields in high 8 bits
+       shr     $56, %r9
+
+       add     %r9, %rax               C add to total
+       add     $2, n
+       jnc     L(top)
+
+L(end):
+ HAM(` pop     %r14            ')
+       pop     %r13
+       pop     %r12
+       FUNC_EXIT()
         ret
-
  EPILOGUE()
diff --git a/mpn/x86_64/redc_1.asm b/mpn/x86_64/redc_1.asm

index ceaadea01e4f3d3a46bede56a01e63dc39ba7b7d..b6404d42bb4f011b3b8fb6259c15efef11771526 100644 (file)
--- a/mpn/x86_64/redc_1.asm
+++ b/mpn/x86_64/redc_1.asm
@@ -1,6 +1,6 @@
  dnl  AMD64 mpn_redc_1 -- Montgomery reduction with a one-limb modular inverse.
  
-dnl  Copyright 2004, 2008 Free Software Foundation, Inc.
+dnl  Copyright 2004, 2008, 2011, 2012 Free Software Foundation, Inc.
  dnl
  dnl  This file is part of the GNU MP Library.
  dnl
@@ -22,48 +22,51 @@ include(`../config.m4')
  
  C           cycles/limb
  C           cycles/limb
-C K8,K9:        2.5
-C K10:          2.5
-C P4:           ?
-C P6-15 (Core2): 5.3
-C P6-28 (Atom):         ?
+C AMD K8,K9     2.5
+C AMD K10       2.5
+C Intel P4      ?
+C Intel core2   5.3
+C Intel corei   ?
+C Intel atom    ?
+C VIA nano      ?
  
  C TODO
  C  * Handle certain sizes, e.g., 1, 2, 3, 4, 8, with single-loop code.
  C    The code for 1, 2, 3, 4 should perhaps be completely register based.
  C  * Perhaps align outer loops.
-C  * The sub_n at the end leaks side-channel data.  How do we fix that?
-C  * Write mpn_add_n_sub_n computing R = A + B - C.  It should run at 2 c/l.
  C  * We could software pipeline the IMUL stuff, by putting it before the
  C    outer loops and before the end of the outer loops.  The last outer
  C    loop iteration would then compute an unneeded product, but it is at
  C    least not a stray read from up[], since it is at up[n].
-C  * Can we combine both the add_n and sub_n into the loops, somehow?
+C  * Make a tail call to mpn_add_n.
  
  C INPUT PARAMETERS
-define(`rp',     `%rdi')
-define(`up',     `%rsi')
-define(`param_mp',`%rdx')
-define(`n',      `%rcx')
-define(`invm',   `%r8')
+define(`rp',     `%rdi')       C rcx
+define(`up',     `%rsi')       C rdx
+define(`mp_param',`%rdx')      C r8
+define(`n',      `%rcx')       C r9
+define(`invm',   `%r8')        C stack
  
  define(`mp',     `%r13')
  define(`i',      `%r11')
  define(`nneg',   `%r12')
  
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
+
  ASM_START()
         TEXT
         ALIGN(32)
  PROLOGUE(mpn_redc_1)
+       FUNC_ENTRY(4)
+IFDOS(`        mov     56(%rsp), %r8   ')
         push    %rbp
         push    %rbx
         push    %r12
         push    %r13
         push    %r14
-       push    n
-       sub     $8, %rsp                C maintain ABI required rsp alignment
  
-       lea     (param_mp,n,8), mp      C mp += n
+       lea     (mp_param,n,8), mp      C mp += n
         lea     (up,n,8), up            C up += n
  
         mov     n, nneg
@@ -83,7 +86,7 @@ L(o1):        mov     nneg, i
         imul    invm, %rbp
  
         mov     (mp,i,8), %rax
-       xor     %ebx, %ebx
+       xor     R32(%rbx), R32(%rbx)
         mul     %rbp
         add     $1, i
         jnz     1f
@@ -102,7 +105,7 @@ L(lo1):     add     %r10, (up,i,8)
         adc     %rax, %r9
         mov     (mp,i,8), %rax
         adc     %rdx, %r14
-L(mi1):        xor     %r10d, %r10d
+L(mi1):        xor     R32(%r10), R32(%r10)
         mul     %rbp
         add     %r9, 8(up,i,8)
         adc     %rax, %r14
@@ -114,28 +117,27 @@ L(mi1):   xor     %r10d, %r10d
         adc     %rdx, %r10
         mov     16(mp,i,8), %rax
         mul     %rbp
-       xor     %r9d, %r9d
-       xor     %r14d, %r14d
+       xor     R32(%r9), R32(%r9)
+       xor     R32(%r14), R32(%r14)
         add     %rbx, 24(up,i,8)
         adc     %rax, %r10
         mov     24(mp,i,8), %rax
         adc     %rdx, %r9
-       xor     %ebx, %ebx
+       xor     R32(%rbx), R32(%rbx)
         mul     %rbp
         add     $4, i
         js      L(lo1)
  L(ed1):        add     %r10, (up)
         adc     %rax, %r9
         adc     %rdx, %r14
-       xor     %r10d, %r10d
+       xor     R32(%r10), R32(%r10)
         add     %r9, 8(up)
         adc     $0, %r14
  L(n1): mov     %r14, 16(up,nneg,8)     C up[0]
         add     $8, up
         dec     n
         jnz     L(o1)
-C      lea     (mp), mp
-       lea     16(up), up
+       lea     16(up,nneg,8), up
         jmp     L(common)
  
  L(b0): C lea   (mp), mp
@@ -145,7 +147,7 @@ L(o0):      mov     nneg, i
         imul    invm, %rbp
  
         mov     (mp,i,8), %rax
-       xor     %r10d, %r10d
+       xor     R32(%r10), R32(%r10)
         mul     %rbp
         mov     %rax, %r14
         mov     %rdx, %rbx
@@ -156,7 +158,7 @@ L(lo0):     add     %r10, (up,i,8)
         adc     %rax, %r9
         mov     (mp,i,8), %rax
         adc     %rdx, %r14
-       xor     %r10d, %r10d
+       xor     R32(%r10), R32(%r10)
         mul     %rbp
         add     %r9, 8(up,i,8)
         adc     %rax, %r14
@@ -168,31 +170,29 @@ L(mi0):   mov     8(mp,i,8), %rax
         adc     %rdx, %r10
         mov     16(mp,i,8), %rax
         mul     %rbp
-       xor     %r9d, %r9d
-       xor     %r14d, %r14d
+       xor     R32(%r9), R32(%r9)
+       xor     R32(%r14), R32(%r14)
         add     %rbx, 24(up,i,8)
         adc     %rax, %r10
         mov     24(mp,i,8), %rax
         adc     %rdx, %r9
-       xor     %ebx, %ebx
+       xor     R32(%rbx), R32(%rbx)
         mul     %rbp
         add     $4, i
         js      L(lo0)
  L(ed0):        add     %r10, (up)
         adc     %rax, %r9
         adc     %rdx, %r14
-       xor     %r10d, %r10d
+       xor     R32(%r10), R32(%r10)
         add     %r9, 8(up)
         adc     $0, %r14
         mov     %r14, 16(up,nneg,8)     C up[0]
         add     $8, up
         dec     n
         jnz     L(o0)
-C      lea     (mp), mp
-       lea     16(up), up
+       lea     16(up,nneg,8), up
         jmp     L(common)
  
-
  L(b3): lea     -8(mp), mp
         lea     -24(up), up
  L(o3): mov     nneg, i
@@ -210,7 +210,7 @@ L(lo3):     add     %r10, (up,i,8)
         adc     %rax, %r9
         mov     (mp,i,8), %rax
         adc     %rdx, %r14
-       xor     %r10d, %r10d
+       xor     R32(%r10), R32(%r10)
         mul     %rbp
         add     %r9, 8(up,i,8)
         adc     %rax, %r14
@@ -222,28 +222,27 @@ L(lo3):   add     %r10, (up,i,8)
         adc     %rdx, %r10
  L(mi3):        mov     16(mp,i,8), %rax
         mul     %rbp
-       xor     %r9d, %r9d
-       xor     %r14d, %r14d
+       xor     R32(%r9), R32(%r9)
+       xor     R32(%r14), R32(%r14)
         add     %rbx, 24(up,i,8)
         adc     %rax, %r10
         mov     24(mp,i,8), %rax
         adc     %rdx, %r9
-       xor     %ebx, %ebx
+       xor     R32(%rbx), R32(%rbx)
         mul     %rbp
         add     $4, i
         js      L(lo3)
  L(ed3):        add     %r10, 8(up)
         adc     %rax, %r9
         adc     %rdx, %r14
-       xor     %r10d, %r10d
+       xor     R32(%r10), R32(%r10)
         add     %r9, 16(up)
         adc     $0, %r14
         mov     %r14, 24(up,nneg,8)     C up[0]
         add     $8, up
         dec     n
         jnz     L(o3)
-       lea     8(mp), mp
-       lea     24(up), up
+       lea     24(up,nneg,8), up
         jmp     L(common)
  
  L(b2): lea     -16(mp), mp
@@ -254,7 +253,7 @@ L(o2):      mov     nneg, i
  
         mov     16(mp,i,8), %rax
         mul     %rbp
-       xor     %r14d, %r14d
+       xor     R32(%r14), R32(%r14)
         mov     %rax, %r10
         mov     24(mp,i,8), %rax
         mov     %rdx, %r9
@@ -265,7 +264,7 @@ L(lo2):     add     %r10, (up,i,8)
         adc     %rax, %r9
         mov     (mp,i,8), %rax
         adc     %rdx, %r14
-       xor     %r10d, %r10d
+       xor     R32(%r10), R32(%r10)
         mul     %rbp
         add     %r9, 8(up,i,8)
         adc     %rax, %r14
@@ -277,59 +276,52 @@ L(lo2):   add     %r10, (up,i,8)
         adc     %rdx, %r10
         mov     16(mp,i,8), %rax
         mul     %rbp
-       xor     %r9d, %r9d
-       xor     %r14d, %r14d
+       xor     R32(%r9), R32(%r9)
+       xor     R32(%r14), R32(%r14)
         add     %rbx, 24(up,i,8)
         adc     %rax, %r10
         mov     24(mp,i,8), %rax
         adc     %rdx, %r9
-L(mi2):        xor     %ebx, %ebx
+L(mi2):        xor     R32(%rbx), R32(%rbx)
         mul     %rbp
         add     $4, i
         js      L(lo2)
  L(ed2):        add     %r10, 16(up)
         adc     %rax, %r9
         adc     %rdx, %r14
-       xor     %r10d, %r10d
+       xor     R32(%r10), R32(%r10)
         add     %r9, 24(up)
         adc     $0, %r14
         mov     %r14, 32(up,nneg,8)     C up[0]
         add     $8, up
         dec     n
         jnz     L(o2)
-       lea     16(mp), mp
-       lea     32(up), up
+       lea     32(up,nneg,8), up
  
  
  L(common):
-       lea     (mp,nneg,8), mp         C restore entry mp
  
  C   cy = mpn_add_n (rp, up, up - n, n);
-C                  rdi rsi  rdx    rcx
-       lea     (up,nneg,8), up         C up -= n
-       lea     (up,nneg,8), %rdx       C rdx = up - n [up entry value]
-       mov     rp, nneg                C preserve rp over first call
-       mov     8(%rsp), %rcx           C pass entry n
-C      mov     rp, %rdi
+C                  rdi rsi  rdx    rcx         STD
+C                  rcx rdx  r8     r9          DOS
+
+IFSTD(` lea    (up,nneg,8), %rdx       ') C rdx = up - n [up entry value]
+IFSTD(` mov    nneg, %rcx              ')
+IFSTD(` neg    %rcx                    ') C rcx = -nneg = n
+
+IFDOS(` lea    (up,nneg,8), %r8        ') C r8 = up - n
+IFDOS(` mov    up, %rdx                ') C rdx = up
+IFDOS(` mov    nneg, %r9               ')
+IFDOS(` neg    %r9                     ') C r9 = -nneg = n
+IFDOS(` mov    rp, %rcx                ') C rcx = rp
+
         CALL(   mpn_add_n)
-       test    R32(%rax), R32(%rax)
-       jz      L(ret)
-
-C     mpn_sub_n (rp, rp, mp, n);
-C               rdi rsi rdx rcx
-       mov     nneg, %rdi
-       mov     nneg, %rsi
-       mov     mp, %rdx
-       mov     8(%rsp), %rcx           C pass entry n
-       CALL(   mpn_sub_n)
-
-L(ret):
-       add     $8, %rsp
-       pop     n                       C just increment rsp
+
         pop     %r14
         pop     %r13
         pop     %r12
         pop     %rbx
         pop     %rbp
+       FUNC_EXIT()
         ret
  EPILOGUE()
diff --git a/mpn/x86_64/rsh1aors_n.asm b/mpn/x86_64/rsh1aors_n.asm

index 41e67e37135ff9deba5b86ba3545b0fda2a37483..4deb939dcac1b2805d56c5350e11e91d9ab7b990 100644 (file)
--- a/mpn/x86_64/rsh1aors_n.asm
+++ b/mpn/x86_64/rsh1aors_n.asm
@@ -1,6 +1,7 @@
  dnl  AMD64 mpn_rsh1add_n -- rp[] = (up[] + vp[]) >> 1
+dnl  AMD64 mpn_rsh1sub_n -- rp[] = (up[] - vp[]) >> 1
  
-dnl  Copyright 2003, 2005, 2009 Free Software Foundation, Inc.
+dnl  Copyright 2003, 2005, 2009, 2011, 2012 Free Software Foundation, Inc.
  
  dnl  This file is part of the GNU MP Library.
  
@@ -19,23 +20,24 @@ dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
  
  include(`../config.m4')
  
-
  C           cycles/limb
-C K8,K9:        2.14   (mpn_add_n + mpn_rshift need 4.125)
-C K10:          2.14   (mpn_add_n + mpn_rshift need 4.125)
-C P4:          12.75
-C P6-15:        3.75
+C AMD K8,K9     2.14   (mpn_add_n + mpn_rshift need 4.125)
+C AMD K10       2.14   (mpn_add_n + mpn_rshift need 4.125)
+C Intel P4     12.75
+C Intel core2   3.75
+C Intel NHM     4.4
+C Intel SBR     ?
+C Intel atom    ?
+C VIA nano      3.25
  
  C TODO
  C  * Rewrite to use indexed addressing, like addlsh1.asm and sublsh1.asm.
-C  * Try to approach the cache bandwidth 1.5 c/l.  It should be possible.
  
  C INPUT PARAMETERS
-define(`rp',`%rdi')
-define(`up',`%rsi')
-define(`vp',`%rdx')
-define(`n',`%rcx')
-define(`n32',`%ecx')
+define(`rp', `%rdi')
+define(`up', `%rsi')
+define(`vp', `%rdx')
+define(`n',`  %rcx')
  
  ifdef(`OPERATION_rsh1add_n', `
         define(ADDSUB,        add)
@@ -50,14 +52,18 @@ ifdef(`OPERATION_rsh1sub_n', `
  
  MULFUNC_PROLOGUE(mpn_rsh1add_n mpn_rsh1add_nc mpn_rsh1sub_n mpn_rsh1sub_nc)
  
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
+
  ASM_START()
         TEXT
-
         ALIGN(16)
  PROLOGUE(func_nc)
+       FUNC_ENTRY(4)
+IFDOS(`        mov     56(%rsp), %r8   ')
         push    %rbx
  
-       xor     %eax, %eax
+       xor     R32(%rax), R32(%rax)
         neg     %r8                     C set C flag from parameter
         mov     (up), %rbx
         ADCSBB  (vp), %rbx
@@ -66,16 +72,17 @@ EPILOGUE()
  
         ALIGN(16)
  PROLOGUE(func_n)
+       FUNC_ENTRY(4)
         push    %rbx
  
-       xor     %eax, %eax
+       xor     R32(%rax), R32(%rax)
         mov     (up), %rbx
         ADDSUB  (vp), %rbx
  L(ent):
         rcr     %rbx                    C rotate, save acy
-       adc     %eax, %eax              C return value
+       adc     R32(%rax), R32(%rax)    C return value
  
-       mov     n32, R32(%r11)
+       mov     R32(n), R32(%r11)
         and     $3, R32(%r11)
  
         cmp     $1, R32(%r11)
@@ -166,5 +173,6 @@ L(top):     add     %rbx, %rbx              C rotate carry limb, restore acy
  
  L(end):        mov     %rbx, (rp)
         pop     %rbx
+       FUNC_EXIT()
         ret
  EPILOGUE()
diff --git a/mpn/x86_64/rshift.asm b/mpn/x86_64/rshift.asm

index 3b1586828e1510c0999736d848705334071bc83b..8879f8012c657a0df0656585cd2c6bbc029c1f69 100644 (file)
--- a/mpn/x86_64/rshift.asm
+++ b/mpn/x86_64/rshift.asm
@@ -1,19 +1,19 @@
  dnl  AMD64 mpn_rshift -- mpn right shift.
  
-dnl  Copyright 2003, 2005, 2009 Free Software Foundation, Inc.
-dnl
+dnl  Copyright 2003, 2005, 2009, 2011, 2012 Free Software Foundation, Inc.
+
  dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or
-dnl  modify it under the terms of the GNU Lesser General Public License as
-dnl  published by the Free Software Foundation; either version 3 of the
-dnl  License, or (at your option) any later version.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful,
-dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
-dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-dnl  Lesser General Public License for more details.
-dnl
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
  dnl  You should have received a copy of the GNU Lesser General Public License
  dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
  
@@ -21,11 +21,13 @@ include(`../config.m4')
  
  
  C           cycles/limb
-C K8,K9:        2.375
-C K10:          2.375
-C P4:           8
-C P6-15 (Core2): 2.11
-C P6-28 (Atom):         5.75
+C AMD K8,K9     2.375
+C AMD K10       2.375
+C Intel P4      8
+C Intel core2   2.11
+C Intel corei   ?
+C Intel atom    5.75
+C VIA nano      3.5
  
  
  C INPUT PARAMETERS
@@ -34,10 +36,14 @@ define(`up',        `%rsi')
  define(`n',    `%rdx')
  define(`cnt',  `%rcx')
  
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
+
  ASM_START()
         TEXT
         ALIGN(32)
  PROLOGUE(mpn_rshift)
+       FUNC_ENTRY(4)
         neg     R32(%rcx)               C put rsh count in cl
         mov     (up), %rax
         shl     R8(%rcx), %rax          C function return value
@@ -154,5 +160,6 @@ L(end):
  L(ast):        mov     (up), %r10
         shr     R8(%rcx), %r10
         mov     %r10, (rp)
+       FUNC_EXIT()
         ret
  EPILOGUE()
diff --git a/mpn/x86_64/sqr_basecase.asm b/mpn/x86_64/sqr_basecase.asm

index cfab923f8c396f7e29540fccb01eb9f806b83df7..9c3dd76679f99533eda9545b15e5079a86b95bcf 100644 (file)
--- a/mpn/x86_64/sqr_basecase.asm
+++ b/mpn/x86_64/sqr_basecase.asm
@@ -2,7 +2,7 @@ dnl  AMD64 mpn_sqr_basecase.
  
  dnl  Contributed to the GNU project by Torbjorn Granlund.
  
-dnl  Copyright 2008, 2009 Free Software Foundation, Inc.
+dnl  Copyright 2008, 2009, 2011, 2012 Free Software Foundation, Inc.
  
  dnl  This file is part of the GNU MP Library.
  
@@ -25,14 +25,21 @@ C The inner loops of this code are the result of running a code generation and
  C optimization tool suite written by David Harvey and Torbjorn Granlund.
  
  C NOTES
-C   * This code only handles operands up to SQR_TOOM2_THRESHOLD_MAX.  That
-C     means we can safely use 32-bit operations for all sizes, unlike in e.g.,
-C     mpn_addmul_1.
+C   * There is a major stupidity in that we call mpn_mul_1 initially, for a
+C     large trip count.  Instead, we should follow the generic/sqr_basecase.c
+C     code, which uses addmul_2s from the start, conditionally leaving a 1x1
+C     multiply to the end.  (In assembly code, one would stop invoking
+C     addmul_2s loops when perhaps a 3x2s or a 2x2s remains.)
+C   * Another stupidity is in the sqr_diag_addlsh1 code.  It does not need to
+C     save/restore carry; instead it can propagate into the high product word.
+C   * Align more labels, should shave off a few cycles.
+C   * We can safely use 32-bit size operations, since operands with (2^32)
+C     limbs will lead to non-termination in practice.
  C   * The jump table could probably be optimized, at least for non-pic.
-C   * The special code for n=1,2,3 was quickly written.  It is probably too
+C   * The special code for n <= 4 was quickly written.  It is probably too
  C     large and unnecessarily slow.
-C   * Consider combining small cases code so that the n=k-1 code jumps into
-C     the middle of the n=k code.
+C   * Consider combining small cases code so that the n=k-1 code jumps into the
+C     middle of the n=k code.
  C   * Avoid saving registers for small cases code.
  C   * Needed variables:
  C    n   r11  input size
@@ -57,12 +64,6 @@ define(`rp',   `%rdi')
  define(`up',     `%rsi')
  define(`n_param', `%rdx')
  
-C We should really trim this, for better spatial locality.  Alternatively,
-C we could grab the upper part of the stack area, leaving the lower part
-C instead of the upper part unused.
-deflit(SQR_TOOM2_THRESHOLD_MAX, 80)
-define(`STACK_ALLOC', eval(8*2*SQR_TOOM2_THRESHOLD_MAX))
-
  define(`n',    `%r11')
  define(`tp',   `%r12')
  define(`i',    `%r8')
@@ -74,122 +75,137 @@ define(`w1',      `%rcx')
  define(`w2',   `%rbp')
  define(`w3',   `%r10')
  
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
  
  ASM_START()
         TEXT
         ALIGN(16)
-
  PROLOGUE(mpn_sqr_basecase)
-       add     $-48, %rsp
-       mov     %rbx, 40(%rsp)
-       mov     %rbp, 32(%rsp)
-       mov     %r12, 24(%rsp)
-       mov     %r13, 16(%rsp)
-       mov     %r14, 8(%rsp)
-
-       mov     R32(n_param), R32(n)            C free original n register (rdx)
+       FUNC_ENTRY(3)
         mov     R32(n_param), R32(%rcx)
+       mov     R32(n_param), R32(n)            C free original n register (rdx)
+
+       add     $-40, %rsp
+
         and     $3, R32(%rcx)
-       lea     4(%rcx), %rbx
         cmp     $4, R32(n_param)
-       cmovg   %rbx, %rcx
-       lea     L(jmptab)(%rip), %rax
+       lea     4(%rcx), %r8
+
+       mov     %rbx, 32(%rsp)
+       mov     %rbp, 24(%rsp)
+       mov     %r12, 16(%rsp)
+       mov     %r13, 8(%rsp)
+       mov     %r14, (%rsp)
+
+       cmovg   %r8, %rcx
+
+       lea     L(tab)(%rip), %rax
+ifdef(`PIC',
+`      movslq  (%rax,%rcx,4), %r10
+       add     %r10, %rax
+       jmp     *%rax
+',`
         jmp     *(%rax,%rcx,8)
+')
         JUMPTABSECT
         ALIGN(8)
-L(jmptab):
-       .quad   L(4)
-       .quad   L(1)
-       .quad   L(2)
-       .quad   L(3)
-       .quad   L(0m4)
-       .quad   L(1m4)
-       .quad   L(2m4)
-       .quad   L(3m4)
+L(tab):        JMPENT( L(4), L(tab))
+       JMPENT( L(1), L(tab))
+       JMPENT( L(2), L(tab))
+       JMPENT( L(3), L(tab))
+       JMPENT( L(0m4), L(tab))
+       JMPENT( L(1m4), L(tab))
+       JMPENT( L(2m4), L(tab))
+       JMPENT( L(3m4), L(tab))
         TEXT
  
  L(1):  mov     (up), %rax
         mul     %rax
+       add     $40, %rsp
         mov     %rax, (rp)
         mov     %rdx, 8(rp)
-       add     $40, %rsp
-       pop     %rbx
+       FUNC_EXIT()
         ret
  
  L(2):  mov     (up), %rax
+       mov     %rax, %r8
         mul     %rax
+       mov     8(up), %r11
         mov     %rax, (rp)
+       mov     %r11, %rax
         mov     %rdx, %r9
-       mov     8(up), %rax
         mul     %rax
+       add     $40, %rsp
         mov     %rax, %r10
+       mov     %r11, %rax
         mov     %rdx, %r11
-       mov     8(up), %rax
-       mov     (up), %rbx
-       mul     %rbx
+       mul     %r8
+       xor     %r8, %r8
         add     %rax, %r9
         adc     %rdx, %r10
-       adc     $0, %r11
+       adc     %r8, %r11
         add     %rax, %r9
         mov     %r9, 8(rp)
         adc     %rdx, %r10
         mov     %r10, 16(rp)
-       adc     $0, %r11
+       adc     %r8, %r11
         mov     %r11, 24(rp)
-       add     $40, %rsp
-       pop     %rbx
+       FUNC_EXIT()
         ret
  
  L(3):  mov     (up), %rax
+       mov     %rax, %r10
         mul     %rax
+       mov     8(up), %r11
         mov     %rax, (rp)
+       mov     %r11, %rax
         mov     %rdx, 8(rp)
-       mov     8(up), %rax
         mul     %rax
+       mov     16(up), %rcx
         mov     %rax, 16(rp)
+       mov     %rcx, %rax
         mov     %rdx, 24(rp)
-       mov     16(up), %rax
         mul     %rax
         mov     %rax, 32(rp)
         mov     %rdx, 40(rp)
  
-       mov     (up), %rbx
-       mov     8(up), %rax
-       mul     %rbx
+       mov     %r11, %rax
+       mul     %r10
         mov     %rax, %r8
+       mov     %rcx, %rax
         mov     %rdx, %r9
-       mov     16(up), %rax
-       mul     %rbx
-       xor     R32(%r10), R32(%r10)
+       mul     %r10
+       xor     %r10, %r10
         add     %rax, %r9
+       mov     %r11, %rax
+       mov     %r10, %r11
         adc     %rdx, %r10
  
-       mov     8(up), %rbx
-       mov     16(up), %rax
-       mul     %rbx
-       xor     R32(%r11), R32(%r11)
+       mul     %rcx
+       add     $40, %rsp
         add     %rax, %r10
-       adc     %rdx, %r11
+       adc     %r11, %rdx
         add     %r8, %r8
         adc     %r9, %r9
         adc     %r10, %r10
+       adc     %rdx, %rdx
         adc     %r11, %r11
-       mov     $0, R32(%rbx)
-       adc     %rbx, %rbx
         add     %r8, 8(rp)
         adc     %r9, 16(rp)
         adc     %r10, 24(rp)
-       adc     %r11, 32(rp)
-       adc     %rbx, 40(rp)
-       add     $40, %rsp
-       pop     %rbx
+       adc     %rdx, 32(rp)
+       adc     %r11, 40(rp)
+       FUNC_EXIT()
         ret
  
  L(4):  mov     (up), %rax
+       mov     %rax, %r11
         mul     %rax
+       mov     8(up), %rbx
         mov     %rax, (rp)
+       mov     %rbx, %rax
         mov     %rdx, 8(rp)
-       mov     8(up), %rax
         mul     %rax
         mov     %rax, 16(rp)
         mov     %rdx, 24(rp)
@@ -200,66 +216,62 @@ L(4):     mov     (up), %rax
         mov     24(up), %rax
         mul     %rax
         mov     %rax, 48(rp)
+       mov     %rbx, %rax
         mov     %rdx, 56(rp)
  
-       mov     (up), %rbx
-       mov     8(up), %rax
-       mul     %rbx
+       mul     %r11
+       add     $32, %rsp
         mov     %rax, %r8
         mov     %rdx, %r9
         mov     16(up), %rax
-       mul     %rbx
-       xor     R32(%r10), R32(%r10)
+       mul     %r11
+       xor     %r10, %r10
         add     %rax, %r9
         adc     %rdx, %r10
         mov     24(up), %rax
-       mul     %rbx
-       xor     R32(%r11), R32(%r11)
+       mul     %r11
+       xor     %r11, %r11
         add     %rax, %r10
         adc     %rdx, %r11
-       mov     8(up), %rbx
         mov     16(up), %rax
         mul     %rbx
-       xor     R32(%r12), R32(%r12)
+       xor     %rcx, %rcx
         add     %rax, %r10
         adc     %rdx, %r11
-       adc     $0, %r12
+       adc     $0, %rcx
         mov     24(up), %rax
         mul     %rbx
+       pop     %rbx
         add     %rax, %r11
-       adc     %rdx, %r12
-       mov     16(up), %rbx
+       adc     %rdx, %rcx
+       mov     16(up), %rdx
         mov     24(up), %rax
-       mul     %rbx
-       xor     R32(%rbp), R32(%rbp)
-       add     %rax, %r12
-       adc     %rdx, %rbp
+       mul     %rdx
+       add     %rax, %rcx
+       adc     $0, %rdx
  
         add     %r8, %r8
         adc     %r9, %r9
         adc     %r10, %r10
         adc     %r11, %r11
-       adc     %r12, %r12
-       mov     $0, R32(%rbx)
-       adc     %rbp, %rbp
+       adc     %rcx, %rcx
+       mov     $0, R32(%rax)
+       adc     %rdx, %rdx
  
-       adc     %rbx, %rbx
+       adc     %rax, %rax
         add     %r8, 8(rp)
         adc     %r9, 16(rp)
         adc     %r10, 24(rp)
         adc     %r11, 32(rp)
-       adc     %r12, 40(rp)
-       adc     %rbp, 48(rp)
-       adc     %rbx, 56(rp)
-       add     $24, %rsp
-       pop     %r12
-       pop     %rbp
-       pop     %rbx
+       adc     %rcx, 40(rp)
+       adc     %rdx, 48(rp)
+       adc     %rax, 56(rp)
+       FUNC_EXIT()
         ret
  
  
-L(0m4):        add     $-STACK_ALLOC, %rsp
-       lea     -24(%rsp,n,8), tp               C point tp in middle of result operand
+L(0m4):
+       lea     -16(rp,n,8), tp         C point tp in middle of result operand
         mov     (up), v0
         mov     8(up), %rax
         lea     (up,n,8), up            C point up at end of input operand
@@ -316,8 +328,8 @@ L(L3):      xor     R32(w1), R32(w1)
         jmp     L(dowhile)
  
  
-L(1m4):        add     $-STACK_ALLOC, %rsp
-       lea     (%rsp,n,8), tp          C point tp in middle of result operand
+L(1m4):
+       lea     8(rp,n,8), tp           C point tp in middle of result operand
         mov     (up), v0                C u0
         mov     8(up), %rax             C u1
         lea     8(up,n,8), up           C point up at end of input operand
@@ -331,7 +343,7 @@ C Function mpn_mul_2s_m0(tp, up - i, i, up - i - 1)
         mul     v0                      C u0 * u1
         mov     %rdx, w1
         xor     R32(w2), R32(w2)
-       mov     %rax, (%rsp)
+       mov     %rax, 8(rp)
         jmp     L(m0)
  
         ALIGN(16)
@@ -394,8 +406,8 @@ L(m2x):     mov     (up,j,8), %rax
         jmp     L(dowhile_end)
  
  
-L(2m4):        add     $-STACK_ALLOC, %rsp
-       lea     -24(%rsp,n,8), tp       C point tp in middle of result operand
+L(2m4):
+       lea     -16(rp,n,8), tp         C point tp in middle of result operand
         mov     (up), v0
         mov     8(up), %rax
         lea     (up,n,8), up            C point up at end of input operand
@@ -451,8 +463,8 @@ L(L1):      xor     R32(w0), R32(w0)
         jmp     L(dowhile_mid)
  
  
-L(3m4):        add     $-STACK_ALLOC, %rsp
-       lea     (%rsp,n,8), tp          C point tp in middle of result operand
+L(3m4):
+       lea     8(rp,n,8), tp           C point tp in middle of result operand
         mov     (up), v0                C u0
         mov     8(up), %rax             C u1
         lea     8(up,n,8), up           C point up at end of input operand
@@ -467,7 +479,7 @@ C Function mpn_mul_2s_m2(tp, up - i + 1, i - 1, up - i)
         mov     %rdx, w3
         xor     R32(w0), R32(w0)
         xor     R32(w1), R32(w1)
-       mov     %rax, (%rsp)
+       mov     %rax, 8(rp)
         jmp     L(m2)
  
         ALIGN(16)
@@ -704,11 +716,9 @@ C Function mpn_addmul_2s_2
  C Function mpn_sqr_diag_addlsh1
         lea     -4(n,n), j
  
-       mov     (%rsp), %r11
-
-       lea     (rp,j,8), rp
+       mov     8(rp), %r11
         lea     -8(up), up
-       lea     8(%rsp,j,8), tp
+       lea     (rp,j,8), rp
         neg     j
         mov     (up,j,4), %rax
         mul     %rax
@@ -736,9 +746,9 @@ L(top):     mov     (up,j,4), %rax
         adc     %rdx, %r11
         mov     %r10, (rp,j,8)
  L(d0): mov     %r11, 8(rp,j,8)
-       mov     (tp,j,8), %r10
+       mov     16(rp,j,8), %r10
         adc     %r10, %r10
-       mov     8(tp,j,8), %r11
+       mov     24(rp,j,8), %r11
         adc     %r11, %r11
         nop
         sbb     R32(%rbp), R32(%rbp)            C save CF
@@ -749,9 +759,9 @@ L(d0):      mov     %r11, 8(rp,j,8)
         adc     %rdx, %r11
         mov     %r10, 16(rp,j,8)
  L(d1): mov     %r11, 24(rp,j,8)
-       mov     16(tp,j,8), %r10
+       mov     32(rp,j,8), %r10
         adc     %r10, %r10
-       mov     24(tp,j,8), %r11
+       mov     40(rp,j,8), %r11
         adc     %r11, %r11
         sbb     R32(%rbx), R32(%rbx)            C save CF
         add     $4, j
@@ -764,7 +774,7 @@ L(d1):      mov     %r11, 24(rp,j,8)
         adc     %rdx, %r11
         mov     %r10, (rp)
         mov     %r11, 8(rp)
-       mov     (tp), %r10
+       mov     16(rp), %r10
         adc     %r10, %r10
         sbb     R32(%rbp), R32(%rbp)            C save CF
         neg     R32(%rbp)
@@ -776,11 +786,11 @@ L(d1):    mov     %r11, 24(rp,j,8)
         mov     %r10, 16(rp)
         mov     %rdx, 24(rp)
  
-       add     $eval(8+STACK_ALLOC), %rsp
         pop     %r14
         pop     %r13
         pop     %r12
         pop     %rbp
         pop     %rbx
+       FUNC_EXIT()
         ret
  EPILOGUE()
diff --git a/mpn/x86_64/sublsh1_n.asm b/mpn/x86_64/sublsh1_n.asm

index 6f67fae363f9a590d693fe67d511f9f38bc2b11d..678a60b976b7cb51013373b68c6a02ec9db0837c 100644 (file)
--- a/mpn/x86_64/sublsh1_n.asm
+++ b/mpn/x86_64/sublsh1_n.asm
@@ -1,6 +1,7 @@
  dnl  AMD64 mpn_sublsh1_n -- rp[] = up[] - (vp[] << 1)
  
-dnl  Copyright 2003, 2005, 2006, 2007 Free Software Foundation, Inc.
+dnl  Copyright 2003, 2005, 2006, 2007, 2011, 2012 Free Software Foundation,
+dnl  Inc.
  
  dnl  This file is part of the GNU MP Library.
  
@@ -21,13 +22,13 @@ include(`../config.m4')
  
  
  C           cycles/limb
-C K8,K9:        2.2
-C K10:          2.2
-C P4:          12.75
-C P6 core2:     3.45
-C P6 corei7:    3.45
-C P6 atom:      ?
-
+C AMD K8,K9     2.2
+C AMD K10       2.2
+C Intel P4     12.75
+C Intel core2   3.45
+C Intel corei   ?
+C Intel atom    ?
+C VIA nano      3.25
  
  C Sometimes speed degenerates, supposedly related to that some operand
  C alignments cause cache conflicts.
@@ -41,10 +42,14 @@ define(`up',`%rsi')
  define(`vp',`%rdx')
  define(`n', `%rcx')
  
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
+
  ASM_START()
         TEXT
         ALIGN(16)
  PROLOGUE(mpn_sublsh1_n)
+       FUNC_ENTRY(4)
         push    %rbx
         push    %rbp
  
@@ -140,5 +145,6 @@ L(end):     add     R32(%rbp), R32(%rax)
  
         pop     %rbp
         pop     %rbx
+       FUNC_EXIT()
         ret
  EPILOGUE()
diff --git a/mpn/x86_64/tabselect.asm b/mpn/x86_64/tabselect.asm

new file mode 100644 (file)

index 0000000..e578b6e
--- /dev/null
+++ b/mpn/x86_64/tabselect.asm
@@ -0,0 +1,115 @@
+dnl  AMD64 mpn_tabselect.
+
+dnl  Copyright 2011, 2012 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+C           cycles/limb
+C AMD K8,K9     2.5
+C AMD K10       2.5
+C AMD bobcat    3.5
+C Intel P4      4
+C Intel core2   2.33
+C Intel NHM     2.5
+C Intel SBR     2.2
+C Intel atom    5
+C VIA nano      3.5
+
+C NOTES
+C  * This has not been tuned for any specific processor.  Its speed should not
+C    be too bad, though.
+C  * Using SSE2/AVX2 could result in many-fold speedup.
+
+C mpn_tabselect (mp_limb_t *rp, mp_limb_t *tp, mp_size_t n, mp_size_t nents, mp_size_t which)
+define(`rp',     `%rdi')
+define(`tp',     `%rsi')
+define(`n',      `%rdx')
+define(`nents',  `%rcx')
+define(`which',  `%r8')
+
+define(`i',      `%rbp')
+define(`maskp',  `%r11')
+define(`maskn',  `%r12')
+
+C rax rbx  rcx  rdx rdi rsi rbp (rsp)  r8   r9 r10 r11 r12 r13 r14 r15
+C         nents  n  rp  tp            which
+
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
+
+ASM_START()
+       TEXT
+       ALIGN(16)
+PROLOGUE(mpn_tabselect)
+       FUNC_ENTRY(4)
+IFDOS(`        mov     56(%rsp), %r8d  ')
+       push    %rbx                    C saved: second data register in L(top)
+       push    %rbp                    C saved: holds the limb index i
+       push    %r12                    C saved: holds maskn
+
+       lea     (rp,n,8), rp            C rp += n limbs; indexed below with negative i
+       lea     (tp,n,8), tp            C tp += n limbs; same negative indexing
+       sub     nents, which            C bias which so which+nents is 0 at the wanted entry
+L(outer):
+       lea     (which,nents), %rax     C rax = biased which + entries still to visit
+       neg     %rax                    C set CF iff rax != 0, i.e. 'which' != k (not the wanted entry)
+       sbb     maskn, maskn            C maskn = -CF: all ones to keep rp, 0 at wanted entry
+       mov     maskn, maskp
+       not     maskp                   C maskp = ~maskn: all ones only at wanted entry
+
+       mov     n, i
+       neg     i                       C i = -n, counts up towards 0
+       test    $1, R32(n)
+       je      L(top)                  C even n: go straight to the 2-limb loop
+       mov     (tp,i,8), %rax          C odd n: handle one limb first
+       and     maskp, %rax
+       mov     (rp,i,8), %r9
+       and     maskn, %r9
+       or      %r9, %rax               C (tp limb & maskp) | (rp limb & maskn)
+       mov     %rax, (rp,i,8)
+       add     $1, i
+       jns     L(end)                  C i reached 0: that was the only limb
+
+       ALIGN(16)
+L(top):        mov     (tp,i,8), %rax  C two limbs per iteration
+       mov     8(tp,i,8), %rbx
+       and     maskp, %rax
+       and     maskp, %rbx
+       mov     (rp,i,8), %r9
+       mov     8(rp,i,8), %r10
+       and     maskn, %r9
+       and     maskn, %r10
+       or      %r9, %rax               C same select as above, limb 0
+       or      %r10, %rbx              C same select as above, limb 1
+       mov     %rax, (rp,i,8)
+       mov     %rbx, 8(rp,i,8)
+       add     $2, i
+       js      L(top)                  C loop while i is still negative
+
+L(end):        lea     (tp,n,8), tp    C step tp to the next n-limb table entry
+       dec     nents
+       jne     L(outer)                C every entry is read, selected or not
+
+L(outer_end):
+       pop     %r12                    C restore in reverse push order
+       pop     %rbp
+       pop     %rbx
+       FUNC_EXIT()
+       ret
+EPILOGUE()
diff --git a/mpn/x86_64/x86_64-defs.m4 b/mpn/x86_64/x86_64-defs.m4

index 6942a788231e809defe574b55ea66649f07bc380..0e9a8b16ca45f0026184e6ac2a6edcda93e1947d 100644 (file)
--- a/mpn/x86_64/x86_64-defs.m4
+++ b/mpn/x86_64/x86_64-defs.m4
@@ -2,8 +2,8 @@ divert(-1)
  
  dnl  m4 macros for amd64 assembler.
  
-dnl  Copyright 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2008, 2009 Free
-dnl  Software Foundation, Inc.
+dnl  Copyright 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2008, 2009, 2011,
+dnl  2012, 2013 Free Software Foundation, Inc.
  dnl
  dnl  This file is part of the GNU MP Library.
  dnl
@@ -28,24 +28,39 @@ dnl  order they appear in that structure.
  
  define(CPUVEC_FUNCS_LIST,
  ``add_n',
+`addlsh1_n',
+`addlsh2_n',
  `addmul_1',
+`addmul_2',
+`bdiv_dbm1c',
+`com',
  `copyd',
  `copyi',
  `divexact_1',
-`divexact_by3c',
  `divrem_1',
  `gcd_1',
  `lshift',
+`lshiftc',
  `mod_1',
+`mod_1_1p',
+`mod_1_1p_cps',
+`mod_1s_2p',
+`mod_1s_2p_cps',
+`mod_1s_4p',
+`mod_1s_4p_cps',
  `mod_34lsub1',
  `modexact_1c_odd',
  `mul_1',
  `mul_basecase',
+`mullo_basecase',
  `preinv_divrem_1',
  `preinv_mod_1',
+`redc_1',
+`redc_2',
  `rshift',
  `sqr_basecase',
  `sub_n',
+`sublsh1_n',
  `submul_1'')
  
  
@@ -108,8 +123,11 @@ define(`ASSERT_counter',incr(ASSERT_counter))')')')
  
  define(ASSERT_counter,1)
  
-define(`LEA',`
-       mov     $1@GOTPCREL(%rip), $2
+define(`LEA',`dnl
+ifdef(`PIC',
+       `mov    $1@GOTPCREL(%rip), $2'
+,
+       `movabs `$'$1, $2')
  ')
  
  
@@ -169,4 +187,95 @@ ifdef(`PIC',
  
  define(`JUMPTABSECT', `.section        .data.rel.ro.local,"aw",@progbits')
  
+
+dnl  Usage: JMPENT(targlabel,tablabel)
+
+define(`JMPENT',`dnl
+ifdef(`PIC',
+       `.long  $1-$2'
+,
+       `.quad  $1'
+)')
+
+
+dnl  These macros are empty here; they get real definitions just for DOS64,
+dnl  where they provide calling sequence glue code.
+
+define(`FUNC_ENTRY',`')
+define(`FUNC_EXIT',`')
+
+
+dnl  Target ABI macros.
+
+define(`IFDOS',   `')
+define(`IFSTD',   `$1')
+define(`IFELF',   `$1')
+
+
+dnl  Usage: PROTECT(symbol)
+dnl
+dnl  Used for private GMP symbols that should never be overridden by users.
+dnl  This can save reloc entries and improve shlib sharing as well as
+dnl  application startup times.
+
+define(`PROTECT',  `.hidden $1')
+
+
+dnl  Usage: x86_lookup(target, key,value, key,value, ...)
+dnl
+dnl  Look for `target' among the `key' parameters.
+dnl
+dnl  x86_lookup expands to the corresponding `value', or generates an error
+dnl  if `target' isn't found.
+
+define(x86_lookup,
+m4_assert_numargs_range(1,999)
+`ifelse(eval($#<3),1,
+`m4_error(`unrecognised part of x86 instruction: $1
+')',
+`ifelse(`$1',`$2', `$3',
+`x86_lookup(`$1',shift(shift(shift($@))))')')')
+
+
+dnl  Usage: x86_opcode_regxmm(reg)
+dnl
+dnl  Validate the given xmm register, and return its number, 0 to 15.
+
+define(x86_opcode_regxmm,
+m4_assert_numargs(1)
+`x86_lookup(`$1',x86_opcode_regxmm_list)')
+
+define(x86_opcode_regxmm_list,
+``%xmm0',0,
+`%xmm1',1,
+`%xmm2',2,
+`%xmm3',3,
+`%xmm4',4,
+`%xmm5',5,
+`%xmm6',6,
+`%xmm7',7,
+`%xmm8',8,
+`%xmm9',9,
+`%xmm10',10,
+`%xmm11',11,
+`%xmm12',12,
+`%xmm13',13,
+`%xmm14',14,
+`%xmm15',15')
+
+dnl  Usage: palignr($imm,%srcreg,%dstreg)
+dnl
+dnl  Emit a palignr instruction, using a .byte sequence, since obsolete but
+dnl  still distributed versions of gas don't know SSSE3 instructions.
+
+define(`palignr',
+m4_assert_numargs(3)
+`.byte 0x66,dnl
+ifelse(eval(x86_opcode_regxmm($3) >= 8 || x86_opcode_regxmm($2) >= 8),1,
+       `eval(0x40+x86_opcode_regxmm($3)/8*4+x86_opcode_regxmm($2)/8),')dnl
+0x0f,0x3a,0x0f,dnl
+eval(0xc0+x86_opcode_regxmm($3)%8*8+x86_opcode_regxmm($2)%8),dnl
+substr($1,1)')
+
+
  divert`'dnl
diff --git a/mpn/z8000/README b/mpn/z8000/README

index e1cf22df4296ab34b55e816aeb20f07381a7f9a9..c58420e194376258d5e6f3b7c4cef56f41794129 100644 (file)
--- a/mpn/z8000/README
+++ b/mpn/z8000/README
@@ -32,14 +32,3 @@ This code is old and has not been used for a long time.
  mpn/z8000 uses a 16-bit limb, it's possible this doesn't really work, on
  account of various bits of C code assuming limb>=long and of course long is
  invariably at least 32 bits.
-
-mpn/z8000x uses a 32-bit limb, this could perhaps be an ABI choice.
-Currently it's reached only by an MPN_PATH override.
-
-
-
-----------------
-Local variables:
-mode: text
-fill-column: 76
-End:
diff --git a/mpn/z8000x/add_n.s b/mpn/z8000x/add_n.s

deleted file mode 100644 (file)

index 26b47e2..0000000
--- a/mpn/z8000x/add_n.s
+++ /dev/null
@@ -1,54 +0,0 @@
-! Z8000 (32 bit limb version) __gmpn_add_n -- Add two limb vectors of equal,
-! non-zero length.
-
-! Copyright 1993, 1994, 2000 Free Software Foundation, Inc.
-
-! This file is part of the GNU MP Library.
-
-! The GNU MP Library is free software; you can redistribute it and/or modify
-! it under the terms of the GNU Lesser General Public License as published by
-! the Free Software Foundation; either version 3 of the License, or (at your
-! option) any later version.
-
-! The GNU MP Library is distributed in the hope that it will be useful, but
-! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-! or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-! License for more details.
-
-! You should have received a copy of the GNU Lesser General Public License
-! along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
-
-
-! INPUT PARAMETERS
-! res_ptr      r7
-! s1_ptr       r6
-! s2_ptr       r5
-! size         r4
-
-! If we are really crazy, we can use push to write a few result words
-! backwards, using push just because it is faster than reg+disp.  We'd
-! then add 2x the number of words written to r7...
-
-       segm
-       .text
-       even
-       global ___gmpn_add_n
-___gmpn_add_n:
-       popl    rr0,@r6
-       popl    rr8,@r5
-       addl    rr0,rr8
-       ldl     @r7,rr0
-       dec     r4
-       jr      eq,Lend
-Loop:  popl    rr0,@r6
-       popl    rr8,@r5
-       adc     r1,r9
-       adc     r0,r8
-       inc     r7,#4
-       ldl     @r7,rr0
-       dec     r4
-       jr      ne,Loop
-Lend:  ld      r2,r4           ! use 0 already in r4
-       ld      r3,r4
-       adc     r2,r2
-       ret     t
diff --git a/mpn/z8000x/sub_n.s b/mpn/z8000x/sub_n.s

deleted file mode 100644 (file)

index 837ecef..0000000
--- a/mpn/z8000x/sub_n.s
+++ /dev/null
@@ -1,54 +0,0 @@
-! Z8000 (32 bit limb version) __gmpn_sub_n -- Subtract two limb vectors of the
-! same length > 0 and store difference in a third limb vector.
-
-! Copyright 1993, 1994, 2000 Free Software Foundation, Inc.
-
-! This file is part of the GNU MP Library.
-
-! The GNU MP Library is free software; you can redistribute it and/or modify
-! it under the terms of the GNU Lesser General Public License as published by
-! the Free Software Foundation; either version 3 of the License, or (at your
-! option) any later version.
-
-! The GNU MP Library is distributed in the hope that it will be useful, but
-! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-! or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-! License for more details.
-
-! You should have received a copy of the GNU Lesser General Public License
-! along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
-
-
-! INPUT PARAMETERS
-! res_ptr      r7
-! s1_ptr       r6
-! s2_ptr       r5
-! size         r4
-
-! If we are really crazy, we can use push to write a few result words
-! backwards, using push just because it is faster than reg+disp.  We'd
-! then add 2x the number of words written to r7...
-
-       segm
-       .text
-       even
-       global ___gmpn_sub_n
-___gmpn_sub_n:
-       popl    rr0,@r6
-       popl    rr8,@r5
-       subl    rr0,rr8
-       ldl     @r7,rr0
-       dec     r4
-       jr      eq,Lend
-Loop:  popl    rr0,@r6
-       popl    rr8,@r5
-       sbc     r1,r9
-       sbc     r0,r8
-       inc     r7,#4
-       ldl     @r7,rr0
-       dec     r4
-       jr      ne,Loop
-Lend:  ld      r2,r4           ! use 0 already in r4
-       ld      r3,r4
-       adc     r2,r2
-       ret     t
diff --git a/mpq/Makefile.in b/mpq/Makefile.in

index d3e90a73ae16078e025b13217c088b031719a613..e87ec18092adb8168299da0ed013baff96a35425 100644 (file)
--- a/mpq/Makefile.in
+++ b/mpq/Makefile.in
@@ -1,9 +1,9 @@
-# Makefile.in generated by automake 1.11.1 from Makefile.am.
+# Makefile.in generated by automake 1.11.6 from Makefile.am.
  # @configure_input@
  
  # Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
-# 2003, 2004, 2005, 2006, 2007, 2008, 2009  Free Software Foundation,
-# Inc.
+# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software
+# Foundation, Inc.
  # This Makefile.in is free software; the Free Software Foundation
  # gives unlimited permission to copy and/or distribute it,
  # with or without modifications, as long as this notice is preserved.
@@ -33,6 +33,23 @@
  # along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
  
  VPATH = @srcdir@
+am__make_dryrun = \
+  { \
+    am__dry=no; \
+    case $$MAKEFLAGS in \
+      *\\[\ \  ]*) \
+        echo 'am--echo: ; @echo "AM"  OK' | $(MAKE) -f - 2>/dev/null \
+          | grep '^AM OK$$' >/dev/null || am__dry=yes;; \
+      *) \
+        for am__flg in $$MAKEFLAGS; do \
+          case $$am__flg in \
+            *=*|--*) ;; \
+            *n*) am__dry=yes; break;; \
+          esac; \
+        done;; \
+    esac; \
+    test $$am__dry = yes; \
+  }
  pkgdatadir = $(datadir)/@PACKAGE@
  pkgincludedir = $(includedir)/@PACKAGE@
  pkglibdir = $(libdir)/@PACKAGE@
@@ -51,12 +68,11 @@ PRE_UNINSTALL = :
  POST_UNINSTALL = :
  build_triplet = @build@
  host_triplet = @host@
-ANSI2KNR = $(top_builddir)/ansi2knr
  subdir = mpq
  DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in
  ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
  am__aclocal_m4_deps = $(top_srcdir)/acinclude.m4 \
-       $(top_srcdir)/configure.in
+       $(top_srcdir)/configure.ac
  am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
         $(ACLOCAL_M4)
  mkinstalldirs = $(install_sh) -d
@@ -65,13 +81,12 @@ CONFIG_CLEAN_FILES =
  CONFIG_CLEAN_VPATH_FILES =
  LTLIBRARIES = $(noinst_LTLIBRARIES)
  libmpq_la_LIBADD =
-am_libmpq_la_OBJECTS = abs$U.lo aors$U.lo canonicalize$U.lo clear$U.lo \
-       clears$U.lo cmp$U.lo cmp_si$U.lo cmp_ui$U.lo div$U.lo \
-       equal$U.lo get_d$U.lo get_den$U.lo get_num$U.lo get_str$U.lo \
-       init$U.lo inits$U.lo inp_str$U.lo inv$U.lo md_2exp$U.lo \
-       mul$U.lo neg$U.lo out_str$U.lo set$U.lo set_den$U.lo \
-       set_num$U.lo set_si$U.lo set_str$U.lo set_ui$U.lo set_z$U.lo \
-       set_d$U.lo set_f$U.lo swap$U.lo
+am_libmpq_la_OBJECTS = abs.lo aors.lo canonicalize.lo clear.lo \
+       clears.lo cmp.lo cmp_si.lo cmp_ui.lo div.lo equal.lo get_d.lo \
+       get_den.lo get_num.lo get_str.lo init.lo inits.lo inp_str.lo \
+       inv.lo md_2exp.lo mul.lo neg.lo out_str.lo set.lo set_den.lo \
+       set_num.lo set_si.lo set_str.lo set_ui.lo set_z.lo set_d.lo \
+       set_f.lo swap.lo
  libmpq_la_OBJECTS = $(am_libmpq_la_OBJECTS)
  DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir)
  depcomp =
@@ -87,6 +102,11 @@ LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \
         $(LDFLAGS) -o $@
  SOURCES = $(libmpq_la_SOURCES)
  DIST_SOURCES = $(libmpq_la_SOURCES)
+am__can_run_installinfo = \
+  case $$AM_UPDATE_INFO_DIR in \
+    n|no|NO) false;; \
+    *) (install-info --version) >/dev/null 2>&1;; \
+  esac
  ETAGS = etags
  CTAGS = ctags
  DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
@@ -188,8 +208,8 @@ SHELL = @SHELL@
  SPEED_CYCLECOUNTER_OBJ = @SPEED_CYCLECOUNTER_OBJ@
  STRIP = @STRIP@
  TAL_OBJECT = @TAL_OBJECT@
+TUNE_LIBS = @TUNE_LIBS@
  TUNE_SQR_OBJ = @TUNE_SQR_OBJ@
-U = @U@
  U_FOR_BUILD = @U_FOR_BUILD@
  VERSION = @VERSION@
  WITH_READLINE_01 = @WITH_READLINE_01@
@@ -236,7 +256,6 @@ mandir = @mandir@
  mkdir_p = @mkdir_p@
  mpn_objects = @mpn_objects@
  mpn_objs_in_libgmp = @mpn_objs_in_libgmp@
-mpn_objs_in_libmp = @mpn_objs_in_libmp@
  oldincludedir = @oldincludedir@
  pdfdir = @pdfdir@
  prefix = @prefix@
@@ -303,7 +322,7 @@ clean-noinstLTLIBRARIES:
           echo "rm -f \"$${dir}/so_locations\""; \
           rm -f "$${dir}/so_locations"; \
         done
-libmpq.la: $(libmpq_la_OBJECTS) $(libmpq_la_DEPENDENCIES) 
+libmpq.la: $(libmpq_la_OBJECTS) $(libmpq_la_DEPENDENCIES) $(EXTRA_libmpq_la_DEPENDENCIES) 
         $(LINK)  $(libmpq_la_OBJECTS) $(libmpq_la_LIBADD) $(LIBS)
  
  mostlyclean-compile:
@@ -311,11 +330,6 @@ mostlyclean-compile:
  
  distclean-compile:
         -rm -f *.tab.c
-$(top_builddir)/ansi2knr:
-       $(am__cd) $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) ./ansi2knr
-
-mostlyclean-kr:
-       -test "$U" = "" || rm -f *_.c
  
  .c.o:
         $(COMPILE) -c $<
@@ -325,85 +339,6 @@ mostlyclean-kr:
  
  .c.lo:
         $(LTCOMPILE) -c -o $@ $<
-abs_.c: abs.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/abs.c; then echo $(srcdir)/abs.c; else echo abs.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-aors_.c: aors.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/aors.c; then echo $(srcdir)/aors.c; else echo aors.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-canonicalize_.c: canonicalize.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/canonicalize.c; then echo $(srcdir)/canonicalize.c; else echo canonicalize.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-clear_.c: clear.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/clear.c; then echo $(srcdir)/clear.c; else echo clear.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-clears_.c: clears.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/clears.c; then echo $(srcdir)/clears.c; else echo clears.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-cmp_.c: cmp.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/cmp.c; then echo $(srcdir)/cmp.c; else echo cmp.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-cmp_si_.c: cmp_si.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/cmp_si.c; then echo $(srcdir)/cmp_si.c; else echo cmp_si.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-cmp_ui_.c: cmp_ui.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/cmp_ui.c; then echo $(srcdir)/cmp_ui.c; else echo cmp_ui.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-div_.c: div.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/div.c; then echo $(srcdir)/div.c; else echo div.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-equal_.c: equal.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/equal.c; then echo $(srcdir)/equal.c; else echo equal.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-get_d_.c: get_d.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/get_d.c; then echo $(srcdir)/get_d.c; else echo get_d.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-get_den_.c: get_den.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/get_den.c; then echo $(srcdir)/get_den.c; else echo get_den.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-get_num_.c: get_num.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/get_num.c; then echo $(srcdir)/get_num.c; else echo get_num.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-get_str_.c: get_str.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/get_str.c; then echo $(srcdir)/get_str.c; else echo get_str.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-init_.c: init.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/init.c; then echo $(srcdir)/init.c; else echo init.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-inits_.c: inits.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/inits.c; then echo $(srcdir)/inits.c; else echo inits.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-inp_str_.c: inp_str.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/inp_str.c; then echo $(srcdir)/inp_str.c; else echo inp_str.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-inv_.c: inv.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/inv.c; then echo $(srcdir)/inv.c; else echo inv.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-md_2exp_.c: md_2exp.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/md_2exp.c; then echo $(srcdir)/md_2exp.c; else echo md_2exp.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-mul_.c: mul.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/mul.c; then echo $(srcdir)/mul.c; else echo mul.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-neg_.c: neg.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/neg.c; then echo $(srcdir)/neg.c; else echo neg.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-out_str_.c: out_str.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/out_str.c; then echo $(srcdir)/out_str.c; else echo out_str.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-set_.c: set.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/set.c; then echo $(srcdir)/set.c; else echo set.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-set_d_.c: set_d.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/set_d.c; then echo $(srcdir)/set_d.c; else echo set_d.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-set_den_.c: set_den.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/set_den.c; then echo $(srcdir)/set_den.c; else echo set_den.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-set_f_.c: set_f.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/set_f.c; then echo $(srcdir)/set_f.c; else echo set_f.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-set_num_.c: set_num.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/set_num.c; then echo $(srcdir)/set_num.c; else echo set_num.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-set_si_.c: set_si.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/set_si.c; then echo $(srcdir)/set_si.c; else echo set_si.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-set_str_.c: set_str.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/set_str.c; then echo $(srcdir)/set_str.c; else echo set_str.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-set_ui_.c: set_ui.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/set_ui.c; then echo $(srcdir)/set_ui.c; else echo set_ui.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-set_z_.c: set_z.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/set_z.c; then echo $(srcdir)/set_z.c; else echo set_z.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-swap_.c: swap.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/swap.c; then echo $(srcdir)/swap.c; else echo swap.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-abs_.$(OBJEXT) abs_.lo aors_.$(OBJEXT) aors_.lo \
-canonicalize_.$(OBJEXT) canonicalize_.lo clear_.$(OBJEXT) clear_.lo \
-clears_.$(OBJEXT) clears_.lo cmp_.$(OBJEXT) cmp_.lo cmp_si_.$(OBJEXT) \
-cmp_si_.lo cmp_ui_.$(OBJEXT) cmp_ui_.lo div_.$(OBJEXT) div_.lo \
-equal_.$(OBJEXT) equal_.lo get_d_.$(OBJEXT) get_d_.lo \
-get_den_.$(OBJEXT) get_den_.lo get_num_.$(OBJEXT) get_num_.lo \
-get_str_.$(OBJEXT) get_str_.lo init_.$(OBJEXT) init_.lo \
-inits_.$(OBJEXT) inits_.lo inp_str_.$(OBJEXT) inp_str_.lo \
-inv_.$(OBJEXT) inv_.lo md_2exp_.$(OBJEXT) md_2exp_.lo mul_.$(OBJEXT) \
-mul_.lo neg_.$(OBJEXT) neg_.lo out_str_.$(OBJEXT) out_str_.lo \
-set_.$(OBJEXT) set_.lo set_d_.$(OBJEXT) set_d_.lo set_den_.$(OBJEXT) \
-set_den_.lo set_f_.$(OBJEXT) set_f_.lo set_num_.$(OBJEXT) set_num_.lo \
-set_si_.$(OBJEXT) set_si_.lo set_str_.$(OBJEXT) set_str_.lo \
-set_ui_.$(OBJEXT) set_ui_.lo set_z_.$(OBJEXT) set_z_.lo \
-swap_.$(OBJEXT) swap_.lo : $(ANSI2KNR)
  
  mostlyclean-libtool:
         -rm -f *.lo
@@ -507,10 +442,15 @@ install-am: all-am
  
  installcheck: installcheck-am
  install-strip:
-       $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
-         install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
-         `test -z '$(STRIP)' || \
-           echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+       if test -z '$(STRIP)'; then \
+         $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+           install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+             install; \
+       else \
+         $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+           install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+           "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
+       fi
  mostlyclean-generic:
  
  clean-generic:
@@ -578,7 +518,7 @@ maintainer-clean-am: distclean-am maintainer-clean-generic
  
  mostlyclean: mostlyclean-am
  
-mostlyclean-am: mostlyclean-compile mostlyclean-generic mostlyclean-kr \
+mostlyclean-am: mostlyclean-compile mostlyclean-generic \
         mostlyclean-libtool
  
  pdf: pdf-am
@@ -591,7 +531,7 @@ ps-am:
  
  uninstall-am:
  
-.MAKE: $(top_builddir)/ansi2knr install-am install-strip
+.MAKE: install-am install-strip
  
  .PHONY: CTAGS GTAGS all all-am check check-am clean clean-generic \
         clean-libtool clean-noinstLTLIBRARIES ctags distclean \
@@ -603,9 +543,8 @@ uninstall-am:
         install-pdf install-pdf-am install-ps install-ps-am \
         install-strip installcheck installcheck-am installdirs \
         maintainer-clean maintainer-clean-generic mostlyclean \
-       mostlyclean-compile mostlyclean-generic mostlyclean-kr \
-       mostlyclean-libtool pdf pdf-am ps ps-am tags uninstall \
-       uninstall-am
+       mostlyclean-compile mostlyclean-generic mostlyclean-libtool \
+       pdf pdf-am ps ps-am tags uninstall uninstall-am
  
  
  # Tell versions [3.59,3.63) of GNU make to not export all variables.
diff --git a/mpq/abs.c b/mpq/abs.c

index 04b8e3418e8812eebef2b1a7a899adc8656207ad..3f0caeba98b077e9c002cb243d509d7c08b7974d 100644 (file)
--- a/mpq/abs.c
+++ b/mpq/abs.c
@@ -1,6 +1,6 @@
  /* mpq_abs -- absolute value of a rational.
  
-Copyright 2000, 2001 Free Software Foundation, Inc.
+Copyright 2000, 2001, 2012 Free Software Foundation, Inc.
  
  This file is part of the GNU MP Library.
  
@@ -26,21 +26,20 @@ along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
  void
  mpq_abs (mpq_ptr dst, mpq_srcptr src)
  {
-  mp_size_t  num_size = src->_mp_num._mp_size;
-  mp_size_t  num_abs_size = ABS (num_size);
+  mp_size_t  num_abs_size = ABSIZ(NUM(src));
  
    if (dst != src)
      {
-      mp_size_t  den_size = src->_mp_den._mp_size;
+      mp_size_t  den_size = SIZ(DEN(src));
+      mp_ptr dp;
  
-      MPZ_REALLOC (mpq_numref(dst), num_abs_size);
-      MPZ_REALLOC (mpq_denref(dst), den_size);
+      dp = MPZ_NEWALLOC (NUM(dst), num_abs_size);
+      MPN_COPY (dp, PTR(NUM(src)), num_abs_size);
  
-      MPN_COPY (dst->_mp_num._mp_d, src->_mp_num._mp_d, num_abs_size);
-      MPN_COPY (dst->_mp_den._mp_d, src->_mp_den._mp_d, den_size);
-
-      dst->_mp_den._mp_size = den_size;
+      dp = MPZ_NEWALLOC (DEN(dst), den_size);
+      SIZ(DEN(dst)) = den_size;
+      MPN_COPY (dp, PTR(DEN(src)), den_size);
      }
  
-  dst->_mp_num._mp_size = num_abs_size;
+  SIZ(NUM(dst)) = num_abs_size;
  }
diff --git a/mpq/aors.c b/mpq/aors.c

index 5e09de533d4acc55dc822518235b2528b52c5e6f..6a8c049d980d9e1b006fee8cc239d825ced34e31 100644 (file)
--- a/mpq/aors.c
+++ b/mpq/aors.c
@@ -22,19 +22,19 @@ along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
  #include "gmp-impl.h"
  
  
-static void __gmpq_aors __GMP_PROTO ((REGPARM_3_1 (mpq_ptr, mpq_srcptr, mpq_srcptr, void (*) __GMP_PROTO ((mpz_ptr, mpz_srcptr, mpz_srcptr))))) REGPARM_ATTR (1);
+static void __gmpq_aors (REGPARM_3_1 (mpq_ptr, mpq_srcptr, mpq_srcptr, void (*) (mpz_ptr, mpz_srcptr, mpz_srcptr))) REGPARM_ATTR (1);
  #define mpq_aors(w,x,y,fun)  __gmpq_aors (REGPARM_3_1 (w, x, y, fun))
  
  REGPARM_ATTR (1) static void
  mpq_aors (mpq_ptr rop, mpq_srcptr op1, mpq_srcptr op2,
-          void (*fun) __GMP_PROTO ((mpz_ptr, mpz_srcptr, mpz_srcptr)))
+          void (*fun) (mpz_ptr, mpz_srcptr, mpz_srcptr))
  {
    mpz_t gcd;
    mpz_t tmp1, tmp2;
-  mp_size_t op1_num_size = ABS (op1->_mp_num._mp_size);
-  mp_size_t op1_den_size =      op1->_mp_den._mp_size;
-  mp_size_t op2_num_size = ABS (op2->_mp_num._mp_size);
-  mp_size_t op2_den_size =      op2->_mp_den._mp_size;
+  mp_size_t op1_num_size = ABSIZ(NUM(op1));
+  mp_size_t op1_den_size =   SIZ(DEN(op1));
+  mp_size_t op2_num_size = ABSIZ(NUM(op2));
+  mp_size_t op2_den_size =   SIZ(DEN(op2));
    TMP_DECL;
  
    TMP_MARK;
@@ -47,43 +47,43 @@ mpq_aors (mpq_ptr rop, mpq_srcptr op1, mpq_srcptr op2,
       dare to overwrite the numerator of ROP when we are finished
       with the numerators of OP1 and OP2.  */
  
-  mpz_gcd (gcd, &(op1->_mp_den), &(op2->_mp_den));
+  mpz_gcd (gcd, DEN(op1), DEN(op2));
    if (! MPZ_EQUAL_1_P (gcd))
      {
        mpz_t t;
  
-      mpz_divexact_gcd (tmp1, &(op2->_mp_den), gcd);
-      mpz_mul (tmp1, &(op1->_mp_num), tmp1);
+      MPZ_TMP_INIT (t, MAX (op1_num_size + op2_den_size,
+            op2_num_size + op1_den_size) + 2 - SIZ(gcd));
  
-      mpz_divexact_gcd (tmp2, &(op1->_mp_den), gcd);
-      mpz_mul (tmp2, &(op2->_mp_num), tmp2);
+      mpz_divexact_gcd (t, DEN(op2), gcd);
+      mpz_divexact_gcd (tmp2, DEN(op1), gcd);
  
-      MPZ_TMP_INIT (t, MAX (ABS (tmp1->_mp_size), ABS (tmp2->_mp_size)) + 1);
+      mpz_mul (tmp1, NUM(op1), t);
+      mpz_mul (t, NUM(op2), tmp2);
  
-      (*fun) (t, tmp1, tmp2);
-      mpz_divexact_gcd (tmp2, &(op1->_mp_den), gcd);
+      (*fun) (t, tmp1, t);
  
        mpz_gcd (gcd, t, gcd);
        if (MPZ_EQUAL_1_P (gcd))
          {
-          mpz_set (&(rop->_mp_num), t);
-          mpz_mul (&(rop->_mp_den), &(op2->_mp_den), tmp2);
+          mpz_set (NUM(rop), t);
+          mpz_mul (DEN(rop), DEN(op2), tmp2);
          }
        else
          {
-          mpz_divexact_gcd (&(rop->_mp_num), t, gcd);
-          mpz_divexact_gcd (tmp1, &(op2->_mp_den), gcd);
-          mpz_mul (&(rop->_mp_den), tmp1, tmp2);
+          mpz_divexact_gcd (NUM(rop), t, gcd);
+          mpz_divexact_gcd (tmp1, DEN(op2), gcd);
+          mpz_mul (DEN(rop), tmp1, tmp2);
          }
      }
    else
      {
        /* The common divisor is 1.  This is the case (for random input) with
          probability 6/(pi**2), which is about 60.8%.  */
-      mpz_mul (tmp1, &(op1->_mp_num), &(op2->_mp_den));
-      mpz_mul (tmp2, &(op2->_mp_num), &(op1->_mp_den));
-      (*fun) (&(rop->_mp_num), tmp1, tmp2);
-      mpz_mul (&(rop->_mp_den), &(op1->_mp_den), &(op2->_mp_den));
+      mpz_mul (tmp1, NUM(op1), DEN(op2));
+      mpz_mul (tmp2, NUM(op2), DEN(op1));
+      (*fun) (NUM(rop), tmp1, tmp2);
+      mpz_mul (DEN(rop), DEN(op1), DEN(op2));
      }
    TMP_FREE;
  }
diff --git a/mpq/canonicalize.c b/mpq/canonicalize.c

index e5bedec4c3baab1bb66243134a91b102e8c347b4..ab7925b51207aad650641328a6515b3b064e7d5d 100644 (file)
--- a/mpq/canonicalize.c
+++ b/mpq/canonicalize.c
@@ -28,26 +28,26 @@ mpq_canonicalize (MP_RAT *op)
    mpz_t gcd;
    TMP_DECL;
  
-  if (op->_mp_den._mp_size == 0)
+  if (UNLIKELY (SIZ(DEN(op)) == 0))
      DIVIDE_BY_ZERO;
  
    TMP_MARK;
  
    /* ??? Dunno if the 1+ is needed.  */
-  MPZ_TMP_INIT (gcd, 1 + MAX (ABS (op->_mp_num._mp_size),
-                             ABS (op->_mp_den._mp_size)));
+  MPZ_TMP_INIT (gcd, 1 + MAX (ABSIZ(NUM(op)),
+                             ABSIZ(DEN(op))));
  
-  mpz_gcd (gcd, &(op->_mp_num), &(op->_mp_den));
+  mpz_gcd (gcd, NUM(op), DEN(op));
    if (! MPZ_EQUAL_1_P (gcd))
      {
-      mpz_divexact_gcd (&(op->_mp_num), &(op->_mp_num), gcd);
-      mpz_divexact_gcd (&(op->_mp_den), &(op->_mp_den), gcd);
+      mpz_divexact_gcd (NUM(op), NUM(op), gcd);
+      mpz_divexact_gcd (DEN(op), DEN(op), gcd);
      }
  
-  if (op->_mp_den._mp_size < 0)
+  if (SIZ(DEN(op)) < 0)
      {
-      op->_mp_num._mp_size = -op->_mp_num._mp_size;
-      op->_mp_den._mp_size = -op->_mp_den._mp_size;
+      SIZ(NUM(op)) = -SIZ(NUM(op));
+      SIZ(DEN(op)) = -SIZ(DEN(op));
      }
    TMP_FREE;
  }
diff --git a/mpq/clear.c b/mpq/clear.c

index d6f7d7fb08973876d78bba3fcb00a9670065a391..787dae14bcbb075b9fa429c30b650da8983bd743 100644 (file)
--- a/mpq/clear.c
+++ b/mpq/clear.c
@@ -23,8 +23,8 @@ along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
  void
  mpq_clear (MP_RAT *m)
  {
-  (*__gmp_free_func) (m->_mp_num._mp_d,
-                   m->_mp_num._mp_alloc * BYTES_PER_MP_LIMB);
-  (*__gmp_free_func) (m->_mp_den._mp_d,
-                   m->_mp_den._mp_alloc * BYTES_PER_MP_LIMB);
+  (*__gmp_free_func) (PTR(NUM(m)),
+                   ALLOC(NUM(m)) * BYTES_PER_MP_LIMB);
+  (*__gmp_free_func) (PTR(DEN(m)),
+                   ALLOC(DEN(m)) * BYTES_PER_MP_LIMB);
  }
diff --git a/mpq/cmp.c b/mpq/cmp.c

index 1844c2970daa8757b312ac05a590c17b8946a678..c4bb5e0de27d65b829a4dc7d66fc41dda1ec9ae9 100644 (file)
--- a/mpq/cmp.c
+++ b/mpq/cmp.c
@@ -25,10 +25,10 @@ along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
  int
  mpq_cmp (const MP_RAT *op1, const MP_RAT *op2)
  {
-  mp_size_t num1_size = op1->_mp_num._mp_size;
-  mp_size_t den1_size = op1->_mp_den._mp_size;
-  mp_size_t num2_size = op2->_mp_num._mp_size;
-  mp_size_t den2_size = op2->_mp_den._mp_size;
+  mp_size_t num1_size = SIZ(NUM(op1));
+  mp_size_t den1_size = SIZ(DEN(op1));
+  mp_size_t num2_size = SIZ(NUM(op2));
+  mp_size_t den2_size = SIZ(DEN(op2));
    mp_size_t tmp1_size, tmp2_size;
    mp_ptr tmp1_ptr, tmp2_ptr;
    mp_size_t num1_sign;
@@ -70,12 +70,12 @@ mpq_cmp (const MP_RAT *op1, const MP_RAT *op2)
      int cnt1, cnt2;
      mp_bitcnt_t bits1, bits2;
  
-    count_leading_zeros (cnt1, op1->_mp_num._mp_d[num1_size - 1]);
-    count_leading_zeros (cnt2, op2->_mp_den._mp_d[den2_size - 1]);
+    count_leading_zeros (cnt1, PTR(NUM(op1))[num1_size - 1]);
+    count_leading_zeros (cnt2, PTR(DEN(op2))[den2_size - 1]);
      bits1 = tmp1_size * GMP_NUMB_BITS - cnt1 - cnt2 + 2 * GMP_NAIL_BITS;
  
-    count_leading_zeros (cnt1, op2->_mp_num._mp_d[num2_size - 1]);
-    count_leading_zeros (cnt2, op1->_mp_den._mp_d[den1_size - 1]);
+    count_leading_zeros (cnt1, PTR(NUM(op2))[num2_size - 1]);
+    count_leading_zeros (cnt2, PTR(DEN(op1))[den1_size - 1]);
      bits2 = tmp2_size * GMP_NUMB_BITS - cnt1 - cnt2 + 2 * GMP_NAIL_BITS;
  
      if (bits1 > bits2 + 1)
@@ -91,21 +91,21 @@ mpq_cmp (const MP_RAT *op1, const MP_RAT *op2)
  
    if (num1_size >= den2_size)
      tmp1_size -= 0 == mpn_mul (tmp1_ptr,
-                              op1->_mp_num._mp_d, num1_size,
-                              op2->_mp_den._mp_d, den2_size);
+                              PTR(NUM(op1)), num1_size,
+                              PTR(DEN(op2)), den2_size);
    else
      tmp1_size -= 0 == mpn_mul (tmp1_ptr,
-                              op2->_mp_den._mp_d, den2_size,
-                              op1->_mp_num._mp_d, num1_size);
+                              PTR(DEN(op2)), den2_size,
+                              PTR(NUM(op1)), num1_size);
  
     if (num2_size >= den1_size)
       tmp2_size -= 0 == mpn_mul (tmp2_ptr,
-                               op2->_mp_num._mp_d, num2_size,
-                               op1->_mp_den._mp_d, den1_size);
+                               PTR(NUM(op2)), num2_size,
+                               PTR(DEN(op1)), den1_size);
     else
       tmp2_size -= 0 == mpn_mul (tmp2_ptr,
-                               op1->_mp_den._mp_d, den1_size,
-                               op2->_mp_num._mp_d, num2_size);
+                               PTR(DEN(op1)), den1_size,
+                               PTR(NUM(op2)), num2_size);
  
  
    cc = tmp1_size - tmp2_size != 0
diff --git a/mpq/cmp_si.c b/mpq/cmp_si.c

index a744a984290d0958af52df83fc2e20be7bfa6bf2..b9482b1db74e0824f30dcee10a8f2cb84f580a2e 100644 (file)
--- a/mpq/cmp_si.c
+++ b/mpq/cmp_si.c
@@ -29,9 +29,9 @@ int
  _mpq_cmp_si (mpq_srcptr q, long n, unsigned long d)
  {
    /* need canonical sign to get right result */
-  ASSERT (q->_mp_den._mp_size > 0);
+  ASSERT (SIZ(DEN(q)) > 0);
  
-  if (q->_mp_num._mp_size >= 0)
+  if (SIZ(NUM(q)) >= 0)
      {
        if (n >= 0)
          return _mpq_cmp_ui (q, n, d);            /* >=0 cmp >=0 */
@@ -45,10 +45,10 @@ _mpq_cmp_si (mpq_srcptr q, long n, unsigned long d)
        else
          {
            mpq_t  qabs;
-          qabs->_mp_num._mp_size = ABS (q->_mp_num._mp_size);
-          qabs->_mp_num._mp_d    = q->_mp_num._mp_d;
-          qabs->_mp_den._mp_size = q->_mp_den._mp_size;
-          qabs->_mp_den._mp_d    = q->_mp_den._mp_d;
+          SIZ(NUM(qabs)) = ABSIZ(NUM(q));
+          PTR(NUM(qabs))    = PTR(NUM(q));
+          SIZ(DEN(qabs)) = SIZ(DEN(q));
+          PTR(DEN(qabs))    = PTR(DEN(q));
  
            return - _mpq_cmp_ui (qabs, -n, d);    /* <0 cmp <0 */
          }
diff --git a/mpq/cmp_ui.c b/mpq/cmp_ui.c

index 8e0b1af1b87cffda78753c03a6b94cbbe18e7d69..a52c2e8e8c98e4a9625b12a08c1acc81f945f2ac 100644 (file)
--- a/mpq/cmp_ui.c
+++ b/mpq/cmp_ui.c
@@ -26,8 +26,8 @@ along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
  int
  _mpq_cmp_ui (const MP_RAT *op1, unsigned long int num2, unsigned long int den2)
  {
-  mp_size_t num1_size = op1->_mp_num._mp_size;
-  mp_size_t den1_size = op1->_mp_den._mp_size;
+  mp_size_t num1_size = SIZ(NUM(op1));
+  mp_size_t den1_size = SIZ(DEN(op1));
    mp_size_t tmp1_size, tmp2_size;
    mp_ptr tmp1_ptr, tmp2_ptr;
    mp_limb_t cy_limb;
@@ -50,7 +50,7 @@ _mpq_cmp_ui (const MP_RAT *op1, unsigned long int num2, unsigned long int den2)
    /* need canonical sign to get right result */
    ASSERT (den1_size > 0);
  
-  if (den2 == 0)
+  if (UNLIKELY (den2 == 0))
      DIVIDE_BY_ZERO;
  
    if (num1_size == 0)
@@ -73,12 +73,12 @@ _mpq_cmp_ui (const MP_RAT *op1, unsigned long int num2, unsigned long int den2)
    tmp1_ptr = TMP_ALLOC_LIMBS (num1_size + 1);
    tmp2_ptr = TMP_ALLOC_LIMBS (den1_size + 1);
  
-  cy_limb = mpn_mul_1 (tmp1_ptr, op1->_mp_num._mp_d, num1_size,
+  cy_limb = mpn_mul_1 (tmp1_ptr, PTR(NUM(op1)), num1_size,
                         (mp_limb_t) den2);
    tmp1_ptr[num1_size] = cy_limb;
    tmp1_size = num1_size + (cy_limb != 0);
  
-  cy_limb = mpn_mul_1 (tmp2_ptr, op1->_mp_den._mp_d, den1_size,
+  cy_limb = mpn_mul_1 (tmp2_ptr, PTR(DEN(op1)), den1_size,
                         (mp_limb_t) num2);
    tmp2_ptr[den1_size] = cy_limb;
    tmp2_size = den1_size + (cy_limb != 0);
diff --git a/mpq/div.c b/mpq/div.c

index efba32a2f396eced758417364ebebd4ffd3529a8..1f64f6d478c036ac784e8c4a166f7a55f86009ef 100644 (file)
--- a/mpq/div.c
+++ b/mpq/div.c
@@ -34,24 +34,26 @@ mpq_div (mpq_ptr quot, mpq_srcptr op1, mpq_srcptr op2)
    mp_size_t alloc;
    TMP_DECL;
  
-  op1_num_size = ABS (op1->_mp_num._mp_size);
-  op1_den_size =      op1->_mp_den._mp_size;
-  op2_num_size = ABS (op2->_mp_num._mp_size);
-  op2_den_size =      op2->_mp_den._mp_size;
+  op2_num_size = ABSIZ(NUM(op2));
  
-  if (op2_num_size == 0)
+  if (UNLIKELY (op2_num_size == 0))
      DIVIDE_BY_ZERO;
  
+  op1_num_size = ABSIZ(NUM(op1));
+
    if (op1_num_size == 0)
      {
        /* We special case this to simplify allocation logic; gcd(0,x) = x
          is a singular case for the allocations.  */
-      quot->_mp_num._mp_size = 0;
-      quot->_mp_den._mp_d[0] = 1;
-      quot->_mp_den._mp_size = 1;
+      SIZ(NUM(quot)) = 0;
+      PTR(DEN(quot))[0] = 1;
+      SIZ(DEN(quot)) = 1;
        return;
      }
  
+  op2_den_size =   SIZ(DEN(op2));
+  op1_den_size =   SIZ(DEN(op1));
+
    TMP_MARK;
  
    alloc = MIN (op1_num_size, op2_num_size);
@@ -74,28 +76,28 @@ mpq_div (mpq_ptr quot, mpq_srcptr op1, mpq_srcptr op2)
       numerator of QUOT when we are finished with the numerators of OP1 and
       OP2.  */
  
-  mpz_gcd (gcd1, &(op1->_mp_num), &(op2->_mp_num));
-  mpz_gcd (gcd2, &(op2->_mp_den), &(op1->_mp_den));
+  mpz_gcd (gcd1, NUM(op1), NUM(op2));
+  mpz_gcd (gcd2, DEN(op2), DEN(op1));
  
-  mpz_divexact_gcd (tmp1, &(op1->_mp_num), gcd1);
-  mpz_divexact_gcd (tmp2, &(op2->_mp_den), gcd2);
+  mpz_divexact_gcd (tmp1, NUM(op1), gcd1);
+  mpz_divexact_gcd (tmp2, DEN(op2), gcd2);
  
    mpz_mul (numtmp, tmp1, tmp2);
  
-  mpz_divexact_gcd (tmp1, &(op2->_mp_num), gcd1);
-  mpz_divexact_gcd (tmp2, &(op1->_mp_den), gcd2);
+  mpz_divexact_gcd (tmp1, NUM(op2), gcd1);
+  mpz_divexact_gcd (tmp2, DEN(op1), gcd2);
  
-  mpz_mul (&(quot->_mp_den), tmp1, tmp2);
+  mpz_mul (DEN(quot), tmp1, tmp2);
  
    /* We needed to go via NUMTMP to take care of QUOT being the same as OP2.
       Now move NUMTMP to QUOT->_mp_num.  */
-  mpz_set (&(quot->_mp_num), numtmp);
+  mpz_set (NUM(quot), numtmp);
  
    /* Keep the denominator positive.  */
-  if (quot->_mp_den._mp_size < 0)
+  if (SIZ(DEN(quot)) < 0)
      {
-      quot->_mp_den._mp_size = -quot->_mp_den._mp_size;
-      quot->_mp_num._mp_size = -quot->_mp_num._mp_size;
+      SIZ(DEN(quot)) = -SIZ(DEN(quot));
+      SIZ(NUM(quot)) = -SIZ(NUM(quot));
      }
  
    TMP_FREE;
diff --git a/mpq/equal.c b/mpq/equal.c

index 36f7d372d9bd7afa44a258d186700a05ef5d1c62..e55180a5627ee8fec9d30bc08d58e12b5ba0519b 100644 (file)
--- a/mpq/equal.c
+++ b/mpq/equal.c
@@ -31,25 +31,25 @@ mpq_equal (mpq_srcptr op1, mpq_srcptr op2) __GMP_NOTHROW
    ASSERT_MPQ_CANONICAL (op1);
    ASSERT_MPQ_CANONICAL (op2);
  
-  num1_size = op1->_mp_num._mp_size;
-  num2_size = op2->_mp_num._mp_size;
+  num1_size = SIZ(NUM(op1));
+  num2_size = SIZ(NUM(op2));
    if (num1_size != num2_size)
      return 0;
  
-  num1_ptr = op1->_mp_num._mp_d;
-  num2_ptr = op2->_mp_num._mp_d;
+  num1_ptr = PTR(NUM(op1));
+  num2_ptr = PTR(NUM(op2));
    num1_size = ABS (num1_size);
    for (i = 0; i < num1_size; i++)
      if (num1_ptr[i] != num2_ptr[i])
        return 0;
  
-  den1_size = op1->_mp_den._mp_size;
-  den2_size = op2->_mp_den._mp_size;
+  den1_size = SIZ(DEN(op1));
+  den2_size = SIZ(DEN(op2));
    if (den1_size != den2_size)
      return 0;
  
-  den1_ptr = op1->_mp_den._mp_d;
-  den2_ptr = op2->_mp_den._mp_d;
+  den1_ptr = PTR(DEN(op1));
+  den2_ptr = PTR(DEN(op2));
    for (i = 0; i < den1_size; i++)
      if (den1_ptr[i] != den2_ptr[i])
        return 0;
diff --git a/mpq/get_d.c b/mpq/get_d.c

index 0caefed896c47c1cde93398d0ca6cf63ab0d71c1..3db822689e36540343093d56c3a1d987fb32095b 100644 (file)
--- a/mpq/get_d.c
+++ b/mpq/get_d.c
@@ -96,8 +96,8 @@ mpq_get_d (const MP_RAT *src)
    double res;
    mp_srcptr np, dp;
    mp_ptr remp, tp;
-  mp_size_t nsize = src->_mp_num._mp_size;
-  mp_size_t dsize = src->_mp_den._mp_size;
+  mp_size_t nsize = SIZ(NUM(src));
+  mp_size_t dsize = SIZ(DEN(src));
    mp_size_t qsize, prospective_qsize, zeros, chop, tsize;
    mp_size_t sign_quotient = nsize;
    long exp;
@@ -115,8 +115,8 @@ mpq_get_d (const MP_RAT *src)
    TMP_MARK;
    nsize = ABS (nsize);
    dsize = ABS (dsize);
-  np = src->_mp_num._mp_d;
-  dp = src->_mp_den._mp_d;
+  np = PTR(NUM(src));
+  dp = PTR(DEN(src));
  
    prospective_qsize = nsize - dsize + 1;   /* from using given n,d */
    qsize = N_QLIMBS + 1;                    /* desired qsize */
diff --git a/mpq/get_den.c b/mpq/get_den.c

index c3104e6a36a62c6d006ae572bbef554d01f43eef..6a969dfe3ea351a621101ba4ad4d6133f88146ad 100644 (file)
--- a/mpq/get_den.c
+++ b/mpq/get_den.c
@@ -1,6 +1,6 @@
  /* mpq_get_den(den,rat_src) -- Set DEN to the denominator of RAT_SRC.
  
-Copyright 1991, 1994, 1995, 2001 Free Software Foundation, Inc.
+Copyright 1991, 1994, 1995, 2001, 2012 Free Software Foundation, Inc.
  
  This file is part of the GNU MP Library.
  
@@ -21,13 +21,12 @@ along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
  #include "gmp-impl.h"
  
  void
-mpq_get_den (MP_INT *den, const MP_RAT *src)
+mpq_get_den (mpz_ptr den, mpq_srcptr src)
  {
-  mp_size_t size = src->_mp_den._mp_size;
+  mp_size_t size = SIZ(DEN(src));
+  mp_ptr dp;
  
-  if (den->_mp_alloc < size)
-    _mpz_realloc (den, size);
-
-  MPN_COPY (den->_mp_d, src->_mp_den._mp_d, size);
-  den->_mp_size = size;
+  dp = MPZ_NEWALLOC (den, size);
+  SIZ(den) = size;
+  MPN_COPY (dp, PTR(DEN(src)), size);
  }
diff --git a/mpq/get_num.c b/mpq/get_num.c

index c45e7ede2c182c4b9bf2170d7c06c86aad213706..bbc6448fd0a92f5e57d6f66ed45497b04404ac1c 100644 (file)
--- a/mpq/get_num.c
+++ b/mpq/get_num.c
@@ -1,6 +1,6 @@
   /* mpq_get_num(num,rat_src) -- Set NUM to the numerator of RAT_SRC.
  
-Copyright 1991, 1994, 1995, 2001 Free Software Foundation, Inc.
+Copyright 1991, 1994, 1995, 2001, 2012 Free Software Foundation, Inc.
  
  This file is part of the GNU MP Library.
  
@@ -21,14 +21,14 @@ along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
  #include "gmp-impl.h"
  
  void
-mpq_get_num (MP_INT *num, const MP_RAT *src)
+mpq_get_num (mpz_ptr num, mpq_srcptr src)
  {
-  mp_size_t size = src->_mp_num._mp_size;
+  mp_size_t size = SIZ(NUM(src));
    mp_size_t abs_size = ABS (size);
+  mp_ptr dp;
  
-  if (num->_mp_alloc < abs_size)
-    _mpz_realloc (num, abs_size);
+  dp = MPZ_NEWALLOC (num, abs_size);
+  SIZ(num) = size;
  
-  MPN_COPY (num->_mp_d, src->_mp_num._mp_d, abs_size);
-  num->_mp_size = size;
+  MPN_COPY (dp, PTR(NUM(src)), abs_size);
  }
diff --git a/mpq/get_str.c b/mpq/get_str.c

index 68ca34f6f86c08c5fa3e5d84bfc1201780884345..74398a5380be65509b97100a8f8720f7646b6ebd 100644 (file)
--- a/mpq/get_str.c
+++ b/mpq/get_str.c
@@ -1,6 +1,6 @@
  /* mpq_get_str -- mpq to string conversion.
  
-Copyright 2001, 2002, 2006 Free Software Foundation, Inc.
+Copyright 2001, 2002, 2006, 2011 Free Software Foundation, Inc.
  
  This file is part of the GNU MP Library.
  
@@ -21,25 +21,25 @@ along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
  #include <string.h>
  #include "gmp.h"
  #include "gmp-impl.h"
+#include "longlong.h"
  
  char *
  mpq_get_str (char *str, int base, mpq_srcptr q)
  {
    size_t  str_alloc, len;
  
-  ASSERT (ABS(base) >= 2);
-  ASSERT (ABS(base) <= 62);
+  if (base > 62 || base < -36)
+    return NULL;
  
    str_alloc = 0;
    if (str == NULL)
      {
        /* This is an overestimate since we don't bother checking how much of
-         the high limbs of num and den are used.  +2 for rounding up the
-         chars per bit of num and den.  +3 for sign, slash and '\0'.  */
-      str_alloc = ((size_t) ((ABS (q->_mp_num._mp_size) + q->_mp_den._mp_size)
-                             * GMP_LIMB_BITS
-                             * mp_bases[ABS(base)].chars_per_bit_exactly))
-                   + 5;
+        the high limbs of num and den are used.  +2 for rounding up the
+        chars per bit of num and den.  +3 for sign, slash and '\0'.  */
+      DIGITS_IN_BASE_PER_LIMB (str_alloc, ABSIZ(NUM(q)) + SIZ(DEN(q)), ABS(base));
+      str_alloc += 6;
+
        str = (char *) (*__gmp_allocate_func) (str_alloc);
      }
  
@@ -55,8 +55,8 @@ mpq_get_str (char *str, int base, mpq_srcptr q)
    ASSERT (len == strlen(str));
    ASSERT (str_alloc == 0 || len+1 <= str_alloc);
    ASSERT (len+1 <=  /* size recommended to applications */
-          mpz_sizeinbase (mpq_numref(q), ABS(base)) +
-          mpz_sizeinbase (mpq_denref(q), ABS(base)) + 3);
+         mpz_sizeinbase (mpq_numref(q), ABS(base)) +
+         mpz_sizeinbase (mpq_denref(q), ABS(base)) + 3);
  
    if (str_alloc != 0)
      __GMP_REALLOCATE_FUNC_MAYBE_TYPE (str, str_alloc, len+1, char);
diff --git a/mpq/init.c b/mpq/init.c

index 4cec0c101c15159b80560778bfdc32076f8bafcc..f075477d634c1a85003aa755132c1f713e675896 100644 (file)
--- a/mpq/init.c
+++ b/mpq/init.c
@@ -23,16 +23,16 @@ along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
  void
  mpq_init (MP_RAT *x)
  {
-  x->_mp_num._mp_alloc = 1;
-  x->_mp_num._mp_d = (mp_ptr) (*__gmp_allocate_func) (BYTES_PER_MP_LIMB);
-  x->_mp_num._mp_size = 0;
-  x->_mp_den._mp_alloc = 1;
-  x->_mp_den._mp_d = (mp_ptr) (*__gmp_allocate_func) (BYTES_PER_MP_LIMB);
-  x->_mp_den._mp_d[0] = 1;
-  x->_mp_den._mp_size = 1;
+  ALLOC(NUM(x)) = 1;
+  PTR(NUM(x)) = (mp_ptr) (*__gmp_allocate_func) (BYTES_PER_MP_LIMB);
+  SIZ(NUM(x)) = 0;
+  ALLOC(DEN(x)) = 1;
+  PTR(DEN(x)) = (mp_ptr) (*__gmp_allocate_func) (BYTES_PER_MP_LIMB);
+  PTR(DEN(x))[0] = 1;
+  SIZ(DEN(x)) = 1;
  
  #ifdef __CHECKER__
    /* let the low limb look initialized, for the benefit of mpz_get_ui etc */
-  x->_mp_num._mp_d[0] = 0;
+  PTR(NUM(x))[0] = 0;
  #endif
  }
diff --git a/mpq/inp_str.c b/mpq/inp_str.c

index 9df6d8006450ed9efb993e633f5294865a911984..fef2f987dd2786c9d3090f0444026faaec3efe86 100644 (file)
--- a/mpq/inp_str.c
+++ b/mpq/inp_str.c
@@ -32,8 +32,8 @@ mpq_inp_str (mpq_ptr q, FILE *fp, int base)
    if (fp == NULL)
      fp = stdin;
  
-  q->_mp_den._mp_size = 1;
-  q->_mp_den._mp_d[0] = 1;
+  SIZ(DEN(q)) = 1;
+  PTR(DEN(q))[0] = 1;
  
    nread = mpz_inp_str (mpq_numref(q), fp, base);
    if (nread == 0)
@@ -49,11 +49,11 @@ mpq_inp_str (mpq_ptr q, FILE *fp, int base)
  
        nread = mpz_inp_str_nowhite (mpq_denref(q), fp, base, c, nread);
        if (nread == 0)
-        {
-          q->_mp_num._mp_size = 0;
-          q->_mp_den._mp_size = 1;
-          q->_mp_den._mp_d[0] = 1;
-        }
+       {
+         SIZ(NUM(q)) = 0;
+         SIZ(DEN(q)) = 1;
+         PTR(DEN(q))[0] = 1;
+       }
      }
    else
      {
diff --git a/mpq/inv.c b/mpq/inv.c

index 5143686be51d21f2b688be9326a779ae615d02c2..48f0ae75f2f05d3e112157520e0d4707bba8adfe 100644 (file)
--- a/mpq/inv.c
+++ b/mpq/inv.c
@@ -1,7 +1,7 @@
  /* mpq_inv(dest,src) -- invert a rational number, i.e. set DEST to SRC
     with the numerator and denominator swapped.
  
-Copyright 1991, 1994, 1995, 2000, 2001 Free Software Foundation, Inc.
+Copyright 1991, 1994, 1995, 2000, 2001, 2012 Free Software Foundation, Inc.
  
  This file is part of the GNU MP Library.
  
@@ -22,46 +22,39 @@ along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
  #include "gmp-impl.h"
  
  void
-mpq_inv (MP_RAT *dest, const MP_RAT *src)
+mpq_inv (mpq_ptr dest, mpq_srcptr src)
  {
-  mp_size_t num_size = src->_mp_num._mp_size;
-  mp_size_t den_size = src->_mp_den._mp_size;
-
-  if (num_size == 0)
-    DIVIDE_BY_ZERO;
+  mp_size_t num_size = SIZ(NUM(src));
+  mp_size_t den_size = SIZ(DEN(src));
  
    if (num_size < 0)
      {
        num_size = -num_size;
        den_size = -den_size;
      }
-  dest->_mp_den._mp_size = num_size;
-  dest->_mp_num._mp_size = den_size;
+  else if (UNLIKELY (num_size == 0))
+    DIVIDE_BY_ZERO;
+
+  SIZ(DEN(dest)) = num_size;
+  SIZ(NUM(dest)) = den_size;
  
-  /* If dest == src we may just swap the numerator and denominator, but
-     we have to ensure the new denominator is positive.  */
+  /* If dest == src we may just swap the numerator and denominator;
+     we ensured that the new denominator is positive.  */
  
    if (dest == src)
      {
-      mp_size_t alloc = dest->_mp_num._mp_alloc;
-      mp_ptr limb_ptr = dest->_mp_num._mp_d;
-
-      dest->_mp_num._mp_alloc = dest->_mp_den._mp_alloc;
-      dest->_mp_num._mp_d = dest->_mp_den._mp_d;
-
-      dest->_mp_den._mp_alloc = alloc;
-      dest->_mp_den._mp_d = limb_ptr;
+      MP_PTR_SWAP (PTR(NUM(dest)), PTR(DEN(dest)));
+      MP_SIZE_T_SWAP (ALLOC(NUM(dest)), ALLOC(DEN(dest)));
      }
    else
      {
-      den_size = ABS (den_size);
-      if (dest->_mp_num._mp_alloc < den_size)
-       _mpz_realloc (&(dest->_mp_num), den_size);
+      mp_ptr dp;
  
-      if (dest->_mp_den._mp_alloc < num_size)
-       _mpz_realloc (&(dest->_mp_den), num_size);
+      den_size = ABS (den_size);
+      dp = MPZ_NEWALLOC (NUM(dest), den_size);
+      MPN_COPY (dp, PTR(DEN(src)), den_size);
  
-      MPN_COPY (dest->_mp_num._mp_d, src->_mp_den._mp_d, den_size);
-      MPN_COPY (dest->_mp_den._mp_d, src->_mp_num._mp_d, num_size);
+      dp = MPZ_NEWALLOC (DEN(dest), num_size);
+      MPN_COPY (dp, PTR(NUM(src)), num_size);
      }
  }
diff --git a/mpq/md_2exp.c b/mpq/md_2exp.c

index 6179ca3f0fb8bf8a5c11ce0c89d2ef8efad3bfee..7b85793d27ec8af4c3e3db7499b8c613dc583bfa 100644 (file)
--- a/mpq/md_2exp.c
+++ b/mpq/md_2exp.c
@@ -1,7 +1,7 @@
  /* mpq_mul_2exp, mpq_div_2exp - multiply or divide by 2^N */
  
  /*
-Copyright 2000, 2002 Free Software Foundation, Inc.
+Copyright 2000, 2002, 2012 Free Software Foundation, Inc.
  
  This file is part of the GNU MP Library.
  
@@ -48,14 +48,13 @@ mord_2exp (mpz_ptr ldst, mpz_ptr rdst, mpz_srcptr lsrc, mpz_srcptr rsrc,
  
    /* no realloc here if rsrc==rdst, so p and rsrc_ptr remain valid */
    len -= (p - rsrc_ptr);
-  MPZ_REALLOC (rdst, len);
-  rdst_ptr = PTR(rdst);
+  rdst_ptr = MPZ_REALLOC (rdst, len);
  
    if ((plow & 1) || n == 0)
      {
-      /* need DECR when src==dst */
+      /* need INCR when src==dst */
        if (p != rdst_ptr)
-        MPN_COPY_DECR (rdst_ptr, p, len);
+        MPN_COPY_INCR (rdst_ptr, p, len);
      }
    else
      {
@@ -83,21 +82,19 @@ mord_2exp (mpz_ptr ldst, mpz_ptr rdst, mpz_srcptr lsrc, mpz_srcptr rsrc,
  void
  mpq_mul_2exp (mpq_ptr dst, mpq_srcptr src, mp_bitcnt_t n)
  {
-  mord_2exp (mpq_numref (dst), mpq_denref (dst),
-             mpq_numref (src), mpq_denref (src), n);
+  mord_2exp (NUM(dst), DEN(dst), NUM(src), DEN(src), n);
  }
  
  void
  mpq_div_2exp (mpq_ptr dst, mpq_srcptr src, mp_bitcnt_t n)
  {
-  if (SIZ (mpq_numref(src)) == 0)
+  if (SIZ(NUM(src)) == 0)
      {
-      dst->_mp_num._mp_size = 0;
-      dst->_mp_den._mp_size = 1;
-      dst->_mp_den._mp_d[0] = 1;
+      SIZ(NUM(dst)) = 0;
+      SIZ(DEN(dst)) = 1;
+      PTR(DEN(dst))[0] = 1;
        return;
      }
  
-  mord_2exp (mpq_denref (dst), mpq_numref (dst),
-             mpq_denref (src), mpq_numref (src), n);
+  mord_2exp (DEN(dst), NUM(dst), DEN(src), NUM(src), n);
  }
diff --git a/mpq/mul.c b/mpq/mul.c

index 0214b31d2772e97821f78602bb04a68ab04bbdd8..839308b42a6bddcde43c063beb6dad6eeb0c5aa5 100644 (file)
--- a/mpq/mul.c
+++ b/mpq/mul.c
@@ -42,18 +42,18 @@ mpq_mul (mpq_ptr prod, mpq_srcptr op1, mpq_srcptr op2)
        return;
      }
  
-  op1_num_size = ABS (op1->_mp_num._mp_size);
-  op1_den_size =      op1->_mp_den._mp_size;
-  op2_num_size = ABS (op2->_mp_num._mp_size);
-  op2_den_size =      op2->_mp_den._mp_size;
+  op1_num_size = ABSIZ(NUM(op1));
+  op1_den_size =   SIZ(DEN(op1));
+  op2_num_size = ABSIZ(NUM(op2));
+  op2_den_size =   SIZ(DEN(op2));
  
    if (op1_num_size == 0 || op2_num_size == 0)
      {
        /* We special case this to simplify allocation logic; gcd(0,x) = x
          is a singular case for the allocations.  */
-      prod->_mp_num._mp_size = 0;
-      prod->_mp_den._mp_d[0] = 1;
-      prod->_mp_den._mp_size = 1;
+      SIZ(NUM(prod)) = 0;
+      PTR(DEN(prod))[0] = 1;
+      SIZ(DEN(prod)) = 1;
        return;
      }
  
@@ -76,18 +76,18 @@ mpq_mul (mpq_ptr prod, mpq_srcptr op1, mpq_srcptr op2)
       numerator of PROD when we are finished with the numerators of OP1 and
       OP2.  */
  
-  mpz_gcd (gcd1, &(op1->_mp_num), &(op2->_mp_den));
-  mpz_gcd (gcd2, &(op2->_mp_num), &(op1->_mp_den));
+  mpz_gcd (gcd1, NUM(op1), DEN(op2));
+  mpz_gcd (gcd2, NUM(op2), DEN(op1));
  
-  mpz_divexact_gcd (tmp1, &(op1->_mp_num), gcd1);
-  mpz_divexact_gcd (tmp2, &(op2->_mp_num), gcd2);
+  mpz_divexact_gcd (tmp1, NUM(op1), gcd1);
+  mpz_divexact_gcd (tmp2, NUM(op2), gcd2);
  
-  mpz_mul (&(prod->_mp_num), tmp1, tmp2);
+  mpz_mul (NUM(prod), tmp1, tmp2);
  
-  mpz_divexact_gcd (tmp1, &(op2->_mp_den), gcd1);
-  mpz_divexact_gcd (tmp2, &(op1->_mp_den), gcd2);
+  mpz_divexact_gcd (tmp1, DEN(op2), gcd1);
+  mpz_divexact_gcd (tmp2, DEN(op1), gcd2);
  
-  mpz_mul (&(prod->_mp_den), tmp1, tmp2);
+  mpz_mul (DEN(prod), tmp1, tmp2);
  
    TMP_FREE;
  }
diff --git a/mpq/neg.c b/mpq/neg.c

index 972f3347029c0ff6fb98a1346f03780b513b39c0..32115c2fcb2e928e78e332b6c52d238c39d1bbd2 100644 (file)
--- a/mpq/neg.c
+++ b/mpq/neg.c
@@ -1,6 +1,6 @@
  /* mpq_neg -- negate a rational.
  
-Copyright 2000, 2001 Free Software Foundation, Inc.
+Copyright 2000, 2001, 2012 Free Software Foundation, Inc.
  
  This file is part of the GNU MP Library.
  
@@ -26,21 +26,22 @@ along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
  void
  mpq_neg (mpq_ptr dst, mpq_srcptr src)
  {
-  mp_size_t  num_size = src->_mp_num._mp_size;
+  mp_size_t  num_size = SIZ(NUM(src));
  
    if (src != dst)
      {
-      mp_size_t  num_abs_size = ABS(num_size);
-      mp_size_t  den_size = src->_mp_den._mp_size;
+      mp_size_t  size;
+      mp_ptr dp;
  
-      MPZ_REALLOC (mpq_numref(dst), num_abs_size);
-      MPZ_REALLOC (mpq_denref(dst), den_size);
+      size = ABS(num_size);
+      dp = MPZ_NEWALLOC (NUM(dst), size);
+      MPN_COPY (dp, PTR(NUM(src)), size);
  
-      MPN_COPY (dst->_mp_num._mp_d, src->_mp_num._mp_d, num_abs_size);
-      MPN_COPY (dst->_mp_den._mp_d, src->_mp_den._mp_d, den_size);
-
-      dst->_mp_den._mp_size = den_size;
+      size = SIZ(DEN(src));
+      dp = MPZ_NEWALLOC (DEN(dst), size);
+      SIZ(DEN(dst)) = size;
+      MPN_COPY (dp, PTR(DEN(src)), size);
      }
  
-  dst->_mp_num._mp_size = -num_size;
+  SIZ(NUM(dst)) = -num_size;
  }
diff --git a/mpq/set.c b/mpq/set.c

index 5d527be95a6c0fc0efd132684d288a2b3fdc7f00..b2dad7e591296d62f11d729f3e99bff29e849dcd 100644 (file)
--- a/mpq/set.c
+++ b/mpq/set.c
@@ -1,6 +1,6 @@
  /* mpq_set(dest,src) -- Set DEST to SRC.
  
-Copyright 1991, 1994, 1995, 2001 Free Software Foundation, Inc.
+Copyright 1991, 1994, 1995, 2001, 2012 Free Software Foundation, Inc.
  
  This file is part of the GNU MP Library.
  
@@ -21,21 +21,20 @@ along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
  #include "gmp-impl.h"
  
  void
-mpq_set (MP_RAT *dest, const MP_RAT *src)
+mpq_set (mpq_ptr dest, mpq_srcptr src)
  {
    mp_size_t num_size, den_size;
    mp_size_t abs_num_size;
+  mp_ptr dp;
  
-  num_size = src->_mp_num._mp_size;
+  num_size = SIZ(NUM(src));
    abs_num_size = ABS (num_size);
-  if (dest->_mp_num._mp_alloc < abs_num_size)
-    _mpz_realloc (&(dest->_mp_num), abs_num_size);
-  MPN_COPY (dest->_mp_num._mp_d, src->_mp_num._mp_d, abs_num_size);
-  dest->_mp_num._mp_size = num_size;
-
-  den_size = src->_mp_den._mp_size;
-  if (dest->_mp_den._mp_alloc < den_size)
-    _mpz_realloc (&(dest->_mp_den), den_size);
-  MPN_COPY (dest->_mp_den._mp_d, src->_mp_den._mp_d, den_size);
-  dest->_mp_den._mp_size = den_size;
+  dp = MPZ_NEWALLOC (NUM(dest), abs_num_size);
+  SIZ(NUM(dest)) = num_size;
+  MPN_COPY (dp, PTR(NUM(src)), abs_num_size);
+
+  den_size = SIZ(DEN(src));
+  dp = MPZ_NEWALLOC (DEN(dest), den_size);
+  SIZ(DEN(dest)) = den_size;
+  MPN_COPY (dp, PTR(DEN(src)), den_size);
  }
diff --git a/mpq/set_d.c b/mpq/set_d.c

index 1e806f80abd8437076eaeb48cc8758ca3cd09725..c56ef5d69c7371591a28b6d3da280cd4078b37f9 100644 (file)
--- a/mpq/set_d.c
+++ b/mpq/set_d.c
@@ -1,6 +1,6 @@
  /* mpq_set_d(mpq_t q, double d) -- Set q to d without rounding.
  
-Copyright 2000, 2002, 2003 Free Software Foundation, Inc.
+Copyright 2000, 2002, 2003, 2012 Free Software Foundation, Inc.
  
  This file is part of the GNU MP Library.
  
@@ -64,15 +64,14 @@ mpq_set_d (mpq_ptr dest, double d)
      {
        if (d == 0.0)
         {
-         SIZ(&(dest->_mp_num)) = 0;
-         SIZ(&(dest->_mp_den)) = 1;
-         PTR(&(dest->_mp_den))[0] = 1;
+         SIZ(NUM(dest)) = 0;
+         SIZ(DEN(dest)) = 1;
+         PTR(DEN(dest))[0] = 1;
           return;
         }
  
        dn = -exp;
-      MPZ_REALLOC (&(dest->_mp_num), 3);
-      np = PTR(&(dest->_mp_num));
+      np = MPZ_NEWALLOC (NUM(dest), 3);
  #if LIMBS_PER_DOUBLE == 4
        if ((tp[0] | tp[1] | tp[2]) == 0)
         np[0] = tp[3], nn = 1;
@@ -99,8 +98,7 @@ mpq_set_d (mpq_ptr dest, double d)
  #endif
        dn += nn + 1;
        ASSERT_ALWAYS (dn > 0);
-      MPZ_REALLOC (&(dest->_mp_den), dn);
-      dp = PTR(&(dest->_mp_den));
+      dp = MPZ_NEWALLOC (DEN(dest), dn);
        MPN_ZERO (dp, dn - 1);
        dp[dn - 1] = 1;
        count_trailing_zeros (c, np[0] | dp[0]);
@@ -111,14 +109,13 @@ mpq_set_d (mpq_ptr dest, double d)
           mpn_rshift (dp, dp, dn, c);
           dn -= dp[dn - 1] == 0;
         }
-      SIZ(&(dest->_mp_den)) = dn;
-      SIZ(&(dest->_mp_num)) = negative ? -nn : nn;
+      SIZ(DEN(dest)) = dn;
+      SIZ(NUM(dest)) = negative ? -nn : nn;
      }
    else
      {
        nn = exp;
-      MPZ_REALLOC (&(dest->_mp_num), nn);
-      np = PTR(&(dest->_mp_num));
+      np = MPZ_NEWALLOC (NUM(dest), nn);
        switch (nn)
          {
         default:
@@ -150,9 +147,9 @@ mpq_set_d (mpq_ptr dest, double d)
           break;
  #endif
         }
-      dp = PTR(&(dest->_mp_den));
+      dp = PTR(DEN(dest));
        dp[0] = 1;
-      SIZ(&(dest->_mp_den)) = 1;
-      SIZ(&(dest->_mp_num)) = negative ? -nn : nn;
+      SIZ(DEN(dest)) = 1;
+      SIZ(NUM(dest)) = negative ? -nn : nn;
      }
  }
diff --git a/mpq/set_den.c b/mpq/set_den.c

index 641c97da647fc011900b4e313a4a8c84319cbdab..e9ee17b71e528889251952e7834e1f66f0b4f6b9 100644 (file)
--- a/mpq/set_den.c
+++ b/mpq/set_den.c
@@ -1,6 +1,7 @@
  /* mpq_set_den(dest,den) -- Set the denominator of DEST from DEN.
  
-Copyright 1991, 1994, 1995, 1996, 2000, 2001 Free Software Foundation, Inc.
+Copyright 1991, 1994, 1995, 1996, 2000, 2001, 2012 Free Software
+Foundation, Inc.
  
  This file is part of the GNU MP Library.
  
@@ -21,14 +22,14 @@ along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
  #include "gmp-impl.h"
  
  void
-mpq_set_den (MP_RAT *dest, const MP_INT *den)
+mpq_set_den (mpq_ptr dest, mpz_srcptr den)
  {
-  mp_size_t size = den->_mp_size;
+  mp_size_t size = SIZ (den);
    mp_size_t abs_size = ABS (size);
+  mp_ptr dp;
  
-  if (dest->_mp_den._mp_alloc < abs_size)
-    _mpz_realloc (&(dest->_mp_den), abs_size);
+  dp = MPZ_NEWALLOC (DEN(dest), abs_size);
  
-  MPN_COPY (dest->_mp_den._mp_d, den->_mp_d, abs_size);
-  dest->_mp_den._mp_size = size;
+  SIZ(DEN(dest)) = size;
+  MPN_COPY (dp, PTR(den), abs_size);
  }
diff --git a/mpq/set_f.c b/mpq/set_f.c

index ba15844abaf79db59d9e1ac4b25078329add80e2..d1e14867be0a3722b8d301ca93fe45862050265f 100644 (file)
--- a/mpq/set_f.c
+++ b/mpq/set_f.c
@@ -34,9 +34,9 @@ mpq_set_f (mpq_ptr q, mpf_srcptr f)
    if (fsize == 0)
      {
        /* set q=0 */
-      q->_mp_num._mp_size = 0;
-      q->_mp_den._mp_size = 1;
-      q->_mp_den._mp_d[0] = 1;
+      SIZ(NUM(q)) = 0;
+      SIZ(DEN(q)) = 1;
+      PTR(DEN(q))[0] = 1;
        return;
      }
  
@@ -49,14 +49,13 @@ mpq_set_f (mpq_ptr q, mpf_srcptr f)
        /* radix point is to the right of the limbs, no denominator */
        mp_ptr  num_ptr;
  
-      MPZ_REALLOC (mpq_numref (q), fexp);
-      num_ptr = q->_mp_num._mp_d;
+      num_ptr = MPZ_NEWALLOC (mpq_numref (q), fexp);
        MPN_ZERO (num_ptr, fexp - abs_fsize);
        MPN_COPY (num_ptr + fexp - abs_fsize, fptr, abs_fsize);
  
-      q->_mp_num._mp_size = fsize >= 0 ? fexp : -fexp;
-      q->_mp_den._mp_size = 1;
-      q->_mp_den._mp_d[0] = 1;
+      SIZ(NUM(q)) = fsize >= 0 ? fexp : -fexp;
+      SIZ(DEN(q)) = 1;
+      PTR(DEN(q))[0] = 1;
      }
    else
      {
@@ -65,10 +64,8 @@ mpq_set_f (mpq_ptr q, mpf_srcptr f)
        mp_size_t  den_size;
  
        den_size = abs_fsize - fexp;
-      MPZ_REALLOC (mpq_numref (q), abs_fsize);
-      MPZ_REALLOC (mpq_denref (q), den_size+1);
-      num_ptr = q->_mp_num._mp_d;
-      den_ptr = q->_mp_den._mp_d;
+      num_ptr = MPZ_NEWALLOC (mpq_numref (q), abs_fsize);
+      den_ptr = MPZ_NEWALLOC (mpq_denref (q), den_size+1);
  
        if (flow & 1)
          {
@@ -93,7 +90,7 @@ mpq_set_f (mpq_ptr q, mpf_srcptr f)
            den_ptr[den_size] = GMP_LIMB_HIGHBIT >> (shift-1);
          }
  
-      q->_mp_num._mp_size = fsize >= 0 ? abs_fsize : -abs_fsize;
-      q->_mp_den._mp_size = den_size + 1;
+      SIZ(NUM(q)) = fsize >= 0 ? abs_fsize : -abs_fsize;
+      SIZ(DEN(q)) = den_size + 1;
      }
  }
diff --git a/mpq/set_num.c b/mpq/set_num.c

index 6c3c5646ac6c3c49f95c74016bb3a00bc12e1985..5b018c808bd095113f2e1210e5a49f7d9c704402 100644 (file)
--- a/mpq/set_num.c
+++ b/mpq/set_num.c
@@ -1,6 +1,6 @@
  /* mpq_set_num(dest,num) -- Set the numerator of DEST from NUM.
  
-Copyright 1991, 1994, 1995, 2001 Free Software Foundation, Inc.
+Copyright 1991, 1994, 1995, 2001, 2012 Free Software Foundation, Inc.
  
  This file is part of the GNU MP Library.
  
@@ -21,14 +21,14 @@ along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
  #include "gmp-impl.h"
  
  void
-mpq_set_num (MP_RAT *dest, const MP_INT *num)
+mpq_set_num (mpq_ptr dest, mpz_srcptr num)
  {
-  mp_size_t size = num->_mp_size;
+  mp_size_t size = SIZ (num);
    mp_size_t abs_size = ABS (size);
+  mp_ptr dp;
  
-  if (dest->_mp_num._mp_alloc < abs_size)
-    _mpz_realloc (&(dest->_mp_num), abs_size);
+  dp = MPZ_NEWALLOC (NUM(dest), abs_size);
  
-  MPN_COPY (dest->_mp_num._mp_d, num->_mp_d, abs_size);
-  dest->_mp_num._mp_size = size;
+  SIZ(NUM(dest)) = size;
+  MPN_COPY (dp, PTR(num), abs_size);
  }
diff --git a/mpq/set_si.c b/mpq/set_si.c

index 2d2bd4aa04c33c23f8f1a9c88261df69ae320196..f7499aa7c927fd6a756cf7157543459d5286b0ec 100644 (file)
--- a/mpq/set_si.c
+++ b/mpq/set_si.c
@@ -41,14 +41,14 @@ mpq_set_si (MP_RAT *dest, signed long int num, unsigned long int den)
      {
        /* Canonicalize 0/d to 0/1.  */
        den = 1;
-      dest->_mp_num._mp_size = 0;
+      SIZ(NUM(dest)) = 0;
      }
    else
      {
-      dest->_mp_num._mp_d[0] = abs_num;
-      dest->_mp_num._mp_size = num > 0 ? 1 : -1;
+      PTR(NUM(dest))[0] = abs_num;
+      SIZ(NUM(dest)) = num > 0 ? 1 : -1;
      }
  
-  dest->_mp_den._mp_d[0] = den;
-  dest->_mp_den._mp_size = (den != 0);
+  PTR(DEN(dest))[0] = den;
+  SIZ(DEN(dest)) = (den != 0);
  }
diff --git a/mpq/set_str.c b/mpq/set_str.c

index fd1c415f98605ec33f2ba2fdcbbfd7c3cf2ab2ae..07a18bb17fd782ddc74ff5fd0052ae8cd44ab174 100644 (file)
--- a/mpq/set_str.c
+++ b/mpq/set_str.c
@@ -38,8 +38,8 @@ mpq_set_str (mpq_ptr q, const char *str, int base)
    slash = strchr (str, '/');
    if (slash == NULL)
      {
-      q->_mp_den._mp_size = 1;
-      q->_mp_den._mp_d[0] = 1;
+      SIZ(DEN(q)) = 1;
+      PTR(DEN(q))[0] = 1;
  
        return mpz_set_str (mpq_numref(q), str, base);
      }
diff --git a/mpq/set_ui.c b/mpq/set_ui.c

index 8f0a9cd0cb934ba9d423bb641e77d87be045bdc8..231e07d2a44c9de0a74b108416468edb274dcf9d 100644 (file)
--- a/mpq/set_ui.c
+++ b/mpq/set_ui.c
@@ -37,14 +37,14 @@ mpq_set_ui (MP_RAT *dest, unsigned long int num, unsigned long int den)
      {
        /* Canonicalize 0/n to 0/1.  */
        den = 1;
-      dest->_mp_num._mp_size = 0;
+      SIZ(NUM(dest)) = 0;
      }
    else
      {
-      dest->_mp_num._mp_d[0] = num;
-      dest->_mp_num._mp_size = 1;
+      PTR(NUM(dest))[0] = num;
+      SIZ(NUM(dest)) = 1;
      }
  
-  dest->_mp_den._mp_d[0] = den;
-  dest->_mp_den._mp_size = (den != 0);
+  PTR(DEN(dest))[0] = den;
+  SIZ(DEN(dest)) = (den != 0);
  }
diff --git a/mpq/set_z.c b/mpq/set_z.c

index 8ca980c4a6a2329b0e1de0d3c8fc2bf0d80c88c7..e6f3ff0b62294655d3a1f06ccf1680480105a00c 100644 (file)
--- a/mpq/set_z.c
+++ b/mpq/set_z.c
@@ -1,6 +1,6 @@
  /* mpq_set_z (dest,src) -- Set DEST to SRC.
  
-Copyright 1996, 2001 Free Software Foundation, Inc.
+Copyright 1996, 2001, 2012 Free Software Foundation, Inc.
  
  This file is part of the GNU MP Library.
  
@@ -25,14 +25,14 @@ mpq_set_z (mpq_ptr dest, mpz_srcptr src)
  {
    mp_size_t num_size;
    mp_size_t abs_num_size;
+  mp_ptr dp;
  
-  num_size = src->_mp_size;
+  num_size = SIZ (src);
    abs_num_size = ABS (num_size);
-  if (dest->_mp_num._mp_alloc < abs_num_size)
-    _mpz_realloc (&(dest->_mp_num), abs_num_size);
-  MPN_COPY (dest->_mp_num._mp_d, src->_mp_d, abs_num_size);
-  dest->_mp_num._mp_size = num_size;
+  dp = MPZ_NEWALLOC (NUM(dest), abs_num_size);
+  SIZ(NUM(dest)) = num_size;
+  MPN_COPY (dp, PTR(src), abs_num_size);
  
-  dest->_mp_den._mp_d[0] = 1;
-  dest->_mp_den._mp_size = 1;
+  PTR(DEN(dest))[0] = 1;
+  SIZ(DEN(dest)) = 1;
  }
diff --git a/mpq/swap.c b/mpq/swap.c

index e1d96cc47fba9e6d6cb7e02076e5af283a6d84a2..0736c8779fabada4dc26af32983767ab33025ddb 100644 (file)
--- a/mpq/swap.c
+++ b/mpq/swap.c
@@ -27,34 +27,34 @@ mpq_swap (mpq_ptr u, mpq_ptr v) __GMP_NOTHROW
    mp_size_t usize, vsize;
    mp_size_t ualloc, valloc;
  
-  ualloc = u->_mp_num._mp_alloc;
-  valloc = v->_mp_num._mp_alloc;
-  v->_mp_num._mp_alloc = ualloc;
-  u->_mp_num._mp_alloc = valloc;
-
-  usize = u->_mp_num._mp_size;
-  vsize = v->_mp_num._mp_size;
-  v->_mp_num._mp_size = usize;
-  u->_mp_num._mp_size = vsize;
-
-  up = u->_mp_num._mp_d;
-  vp = v->_mp_num._mp_d;
-  v->_mp_num._mp_d = up;
-  u->_mp_num._mp_d = vp;
-
-
-  ualloc = u->_mp_den._mp_alloc;
-  valloc = v->_mp_den._mp_alloc;
-  v->_mp_den._mp_alloc = ualloc;
-  u->_mp_den._mp_alloc = valloc;
-
-  usize = u->_mp_den._mp_size;
-  vsize = v->_mp_den._mp_size;
-  v->_mp_den._mp_size = usize;
-  u->_mp_den._mp_size = vsize;
-
-  up = u->_mp_den._mp_d;
-  vp = v->_mp_den._mp_d;
-  v->_mp_den._mp_d = up;
-  u->_mp_den._mp_d = vp;
+  ualloc = ALLOC(NUM(u));
+  valloc = ALLOC(NUM(v));
+  ALLOC(NUM(v)) = ualloc;
+  ALLOC(NUM(u)) = valloc;
+
+  usize = SIZ(NUM(u));
+  vsize = SIZ(NUM(v));
+  SIZ(NUM(v)) = usize;
+  SIZ(NUM(u)) = vsize;
+
+  up = PTR(NUM(u));
+  vp = PTR(NUM(v));
+  PTR(NUM(v)) = up;
+  PTR(NUM(u)) = vp;
+
+
+  ualloc = ALLOC(DEN(u));
+  valloc = ALLOC(DEN(v));
+  ALLOC(DEN(v)) = ualloc;
+  ALLOC(DEN(u)) = valloc;
+
+  usize = SIZ(DEN(u));
+  vsize = SIZ(DEN(v));
+  SIZ(DEN(v)) = usize;
+  SIZ(DEN(u)) = vsize;
+
+  up = PTR(DEN(u));
+  vp = PTR(DEN(v));
+  PTR(DEN(v)) = up;
+  PTR(DEN(u)) = vp;
  }
diff --git a/mpz/2fac_ui.c b/mpz/2fac_ui.c

new file mode 100644 (file)

index 0000000..2fd7c7f
--- /dev/null
+++ b/mpz/2fac_ui.c
@@ -0,0 +1,89 @@
+/* mpz_2fac_ui(RESULT, N) -- Set RESULT to N!!.
+
+Contributed to the GNU project by Marco Bodrato.
+
+Copyright 2012 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+#define FACTOR_LIST_STORE(P, PR, MAX_PR, VEC, I)               \
+  do {                                                         \
+    if ((PR) > (MAX_PR)) {                                     \
+      (VEC)[(I)++] = (PR);                                     \
+      (PR) = (P);                                              \
+    } else                                                     \
+      (PR) *= (P);                                             \
+  } while (0)
+
+#define FAC_2DSC_THRESHOLD ((FAC_DSC_THRESHOLD << 1) | (FAC_DSC_THRESHOLD & 1))
+#define FACTORS_PER_LIMB   (GMP_NUMB_BITS / (LOG2C(FAC_2DSC_THRESHOLD-1)+1))
+
+/* Computes n!!, the 2-multi-factorial of n (aka double-factorial or
+   semi-factorial), and stores it in x.  WARNING: it assumes that n fits in a limb!
+ */
+void
+mpz_2fac_ui (mpz_ptr x, unsigned long n)
+{
+  ASSERT (n <= GMP_NUMB_MAX);
+
+  if ((n & 1) == 0) { /* n is even, n = 2k, (2k)!! = k! 2^k */
+    mp_limb_t count;
+
+    if ((n <= TABLE_LIMIT_2N_MINUS_POPC_2N) & (n != 0))
+      count = __gmp_fac2cnt_table[n / 2 - 1];
+    else
+      {
+       popc_limb (count, n);   /* popc(n) == popc(k) */
+       count = n - count;              /* n - popc(n) == k + k - popc(k) */
+      }
+    mpz_oddfac_1 (x, n >> 1, 0); /* presumably the odd part of k! -- confirm in oddfac_1.c */
+    mpz_mul_2exp (x, x, count); /* x = x * 2^count */
+  } else { /* n is odd */
+    if (n <= ODD_DOUBLEFACTORIAL_TABLE_LIMIT) {
+       PTR (x)[0] = __gmp_odd2fac_table[n >> 1];
+       SIZ (x) = 1;
+    } else if (BELOW_THRESHOLD (n, FAC_2DSC_THRESHOLD)) { /* odd basecase */
+      mp_limb_t *factors, prod, max_prod, j;
+      TMP_SDECL;
+
+      /* FIXME: we might alloc a fixed amount 1+FAC_2DSC_THRESHOLD/FACTORS_PER_LIMB */
+      TMP_SMARK;
+      factors = TMP_SALLOC_LIMBS (1 + n / (2 * FACTORS_PER_LIMB));
+
+      factors[0] = ODD_DOUBLEFACTORIAL_TABLE_MAX; /* presumably covers all odd factors <= the table limit -- confirm */
+      j = 1;
+      prod = n; /* running product starts with the largest factor */
+
+      max_prod = GMP_NUMB_MAX / FAC_2DSC_THRESHOLD;
+      while ((n -= 2) > ODD_DOUBLEFACTORIAL_TABLE_LIMIT)
+       FACTOR_LIST_STORE (n, prod, max_prod, factors, j);
+
+      factors[j++] = prod; /* store the last, still pending, product */
+      mpz_prodlimbs (x, factors, j); /* presumably x = product of factors[0..j-1] -- confirm */
+
+      TMP_SFREE;
+    } else { /* for the asymptotically fast odd case, let oddfac do the job. */
+      mpz_oddfac_1 (x, n, 1);
+    }
+  }
+}
+
+#undef FACTORS_PER_LIMB
+#undef FACTOR_LIST_STORE
+#undef FAC_2DSC_THRESHOLD
diff --git a/mpz/Makefile.am b/mpz/Makefile.am

index 74c2c344432128e04ea82fff638326ede006fa22..cfc2d71ce7648526df959532e13d5982b2d143a4 100644 (file)
--- a/mpz/Makefile.am
+++ b/mpz/Makefile.am
@@ -1,6 +1,6 @@
  ## Process this file with automake to generate Makefile.in
  
-# Copyright 1996, 1998, 1999, 2000, 2001, 2002, 2003 Free Software
+# Copyright 1996, 1998, 1999, 2000, 2001, 2002, 2003, 2012 Free Software
  # Foundation, Inc.
  #
  # This file is part of the GNU MP Library.
@@ -23,6 +23,7 @@ INCLUDES = -D__GMP_WITHIN_GMP -I$(top_srcdir)
  
  noinst_LTLIBRARIES = libmpz.la
  libmpz_la_SOURCES = aors.h aors_ui.h fits_s.h mul_i.h \
+  2fac_ui.c \
    add.c add_ui.c abs.c aorsmul.c aorsmul_i.c and.c array_init.c \
    bin_ui.c bin_uiui.c cdiv_q.c \
    cdiv_q_ui.c cdiv_qr.c cdiv_qr_ui.c cdiv_r.c cdiv_r_ui.c cdiv_ui.c \
@@ -42,19 +43,14 @@ libmpz_la_SOURCES = aors.h aors_ui.h fits_s.h mul_i.h \
    import.c init.c init2.c inits.c inp_raw.c inp_str.c \
    invert.c ior.c iset.c iset_d.c iset_si.c iset_str.c iset_ui.c \
    jacobi.c kronsz.c kronuz.c kronzs.c kronzu.c \
-  lcm.c lcm_ui.c lucnum_ui.c lucnum2_ui.c millerrabin.c \
+  lcm.c lcm_ui.c lucnum_ui.c lucnum2_ui.c mfac_uiui.c millerrabin.c \
    mod.c mul.c mul_2exp.c mul_si.c mul_ui.c n_pow_ui.c neg.c nextprime.c \
+  oddfac_1.c \
    out_raw.c out_str.c perfpow.c perfsqr.c popcount.c pow_ui.c powm.c \
-  powm_sec.c powm_ui.c pprime_p.c random.c random2.c \
+  powm_sec.c powm_ui.c pprime_p.c prodlimbs.c primorial_ui.c random.c random2.c \
    realloc.c realloc2.c remove.c root.c rootrem.c rrandomb.c \
    scan0.c scan1.c set.c set_d.c set_f.c set_q.c set_si.c set_str.c \
    set_ui.c setbit.c size.c sizeinbase.c sqrt.c sqrtrem.c sub.c sub_ui.c \
    swap.c tdiv_ui.c tdiv_q.c tdiv_q_2exp.c tdiv_q_ui.c tdiv_qr.c \
    tdiv_qr_ui.c tdiv_r.c tdiv_r_2exp.c tdiv_r_ui.c tstbit.c ui_pow_ui.c \
    ui_sub.c urandomb.c urandomm.c xor.c
-
-# These are BUILT_SOURCES at the top-level, so normally they're built before
-# recursing into this directory.
-#
-fac_ui.h:
-       cd ..; $(MAKE) $(AM_MAKEFLAGS) mpz/fac_ui.h
diff --git a/mpz/Makefile.in b/mpz/Makefile.in

index b0b75bafab9316b44f6296ed0e81d72c776baf81..c002ac75b93668b298649592895a855a3973ae46 100644 (file)
--- a/mpz/Makefile.in
+++ b/mpz/Makefile.in
@@ -1,9 +1,9 @@
-# Makefile.in generated by automake 1.11.1 from Makefile.am.
+# Makefile.in generated by automake 1.11.6 from Makefile.am.
  # @configure_input@
  
  # Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
-# 2003, 2004, 2005, 2006, 2007, 2008, 2009  Free Software Foundation,
-# Inc.
+# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software
+# Foundation, Inc.
  # This Makefile.in is free software; the Free Software Foundation
  # gives unlimited permission to copy and/or distribute it,
  # with or without modifications, as long as this notice is preserved.
@@ -15,7 +15,7 @@
  
  @SET_MAKE@
  
-# Copyright 1996, 1998, 1999, 2000, 2001, 2002, 2003 Free Software
+# Copyright 1996, 1998, 1999, 2000, 2001, 2002, 2003, 2012 Free Software
  # Foundation, Inc.
  #
  # This file is part of the GNU MP Library.
@@ -34,6 +34,23 @@
  # along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
  
  VPATH = @srcdir@
+am__make_dryrun = \
+  { \
+    am__dry=no; \
+    case $$MAKEFLAGS in \
+      *\\[\ \  ]*) \
+        echo 'am--echo: ; @echo "AM"  OK' | $(MAKE) -f - 2>/dev/null \
+          | grep '^AM OK$$' >/dev/null || am__dry=yes;; \
+      *) \
+        for am__flg in $$MAKEFLAGS; do \
+          case $$am__flg in \
+            *=*|--*) ;; \
+            *n*) am__dry=yes; break;; \
+          esac; \
+        done;; \
+    esac; \
+    test $$am__dry = yes; \
+  }
  pkgdatadir = $(datadir)/@PACKAGE@
  pkgincludedir = $(includedir)/@PACKAGE@
  pkglibdir = $(libdir)/@PACKAGE@
@@ -52,12 +69,11 @@ PRE_UNINSTALL = :
  POST_UNINSTALL = :
  build_triplet = @build@
  host_triplet = @host@
-ANSI2KNR = $(top_builddir)/ansi2knr
  subdir = mpz
  DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in
  ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
  am__aclocal_m4_deps = $(top_srcdir)/acinclude.m4 \
-       $(top_srcdir)/configure.in
+       $(top_srcdir)/configure.ac
  am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
         $(ACLOCAL_M4)
  mkinstalldirs = $(install_sh) -d
@@ -66,40 +82,35 @@ CONFIG_CLEAN_FILES =
  CONFIG_CLEAN_VPATH_FILES =
  LTLIBRARIES = $(noinst_LTLIBRARIES)
  libmpz_la_LIBADD =
-am_libmpz_la_OBJECTS = add$U.lo add_ui$U.lo abs$U.lo aorsmul$U.lo \
-       aorsmul_i$U.lo and$U.lo array_init$U.lo bin_ui$U.lo \
-       bin_uiui$U.lo cdiv_q$U.lo cdiv_q_ui$U.lo cdiv_qr$U.lo \
-       cdiv_qr_ui$U.lo cdiv_r$U.lo cdiv_r_ui$U.lo cdiv_ui$U.lo \
-       cfdiv_q_2exp$U.lo cfdiv_r_2exp$U.lo clear$U.lo clears$U.lo \
-       clrbit$U.lo cmp$U.lo cmp_d$U.lo cmp_si$U.lo cmp_ui$U.lo \
-       cmpabs$U.lo cmpabs_d$U.lo cmpabs_ui$U.lo com$U.lo combit$U.lo \
-       cong$U.lo cong_2exp$U.lo cong_ui$U.lo divexact$U.lo \
-       divegcd$U.lo dive_ui$U.lo divis$U.lo divis_ui$U.lo \
-       divis_2exp$U.lo dump$U.lo export$U.lo fac_ui$U.lo fdiv_q$U.lo \
-       fdiv_q_ui$U.lo fdiv_qr$U.lo fdiv_qr_ui$U.lo fdiv_r$U.lo \
-       fdiv_r_ui$U.lo fdiv_ui$U.lo fib_ui$U.lo fib2_ui$U.lo \
-       fits_sint$U.lo fits_slong$U.lo fits_sshort$U.lo fits_uint$U.lo \
-       fits_ulong$U.lo fits_ushort$U.lo gcd$U.lo gcd_ui$U.lo \
-       gcdext$U.lo get_d$U.lo get_d_2exp$U.lo get_si$U.lo \
-       get_str$U.lo get_ui$U.lo getlimbn$U.lo hamdist$U.lo \
-       import$U.lo init$U.lo init2$U.lo inits$U.lo inp_raw$U.lo \
-       inp_str$U.lo invert$U.lo ior$U.lo iset$U.lo iset_d$U.lo \
-       iset_si$U.lo iset_str$U.lo iset_ui$U.lo jacobi$U.lo \
-       kronsz$U.lo kronuz$U.lo kronzs$U.lo kronzu$U.lo lcm$U.lo \
-       lcm_ui$U.lo lucnum_ui$U.lo lucnum2_ui$U.lo millerrabin$U.lo \
-       mod$U.lo mul$U.lo mul_2exp$U.lo mul_si$U.lo mul_ui$U.lo \
-       n_pow_ui$U.lo neg$U.lo nextprime$U.lo out_raw$U.lo \
-       out_str$U.lo perfpow$U.lo perfsqr$U.lo popcount$U.lo \
-       pow_ui$U.lo powm$U.lo powm_sec$U.lo powm_ui$U.lo pprime_p$U.lo \
-       random$U.lo random2$U.lo realloc$U.lo realloc2$U.lo \
-       remove$U.lo root$U.lo rootrem$U.lo rrandomb$U.lo scan0$U.lo \
-       scan1$U.lo set$U.lo set_d$U.lo set_f$U.lo set_q$U.lo \
-       set_si$U.lo set_str$U.lo set_ui$U.lo setbit$U.lo size$U.lo \
-       sizeinbase$U.lo sqrt$U.lo sqrtrem$U.lo sub$U.lo sub_ui$U.lo \
-       swap$U.lo tdiv_ui$U.lo tdiv_q$U.lo tdiv_q_2exp$U.lo \
-       tdiv_q_ui$U.lo tdiv_qr$U.lo tdiv_qr_ui$U.lo tdiv_r$U.lo \
-       tdiv_r_2exp$U.lo tdiv_r_ui$U.lo tstbit$U.lo ui_pow_ui$U.lo \
-       ui_sub$U.lo urandomb$U.lo urandomm$U.lo xor$U.lo
+am_libmpz_la_OBJECTS = 2fac_ui.lo add.lo add_ui.lo abs.lo aorsmul.lo \
+       aorsmul_i.lo and.lo array_init.lo bin_ui.lo bin_uiui.lo \
+       cdiv_q.lo cdiv_q_ui.lo cdiv_qr.lo cdiv_qr_ui.lo cdiv_r.lo \
+       cdiv_r_ui.lo cdiv_ui.lo cfdiv_q_2exp.lo cfdiv_r_2exp.lo \
+       clear.lo clears.lo clrbit.lo cmp.lo cmp_d.lo cmp_si.lo \
+       cmp_ui.lo cmpabs.lo cmpabs_d.lo cmpabs_ui.lo com.lo combit.lo \
+       cong.lo cong_2exp.lo cong_ui.lo divexact.lo divegcd.lo \
+       dive_ui.lo divis.lo divis_ui.lo divis_2exp.lo dump.lo \
+       export.lo fac_ui.lo fdiv_q.lo fdiv_q_ui.lo fdiv_qr.lo \
+       fdiv_qr_ui.lo fdiv_r.lo fdiv_r_ui.lo fdiv_ui.lo fib_ui.lo \
+       fib2_ui.lo fits_sint.lo fits_slong.lo fits_sshort.lo \
+       fits_uint.lo fits_ulong.lo fits_ushort.lo gcd.lo gcd_ui.lo \
+       gcdext.lo get_d.lo get_d_2exp.lo get_si.lo get_str.lo \
+       get_ui.lo getlimbn.lo hamdist.lo import.lo init.lo init2.lo \
+       inits.lo inp_raw.lo inp_str.lo invert.lo ior.lo iset.lo \
+       iset_d.lo iset_si.lo iset_str.lo iset_ui.lo jacobi.lo \
+       kronsz.lo kronuz.lo kronzs.lo kronzu.lo lcm.lo lcm_ui.lo \
+       lucnum_ui.lo lucnum2_ui.lo mfac_uiui.lo millerrabin.lo mod.lo \
+       mul.lo mul_2exp.lo mul_si.lo mul_ui.lo n_pow_ui.lo neg.lo \
+       nextprime.lo oddfac_1.lo out_raw.lo out_str.lo perfpow.lo \
+       perfsqr.lo popcount.lo pow_ui.lo powm.lo powm_sec.lo \
+       powm_ui.lo pprime_p.lo prodlimbs.lo primorial_ui.lo random.lo \
+       random2.lo realloc.lo realloc2.lo remove.lo root.lo rootrem.lo \
+       rrandomb.lo scan0.lo scan1.lo set.lo set_d.lo set_f.lo \
+       set_q.lo set_si.lo set_str.lo set_ui.lo setbit.lo size.lo \
+       sizeinbase.lo sqrt.lo sqrtrem.lo sub.lo sub_ui.lo swap.lo \
+       tdiv_ui.lo tdiv_q.lo tdiv_q_2exp.lo tdiv_q_ui.lo tdiv_qr.lo \
+       tdiv_qr_ui.lo tdiv_r.lo tdiv_r_2exp.lo tdiv_r_ui.lo tstbit.lo \
+       ui_pow_ui.lo ui_sub.lo urandomb.lo urandomm.lo xor.lo
  libmpz_la_OBJECTS = $(am_libmpz_la_OBJECTS)
  DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir)
  depcomp =
@@ -115,6 +126,11 @@ LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \
         $(LDFLAGS) -o $@
  SOURCES = $(libmpz_la_SOURCES)
  DIST_SOURCES = $(libmpz_la_SOURCES)
+am__can_run_installinfo = \
+  case $$AM_UPDATE_INFO_DIR in \
+    n|no|NO) false;; \
+    *) (install-info --version) >/dev/null 2>&1;; \
+  esac
  ETAGS = etags
  CTAGS = ctags
  DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
@@ -216,8 +232,8 @@ SHELL = @SHELL@
  SPEED_CYCLECOUNTER_OBJ = @SPEED_CYCLECOUNTER_OBJ@
  STRIP = @STRIP@
  TAL_OBJECT = @TAL_OBJECT@
+TUNE_LIBS = @TUNE_LIBS@
  TUNE_SQR_OBJ = @TUNE_SQR_OBJ@
-U = @U@
  U_FOR_BUILD = @U_FOR_BUILD@
  VERSION = @VERSION@
  WITH_READLINE_01 = @WITH_READLINE_01@
@@ -264,7 +280,6 @@ mandir = @mandir@
  mkdir_p = @mkdir_p@
  mpn_objects = @mpn_objects@
  mpn_objs_in_libgmp = @mpn_objs_in_libgmp@
-mpn_objs_in_libmp = @mpn_objs_in_libmp@
  oldincludedir = @oldincludedir@
  pdfdir = @pdfdir@
  prefix = @prefix@
@@ -281,6 +296,7 @@ top_srcdir = @top_srcdir@
  INCLUDES = -D__GMP_WITHIN_GMP -I$(top_srcdir)
  noinst_LTLIBRARIES = libmpz.la
  libmpz_la_SOURCES = aors.h aors_ui.h fits_s.h mul_i.h \
+  2fac_ui.c \
    add.c add_ui.c abs.c aorsmul.c aorsmul_i.c and.c array_init.c \
    bin_ui.c bin_uiui.c cdiv_q.c \
    cdiv_q_ui.c cdiv_qr.c cdiv_qr_ui.c cdiv_r.c cdiv_r_ui.c cdiv_ui.c \
@@ -300,10 +316,11 @@ libmpz_la_SOURCES = aors.h aors_ui.h fits_s.h mul_i.h \
    import.c init.c init2.c inits.c inp_raw.c inp_str.c \
    invert.c ior.c iset.c iset_d.c iset_si.c iset_str.c iset_ui.c \
    jacobi.c kronsz.c kronuz.c kronzs.c kronzu.c \
-  lcm.c lcm_ui.c lucnum_ui.c lucnum2_ui.c millerrabin.c \
+  lcm.c lcm_ui.c lucnum_ui.c lucnum2_ui.c mfac_uiui.c millerrabin.c \
    mod.c mul.c mul_2exp.c mul_si.c mul_ui.c n_pow_ui.c neg.c nextprime.c \
+  oddfac_1.c \
    out_raw.c out_str.c perfpow.c perfsqr.c popcount.c pow_ui.c powm.c \
-  powm_sec.c powm_ui.c pprime_p.c random.c random2.c \
+  powm_sec.c powm_ui.c pprime_p.c prodlimbs.c primorial_ui.c random.c random2.c \
    realloc.c realloc2.c remove.c root.c rootrem.c rrandomb.c \
    scan0.c scan1.c set.c set_d.c set_f.c set_q.c set_si.c set_str.c \
    set_ui.c setbit.c size.c sizeinbase.c sqrt.c sqrtrem.c sub.c sub_ui.c \
@@ -354,7 +371,7 @@ clean-noinstLTLIBRARIES:
           echo "rm -f \"$${dir}/so_locations\""; \
           rm -f "$${dir}/so_locations"; \
         done
-libmpz.la: $(libmpz_la_OBJECTS) $(libmpz_la_DEPENDENCIES) 
+libmpz.la: $(libmpz_la_OBJECTS) $(libmpz_la_DEPENDENCIES) $(EXTRA_libmpz_la_DEPENDENCIES) 
         $(LINK)  $(libmpz_la_OBJECTS) $(libmpz_la_LIBADD) $(LIBS)
  
  mostlyclean-compile:
@@ -362,11 +379,6 @@ mostlyclean-compile:
  
  distclean-compile:
         -rm -f *.tab.c
-$(top_builddir)/ansi2knr:
-       $(am__cd) $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) ./ansi2knr
-
-mostlyclean-kr:
-       -test "$U" = "" || rm -f *_.c
  
  .c.o:
         $(COMPILE) -c $<
@@ -376,375 +388,6 @@ mostlyclean-kr:
  
  .c.lo:
         $(LTCOMPILE) -c -o $@ $<
-abs_.c: abs.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/abs.c; then echo $(srcdir)/abs.c; else echo abs.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-add_.c: add.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/add.c; then echo $(srcdir)/add.c; else echo add.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-add_ui_.c: add_ui.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/add_ui.c; then echo $(srcdir)/add_ui.c; else echo add_ui.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-and_.c: and.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/and.c; then echo $(srcdir)/and.c; else echo and.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-aorsmul_.c: aorsmul.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/aorsmul.c; then echo $(srcdir)/aorsmul.c; else echo aorsmul.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-aorsmul_i_.c: aorsmul_i.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/aorsmul_i.c; then echo $(srcdir)/aorsmul_i.c; else echo aorsmul_i.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-array_init_.c: array_init.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/array_init.c; then echo $(srcdir)/array_init.c; else echo array_init.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-bin_ui_.c: bin_ui.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/bin_ui.c; then echo $(srcdir)/bin_ui.c; else echo bin_ui.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-bin_uiui_.c: bin_uiui.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/bin_uiui.c; then echo $(srcdir)/bin_uiui.c; else echo bin_uiui.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-cdiv_q_.c: cdiv_q.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/cdiv_q.c; then echo $(srcdir)/cdiv_q.c; else echo cdiv_q.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-cdiv_q_ui_.c: cdiv_q_ui.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/cdiv_q_ui.c; then echo $(srcdir)/cdiv_q_ui.c; else echo cdiv_q_ui.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-cdiv_qr_.c: cdiv_qr.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/cdiv_qr.c; then echo $(srcdir)/cdiv_qr.c; else echo cdiv_qr.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-cdiv_qr_ui_.c: cdiv_qr_ui.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/cdiv_qr_ui.c; then echo $(srcdir)/cdiv_qr_ui.c; else echo cdiv_qr_ui.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-cdiv_r_.c: cdiv_r.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/cdiv_r.c; then echo $(srcdir)/cdiv_r.c; else echo cdiv_r.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-cdiv_r_ui_.c: cdiv_r_ui.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/cdiv_r_ui.c; then echo $(srcdir)/cdiv_r_ui.c; else echo cdiv_r_ui.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-cdiv_ui_.c: cdiv_ui.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/cdiv_ui.c; then echo $(srcdir)/cdiv_ui.c; else echo cdiv_ui.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-cfdiv_q_2exp_.c: cfdiv_q_2exp.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/cfdiv_q_2exp.c; then echo $(srcdir)/cfdiv_q_2exp.c; else echo cfdiv_q_2exp.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-cfdiv_r_2exp_.c: cfdiv_r_2exp.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/cfdiv_r_2exp.c; then echo $(srcdir)/cfdiv_r_2exp.c; else echo cfdiv_r_2exp.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-clear_.c: clear.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/clear.c; then echo $(srcdir)/clear.c; else echo clear.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-clears_.c: clears.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/clears.c; then echo $(srcdir)/clears.c; else echo clears.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-clrbit_.c: clrbit.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/clrbit.c; then echo $(srcdir)/clrbit.c; else echo clrbit.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-cmp_.c: cmp.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/cmp.c; then echo $(srcdir)/cmp.c; else echo cmp.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-cmp_d_.c: cmp_d.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/cmp_d.c; then echo $(srcdir)/cmp_d.c; else echo cmp_d.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-cmp_si_.c: cmp_si.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/cmp_si.c; then echo $(srcdir)/cmp_si.c; else echo cmp_si.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-cmp_ui_.c: cmp_ui.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/cmp_ui.c; then echo $(srcdir)/cmp_ui.c; else echo cmp_ui.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-cmpabs_.c: cmpabs.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/cmpabs.c; then echo $(srcdir)/cmpabs.c; else echo cmpabs.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-cmpabs_d_.c: cmpabs_d.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/cmpabs_d.c; then echo $(srcdir)/cmpabs_d.c; else echo cmpabs_d.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-cmpabs_ui_.c: cmpabs_ui.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/cmpabs_ui.c; then echo $(srcdir)/cmpabs_ui.c; else echo cmpabs_ui.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-com_.c: com.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/com.c; then echo $(srcdir)/com.c; else echo com.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-combit_.c: combit.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/combit.c; then echo $(srcdir)/combit.c; else echo combit.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-cong_.c: cong.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/cong.c; then echo $(srcdir)/cong.c; else echo cong.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-cong_2exp_.c: cong_2exp.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/cong_2exp.c; then echo $(srcdir)/cong_2exp.c; else echo cong_2exp.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-cong_ui_.c: cong_ui.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/cong_ui.c; then echo $(srcdir)/cong_ui.c; else echo cong_ui.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-dive_ui_.c: dive_ui.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/dive_ui.c; then echo $(srcdir)/dive_ui.c; else echo dive_ui.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-divegcd_.c: divegcd.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/divegcd.c; then echo $(srcdir)/divegcd.c; else echo divegcd.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-divexact_.c: divexact.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/divexact.c; then echo $(srcdir)/divexact.c; else echo divexact.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-divis_.c: divis.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/divis.c; then echo $(srcdir)/divis.c; else echo divis.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-divis_2exp_.c: divis_2exp.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/divis_2exp.c; then echo $(srcdir)/divis_2exp.c; else echo divis_2exp.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-divis_ui_.c: divis_ui.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/divis_ui.c; then echo $(srcdir)/divis_ui.c; else echo divis_ui.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-dump_.c: dump.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/dump.c; then echo $(srcdir)/dump.c; else echo dump.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-export_.c: export.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/export.c; then echo $(srcdir)/export.c; else echo export.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-fac_ui_.c: fac_ui.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/fac_ui.c; then echo $(srcdir)/fac_ui.c; else echo fac_ui.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-fdiv_q_.c: fdiv_q.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/fdiv_q.c; then echo $(srcdir)/fdiv_q.c; else echo fdiv_q.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-fdiv_q_ui_.c: fdiv_q_ui.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/fdiv_q_ui.c; then echo $(srcdir)/fdiv_q_ui.c; else echo fdiv_q_ui.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-fdiv_qr_.c: fdiv_qr.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/fdiv_qr.c; then echo $(srcdir)/fdiv_qr.c; else echo fdiv_qr.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-fdiv_qr_ui_.c: fdiv_qr_ui.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/fdiv_qr_ui.c; then echo $(srcdir)/fdiv_qr_ui.c; else echo fdiv_qr_ui.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-fdiv_r_.c: fdiv_r.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/fdiv_r.c; then echo $(srcdir)/fdiv_r.c; else echo fdiv_r.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-fdiv_r_ui_.c: fdiv_r_ui.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/fdiv_r_ui.c; then echo $(srcdir)/fdiv_r_ui.c; else echo fdiv_r_ui.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-fdiv_ui_.c: fdiv_ui.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/fdiv_ui.c; then echo $(srcdir)/fdiv_ui.c; else echo fdiv_ui.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-fib2_ui_.c: fib2_ui.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/fib2_ui.c; then echo $(srcdir)/fib2_ui.c; else echo fib2_ui.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-fib_ui_.c: fib_ui.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/fib_ui.c; then echo $(srcdir)/fib_ui.c; else echo fib_ui.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-fits_sint_.c: fits_sint.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/fits_sint.c; then echo $(srcdir)/fits_sint.c; else echo fits_sint.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-fits_slong_.c: fits_slong.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/fits_slong.c; then echo $(srcdir)/fits_slong.c; else echo fits_slong.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-fits_sshort_.c: fits_sshort.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/fits_sshort.c; then echo $(srcdir)/fits_sshort.c; else echo fits_sshort.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-fits_uint_.c: fits_uint.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/fits_uint.c; then echo $(srcdir)/fits_uint.c; else echo fits_uint.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-fits_ulong_.c: fits_ulong.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/fits_ulong.c; then echo $(srcdir)/fits_ulong.c; else echo fits_ulong.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-fits_ushort_.c: fits_ushort.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/fits_ushort.c; then echo $(srcdir)/fits_ushort.c; else echo fits_ushort.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-gcd_.c: gcd.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/gcd.c; then echo $(srcdir)/gcd.c; else echo gcd.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-gcd_ui_.c: gcd_ui.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/gcd_ui.c; then echo $(srcdir)/gcd_ui.c; else echo gcd_ui.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-gcdext_.c: gcdext.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/gcdext.c; then echo $(srcdir)/gcdext.c; else echo gcdext.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-get_d_.c: get_d.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/get_d.c; then echo $(srcdir)/get_d.c; else echo get_d.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-get_d_2exp_.c: get_d_2exp.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/get_d_2exp.c; then echo $(srcdir)/get_d_2exp.c; else echo get_d_2exp.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-get_si_.c: get_si.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/get_si.c; then echo $(srcdir)/get_si.c; else echo get_si.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-get_str_.c: get_str.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/get_str.c; then echo $(srcdir)/get_str.c; else echo get_str.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-get_ui_.c: get_ui.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/get_ui.c; then echo $(srcdir)/get_ui.c; else echo get_ui.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-getlimbn_.c: getlimbn.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/getlimbn.c; then echo $(srcdir)/getlimbn.c; else echo getlimbn.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-hamdist_.c: hamdist.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/hamdist.c; then echo $(srcdir)/hamdist.c; else echo hamdist.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-import_.c: import.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/import.c; then echo $(srcdir)/import.c; else echo import.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-init_.c: init.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/init.c; then echo $(srcdir)/init.c; else echo init.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-init2_.c: init2.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/init2.c; then echo $(srcdir)/init2.c; else echo init2.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-inits_.c: inits.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/inits.c; then echo $(srcdir)/inits.c; else echo inits.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-inp_raw_.c: inp_raw.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/inp_raw.c; then echo $(srcdir)/inp_raw.c; else echo inp_raw.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-inp_str_.c: inp_str.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/inp_str.c; then echo $(srcdir)/inp_str.c; else echo inp_str.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-invert_.c: invert.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/invert.c; then echo $(srcdir)/invert.c; else echo invert.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-ior_.c: ior.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/ior.c; then echo $(srcdir)/ior.c; else echo ior.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-iset_.c: iset.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/iset.c; then echo $(srcdir)/iset.c; else echo iset.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-iset_d_.c: iset_d.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/iset_d.c; then echo $(srcdir)/iset_d.c; else echo iset_d.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-iset_si_.c: iset_si.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/iset_si.c; then echo $(srcdir)/iset_si.c; else echo iset_si.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-iset_str_.c: iset_str.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/iset_str.c; then echo $(srcdir)/iset_str.c; else echo iset_str.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-iset_ui_.c: iset_ui.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/iset_ui.c; then echo $(srcdir)/iset_ui.c; else echo iset_ui.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-jacobi_.c: jacobi.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/jacobi.c; then echo $(srcdir)/jacobi.c; else echo jacobi.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-kronsz_.c: kronsz.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/kronsz.c; then echo $(srcdir)/kronsz.c; else echo kronsz.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-kronuz_.c: kronuz.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/kronuz.c; then echo $(srcdir)/kronuz.c; else echo kronuz.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-kronzs_.c: kronzs.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/kronzs.c; then echo $(srcdir)/kronzs.c; else echo kronzs.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-kronzu_.c: kronzu.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/kronzu.c; then echo $(srcdir)/kronzu.c; else echo kronzu.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-lcm_.c: lcm.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/lcm.c; then echo $(srcdir)/lcm.c; else echo lcm.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-lcm_ui_.c: lcm_ui.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/lcm_ui.c; then echo $(srcdir)/lcm_ui.c; else echo lcm_ui.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-lucnum2_ui_.c: lucnum2_ui.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/lucnum2_ui.c; then echo $(srcdir)/lucnum2_ui.c; else echo lucnum2_ui.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-lucnum_ui_.c: lucnum_ui.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/lucnum_ui.c; then echo $(srcdir)/lucnum_ui.c; else echo lucnum_ui.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-millerrabin_.c: millerrabin.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/millerrabin.c; then echo $(srcdir)/millerrabin.c; else echo millerrabin.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-mod_.c: mod.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/mod.c; then echo $(srcdir)/mod.c; else echo mod.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-mul_.c: mul.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/mul.c; then echo $(srcdir)/mul.c; else echo mul.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-mul_2exp_.c: mul_2exp.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/mul_2exp.c; then echo $(srcdir)/mul_2exp.c; else echo mul_2exp.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-mul_si_.c: mul_si.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/mul_si.c; then echo $(srcdir)/mul_si.c; else echo mul_si.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-mul_ui_.c: mul_ui.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/mul_ui.c; then echo $(srcdir)/mul_ui.c; else echo mul_ui.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-n_pow_ui_.c: n_pow_ui.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/n_pow_ui.c; then echo $(srcdir)/n_pow_ui.c; else echo n_pow_ui.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-neg_.c: neg.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/neg.c; then echo $(srcdir)/neg.c; else echo neg.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-nextprime_.c: nextprime.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/nextprime.c; then echo $(srcdir)/nextprime.c; else echo nextprime.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-out_raw_.c: out_raw.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/out_raw.c; then echo $(srcdir)/out_raw.c; else echo out_raw.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-out_str_.c: out_str.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/out_str.c; then echo $(srcdir)/out_str.c; else echo out_str.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-perfpow_.c: perfpow.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/perfpow.c; then echo $(srcdir)/perfpow.c; else echo perfpow.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-perfsqr_.c: perfsqr.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/perfsqr.c; then echo $(srcdir)/perfsqr.c; else echo perfsqr.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-popcount_.c: popcount.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/popcount.c; then echo $(srcdir)/popcount.c; else echo popcount.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-pow_ui_.c: pow_ui.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/pow_ui.c; then echo $(srcdir)/pow_ui.c; else echo pow_ui.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-powm_.c: powm.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/powm.c; then echo $(srcdir)/powm.c; else echo powm.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-powm_sec_.c: powm_sec.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/powm_sec.c; then echo $(srcdir)/powm_sec.c; else echo powm_sec.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-powm_ui_.c: powm_ui.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/powm_ui.c; then echo $(srcdir)/powm_ui.c; else echo powm_ui.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-pprime_p_.c: pprime_p.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/pprime_p.c; then echo $(srcdir)/pprime_p.c; else echo pprime_p.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-random_.c: random.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/random.c; then echo $(srcdir)/random.c; else echo random.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-random2_.c: random2.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/random2.c; then echo $(srcdir)/random2.c; else echo random2.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-realloc_.c: realloc.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/realloc.c; then echo $(srcdir)/realloc.c; else echo realloc.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-realloc2_.c: realloc2.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/realloc2.c; then echo $(srcdir)/realloc2.c; else echo realloc2.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-remove_.c: remove.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/remove.c; then echo $(srcdir)/remove.c; else echo remove.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-root_.c: root.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/root.c; then echo $(srcdir)/root.c; else echo root.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-rootrem_.c: rootrem.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/rootrem.c; then echo $(srcdir)/rootrem.c; else echo rootrem.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-rrandomb_.c: rrandomb.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/rrandomb.c; then echo $(srcdir)/rrandomb.c; else echo rrandomb.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-scan0_.c: scan0.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/scan0.c; then echo $(srcdir)/scan0.c; else echo scan0.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-scan1_.c: scan1.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/scan1.c; then echo $(srcdir)/scan1.c; else echo scan1.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-set_.c: set.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/set.c; then echo $(srcdir)/set.c; else echo set.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-set_d_.c: set_d.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/set_d.c; then echo $(srcdir)/set_d.c; else echo set_d.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-set_f_.c: set_f.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/set_f.c; then echo $(srcdir)/set_f.c; else echo set_f.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-set_q_.c: set_q.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/set_q.c; then echo $(srcdir)/set_q.c; else echo set_q.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-set_si_.c: set_si.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/set_si.c; then echo $(srcdir)/set_si.c; else echo set_si.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-set_str_.c: set_str.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/set_str.c; then echo $(srcdir)/set_str.c; else echo set_str.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-set_ui_.c: set_ui.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/set_ui.c; then echo $(srcdir)/set_ui.c; else echo set_ui.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-setbit_.c: setbit.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/setbit.c; then echo $(srcdir)/setbit.c; else echo setbit.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-size_.c: size.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/size.c; then echo $(srcdir)/size.c; else echo size.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-sizeinbase_.c: sizeinbase.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/sizeinbase.c; then echo $(srcdir)/sizeinbase.c; else echo sizeinbase.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-sqrt_.c: sqrt.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/sqrt.c; then echo $(srcdir)/sqrt.c; else echo sqrt.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-sqrtrem_.c: sqrtrem.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/sqrtrem.c; then echo $(srcdir)/sqrtrem.c; else echo sqrtrem.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-sub_.c: sub.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/sub.c; then echo $(srcdir)/sub.c; else echo sub.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-sub_ui_.c: sub_ui.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/sub_ui.c; then echo $(srcdir)/sub_ui.c; else echo sub_ui.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-swap_.c: swap.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/swap.c; then echo $(srcdir)/swap.c; else echo swap.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-tdiv_q_.c: tdiv_q.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/tdiv_q.c; then echo $(srcdir)/tdiv_q.c; else echo tdiv_q.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-tdiv_q_2exp_.c: tdiv_q_2exp.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/tdiv_q_2exp.c; then echo $(srcdir)/tdiv_q_2exp.c; else echo tdiv_q_2exp.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-tdiv_q_ui_.c: tdiv_q_ui.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/tdiv_q_ui.c; then echo $(srcdir)/tdiv_q_ui.c; else echo tdiv_q_ui.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-tdiv_qr_.c: tdiv_qr.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/tdiv_qr.c; then echo $(srcdir)/tdiv_qr.c; else echo tdiv_qr.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-tdiv_qr_ui_.c: tdiv_qr_ui.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/tdiv_qr_ui.c; then echo $(srcdir)/tdiv_qr_ui.c; else echo tdiv_qr_ui.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-tdiv_r_.c: tdiv_r.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/tdiv_r.c; then echo $(srcdir)/tdiv_r.c; else echo tdiv_r.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-tdiv_r_2exp_.c: tdiv_r_2exp.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/tdiv_r_2exp.c; then echo $(srcdir)/tdiv_r_2exp.c; else echo tdiv_r_2exp.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-tdiv_r_ui_.c: tdiv_r_ui.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/tdiv_r_ui.c; then echo $(srcdir)/tdiv_r_ui.c; else echo tdiv_r_ui.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-tdiv_ui_.c: tdiv_ui.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/tdiv_ui.c; then echo $(srcdir)/tdiv_ui.c; else echo tdiv_ui.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-tstbit_.c: tstbit.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/tstbit.c; then echo $(srcdir)/tstbit.c; else echo tstbit.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-ui_pow_ui_.c: ui_pow_ui.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/ui_pow_ui.c; then echo $(srcdir)/ui_pow_ui.c; else echo ui_pow_ui.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-ui_sub_.c: ui_sub.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/ui_sub.c; then echo $(srcdir)/ui_sub.c; else echo ui_sub.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-urandomb_.c: urandomb.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/urandomb.c; then echo $(srcdir)/urandomb.c; else echo urandomb.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-urandomm_.c: urandomm.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/urandomm.c; then echo $(srcdir)/urandomm.c; else echo urandomm.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-xor_.c: xor.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/xor.c; then echo $(srcdir)/xor.c; else echo xor.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-abs_.$(OBJEXT) abs_.lo add_.$(OBJEXT) add_.lo add_ui_.$(OBJEXT) \
-add_ui_.lo and_.$(OBJEXT) and_.lo aorsmul_.$(OBJEXT) aorsmul_.lo \
-aorsmul_i_.$(OBJEXT) aorsmul_i_.lo array_init_.$(OBJEXT) \
-array_init_.lo bin_ui_.$(OBJEXT) bin_ui_.lo bin_uiui_.$(OBJEXT) \
-bin_uiui_.lo cdiv_q_.$(OBJEXT) cdiv_q_.lo cdiv_q_ui_.$(OBJEXT) \
-cdiv_q_ui_.lo cdiv_qr_.$(OBJEXT) cdiv_qr_.lo cdiv_qr_ui_.$(OBJEXT) \
-cdiv_qr_ui_.lo cdiv_r_.$(OBJEXT) cdiv_r_.lo cdiv_r_ui_.$(OBJEXT) \
-cdiv_r_ui_.lo cdiv_ui_.$(OBJEXT) cdiv_ui_.lo cfdiv_q_2exp_.$(OBJEXT) \
-cfdiv_q_2exp_.lo cfdiv_r_2exp_.$(OBJEXT) cfdiv_r_2exp_.lo \
-clear_.$(OBJEXT) clear_.lo clears_.$(OBJEXT) clears_.lo \
-clrbit_.$(OBJEXT) clrbit_.lo cmp_.$(OBJEXT) cmp_.lo cmp_d_.$(OBJEXT) \
-cmp_d_.lo cmp_si_.$(OBJEXT) cmp_si_.lo cmp_ui_.$(OBJEXT) cmp_ui_.lo \
-cmpabs_.$(OBJEXT) cmpabs_.lo cmpabs_d_.$(OBJEXT) cmpabs_d_.lo \
-cmpabs_ui_.$(OBJEXT) cmpabs_ui_.lo com_.$(OBJEXT) com_.lo \
-combit_.$(OBJEXT) combit_.lo cong_.$(OBJEXT) cong_.lo \
-cong_2exp_.$(OBJEXT) cong_2exp_.lo cong_ui_.$(OBJEXT) cong_ui_.lo \
-dive_ui_.$(OBJEXT) dive_ui_.lo divegcd_.$(OBJEXT) divegcd_.lo \
-divexact_.$(OBJEXT) divexact_.lo divis_.$(OBJEXT) divis_.lo \
-divis_2exp_.$(OBJEXT) divis_2exp_.lo divis_ui_.$(OBJEXT) divis_ui_.lo \
-dump_.$(OBJEXT) dump_.lo export_.$(OBJEXT) export_.lo \
-fac_ui_.$(OBJEXT) fac_ui_.lo fdiv_q_.$(OBJEXT) fdiv_q_.lo \
-fdiv_q_ui_.$(OBJEXT) fdiv_q_ui_.lo fdiv_qr_.$(OBJEXT) fdiv_qr_.lo \
-fdiv_qr_ui_.$(OBJEXT) fdiv_qr_ui_.lo fdiv_r_.$(OBJEXT) fdiv_r_.lo \
-fdiv_r_ui_.$(OBJEXT) fdiv_r_ui_.lo fdiv_ui_.$(OBJEXT) fdiv_ui_.lo \
-fib2_ui_.$(OBJEXT) fib2_ui_.lo fib_ui_.$(OBJEXT) fib_ui_.lo \
-fits_sint_.$(OBJEXT) fits_sint_.lo fits_slong_.$(OBJEXT) \
-fits_slong_.lo fits_sshort_.$(OBJEXT) fits_sshort_.lo \
-fits_uint_.$(OBJEXT) fits_uint_.lo fits_ulong_.$(OBJEXT) \
-fits_ulong_.lo fits_ushort_.$(OBJEXT) fits_ushort_.lo gcd_.$(OBJEXT) \
-gcd_.lo gcd_ui_.$(OBJEXT) gcd_ui_.lo gcdext_.$(OBJEXT) gcdext_.lo \
-get_d_.$(OBJEXT) get_d_.lo get_d_2exp_.$(OBJEXT) get_d_2exp_.lo \
-get_si_.$(OBJEXT) get_si_.lo get_str_.$(OBJEXT) get_str_.lo \
-get_ui_.$(OBJEXT) get_ui_.lo getlimbn_.$(OBJEXT) getlimbn_.lo \
-hamdist_.$(OBJEXT) hamdist_.lo import_.$(OBJEXT) import_.lo \
-init_.$(OBJEXT) init_.lo init2_.$(OBJEXT) init2_.lo inits_.$(OBJEXT) \
-inits_.lo inp_raw_.$(OBJEXT) inp_raw_.lo inp_str_.$(OBJEXT) \
-inp_str_.lo invert_.$(OBJEXT) invert_.lo ior_.$(OBJEXT) ior_.lo \
-iset_.$(OBJEXT) iset_.lo iset_d_.$(OBJEXT) iset_d_.lo \
-iset_si_.$(OBJEXT) iset_si_.lo iset_str_.$(OBJEXT) iset_str_.lo \
-iset_ui_.$(OBJEXT) iset_ui_.lo jacobi_.$(OBJEXT) jacobi_.lo \
-kronsz_.$(OBJEXT) kronsz_.lo kronuz_.$(OBJEXT) kronuz_.lo \
-kronzs_.$(OBJEXT) kronzs_.lo kronzu_.$(OBJEXT) kronzu_.lo \
-lcm_.$(OBJEXT) lcm_.lo lcm_ui_.$(OBJEXT) lcm_ui_.lo \
-lucnum2_ui_.$(OBJEXT) lucnum2_ui_.lo lucnum_ui_.$(OBJEXT) \
-lucnum_ui_.lo millerrabin_.$(OBJEXT) millerrabin_.lo mod_.$(OBJEXT) \
-mod_.lo mul_.$(OBJEXT) mul_.lo mul_2exp_.$(OBJEXT) mul_2exp_.lo \
-mul_si_.$(OBJEXT) mul_si_.lo mul_ui_.$(OBJEXT) mul_ui_.lo \
-n_pow_ui_.$(OBJEXT) n_pow_ui_.lo neg_.$(OBJEXT) neg_.lo \
-nextprime_.$(OBJEXT) nextprime_.lo out_raw_.$(OBJEXT) out_raw_.lo \
-out_str_.$(OBJEXT) out_str_.lo perfpow_.$(OBJEXT) perfpow_.lo \
-perfsqr_.$(OBJEXT) perfsqr_.lo popcount_.$(OBJEXT) popcount_.lo \
-pow_ui_.$(OBJEXT) pow_ui_.lo powm_.$(OBJEXT) powm_.lo \
-powm_sec_.$(OBJEXT) powm_sec_.lo powm_ui_.$(OBJEXT) powm_ui_.lo \
-pprime_p_.$(OBJEXT) pprime_p_.lo random_.$(OBJEXT) random_.lo \
-random2_.$(OBJEXT) random2_.lo realloc_.$(OBJEXT) realloc_.lo \
-realloc2_.$(OBJEXT) realloc2_.lo remove_.$(OBJEXT) remove_.lo \
-root_.$(OBJEXT) root_.lo rootrem_.$(OBJEXT) rootrem_.lo \
-rrandomb_.$(OBJEXT) rrandomb_.lo scan0_.$(OBJEXT) scan0_.lo \
-scan1_.$(OBJEXT) scan1_.lo set_.$(OBJEXT) set_.lo set_d_.$(OBJEXT) \
-set_d_.lo set_f_.$(OBJEXT) set_f_.lo set_q_.$(OBJEXT) set_q_.lo \
-set_si_.$(OBJEXT) set_si_.lo set_str_.$(OBJEXT) set_str_.lo \
-set_ui_.$(OBJEXT) set_ui_.lo setbit_.$(OBJEXT) setbit_.lo \
-size_.$(OBJEXT) size_.lo sizeinbase_.$(OBJEXT) sizeinbase_.lo \
-sqrt_.$(OBJEXT) sqrt_.lo sqrtrem_.$(OBJEXT) sqrtrem_.lo sub_.$(OBJEXT) \
-sub_.lo sub_ui_.$(OBJEXT) sub_ui_.lo swap_.$(OBJEXT) swap_.lo \
-tdiv_q_.$(OBJEXT) tdiv_q_.lo tdiv_q_2exp_.$(OBJEXT) tdiv_q_2exp_.lo \
-tdiv_q_ui_.$(OBJEXT) tdiv_q_ui_.lo tdiv_qr_.$(OBJEXT) tdiv_qr_.lo \
-tdiv_qr_ui_.$(OBJEXT) tdiv_qr_ui_.lo tdiv_r_.$(OBJEXT) tdiv_r_.lo \
-tdiv_r_2exp_.$(OBJEXT) tdiv_r_2exp_.lo tdiv_r_ui_.$(OBJEXT) \
-tdiv_r_ui_.lo tdiv_ui_.$(OBJEXT) tdiv_ui_.lo tstbit_.$(OBJEXT) \
-tstbit_.lo ui_pow_ui_.$(OBJEXT) ui_pow_ui_.lo ui_sub_.$(OBJEXT) \
-ui_sub_.lo urandomb_.$(OBJEXT) urandomb_.lo urandomm_.$(OBJEXT) \
-urandomm_.lo xor_.$(OBJEXT) xor_.lo : $(ANSI2KNR)
  
  mostlyclean-libtool:
         -rm -f *.lo
@@ -848,10 +491,15 @@ install-am: all-am
  
  installcheck: installcheck-am
  install-strip:
-       $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
-         install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
-         `test -z '$(STRIP)' || \
-           echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+       if test -z '$(STRIP)'; then \
+         $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+           install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+             install; \
+       else \
+         $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+           install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+           "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
+       fi
  mostlyclean-generic:
  
  clean-generic:
@@ -919,7 +567,7 @@ maintainer-clean-am: distclean-am maintainer-clean-generic
  
  mostlyclean: mostlyclean-am
  
-mostlyclean-am: mostlyclean-compile mostlyclean-generic mostlyclean-kr \
+mostlyclean-am: mostlyclean-compile mostlyclean-generic \
         mostlyclean-libtool
  
  pdf: pdf-am
@@ -932,7 +580,7 @@ ps-am:
  
  uninstall-am:
  
-.MAKE: $(top_builddir)/ansi2knr install-am install-strip
+.MAKE: install-am install-strip
  
  .PHONY: CTAGS GTAGS all all-am check check-am clean clean-generic \
         clean-libtool clean-noinstLTLIBRARIES ctags distclean \
@@ -944,17 +592,10 @@ uninstall-am:
         install-pdf install-pdf-am install-ps install-ps-am \
         install-strip installcheck installcheck-am installdirs \
         maintainer-clean maintainer-clean-generic mostlyclean \
-       mostlyclean-compile mostlyclean-generic mostlyclean-kr \
-       mostlyclean-libtool pdf pdf-am ps ps-am tags uninstall \
-       uninstall-am
+       mostlyclean-compile mostlyclean-generic mostlyclean-libtool \
+       pdf pdf-am ps ps-am tags uninstall uninstall-am
  
  
-# These are BUILT_SOURCES at the top-level, so normally they're built before
-# recursing into this directory.
-#
-fac_ui.h:
-       cd ..; $(MAKE) $(AM_MAKEFLAGS) mpz/fac_ui.h
-
  # Tell versions [3.59,3.63) of GNU make to not export all variables.
  # Otherwise a system limit (for SysV at least) may be exceeded.
  .NOEXPORT:
diff --git a/mpz/abs.c b/mpz/abs.c

index 1ce4c81d2cfb20a67d261eaeab2b6f1c03d89e59..fcbb4a2f1cb3c21d6c2a367c62ea14f3dd0c87c0 100644 (file)
--- a/mpz/abs.c
+++ b/mpz/abs.c
@@ -1,6 +1,6 @@
  /* mpz_abs(dst, src) -- Assign the absolute value of SRC to DST.
  
-Copyright 1991, 1993, 1994, 1995, 2001 Free Software Foundation, Inc.
+Copyright 1991, 1993, 1994, 1995, 2001, 2012 Free Software Foundation, Inc.
  
  This file is part of the GNU MP Library.
  
@@ -25,21 +25,20 @@ along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
  void
  mpz_abs (mpz_ptr w, mpz_srcptr u)
  {
-  mp_ptr wp, up;
+  mp_ptr wp;
+  mp_srcptr up;
    mp_size_t size;
  
-  size = ABS (u->_mp_size);
+  size = ABSIZ (u);
  
    if (u != w)
      {
-      if (w->_mp_alloc < size)
-       _mpz_realloc (w, size);
+      wp = MPZ_NEWALLOC (w, size);
  
-      wp = w->_mp_d;
-      up = u->_mp_d;
+      up = PTR (u);
  
        MPN_COPY (wp, up, size);
      }
  
-  w->_mp_size = size;
+  SIZ (w) = size;
  }
diff --git a/mpz/and.c b/mpz/and.c

index d6355e97d5ccd4a511e0ec5a233487c9d87e5b6a..5d50c769e01429a405a5021b7ef9229404dc9f94 100644 (file)
--- a/mpz/and.c
+++ b/mpz/and.c
@@ -1,7 +1,7 @@
  /* mpz_and -- Logical and.
  
-Copyright 1991, 1993, 1994, 1996, 1997, 2000, 2001, 2003, 2005 Free Software
-Foundation, Inc.
+Copyright 1991, 1993, 1994, 1996, 1997, 2000, 2001, 2003, 2005, 2012
+Free Software Foundation, Inc.
  
  This file is part of the GNU MP Library.
  
@@ -37,7 +37,6 @@ mpz_and (mpz_ptr res, mpz_srcptr op1, mpz_srcptr op2)
  
    op1_ptr = PTR(op1);
    op2_ptr = PTR(op2);
-  res_ptr = PTR(res);
  
    if (op1_size >= 0)
      {
@@ -52,18 +51,14 @@ mpz_and (mpz_ptr res, mpz_srcptr op1, mpz_srcptr op2)
  
           /* Handle allocation, now then we know exactly how much space is
              needed for the result.  */
-         if (UNLIKELY (ALLOC(res) < res_size))
-           {
-             _mpz_realloc (res, res_size);
-             res_ptr = PTR(res);
-             /* Don't re-read op1_ptr and op2_ptr.  Since res_size <=
-                MIN(op1_size, op2_size), we will not reach this code when op1
-                is identical to res or op2 is identical to res.  */
-           }
+         res_ptr = MPZ_REALLOC (res, res_size);
+         /* Don't re-read op1_ptr and op2_ptr.  Since res_size <=
+            MIN(op1_size, op2_size), res is not changed when op1
+            is identical to res or op2 is identical to res.  */
  
           SIZ(res) = res_size;
-          if (LIKELY (res_size != 0))
-            mpn_and_n (res_ptr, op1_ptr, op2_ptr, res_size);
+         if (LIKELY (res_size != 0))
+           mpn_and_n (res_ptr, op1_ptr, op2_ptr, res_size);
           return;
         }
        else /* op2_size < 0 */
@@ -75,9 +70,8 @@ mpz_and (mpz_ptr res, mpz_srcptr op1, mpz_srcptr op2)
      {
        if (op2_size < 0)
         {
-         mp_ptr opx;
+         mp_ptr opx, opy;
           mp_limb_t cy;
-         mp_size_t res_alloc;
  
           /* Both operands are negative, so will be the result.
              -((-OP1) & (-OP2)) = -(~(OP1 - 1) & ~(OP2 - 1)) =
@@ -92,48 +86,29 @@ mpz_and (mpz_ptr res, mpz_srcptr op1, mpz_srcptr op2)
           op1_size = -op1_size;
           op2_size = -op2_size;
  
-         res_alloc = 1 + MAX (op1_size, op2_size);
+         if (op1_size > op2_size)
+           MPN_SRCPTR_SWAP (op1_ptr, op1_size, op2_ptr, op2_size);
  
-         opx = TMP_ALLOC_LIMBS (op1_size);
+         TMP_ALLOC_LIMBS_2 (opx, op1_size, opy, op2_size);
           mpn_sub_1 (opx, op1_ptr, op1_size, (mp_limb_t) 1);
           op1_ptr = opx;
  
-         opx = TMP_ALLOC_LIMBS (op2_size);
-         mpn_sub_1 (opx, op2_ptr, op2_size, (mp_limb_t) 1);
-         op2_ptr = opx;
-
-         if (ALLOC(res) < res_alloc)
-           {
-             _mpz_realloc (res, res_alloc);
-             res_ptr = PTR(res);
-             /* Don't re-read OP1_PTR and OP2_PTR.  They point to temporary
-                space--never to the space PTR(res) used to point to before
-                reallocation.  */
-           }
-
-         if (op1_size >= op2_size)
-           {
-             MPN_COPY (res_ptr + op2_size, op1_ptr + op2_size,
-                       op1_size - op2_size);
-             for (i = op2_size - 1; i >= 0; i--)
-               res_ptr[i] = op1_ptr[i] | op2_ptr[i];
-             res_size = op1_size;
-           }
-         else
-           {
-             MPN_COPY (res_ptr + op1_size, op2_ptr + op1_size,
-                       op2_size - op1_size);
-             for (i = op1_size - 1; i >= 0; i--)
-               res_ptr[i] = op1_ptr[i] | op2_ptr[i];
-             res_size = op2_size;
-           }
+         mpn_sub_1 (opy, op2_ptr, op2_size, (mp_limb_t) 1);
+         op2_ptr = opy;
+
+         res_ptr = MPZ_REALLOC (res, 1 + op2_size);
+         /* Don't re-read OP1_PTR and OP2_PTR.  They point to temporary
+            space--never to the space PTR(res) used to point to before
+            reallocation.  */
+
+         MPN_COPY (res_ptr + op1_size, op2_ptr + op1_size,
+                   op2_size - op1_size);
+         mpn_ior_n (res_ptr, op1_ptr, op2_ptr, op1_size);
+         res_size = op2_size;
  
           cy = mpn_add_1 (res_ptr, res_ptr, res_size, (mp_limb_t) 1);
-         if (cy)
-           {
-             res_ptr[res_size] = cy;
-             res_size++;
-           }
+         res_ptr[res_size] = cy;
+         res_size += (cy != 0);
  
           SIZ(res) = -res_size;
           TMP_FREE;
@@ -143,8 +118,7 @@ mpz_and (mpz_ptr res, mpz_srcptr op1, mpz_srcptr op2)
         {
           /* We should compute -OP1 & OP2.  Swap OP1 and OP2 and fall
              through to the code that handles OP1 & -OP2.  */
-          MPZ_SRCPTR_SWAP (op1, op2);
-          MPN_SRCPTR_SWAP (op1_ptr,op1_size, op2_ptr,op2_size);
+         MPN_SRCPTR_SWAP (op1_ptr, op1_size, op2_ptr, op2_size);
         }
  
      }
@@ -221,18 +195,13 @@ mpz_and (mpz_ptr res, mpz_srcptr op1, mpz_srcptr op2)
  
         /* Handle allocation, now then we know exactly how much space is
            needed for the result.  */
-       if (ALLOC(res) < res_size)
-         {
-           _mpz_realloc (res, res_size);
-           res_ptr = PTR(res);
-           /* Don't re-read OP1_PTR or OP2_PTR.  Since res_size = op1_size,
-              we will not reach this code when op1 is identical to res.
-              OP2_PTR points to temporary space.  */
-         }
+       res_ptr = MPZ_REALLOC (res, res_size);
+       /* Don't re-read OP1_PTR or OP2_PTR.  Since res_size = op1_size,
+          op1 is not changed if it is identical to res.
+          OP2_PTR points to temporary space.  */
  
         MPN_COPY (res_ptr + op2_size, op1_ptr + op2_size, res_size - op2_size);
-       for (i = op2_size - 1; i >= 0; i--)
-         res_ptr[i] = op1_ptr[i] & ~op2_ptr[i];
+       mpn_andn_n (res_ptr, op1_ptr, op2_ptr, op2_size);
  
         SIZ(res) = res_size;
        }
@@ -247,18 +216,14 @@ mpz_and (mpz_ptr res, mpz_srcptr op1, mpz_srcptr op2)
  
         /* Handle allocation, now then we know exactly how much space is
            needed for the result.  */
-       if (ALLOC(res) < res_size)
-         {
-           _mpz_realloc (res, res_size);
-           res_ptr = PTR(res);
-           /* Don't re-read OP1_PTR.  Since res_size <= op1_size, we will
-              not reach this code when op1 is identical to res.  */
-           /* Don't re-read OP2_PTR.  It points to temporary space--never
-              to the space PTR(res) used to point to before reallocation.  */
-         }
-
-       for (i = res_size - 1; i >= 0; i--)
-         res_ptr[i] = op1_ptr[i] & ~op2_ptr[i];
+       res_ptr = MPZ_REALLOC (res, res_size);
+       /* Don't re-read OP1_PTR.  Since res_size <= op1_size,
+          op1 is not changed if it is identical to res.
+          Don't re-read OP2_PTR.  It points to temporary space--never
+          to the space PTR(res) used to point to before reallocation.  */
+
+       if (LIKELY (res_size != 0))
+         mpn_andn_n (res_ptr, op1_ptr, op2_ptr, res_size);
  
         SIZ(res) = res_size;
        }
diff --git a/mpz/aors.h b/mpz/aors.h

index 208c515201762d393c0585758a3af26aaaec665f..79ac5fcd599b413d69246425f5e4ed8ae2c6ed7a 100644 (file)
--- a/mpz/aors.h
+++ b/mpz/aors.h
@@ -1,6 +1,7 @@
  /* mpz_add, mpz_sub -- add or subtract integers.
  
-Copyright 1991, 1993, 1994, 1996, 2000, 2001 Free Software Foundation, Inc.
+Copyright 1991, 1993, 1994, 1996, 2000, 2001, 2011, 2012 Free Software
+Foundation, Inc.
  
  This file is part of the GNU MP Library.
  
@@ -21,21 +22,6 @@ along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
  #include "gmp-impl.h"
  
  
-#ifdef BERKELEY_MP
-
-#include "mp.h"
-#ifdef OPERATION_add
-#define FUNCTION     madd
-#define VARIATION
-#endif
-#ifdef OPERATION_sub
-#define FUNCTION     msub
-#define VARIATION    -
-#endif
-#define ARGUMENTS    mpz_srcptr u, mpz_srcptr v, mpz_ptr w
-
-#else /* normal GMP */
-
  #ifdef OPERATION_add
  #define FUNCTION     mpz_add
  #define VARIATION
@@ -43,9 +29,6 @@ along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
  #ifdef OPERATION_sub
  #define FUNCTION     mpz_sub
  #define VARIATION    -
-#endif
-#define ARGUMENTS    mpz_ptr w, mpz_srcptr u, mpz_srcptr v
-
  #endif
  
  #ifndef FUNCTION
@@ -54,7 +37,7 @@ Error, need OPERATION_add or OPERATION_sub
  
  
  void
-FUNCTION (ARGUMENTS)
+FUNCTION (mpz_ptr w, mpz_srcptr u, mpz_srcptr v)
  {
    mp_srcptr up, vp;
    mp_ptr wp;
@@ -62,8 +45,8 @@ FUNCTION (ARGUMENTS)
    mp_size_t abs_usize;
    mp_size_t abs_vsize;
  
-  usize = u->_mp_size;
-  vsize = VARIATION v->_mp_size;
+  usize = SIZ(u);
+  vsize = VARIATION SIZ(v);
    abs_usize = ABS (usize);
    abs_vsize = ABS (vsize);
  
@@ -79,13 +62,11 @@ FUNCTION (ARGUMENTS)
  
    /* If not space for w (and possible carry), increase space.  */
    wsize = abs_usize + 1;
-  if (w->_mp_alloc < wsize)
-    _mpz_realloc (w, wsize);
+  wp = MPZ_REALLOC (w, wsize);
  
    /* These must be after realloc (u or v may be the same as w).  */
-  up = u->_mp_d;
-  vp = v->_mp_d;
-  wp = w->_mp_d;
+  up = PTR(u);
+  vp = PTR(v);
  
    if ((usize ^ vsize) < 0)
      {
@@ -128,5 +109,5 @@ FUNCTION (ARGUMENTS)
         wsize = -wsize;
      }
  
-  w->_mp_size = wsize;
+  SIZ(w) = wsize;
  }
diff --git a/mpz/aors_ui.h b/mpz/aors_ui.h

index b438b32b7a35c78b59b7ba546b84c236d0c6bc7d..e46726747562583e30bf12665b4e4995d3390801 100644 (file)
--- a/mpz/aors_ui.h
+++ b/mpz/aors_ui.h
@@ -1,8 +1,8 @@
  /* mpz_add_ui, mpz_sub_ui -- Add or subtract an mpz_t and an unsigned
     one-word integer.
  
-Copyright 1991, 1993, 1994, 1996, 1999, 2000, 2001, 2002, 2004 Free Software
-Foundation, Inc.
+Copyright 1991, 1993, 1994, 1996, 1999, 2000, 2001, 2002, 2004, 2012 Free
+Software Foundation, Inc.
  
  This file is part of the GNU MP Library.
  
@@ -66,22 +66,20 @@ FUNCTION (mpz_ptr w, mpz_srcptr u, unsigned long int vval)
      }
  #endif
  
-  usize = u->_mp_size;
+  usize = SIZ (u);
    abs_usize = ABS (usize);
  
    /* If not space for W (and possible carry), increase space.  */
    wsize = abs_usize + 1;
-  if (w->_mp_alloc < wsize)
-    _mpz_realloc (w, wsize);
+  wp = MPZ_REALLOC (w, wsize);
  
    /* These must be after realloc (U may be the same as W).  */
-  up = u->_mp_d;
-  wp = w->_mp_d;
+  up = PTR (u);
  
    if (abs_usize == 0)
      {
        wp[0] = vval;
-      w->_mp_size = VARIATION_NEG (vval != 0);
+      SIZ (w) = VARIATION_NEG (vval != 0);
        return;
      }
  
@@ -109,5 +107,5 @@ FUNCTION (mpz_ptr w, mpz_srcptr u, unsigned long int vval)
         }
      }
  
-  w->_mp_size = wsize;
+  SIZ (w) = wsize;
  }
diff --git a/mpz/aorsmul.c b/mpz/aorsmul.c

index 8b145b309fc5e649842796b18929199b5f80e5f7..d8145054343f83886106e4266b3b3829545648ef 100644 (file)
--- a/mpz/aorsmul.c
+++ b/mpz/aorsmul.c
@@ -1,6 +1,6 @@
  /* mpz_addmul, mpz_submul -- add or subtract multiple.
  
-Copyright 2001, 2004, 2005 Free Software Foundation, Inc.
+Copyright 2001, 2004, 2005, 2012 Free Software Foundation, Inc.
  
  This file is part of the GNU MP Library.
  
@@ -34,7 +34,7 @@ along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
     The sign of w is retained for the result, unless the absolute value
     submul underflows, in which case it flips.  */
  
-static void __gmpz_aorsmul __GMP_PROTO ((REGPARM_3_1 (mpz_ptr w, mpz_srcptr x, mpz_srcptr y, mp_size_t sub))) REGPARM_ATTR (1);
+static void __gmpz_aorsmul (REGPARM_3_1 (mpz_ptr w, mpz_srcptr x, mpz_srcptr y, mp_size_t sub)) REGPARM_ATTR (1);
  #define mpz_aorsmul(w,x,y,sub)  __gmpz_aorsmul (REGPARM_3_1 (w, x, y, sub))
  
  REGPARM_ATTR (1) static void
@@ -76,13 +76,12 @@ mpz_aorsmul (mpz_ptr w, mpz_srcptr x, mpz_srcptr y, mp_size_t sub)
    wsize = ABS(wsize_signed);
  
    tsize = xsize + ysize;
-  MPZ_REALLOC (w, MAX (wsize, tsize) + 1);
-  wp = PTR(w);
+  wp = MPZ_REALLOC (w, MAX (wsize, tsize) + 1);
  
    if (wsize_signed == 0)
      {
        /* Nothing to add to, just set w=x*y.  No w==x or w==y overlap here,
-         since we know x,y!=0 but w==0.  */
+        since we know x,y!=0 but w==0.  */
        high = mpn_mul (wp, PTR(x),xsize, PTR(y),ysize);
        tsize -= (high == 0);
        SIZ(w) = (sub >= 0 ? tsize : -tsize);
@@ -101,14 +100,14 @@ mpz_aorsmul (mpz_ptr w, mpz_srcptr x, mpz_srcptr y, mp_size_t sub)
        mp_size_t usize = wsize;
  
        if (usize < tsize)
-        {
-          up    = tp;
-          usize = tsize;
-          tp    = wp;
-          tsize = wsize;
+       {
+         up    = tp;
+         usize = tsize;
+         tp    = wp;
+         tsize = wsize;
  
-          wsize = usize;
-        }
+         wsize = usize;
+       }
  
        c = mpn_add (wp, up,usize, tp,tsize);
        wp[wsize] = c;
@@ -120,15 +119,15 @@ mpz_aorsmul (mpz_ptr w, mpz_srcptr x, mpz_srcptr y, mp_size_t sub)
        mp_size_t usize = wsize;
  
        if (mpn_cmp_twosizes_lt (up,usize, tp,tsize))
-        {
-          up    = tp;
-          usize = tsize;
-          tp    = wp;
-          tsize = wsize;
-
-          wsize = usize;
-          wsize_signed = -wsize_signed;
-        }
+       {
+         up    = tp;
+         usize = tsize;
+         tp    = wp;
+         tsize = wsize;
+
+         wsize = usize;
+         wsize_signed = -wsize_signed;
+       }
  
        ASSERT_NOCARRY (mpn_sub (wp, up,usize, tp,tsize));
        wsize = usize;
diff --git a/mpz/aorsmul_i.c b/mpz/aorsmul_i.c

index b3c2efae46dd117c0ce7557be612e30228a58194..647b87978294ea0b456506e3fd4cfd839dc943a2 100644 (file)
--- a/mpz/aorsmul_i.c
+++ b/mpz/aorsmul_i.c
@@ -4,7 +4,7 @@
     ALMOST CERTAIN TO BE SUBJECT TO INCOMPATIBLE CHANGES OR DISAPPEAR
     COMPLETELY IN FUTURE GNU MP RELEASES.
  
-Copyright 2001, 2002, 2004, 2005 Free Software Foundation, Inc.
+Copyright 2001, 2002, 2004, 2005, 2012 Free Software Foundation, Inc.
  
  This file is part of the GNU MP Library.
  
@@ -75,8 +75,7 @@ mpz_aorsmul_1 (mpz_ptr w, mpz_srcptr x, mp_limb_t y, mp_size_t sub)
    if (wsize_signed == 0)
      {
        /* nothing to add to, just set x*y, "sub" gives the sign */
-      MPZ_REALLOC (w, xsize+1);
-      wp = PTR (w);
+      wp = MPZ_REALLOC (w, xsize+1);
        cy = mpn_mul_1 (wp, PTR(x), xsize, y);
        wp[xsize] = cy;
        xsize += (cy != 0);
@@ -88,8 +87,7 @@ mpz_aorsmul_1 (mpz_ptr w, mpz_srcptr x, mp_limb_t y, mp_size_t sub)
    wsize = ABS (wsize_signed);
  
    new_wsize = MAX (wsize, xsize);
-  MPZ_REALLOC (w, new_wsize+1);
-  wp = PTR (w);
+  wp = MPZ_REALLOC (w, new_wsize+1);
    xp = PTR (x);
    min_size = MIN (wsize, xsize);
  
@@ -104,25 +102,25 @@ mpz_aorsmul_1 (mpz_ptr w, mpz_srcptr x, mp_limb_t y, mp_size_t sub)
        dsize = xsize - wsize;
  #if HAVE_NATIVE_mpn_mul_1c
        if (dsize > 0)
-        cy = mpn_mul_1c (wp, xp, dsize, y, cy);
+       cy = mpn_mul_1c (wp, xp, dsize, y, cy);
        else if (dsize < 0)
-        {
-          dsize = -dsize;
-          cy = mpn_add_1 (wp, wp, dsize, cy);
-        }
+       {
+         dsize = -dsize;
+         cy = mpn_add_1 (wp, wp, dsize, cy);
+       }
  #else
        if (dsize != 0)
-        {
-          mp_limb_t  cy2;
-          if (dsize > 0)
-            cy2 = mpn_mul_1 (wp, xp, dsize, y);
-          else
-            {
-              dsize = -dsize;
-              cy2 = 0;
-            }
-          cy = cy2 + mpn_add_1 (wp, wp, dsize, cy);
-        }
+       {
+         mp_limb_t  cy2;
+         if (dsize > 0)
+           cy2 = mpn_mul_1 (wp, xp, dsize, y);
+         else
+           {
+             dsize = -dsize;
+             cy2 = 0;
+           }
+         cy = cy2 + mpn_add_1 (wp, wp, dsize, cy);
+       }
  #endif
  
        wp[dsize] = cy;
@@ -134,52 +132,52 @@ mpz_aorsmul_1 (mpz_ptr w, mpz_srcptr x, mp_limb_t y, mp_size_t sub)
  
        cy = mpn_submul_1 (wp, xp, min_size, y);
        if (wsize >= xsize)
-        {
-          /* if w bigger than x, then propagate borrow through it */
-          if (wsize != xsize)
-            cy = mpn_sub_1 (wp+xsize, wp+xsize, wsize-xsize, cy);
-
-          if (cy != 0)
-            {
-              /* Borrow out of w, take twos complement negative to get
-                 absolute value, flip sign of w.  */
-              wp[new_wsize] = ~-cy;  /* extra limb is 0-cy */
-              mpn_com (wp, wp, new_wsize);
-              new_wsize++;
-              MPN_INCR_U (wp, new_wsize, CNST_LIMB(1));
-              wsize_signed = -wsize_signed;
-            }
-        }
+       {
+         /* if w bigger than x, then propagate borrow through it */
+         if (wsize != xsize)
+           cy = mpn_sub_1 (wp+xsize, wp+xsize, wsize-xsize, cy);
+
+         if (cy != 0)
+           {
+             /* Borrow out of w, take twos complement negative to get
+                absolute value, flip sign of w.  */
+             wp[new_wsize] = ~-cy;  /* extra limb is 0-cy */
+             mpn_com (wp, wp, new_wsize);
+             new_wsize++;
+             MPN_INCR_U (wp, new_wsize, CNST_LIMB(1));
+             wsize_signed = -wsize_signed;
+           }
+       }
        else /* wsize < xsize */
-        {
-          /* x bigger than w, so want x*y-w.  Submul has given w-x*y, so
-             take twos complement and use an mpn_mul_1 for the rest.  */
+       {
+         /* x bigger than w, so want x*y-w.  Submul has given w-x*y, so
+            take twos complement and use an mpn_mul_1 for the rest.  */
  
-          mp_limb_t  cy2;
+         mp_limb_t  cy2;
  
-          /* -(-cy*b^n + w-x*y) = (cy-1)*b^n + ~(w-x*y) + 1 */
-          mpn_com (wp, wp, wsize);
-          cy += mpn_add_1 (wp, wp, wsize, CNST_LIMB(1));
-          cy -= 1;
+         /* -(-cy*b^n + w-x*y) = (cy-1)*b^n + ~(w-x*y) + 1 */
+         mpn_com (wp, wp, wsize);
+         cy += mpn_add_1 (wp, wp, wsize, CNST_LIMB(1));
+         cy -= 1;
  
-          /* If cy-1 == -1 then hold that -1 for latter.  mpn_submul_1 never
-             returns cy==MP_LIMB_T_MAX so that value always indicates a -1. */
-          cy2 = (cy == MP_LIMB_T_MAX);
-          cy += cy2;
-          MPN_MUL_1C (cy, wp+wsize, xp+wsize, xsize-wsize, y, cy);
-          wp[new_wsize] = cy;
-          new_wsize += (cy != 0);
+         /* If cy-1 == -1 then hold that -1 for later.  mpn_submul_1 never
+            returns cy==MP_LIMB_T_MAX so that value always indicates a -1. */
+         cy2 = (cy == MP_LIMB_T_MAX);
+         cy += cy2;
+         MPN_MUL_1C (cy, wp+wsize, xp+wsize, xsize-wsize, y, cy);
+         wp[new_wsize] = cy;
+         new_wsize += (cy != 0);
  
-          /* Apply any -1 from above.  The value at wp+wsize is non-zero
-             because y!=0 and the high limb of x will be non-zero.  */
-          if (cy2)
-            MPN_DECR_U (wp+wsize, new_wsize-wsize, CNST_LIMB(1));
+         /* Apply any -1 from above.  The value at wp+wsize is non-zero
+            because y!=0 and the high limb of x will be non-zero.  */
+         if (cy2)
+           MPN_DECR_U (wp+wsize, new_wsize-wsize, CNST_LIMB(1));
  
-          wsize_signed = -wsize_signed;
-        }
+         wsize_signed = -wsize_signed;
+       }
  
        /* submul can produce high zero limbs due to cancellation, both when w
-         has more limbs or x has more  */
+        has more limbs or x has more  */
        MPN_NORMALIZE (wp, new_wsize);
      }
  
diff --git a/mpz/array_init.c b/mpz/array_init.c

index 0e2f9aaa532df5eb7d897494ea6c59158e8d2f77..cdec4fd0d0a3e66a44f7dd6b040d8b6a1c3f27fa 100644 (file)
--- a/mpz/array_init.c
+++ b/mpz/array_init.c
@@ -1,7 +1,7 @@
  /* mpz_array_init (array, array_size, size_per_elem) --
  
-Copyright 1991, 1993, 1994, 1995, 2000, 2001, 2002 Free Software Foundation,
-Inc.
+Copyright 1991, 1993, 1994, 1995, 2000, 2001, 2002, 2012 Free Software
+Foundation, Inc.
  
  This file is part of the GNU MP Library.
  
@@ -33,8 +33,8 @@ mpz_array_init (mpz_ptr arr, mp_size_t arr_size, mp_size_t nbits)
  
    for (i = 0; i < arr_size; i++)
      {
-      arr[i]._mp_alloc = nlimbs + 1; /* Yes, lie a little... */
-      arr[i]._mp_size = 0;
-      arr[i]._mp_d = p + i * nlimbs;
+      ALLOC (&arr[i]) = nlimbs + 1; /* Yes, lie a little... */
+      SIZ (&arr[i]) = 0;
+      PTR (&arr[i]) = p + i * nlimbs;
      }
  }
diff --git a/mpz/bin_ui.c b/mpz/bin_ui.c

index c97ed7cff055a010b283aa552a46857ee70ac102..1370a6c21c8f74b96e908caec10aa2a0a0b603ef 100644 (file)
--- a/mpz/bin_ui.c
+++ b/mpz/bin_ui.c
@@ -1,6 +1,6 @@
  /* mpz_bin_ui - compute n over k.
  
-Copyright 1998, 1999, 2000, 2001, 2002 Free Software Foundation, Inc.
+Copyright 1998, 1999, 2000, 2001, 2002, 2012 Free Software Foundation, Inc.
  
  This file is part of the GNU MP Library.
  
@@ -46,7 +46,7 @@ mpz_bin_ui (mpz_ptr r, mpz_srcptr n, unsigned long int k)
    mp_limb_t  kacc;
    mp_size_t  negate;
  
-  if (mpz_sgn (n) < 0)
+  if (SIZ (n) < 0)
      {
        /* bin(n,k) = (-1)^k * bin(-n+k-1,k), and set ni = -n+k-1 - k = -n-1 */
        mpz_init (ni);
@@ -57,12 +57,12 @@ mpz_bin_ui (mpz_ptr r, mpz_srcptr n, unsigned long int k)
    else
      {
        /* bin(n,k) == 0 if k>n
-         (no test for this under the n<0 case, since -n+k-1 >= k there) */
+        (no test for this under the n<0 case, since -n+k-1 >= k there) */
        if (mpz_cmp_ui (n, k) < 0)
-        {
-          mpz_set_ui (r, 0L);
-          return;
-        }
+       {
+         SIZ (r) = 0;
+         return;
+       }
  
        /* set ni = n-k */
        mpz_init (ni);
@@ -72,7 +72,7 @@ mpz_bin_ui (mpz_ptr r, mpz_srcptr n, unsigned long int k)
  
    /* Now wanting bin(ni+k,k), with ni positive, and "negate" is the sign (0
       for positive, 1 for negative). */
-  mpz_set_ui (r, 1L);
+  SIZ (r) = 1; PTR (r)[0] = 1;
  
    /* Rewrite bin(n,k) as bin(n,n-k) if that is smaller.  In this case it's
       whether ni+k-k < k meaning ni<k, and if so change to denominator ni+k-k
@@ -108,19 +108,18 @@ mpz_bin_ui (mpz_ptr r, mpz_srcptr n, unsigned long int k)
        mpz_add_ui (ni, ni, 1L);
        mpz_mul (nacc, nacc, ni);
        umul_ppmm (k1, k0, kacc, i << GMP_NAIL_BITS);
-      k0 >>= GMP_NAIL_BITS;
        if (k1 != 0)
         {
           /* Accumulator overflow.  Perform bignum step.  */
           mpz_mul (r, r, nacc);
-         mpz_set_ui (nacc, 1L);
-          DIVIDE ();
+         SIZ (nacc) = 1; PTR (nacc)[0] = 1;
+         DIVIDE ();
           kacc = i;
         }
        else
         {
           /* Save new products in accumulators to keep accumulating.  */
-         kacc = k0;
+         kacc = k0 >> GMP_NAIL_BITS;
         }
      }
  
diff --git a/mpz/bin_uiui.c b/mpz/bin_uiui.c

index 29bbd6036356a7b25ae536dc153b3ba281011177..d86fb298e4243dccfaee32297a56e43973256296 100644 (file)
--- a/mpz/bin_uiui.c
+++ b/mpz/bin_uiui.c
@@ -1,7 +1,8 @@
  /* mpz_bin_uiui - compute n over k.
  
-Copyright 1998, 1999, 2000, 2001, 2002, 2003, 2006 Free Software Foundation,
-Inc.
+Contributed to the GNU project by Torbjorn Granlund and Marco Bodrato.
+
+Copyright 2010, 2011, 2012 Free Software Foundation, Inc.
  
  This file is part of the GNU MP Library.
  
@@ -22,102 +23,663 @@ along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
  #include "gmp-impl.h"
  #include "longlong.h"
  
+#ifndef BIN_GOETGHELUCK_THRESHOLD
+#define BIN_GOETGHELUCK_THRESHOLD  1000
+#endif
+#ifndef BIN_UIUI_ENABLE_SMALLDC
+#define BIN_UIUI_ENABLE_SMALLDC    1
+#endif
+#ifndef BIN_UIUI_RECURSIVE_SMALLDC
+#define BIN_UIUI_RECURSIVE_SMALLDC (GMP_NUMB_BITS > 32)
+#endif
  
-/* Enhancement: It ought to be possible to calculate the size of the final
-   result in advance, to a rough approximation at least, and use it to do
-   just one realloc.  Stirling's approximation n! ~= sqrt(2*pi*n)*(n/e)^n
-   (Knuth section 1.2.5) might be of use.  */
-
-/* "inc" in the main loop allocates a chunk more space if not already
-   enough, so as to avoid repeated reallocs.  The final step on the other
-   hand requires only one more limb.  */
-#define MULDIV(inc)                                                     \
-  do {                                                                  \
-    ASSERT (rsize <= ralloc);                                           \
-                                                                        \
-    if (rsize == ralloc)                                                \
-      {                                                                 \
-        mp_size_t  new_ralloc = ralloc + (inc);                         \
-        rp = __GMP_REALLOCATE_FUNC_LIMBS (rp, ralloc, new_ralloc);      \
-        ralloc = new_ralloc;                                            \
-      }                                                                 \
-                                                                        \
-    rp[rsize] = mpn_mul_1 (rp, rp, rsize, nacc);                        \
-    MPN_DIVREM_OR_DIVEXACT_1 (rp, rp, rsize+1, kacc);                   \
-    rsize += (rp[rsize] != 0);                                          \
-                                                                        \
-} while (0)
+/* Algorithm:
  
-void
-mpz_bin_uiui (mpz_ptr r, unsigned long int n, unsigned long int k)
+   Accumulate chunks of factors first limb-by-limb (using one of mul0-mul8)
+   which are then accumulated into mpn numbers.  The first inner loop
+   accumulates divisor factors, the 2nd inner loop accumulates exactly the same
+   number of dividend factors.  We avoid accumulating more for the divisor,
+   even with its smaller factors, since we else cannot guarantee divisibility.
+
+   Since we know each division will yield an integer, we compute the quotient
+   using Hensel norm: If the quotient is limited by 2^t, we compute A / B mod
+   2^t.
+
+   Improvements:
+
+   (1) An obvious improvement to this code would be to compute mod 2^t
+   everywhere.  Unfortunately, we cannot determine t beforehand, unless we
+   invoke some approximation, such as Stirling's formula.  Of course, we don't
+   need t to be tight.  However, it is not clear that this would help much,
+   our numbers are kept reasonably small already.
+
+   (2) Compute nmax/kmax semi-accurately, without scalar division or a loop.
+   Extracting the 3 msb, then doing a table lookup using cnt*8+msb as index,
+   would make it both reasonably accurate and fast.  (We could use a table
+   stored into a limb, perhaps.)  The table should take the removed factors of
+   2 into account (those done on-the-fly in mulN).
+
+   (3) The first time in the loop we compute the odd part of a
+   factorial in kp, we might use oddfac_1 for this task.
+ */
+
+/* This threshold determines how large divisor to accumulate before we call
+   bdiv.  Perhaps we should never call bdiv, and accumulate all we are told,
+   since we are just basecase code anyway?  Presumably, this depends on the
+   relative speed of the asymptotically fast code and this code.  */
+#define SOME_THRESHOLD 20
+
+/* Multiply-into-limb functions.  These remove factors of 2 on-the-fly.  FIXME:
+   All versions of MAXFACS don't take this 2 removal into account now, meaning
+   that then, shifting just adds some overhead.  (We remove factors from the
+   completed limb anyway.)  */
+
+static mp_limb_t
+mul1 (mp_limb_t m)
+{
+  return m;
+}
+
+static mp_limb_t
+mul2 (mp_limb_t m)
  {
-  unsigned long int  i, j;
-  mp_limb_t          nacc, kacc;
-  unsigned long int  cnt;
-  mp_size_t          rsize, ralloc;
-  mp_ptr             rp;
-
-  /* bin(n,k) = 0 if k>n. */
-  if (n < k)
+  /* We need to shift before multiplying, to avoid an overflow. */
+  mp_limb_t m01 = (m | 1) * ((m + 1) >> 1);
+  return m01;
+}
+
+static mp_limb_t
+mul3 (mp_limb_t m)
+{
+  mp_limb_t m01 = (m + 0) * (m + 1) >> 1;
+  mp_limb_t m2 = (m + 2);
+  return m01 * m2;
+}
+
+static mp_limb_t
+mul4 (mp_limb_t m)
+{
+  mp_limb_t m01 = (m + 0) * (m + 1) >> 1;
+  mp_limb_t m23 = (m + 2) * (m + 3) >> 1;
+  return m01 * m23;
+}
+
+static mp_limb_t
+mul5 (mp_limb_t m)
+{
+  mp_limb_t m012 = (m + 0) * (m + 1) * (m + 2) >> 1;
+  mp_limb_t m34 = (m + 3) * (m + 4) >> 1;
+  return m012 * m34;
+}
+
+static mp_limb_t
+mul6 (mp_limb_t m)
+{
+  mp_limb_t m01 = (m + 0) * (m + 1);
+  mp_limb_t m23 = (m + 2) * (m + 3);
+  mp_limb_t m45 = (m + 4) * (m + 5) >> 1;
+  mp_limb_t m0123 = m01 * m23 >> 3;
+  return m0123 * m45;
+}
+
+static mp_limb_t
+mul7 (mp_limb_t m)
+{
+  mp_limb_t m01 = (m + 0) * (m + 1);
+  mp_limb_t m23 = (m + 2) * (m + 3);
+  mp_limb_t m456 = (m + 4) * (m + 5) * (m + 6) >> 1;
+  mp_limb_t m0123 = m01 * m23 >> 3;
+  return m0123 * m456;
+}
+
+static mp_limb_t
+mul8 (mp_limb_t m)
+{
+  mp_limb_t m01 = (m + 0) * (m + 1);
+  mp_limb_t m23 = (m + 2) * (m + 3);
+  mp_limb_t m45 = (m + 4) * (m + 5);
+  mp_limb_t m67 = (m + 6) * (m + 7);
+  mp_limb_t m0123 = m01 * m23 >> 3;
+  mp_limb_t m4567 = m45 * m67 >> 3;
+  return m0123 * m4567;
+}
+
+typedef mp_limb_t (* mulfunc_t) (mp_limb_t);
+
+static const mulfunc_t mulfunc[] = {mul1,mul2,mul3,mul4,mul5,mul6,mul7,mul8};
+#define M (numberof(mulfunc))
+
+/* Number of factors-of-2 removed by the corresponding mulN function.  */
+static const unsigned char tcnttab[] = {0, 1, 1, 2, 2, 4, 4, 6};
+
+#if 1
+/* This variant is inaccurate but shares the code with other functions.  */
+#define MAXFACS(max,l)                                                 \
+  do {                                                                 \
+    (max) = log_n_max (l);                                             \
+  } while (0)
+#else
+
+/* This variant is exact(?) but uses a loop.  It takes the 2 removal
+ of mulN into account.  */
+static const unsigned long ftab[] =
+#if GMP_NUMB_BITS == 64
+  /* 1 to 8 factors per iteration */
+  {CNST_LIMB(0xffffffffffffffff),CNST_LIMB(0x100000000),0x32cbfe,0x16a0b,0x24c4,0xa16,0x34b,0x1b2 /*,0xdf,0x8d */};
+#endif
+#if GMP_NUMB_BITS == 32
+  /* 1 to 7 factors per iteration */
+  {0xffffffff,0x10000,0x801,0x16b,0x71,0x42,0x26 /* ,0x1e */};
+#endif
+
+#define MAXFACS(max,l)                                                 \
+  do {                                                                 \
+    int __i;                                                           \
+    for (__i = numberof (ftab) - 1; l > ftab[__i]; __i--)              \
+      ;                                                                        \
+    (max) = __i + 1;                                                   \
+  } while (0)
+#endif
+
+/* Entry i contains (i!/2^t)^(-1) where t is chosen such that the parenthesis
+   is an odd integer. */
+static const mp_limb_t facinv[] = { ONE_LIMB_ODD_FACTORIAL_INVERSES_TABLE };
+
+static void
+mpz_bdiv_bin_uiui (mpz_ptr r, unsigned long int n, unsigned long int k)
+{
+  int nmax, kmax, nmaxnow, numfac;
+  mp_ptr np, kp;
+  mp_size_t nn, kn, alloc;
+  mp_limb_t i, j, t, iii, jjj, cy, dinv;
+  mp_bitcnt_t i2cnt, j2cnt;
+  int cnt;
+  mp_size_t maxn;
+  TMP_DECL;
+
+  ASSERT (k > ODD_FACTORIAL_TABLE_LIMIT);
+  TMP_MARK;
+
+  maxn = 1 + n / GMP_NUMB_BITS;    /* absolutely largest result size (limbs) */
+
+  /* FIXME: This allocation might be insufficient, but is usually way too
+     large.  */
+  alloc = SOME_THRESHOLD - 1 + MAX (3 * maxn / 2, SOME_THRESHOLD);
+  alloc = MIN (alloc, k) + 1;
+  np = TMP_ALLOC_LIMBS (alloc);
+  kp = TMP_ALLOC_LIMBS (SOME_THRESHOLD + 1);
+
+  MAXFACS (nmax, n);
+  ASSERT (nmax <= M);
+  MAXFACS (kmax, k);
+  ASSERT (kmax <= M);
+  ASSERT (k >= M);
+
+  i = n - k + 1;
+
+  np[0] = 1; nn = 1;
+
+  i2cnt = 0;                           /* total low zeros in dividend */
+  j2cnt = __gmp_fac2cnt_table[ODD_FACTORIAL_TABLE_LIMIT / 2 - 1];
+                                       /* total low zeros in divisor */
+
+  numfac = 1;
+  j = ODD_FACTORIAL_TABLE_LIMIT + 1;
+  jjj = ODD_FACTORIAL_TABLE_MAX;
+  ASSERT (__gmp_oddfac_table[ODD_FACTORIAL_TABLE_LIMIT] == ODD_FACTORIAL_TABLE_MAX);
+
+  while (1)
+    {
+      kp[0] = jjj;                             /* store new factors */
+      kn = 1;
+      t = k - j + 1;
+      kmax = MIN (kmax, t);
+
+      while (kmax != 0 && kn < SOME_THRESHOLD)
+       {
+         jjj = mulfunc[kmax - 1] (j);
+         j += kmax;                            /* number of factors used */
+         count_trailing_zeros (cnt, jjj);      /* count low zeros */
+         jjj >>= cnt;                          /* remove remaining low zeros */
+         j2cnt += tcnttab[kmax - 1] + cnt;     /* update low zeros count */
+         cy = mpn_mul_1 (kp, kp, kn, jjj);     /* accumulate new factors */
+         kp[kn] = cy;
+         kn += cy != 0;
+         t = k - j + 1;
+         kmax = MIN (kmax, t);
+       }
+      numfac = j - numfac;
+
+      while (numfac != 0)
+       {
+         nmaxnow = MIN (nmax, numfac);
+         iii = mulfunc[nmaxnow - 1] (i);
+         i += nmaxnow;                         /* number of factors used */
+         count_trailing_zeros (cnt, iii);      /* count low zeros */
+         iii >>= cnt;                          /* remove remaining low zeros */
+         i2cnt += tcnttab[nmaxnow - 1] + cnt;  /* update low zeros count */
+         cy = mpn_mul_1 (np, np, nn, iii);     /* accumulate new factors */
+         np[nn] = cy;
+         nn += cy != 0;
+         numfac -= nmaxnow;
+       }
+
+      ASSERT (nn < alloc);
+
+      binvert_limb (dinv, kp[0]);
+      nn += (np[nn - 1] >= kp[kn - 1]);
+      nn -= kn;
+      mpn_sbpi1_bdiv_q (np, np, nn, kp, MIN(kn,nn), -dinv);
+
+      if (kmax == 0)
+       break;
+      numfac = j;
+
+      jjj = mulfunc[kmax - 1] (j);
+      j += kmax;                               /* number of factors used */
+      count_trailing_zeros (cnt, jjj);         /* count low zeros */
+      jjj >>= cnt;                             /* remove remaining low zeros */
+      j2cnt += tcnttab[kmax - 1] + cnt;                /* update low zeros count */
+    }
+
+  /* Put back the right number of factors of 2.  */
+  cnt = i2cnt - j2cnt;
+  if (cnt != 0)
      {
-      SIZ(r) = 0;
-      return;
+      ASSERT (cnt < GMP_NUMB_BITS); /* can happen, but not for intended use */
+      cy = mpn_lshift (np, np, nn, cnt);
+      np[nn] = cy;
+      nn += cy != 0;
      }
  
-  rp = PTR(r);
+  nn -= np[nn - 1] == 0;       /* normalisation */
  
-  /* Rewrite bin(n,k) as bin(n,n-k) if that is smaller. */
-  k = MIN (k, n-k);
+  kp = MPZ_NEWALLOC (r, nn);
+  SIZ(r) = nn;
+  MPN_COPY (kp, np, nn);
+  TMP_FREE;
+}
  
-  /* bin(n,0) = 1 */
-  if (k == 0)
+static void
+mpz_smallk_bin_uiui (mpz_ptr r, unsigned long int n, unsigned long int k)
+{
+  int nmax, numfac;
+  mp_ptr rp;
+  mp_size_t rn, alloc;
+  mp_limb_t i, iii, cy;
+  mp_bitcnt_t i2cnt, cnt;
+
+  count_leading_zeros (cnt, (mp_limb_t) n);
+  cnt = GMP_LIMB_BITS - cnt;
+  alloc = cnt * k / GMP_NUMB_BITS + 3; /* FIXME: ensure rounding is enough. */
+  rp = MPZ_NEWALLOC (r, alloc);
+
+  MAXFACS (nmax, n);
+  nmax = MIN (nmax, M);
+
+  i = n - k + 1;
+
+  nmax = MIN (nmax, k);
+  rp[0] = mulfunc[nmax - 1] (i);
+  rn = 1;
+  i += nmax;                           /* number of factors used */
+  i2cnt = tcnttab[nmax - 1];           /* low zeros count */
+  numfac = k - nmax;
+  while (numfac != 0)
      {
-      SIZ(r) = 1;
-      rp[0] = 1;
-      return;
+      nmax = MIN (nmax, numfac);
+      iii = mulfunc[nmax - 1] (i);
+      i += nmax;                       /* number of factors used */
+      i2cnt += tcnttab[nmax - 1];      /* update low zeros count */
+      cy = mpn_mul_1 (rp, rp, rn, iii);        /* accumulate new factors */
+      rp[rn] = cy;
+      rn += cy != 0;
+      numfac -= nmax;
      }
  
-  j = n - k + 1;
-  rp[0] = j;
-  rsize = 1;
-  ralloc = ALLOC(r);
+  ASSERT (rn < alloc);
+
+  mpn_pi1_bdiv_q_1 (rp, rp, rn, __gmp_oddfac_table[k], facinv[k - 2],
+                   __gmp_fac2cnt_table[k / 2 - 1] - i2cnt);
+  /* A two-fold, branch-free normalisation is possible :*/
+  /* rn -= rp[rn - 1] == 0; */
+  /* rn -= rp[rn - 1] == 0; */
+  MPN_NORMALIZE_NOT_ZERO (rp, rn);
+
+  SIZ(r) = rn;
+}
+
+/* Algorithm:
+
+   Plain and simply multiply things together.
+
+   We tabulate factorials (k!/2^t)^(-1) mod B (where t is chosen such
+   that k!/2^t is odd).
+
+*/
+
+static mp_limb_t
+bc_bin_uiui (unsigned int n, unsigned int k)
+{
+  return ((__gmp_oddfac_table[n] * facinv[k - 2] * facinv[n - k - 2])
+    << (__gmp_fac2cnt_table[n / 2 - 1] - __gmp_fac2cnt_table[k / 2 - 1] - __gmp_fac2cnt_table[(n-k) / 2 - 1]))
+    & GMP_NUMB_MASK;
+}
+
+/* Algorithm:
+
+   Recursively exploit the relation
+   bin(n,k) = bin(n,k>>1)*bin(n-(k>>1),k-(k>>1))/bin(k,k>>1) .
+
+   Values for binomial(k,k>>1) that fit in a limb are precomputed
+   (with inverses).
+*/
+
+/* bin2kk[i - ODD_CENTRAL_BINOMIAL_OFFSET] =
+   binomial(i*2,i)/2^t (where t is chosen so that it is odd). */
+static const mp_limb_t bin2kk[] = { ONE_LIMB_ODD_CENTRAL_BINOMIAL_TABLE };
+
+/* bin2kkinv[i] = bin2kk[i]^-1 mod B */
+static const mp_limb_t bin2kkinv[] = { ONE_LIMB_ODD_CENTRAL_BINOMIAL_INVERSE_TABLE };
+
+/* bin2kk[i - ODD_CENTRAL_BINOMIAL_OFFSET] = binomial(i*2,i)/2^t. This table contains the t values. */
+static const unsigned char fac2bin[] = { CENTRAL_BINOMIAL_2FAC_TABLE };
+
+static void
+mpz_smallkdc_bin_uiui (mpz_ptr r, unsigned long int n, unsigned long int k)
+{
+  mp_ptr rp;
+  mp_size_t rn;
+  unsigned long int hk;
+
+  hk = k >> 1;
+
+  if ((! BIN_UIUI_RECURSIVE_SMALLDC) || hk <= ODD_FACTORIAL_TABLE_LIMIT)
+    mpz_smallk_bin_uiui (r, n, hk);
+  else
+    mpz_smallkdc_bin_uiui (r, n, hk);
+  k -= hk;
+  n -= hk;
+  if (n <= ODD_FACTORIAL_EXTTABLE_LIMIT) {
+    mp_limb_t cy;
+    rn = SIZ (r);
+    rp = MPZ_REALLOC (r, rn + 1);
+    cy = mpn_mul_1 (rp, rp, rn, bc_bin_uiui (n, k));
+    rp [rn] = cy;
+    rn += cy != 0;
+  } else {
+    mp_limb_t buffer[ODD_CENTRAL_BINOMIAL_TABLE_LIMIT + 3];
+    mpz_t t;
+
+    ALLOC (t) = ODD_CENTRAL_BINOMIAL_TABLE_LIMIT + 3;
+    PTR (t) = buffer;
+    if ((! BIN_UIUI_RECURSIVE_SMALLDC) || k <= ODD_FACTORIAL_TABLE_LIMIT)
+      mpz_smallk_bin_uiui (t, n, k);
+    else
+      mpz_smallkdc_bin_uiui (t, n, k);
+    mpz_mul (r, r, t);
+    rp = PTR (r);
+    rn = SIZ (r);
+  }
+
+  mpn_pi1_bdiv_q_1 (rp, rp, rn, bin2kk[k - ODD_CENTRAL_BINOMIAL_OFFSET],
+                   bin2kkinv[k - ODD_CENTRAL_BINOMIAL_OFFSET],
+                   fac2bin[k - ODD_CENTRAL_BINOMIAL_OFFSET] - (k != hk));
+  /* A two-fold, branch-free normalisation is possible :*/
+  /* rn -= rp[rn - 1] == 0; */
+  /* rn -= rp[rn - 1] == 0; */
+  MPN_NORMALIZE_NOT_ZERO (rp, rn);
+
+  SIZ(r) = rn;
+}
+
+/* mpz_goetgheluck_bin_uiui(RESULT, N, K) -- Set RESULT to binomial(N,K).
+ *
+ * Contributed to the GNU project by Marco Bodrato.
+ *
+ * Implementation of the algorithm by P. Goetgheluck, "Computing
+ * Binomial Coefficients", The American Mathematical Monthly, Vol. 94,
+ * No. 4 (April 1987), pp. 360-365.
+ *
+ * Acknowledgment: Peter Luschny did spot the slowness of the previous
+ * code and suggested the reference.
+ */
+
+/* TODO: Remove duplicated constants / macros / static functions...
+ */
+
+/*************************************************************/
+/* Section macros: common macros, for swing/fac/bin (&sieve) */
+/*************************************************************/
+
+#define FACTOR_LIST_APPEND(PR, MAX_PR, VEC, I)                 \
+  if ((PR) > (MAX_PR)) {                                       \
+    (VEC)[(I)++] = (PR);                                       \
+    (PR) = 1;                                                  \
+  }
+
+#define FACTOR_LIST_STORE(P, PR, MAX_PR, VEC, I)               \
+  do {                                                         \
+    if ((PR) > (MAX_PR)) {                                     \
+      (VEC)[(I)++] = (PR);                                     \
+      (PR) = (P);                                              \
+    } else                                                     \
+      (PR) *= (P);                                             \
+  } while (0)
+
+#define LOOP_ON_SIEVE_CONTINUE(prime,end,sieve)                        \
+    __max_i = (end);                                           \
+                                                               \
+    do {                                                       \
+      ++__i;                                                   \
+      if (((sieve)[__index] & __mask) == 0)                    \
+       {                                                       \
+         (prime) = id_to_n(__i)
+
+#define LOOP_ON_SIEVE_BEGIN(prime,start,end,off,sieve)         \
+  do {                                                         \
+    mp_limb_t __mask, __index, __max_i, __i;                   \
+                                                               \
+    __i = (start)-(off);                                       \
+    __index = __i / GMP_LIMB_BITS;                             \
+    __mask = CNST_LIMB(1) << (__i % GMP_LIMB_BITS);            \
+    __i += (off);                                              \
+                                                               \
+    LOOP_ON_SIEVE_CONTINUE(prime,end,sieve)
+
+#define LOOP_ON_SIEVE_STOP                                     \
+       }                                                       \
+      __mask = __mask << 1 | __mask >> (GMP_LIMB_BITS-1);      \
+      __index += __mask & 1;                                   \
+    }  while (__i <= __max_i)                                  \
+
+#define LOOP_ON_SIEVE_END                                      \
+    LOOP_ON_SIEVE_STOP;                                                \
+  } while (0)
+
+/*********************************************************/
+/* Section sieve: sieving functions and tools for primes */
+/*********************************************************/
+
+#if WANT_ASSERT
+static mp_limb_t
+bit_to_n (mp_limb_t bit) { return (bit*3+4)|1; }
+#endif
+
+/* id_to_n (x) = bit_to_n (x-1) = (x*3+1)|1 */
+static mp_limb_t
+id_to_n  (mp_limb_t id)  { return id*3+1+(id&1); }
+
+/* n_to_bit (n) = ((n-1)&(-CNST_LIMB(2)))/3U-1 */
+static mp_limb_t
+n_to_bit (mp_limb_t n) { return ((n-5)|1)/3U; }
  
-  /* Initialize accumulators.  */
-  nacc = 1;
-  kacc = 1;
+static mp_size_t
+primesieve_size (mp_limb_t n) { return n_to_bit(n) / GMP_LIMB_BITS + 1; }
  
-  for (i = 2; i <= k; i++)
+/*********************************************************/
+/* Section binomial: fast binomial implementation        */
+/*********************************************************/
+
+#define COUNT_A_PRIME(P, N, K, PR, MAX_PR, VEC, I)     \
+  do {                                                 \
+    mp_limb_t __a, __b, __prime, __ma,__mb;            \
+    __prime = (P);                                     \
+    __a = (N); __b = (K); __mb = 0;                    \
+    FACTOR_LIST_APPEND(PR, MAX_PR, VEC, I);            \
+    do {                                               \
+      __mb += __b % __prime; __b /= __prime;           \
+      __ma = __a % __prime; __a /= __prime;            \
+      if (__ma < __mb) {                               \
+        __mb = 1; (PR) *= __prime;                     \
+      } else  __mb = 0;                                        \
+    } while (__a >= __prime);                          \
+  } while (0)
+
+#define SH_COUNT_A_PRIME(P, N, K, PR, MAX_PR, VEC, I)  \
+  do {                                                 \
+    mp_limb_t __prime;                                 \
+    __prime = (P);                                     \
+    if (((N) % __prime) < ((K) % __prime)) {           \
+      FACTOR_LIST_STORE (__prime, PR, MAX_PR, VEC, I); \
+    }                                                  \
+  } while (0)
+
+/* Returns an approximation of the square root of x. *
+ * It gives: x <= limb_apprsqrt (x) ^ 2 < x * 9/4    */
+static mp_limb_t
+limb_apprsqrt (mp_limb_t x)
+{
+  int s;
+
+  ASSERT (x > 2);
+  count_leading_zeros (s, x - 1);
+  s = GMP_LIMB_BITS - 1 - s;
+  return (CNST_LIMB(1) << (s >> 1)) + (CNST_LIMB(1) << ((s - 1) >> 1));
+}
+
+static void
+mpz_goetgheluck_bin_uiui (mpz_ptr r, unsigned long int n, unsigned long int k)
+{
+  mp_limb_t *sieve, *factors, count;
+  mp_limb_t prod, max_prod, j;
+  TMP_DECL;
+
+  ASSERT (BIN_GOETGHELUCK_THRESHOLD >= 13);
+  ASSERT (n >= 25);
+
+  TMP_MARK;
+  sieve = TMP_ALLOC_LIMBS (primesieve_size (n));
+
+  count = gmp_primesieve (sieve, n) + 1;
+  factors = TMP_ALLOC_LIMBS (count / log_n_max (n) + 1);
+
+  max_prod = GMP_NUMB_MAX / n;
+
+  /* Handle primes = 2, 3 separately. */
+  popc_limb (count, n - k);
+  popc_limb (j, k);
+  count += j;
+  popc_limb (j, n);
+  count -= j;
+  prod = CNST_LIMB(1) << count;
+
+  j = 0;
+  COUNT_A_PRIME (3, n, k, prod, max_prod, factors, j);
+
+  /* Accumulate prime factors from 5 to n/2 */
+    {
+      mp_limb_t s;
+
+      {
+       mp_limb_t prime;
+       s = limb_apprsqrt(n);
+       s = n_to_bit (s);
+       LOOP_ON_SIEVE_BEGIN (prime, n_to_bit (5), s, 0,sieve);
+       COUNT_A_PRIME (prime, n, k, prod, max_prod, factors, j);
+       LOOP_ON_SIEVE_END;
+       s++;
+      }
+
+      ASSERT (max_prod <= GMP_NUMB_MAX / 2);
+      max_prod <<= 1;
+      ASSERT (bit_to_n (s) * bit_to_n (s) > n);
+      ASSERT (s <= n_to_bit (n >> 1));
+      {
+       mp_limb_t prime;
+
+       LOOP_ON_SIEVE_BEGIN (prime, s, n_to_bit (n >> 1), 0,sieve);
+       SH_COUNT_A_PRIME (prime, n, k, prod, max_prod, factors, j);
+       LOOP_ON_SIEVE_END;
+      }
+      max_prod >>= 1;
+    }
+
+  /* Store primes from (n-k)+1 to n */
+  ASSERT (n_to_bit (n - k) < n_to_bit (n));
      {
-      mp_limb_t n1, n0;
-
-      /* Remove common 2 factors.  */
-      cnt = ((nacc | kacc) & 1) ^ 1;
-      nacc >>= cnt;
-      kacc >>= cnt;
-
-      j++;
-      /* Accumulate next multiples.  */
-      umul_ppmm (n1, n0, nacc, (mp_limb_t) j << GMP_NAIL_BITS);
-      n0 >>= GMP_NAIL_BITS;
-      if (n1 == 0)
-        {
-          /* Save new products in accumulators to keep accumulating.  */
-          nacc = n0;
-          kacc = kacc * i;
-        }
-      else
-        {
-          /* Accumulator overflow.  Perform bignum step.  */
-          MULDIV (32);
-          nacc = j;
-          kacc = i;
-        }
+      mp_limb_t prime;
+      LOOP_ON_SIEVE_BEGIN (prime, n_to_bit (n - k) + 1, n_to_bit (n), 0,sieve);
+      FACTOR_LIST_STORE (prime, prod, max_prod, factors, j);
+      LOOP_ON_SIEVE_END;
      }
  
-  /* Take care of whatever is left in accumulators.  */
-  MULDIV (1);
+  if (LIKELY (j != 0))
+    {
+      factors[j++] = prod;
+      mpz_prodlimbs (r, factors, j);
+    }
+  else
+    {
+      PTR (r)[0] = prod;
+      SIZ (r) = 1;
+    }
+  TMP_FREE;
+}
  
-  ALLOC(r) = ralloc;
-  SIZ(r) = rsize;
-  PTR(r) = rp;
+#undef COUNT_A_PRIME
+#undef SH_COUNT_A_PRIME
+#undef LOOP_ON_SIEVE_END
+#undef LOOP_ON_SIEVE_STOP
+#undef LOOP_ON_SIEVE_BEGIN
+#undef LOOP_ON_SIEVE_CONTINUE
+
+/*********************************************************/
+/* End of implementation of Goetgheluck's algorithm      */
+/*********************************************************/
+
+void
+mpz_bin_uiui (mpz_ptr r, unsigned long int n, unsigned long int k)
+{
+  if (UNLIKELY (n < k)) {
+    SIZ (r) = 0;
+#if BITS_PER_ULONG > GMP_NUMB_BITS
+  } else if (UNLIKELY (n > GMP_NUMB_MAX)) {
+    mpz_t tmp;
+
+    mpz_init_set_ui (tmp, n);
+    mpz_bin_ui (r, tmp, k);
+    mpz_clear (tmp);
+#endif
+  } else {
+    ASSERT (n <= GMP_NUMB_MAX);
+    /* Rewrite bin(n,k) as bin(n,n-k) if that is smaller. */
+    k = MIN (k, n - k);
+    if (k < 2) {
+      PTR(r)[0] = k ? n : 1; /* 1 + ((-k) & (n-1)); */
+      SIZ(r) = 1;
+    } else if (n <= ODD_FACTORIAL_EXTTABLE_LIMIT) { /* k >= 2, n >= 4 */
+      PTR(r)[0] = bc_bin_uiui (n, k);
+      SIZ(r) = 1;
+    } else if (k <= ODD_FACTORIAL_TABLE_LIMIT)
+      mpz_smallk_bin_uiui (r, n, k);
+    else if (BIN_UIUI_ENABLE_SMALLDC &&
+            k <= (BIN_UIUI_RECURSIVE_SMALLDC ? ODD_CENTRAL_BINOMIAL_TABLE_LIMIT : ODD_FACTORIAL_TABLE_LIMIT)* 2)
+      mpz_smallkdc_bin_uiui (r, n, k);
+    else if (ABOVE_THRESHOLD (k, BIN_GOETGHELUCK_THRESHOLD) &&
+            k > (n >> 4)) /* k > ODD_FACTORIAL_TABLE_LIMIT */
+      mpz_goetgheluck_bin_uiui (r, n, k);
+    else
+      mpz_bdiv_bin_uiui (r, n, k);
+  }
  }
diff --git a/mpz/cdiv_q.c b/mpz/cdiv_q.c

index 2e663637ec4cb31839a2cf61a7eff7b85047cc79..4f00227decb60b005bb11574df9b3dfec55437d2 100644 (file)
--- a/mpz/cdiv_q.c
+++ b/mpz/cdiv_q.c
@@ -1,7 +1,8 @@
  /* mpz_cdiv_q -- Division rounding the quotient towards +infinity.  The
     remainder gets the opposite sign as the denominator.
  
-Copyright 1994, 1995, 1996, 2000, 2001, 2005 Free Software Foundation, Inc.
+Copyright 1994, 1995, 1996, 2000, 2001, 2005, 2012 Free Software Foundation,
+Inc.
  
  This file is part of the GNU MP Library.
  
@@ -24,8 +25,8 @@ along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
  void
  mpz_cdiv_q (mpz_ptr quot, mpz_srcptr dividend, mpz_srcptr divisor)
  {
-  mp_size_t dividend_size = dividend->_mp_size;
-  mp_size_t divisor_size = divisor->_mp_size;
+  mp_size_t dividend_size = SIZ (dividend);
+  mp_size_t divisor_size = SIZ (divisor);
    mpz_t rem;
    TMP_DECL;
  
@@ -35,7 +36,7 @@ mpz_cdiv_q (mpz_ptr quot, mpz_srcptr dividend, mpz_srcptr divisor)
  
    mpz_tdiv_qr (quot, rem, dividend, divisor);
  
-  if ((divisor_size ^ dividend_size) >= 0 && rem->_mp_size != 0)
+  if ((divisor_size ^ dividend_size) >= 0 && SIZ (rem) != 0)
      mpz_add_ui (quot, quot, 1L);
  
    TMP_FREE;
diff --git a/mpz/cdiv_q_ui.c b/mpz/cdiv_q_ui.c

index b757ea5fa26dd8dccf4657af20a6c282e61b9be0..c4c6ad4a7cc43ed5c83f9c4ac63f02fbd5e850bc 100644 (file)
--- a/mpz/cdiv_q_ui.c
+++ b/mpz/cdiv_q_ui.c
@@ -3,7 +3,8 @@
     always fit into the return type, the negative of the true remainder is
     returned.
  
-Copyright 1994, 1996, 1999, 2001, 2002, 2004 Free Software Foundation, Inc.
+Copyright 1994, 1996, 1999, 2001, 2002, 2004, 2012 Free Software Foundation,
+Inc.
  
  This file is part of the GNU MP Library.
  
@@ -30,7 +31,7 @@ mpz_cdiv_q_ui (mpz_ptr quot, mpz_srcptr dividend, unsigned long int divisor)
    mp_ptr np, qp;
    mp_limb_t rl;
  
-  if (divisor == 0)
+  if (UNLIKELY (divisor == 0))
      DIVIDE_BY_ZERO;
  
    ns = SIZ(dividend);
@@ -41,8 +42,7 @@ mpz_cdiv_q_ui (mpz_ptr quot, mpz_srcptr dividend, unsigned long int divisor)
      }
  
    nn = ABS(ns);
-  MPZ_REALLOC (quot, nn);
-  qp = PTR(quot);
+  qp = MPZ_REALLOC (quot, nn);
    np = PTR(dividend);
  
  #if BITS_PER_ULONG > GMP_NUMB_BITS  /* avoid warnings about shift amount */
diff --git a/mpz/cdiv_qr.c b/mpz/cdiv_qr.c

index 197ae505e8f388435ca8305713408130df467ed4..e2319272ce86849e57b5ed64ccd4a1060d670dfa 100644 (file)
--- a/mpz/cdiv_qr.c
+++ b/mpz/cdiv_qr.c
@@ -1,7 +1,8 @@
  /* mpz_cdiv_qr -- Division rounding the quotient towards +infinity.  The
     remainder gets the opposite sign as the denominator.
  
-Copyright 1994, 1995, 1996, 2000, 2001, 2005 Free Software Foundation, Inc.
+Copyright 1994, 1995, 1996, 2000, 2001, 2005, 2012 Free Software Foundation,
+Inc.
  
  This file is part of the GNU MP Library.
  
@@ -24,7 +25,7 @@ along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
  void
  mpz_cdiv_qr (mpz_ptr quot, mpz_ptr rem, mpz_srcptr dividend, mpz_srcptr divisor)
  {
-  mp_size_t divisor_size = divisor->_mp_size;
+  mp_size_t divisor_size = SIZ (divisor);
    mp_size_t xsize;
    mpz_t temp_divisor;          /* N.B.: lives until function returns! */
    TMP_DECL;
@@ -41,10 +42,10 @@ mpz_cdiv_qr (mpz_ptr quot, mpz_ptr rem, mpz_srcptr dividend, mpz_srcptr divisor)
        divisor = temp_divisor;
      }
  
-  xsize = dividend->_mp_size ^ divisor_size;;
+  xsize = SIZ (dividend) ^ divisor_size;;
    mpz_tdiv_qr (quot, rem, dividend, divisor);
  
-  if (xsize >= 0 && rem->_mp_size != 0)
+  if (xsize >= 0 && SIZ (rem) != 0)
      {
        mpz_add_ui (quot, quot, 1L);
        mpz_sub (rem, rem, divisor);
diff --git a/mpz/cdiv_qr_ui.c b/mpz/cdiv_qr_ui.c

index 67e80b7ac72ead438723740771f38051e10362b3..eeec6eddd06b94c5587dbdb71d0ea7b4834ae1d7 100644 (file)
--- a/mpz/cdiv_qr_ui.c
+++ b/mpz/cdiv_qr_ui.c
@@ -3,8 +3,8 @@
     always fit into the return type, the negative of the true remainder is
     returned.
  
-Copyright 1994, 1995, 1996, 1999, 2001, 2002, 2004 Free Software Foundation,
-Inc.
+Copyright 1994, 1995, 1996, 1999, 2001, 2002, 2004, 2012 Free Software
+Foundation, Inc.
  
  This file is part of the GNU MP Library.
  
@@ -31,7 +31,7 @@ mpz_cdiv_qr_ui (mpz_ptr quot, mpz_ptr rem, mpz_srcptr dividend, unsigned long in
    mp_ptr np, qp;
    mp_limb_t rl;
  
-  if (divisor == 0)
+  if (UNLIKELY (divisor == 0))
      DIVIDE_BY_ZERO;
  
    ns = SIZ(dividend);
@@ -43,8 +43,7 @@ mpz_cdiv_qr_ui (mpz_ptr quot, mpz_ptr rem, mpz_srcptr dividend, unsigned long in
      }
  
    nn = ABS(ns);
-  MPZ_REALLOC (quot, nn);
-  qp = PTR(quot);
+  qp = MPZ_REALLOC (quot, nn);
    np = PTR(dividend);
  
  #if BITS_PER_ULONG > GMP_NUMB_BITS  /* avoid warnings about shift amount */
@@ -54,8 +53,7 @@ mpz_cdiv_qr_ui (mpz_ptr quot, mpz_ptr rem, mpz_srcptr dividend, unsigned long in
        mp_ptr rp;
        mp_size_t rn;
  
-      MPZ_REALLOC (rem, 2);
-      rp = PTR(rem);
+      rp = MPZ_REALLOC (rem, 2);
  
        if (nn == 1)             /* tdiv_qr requirements; tested above for 0 */
         {
diff --git a/mpz/cdiv_r.c b/mpz/cdiv_r.c

index 749276fbdbb9aa613922554152eb12af90f8d14c..67e1ee13425f1aef61f58d939d6ebd3936a0711b 100644 (file)
--- a/mpz/cdiv_r.c
+++ b/mpz/cdiv_r.c
@@ -1,7 +1,7 @@
  /* mpz_cdiv_r -- Division rounding the quotient towards +infinity.  The
     remainder gets the opposite sign as the denominator.
  
-Copyright 1994, 1995, 1996, 2001, 2005 Free Software Foundation, Inc.
+Copyright 1994, 1995, 1996, 2001, 2005, 2012 Free Software Foundation, Inc.
  
  This file is part of the GNU MP Library.
  
@@ -24,7 +24,7 @@ along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
  void
  mpz_cdiv_r (mpz_ptr rem, mpz_srcptr dividend, mpz_srcptr divisor)
  {
-  mp_size_t divisor_size = divisor->_mp_size;
+  mp_size_t divisor_size = SIZ (divisor);
    mpz_t temp_divisor;          /* N.B.: lives until function returns! */
    TMP_DECL;
  
@@ -43,7 +43,7 @@ mpz_cdiv_r (mpz_ptr rem, mpz_srcptr dividend, mpz_srcptr divisor)
  
    mpz_tdiv_r (rem, dividend, divisor);
  
-  if ((divisor_size ^ dividend->_mp_size) >= 0 && rem->_mp_size != 0)
+  if ((divisor_size ^ SIZ (dividend)) >= 0 && SIZ (rem) != 0)
      mpz_sub (rem, rem, divisor);
  
    TMP_FREE;
diff --git a/mpz/cdiv_r_ui.c b/mpz/cdiv_r_ui.c

index e889d742a2a1e1cb137dcb1ae1016e32fd7c21c0..46e72507b347fa7e1b2c544d90c3fb45a051142e 100644 (file)
--- a/mpz/cdiv_r_ui.c
+++ b/mpz/cdiv_r_ui.c
@@ -3,8 +3,8 @@
     always fit into the return type, the negative of the true remainder is
     returned.
  
-Copyright 1994, 1995, 1996, 2001, 2002, 2004, 2005 Free Software Foundation,
-Inc.
+Copyright 1994, 1995, 1996, 2001, 2002, 2004, 2005, 2012 Free Software
+Foundation, Inc.
  
  This file is part of the GNU MP Library.
  
@@ -31,7 +31,7 @@ mpz_cdiv_r_ui (mpz_ptr rem, mpz_srcptr dividend, unsigned long int divisor)
    mp_ptr np;
    mp_limb_t rl;
  
-  if (divisor == 0)
+  if (UNLIKELY (divisor == 0))
      DIVIDE_BY_ZERO;
  
    ns = SIZ(dividend);
@@ -51,8 +51,7 @@ mpz_cdiv_r_ui (mpz_ptr rem, mpz_srcptr dividend, unsigned long int divisor)
        mp_size_t rn;
        TMP_DECL;
  
-      MPZ_REALLOC (rem, 2);
-      rp = PTR(rem);
+      rp = MPZ_REALLOC (rem, 2);
  
        if (nn == 1)             /* tdiv_qr requirements; tested above for 0 */
         {
diff --git a/mpz/cdiv_ui.c b/mpz/cdiv_ui.c

index 7b99bee9a93c577bf69be18dbb03a943293d3b30..bfbb25b8d497bd7d0cafa57d8117fcdc6bd8d8c0 100644 (file)
--- a/mpz/cdiv_ui.c
+++ b/mpz/cdiv_ui.c
@@ -3,8 +3,8 @@
     always fit into the return type, the negative of the true remainder is
     returned.
  
-Copyright 1994, 1995, 1996, 2001, 2002, 2004, 2005 Free Software Foundation,
-Inc.
+Copyright 1994, 1995, 1996, 2001, 2002, 2004, 2005, 2012 Free Software
+Foundation, Inc.
  
  This file is part of the GNU MP Library.
  
@@ -31,7 +31,7 @@ mpz_cdiv_ui (mpz_srcptr dividend, unsigned long int divisor)
    mp_ptr np;
    mp_limb_t rl;
  
-  if (divisor == 0)
+  if (UNLIKELY (divisor == 0))
      DIVIDE_BY_ZERO;
  
    ns = SIZ(dividend);
diff --git a/mpz/cfdiv_q_2exp.c b/mpz/cfdiv_q_2exp.c

index 1d326910c707f6b06b2d5bf25f2003b1a7a3f801..c71d4141369fc14b748982280ccb3403a08b9e3d 100644 (file)
--- a/mpz/cfdiv_q_2exp.c
+++ b/mpz/cfdiv_q_2exp.c
@@ -1,7 +1,7 @@
  /* mpz_cdiv_q_2exp, mpz_fdiv_q_2exp -- quotient from mpz divided by 2^n.
  
-Copyright 1991, 1993, 1994, 1996, 1998, 1999, 2001, 2002, 2004 Free Software
-Foundation, Inc.
+Copyright 1991, 1993, 1994, 1996, 1998, 1999, 2001, 2002, 2004, 2012 Free
+Software Foundation, Inc.
  
  This file is part of the GNU MP Library.
  
@@ -24,7 +24,7 @@ along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
  
  /* dir==1 for ceil, dir==-1 for floor */
  
-static void __gmpz_cfdiv_q_2exp __GMP_PROTO ((REGPARM_3_1 (mpz_ptr, mpz_srcptr, mp_bitcnt_t, int))) REGPARM_ATTR (1);
+static void __gmpz_cfdiv_q_2exp (REGPARM_3_1 (mpz_ptr, mpz_srcptr, mp_bitcnt_t, int)) REGPARM_ATTR (1);
  #define cfdiv_q_2exp(w,u,cnt,dir)  __gmpz_cfdiv_q_2exp (REGPARM_3_1 (w,u,cnt,dir))
  
  REGPARM_ATTR (1) static void
@@ -73,7 +73,7 @@ cfdiv_q_2exp (mpz_ptr w, mpz_srcptr u, mp_bitcnt_t cnt, int dir)
      {
        if (wsize != 0)
         {
-          mp_limb_t cy;
+         mp_limb_t cy;
           cy = mpn_add_1 (wp, wp, wsize, CNST_LIMB(1));
           wp[wsize] = cy;
           wsize += cy;
diff --git a/mpz/cfdiv_r_2exp.c b/mpz/cfdiv_r_2exp.c

index 5611ad675f5151551ef366be12297db182a54542..205a7bd23eccc2afe1fc26b4031c5dfacf6b7ec3 100644 (file)
--- a/mpz/cfdiv_r_2exp.c
+++ b/mpz/cfdiv_r_2exp.c
@@ -1,6 +1,6 @@
  /* mpz_cdiv_r_2exp, mpz_fdiv_r_2exp -- remainder from mpz divided by 2^n.
  
-Copyright 2001, 2002, 2004 Free Software Foundation, Inc.
+Copyright 2001, 2002, 2004, 2012 Free Software Foundation, Inc.
  
  This file is part of the GNU MP Library.
  
@@ -27,7 +27,7 @@ along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
  
  /* dir==1 for ceil, dir==-1 for floor */
  
-static void __gmpz_cfdiv_r_2exp __GMP_PROTO ((REGPARM_3_1 (mpz_ptr, mpz_srcptr, mp_bitcnt_t, int))) REGPARM_ATTR (1);
+static void __gmpz_cfdiv_r_2exp (REGPARM_3_1 (mpz_ptr, mpz_srcptr, mp_bitcnt_t, int)) REGPARM_ATTR (1);
  #define cfdiv_r_2exp(w,u,cnt,dir)  __gmpz_cfdiv_r_2exp (REGPARM_3_1 (w, u, cnt, dir))
  
  REGPARM_ATTR (1) static void
@@ -58,26 +58,25 @@ cfdiv_r_2exp (mpz_ptr w, mpz_srcptr u, mp_bitcnt_t cnt, int dir)
        /* Round towards zero, means just truncate */
  
        if (w == u)
-        {
-          /* if already smaller than limb_cnt then do nothing */
-          if (abs_usize <= limb_cnt)
-            return;
-          wp = PTR(w);
-        }
+       {
+         /* if already smaller than limb_cnt then do nothing */
+         if (abs_usize <= limb_cnt)
+           return;
+         wp = PTR(w);
+       }
        else
-        {
-          i = MIN (abs_usize, limb_cnt+1);
-          MPZ_REALLOC (w, i);
-          wp = PTR(w);
-          MPN_COPY (wp, up, i);
-
-          /* if smaller than limb_cnt then only the copy is needed */
-          if (abs_usize <= limb_cnt)
-            {
-              SIZ(w) = usize;
-              return;
-            }
-        }
+       {
+         i = MIN (abs_usize, limb_cnt+1);
+         wp = MPZ_REALLOC (w, i);
+         MPN_COPY (wp, up, i);
+
+         /* if smaller than limb_cnt then only the copy is needed */
+         if (abs_usize <= limb_cnt)
+           {
+             SIZ(w) = usize;
+             return;
+           }
+       }
      }
    else
      {
@@ -85,16 +84,16 @@ cfdiv_r_2exp (mpz_ptr w, mpz_srcptr u, mp_bitcnt_t cnt, int dir)
  
        /* if u!=0 and smaller than divisor, then must negate */
        if (abs_usize <= limb_cnt)
-        goto negate;
+       goto negate;
  
        /* if non-zero low limb, then must negate */
        for (i = 0; i < limb_cnt; i++)
-        if (up[i] != 0)
-          goto negate;
+       if (up[i] != 0)
+         goto negate;
  
        /* if non-zero partial limb, then must negate */
        if ((up[limb_cnt] & LOW_MASK (cnt)) != 0)
-        goto negate;
+       goto negate;
  
        /* otherwise low bits of u are zero, so that's the result */
        SIZ(w) = 0;
@@ -103,18 +102,17 @@ cfdiv_r_2exp (mpz_ptr w, mpz_srcptr u, mp_bitcnt_t cnt, int dir)
      negate:
        /* twos complement negation to get 2**cnt-u */
  
-      MPZ_REALLOC (w, limb_cnt+1);
+      wp = MPZ_REALLOC (w, limb_cnt+1);
        up = PTR(u);
-      wp = PTR(w);
  
        /* Ones complement */
        i = MIN (abs_usize, limb_cnt+1);
        mpn_com (wp, up, i);
        for ( ; i <= limb_cnt; i++)
-        wp[i] = GMP_NUMB_MAX;
+       wp[i] = GMP_NUMB_MAX;
  
        /* Twos complement.  Since u!=0 in the relevant part, the twos
-         complement never gives 0 and a carry, so can use MPN_INCR_U. */
+        complement never gives 0 and a carry, so can use MPN_INCR_U. */
        MPN_INCR_U (wp, limb_cnt+1, CNST_LIMB(1));
  
        usize = -usize;
@@ -130,10 +128,10 @@ cfdiv_r_2exp (mpz_ptr w, mpz_srcptr u, mp_bitcnt_t cnt, int dir)
      {
        limb_cnt--;
        if (limb_cnt < 0)
-        {
-          SIZ(w) = 0;
-          return;
-        }
+       {
+         SIZ(w) = 0;
+         return;
+       }
        high = wp[limb_cnt];
      }
  
diff --git a/mpz/clear.c b/mpz/clear.c

index 0902256c03dbe0bbb5bcd09ddaebccf8d8b97299..7dfc8f412b0d37a71b6e16fc9308b0c22de5edd4 100644 (file)
--- a/mpz/clear.c
+++ b/mpz/clear.c
@@ -1,7 +1,8 @@
  /* mpz_clear -- de-allocate the space occupied by the dynamic digit space of
     an integer.
  
-Copyright 1991, 1993, 1994, 1995, 2000, 2001 Free Software Foundation, Inc.
+Copyright 1991, 1993, 1994, 1995, 2000, 2001, 2012 Free Software Foundation,
+Inc.
  
  This file is part of the GNU MP Library.
  
@@ -24,5 +25,5 @@ along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
  void
  mpz_clear (mpz_ptr m)
  {
-  (*__gmp_free_func) (m->_mp_d, m->_mp_alloc * BYTES_PER_MP_LIMB);
+  (*__gmp_free_func) (PTR (m), ALLOC (m) * BYTES_PER_MP_LIMB);
  }
diff --git a/mpz/clrbit.c b/mpz/clrbit.c

index d08d684775ae37376dfb5abdf072df2b49ccae69..ffd910d0cd39901ef0536b8e8398010c713db306 100644 (file)
--- a/mpz/clrbit.c
+++ b/mpz/clrbit.c
@@ -1,6 +1,7 @@
  /* mpz_clrbit -- clear a specified bit.
  
-Copyright 1991, 1993, 1994, 1995, 2001, 2002 Free Software Foundation, Inc.
+Copyright 1991, 1993, 1994, 1995, 2001, 2002, 2012 Free Software Foundation,
+Inc.
  
  This file is part of the GNU MP Library.
  
@@ -21,30 +22,30 @@ along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
  #include "gmp-impl.h"
  
  void
-mpz_clrbit (mpz_ptr d, mp_bitcnt_t bit_index)
+mpz_clrbit (mpz_ptr d, mp_bitcnt_t bit_idx)
  {
-  mp_size_t dsize = d->_mp_size;
-  mp_ptr dp = d->_mp_d;
-  mp_size_t limb_index;
+  mp_size_t dsize = SIZ (d);
+  mp_ptr dp = PTR (d);
+  mp_size_t limb_idx;
+  mp_limb_t mask;
  
-  limb_index = bit_index / GMP_NUMB_BITS;
+  limb_idx = bit_idx / GMP_NUMB_BITS;
+  mask = CNST_LIMB(1) << (bit_idx % GMP_NUMB_BITS);
    if (dsize >= 0)
      {
-      if (limb_index < dsize)
+      if (limb_idx < dsize)
         {
-          mp_limb_t  dlimb;
-          dlimb = dp[limb_index];
-          dlimb &= ~((mp_limb_t) 1 << (bit_index % GMP_NUMB_BITS));
-          dp[limb_index] = dlimb;
+         mp_limb_t  dlimb;
+         dlimb = dp[limb_idx];
+         dlimb &= ~mask;
+         dp[limb_idx] = dlimb;
  
-          if (UNLIKELY (dlimb == 0 && limb_index == dsize-1))
-            {
-              /* high limb became zero, must normalize */
-              do {
-                dsize--;
-              } while (dsize > 0 && dp[dsize-1] == 0);
-              d->_mp_size = dsize;
-            }
+         if (UNLIKELY (dlimb == 0 && limb_idx == dsize-1))
+           {
+             /* high limb became zero, must normalize */
+             MPN_NORMALIZE (dp, limb_idx);
+             SIZ (d) = limb_idx;
+           }
         }
        else
         ;
@@ -60,51 +61,39 @@ mpz_clrbit (mpz_ptr d, mp_bitcnt_t bit_index)
  
        dsize = -dsize;
  
-      /* No upper bound on this loop, we're sure there's a non-zero limb
-        sooner ot later.  */
-      for (zero_bound = 0; ; zero_bound++)
-       if (dp[zero_bound] != 0)
-         break;
+      /* No index upper bound on this loop, we're sure there's a non-zero limb
+        sooner or later.  */
+      zero_bound = 0;
+      while (dp[zero_bound] == 0)
+       zero_bound++;
  
-      if (limb_index > zero_bound)
+      if (limb_idx > zero_bound)
         {
-         if (limb_index < dsize)
-           dp[limb_index] |= (mp_limb_t) 1 << (bit_index % GMP_NUMB_BITS);
+         if (limb_idx < dsize)
+           dp[limb_idx] |= mask;
           else
             {
               /* Ugh.  The bit should be cleared outside of the end of the
                  number.  We have to increase the size of the number.  */
-             if (UNLIKELY (d->_mp_alloc < limb_index + 1))
-                dp = _mpz_realloc (d, limb_index + 1);
-
-             MPN_ZERO (dp + dsize, limb_index - dsize);
-             dp[limb_index] = (mp_limb_t) 1 << (bit_index % GMP_NUMB_BITS);
-             d->_mp_size = -(limb_index + 1);
+             dp = MPZ_REALLOC (d, limb_idx + 1);
+             SIZ (d) = -(limb_idx + 1);
+             MPN_ZERO (dp + dsize, limb_idx - dsize);
+             dp[limb_idx] = mask;
             }
         }
-      else if (limb_index == zero_bound)
+      else if (limb_idx == zero_bound)
         {
-         dp[limb_index] = ((((dp[limb_index] - 1)
-                             | ((mp_limb_t) 1 << (bit_index % GMP_NUMB_BITS))) + 1)
-                           & GMP_NUMB_MASK);
-         if (dp[limb_index] == 0)
+         dp[limb_idx] = ((((dp[limb_idx] - 1) | mask) + 1) & GMP_NUMB_MASK);
+         if (dp[limb_idx] == 0)
             {
-             mp_size_t i;
-             for (i = limb_index + 1; i < dsize; i++)
-               {
-                 dp[i] = (dp[i] + 1) & GMP_NUMB_MASK;
-                 if (dp[i] != 0)
-                   goto fin;
-               }
-             /* We got carry all way out beyond the end of D.  Increase
-                its size (and allocation if necessary).  */
-             dsize++;
-             if (UNLIKELY (d->_mp_alloc < dsize))
-                dp = _mpz_realloc (d, dsize);
+             /* Increment at limb_idx + 1.  Extend the number with a zero limb
+                for simplicity.  */
+             dp = MPZ_REALLOC (d, dsize + 1);
+             dp[dsize] = 0;
+             MPN_INCR_U (dp + limb_idx + 1, dsize - limb_idx, 1);
+             dsize += dp[dsize];
  
-             dp[i] = 1;
-             d->_mp_size = -dsize;
-           fin:;
+             SIZ (d) = -dsize;
             }
         }
        else
diff --git a/mpz/cmp.c b/mpz/cmp.c

index a6d9d388931f2e1e3c42b0c75a58e49523a30ecf..b7bf0d967f8d331f28dd7cd9c6685811d386c6a8 100644 (file)
--- a/mpz/cmp.c
+++ b/mpz/cmp.c
@@ -1,7 +1,8 @@
  /* mpz_cmp(u,v) -- Compare U, V.  Return positive, zero, or negative
     based on if U > V, U == V, or U < V.
  
-Copyright 1991, 1993, 1994, 1996, 2001, 2002 Free Software Foundation, Inc.
+Copyright 1991, 1993, 1994, 1996, 2001, 2002, 2011 Free Software
+Foundation, Inc.
  
  This file is part of the GNU MP Library.
  
@@ -18,18 +19,11 @@ License for more details.
  You should have received a copy of the GNU Lesser General Public License
  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
  
-#ifdef BERKELEY_MP
-#include "mp.h"
-#endif
  #include "gmp.h"
  #include "gmp-impl.h"
  
  int
-#ifdef BERKELEY_MP
-mcmp (mpz_srcptr u, mpz_srcptr v)
-#else
  mpz_cmp (mpz_srcptr u, mpz_srcptr v) __GMP_NOTHROW
-#endif
  {
    mp_size_t  usize, vsize, dsize, asize;
    mp_srcptr  up, vp;
diff --git a/mpz/cmp_d.c b/mpz/cmp_d.c

index fd635a68e1b7c06e54d9e907438f8fc90b259ba6..a9a3a9623b83b24f643b15641f775812de180094 100644 (file)
--- a/mpz/cmp_d.c
+++ b/mpz/cmp_d.c
@@ -71,13 +71,13 @@ mpz_cmp_d (mpz_srcptr z, double d)
    if (zsize >= 0)
      {
        if (d < 0.0)
-        return 1;    /* >=0 cmp <0 */
+       return 1;    /* >=0 cmp <0 */
        ret = 1;
      }
    else
      {
        if (d >= 0.0)
-        return -1;   /* <0 cmp >=0 */
+       return -1;   /* <0 cmp >=0 */
        ret = -1;
        d = -d;
        zsize = -zsize;
diff --git a/mpz/cmp_si.c b/mpz/cmp_si.c

index 1919bd33ed4740e9f4ec96094da5484b7e944b5f..9feac962d0ef223aab605295446992b8041dd43d 100644 (file)
--- a/mpz/cmp_si.c
+++ b/mpz/cmp_si.c
@@ -1,8 +1,8 @@
  /* mpz_cmp_si(u,v) -- Compare an integer U with a single-word int V.
     Return positive, zero, or negative based on if U > V, U == V, or U < V.
  
-Copyright 1991, 1993, 1994, 1995, 1996, 2000, 2001, 2002 Free Software
-Foundation, Inc.
+Copyright 1991, 1993, 1994, 1995, 1996, 2000, 2001, 2002, 2012, 2013 Free
+Software Foundation, Inc.
  
  This file is part of the GNU MP Library.
  
@@ -25,10 +25,10 @@ along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
  int
  _mpz_cmp_si (mpz_srcptr u, signed long int v_digit) __GMP_NOTHROW
  {
-  mp_size_t usize = u->_mp_size;
+  mp_size_t usize = SIZ (u);
    mp_size_t vsize;
    mp_limb_t u_digit;
-  unsigned long int absv_digit = (unsigned long int) v_digit;
+  unsigned long int absv_digit;
  
  #if GMP_NAIL_BITS != 0
    /* FIXME.  This isn't very pretty.  */
@@ -46,8 +46,8 @@ _mpz_cmp_si (mpz_srcptr u, signed long int v_digit) __GMP_NOTHROW
    else if (v_digit < 0)
      {
        vsize = -1;
-      absv_digit = -absv_digit;
      }
+  absv_digit = ABS_CAST (unsigned long int, v_digit);
  
    if (usize != vsize)
      return usize - vsize;
@@ -55,7 +55,7 @@ _mpz_cmp_si (mpz_srcptr u, signed long int v_digit) __GMP_NOTHROW
    if (usize == 0)
      return 0;
  
-  u_digit = u->_mp_d[0];
+  u_digit = PTR (u)[0];
  
    if (u_digit == (mp_limb_t) absv_digit)
      return 0;
diff --git a/mpz/cmpabs_d.c b/mpz/cmpabs_d.c

index f7aadc2620d5ef558e40347355f4df0cd7b4cf09..a5f2085607e0630dac016afc3dfa05c4cb4ba028 100644 (file)
--- a/mpz/cmpabs_d.c
+++ b/mpz/cmpabs_d.c
@@ -1,6 +1,6 @@
  /* mpz_cmpabs_d -- compare absolute values of mpz and double.
  
-Copyright 2001, 2002, 2003 Free Software Foundation, Inc.
+Copyright 2001, 2002, 2003, 2012 Free Software Foundation, Inc.
  
  This file is part of the GNU MP Library.
  
@@ -62,7 +62,7 @@ mpz_cmpabs_d (mpz_srcptr z, double d)
    if (d == 0.0)
      return (zsize != 0);
    if (zsize == 0)
-    return (d != 0 ? -1 : 0);
+    return -1; /* d != 0 */
  
    /* 2. Ignore signs. */
    zsize = ABS(zsize);
diff --git a/mpz/com.c b/mpz/com.c

index c403b8ce6a87ce556e0af301d0c807e03a931240..719f3158d381a221a7dbbc04b0a54ef45741e7e3 100644 (file)
--- a/mpz/com.c
+++ b/mpz/com.c
@@ -1,7 +1,8 @@
  /* mpz_com(mpz_ptr dst, mpz_ptr src) -- Assign the bit-complemented value of
     SRC to DST.
  
-Copyright 1991, 1993, 1994, 1996, 2001, 2003 Free Software Foundation, Inc.
+Copyright 1991, 1993, 1994, 1996, 2001, 2003, 2012 Free Software Foundation,
+Inc.
  
  This file is part of the GNU MP Library.
  
@@ -24,7 +25,7 @@ along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
  void
  mpz_com (mpz_ptr dst, mpz_srcptr src)
  {
-  mp_size_t size = src->_mp_size;
+  mp_size_t size = SIZ (src);
    mp_srcptr src_ptr;
    mp_ptr dst_ptr;
  
@@ -34,33 +35,27 @@ mpz_com (mpz_ptr dst, mpz_srcptr src)
          But this can be simplified using the identity -x = ~x + 1.
          So we're going to compute (~~x) + 1 = x + 1!  */
  
-      if (dst->_mp_alloc < size + 1)
-       _mpz_realloc (dst, size + 1);
-
-      src_ptr = src->_mp_d;
-      dst_ptr = dst->_mp_d;
-
        if (UNLIKELY (size == 0))
         {
           /* special case, as mpn_add_1 wants size!=0 */
-         dst_ptr[0] = 1;
-         dst->_mp_size = -1;
-         return;
+         PTR (dst)[0] = 1;
+         SIZ (dst) = -1;
         }
+      else
+       {
+         mp_limb_t cy;
  
-      {
-       mp_limb_t cy;
+         dst_ptr = MPZ_REALLOC (dst, size + 1);
  
-       cy = mpn_add_1 (dst_ptr, src_ptr, size, (mp_limb_t) 1);
-       if (cy)
-         {
-           dst_ptr[size] = cy;
-           size++;
-         }
-      }
+         src_ptr = PTR (src);
+
+         cy = mpn_add_1 (dst_ptr, src_ptr, size, (mp_limb_t) 1);
+         dst_ptr[size] = cy;
+         size += (cy != 0);
  
-      /* Store a negative size, to indicate ones-extension.  */
-      dst->_mp_size = -size;
+         /* Store a negative size, to indicate ones-extension.  */
+         SIZ (dst) = -size;
+      }
      }
    else
      {
@@ -69,16 +64,14 @@ mpz_com (mpz_ptr dst, mpz_srcptr src)
          So we're going to compute ~~(x - 1) = x - 1!  */
        size = -size;
  
-      if (dst->_mp_alloc < size)
-       _mpz_realloc (dst, size);
+      dst_ptr = MPZ_REALLOC (dst, size);
  
-      src_ptr = src->_mp_d;
-      dst_ptr = dst->_mp_d;
+      src_ptr = PTR (src);
  
        mpn_sub_1 (dst_ptr, src_ptr, size, (mp_limb_t) 1);
        size -= dst_ptr[size - 1] == 0;
  
        /* Store a positive size, to indicate zero-extension.  */
-      dst->_mp_size = size;
+      SIZ (dst) = size;
      }
  }
diff --git a/mpz/combit.c b/mpz/combit.c

index 8a0ce3a93ea393d697db0f7c3e24b18ff83b1fc0..7f3c9f212dadf828335d150fb7794bf4abd5d713 100644 (file)
--- a/mpz/combit.c
+++ b/mpz/combit.c
@@ -1,6 +1,6 @@
  /* mpz_combit -- complement a specified bit.
  
-Copyright 2002, 2003 Free Software Foundation, Inc.
+Copyright 2002, 2003, 2012 Free Software Foundation, Inc.
  
  This file is part of the GNU MP Library.
  
@@ -23,58 +23,67 @@ along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
  void
  mpz_combit (mpz_ptr d, mp_bitcnt_t bit_index)
  {
-  mp_size_t dsize = ABSIZ(d);
-  mp_ptr dp = LIMBS(d);
+  mp_size_t dsize = SIZ(d);
+  mp_ptr dp = PTR(d);
  
    mp_size_t limb_index = bit_index / GMP_NUMB_BITS;
-  mp_limb_t bit = ((mp_limb_t) 1 << (bit_index % GMP_NUMB_BITS));
+  mp_limb_t bit = (CNST_LIMB (1) << (bit_index % GMP_NUMB_BITS));
  
-  if (limb_index >= dsize)
-    {
-      MPZ_REALLOC(d, limb_index + 1);
-      dp = LIMBS(d);
-
-      MPN_ZERO(dp + dsize, limb_index + 1 - dsize);
-      dsize = limb_index + 1;
-    }
+  /* Check for the most common case: Positive input, no realloc or
+     normalization needed. */
+  if (limb_index + 1 < dsize)
+    dp[limb_index] ^= bit;
  
-  if (SIZ(d) >= 0)
+  /* Check for the hairy case. d < 0, and we have all zero bits to the
+     right of the bit to toggle. */
+  else if (limb_index < -dsize && mpn_zero_p (dp, limb_index)
+          && (dp[limb_index] & (bit - 1)) == 0)
      {
-      dp[limb_index] ^= bit;
-      MPN_NORMALIZE (dp, dsize);
-      SIZ(d) = dsize;
+      ASSERT (dsize < 0);
+      dsize = -dsize;
+
+      if (dp[limb_index] & bit)
+       {
+         /* We toggle the least significant one bit. Corresponds to
+            an add, with potential carry propagation, on the absolute
+            value. */
+         dp = MPZ_REALLOC (d, 1 + dsize);
+         dp[dsize] = 0;
+         MPN_INCR_U (dp + limb_index, 1 + dsize - limb_index, bit);
+         SIZ(d) -= dp[dsize];
+       }
+      else
+       {
+         /* We toggle a zero bit, subtract from the absolute value. */
+         MPN_DECR_U (dp + limb_index, dsize - limb_index, bit);
+         MPN_NORMALIZE (dp, dsize);
+         ASSERT (dsize > 0);
+         SIZ(d) = -dsize;
+       }
      }
    else
      {
-      mp_limb_t x = -dp[limb_index];
-      mp_size_t i;
-
-      /* non-zero limb below us means ones-complement */
-      for (i = limb_index-1; i >= 0; i--)
-       if (dp[i] != 0)
-         {
-           x--;  /* change twos comp to ones comp */
-           break;
-         }
-
-      if (x & bit)
+      /* Simple case: Toggle the bit in the absolute value. */
+      dsize = ABS(dsize);
+      if (limb_index < dsize)
         {
-         mp_limb_t  c;
-
-         /* Clearing the bit increases the magitude. We might need a carry. */
-         MPZ_REALLOC(d, dsize + 1);
-         dp = LIMBS(d);
-
-         __GMPN_ADD_1 (c, dp+limb_index, dp+limb_index,
-                       dsize - limb_index, bit);
-         dp[dsize] = c;
-         dsize += c;
+         dp[limb_index] ^= bit;
+
+         /* Can happen only when limb_index = dsize - 1. Avoid SIZ(d)
+            bookkeeping in the common case. */
+         if (dp[dsize-1] == 0)
+           {
+             dsize--;
+             MPN_NORMALIZE (dp, dsize);
+             SIZ (d) = SIZ (d) >= 0 ? dsize : -dsize;
+           }
         }
        else
-       /* Setting the bit decreases the magnitude */
-       mpn_sub_1(dp+limb_index, dp+limb_index, dsize + limb_index, bit);
-
-      MPN_NORMALIZE (dp, dsize);
-      SIZ(d) = -dsize;
+       {
+         dp = MPZ_REALLOC (d, limb_index + 1);
+         MPN_ZERO(dp + dsize, limb_index - dsize);
+         dp[limb_index++] = bit;
+         SIZ(d) = SIZ(d) >= 0 ? limb_index : -limb_index;
+       }
      }
  }
diff --git a/mpz/cong.c b/mpz/cong.c

index 127f5cdd08eff3f612b6869cbfc9f4d06766de21..8dcf0777c46eb09e8be1849e3475118bbd981971 100644 (file)
--- a/mpz/cong.c
+++ b/mpz/cong.c
@@ -94,53 +94,53 @@ mpz_congruent_p (mpz_srcptr a, mpz_srcptr c, mpz_srcptr d)
    if (csize == 1)
      {
        if (dsize == 1)
-        {
-        cong_1:
-          if (sign < 0)
-            NEG_MOD (clow, clow, dlow);
-
-          if (ABOVE_THRESHOLD (asize, BMOD_1_TO_MOD_1_THRESHOLD))
-            {
-              r = mpn_mod_1 (ap, asize, dlow);
-              if (clow < dlow)
-                return r == clow;
-              else
-                return r == (clow % dlow);
-            }
-
-          if ((dlow & 1) == 0)
-            {
-              /* Strip low zero bits to get odd d required by modexact.  If
-                 d==e*2^n then a==c mod d if and only if both a==c mod e and
-                 a==c mod 2^n, the latter having been done above.  */
-              unsigned  twos;
-              count_trailing_zeros (twos, dlow);
-              dlow >>= twos;
-            }
-
-          r = mpn_modexact_1c_odd (ap, asize, dlow, clow);
-          return r == 0 || r == dlow;
-        }
+       {
+       cong_1:
+         if (sign < 0)
+           NEG_MOD (clow, clow, dlow);
+
+         if (ABOVE_THRESHOLD (asize, BMOD_1_TO_MOD_1_THRESHOLD))
+           {
+             r = mpn_mod_1 (ap, asize, dlow);
+             if (clow < dlow)
+               return r == clow;
+             else
+               return r == (clow % dlow);
+           }
+
+         if ((dlow & 1) == 0)
+           {
+             /* Strip low zero bits to get odd d required by modexact.  If
+                d==e*2^n then a==c mod d if and only if both a==c mod e and
+                a==c mod 2^n, the latter having been done above.  */
+             unsigned  twos;
+             count_trailing_zeros (twos, dlow);
+             dlow >>= twos;
+           }
+
+         r = mpn_modexact_1c_odd (ap, asize, dlow, clow);
+         return r == 0 || r == dlow;
+       }
  
        /* dlow==0 is avoided since we don't want to bother handling extra low
-         zero bits if dsecond is even (would involve borrow if a,c differ in
-         sign and alow,clow!=0).  */
+        zero bits if dsecond is even (would involve borrow if a,c differ in
+        sign and alow,clow!=0).  */
        if (dsize == 2 && dlow != 0)
-        {
-          mp_limb_t  dsecond = dp[1];
-
-          if (dsecond <= dmask)
-            {
-              unsigned   twos;
-              count_trailing_zeros (twos, dlow);
-              dlow = (dlow >> twos) | (dsecond << (GMP_NUMB_BITS-twos));
-              ASSERT_LIMB (dlow);
-
-              /* dlow will be odd here, so the test for it even under cong_1
-                 is unnecessary, but the rest of that code is wanted. */
-              goto cong_1;
-            }
-        }
+       {
+         mp_limb_t  dsecond = dp[1];
+
+         if (dsecond <= dmask)
+           {
+             unsigned   twos;
+             count_trailing_zeros (twos, dlow);
+             dlow = (dlow >> twos) | (dsecond << (GMP_NUMB_BITS-twos));
+             ASSERT_LIMB (dlow);
+
+             /* dlow will be odd here, so the test for it even under cong_1
+                is unnecessary, but the rest of that code is wanted. */
+             goto cong_1;
+           }
+       }
      }
  
    TMP_MARK;
@@ -151,9 +151,9 @@ mpz_congruent_p (mpz_srcptr a, mpz_srcptr c, mpz_srcptr d)
      {
        /* same signs, subtract */
        if (asize > csize || mpn_cmp (ap, cp, asize) >= 0)
-        ASSERT_NOCARRY (mpn_sub (xp, ap, asize, cp, csize));
+       ASSERT_NOCARRY (mpn_sub (xp, ap, asize, cp, csize));
        else
-        ASSERT_NOCARRY (mpn_sub_n (xp, cp, ap, asize));
+       ASSERT_NOCARRY (mpn_sub_n (xp, cp, ap, asize));
        MPN_NORMALIZE (xp, asize);
      }
    else
diff --git a/mpz/cong_2exp.c b/mpz/cong_2exp.c

index bf3ae54cc01c3d5be040ec68e67924575937f484..b4fb08013fbeddc8bfec3e48690aa984a09f1e05 100644 (file)
--- a/mpz/cong_2exp.c
+++ b/mpz/cong_2exp.c
@@ -55,23 +55,23 @@ mpz_congruent_2exp_p (mpz_srcptr a, mpz_srcptr c, mp_bitcnt_t d) __GMP_NOTHROW
  
        /* a==c for limbs in common */
        if (mpn_cmp (ap, cp, MIN (csize, dlimbs)) != 0)
-        return 0;
+       return 0;
  
        /* if that's all of dlimbs, then a==c for remaining bits */
        if (csize > dlimbs)
-        return ((ap[dlimbs]-cp[dlimbs]) & dmask) == 0;
+       return ((ap[dlimbs]-cp[dlimbs]) & dmask) == 0;
  
      a_zeros:
        /* a remains, need all zero bits */
  
        /* if d covers all of a and c, then must be exactly equal */
        if (asize <= dlimbs)
-        return asize == csize;
+       return asize == csize;
  
        /* whole limbs zero */
        for (i = csize; i < dlimbs; i++)
-        if (ap[i] != 0)
-          return 0;
+       if (ap[i] != 0)
+         return 0;
  
        /* partial limb zero */
        return (ap[dlimbs] & dmask) == 0;
@@ -81,63 +81,63 @@ mpz_congruent_2exp_p (mpz_srcptr a, mpz_srcptr c, mp_bitcnt_t d) __GMP_NOTHROW
        /* different signs, negated comparison */
  
        /* common low zero limbs, stopping at first non-zeros, which must
-         match twos complement */
+        match twos complement */
        i = 0;
        for (;;)
-        {
-          ASSERT (i < csize);  /* always have a non-zero limb on c */
-          alimb = ap[i];
-          climb = cp[i];
-          sum = (alimb + climb) & GMP_NUMB_MASK;
+       {
+         ASSERT (i < csize);  /* always have a non-zero limb on c */
+         alimb = ap[i];
+         climb = cp[i];
+         sum = (alimb + climb) & GMP_NUMB_MASK;
  
-          if (i >= dlimbs)
-            return (sum & dmask) == 0;
-          i++;
+         if (i >= dlimbs)
+           return (sum & dmask) == 0;
+         i++;
  
-          /* require both zero, or first non-zeros as twos-complements */
-          if (sum != 0)
-            return 0;
+         /* require both zero, or first non-zeros as twos-complements */
+         if (sum != 0)
+           return 0;
  
-          if (alimb != 0)
-            break;
-        }
+         if (alimb != 0)
+           break;
+       }
  
        /* further limbs matching as ones-complement */
        for (;;)
-        {
-          if (i >= csize)
-            break;
+       {
+         if (i >= csize)
+           break;
  
-          alimb = ap[i];
-          climb = cp[i];
-          sum = (alimb + climb + 1) & GMP_NUMB_MASK;
+         alimb = ap[i];
+         climb = cp[i];
+         sum = (alimb + climb + 1) & GMP_NUMB_MASK;
  
-          if (i >= dlimbs)
-            return (sum & dmask) == 0;
+         if (i >= dlimbs)
+           return (sum & dmask) == 0;
  
-          if (sum != 0)
-            return 0;
+         if (sum != 0)
+           return 0;
  
-          i++;
-        }
+         i++;
+       }
  
        /* no more c, so require all 1 bits in a */
  
        if (asize < dlimbs)
-        return 0;   /* not enough a */
+       return 0;   /* not enough a */
  
        /* whole limbs */
        for ( ; i < dlimbs; i++)
-        if (ap[i] != GMP_NUMB_MAX)
-          return 0;
+       if (ap[i] != GMP_NUMB_MAX)
+         return 0;
  
        /* if only whole limbs, no further fetches from a */
        if (dbits == 0)
-        return 1;
+       return 1;
  
        /* need enough a */
        if (asize == dlimbs)
-        return 0;
+       return 0;
  
        return ((ap[dlimbs]+1) & dmask) == 0;
      }
diff --git a/mpz/cong_ui.c b/mpz/cong_ui.c

index ee68c104b9132f813b4ed727814116a3749caa68..539e95fe179e745b77c873a34068d091076045d0 100644 (file)
--- a/mpz/cong_ui.c
+++ b/mpz/cong_ui.c
@@ -1,6 +1,6 @@
  /* mpz_congruent_ui_p -- test congruence of mpz and ulong.
  
-Copyright 2000, 2001, 2002 Free Software Foundation, Inc.
+Copyright 2000, 2001, 2002, 2012 Free Software Foundation, Inc.
  
  This file is part of the GNU MP Library.
  
@@ -42,9 +42,9 @@ mpz_congruent_ui_p (mpz_srcptr a, unsigned long cu, unsigned long du)
    if (asize == 0)
      {
        if (cu < du)
-        return cu == 0;
+       return cu == 0;
        else
-        return (cu % du) == 0;
+       return (cu % du) == 0;
      }
  
    /* For nails don't try to be clever if c or d is bigger than a limb, just
@@ -80,21 +80,21 @@ mpz_congruent_ui_p (mpz_srcptr a, unsigned long cu, unsigned long du)
      {
        r = mpn_mod_1 (ap, asize, d);
        if (c < d)
-        return r == c;
+       return r == c;
        else
-        return r == (c % d);
+       return r == (c % d);
      }
  
    if ((d & 1) == 0)
      {
        /* Strip low zero bits to get odd d required by modexact.  If
-         d==e*2^n then a==c mod d if and only if both a==c mod 2^n
-         and a==c mod e.  */
+        d==e*2^n then a==c mod d if and only if both a==c mod 2^n
+        and a==c mod e.  */
  
-      unsigned  twos;
+      unsigned twos;
  
        if ((ap[0]-c) & LOW_ZEROS_MASK (d))
-        return 0;
+       return 0;
  
        count_trailing_zeros (twos, d);
        d >>= twos;
diff --git a/mpz/dive_ui.c b/mpz/dive_ui.c

index 53709aa3b7c9db41ea884978e26ff8b5520e29df..34f826dc3cd3c78a0ac3787a1ce5489312cde068 100644 (file)
--- a/mpz/dive_ui.c
+++ b/mpz/dive_ui.c
@@ -1,6 +1,6 @@
  /* mpz_divexact_ui -- exact division mpz by ulong.
  
-Copyright 2001, 2002 Free Software Foundation, Inc.
+Copyright 2001, 2002, 2012 Free Software Foundation, Inc.
  
  This file is part of the GNU MP Library.
  
@@ -26,7 +26,7 @@ mpz_divexact_ui (mpz_ptr dst, mpz_srcptr src, unsigned long divisor)
    mp_size_t  size, abs_size;
    mp_ptr     dst_ptr;
  
-  if (divisor == 0)
+  if (UNLIKELY (divisor == 0))
      DIVIDE_BY_ZERO;
  
    /* For nails don't try to be clever if d is bigger than a limb, just fake
@@ -50,8 +50,7 @@ mpz_divexact_ui (mpz_ptr dst, mpz_srcptr src, unsigned long divisor)
      }
    abs_size = ABS (size);
  
-  MPZ_REALLOC (dst, abs_size);
-  dst_ptr = PTR(dst);
+  dst_ptr = MPZ_REALLOC (dst, abs_size);
  
    MPN_DIVREM_OR_DIVEXACT_1 (dst_ptr, PTR(src), abs_size, (mp_limb_t) divisor);
    abs_size -= (dst_ptr[abs_size-1] == 0);
diff --git a/mpz/divegcd.c b/mpz/divegcd.c

index e4bf431526056b61130d41039767602f13bd40f0..1069fc66625d67bb9df5bcd77675965490b73500 100644 (file)
--- a/mpz/divegcd.c
+++ b/mpz/divegcd.c
@@ -3,7 +3,7 @@
     THE FUNCTIONS IN THIS FILE ARE FOR INTERNAL USE AND ARE ALMOST CERTAIN TO
     BE SUBJECT TO INCOMPATIBLE CHANGES IN FUTURE GNU MP RELEASES.
  
-Copyright 2000, 2005 Free Software Foundation, Inc.
+Copyright 2000, 2005, 2011, 2012 Free Software Foundation, Inc.
  
  This file is part of the GNU MP Library.
  
@@ -44,28 +44,53 @@ along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
     implementation.  */
  
  
+#if GMP_NUMB_BITS % 2 == 0
  static void
  mpz_divexact_by3 (mpz_ptr q, mpz_srcptr a)
  {
    mp_size_t  size = SIZ(a);
-  if (size == 0)
-    {
-      SIZ(q) = 0;
-      return;
-    }
-  else
-    {
-      mp_size_t  abs_size = ABS(size);
-      mp_ptr     qp;
+  mp_size_t  abs_size = ABS(size);
+  mp_ptr     qp;
  
-      MPZ_REALLOC (q, abs_size);
+  qp = MPZ_REALLOC (q, abs_size);
  
-      qp = PTR(q);
-      mpn_divexact_by3 (qp, PTR(a), abs_size);
+  mpn_bdiv_dbm1 (qp, PTR(a), abs_size, GMP_NUMB_MASK / 3);
  
-      abs_size -= (qp[abs_size-1] == 0);
-      SIZ(q) = (size>0 ? abs_size : -abs_size);
-    }
+  abs_size -= (qp[abs_size-1] == 0);
+  SIZ(q) = (size>0 ? abs_size : -abs_size);
+}
+#endif
+
+#if GMP_NUMB_BITS % 4 == 0
+static void
+mpz_divexact_by5 (mpz_ptr q, mpz_srcptr a)
+{
+  mp_size_t  size = SIZ(a);
+  mp_size_t  abs_size = ABS(size);
+  mp_ptr     qp;
+
+  qp = MPZ_REALLOC (q, abs_size);
+
+  mpn_bdiv_dbm1 (qp, PTR(a), abs_size, GMP_NUMB_MASK / 5);
+
+  abs_size -= (qp[abs_size-1] == 0);
+  SIZ(q) = (size>0 ? abs_size : -abs_size);
+}
+#endif
+
+static void
+mpz_divexact_limb (mpz_ptr q, mpz_srcptr a, mp_limb_t d)
+{
+  mp_size_t  size = SIZ(a);
+  mp_size_t  abs_size = ABS(size);
+  mp_ptr     qp;
+
+  qp = MPZ_REALLOC (q, abs_size);
+
+  mpn_divexact_1 (qp, PTR(a), abs_size, d);
+
+  abs_size -= (qp[abs_size-1] == 0);
+  SIZ(q) = (size>0 ? abs_size : -abs_size);
  }
  
  void
@@ -73,37 +98,48 @@ mpz_divexact_gcd (mpz_ptr q, mpz_srcptr a, mpz_srcptr d)
  {
    ASSERT (mpz_sgn (d) > 0);
  
+  if (SIZ(a) == 0)
+    {
+      SIZ(q) = 0;
+      return;
+    }
+
    if (SIZ(d) == 1)
      {
        mp_limb_t  dl = PTR(d)[0];
        int        twos;
  
-      if (dl == 1)
-        {
-          if (q != a)
-            mpz_set (q, a);
-          return;
-        }
-      if (dl == 3)
-        {
-          mpz_divexact_by3 (q, a);
-          return;
-        }
-
-      count_trailing_zeros (twos, dl);
-      dl >>= twos;
+      if ((dl & 1) == 0)
+       {
+         count_trailing_zeros (twos, dl);
+         dl >>= twos;
+         mpz_tdiv_q_2exp (q, a, twos);
+         a = q;
+       }
  
        if (dl == 1)
-        {
-          mpz_tdiv_q_2exp (q, a, twos);
-          return;
-        }
+       {
+         if (q != a)
+           mpz_set (q, a);
+         return;
+       }
+#if GMP_NUMB_BITS % 2 == 0
        if (dl == 3)
-        {
-          mpz_tdiv_q_2exp (q, a, twos);
-          mpz_divexact_by3 (q, q);
-          return;
-        }
+       {
+         mpz_divexact_by3 (q, a);
+         return;
+       }
+#endif
+#if GMP_NUMB_BITS % 4 == 0
+      if (dl == 5)
+       {
+         mpz_divexact_by5 (q, a);
+         return;
+       }
+#endif
+
+      mpz_divexact_limb (q, a, dl);
+      return;
      }
  
    mpz_divexact (q, a, d);
diff --git a/mpz/divexact.c b/mpz/divexact.c

index 95ba31112f852be800c38b24c5833e93676fabc5..1e5c3ac3e2ae126bc60f7106eb7bc3f251fe8497 100644 (file)
--- a/mpz/divexact.c
+++ b/mpz/divexact.c
@@ -3,7 +3,7 @@
  Contributed to the GNU project by Niels Möller.
  
  Copyright 1991, 1993, 1994, 1995, 1996, 1997, 1998, 2000, 2001, 2002, 2005,
-2006, 2007, 2009 Free Software Foundation, Inc.
+2006, 2007, 2009, 2012 Free Software Foundation, Inc.
  
  This file is part of the GNU MP Library.
  
@@ -47,9 +47,6 @@ mpz_divexact (mpz_ptr quot, mpz_srcptr num, mpz_srcptr den)
    nn = ABSIZ (num);
    dn = ABSIZ (den);
  
-  qn = nn - dn + 1;
-  MPZ_REALLOC (quot, qn);
-
    if (nn < dn)
      {
        /* This special case avoids segfaults below when the function is
@@ -59,12 +56,14 @@ mpz_divexact (mpz_ptr quot, mpz_srcptr num, mpz_srcptr den)
        return;
      }
  
-  TMP_MARK;
+  qn = nn - dn + 1;
  
-  qp = PTR(quot);
+  TMP_MARK;
  
    if (quot == num || quot == den)
      qp = TMP_ALLOC_LIMBS (qn);
+  else
+    qp = MPZ_REALLOC (quot, qn);
  
    np = PTR(num);
    dp = PTR(den);
@@ -72,10 +71,10 @@ mpz_divexact (mpz_ptr quot, mpz_srcptr num, mpz_srcptr den)
    mpn_divexact (qp, np, nn, dp, dn);
    MPN_NORMALIZE (qp, qn);
  
-  SIZ(quot) = (SIZ(num) ^ SIZ(den)) >= 0 ? qn : -qn;
-
    if (qp != PTR(quot))
-    MPN_COPY (PTR(quot), qp, qn);
+    MPN_COPY (MPZ_REALLOC (quot, qn), qp, qn);
+
+  SIZ(quot) = (SIZ(num) ^ SIZ(den)) >= 0 ? qn : -qn;
  
    TMP_FREE;
  }
diff --git a/mpz/divis_ui.c b/mpz/divis_ui.c

index 69dc21cd801892d521333fa6b02520d170a58242..00d0d1f327ffed034aa2a400b4446f34f8c730db 100644 (file)
--- a/mpz/divis_ui.c
+++ b/mpz/divis_ui.c
@@ -57,10 +57,10 @@ mpz_divisible_ui_p (mpz_srcptr a, unsigned long d)
    if (! (d & 1))
      {
        /* Strip low zero bits to get odd d required by modexact.  If d==e*2^n
-         and a is divisible by 2^n and by e, then it's divisible by d. */
+        and a is divisible by 2^n and by e, then it's divisible by d. */
  
        if ((ap[0] & LOW_ZEROS_MASK (d)) != 0)
-        return 0;
+       return 0;
  
        count_trailing_zeros (twos, (mp_limb_t) d);
        d >>= twos;
diff --git a/mpz/export.c b/mpz/export.c

index 484b9d91bb5e21b557a9cf41bb00200124fb9d42..6173fb6ca6249f03a884c89524e5b6a994abeab9 100644 (file)
--- a/mpz/export.c
+++ b/mpz/export.c
@@ -1,6 +1,6 @@
  /* mpz_export -- create word data from mpz.
  
-Copyright 2002, 2003 Free Software Foundation, Inc.
+Copyright 2002, 2003, 2012 Free Software Foundation, Inc.
  
  This file is part of the GNU MP Library.
  
@@ -34,22 +34,9 @@ static const mp_limb_t  endian_test = (CNST_LIMB(1) << (GMP_LIMB_BITS-7)) - 1;
  #define HOST_ENDIAN     (* (signed char *) &endian_test)
  #endif
  
-
-#define MPN_SIZEINBASE_2EXP(result, ptr, size, base2exp)                \
-  do {                                                                  \
-    int            __cnt;                                               \
-    unsigned long  __totbits;                                           \
-    ASSERT ((size) > 0);                                                \
-    ASSERT ((ptr)[(size)-1] != 0);                                      \
-    count_leading_zeros (__cnt, (ptr)[(size)-1]);                       \
-    __totbits = (size) * GMP_NUMB_BITS - (__cnt - GMP_NAIL_BITS);       \
-    (result) = (__totbits + (base2exp)-1) / (base2exp);                 \
-  } while (0)
-
-
  void *
  mpz_export (void *data, size_t *countp, int order,
-            size_t size, int endian, size_t nail, mpz_srcptr z)
+           size_t size, int endian, size_t nail, mpz_srcptr z)
  {
    mp_size_t      zsize;
    mp_srcptr      zp;
@@ -60,7 +47,7 @@ mpz_export (void *data, size_t *countp, int order,
    ASSERT (order == 1 || order == -1);
    ASSERT (endian == 1 || endian == 0 || endian == -1);
    ASSERT (nail <= 8*size);
-  ASSERT (8*size-nail > 0);
+  ASSERT (nail <  8*size || SIZ(z) == 0); /* nail < 8*size+(SIZ(z)==0) */
  
    if (countp == NULL)
      countp = &dummy;
@@ -89,29 +76,29 @@ mpz_export (void *data, size_t *countp, int order,
    if (nail == GMP_NAIL_BITS)
      {
        if (size == sizeof (mp_limb_t) && align == 0)
-        {
-          if (order == -1 && endian == HOST_ENDIAN)
-            {
-              MPN_COPY ((mp_ptr) data, zp, (mp_size_t) count);
-              return data;
-            }
-          if (order == 1 && endian == HOST_ENDIAN)
-            {
-              MPN_REVERSE ((mp_ptr) data, zp, (mp_size_t) count);
-              return data;
-            }
-
-          if (order == -1 && endian == -HOST_ENDIAN)
-            {
-              MPN_BSWAP ((mp_ptr) data, zp, (mp_size_t) count);
-              return data;
-            }
-          if (order == 1 && endian == -HOST_ENDIAN)
-            {
-              MPN_BSWAP_REVERSE ((mp_ptr) data, zp, (mp_size_t) count);
-              return data;
-            }
-        }
+       {
+         if (order == -1 && endian == HOST_ENDIAN)
+           {
+             MPN_COPY ((mp_ptr) data, zp, (mp_size_t) count);
+             return data;
+           }
+         if (order == 1 && endian == HOST_ENDIAN)
+           {
+             MPN_REVERSE ((mp_ptr) data, zp, (mp_size_t) count);
+             return data;
+           }
+
+         if (order == -1 && endian == -HOST_ENDIAN)
+           {
+             MPN_BSWAP ((mp_ptr) data, zp, (mp_size_t) count);
+             return data;
+           }
+         if (order == 1 && endian == -HOST_ENDIAN)
+           {
+             MPN_BSWAP_REVERSE ((mp_ptr) data, zp, (mp_size_t) count);
+             return data;
+           }
+       }
      }
  
    {
@@ -162,31 +149,31 @@ mpz_export (void *data, size_t *countp, int order,
      limb = 0;
      for (i = 0; i < count; i++)
        {
-        for (j = 0; j < wbytes; j++)
-          {
-            EXTRACT (8, + 0);
-            dp -= endian;
-          }
-        if (wbits != 0)
-          {
-            EXTRACT (wbits, & wbitsmask);
-            dp -= endian;
-            j++;
-          }
-        for ( ; j < size; j++)
-          {
-            *dp = '\0';
-            dp -= endian;
-          }
-        dp += woffset;
+       for (j = 0; j < wbytes; j++)
+         {
+           EXTRACT (8, + 0);
+           dp -= endian;
+         }
+       if (wbits != 0)
+         {
+           EXTRACT (wbits, & wbitsmask);
+           dp -= endian;
+           j++;
+         }
+       for ( ; j < size; j++)
+         {
+           *dp = '\0';
+           dp -= endian;
+         }
+       dp += woffset;
        }
  
      ASSERT (zp == PTR(z) + ABSIZ(z));
  
      /* low byte of word after most significant */
      ASSERT (dp == (unsigned char *) data
-            + (order < 0 ? count*size : - (mp_size_t) size)
-            + (endian >= 0 ? (mp_size_t) size - 1 : 0));
+           + (order < 0 ? count*size : - (mp_size_t) size)
+           + (endian >= 0 ? (mp_size_t) size - 1 : 0));
    }
    return data;
  }
diff --git a/mpz/fac_ui.c b/mpz/fac_ui.c

index 7e394fcb1859c232492cb503b32162cf064ca558..bb7a9c440a1a0e5efecf01fb978fa2c68635d3e1 100644 (file)
--- a/mpz/fac_ui.c
+++ b/mpz/fac_ui.c
@@ -1,7 +1,9 @@
-/* mpz_fac_ui(result, n) -- Set RESULT to N!.
+/* mpz_fac_ui(RESULT, N) -- Set RESULT to N!.
  
-Copyright 1991, 1993, 1994, 1995, 2000, 2001, 2002, 2003 Free Software
-Foundation, Inc.
+Contributed to the GNU project by Marco Bodrato.
+
+Copyright 1991, 1993, 1994, 1995, 2000, 2001, 2002, 2003, 2011, 2012
+Free Software Foundation, Inc.
  
  This file is part of the GNU MP Library.
  
@@ -20,377 +22,77 @@ along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
  
  #include "gmp.h"
  #include "gmp-impl.h"
-#include "longlong.h"
-
-#include "fac_ui.h"
-
-
-static void odd_product __GMP_PROTO ((unsigned long, unsigned long, mpz_t *));
-static void ap_product_small __GMP_PROTO ((mpz_t, mp_limb_t, mp_limb_t, unsigned long, unsigned long));
-
  
-/* must be >=2 */
-#define APCONST        5
-
-/* for single non-zero limb */
-#define MPZ_SET_1_NZ(z,n)      \
-  do {                         \
-    mpz_ptr  __z = (z);                \
-    ASSERT ((n) != 0);         \
-    PTR(__z)[0] = (n);         \
-    SIZ(__z) = 1;              \
-  } while (0)
-
-/* for src>0 and n>0 */
-#define MPZ_MUL_1_POS(dst,src,n)                       \
-  do {                                                 \
-    mpz_ptr    __dst = (dst);                          \
-    mpz_srcptr __src = (src);                          \
-    mp_size_t  __size = SIZ(__src);                    \
-    mp_ptr     __dst_p;                                        \
-    mp_limb_t  __c;                                    \
-                                                       \
-    ASSERT (__size > 0);                               \
-    ASSERT ((n) != 0);                                 \
-                                                       \
-    MPZ_REALLOC (__dst, __size+1);                     \
-    __dst_p = PTR(__dst);                              \
-                                                       \
-    __c = mpn_mul_1 (__dst_p, PTR(__src), __size, n);  \
-    __dst_p[__size] = __c;                             \
-    SIZ(__dst) = __size + (__c != 0);                  \
+#define FACTOR_LIST_STORE(P, PR, MAX_PR, VEC, I)               \
+  do {                                                         \
+    if ((PR) > (MAX_PR)) {                                     \
+      (VEC)[(I)++] = (PR);                                     \
+      (PR) = (P);                                              \
+    } else                                                     \
+      (PR) *= (P);                                             \
    } while (0)
  
-
-#if BITS_PER_ULONG == GMP_LIMB_BITS
-#define BSWAP_ULONG(x,y)       BSWAP_LIMB(x,y)
+#if TUNE_PROGRAM_BUILD
+#define FACTORS_PER_LIMB (GMP_NUMB_BITS / (LOG2C(FAC_DSC_THRESHOLD_LIMIT-1)+1))
+#else
+#define FACTORS_PER_LIMB (GMP_NUMB_BITS / (LOG2C(FAC_ODD_THRESHOLD)+1))
  #endif
  
-/* We used to have a case here for limb==2*long, doing a BSWAP_LIMB followed
-   by a shift down to get the high part.  But it provoked incorrect code
-   from "HP aC++/ANSI C B3910B A.05.52 [Sep 05 2003]" in ILP32 mode.  This
-   case would have been nice for gcc ia64 where BSWAP_LIMB is a mux1, but we
-   can get that directly muxing a 4-byte ulong if it matters enough.  */
-
-#if ! defined (BSWAP_ULONG)
-#define BSWAP_ULONG(dst, src)                                          \
-  do {                                                                 \
-    unsigned long  __bswapl_src = (src);                               \
-    unsigned long  __bswapl_dst = 0;                                   \
-    int               __i;                                                     \
-    for (__i = 0; __i < sizeof(unsigned long); __i++)                  \
-      {                                                                        \
-       __bswapl_dst = (__bswapl_dst << 8) | (__bswapl_src & 0xFF);     \
-       __bswapl_src >>= 8;                                             \
-      }                                                                        \
-    (dst) = __bswapl_dst;                                              \
-  } while (0)
-#endif
-
-/* x is bit reverse of y */
-/* Note the divides below are all exact */
-#define BITREV_ULONG(x,y)                                                 \
-  do {                                                                    \
-   unsigned long __dst;                                                           \
-   BSWAP_ULONG(__dst,y);                                                  \
-   __dst = ((__dst>>4)&(ULONG_MAX/17)) | ((__dst<<4)&((ULONG_MAX/17)*16)); \
-   __dst = ((__dst>>2)&(ULONG_MAX/5) ) | ((__dst<<2)&((ULONG_MAX/5)*4)  ); \
-   __dst = ((__dst>>1)&(ULONG_MAX/3) ) | ((__dst<<1)&((ULONG_MAX/3)*2)  ); \
-   (x) = __dst;                                                                   \
-  } while(0)
-/* above could be improved if cpu has a nibble/bit swap/muxing instruction */
-/* above code is serialized, possible to write as a big parallel expression */
-
-
-
+/* Computes n!, the factorial of n.
+   WARNING: it assumes that n fits in a limb!
+ */
  void
  mpz_fac_ui (mpz_ptr x, unsigned long n)
  {
-  unsigned long z, stt;
-  int i, j;
-  mpz_t t1, st[8 * sizeof (unsigned long) + 1 - APCONST];
-  mp_limb_t d[4];
-
    static const mp_limb_t table[] = { ONE_LIMB_FACTORIAL_TABLE };
  
-  if (n < numberof (table))
-    {
-      MPZ_SET_1_NZ (x, table[n]);
-      return;
-    }
-
-  /*  NOTE : MUST have n>=3 here */
-  ASSERT (n >= 3);
-  /* for estimating the alloc sizes the calculation of these formula's is not
-     exact and also the formulas are only approximations, also we ignore
-     the few "side" calculations, correct allocation seems to speed up the
-     small sizes better, having very little effect on the large sizes */
-
-  /* estimate space for stack entries see below
-     number of bits for n! is
-     (1+log_2(2*pi)/2)-n*log_2(exp(1))+(n+1/2)*log_2(n)=
-     2.325748065-n*1.442695041+(n+0.5)*log_2(n)  */
-  umul_ppmm (d[1], d[0], (mp_limb_t) n, (mp_limb_t) FAC2OVERE);
-  /* d[1] is 2n/e, d[0] ignored        */
-  count_leading_zeros (z, d[1]);
-  z = GMP_LIMB_BITS - z - 1;   /* z=floor(log_2(2n/e))   */
-  umul_ppmm (d[1], d[0], (mp_limb_t) n, (mp_limb_t) z);
-  /* d=n*floor(log_2(2n/e))   */
-  d[0] = (d[0] >> 2) | (d[1] << (GMP_LIMB_BITS - 2));
-  d[1] >>= 2;
-  /* d=n*floor(log_2(2n/e))/4   */
-  z = d[0] + 1;                        /* have to ignore any overflow */
-  /* so z is the number of bits wanted for st[0]    */
+  ASSERT (n <= GMP_NUMB_MAX);
  
-
-  if (n <= ((unsigned long) 1) << (APCONST))
-    {
-      mpz_realloc2 (x, 4 * z);
-      ap_product_small (x, CNST_LIMB(2), CNST_LIMB(1), n - 1, 4L);
-      return;
-    }
-  if (n <= ((unsigned long) 1) << (APCONST + 1))
-    {                          /*  use n!=odd(1,n)*(n/2)!*2^(n/2)         */
-      mpz_init2 (t1, 2 * z);
-      mpz_realloc2 (x, 4 * z);
-      ap_product_small (x, CNST_LIMB(2), CNST_LIMB(1), n / 2 - 1, 4L);
-      ap_product_small (t1, CNST_LIMB(3), CNST_LIMB(2), (n - 1) / 2, 4L);
-      mpz_mul (x, x, t1);
-      mpz_clear (t1);
-      mpz_mul_2exp (x, x, n / 2);
-      return;
-    }
-  if (n <= ((unsigned long) 1) << (APCONST + 2))
+  if (n < numberof (table))
      {
-      /* use n!=C_2(1,n/2)^2*C_2(n/2,n)*(n/4)!*2^(n/2+n/4) all int divs
-        so need (BITS_IN_N-APCONST+1)=(APCONST+3-APCONST+1)=4 stack entries */
-      mpz_init2 (t1, 2 * z);
-      mpz_realloc2 (x, 4 * z);
-      for (i = 0; i < 4; i++)
-       {
-         mpz_init2 (st[i], z);
-         z >>= 1;
-       }
-      odd_product (1, n / 2, st);
-      mpz_set (x, st[0]);
-      odd_product (n / 2, n, st);
-      mpz_mul (x, x, x);
-      ASSERT (n / 4 <= FACMUL4 + 6);
-      ap_product_small (t1, CNST_LIMB(2), CNST_LIMB(1), n / 4 - 1, 4L);
-      /* must have 2^APCONST odd numbers max */
-      mpz_mul (t1, t1, st[0]);
-      for (i = 0; i < 4; i++)
-       mpz_clear (st[i]);
-      mpz_mul (x, x, t1);
-      mpz_clear (t1);
-      mpz_mul_2exp (x, x, n / 2 + n / 4);
-      return;
+      PTR (x)[0] = table[n];
+      SIZ (x) = 1;
      }
-
-  count_leading_zeros (stt, (mp_limb_t) n);
-  stt = GMP_LIMB_BITS - stt + 1 - APCONST;
-
-  for (i = 0; i < (signed long) stt; i++)
+  else if (BELOW_THRESHOLD (n, FAC_ODD_THRESHOLD))
      {
-      mpz_init2 (st[i], z);
-      z >>= 1;
-    }
-
-  count_leading_zeros (z, (mp_limb_t) (n / 3));
-  /* find z st 2^z>n/3 range for z is 1 <= z <= 8 * sizeof(unsigned long)-1 */
-  z = GMP_LIMB_BITS - z;
-
-  /*
-     n! = 2^e * PRODUCT_{i=0}^{i=z-1} C_2( n/2^{i+1}, n/2^i )^{i+1}
-     where 2^e || n!   3.2^z>n   C_2(a,b)=PRODUCT of odd z such that a<z<=b
-   */
+      mp_limb_t prod, max_prod;
+      mp_size_t j;
+      mp_ptr    factors;
+      TMP_SDECL;
+
+      TMP_SMARK;
+      factors = TMP_SALLOC_LIMBS (2 + (n - numberof (table)) / FACTORS_PER_LIMB);
+
+      factors[0] = table[numberof (table)-1];
+      j = 1;
+      prod = n;
+#if TUNE_PROGRAM_BUILD
+      max_prod = GMP_NUMB_MAX / FAC_DSC_THRESHOLD_LIMIT;
+#else
+      max_prod = GMP_NUMB_MAX / (FAC_ODD_THRESHOLD | 1);
+#endif
+      while (--n >= numberof (table))
+       FACTOR_LIST_STORE (n, prod, max_prod, factors, j);
  
+      factors[j++] = prod;
+      mpz_prodlimbs (x, factors, j);
  
-  mpz_init_set_ui (t1, 1);
-  for (j = 8 * sizeof (unsigned long) / 2; j != 0; j >>= 1)
-    {
-      MPZ_SET_1_NZ (x, 1);
-      for (i = 8 * sizeof (unsigned long) - j; i >= j; i -= 2 * j)
-       if ((signed long) z >= i)
-         {
-           odd_product (n >> i, n >> (i - 1), st);
-           /* largest odd product when j=i=1 then we have
-              odd_product(n/2,n,st) which is approx (2n/e)^(n/4)
-              so log_base2(largest oddproduct)=n*log_base2(2n/e)/4
-              number of bits is n*log_base2(2n/e)/4+1  */
-           if (i != j)
-             mpz_pow_ui (st[0], st[0], i / j);
-           mpz_mul (x, x, st[0]);
-         }
-      if ((signed long) z >= j && j != 1)
-       {
-         mpz_mul (t1, t1, x);
-         mpz_mul (t1, t1, t1);
-       }
+      TMP_SFREE;
      }
-  for (i = 0; i < (signed long) stt; i++)
-    mpz_clear (st[i]);
-  mpz_mul (x, x, t1);
-  mpz_clear (t1);
-  popc_limb (i, (mp_limb_t) n);
-  mpz_mul_2exp (x, x, n - i);
-  return;
-}
-
-/* start,step are mp_limb_t although they will fit in unsigned long    */
-static void
-ap_product_small (mpz_t ret, mp_limb_t start, mp_limb_t step,
-                 unsigned long count, unsigned long nm)
-{
-  unsigned long a;
-  mp_limb_t b;
-
-  ASSERT (count <= (((unsigned long) 1) << APCONST));
-/* count can never be zero ? check this and remove test below */
-  if (count == 0)
+  else
      {
-      MPZ_SET_1_NZ (ret, 1);
-      return;
-    }
-  if (count == 1)
-    {
-      MPZ_SET_1_NZ (ret, start);
-      return;
-    }
-  switch (nm)
-    {
-    case 1:
-      MPZ_SET_1_NZ (ret, start);
-      b = start + step;
-      for (a = 0; a < count - 1; b += step, a++)
-       MPZ_MUL_1_POS (ret, ret, b);
-      return;
-    case 2:
-      MPZ_SET_1_NZ (ret, start * (start + step));
-      if (count == 2)
-       return;
-      for (b = start + 2 * step, a = count / 2 - 1; a != 0;
-          a--, b += 2 * step)
-       MPZ_MUL_1_POS (ret, ret, b * (b + step));
-      if (count % 2 == 1)
-       MPZ_MUL_1_POS (ret, ret, b);
-      return;
-    case 3:
-      if (count == 2)
-       {
-         MPZ_SET_1_NZ (ret, start * (start + step));
-         return;
-       }
-      MPZ_SET_1_NZ (ret, start * (start + step) * (start + 2 * step));
-      if (count == 3)
-       return;
-      for (b = start + 3 * step, a = count / 3 - 1; a != 0;
-          a--, b += 3 * step)
-       MPZ_MUL_1_POS (ret, ret, b * (b + step) * (b + 2 * step));
-      if (count % 3 == 2)
-       b = b * (b + step);
-      if (count % 3 != 0)
-       MPZ_MUL_1_POS (ret, ret, b);
-      return;
-    default:                   /* ie nm=4      */
-      if (count == 2)
-       {
-         MPZ_SET_1_NZ (ret, start * (start + step));
-         return;
-       }
-      if (count == 3)
+      mp_limb_t count;
+      mpz_oddfac_1 (x, n, 0);
+      if (n <= TABLE_LIMIT_2N_MINUS_POPC_2N)
+       count = __gmp_fac2cnt_table[n / 2 - 1];
+      else
         {
-         MPZ_SET_1_NZ (ret, start * (start + step) * (start + 2 * step));
-         return;
+         popc_limb (count, n);
+         count = n - count;
         }
-      MPZ_SET_1_NZ (ret,
-                   start * (start + step) * (start + 2 * step) * (start +
-                                                                  3 * step));
-      if (count == 4)
-       return;
-      for (b = start + 4 * step, a = count / 4 - 1; a != 0;
-          a--, b += 4 * step)
-       MPZ_MUL_1_POS (ret, ret,
-                      b * (b + step) * (b + 2 * step) * (b + 3 * step));
-      if (count % 4 == 2)
-       b = b * (b + step);
-      if (count % 4 == 3)
-       b = b * (b + step) * (b + 2 * step);
-      if (count % 4 != 0)
-       MPZ_MUL_1_POS (ret, ret, b);
-      return;
+      mpz_mul_2exp (x, x, count);
      }
  }
  
-/* return value in st[0]
-   odd_product(l,h)=sqrt((h/e)^h/(l/e)^l) using Stirling approx and e=exp(1)
-   so st[0] needs enough bits for above, st[1] needs half these bits and
-   st[2] needs 1/4 of these bits etc   */
-static void
-odd_product (unsigned long low, unsigned long high, mpz_t * st)
-{
-  unsigned long stc = 1, stn = 0, n, y, mask, a, nm = 1;
-  signed long z;
-
-  low++;
-  if (low % 2 == 0)
-    low++;
-  if (high == 0)
-    high = 1;
-  if (high % 2 == 0)
-    high--;
-/* must have high>=low ? check this and remove test below */
-  if (high < low)
-    {
-      MPZ_SET_1_NZ (st[0], 1);
-      return;
-    }
-  if (high == low)
-    {
-      MPZ_SET_1_NZ (st[0], low);
-      return;
-    }
-  if (high <= FACMUL2 + 2)
-    {
-      nm = 2;
-      if (high <= FACMUL3 + 4)
-       {
-         nm = 3;
-         if (high <= FACMUL4 + 6)
-           nm = 4;
-       }
-    }
-  high = (high - low) / 2 + 1; /* high is now count,high<=2^(BITS_PER_ULONG-1) */
-  if (high <= (((unsigned long) 1) << APCONST))
-    {
-      ap_product_small (st[0], (mp_limb_t) low, CNST_LIMB(2), high, nm);
-      return;
-    }
-  count_leading_zeros (n, (mp_limb_t) high);
-/* assumes clz above is LIMB based not NUMB based */
-  n = GMP_LIMB_BITS - n - APCONST;
-  mask = (((unsigned long) 1) << n);
-  a = mask << 1;
-  mask--;
-/* have 2^(BITS_IN_N-APCONST) iterations so need
-   (BITS_IN_N-APCONST+1) stack entries */
-  for (z = mask; z >= 0; z--)
-    {
-      BITREV_ULONG (y, z);
-      y >>= (BITS_PER_ULONG - n);
-      ap_product_small (st[stn],
-                       (mp_limb_t) (low + 2 * ((~y) & mask)), (mp_limb_t) a,
-                       (high + y) >> n, nm);
-      ASSERT (((high + y) >> n) <= (((unsigned long) 1) << APCONST));
-      stn++;
-      y = stc++;
-      while ((y & 1) == 0)
-       {
-         mpz_mul (st[stn - 2], st[stn - 2], st[stn - 1]);
-         stn--;
-         y >>= 1;
-       }
-    }
-  ASSERT (stn == 1);
-  return;
-}
+#undef FACTORS_PER_LIMB
+#undef FACTOR_LIST_STORE
diff --git a/mpz/fdiv_q.c b/mpz/fdiv_q.c

index 6b4c2c4d6391553409729bbc1bd611b08a527330..6281b1ee6918b5f836a9de138912bf21c6d205f6 100644 (file)
--- a/mpz/fdiv_q.c
+++ b/mpz/fdiv_q.c
@@ -1,7 +1,8 @@
  /* mpz_fdiv_q -- Division rounding the quotient towards -infinity.
     The remainder gets the same sign as the denominator.
  
-Copyright 1994, 1995, 1996, 2000, 2001, 2005 Free Software Foundation, Inc.
+Copyright 1994, 1995, 1996, 2000, 2001, 2005, 2012 Free Software Foundation,
+Inc.
  
  This file is part of the GNU MP Library.
  
@@ -24,8 +25,8 @@ along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
  void
  mpz_fdiv_q (mpz_ptr quot, mpz_srcptr dividend, mpz_srcptr divisor)
  {
-  mp_size_t dividend_size = dividend->_mp_size;
-  mp_size_t divisor_size = divisor->_mp_size;
+  mp_size_t dividend_size = SIZ (dividend);
+  mp_size_t divisor_size = SIZ (divisor);
    mpz_t rem;
    TMP_DECL;
  
@@ -35,7 +36,7 @@ mpz_fdiv_q (mpz_ptr quot, mpz_srcptr dividend, mpz_srcptr divisor)
  
    mpz_tdiv_qr (quot, rem, dividend, divisor);
  
-  if ((divisor_size ^ dividend_size) < 0 && rem->_mp_size != 0)
+  if ((divisor_size ^ dividend_size) < 0 && SIZ (rem) != 0)
      mpz_sub_ui (quot, quot, 1L);
  
    TMP_FREE;
diff --git a/mpz/fdiv_q_ui.c b/mpz/fdiv_q_ui.c

index 9554185cccb9a31f822177f7eb554d186c506a8a..41fa8ab684291df2cc4dbf1eee4f24bed5c0ffe8 100644 (file)
--- a/mpz/fdiv_q_ui.c
+++ b/mpz/fdiv_q_ui.c
@@ -1,8 +1,8 @@
  /* mpz_fdiv_q_ui -- Division rounding the quotient towards -infinity.
     The remainder gets the same sign as the denominator.
  
-Copyright 1994, 1995, 1996, 1999, 2001, 2002, 2004 Free Software Foundation,
-Inc.
+Copyright 1994, 1995, 1996, 1999, 2001, 2002, 2004, 2012 Free Software
+Foundation, Inc.
  
  This file is part of the GNU MP Library.
  
@@ -29,7 +29,7 @@ mpz_fdiv_q_ui (mpz_ptr quot, mpz_srcptr dividend, unsigned long int divisor)
    mp_ptr np, qp;
    mp_limb_t rl;
  
-  if (divisor == 0)
+  if (UNLIKELY (divisor == 0))
      DIVIDE_BY_ZERO;
  
    ns = SIZ(dividend);
@@ -40,8 +40,7 @@ mpz_fdiv_q_ui (mpz_ptr quot, mpz_srcptr dividend, unsigned long int divisor)
      }
  
    nn = ABS(ns);
-  MPZ_REALLOC (quot, nn);
-  qp = PTR(quot);
+  qp = MPZ_REALLOC (quot, nn);
    np = PTR(dividend);
  
  #if BITS_PER_ULONG > GMP_NUMB_BITS  /* avoid warnings about shift amount */
diff --git a/mpz/fdiv_qr.c b/mpz/fdiv_qr.c

index 0230db1725d32757674045f29209e9dff33be618..d48654776135ad9e28234f5ba1b5b6f1ca284dc1 100644 (file)
--- a/mpz/fdiv_qr.c
+++ b/mpz/fdiv_qr.c
@@ -1,7 +1,8 @@
  /* mpz_fdiv_qr -- Division rounding the quotient towards -infinity.
     The remainder gets the same sign as the denominator.
  
-Copyright 1994, 1995, 1996, 2000, 2001, 2005 Free Software Foundation, Inc.
+Copyright 1994, 1995, 1996, 2000, 2001, 2005, 2012 Free Software Foundation,
+Inc.
  
  This file is part of the GNU MP Library.
  
@@ -24,7 +25,7 @@ along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
  void
  mpz_fdiv_qr (mpz_ptr quot, mpz_ptr rem, mpz_srcptr dividend, mpz_srcptr divisor)
  {
-  mp_size_t divisor_size = divisor->_mp_size;
+  mp_size_t divisor_size = SIZ (divisor);
    mp_size_t xsize;
    mpz_t temp_divisor;          /* N.B.: lives until function returns! */
    TMP_DECL;
@@ -41,10 +42,10 @@ mpz_fdiv_qr (mpz_ptr quot, mpz_ptr rem, mpz_srcptr dividend, mpz_srcptr divisor)
        divisor = temp_divisor;
      }
  
-  xsize = dividend->_mp_size ^ divisor_size;;
+  xsize = SIZ (dividend) ^ divisor_size;;
    mpz_tdiv_qr (quot, rem, dividend, divisor);
  
-  if (xsize < 0 && rem->_mp_size != 0)
+  if (xsize < 0 && SIZ (rem) != 0)
      {
        mpz_sub_ui (quot, quot, 1L);
        mpz_add (rem, rem, divisor);
diff --git a/mpz/fdiv_qr_ui.c b/mpz/fdiv_qr_ui.c

index 7c41fc842168e1ecdb7746dd95975a2233511062..a4e84eb40980d9bfa991d6bd85c5a629281ccd60 100644 (file)
--- a/mpz/fdiv_qr_ui.c
+++ b/mpz/fdiv_qr_ui.c
@@ -1,7 +1,7 @@
  /* mpz_fdiv_qr_ui -- Division rounding the quotient towards -infinity.
     The remainder gets the same sign as the denominator.
  
-Copyright 1994, 1995, 1996, 1999, 2001, 2002, 2004 Free Software Foundation,
+Copyright 1994, 1995, 1996, 1999, 2001, 2002, 2004, 2012 Free Software Foundation,
  Inc.
  
  This file is part of the GNU MP Library.
@@ -29,7 +29,7 @@ mpz_fdiv_qr_ui (mpz_ptr quot, mpz_ptr rem, mpz_srcptr dividend, unsigned long in
    mp_ptr np, qp;
    mp_limb_t rl;
  
-  if (divisor == 0)
+  if (UNLIKELY (divisor == 0))
      DIVIDE_BY_ZERO;
  
    ns = SIZ(dividend);
@@ -41,8 +41,7 @@ mpz_fdiv_qr_ui (mpz_ptr quot, mpz_ptr rem, mpz_srcptr dividend, unsigned long in
      }
  
    nn = ABS(ns);
-  MPZ_REALLOC (quot, nn);
-  qp = PTR(quot);
+  qp = MPZ_REALLOC (quot, nn);
    np = PTR(dividend);
  
  #if BITS_PER_ULONG > GMP_NUMB_BITS  /* avoid warnings about shift amount */
diff --git a/mpz/fdiv_r.c b/mpz/fdiv_r.c

index 56bcf4cb0badfc3de69b59d1e30ac2a01f034223..706e49ec1aca308bbf9f09cb63de268ad35c671b 100644 (file)
--- a/mpz/fdiv_r.c
+++ b/mpz/fdiv_r.c
@@ -1,7 +1,7 @@
  /* mpz_fdiv_r -- Division rounding the quotient towards -infinity.
     The remainder gets the same sign as the denominator.
  
-Copyright 1994, 1995, 1996, 2001, 2005 Free Software Foundation, Inc.
+Copyright 1994, 1995, 1996, 2001, 2005, 2012 Free Software Foundation, Inc.
  
  This file is part of the GNU MP Library.
  
@@ -24,7 +24,7 @@ along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
  void
  mpz_fdiv_r (mpz_ptr rem, mpz_srcptr dividend, mpz_srcptr divisor)
  {
-  mp_size_t divisor_size = divisor->_mp_size;
+  mp_size_t divisor_size = SIZ (divisor);
    mpz_t temp_divisor;          /* N.B.: lives until function returns! */
    TMP_DECL;
  
@@ -42,7 +42,7 @@ mpz_fdiv_r (mpz_ptr rem, mpz_srcptr dividend, mpz_srcptr divisor)
  
    mpz_tdiv_r (rem, dividend, divisor);
  
-  if ((divisor_size ^ dividend->_mp_size) < 0 && rem->_mp_size != 0)
+  if ((divisor_size ^ SIZ (dividend)) < 0 && SIZ (rem) != 0)
      mpz_add (rem, rem, divisor);
  
    TMP_FREE;
diff --git a/mpz/fdiv_r_ui.c b/mpz/fdiv_r_ui.c

index d16e43225896665da118828632fe6d3412929204..3d3a00e6225bb535cca64411b79c4146f5918217 100644 (file)
--- a/mpz/fdiv_r_ui.c
+++ b/mpz/fdiv_r_ui.c
@@ -1,7 +1,7 @@
  /* mpz_fdiv_r_ui -- Division rounding the quotient towards -infinity.
     The remainder gets the same sign as the denominator.
  
-Copyright 1994, 1995, 1996, 2001, 2002, 2004, 2005 Free Software Foundation,
+Copyright 1994, 1995, 1996, 2001, 2002, 2004, 2005, 2012 Free Software Foundation,
  Inc.
  
  This file is part of the GNU MP Library.
@@ -29,7 +29,7 @@ mpz_fdiv_r_ui (mpz_ptr rem, mpz_srcptr dividend, unsigned long int divisor)
    mp_ptr np;
    mp_limb_t rl;
  
-  if (divisor == 0)
+  if (UNLIKELY (divisor == 0))
      DIVIDE_BY_ZERO;
  
    ns = SIZ(dividend);
@@ -49,8 +49,7 @@ mpz_fdiv_r_ui (mpz_ptr rem, mpz_srcptr dividend, unsigned long int divisor)
        mp_size_t rn;
        TMP_DECL;
  
-      MPZ_REALLOC (rem, 2);
-      rp = PTR(rem);
+      rp = MPZ_REALLOC (rem, 2);
  
        if (nn == 1)             /* tdiv_qr requirements; tested above for 0 */
         {
diff --git a/mpz/fdiv_ui.c b/mpz/fdiv_ui.c

index 566b6e72505f431e990bf97221fa645bf97926bd..c4f4ec962a26a020122116aa8709c6cf40d9556e 100644 (file)
--- a/mpz/fdiv_ui.c
+++ b/mpz/fdiv_ui.c
@@ -1,8 +1,8 @@
  /* mpz_fdiv_ui -- Division rounding the quotient towards -infinity.
     The remainder gets the same sign as the denominator.
  
-Copyright 1994, 1995, 1996, 2001, 2002, 2004, 2005 Free Software Foundation,
-Inc.
+Copyright 1994, 1995, 1996, 2001, 2002, 2004, 2005, 2012 Free Software
+Foundation, Inc.
  
  This file is part of the GNU MP Library.
  
@@ -29,7 +29,7 @@ mpz_fdiv_ui (mpz_srcptr dividend, unsigned long int divisor)
    mp_ptr np;
    mp_limb_t rl;
  
-  if (divisor == 0)
+  if (UNLIKELY (divisor == 0))
      DIVIDE_BY_ZERO;
  
    ns = SIZ(dividend);
diff --git a/mpz/fib2_ui.c b/mpz/fib2_ui.c

index 8521136804d073699a2a3290d29851d15120175f..1bd61d16e31d2cc588fe50b911e88d7df41219f1 100644 (file)
--- a/mpz/fib2_ui.c
+++ b/mpz/fib2_ui.c
@@ -1,6 +1,6 @@
  /* mpz_fib2_ui -- calculate Fibonacci numbers.
  
-Copyright 2001 Free Software Foundation, Inc.
+Copyright 2001, 2012 Free Software Foundation, Inc.
  
  This file is part of the GNU MP Library.
  
@@ -29,10 +29,8 @@ mpz_fib2_ui (mpz_ptr fn, mpz_ptr fnsub1, unsigned long n)
    mp_size_t  size;
  
    size = MPN_FIB2_SIZE (n);
-  MPZ_REALLOC (fn,     size);
-  MPZ_REALLOC (fnsub1, size);
-  fp = PTR (fn);
-  f1p = PTR (fnsub1);
+  fp =  MPZ_REALLOC (fn,     size);
+  f1p = MPZ_REALLOC (fnsub1, size);
  
    size = mpn_fib2_ui (fp, f1p, n);
  
diff --git a/mpz/fib_ui.c b/mpz/fib_ui.c

index 8c13a8f053c42264041f72ee2447cb4577cff041..daa77bbd9f4368e414ab0ba0fe8f4f31107fc6df 100644 (file)
--- a/mpz/fib_ui.c
+++ b/mpz/fib_ui.c
@@ -1,6 +1,6 @@
  /* mpz_fib_ui -- calculate Fibonacci numbers.
  
-Copyright 2000, 2001, 2002, 2005 Free Software Foundation, Inc.
+Copyright 2000, 2001, 2002, 2005, 2012 Free Software Foundation, Inc.
  
  This file is part of the GNU MP Library.
  
@@ -61,17 +61,16 @@ mpz_fib_ui (mpz_ptr fn, unsigned long n)
  
    n2 = n/2;
    xalloc = MPN_FIB2_SIZE (n2) + 1;
-  MPZ_REALLOC (fn, 2*xalloc+1);
-  fp = PTR (fn);
+  fp = MPZ_REALLOC (fn, 2*xalloc+1);
  
    TMP_MARK;
    TMP_ALLOC_LIMBS_2 (xp,xalloc, yp,xalloc);
    size = mpn_fib2_ui (xp, yp, n2);
  
    TRACE (printf ("mpz_fib_ui last step n=%lu size=%ld bit=%lu\n",
-                 n >> 1, size, n&1);
-         mpn_trace ("xp", xp, size);
-         mpn_trace ("yp", yp, size));
+                n >> 1, size, n&1);
+        mpn_trace ("xp", xp, size);
+        mpn_trace ("yp", yp, size));
  
    if (n & 1)
      {
@@ -104,16 +103,16 @@ mpz_fib_ui (mpz_ptr fn, unsigned long n)
        fp[0] += (n & 2 ? -CNST_LIMB(2) : CNST_LIMB(2));
  #else
        if (n & 2)
-        {
-          ASSERT (fp[0] >= 2);
-          fp[0] -= 2;
-        }
+       {
+         ASSERT (fp[0] >= 2);
+         fp[0] -= 2;
+       }
        else
-        {
-          ASSERT (c != GMP_NUMB_MAX); /* because it's the high of a mul */
-          c += mpn_add_1 (fp, fp, size-1, CNST_LIMB(2));
-          fp[size-1] = c;
-        }
+       {
+         ASSERT (c != GMP_NUMB_MAX); /* because it's the high of a mul */
+         c += mpn_add_1 (fp, fp, size-1, CNST_LIMB(2));
+         fp[size-1] = c;
+       }
  #endif
      }
    else
@@ -136,7 +135,7 @@ mpz_fib_ui (mpz_ptr fn, unsigned long n)
    SIZ(fn) = size;
  
    TRACE (printf ("done special, size=%ld\n", size);
-         mpn_trace ("fp ", fp, size));
+        mpn_trace ("fp ", fp, size));
  
    TMP_FREE;
  }
diff --git a/mpz/fits_sint.c b/mpz/fits_sint.c

index 6730b6c11f5e8bc68a276a220416480a7c8ff4dc..3ec3ef6ff172d2867d9f5982fa30e0bce8f7a780 100644 (file)
--- a/mpz/fits_sint.c
+++ b/mpz/fits_sint.c
@@ -1,4 +1,4 @@
-/* int mpz_fits_sint_p (mpz_t z) -- test whether z fits a int.
+/* int mpz_fits_sint_p (mpz_t z) -- test whether z fits an int.
  
  Copyright 1997, 2000, 2001 Free Software Foundation, Inc.
  
diff --git a/mpz/gcd.c b/mpz/gcd.c

index 18787699d16146fb3d9ecfecc2d504aaf5a6ebe9..688b7397d43de20f92b3b28b69978d8841f633d9 100644 (file)
--- a/mpz/gcd.c
+++ b/mpz/gcd.c
@@ -1,7 +1,7 @@
  /* mpz/gcd.c:   Calculate the greatest common divisor of two integers.
  
-Copyright 1991, 1993, 1994, 1996, 2000, 2001, 2002, 2005 Free Software
-Foundation, Inc.
+Copyright 1991, 1993, 1994, 1996, 2000, 2001, 2002, 2005, 2010 Free
+Software Foundation, Inc.
  
  This file is part of the GNU MP Library.
  
@@ -21,63 +21,58 @@ along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
  #include "gmp.h"
  #include "gmp-impl.h"
  #include "longlong.h"
-#ifdef BERKELEY_MP
-#include "mp.h"
-#endif
  
  
  void
-#ifndef BERKELEY_MP
  mpz_gcd (mpz_ptr g, mpz_srcptr u, mpz_srcptr v)
-#else /* BERKELEY_MP */
-gcd (mpz_srcptr u, mpz_srcptr v, mpz_ptr g)
-#endif /* BERKELEY_MP */
  {
    unsigned long int g_zero_bits, u_zero_bits, v_zero_bits;
    mp_size_t g_zero_limbs, u_zero_limbs, v_zero_limbs;
    mp_ptr tp;
-  mp_ptr up = u->_mp_d;
-  mp_size_t usize = ABS (u->_mp_size);
-  mp_ptr vp = v->_mp_d;
-  mp_size_t vsize = ABS (v->_mp_size);
+  mp_ptr up;
+  mp_size_t usize;
+  mp_ptr vp;
+  mp_size_t vsize;
    mp_size_t gsize;
    TMP_DECL;
  
+  up = PTR(u);
+  usize = ABSIZ (u);
+  vp = PTR(v);
+  vsize = ABSIZ (v);
    /* GCD(0, V) == V.  */
    if (usize == 0)
      {
-      g->_mp_size = vsize;
+      SIZ (g) = vsize;
        if (g == v)
         return;
-      if (g->_mp_alloc < vsize)
-       _mpz_realloc (g, vsize);
-      MPN_COPY (g->_mp_d, vp, vsize);
+      MPZ_REALLOC (g, vsize);
+      MPN_COPY (PTR (g), vp, vsize);
        return;
      }
  
    /* GCD(U, 0) == U.  */
    if (vsize == 0)
      {
-      g->_mp_size = usize;
+      SIZ (g) = usize;
        if (g == u)
         return;
-      if (g->_mp_alloc < usize)
-       _mpz_realloc (g, usize);
-      MPN_COPY (g->_mp_d, up, usize);
+      MPZ_REALLOC (g, usize);
+      MPN_COPY (PTR (g), up, usize);
        return;
      }
  
    if (usize == 1)
      {
-      g->_mp_size = 1;
-      g->_mp_d[0] = mpn_gcd_1 (vp, vsize, up[0]);
+      SIZ (g) = 1;
+      PTR (g)[0] = mpn_gcd_1 (vp, vsize, up[0]);
        return;
      }
  
    if (vsize == 1)
      {
-      g->_mp_size = 1;
-      g->_mp_d[0] = mpn_gcd_1 (up, usize, vp[0]);
+      SIZ(g) = 1;
+      PTR (g)[0] = mpn_gcd_1 (up, usize, vp[0]);
        return;
      }
  
@@ -86,7 +81,7 @@ gcd (mpz_srcptr u, mpz_srcptr v, mpz_ptr g)
    /*  Eliminate low zero bits from U and V and move to temporary storage.  */
    while (*up == 0)
      up++;
-  u_zero_limbs = up - u->_mp_d;
+  u_zero_limbs = up - PTR(u);
    usize -= u_zero_limbs;
    count_trailing_zeros (u_zero_bits, *up);
    tp = up;
@@ -101,7 +96,7 @@ gcd (mpz_srcptr u, mpz_srcptr v, mpz_ptr g)
  
    while (*vp == 0)
      vp++;
-  v_zero_limbs = vp - v->_mp_d;
+  v_zero_limbs = vp - PTR (v);
    vsize -= v_zero_limbs;
    count_trailing_zeros (v_zero_bits, *vp);
    tp = vp;
@@ -141,23 +136,21 @@ gcd (mpz_srcptr u, mpz_srcptr v, mpz_ptr g)
      {
        mp_limb_t cy_limb;
        gsize += (vp[vsize - 1] >> (GMP_NUMB_BITS - g_zero_bits)) != 0;
-      if (g->_mp_alloc < gsize)
-       _mpz_realloc (g, gsize);
-      MPN_ZERO (g->_mp_d, g_zero_limbs);
+      MPZ_REALLOC (g, gsize);
+      MPN_ZERO (PTR (g), g_zero_limbs);
  
-      tp = g->_mp_d + g_zero_limbs;
+      tp = PTR(g) + g_zero_limbs;
        cy_limb = mpn_lshift (tp, vp, vsize, g_zero_bits);
        if (cy_limb != 0)
         tp[vsize] = cy_limb;
      }
    else
      {
-      if (g->_mp_alloc < gsize)
-       _mpz_realloc (g, gsize);
-      MPN_ZERO (g->_mp_d, g_zero_limbs);
-      MPN_COPY (g->_mp_d + g_zero_limbs, vp, vsize);
+      MPZ_REALLOC (g, gsize);
+      MPN_ZERO (PTR (g), g_zero_limbs);
+      MPN_COPY (PTR (g) + g_zero_limbs, vp, vsize);
      }
  
-  g->_mp_size = gsize;
+  SIZ (g) = gsize;
    TMP_FREE;
  }
diff --git a/mpz/gcdext.c b/mpz/gcdext.c

index 2419e2fe9585c59a48505dce27ef03c7b45bc75d..e284fbd7cd0f5f46ebbd31a62760c291698781f1 100644 (file)
--- a/mpz/gcdext.c
+++ b/mpz/gcdext.c
@@ -1,8 +1,8 @@
  /* mpz_gcdext(g, s, t, a, b) -- Set G to gcd(a, b), and S and T such that
     g = as + bt.
  
-Copyright 1991, 1993, 1994, 1995, 1996, 1997, 2000, 2001, 2005 Free Software
-Foundation, Inc.
+Copyright 1991, 1993, 1994, 1995, 1996, 1997, 2000, 2001, 2005, 2011,
+2012 Free Software Foundation, Inc.
  
  This file is part of the GNU MP Library.
  
@@ -26,99 +26,86 @@ along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
  void
  mpz_gcdext (mpz_ptr g, mpz_ptr s, mpz_ptr t, mpz_srcptr a, mpz_srcptr b)
  {
-  mp_size_t asize, bsize, usize, vsize;
-  mp_srcptr ap, bp;
-  mp_ptr up, vp;
+  mp_size_t asize, bsize;
+  mp_ptr tmp_ap, tmp_bp;
    mp_size_t gsize, ssize, tmp_ssize;
-  mp_ptr gp, sp, tmp_gp, tmp_sp;
-  mpz_srcptr u, v;
-  mpz_ptr ss, tt;
-  __mpz_struct stmp, gtmp;
+  mp_ptr gp, tmp_gp, tmp_sp;
    TMP_DECL;
  
-  TMP_MARK;
+  /* mpn_gcdext requires that Usize >= Vsize.  Therefore, we often
+     have to swap U and V.  The computed cofactor will be the
+     "smallest" one, which is faster to produce.  The wanted one will
+     be computed here; this is needed anyway when both are requested.  */
  
-  /* mpn_gcdext requires that U >= V.  Therefore, we often have to swap U and
-     V.  This in turn leads to a lot of complications.  The computed cofactor
-     will be the wrong one, so we have to fix that up at the end.  */
+  asize = ABSIZ (a);
+  bsize = ABSIZ (b);
  
-  asize = ABS (SIZ (a));
-  bsize = ABS (SIZ (b));
-  ap = PTR (a);
-  bp = PTR (b);
-  if (asize > bsize || (asize == bsize && mpn_cmp (ap, bp, asize) > 0))
+  if (asize < bsize)
      {
-      usize = asize;
-      vsize = bsize;
-      up = TMP_ALLOC_LIMBS (usize + 1);
-      vp = TMP_ALLOC_LIMBS (vsize + 1);
-      MPN_COPY (up, ap, usize);
-      MPN_COPY (vp, bp, vsize);
-      u = a;
-      v = b;
-      ss = s;
-      tt = t;
+      MPZ_SRCPTR_SWAP (a, b);
+      MP_SIZE_T_SWAP (asize, bsize);
+      MPZ_PTR_SWAP (s, t);
      }
-  else
+
+  if (bsize == 0)
      {
-      usize = bsize;
-      vsize = asize;
-      up = TMP_ALLOC_LIMBS (usize + 1);
-      vp = TMP_ALLOC_LIMBS (vsize + 1);
-      MPN_COPY (up, bp, usize);
-      MPN_COPY (vp, ap, vsize);
-      u = b;
-      v = a;
-      ss = t;
-      tt = s;
-    }
+      /* g = |a|, s = sgn(a), t = 0. */
+      ssize = SIZ (a) >= 0 ? (asize != 0) : -1;
  
-  tmp_gp = TMP_ALLOC_LIMBS (usize + 1);
-  tmp_sp = TMP_ALLOC_LIMBS (usize + 1);
+      gp = MPZ_REALLOC (g, asize);
+      MPN_COPY (gp, PTR (a), asize);
+      SIZ (g) = asize;
  
-  if (vsize == 0)
-    {
-      tmp_sp[0] = 1;
-      tmp_ssize = 1;
-      MPN_COPY (tmp_gp, up, usize);
-      gsize = usize;
+      if (t != NULL)
+       SIZ (t) = 0;
+      if (s != NULL)
+       {
+         SIZ (s) = ssize;
+         PTR (s)[0] = 1;
+       }
+      return;
      }
-  else
-    gsize = mpn_gcdext (tmp_gp, tmp_sp, &tmp_ssize, up, usize, vp, vsize);
-  ssize = ABS (tmp_ssize);
  
-  PTR (&gtmp) = tmp_gp;
-  SIZ (&gtmp) = gsize;
+  TMP_MARK;
+
+  TMP_ALLOC_LIMBS_2 (tmp_ap, asize, tmp_bp, bsize);
+  MPN_COPY (tmp_ap, PTR (a), asize);
+  MPN_COPY (tmp_bp, PTR (b), bsize);
  
-  PTR (&stmp) = tmp_sp;
-  SIZ (&stmp) = (tmp_ssize ^ SIZ (u)) >= 0 ? ssize : -ssize;
+  TMP_ALLOC_LIMBS_2 (tmp_gp, bsize, tmp_sp, bsize + 1);
  
-  if (tt != NULL)
+  gsize = mpn_gcdext (tmp_gp, tmp_sp, &tmp_ssize, tmp_ap, asize, tmp_bp, bsize);
+
+  ssize = ABS (tmp_ssize);
+  tmp_ssize = SIZ (a) >= 0 ? tmp_ssize : -tmp_ssize;
+
+  if (t != NULL)
      {
-      if (SIZ (v) == 0)
-       SIZ (tt) = 0;
-      else
-       {
-         mpz_t x;
-         MPZ_TMP_INIT (x, ssize + usize + 1);
-         mpz_mul (x, &stmp, u);
-         mpz_sub (x, &gtmp, x);
-         mpz_tdiv_q (tt, x, v);
-       }
+      mpz_t x;
+      __mpz_struct gtmp, stmp;
+
+      PTR (&gtmp) = tmp_gp;
+      SIZ (&gtmp) = gsize;
+
+      PTR (&stmp) = tmp_sp;
+      SIZ (&stmp) = tmp_ssize;
+
+      MPZ_TMP_INIT (x, ssize + asize + 1);
+      mpz_mul (x, &stmp, a);
+      mpz_sub (x, &gtmp, x);
+      mpz_divexact (t, x, b);
      }
  
-  if (ss != NULL)
+  if (s != NULL)
      {
-      if (ALLOC (ss) < ssize)
-       _mpz_realloc (ss, ssize);
-      sp = PTR (ss);
+      mp_ptr sp;
+
+      sp = MPZ_REALLOC (s, ssize);
        MPN_COPY (sp, tmp_sp, ssize);
-      SIZ (ss) = SIZ (&stmp);
+      SIZ (s) = tmp_ssize;
      }
  
-  if (ALLOC (g) < gsize)
-    _mpz_realloc (g, gsize);
-  gp = PTR (g);
+  gp = MPZ_REALLOC (g, gsize);
    MPN_COPY (gp, tmp_gp, gsize);
    SIZ (g) = gsize;
  
diff --git a/mpz/get_d_2exp.c b/mpz/get_d_2exp.c

index c3cf60c9d629de3e9ffeb1ea15318c8a91a4bdf5..b2e2e51ce48fdec15dc351091851ee6622e2cf5b 100644 (file)
--- a/mpz/get_d_2exp.c
+++ b/mpz/get_d_2exp.c
@@ -1,6 +1,6 @@
  /* double mpz_get_d_2exp (signed long int *exp, mpz_t src).
  
-Copyright 2001, 2003, 2004 Free Software Foundation, Inc.
+Copyright 2001, 2003, 2004, 2012 Free Software Foundation, Inc.
  
  This file is part of the GNU MP Library.
  
@@ -26,7 +26,6 @@ mpz_get_d_2exp (signed long int *exp2, mpz_srcptr src)
  {
    mp_size_t size, abs_size;
    mp_srcptr ptr;
-  int cnt;
    long exp;
  
    size = SIZ(src);
@@ -38,8 +37,7 @@ mpz_get_d_2exp (signed long int *exp2, mpz_srcptr src)
  
    ptr = PTR(src);
    abs_size = ABS(size);
-  count_leading_zeros (cnt, ptr[abs_size - 1]);
-  exp = abs_size * GMP_NUMB_BITS - (cnt - GMP_NAIL_BITS);
+  MPN_SIZEINBASE_2EXP(exp, ptr, abs_size, 1);
    *exp2 = exp;
    return mpn_get_d (ptr, abs_size, size, -exp);
  }
diff --git a/mpz/get_si.c b/mpz/get_si.c

index 2f8a4738cdf0e38cb2410739dbadf6e0cfc90ffd..f609d483391b415fc251ed4d28e650c22991e2d7 100644 (file)
--- a/mpz/get_si.c
+++ b/mpz/get_si.c
@@ -1,6 +1,6 @@
  /* mpz_get_si(integer) -- Return the least significant digit from INTEGER.
  
-Copyright 1991, 1993, 1994, 1995, 2000, 2001, 2002, 2006 Free Software
+Copyright 1991, 1993, 1994, 1995, 2000, 2001, 2002, 2006, 2012 Free Software
  Foundation, Inc.
  
  This file is part of the GNU MP Library.
@@ -24,8 +24,8 @@ along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
  signed long int
  mpz_get_si (mpz_srcptr z) __GMP_NOTHROW
  {
-  mp_ptr zp = z->_mp_d;
-  mp_size_t size = z->_mp_size;
+  mp_ptr zp = PTR (z);
+  mp_size_t size = SIZ (z);
    mp_limb_t zl = zp[0];
  
  #if GMP_NAIL_BITS != 0
diff --git a/mpz/get_str.c b/mpz/get_str.c

index cce519365c1cf973de7ea3d2162ea0e3dade0c34..70ad589cee4bbf4f884b88e7f30ea12a18a4eb96 100644 (file)
--- a/mpz/get_str.c
+++ b/mpz/get_str.c
@@ -4,7 +4,7 @@
     result.  If STRING is not NULL, the caller must ensure enough space is
     available to store the result.
  
-Copyright 1991, 1993, 1994, 1996, 2000, 2001, 2002, 2005 Free Software
+Copyright 1991, 1993, 1994, 1996, 2000, 2001, 2002, 2005, 2012 Free Software
  Foundation, Inc.
  
  This file is part of the GNU MP Library.
@@ -31,19 +31,18 @@ char *
  mpz_get_str (char *res_str, int base, mpz_srcptr x)
  {
    mp_ptr xp;
-  mp_size_t x_size = x->_mp_size;
-  char *str;
+  mp_size_t x_size = SIZ (x);
    char *return_str;
    size_t str_size;
    size_t alloc_size = 0;
-  char *num_to_text;
+  const char *num_to_text;
    int i;
    TMP_DECL;
  
    if (base >= 0)
      {
        num_to_text = "0123456789abcdefghijklmnopqrstuvwxyz";
-      if (base == 0)
+      if (base <= 1)
         base = 10;
        else if (base > 36)
         {
@@ -55,6 +54,10 @@ mpz_get_str (char *res_str, int base, mpz_srcptr x)
    else
      {
        base = -base;
+      if (base <= 1)
+       base = 10;
+      else if (base > 36)
+       return NULL;
        num_to_text = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ";
      }
  
@@ -76,29 +79,19 @@ mpz_get_str (char *res_str, int base, mpz_srcptr x)
  
    /* mpn_get_str clobbers its input on non power-of-2 bases */
    TMP_MARK;
-  xp = x->_mp_d;
+  xp = PTR (x);
    if (! POW2_P (base))
      {
-      xp = TMP_ALLOC_LIMBS (x_size + 1);  /* +1 in case x_size==0 */
-      MPN_COPY (xp, x->_mp_d, x_size);
+      xp = TMP_ALLOC_LIMBS (x_size | 1);  /* |1 in case x_size==0 */
+      MPN_COPY (xp, PTR (x), x_size);
      }
  
    str_size = mpn_get_str ((unsigned char *) res_str, base, xp, x_size);
    ASSERT (alloc_size == 0 || str_size <= alloc_size - (SIZ(x) < 0));
  
-  /* might have a leading zero, skip it */
-  str = res_str;
-  if (*res_str == 0 && str_size != 1)
-    {
-      str_size--;
-      str++;
-      ASSERT (*str != 0);  /* at most one leading zero */
-    }
-
-  /* Convert result to printable chars, and move down if there was a leading
-     zero.  */
+  /* Convert result to printable chars.  */
    for (i = 0; i < str_size; i++)
-    res_str[i] = num_to_text[(int) str[i]];
+    res_str[i] = num_to_text[(int) res_str[i]];
    res_str[str_size] = 0;
  
    TMP_FREE;
@@ -109,7 +102,7 @@ mpz_get_str (char *res_str, int base, mpz_srcptr x)
        size_t  actual_size = str_size + 1 + (res_str - return_str);
        ASSERT (actual_size == strlen (return_str) + 1);
        __GMP_REALLOCATE_FUNC_MAYBE_TYPE (return_str, alloc_size, actual_size,
-                                        char);
+                                       char);
      }
    return return_str;
  }
diff --git a/mpz/import.c b/mpz/import.c

index 17e3d5875979a99077608b772c05ba0062b53939..1585d67e188e95b77b3823e97c6b4a554007f940 100644 (file)
--- a/mpz/import.c
+++ b/mpz/import.c
@@ -1,6 +1,6 @@
  /* mpz_import -- set mpz from word data.
  
-Copyright 2002 Free Software Foundation, Inc.
+Copyright 2002, 2012 Free Software Foundation, Inc.
  
  This file is part of the GNU MP Library.
  
@@ -37,7 +37,7 @@ static const mp_limb_t  endian_test = (CNST_LIMB(1) << (GMP_LIMB_BITS-7)) - 1;
  
  void
  mpz_import (mpz_ptr z, size_t count, int order,
-            size_t size, int endian, size_t nail, const void *data)
+           size_t size, int endian, size_t nail, const void *data)
  {
    mp_size_t  zsize;
    mp_ptr     zp;
@@ -47,8 +47,7 @@ mpz_import (mpz_ptr z, size_t count, int order,
    ASSERT (nail <= 8*size);
  
    zsize = (count * (8*size - nail) + GMP_NUMB_BITS-1) / GMP_NUMB_BITS;
-  MPZ_REALLOC (z, zsize);
-  zp = PTR(z);
+  zp = MPZ_REALLOC (z, zsize);
  
    if (endian == 0)
      endian = HOST_ENDIAN;
@@ -60,31 +59,31 @@ mpz_import (mpz_ptr z, size_t count, int order,
        unsigned  align = ((char *) data - (char *) NULL) % sizeof (mp_limb_t);
  
        if (order == -1
-          && size == sizeof (mp_limb_t)
-          && endian == HOST_ENDIAN
-          && align == 0)
-        {
-          MPN_COPY (zp, (mp_srcptr) data, (mp_size_t) count);
-          goto done;
-        }
+         && size == sizeof (mp_limb_t)
+         && endian == HOST_ENDIAN
+         && align == 0)
+       {
+         MPN_COPY (zp, (mp_srcptr) data, (mp_size_t) count);
+         goto done;
+       }
  
        if (order == -1
-          && size == sizeof (mp_limb_t)
-          && endian == - HOST_ENDIAN
-          && align == 0)
-        {
-          MPN_BSWAP (zp, (mp_srcptr) data, (mp_size_t) count);
-          goto done;
-        }
+         && size == sizeof (mp_limb_t)
+         && endian == - HOST_ENDIAN
+         && align == 0)
+       {
+         MPN_BSWAP (zp, (mp_srcptr) data, (mp_size_t) count);
+         goto done;
+       }
  
        if (order == 1
-          && size == sizeof (mp_limb_t)
-          && endian == HOST_ENDIAN
-          && align == 0)
-        {
-          MPN_REVERSE (zp, (mp_srcptr) data, (mp_size_t) count);
-          goto done;
-        }
+         && size == sizeof (mp_limb_t)
+         && endian == HOST_ENDIAN
+         && align == 0)
+       {
+         MPN_REVERSE (zp, (mp_srcptr) data, (mp_size_t) count);
+         goto done;
+       }
      }
  
    {
@@ -132,34 +131,34 @@ mpz_import (mpz_ptr z, size_t count, int order,
      lbits = 0;
      for (i = 0; i < count; i++)
        {
-        for (j = 0; j < wbytes; j++)
-          {
-            byte = *dp;
-            dp -= endian;
-            ACCUMULATE (8);
-          }
-        if (wbits != 0)
-          {
-            byte = *dp & wbitsmask;
-            dp -= endian;
-            ACCUMULATE (wbits);
-          }
-        dp += woffset;
+       for (j = 0; j < wbytes; j++)
+         {
+           byte = *dp;
+           dp -= endian;
+           ACCUMULATE (8);
+         }
+       if (wbits != 0)
+         {
+           byte = *dp & wbitsmask;
+           dp -= endian;
+           ACCUMULATE (wbits);
+         }
+       dp += woffset;
        }
  
      if (lbits != 0)
        {
-        ASSERT (lbits <= GMP_NUMB_BITS);
-        ASSERT_LIMB (limb);
-        *zp++ = limb;
+       ASSERT (lbits <= GMP_NUMB_BITS);
+       ASSERT_LIMB (limb);
+       *zp++ = limb;
        }
  
      ASSERT (zp == PTR(z) + zsize);
  
      /* low byte of word after most significant */
      ASSERT (dp == (unsigned char *) data
-            + (order < 0 ? count*size : - (mp_size_t) size)
-            + (endian >= 0 ? (mp_size_t) size - 1 : 0));
+           + (order < 0 ? count*size : - (mp_size_t) size)
+           + (endian >= 0 ? (mp_size_t) size - 1 : 0));
  
    }
  
diff --git a/mpz/init.c b/mpz/init.c

index 5fc0ed9d8e2e262ffd261732a8738762802cb2e5..1ad55b4c69bc881d986b19bae5ac2c7b097c72b7 100644 (file)
--- a/mpz/init.c
+++ b/mpz/init.c
@@ -1,7 +1,7 @@
  /* mpz_init() -- Make a new multiple precision number with value 0.
  
-Copyright 1991, 1993, 1994, 1995, 2000, 2001, 2002 Free Software Foundation,
-Inc.
+Copyright 1991, 1993, 1994, 1995, 2000, 2001, 2002, 2012 Free Software
+Foundation, Inc.
  
  This file is part of the GNU MP Library.
  
@@ -24,12 +24,12 @@ along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
  void
  mpz_init (mpz_ptr x)
  {
-  x->_mp_alloc = 1;
-  x->_mp_d = (mp_ptr) (*__gmp_allocate_func) (BYTES_PER_MP_LIMB);
-  x->_mp_size = 0;
+  ALLOC (x) = 1;
+  PTR (x) = (mp_ptr) (*__gmp_allocate_func) (BYTES_PER_MP_LIMB);
+  SIZ (x) = 0;
  
  #ifdef __CHECKER__
    /* let the low limb look initialized, for the benefit of mpz_get_ui etc */
-  x->_mp_d[0] = 0;
+  PTR (x) = 0;
  #endif
  }
diff --git a/mpz/inp_raw.c b/mpz/inp_raw.c

index 497207e60a41a99355059f8516479fce41d46f69..0da0c61c0fcada6177ee516a55cab83c2e647d80 100644 (file)
--- a/mpz/inp_raw.c
+++ b/mpz/inp_raw.c
@@ -1,6 +1,6 @@
  /* mpz_inp_raw -- read an mpz_t in raw format.
  
-Copyright 2001, 2002, 2005 Free Software Foundation, Inc.
+Copyright 2001, 2002, 2005, 2012 Free Software Foundation, Inc.
  
  This file is part of the GNU MP Library.
  
@@ -86,75 +86,74 @@ mpz_inp_raw (mpz_ptr x, FILE *fp)
  
    if (abs_xsize != 0)
      {
-      MPZ_REALLOC (x, abs_xsize);
-      xp = PTR(x);
+      xp = MPZ_REALLOC (x, abs_xsize);
  
        /* Get limb boundaries right in the read, for the benefit of the
-         non-nails case.  */
+        non-nails case.  */
        xp[0] = 0;
        cp = (char *) (xp + abs_xsize) - abs_csize;
        if (fread (cp, abs_csize, 1, fp) != 1)
-        return 0;
+       return 0;
  
        if (GMP_NAIL_BITS == 0)
-        {
-          /* Reverse limbs to least significant first, and byte swap.  If
-             abs_xsize is odd then on the last iteration elimb and slimb are
-             the same.  It doesn't seem extra code to handle that case
-             separately, to save an NTOH.  */
-          sp = xp;
-          ep = xp + abs_xsize-1;
-          for (i = 0; i < (abs_xsize+1)/2; i++)
-            {
-              NTOH_LIMB_FETCH (elimb, ep);
-              NTOH_LIMB_FETCH (slimb, sp);
-              *sp++ = elimb;
-              *ep-- = slimb;
-            }
-        }
+       {
+         /* Reverse limbs to least significant first, and byte swap.  If
+            abs_xsize is odd then on the last iteration elimb and slimb are
+            the same.  It doesn't seem extra code to handle that case
+            separately, to save an NTOH.  */
+         sp = xp;
+         ep = xp + abs_xsize-1;
+         for (i = 0; i < (abs_xsize+1)/2; i++)
+           {
+             NTOH_LIMB_FETCH (elimb, ep);
+             NTOH_LIMB_FETCH (slimb, sp);
+             *sp++ = elimb;
+             *ep-- = slimb;
+           }
+       }
        else
-        {
-          /* It ought to be possible to do the transformation in-place, but
-             for now it's easier to use an extra temporary area.  */
-          mp_limb_t  byte, limb;
-          int        bits;
-          mp_size_t  tpos;
-          mp_ptr     tp;
-          TMP_DECL;
-
-          TMP_MARK;
-          tp = TMP_ALLOC_LIMBS (abs_xsize);
-          limb = 0;
-          bits = 0;
-          tpos = 0;
-          for (i = abs_csize-1; i >= 0; i--)
-            {
-              byte = (unsigned char) cp[i];
-              limb |= (byte << bits);
-              bits += 8;
-              if (bits >= GMP_NUMB_BITS)
-                {
-                  ASSERT (tpos < abs_xsize);
-                  tp[tpos++] = limb & GMP_NUMB_MASK;
-                  bits -= GMP_NUMB_BITS;
-                  ASSERT (bits < 8);
-                  limb = byte >> (8 - bits);
-                }
-            }
-          if (bits != 0)
-            {
-              ASSERT (tpos < abs_xsize);
-              tp[tpos++] = limb;
-            }
-          ASSERT (tpos == abs_xsize);
-
-          MPN_COPY (xp, tp, abs_xsize);
-          TMP_FREE;
-        }
+       {
+         /* It ought to be possible to do the transformation in-place, but
+            for now it's easier to use an extra temporary area.  */
+         mp_limb_t  byte, limb;
+         int        bits;
+         mp_size_t  tpos;
+         mp_ptr     tp;
+         TMP_DECL;
+
+         TMP_MARK;
+         tp = TMP_ALLOC_LIMBS (abs_xsize);
+         limb = 0;
+         bits = 0;
+         tpos = 0;
+         for (i = abs_csize-1; i >= 0; i--)
+           {
+             byte = (unsigned char) cp[i];
+             limb |= (byte << bits);
+             bits += 8;
+             if (bits >= GMP_NUMB_BITS)
+               {
+                 ASSERT (tpos < abs_xsize);
+                 tp[tpos++] = limb & GMP_NUMB_MASK;
+                 bits -= GMP_NUMB_BITS;
+                 ASSERT (bits < 8);
+                 limb = byte >> (8 - bits);
+               }
+           }
+         if (bits != 0)
+           {
+             ASSERT (tpos < abs_xsize);
+             tp[tpos++] = limb;
+           }
+         ASSERT (tpos == abs_xsize);
+
+         MPN_COPY (xp, tp, abs_xsize);
+         TMP_FREE;
+       }
  
        /* GMP 1.x mpz_out_raw wrote high zero bytes, strip any high zero
-         limbs resulting from this.  Should be a non-zero value here, but
-         for safety don't assume that. */
+        limbs resulting from this.  Should be a non-zero value here, but
+        for safety don't assume that. */
        MPN_NORMALIZE (xp, abs_xsize);
      }
  
diff --git a/mpz/inp_str.c b/mpz/inp_str.c

index 05c8cde9088069adaf1b07ca7e0f59d733055cf4..4f5e46edaa6bdc41be2de43d0868d8c48fa98d69 100644 (file)
--- a/mpz/inp_str.c
+++ b/mpz/inp_str.c
@@ -5,8 +5,8 @@
     REST ARE INTERNALS AND ARE ALMOST CERTAIN TO BE SUBJECT TO INCOMPATIBLE
     CHANGES OR DISAPPEAR COMPLETELY IN FUTURE GNU MP RELEASES.
  
-Copyright 1991, 1993, 1994, 1996, 1998, 2000, 2001, 2002, 2003 Free Software
-Foundation, Inc.
+Copyright 1991, 1993, 1994, 1996, 1998, 2000, 2001, 2002, 2003, 2011, 2012
+Free Software Foundation, Inc.
  
  This file is part of the GNU MP Library.
  
@@ -27,8 +27,8 @@ along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
  #include <ctype.h>
  #include "gmp.h"
  #include "gmp-impl.h"
+#include "longlong.h"
  
-extern const unsigned char __gmp_digit_value_tab[];
  #define digit_value_tab __gmp_digit_value_tab
  
  size_t
@@ -147,17 +147,16 @@ mpz_inp_str_nowhite (mpz_ptr x, FILE *stream, int base, int c, size_t nread)
    /* Make sure the string is not empty, mpn_set_str would fail.  */
    if (str_size == 0)
      {
-      x->_mp_size = 0;
+      SIZ (x) = 0;
      }
    else
      {
-      xsize = 2 + (mp_size_t)
-       (str_size / (GMP_NUMB_BITS * mp_bases[base].chars_per_bit_exactly));
+      LIMBS_PER_DIGIT_IN_BASE (xsize, str_size, base);
        MPZ_REALLOC (x, xsize);
  
        /* Convert the byte array in base BASE to our bignum format.  */
-      xsize = mpn_set_str (x->_mp_d, (unsigned char *) str, str_size, base);
-      x->_mp_size = negative ? -xsize : xsize;
+      xsize = mpn_set_str (PTR (x), (unsigned char *) str, str_size, base);
+      SIZ (x) = negative ? -xsize : xsize;
      }
    (*__gmp_free_func) (str, alloc_size);
    return nread;
diff --git a/mpz/invert.c b/mpz/invert.c

index 009a03c42d9a6dacc930daa152f38330a1d4b8b2..18bdff8f8f7b740a2163c474e5721665db7e0460 100644 (file)
--- a/mpz/invert.c
+++ b/mpz/invert.c
@@ -2,8 +2,8 @@
     If X has an inverse, return non-zero and store inverse in INVERSE,
     otherwise, return 0 and put garbage in INVERSE.
  
-Copyright 1996, 1997, 1998, 1999, 2000, 2001, 2005 Free Software Foundation,
-Inc.
+Copyright 1996, 1997, 1998, 1999, 2000, 2001, 2005, 2012 Free Software
+Foundation, Inc.
  
  This file is part of the GNU MP Library.
  
@@ -30,17 +30,15 @@ mpz_invert (mpz_ptr inverse, mpz_srcptr x, mpz_srcptr n)
    mp_size_t xsize, nsize, size;
    TMP_DECL;
  
-  xsize = SIZ (x);
-  nsize = SIZ (n);
-  xsize = ABS (xsize);
-  nsize = ABS (nsize);
-  size = MAX (xsize, nsize) + 1;
+  xsize = ABSIZ (x);
+  nsize = ABSIZ (n);
  
    /* No inverse exists if the leftside operand is 0.  Likewise, no
       inverse exists if the mod operand is 1.  */
    if (xsize == 0 || (nsize == 1 && (PTR (n))[0] == 1))
      return 0;
  
+  size = MAX (xsize, nsize) + 1;
    TMP_MARK;
  
    MPZ_TMP_INIT (gcd, size);
@@ -48,7 +46,7 @@ mpz_invert (mpz_ptr inverse, mpz_srcptr x, mpz_srcptr n)
    mpz_gcdext (gcd, tmp, (mpz_ptr) 0, x, n);
  
    /* If no inverse existed, return with an indication of that.  */
-  if (SIZ (gcd) != 1 || PTR(gcd)[0] != 1)
+  if (!MPZ_EQUAL_1_P (gcd))
      {
        TMP_FREE;
        return 0;
diff --git a/mpz/ior.c b/mpz/ior.c

index 26362c94289d500204d40872fd5c11407cc6668f..3df0f23e2808e9cb3da9795677483b7620297735 100644 (file)
--- a/mpz/ior.c
+++ b/mpz/ior.c
@@ -1,7 +1,7 @@
  /* mpz_ior -- Logical inclusive or.
  
-Copyright 1991, 1993, 1994, 1996, 1997, 2000, 2001, 2005 Free Software
-Foundation, Inc.
+Copyright 1991, 1993, 1994, 1996, 1997, 2000, 2001, 2005, 2012, 2013 Free
+Software Foundation, Inc.
  
  This file is part of the GNU MP Library.
  
@@ -56,8 +56,8 @@ mpz_ior (mpz_ptr res, mpz_srcptr op1, mpz_srcptr op2)
               if (res_ptr != op1_ptr)
                 MPN_COPY (res_ptr + op2_size, op1_ptr + op2_size,
                           op1_size - op2_size);
-             for (i = op2_size - 1; i >= 0; i--)
-               res_ptr[i] = op1_ptr[i] | op2_ptr[i];
+             if (LIKELY (op2_size != 0))
+               mpn_ior_n (res_ptr, op1_ptr, op2_ptr, op2_size);
               res_size = op1_size;
             }
           else
@@ -73,8 +73,8 @@ mpz_ior (mpz_ptr res, mpz_srcptr op1, mpz_srcptr op2)
               if (res_ptr != op2_ptr)
                 MPN_COPY (res_ptr + op1_size, op2_ptr + op1_size,
                           op2_size - op1_size);
-             for (i = op1_size - 1; i >= 0; i--)
-               res_ptr[i] = op1_ptr[i] | op2_ptr[i];
+             if (LIKELY (op1_size != 0))
+               mpn_ior_n (res_ptr, op1_ptr, op2_ptr, op1_size);
               res_size = op2_size;
             }
  
@@ -90,7 +90,7 @@ mpz_ior (mpz_ptr res, mpz_srcptr op1, mpz_srcptr op2)
      {
        if (op2_size < 0)
         {
-         mp_ptr opx;
+         mp_ptr opx, opy;
           mp_limb_t cy;
  
           /* Both operands are negative, so will be the result.
@@ -105,20 +105,12 @@ mpz_ior (mpz_ptr res, mpz_srcptr op1, mpz_srcptr op2)
  
           /* Possible optimization: Decrease mpn_sub precision,
              as we won't use the entire res of both.  */
-         opx = TMP_ALLOC_LIMBS (res_size);
+         TMP_ALLOC_LIMBS_2 (opx, res_size, opy, res_size);
           mpn_sub_1 (opx, op1_ptr, res_size, (mp_limb_t) 1);
           op1_ptr = opx;
  
-         opx = TMP_ALLOC_LIMBS (res_size);
-         mpn_sub_1 (opx, op2_ptr, res_size, (mp_limb_t) 1);
-         op2_ptr = opx;
-
-         if (ALLOC(res) < res_size)
-           {
-             _mpz_realloc (res, res_size);
-             /* op1_ptr and op2_ptr point to temporary space.  */
-             res_ptr = PTR(res);
-           }
+         mpn_sub_1 (opy, op2_ptr, res_size, (mp_limb_t) 1);
+         op2_ptr = opy;
  
           /* First loop finds the size of the result.  */
           for (i = res_size - 1; i >= 0; i--)
@@ -128,9 +120,10 @@ mpz_ior (mpz_ptr res, mpz_srcptr op1, mpz_srcptr op2)
  
           if (res_size != 0)
             {
+             res_ptr = MPZ_REALLOC (res, res_size + 1);
+
               /* Second loop computes the real result.  */
-             for (i = res_size - 1; i >= 0; i--)
-               res_ptr[i] = op1_ptr[i] & op2_ptr[i];
+             mpn_and_n (res_ptr, op1_ptr, op2_ptr, res_size);
  
               cy = mpn_add_1 (res_ptr, res_ptr, res_size, (mp_limb_t) 1);
               if (cy)
@@ -153,8 +146,8 @@ mpz_ior (mpz_ptr res, mpz_srcptr op1, mpz_srcptr op2)
         {
           /* We should compute -OP1 | OP2.  Swap OP1 and OP2 and fall
              through to the code that handles OP1 | -OP2.  */
-          MPZ_SRCPTR_SWAP (op1, op2);
-          MPN_SRCPTR_SWAP (op1_ptr,op1_size, op2_ptr,op2_size);
+         MPZ_SRCPTR_SWAP (op1, op2);
+         MPN_SRCPTR_SWAP (op1_ptr,op1_size, op2_ptr,op2_size);
         }
      }
  
@@ -210,8 +203,8 @@ mpz_ior (mpz_ptr res, mpz_srcptr op1, mpz_srcptr op2)
      if (res_size != 0)
        {
         /* Second loop computes the real result.  */
-       for (i = count - 1; i >= 0; i--)
-         res_ptr[i] = ~op1_ptr[i] & op2_ptr[i];
+       if (LIKELY (count != 0))
+         mpn_andn_n (res_ptr, op2_ptr, op1_ptr, count);
  
         cy = mpn_add_1 (res_ptr, res_ptr, res_size, (mp_limb_t) 1);
         if (cy)
diff --git a/mpz/iset.c b/mpz/iset.c

index 384ca797e70923ef07e7687351d55afc686ea2ed..9c8c93462427cbd52e899eab84bcd344c4406019 100644 (file)
--- a/mpz/iset.c
+++ b/mpz/iset.c
@@ -1,8 +1,8 @@
  /* mpz_init_set (src_integer) -- Make a new multiple precision number with
     a value copied from SRC_INTEGER.
  
-Copyright 1991, 1993, 1994, 1996, 2000, 2001, 2002 Free Software Foundation,
-Inc.
+Copyright 1991, 1993, 1994, 1996, 2000, 2001, 2002, 2012 Free Software
+Foundation, Inc.
  
  This file is part of the GNU MP Library.
  
@@ -28,17 +28,17 @@ mpz_init_set (mpz_ptr w, mpz_srcptr u)
    mp_ptr wp, up;
    mp_size_t usize, size;
  
-  usize = u->_mp_size;
+  usize = SIZ (u);
    size = ABS (usize);
  
-  w->_mp_alloc = MAX (size, 1);
-  w->_mp_d = (mp_ptr) (*__gmp_allocate_func) (w->_mp_alloc * BYTES_PER_MP_LIMB);
+  ALLOC (w) = MAX (size, 1);
+  PTR (w) = (mp_ptr) (*__gmp_allocate_func) (ALLOC (w) * BYTES_PER_MP_LIMB);
  
-  wp = w->_mp_d;
-  up = u->_mp_d;
+  wp = PTR (w);
+  up = PTR (u);
  
    MPN_COPY (wp, up, size);
-  w->_mp_size = usize;
+  SIZ (w) = usize;
  
  #ifdef __CHECKER__
    /* let the low limb look initialized, for the benefit of mpz_get_ui etc */
diff --git a/mpz/iset_d.c b/mpz/iset_d.c

index 004b087e544201c7c28636f4244c428d4d9d15a1..19c245e36f09edebfdf29fd64afc81a26751723b 100644 (file)
--- a/mpz/iset_d.c
+++ b/mpz/iset_d.c
@@ -1,7 +1,7 @@
  /* mpz_init_set_d(integer, val) -- Initialize and assign INTEGER with a double
     value VAL.
  
-Copyright 1996, 2000, 2001 Free Software Foundation, Inc.
+Copyright 1996, 2000, 2001, 2012 Free Software Foundation, Inc.
  
  This file is part of the GNU MP Library.
  
@@ -24,8 +24,8 @@ along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
  void
  mpz_init_set_d (mpz_ptr dest, double val)
  {
-  dest->_mp_alloc = 1;
-  dest->_mp_d = (mp_ptr) (*__gmp_allocate_func) (BYTES_PER_MP_LIMB);
-  dest->_mp_size = 0;
+  ALLOC (dest) = 1;
+  PTR (dest) = (mp_ptr) (*__gmp_allocate_func) (BYTES_PER_MP_LIMB);
+  SIZ (dest) = 0;
    mpz_set_d (dest, val);
  }
diff --git a/mpz/iset_si.c b/mpz/iset_si.c

index 64e51b386cb4945bd076607c0151adeb09b333ba..ab53e349d63080345442ff0db8c69d233d172526 100644 (file)
--- a/mpz/iset_si.c
+++ b/mpz/iset_si.c
@@ -1,8 +1,8 @@
  /* mpz_init_set_si(dest,val) -- Make a new multiple precision in DEST and
     assign VAL to the new number.
  
-Copyright 1991, 1993, 1994, 1995, 2000, 2001, 2002 Free Software Foundation,
-Inc.
+Copyright 1991, 1993, 1994, 1995, 2000, 2001, 2002, 2012 Free Software
+Foundation, Inc.
  
  This file is part of the GNU MP Library.
  
@@ -28,22 +28,22 @@ mpz_init_set_si (mpz_ptr dest, signed long int val)
    mp_size_t size;
    mp_limb_t vl;
  
-  dest->_mp_alloc = 1;
-  dest->_mp_d = (mp_ptr) (*__gmp_allocate_func) (BYTES_PER_MP_LIMB);
+  ALLOC (dest) = 1;
+  PTR (dest) = (mp_ptr) (*__gmp_allocate_func) (BYTES_PER_MP_LIMB);
  
    vl = (mp_limb_t) ABS_CAST (unsigned long int, val);
  
-  dest->_mp_d[0] = vl & GMP_NUMB_MASK;
+  PTR (dest)[0] = vl & GMP_NUMB_MASK;
    size = vl != 0;
  
  #if GMP_NAIL_BITS != 0
    if (vl > GMP_NUMB_MAX)
      {
        MPZ_REALLOC (dest, 2);
-      dest->_mp_d[1] = vl >> GMP_NUMB_BITS;
+      PTR (dest)[1] = vl >> GMP_NUMB_BITS;
        size = 2;
      }
  #endif
  
-  dest->_mp_size = val >= 0 ? size : -size;
+  SIZ (dest) = val >= 0 ? size : -size;
  }
diff --git a/mpz/iset_str.c b/mpz/iset_str.c

index 302126fdcf7e5736c9aa66ed744a951d9becf642..5a0ec7de0f1fda967da827c819d6bfdab52238e6 100644 (file)
--- a/mpz/iset_str.c
+++ b/mpz/iset_str.c
@@ -5,8 +5,8 @@
     i.e.  0xhh...h means base 16, 0oo...o means base 8, otherwise
     assume base 10.
  
-Copyright 1991, 1993, 1994, 1995, 2000, 2001, 2002 Free Software Foundation,
-Inc.
+Copyright 1991, 1993, 1994, 1995, 2000, 2001, 2002, 2012 Free Software
+Foundation, Inc.
  
  This file is part of the GNU MP Library.
  
@@ -29,15 +29,15 @@ along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
  int
  mpz_init_set_str (mpz_ptr x, const char *str, int base)
  {
-  x->_mp_alloc = 1;
-  x->_mp_d = (mp_ptr) (*__gmp_allocate_func) (BYTES_PER_MP_LIMB);
+  ALLOC (x) = 1;
+  PTR (x) = (mp_ptr) (*__gmp_allocate_func) (BYTES_PER_MP_LIMB);
  
    /* if str has no digits mpz_set_str leaves x->_mp_size unset */
-  x->_mp_size = 0;
+  SIZ (x) = 0;
  
  #ifdef __CHECKER__
    /* let the low limb look initialized, for the benefit of mpz_get_ui etc */
-  x->_mp_d[0] = 0;
+  PTR (x)[0] = 0;
  #endif
  
    return mpz_set_str (x, str, base);
diff --git a/mpz/iset_ui.c b/mpz/iset_ui.c

index 841d91f996d9252dfc4d209a80cbd0430518870a..3090f0825b1e0d39e587c6ab4df7a66045370386 100644 (file)
--- a/mpz/iset_ui.c
+++ b/mpz/iset_ui.c
@@ -1,7 +1,7 @@
  /* mpz_init_set_ui(dest,val) -- Make a new multiple precision in DEST and
     assign VAL to the new number.
  
-Copyright 1991, 1993, 1994, 1995, 2000, 2001, 2002, 2004 Free Software
+Copyright 1991, 1993, 1994, 1995, 2000, 2001, 2002, 2004, 2012 Free Software
  Foundation, Inc.
  
  This file is part of the GNU MP Library.
@@ -27,20 +27,23 @@ mpz_init_set_ui (mpz_ptr dest, unsigned long int val)
  {
    mp_size_t size;
  
-  dest->_mp_alloc = 1;
-  dest->_mp_d = (mp_ptr) (*__gmp_allocate_func) (BYTES_PER_MP_LIMB);
-
-  dest->_mp_d[0] = val & GMP_NUMB_MASK;
-  size = val != 0;
-
  #if BITS_PER_ULONG > GMP_NUMB_BITS  /* avoid warnings about shift amount */
    if (val > GMP_NUMB_MAX)
      {
-      MPZ_REALLOC (dest, 2);
-      dest->_mp_d[1] = val >> GMP_NUMB_BITS;
+      ALLOC (dest) = 2;
+      PTR (dest) = (mp_ptr) (*__gmp_allocate_func) (BYTES_PER_MP_LIMB*2);
+      PTR (dest)[1] = val >> GMP_NUMB_BITS;
        size = 2;
      }
+  else
  #endif
+    {
+      ALLOC (dest) = 1;
+      PTR (dest) = (mp_ptr) (*__gmp_allocate_func) (BYTES_PER_MP_LIMB);
+
+      size = val != 0;
+    }
+  PTR (dest)[0] = val & GMP_NUMB_MASK;
  
-  dest->_mp_size = size;
+  SIZ (dest) = size;
  }
diff --git a/mpz/jacobi.c b/mpz/jacobi.c

index cab11f5fe04dcb4a833b294573ba3e71d59e273b..0a8fb29d54a8510f900671e82cbcd50b37afbc26 100644 (file)
--- a/mpz/jacobi.c
+++ b/mpz/jacobi.c
@@ -1,6 +1,7 @@
  /* mpz_jacobi, mpz_legendre, mpz_kronecker -- mpz/mpz Jacobi symbols.
  
-Copyright 2000, 2001, 2002, 2005 Free Software Foundation, Inc.
+Copyright 2000, 2001, 2002, 2005, 2010, 2011, 2012 Free Software Foundation,
+Inc.
  
  This file is part of the GNU MP Library.
  
@@ -23,23 +24,10 @@ with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
  #include "longlong.h"
  
  
-/* Change this to "#define TRACE(x) x" for some traces. */
-#define TRACE(x)
-
-
-#define MPN_RSHIFT_OR_COPY(dst,src,size,shift)                  \
-  do {                                                          \
-    if ((shift) != 0)                                           \
-      {                                                         \
-        ASSERT_NOCARRY (mpn_rshift (dst, src, size, shift));    \
-        (size) -= ((dst)[(size)-1] == 0);                       \
-      }                                                         \
-    else                                                        \
-      MPN_COPY (dst, src, size);                                \
-  } while (0)
-
-
-/* This code does triple duty as mpz_jacobi, mpz_legendre and mpz_kronecker.
+/* This code does triple duty as mpz_jacobi, mpz_legendre and
+   mpz_kronecker. For ABI compatibility, the link symbol is
+   __gmpz_jacobi, not __gmpz_kronecker, even though the latter would
+   be more logical.
  
     mpz_jacobi could assume b is odd, but the improvements from that seem
     small compared to other operations, and anything significant should be
@@ -51,259 +39,163 @@ with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
     multiple of b), but the checking for that takes little time compared to
     other operations.
  
-   The main loop is just a simple binary GCD with the jacobi symbol result
-   tracked during the reduction.
-
-   The special cases for a or b fitting in one limb let mod_1 or modexact_1
-   get used, without any copying, and end up just as efficient as the mixed
-   precision mpz_kronecker_ui etc.
-
-   When tdiv_qr is called it's not necessary to make "a" odd or make a
-   working copy of it, but tdiv_qr is going to be pretty slow so it's not
-   worth bothering trying to save anything for that case.
-
     Enhancements:
  
     mpn_bdiv_qr should be used instead of mpn_tdiv_qr.
  
-   Some sort of multi-step algorithm should be used.  The current subtract
-   and shift for every bit is very inefficient.  Lehmer (per current gcdext)
-   would need some low bits included in its calculation to apply the sign
-   change for reciprocity.  Binary Lehmer keeps low bits to strip twos
-   anyway, so might be better suited.  Maybe the accelerated GCD style k-ary
-   reduction would work, if sign changes due to the extra factors it
-   introduces can be accounted for (or maybe they can be ignored).  */
-
+*/
  
  int
  mpz_jacobi (mpz_srcptr a, mpz_srcptr b)
  {
    mp_srcptr  asrcp, bsrcp;
    mp_size_t  asize, bsize;
+  mp_limb_t  alow, blow;
    mp_ptr     ap, bp;
-  mp_limb_t  alow, blow, ahigh, bhigh, asecond, bsecond;
-  unsigned   atwos, btwos;
+  unsigned   btwos;
    int        result_bit1;
+  int        res;
    TMP_DECL;
  
-  TRACE (printf ("start asize=%d bsize=%d\n", SIZ(a), SIZ(b));
-         mpz_trace (" a", a);
-         mpz_trace (" b", b));
-
    asize = SIZ(a);
    asrcp = PTR(a);
    alow = asrcp[0];
  
    bsize = SIZ(b);
-  if (bsize == 0)
-    return JACOBI_LS0 (alow, asize);  /* (a/0) */
-
    bsrcp = PTR(b);
    blow = bsrcp[0];
  
+  /* The MPN jacobi functions require positive a and b, and b odd. So
+     we must handle the cases of a or b zero, then signs, and then
+     the case of even b.
+  */
+
+  if (bsize == 0)
+    /* (a/0) = [ a = 1 or a = -1 ] */
+    return JACOBI_LS0 (alow, asize);
+
    if (asize == 0)
-    return JACOBI_0LS (blow, bsize);  /* (0/b) */
+    /* (0/b) = [ b = 1 or b = -1 ] */
+    return JACOBI_0LS (blow, bsize);
  
-  /* (even/even)=0 */
-  if (((alow | blow) & 1) == 0)
+  if ( (((alow | blow) & 1) == 0))
+    /* Common factor of 2 ==> (a/b) = 0 */
      return 0;
  
-  /* account for effect of sign of b, then ignore it */
-  result_bit1 = JACOBI_BSGN_SS_BIT1 (asize, bsize);
-  bsize = ABS (bsize);
+  if (bsize < 0)
+    {
+      /* (a/-1) = -1 if a < 0, +1 if a >= 0 */
+      result_bit1 = (asize < 0) << 1;
+      bsize = -bsize;
+    }
+  else
+    result_bit1 = 0;
  
-  /* low zero limbs on b can be discarded */
    JACOBI_STRIP_LOW_ZEROS (result_bit1, alow, bsrcp, bsize, blow);
  
    count_trailing_zeros (btwos, blow);
-  TRACE (printf ("b twos %u\n", btwos));
-
-  /* establish shifted blow */
    blow >>= btwos;
-  if (bsize > 1)
+
+  if (bsize > 1 && btwos > 0)
      {
-      bsecond = bsrcp[1];
-      if (btwos != 0)
-        blow |= (bsecond << (GMP_NUMB_BITS - btwos)) & GMP_NUMB_MASK;
+      mp_limb_t b1 = bsrcp[1];
+      blow |= b1 << (GMP_NUMB_BITS - btwos);
+      if (bsize == 2 && (b1 >> btwos) == 0)
+       bsize = 1;
      }
  
-  /* account for effect of sign of a, then ignore it */
-  result_bit1 ^= JACOBI_ASGN_SU_BIT1 (asize, blow);
-  asize = ABS (asize);
-
-  if (bsize == 1 || (bsize == 2 && (bsecond >> btwos) == 0))
+  if (asize < 0)
      {
-      /* special case one limb b, use modexact and no copying */
-
-      /* (a/2)=(2/a) with a odd, and if b is even then a is odd here */
-      result_bit1 ^= JACOBI_TWOS_U_BIT1 (btwos, alow);
-
-      if (blow == 1)   /* (a/1)=1 always */
-        return JACOBI_BIT1_TO_PN (result_bit1);
-
-      JACOBI_MOD_OR_MODEXACT_1_ODD (result_bit1, alow, asrcp, asize, blow);
-      TRACE (printf ("base (%lu/%lu) with %d\n",
-                     alow, blow, JACOBI_BIT1_TO_PN (result_bit1)));
-      return mpn_jacobi_base (alow, blow, result_bit1);
+      /* (-1/b) = -1 iff b = 3 (mod 4) */
+      result_bit1 ^= JACOBI_N1B_BIT1(blow);
+      asize = -asize;
      }
  
-  /* Discard low zero limbs of a.  Usually there won't be anything to
-     strip, hence not bothering with it for the bsize==1 case.  */
    JACOBI_STRIP_LOW_ZEROS (result_bit1, blow, asrcp, asize, alow);
  
-  count_trailing_zeros (atwos, alow);
-  TRACE (printf ("a twos %u\n", atwos));
-  result_bit1 ^= JACOBI_TWOS_U_BIT1 (atwos, blow);
+  /* Ensure asize >= bsize. Take advantage of the generalized
+     reciprocity law (a/b*2^n) = (b*2^n / a) * RECIP(a,b) */
  
-  /* establish shifted alow */
-  alow >>= atwos;
-  if (asize > 1)
-    {
-      asecond = asrcp[1];
-      if (atwos != 0)
-        alow |= (asecond << (GMP_NUMB_BITS - atwos)) & GMP_NUMB_MASK;
-    }
-
-  /* (a/2)=(2/a) with a odd */
-  result_bit1 ^= JACOBI_TWOS_U_BIT1 (btwos, alow);
-
-  if (asize == 1 || (asize == 2 && (asecond >> atwos) == 0))
+  if (asize < bsize)
      {
-      /* another special case with modexact and no copying */
-
-      if (alow == 1)  /* (1/b)=1 always */
-        return JACOBI_BIT1_TO_PN (result_bit1);
+      MPN_SRCPTR_SWAP (asrcp, asize, bsrcp, bsize);
+      MP_LIMB_T_SWAP (alow, blow);
  
-      /* b still has its twos, so cancel out their effect */
-      result_bit1 ^= JACOBI_TWOS_U_BIT1 (btwos, alow);
+      /* NOTE: The value of alow (old blow) is a bit subtle. For this code
+        path, we get alow as the low, always odd, limb of shifted A. Which is
+        what we need for the reciprocity update below.
  
-      result_bit1 ^= JACOBI_RECIP_UU_BIT1 (alow, blow);  /* now (b/a) */
-      JACOBI_MOD_OR_MODEXACT_1_ODD (result_bit1, blow, bsrcp, bsize, alow);
-      TRACE (printf ("base (%lu/%lu) with %d\n",
-                     blow, alow, JACOBI_BIT1_TO_PN (result_bit1)));
-      return mpn_jacobi_base (blow, alow, result_bit1);
-    }
+        However, all other uses of alow assume that it is *not*
+        shifted. Luckily, alow matters only when either
  
+        + btwos > 0, in which case A is always odd
  
-  TMP_MARK;
-  TMP_ALLOC_LIMBS_2 (ap, asize, bp, bsize);
+        + asize == bsize == 1, in which case this code path is never
+          taken. */
  
-  MPN_RSHIFT_OR_COPY (ap, asrcp, asize, atwos);
-  ASSERT (alow == ap[0]);
-  TRACE (mpn_trace ("stripped a", ap, asize));
+      count_trailing_zeros (btwos, blow);
+      blow >>= btwos;
  
-  MPN_RSHIFT_OR_COPY (bp, bsrcp, bsize, btwos);
-  ASSERT (blow == bp[0]);
-  TRACE (mpn_trace ("stripped b", bp, bsize));
+      if (bsize > 1 && btwos > 0)
+       {
+         mp_limb_t b1 = bsrcp[1];
+         blow |= b1 << (GMP_NUMB_BITS - btwos);
+         if (bsize == 2 && (b1 >> btwos) == 0)
+           bsize = 1;
+       }
  
-  /* swap if necessary to make a longer than b */
-  if (asize < bsize)
-    {
-      TRACE (printf ("swap\n"));
-      MPN_PTR_SWAP (ap,asize, bp,bsize);
-      MP_LIMB_T_SWAP (alow, blow);
        result_bit1 ^= JACOBI_RECIP_UU_BIT1 (alow, blow);
      }
  
-  /* If a is bigger than b then reduce to a mod b.
-     Division is much faster than chipping away at "a" bit-by-bit. */
-  if (asize > bsize)
+  if (bsize == 1)
      {
-      mp_ptr  rp, qp;
+      result_bit1 ^= JACOBI_TWOS_U_BIT1(btwos, alow);
  
-      TRACE (printf ("tdiv_qr asize=%ld bsize=%ld\n", asize, bsize));
+      if (blow == 1)
+       return JACOBI_BIT1_TO_PN (result_bit1);
  
-      TMP_ALLOC_LIMBS_2 (rp, bsize, qp, asize-bsize+1);
-      mpn_tdiv_qr (qp, rp, (mp_size_t) 0, ap, asize, bp, bsize);
-      ap = rp;
-      asize = bsize;
-      MPN_NORMALIZE (ap, asize);
+      if (asize > 1)
+       JACOBI_MOD_OR_MODEXACT_1_ODD (result_bit1, alow, asrcp, asize, blow);
  
-      TRACE (printf ("tdiv_qr asize=%ld bsize=%ld\n", asize, bsize);
-             mpn_trace (" a", ap, asize);
-             mpn_trace (" b", bp, bsize));
+      return mpn_jacobi_base (alow, blow, result_bit1);
+    }
  
-      if (asize == 0)  /* (0/b)=0 for b!=1 */
-        goto zero;
+  /* Allocation strategy: For A, we allocate a working copy only for A % B, but
+     when A is much larger than B, we have to allocate space for the large
+     quotient. We use the same area, pointed to by bp, for both the quotient
+     A/B and the working copy of B. */
  
-      alow = ap[0];
-      goto strip_a;
-    }
+  TMP_MARK;
  
-  for (;;)
-    {
-      ASSERT (asize >= 1);         /* a,b non-empty */
-      ASSERT (bsize >= 1);
-      ASSERT (ap[asize-1] != 0);   /* a,b normalized (and hence non-zero) */
-      ASSERT (bp[bsize-1] != 0);
-      ASSERT (alow == ap[0]);      /* low limb copies should be correct */
-      ASSERT (blow == bp[0]);
-      ASSERT (alow & 1);           /* a,b odd */
-      ASSERT (blow & 1);
-
-      TRACE (printf ("top asize=%ld bsize=%ld\n", asize, bsize);
-             mpn_trace (" a", ap, asize);
-             mpn_trace (" b", bp, bsize));
-
-      /* swap if necessary to make a>=b, applying reciprocity
-         high limbs are almost always enough to tell which is bigger */
-      if (asize < bsize
-          || (asize == bsize
-              && ((ahigh=ap[asize-1]) < (bhigh=bp[asize-1])
-                  || (ahigh == bhigh
-                      && mpn_cmp (ap, bp, asize-1) < 0))))
-        {
-          TRACE (printf ("swap\n"));
-          MPN_PTR_SWAP (ap,asize, bp,bsize);
-          MP_LIMB_T_SWAP (alow, blow);
-          result_bit1 ^= JACOBI_RECIP_UU_BIT1 (alow, blow);
-        }
-
-      if (asize == 1)
-        break;
-
-      /* a = a-b */
-      ASSERT (asize >= bsize);
-      ASSERT_NOCARRY (mpn_sub (ap, ap, asize, bp, bsize));
-      MPN_NORMALIZE (ap, asize);
-      alow = ap[0];
-
-      /* (0/b)=0 for b!=1.  b!=1 when a==0 because otherwise would have had
-         a==1 which is asize==1 and would have exited above.  */
-      if (asize == 0)
-        goto zero;
-
-    strip_a:
-      /* low zero limbs on a can be discarded */
-      JACOBI_STRIP_LOW_ZEROS (result_bit1, blow, ap, asize, alow);
-
-      if ((alow & 1) == 0)
-        {
-          /* factors of 2 from a */
-          unsigned  twos;
-          count_trailing_zeros (twos, alow);
-          TRACE (printf ("twos %u\n", twos));
-          result_bit1 ^= JACOBI_TWOS_U_BIT1 (twos, blow);
-          ASSERT_NOCARRY (mpn_rshift (ap, ap, asize, twos));
-          asize -= (ap[asize-1] == 0);
-          alow = ap[0];
-        }
-    }
+  if (asize >= 2*bsize)
+    TMP_ALLOC_LIMBS_2 (ap, bsize, bp, asize - bsize + 1);
+  else
+    TMP_ALLOC_LIMBS_2 (ap, bsize, bp, bsize);
  
-  ASSERT (asize == 1 && bsize == 1);  /* just alow and blow left */
-  TMP_FREE;
+  /* In the case of even B, we conceptually shift out the powers of two first,
+     and then divide A mod B. Hence, when taking those powers of two into
+     account, we must use alow *before* the division. Doing the actual division
+     first is ok, because the point is to remove multiples of B from A, and
+     multiples of 2^k B are good enough. */
+  if (asize > bsize)
+    mpn_tdiv_qr (bp, ap, 0, asrcp, asize, bsrcp, bsize);
+  else
+    MPN_COPY (ap, asrcp, bsize);
+
+  if (btwos > 0)
+    {
+      result_bit1 ^= JACOBI_TWOS_U_BIT1(btwos, alow);
  
-  /* (1/b)=1 always (in this case have b==1 because a>=b) */
-  if (alow == 1)
-    return JACOBI_BIT1_TO_PN (result_bit1);
+      ASSERT_NOCARRY (mpn_rshift (bp, bsrcp, bsize, btwos));
+      bsize -= (ap[bsize-1] | bp[bsize-1]) == 0;
+    }
+  else
+    MPN_COPY (bp, bsrcp, bsize);
  
-  /* swap with reciprocity and do (b/a) */
-  result_bit1 ^= JACOBI_RECIP_UU_BIT1 (alow, blow);
-  TRACE (printf ("base (%lu/%lu) with %d\n",
-                 blow, alow, JACOBI_BIT1_TO_PN (result_bit1)));
-  return mpn_jacobi_base (blow, alow, result_bit1);
+  ASSERT (blow == bp[0]);
+  res = mpn_jacobi_n (ap, bp, bsize,
+                     mpn_jacobi_init (ap[0], blow, (result_bit1>>1) & 1));
  
- zero:
    TMP_FREE;
-  return 0;
+  return res;
  }
diff --git a/mpz/kronsz.c b/mpz/kronsz.c

index 50bf7f6674745b95104c71017dda049519c90cbd..fd9c090957b79bf4d2a3f186d7965c42f9ceee85 100644 (file)
--- a/mpz/kronsz.c
+++ b/mpz/kronsz.c
@@ -64,54 +64,54 @@ mpz_si_kronecker (long a, mpz_srcptr b)
        a_limb = (unsigned long) ABS(a);
  
        if ((a_limb & 1) == 0)
-        {
-          /* (0/b)=1 for b=+/-1, 0 otherwise */
-          if (a_limb == 0)
-            return (b_abs_size == 1 && b_low == 1);
-
-          /* a even, b odd */
-          count_trailing_zeros (twos, a_limb);
-          a_limb >>= twos;
-          /* (a*2^n/b) = (a/b) * twos(n,a) */
-          result_bit1 ^= JACOBI_TWOS_U_BIT1 (twos, b_low);
-        }
+       {
+         /* (0/b)=1 for b=+/-1, 0 otherwise */
+         if (a_limb == 0)
+           return (b_abs_size == 1 && b_low == 1);
+
+         /* a even, b odd */
+         count_trailing_zeros (twos, a_limb);
+         a_limb >>= twos;
+         /* (a*2^n/b) = (a/b) * twos(n,a) */
+         result_bit1 ^= JACOBI_TWOS_U_BIT1 (twos, b_low);
+       }
      }
    else
      {
        /* (even/even)=0, and (0/b)=0 for b!=+/-1 */
        if ((a & 1) == 0)
-        return 0;
+       return 0;
  
        /* a odd, b even
  
-         Establish shifted b_low with valid bit1 for ASGN and RECIP below.
-         Zero limbs stripped are accounted for, but zero bits on b_low are
-         not because they remain in {b_ptr,b_abs_size} for the
-         JACOBI_MOD_OR_MODEXACT_1_ODD. */
+        Establish shifted b_low with valid bit1 for ASGN and RECIP below.
+        Zero limbs stripped are accounted for, but zero bits on b_low are
+        not because they remain in {b_ptr,b_abs_size} for the
+        JACOBI_MOD_OR_MODEXACT_1_ODD. */
  
        JACOBI_STRIP_LOW_ZEROS (result_bit1, a, b_ptr, b_abs_size, b_low);
        if ((b_low & 1) == 0)
-        {
-          if (UNLIKELY (b_low == GMP_NUMB_HIGHBIT))
-            {
-              /* need b_ptr[1] to get bit1 in b_low */
-              if (b_abs_size == 1)
-                {
-                  /* (a/0x80000000) = (a/2)^(BPML-1) */
-                  if ((GMP_NUMB_BITS % 2) == 0)
-                    result_bit1 ^= JACOBI_TWO_U_BIT1 (a);
-                  return JACOBI_BIT1_TO_PN (result_bit1);
-                }
-
-              /* b_abs_size > 1 */
-              b_low = b_ptr[1] << 1;
-            }
-          else
-            {
-              count_trailing_zeros (twos, b_low);
-              b_low >>= twos;
-            }
-        }
+       {
+         if (UNLIKELY (b_low == GMP_NUMB_HIGHBIT))
+           {
+             /* need b_ptr[1] to get bit1 in b_low */
+             if (b_abs_size == 1)
+               {
+                 /* (a/0x80000000) = (a/2)^(BPML-1) */
+                 if ((GMP_NUMB_BITS % 2) == 0)
+                   result_bit1 ^= JACOBI_TWO_U_BIT1 (a);
+                 return JACOBI_BIT1_TO_PN (result_bit1);
+               }
+
+             /* b_abs_size > 1 */
+             b_low = b_ptr[1] << 1;
+           }
+         else
+           {
+             count_trailing_zeros (twos, b_low);
+             b_low >>= twos;
+           }
+       }
  
        result_bit1 ^= JACOBI_ASGN_SU_BIT1 (a, b_low);
        a_limb = (unsigned long) ABS(a);
diff --git a/mpz/kronuz.c b/mpz/kronuz.c

index 82a9962836e165ed7b3daa707fe857f2b1472902..ece0bfb6e7b21768c40f2a2d591e4cc6dd6b5a97 100644 (file)
--- a/mpz/kronuz.c
+++ b/mpz/kronuz.c
@@ -56,57 +56,57 @@ mpz_ui_kronecker (unsigned long a, mpz_srcptr b)
      {
        /* (0/b)=0 for b!=+/-1; and (even/even)=0 */
        if (! (a & 1))
-        return 0;
+       return 0;
  
        /* a odd, b even
  
-         Establish shifted b_low with valid bit1 for the RECIP below.  Zero
-         limbs stripped are accounted for, but zero bits on b_low are not
-         because they remain in {b_ptr,b_abs_size} for
-         JACOBI_MOD_OR_MODEXACT_1_ODD. */
+        Establish shifted b_low with valid bit1 for the RECIP below.  Zero
+        limbs stripped are accounted for, but zero bits on b_low are not
+        because they remain in {b_ptr,b_abs_size} for
+        JACOBI_MOD_OR_MODEXACT_1_ODD. */
  
        JACOBI_STRIP_LOW_ZEROS (result_bit1, a, b_ptr, b_abs_size, b_low);
        if (! (b_low & 1))
-        {
-          if (UNLIKELY (b_low == GMP_NUMB_HIGHBIT))
-            {
-              /* need b_ptr[1] to get bit1 in b_low */
-              if (b_abs_size == 1)
-                {
-                  /* (a/0x80...00) == (a/2)^(NUMB-1) */
-                  if ((GMP_NUMB_BITS % 2) == 0)
-                    {
-                      /* JACOBI_STRIP_LOW_ZEROS does nothing to result_bit1
-                         when GMP_NUMB_BITS is even, so it's still 0. */
-                      ASSERT (result_bit1 == 0);
-                      result_bit1 = JACOBI_TWO_U_BIT1 (a);
-                    }
-                  return JACOBI_BIT1_TO_PN (result_bit1);
-                }
-
-              /* b_abs_size > 1 */
-              b_low = b_ptr[1] << 1;
-            }
-          else
-            {
-              count_trailing_zeros (twos, b_low);
-              b_low >>= twos;
-            }
-        }
+       {
+         if (UNLIKELY (b_low == GMP_NUMB_HIGHBIT))
+           {
+             /* need b_ptr[1] to get bit1 in b_low */
+             if (b_abs_size == 1)
+               {
+                 /* (a/0x80...00) == (a/2)^(NUMB-1) */
+                 if ((GMP_NUMB_BITS % 2) == 0)
+                   {
+                     /* JACOBI_STRIP_LOW_ZEROS does nothing to result_bit1
+                        when GMP_NUMB_BITS is even, so it's still 0. */
+                     ASSERT (result_bit1 == 0);
+                     result_bit1 = JACOBI_TWO_U_BIT1 (a);
+                   }
+                 return JACOBI_BIT1_TO_PN (result_bit1);
+               }
+
+             /* b_abs_size > 1 */
+             b_low = b_ptr[1] << 1;
+           }
+         else
+           {
+             count_trailing_zeros (twos, b_low);
+             b_low >>= twos;
+           }
+       }
      }
    else
      {
        if (a == 0)        /* (0/b)=1 for b=+/-1, 0 otherwise */
-        return (b_abs_size == 1 && b_low == 1);
+       return (b_abs_size == 1 && b_low == 1);
  
        if (! (a & 1))
-        {
-          /* a even, b odd */
-          count_trailing_zeros (twos, a);
-          a >>= twos;
-          /* (a*2^n/b) = (a/b) * (2/a)^n */
-          result_bit1 = JACOBI_TWOS_U_BIT1 (twos, b_low);
-        }
+       {
+         /* a even, b odd */
+         count_trailing_zeros (twos, a);
+         a >>= twos;
+         /* (a*2^n/b) = (a/b) * (2/a)^n */
+         result_bit1 = JACOBI_TWOS_U_BIT1 (twos, b_low);
+       }
      }
  
    if (a == 1)
diff --git a/mpz/kronzs.c b/mpz/kronzs.c

index 045e5563d94a0d4716409e1e78d69c8940e8a2a4..d0f8117e3b2b6883357b4eb2f158bf9daa8aada2 100644 (file)
--- a/mpz/kronzs.c
+++ b/mpz/kronzs.c
@@ -50,7 +50,7 @@ mpz_kronecker_si (mpz_srcptr a, long b)
  #endif
  
    result_bit1 = JACOBI_BSGN_SS_BIT1 (a_size, b);
-  b_limb = (unsigned long) ABS (b);
+  b_limb = ABS_CAST (unsigned long, b);
    a_ptr = PTR(a);
  
    if ((b_limb & 1) == 0)
@@ -59,10 +59,10 @@ mpz_kronecker_si (mpz_srcptr a, long b)
        int        twos;
  
        if (b_limb == 0)
-        return JACOBI_LS0 (a_low, a_size);   /* (a/0) */
+       return JACOBI_LS0 (a_low, a_size);   /* (a/0) */
  
        if (! (a_low & 1))
-        return 0;  /* (even/even)=0 */
+       return 0;  /* (even/even)=0 */
  
        /* (a/2)=(2/a) for a odd */
        count_trailing_zeros (twos, b_limb);
diff --git a/mpz/kronzu.c b/mpz/kronzu.c

index e73a0f86ae9971ee89f25a6fd7a85d8908bf241b..9b0eb1ed14aa51c2b56c72251c50cc9032af3d81 100644 (file)
--- a/mpz/kronzu.c
+++ b/mpz/kronzu.c
@@ -55,16 +55,16 @@ mpz_kronecker_ui (mpz_srcptr a, unsigned long b)
        int        twos;
  
        if (b == 0)
-        return JACOBI_LS0 (a_low, a_size);   /* (a/0) */
+       return JACOBI_LS0 (a_low, a_size);   /* (a/0) */
  
        if (! (a_low & 1))
-        return 0;  /* (even/even)=0 */
+       return 0;  /* (even/even)=0 */
  
        /* (a/2)=(2/a) for a odd */
        count_trailing_zeros (twos, b);
        b >>= twos;
        result_bit1 = (JACOBI_TWOS_U_BIT1 (twos, a_low)
-                     ^ JACOBI_ASGN_SU_BIT1 (a_size, b));
+                    ^ JACOBI_ASGN_SU_BIT1 (a_size, b));
      }
  
    if (b == 1)
diff --git a/mpz/lcm.c b/mpz/lcm.c

index 22ac04177336512c2fcf451bd91307cc04f98bd2..e871ca1fe32b00d7b2508e22256329588aa600ce 100644 (file)
--- a/mpz/lcm.c
+++ b/mpz/lcm.c
@@ -1,6 +1,6 @@
  /* mpz_lcm -- mpz/mpz least common multiple.
  
-Copyright 1996, 2000, 2001, 2005 Free Software Foundation, Inc.
+Copyright 1996, 2000, 2001, 2005, 2012 Free Software Foundation, Inc.
  
  This file is part of the GNU MP Library.
  
@@ -19,14 +19,12 @@ along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
  
  #include "gmp.h"
  #include "gmp-impl.h"
-#include "longlong.h"
-
  
  void
  mpz_lcm (mpz_ptr r, mpz_srcptr u, mpz_srcptr v)
  {
    mpz_t g;
-  mp_size_t usize, vsize, size;
+  mp_size_t usize, vsize;
    TMP_DECL;
  
    usize = SIZ (u);
@@ -39,13 +37,18 @@ mpz_lcm (mpz_ptr r, mpz_srcptr u, mpz_srcptr v)
    usize = ABS (usize);
    vsize = ABS (vsize);
  
-  if (vsize == 1)
+  if (vsize == 1 || usize == 1)
      {
        mp_limb_t  vl, gl, c;
        mp_srcptr  up;
        mp_ptr     rp;
  
-    one:
+      if (usize == 1)
+       {
+         usize = vsize;
+         MPZ_SRCPTR_SWAP (u, v);
+       }
+
        MPZ_REALLOC (r, usize+1);
  
        up = PTR(u);
@@ -61,16 +64,8 @@ mpz_lcm (mpz_ptr r, mpz_srcptr u, mpz_srcptr v)
        return;
      }
  
-  if (usize == 1)
-    {
-      usize = vsize;
-      MPZ_SRCPTR_SWAP (u, v);
-      goto one;
-    }
-
    TMP_MARK;
-  size = MAX (usize, vsize);
-  MPZ_TMP_INIT (g, size);
+  MPZ_TMP_INIT (g, usize); /* v != 0 implies |gcd(u,v)| <= |u| */
  
    mpz_gcd (g, u, v);
    mpz_divexact (g, u, g);
diff --git a/mpz/lucnum2_ui.c b/mpz/lucnum2_ui.c

index a6b6cfb689d171824307551dccd0a949a60a5217..fa09d5cac0dbb60d2067c7795cc62385e7cba7c7 100644 (file)
--- a/mpz/lucnum2_ui.c
+++ b/mpz/lucnum2_ui.c
@@ -1,6 +1,6 @@
  /* mpz_lucnum2_ui -- calculate Lucas numbers.
  
-Copyright 2001, 2003, 2005 Free Software Foundation, Inc.
+Copyright 2001, 2003, 2005, 2012 Free Software Foundation, Inc.
  
  This file is part of the GNU MP Library.
  
@@ -53,10 +53,8 @@ mpz_lucnum2_ui (mpz_ptr ln, mpz_ptr lnsub1, unsigned long n)
    size = MPN_FIB2_SIZE (n);
    f1p = TMP_ALLOC_LIMBS (size);
  
-  MPZ_REALLOC (ln,     size+1);
-  MPZ_REALLOC (lnsub1, size+1);
-  lp  = PTR(ln);
-  l1p = PTR(lnsub1);
+  lp  = MPZ_REALLOC (ln,     size+1);
+  l1p = MPZ_REALLOC (lnsub1, size+1);
  
    size = mpn_fib2_ui (l1p, f1p, n);
  
diff --git a/mpz/lucnum_ui.c b/mpz/lucnum_ui.c

index 1fb8ec86c371f0f0b6622aa0581d64a75a298885..cf0b7c5c67088c35cf78b33d30c3aceb10c2f85d 100644 (file)
--- a/mpz/lucnum_ui.c
+++ b/mpz/lucnum_ui.c
@@ -1,6 +1,6 @@
  /* mpz_lucnum_ui -- calculate Lucas number.
  
-Copyright 2001, 2003, 2005 Free Software Foundation, Inc.
+Copyright 2001, 2003, 2005, 2011, 2012 Free Software Foundation, Inc.
  
  This file is part of the GNU MP Library.
  
@@ -66,8 +66,7 @@ mpz_lucnum_ui (mpz_ptr ln, unsigned long n)
       since square or mul used below might need an extra limb over the true
       size */
    lalloc = MPN_FIB2_SIZE (n) + 2;
-  MPZ_REALLOC (ln, lalloc);
-  lp = PTR (ln);
+  lp = MPZ_REALLOC (ln, lalloc);
  
    TMP_MARK;
    xalloc = lalloc;
@@ -80,84 +79,85 @@ mpz_lucnum_ui (mpz_ptr ln, unsigned long n)
    for (;;)
      {
        if (n & 1)
-        {
-          /* L[2k+1] = 5*F[k-1]*(2*F[k]+F[k-1]) - 4*(-1)^k */
+       {
+         /* L[2k+1] = 5*F[k-1]*(2*F[k]+F[k-1]) - 4*(-1)^k */
  
-          mp_size_t  yalloc, ysize;
-          mp_ptr     yp;
+         mp_size_t  yalloc, ysize;
+         mp_ptr     yp;
  
-          TRACE (printf ("  initial odd n=%lu\n", n));
+         TRACE (printf ("  initial odd n=%lu\n", n));
  
-          yalloc = MPN_FIB2_SIZE (n/2);
-          yp = TMP_ALLOC_LIMBS (yalloc);
-          ASSERT (xalloc >= yalloc);
+         yalloc = MPN_FIB2_SIZE (n/2);
+         yp = TMP_ALLOC_LIMBS (yalloc);
+         ASSERT (xalloc >= yalloc);
  
-          xsize = mpn_fib2_ui (xp, yp, n/2);
+         xsize = mpn_fib2_ui (xp, yp, n/2);
  
-          /* possible high zero on F[k-1] */
-          ysize = xsize;
-          ysize -= (yp[ysize-1] == 0);
-          ASSERT (yp[ysize-1] != 0);
+         /* possible high zero on F[k-1] */
+         ysize = xsize;
+         ysize -= (yp[ysize-1] == 0);
+         ASSERT (yp[ysize-1] != 0);
  
-          /* xp = 2*F[k] + F[k-1] */
+         /* xp = 2*F[k] + F[k-1] */
  #if HAVE_NATIVE_mpn_addlsh1_n
-          c = mpn_addlsh1_n (xp, yp, xp, xsize);
+         c = mpn_addlsh1_n (xp, yp, xp, xsize);
  #else
-          c = mpn_lshift (xp, xp, xsize, 1);
-          c += mpn_add_n (xp, xp, yp, xsize);
+         c = mpn_lshift (xp, xp, xsize, 1);
+         c += mpn_add_n (xp, xp, yp, xsize);
  #endif
-          ASSERT (xalloc >= xsize+1);
-          xp[xsize] = c;
-          xsize += (c != 0);
-          ASSERT (xp[xsize-1] != 0);
-
-          ASSERT (lalloc >= xsize + ysize);
-          c = mpn_mul (lp, xp, xsize, yp, ysize);
-          lsize = xsize + ysize;
-          lsize -= (c == 0);
-
-          /* lp = 5*lp */
-#if HAVE_NATIVE_mpn_addlshift
-          c = mpn_addlshift (lp, lp, lsize, 2);
+         ASSERT (xalloc >= xsize+1);
+         xp[xsize] = c;
+         xsize += (c != 0);
+         ASSERT (xp[xsize-1] != 0);
+
+         ASSERT (lalloc >= xsize + ysize);
+         c = mpn_mul (lp, xp, xsize, yp, ysize);
+         lsize = xsize + ysize;
+         lsize -= (c == 0);
+
+         /* lp = 5*lp */
+#if HAVE_NATIVE_mpn_addlsh2_n
+         c = mpn_addlsh2_n (lp, lp, lp, lsize);
  #else
-          c = mpn_lshift (xp, lp, lsize, 2);
-          c += mpn_add_n (lp, lp, xp, lsize);
+         /* FIXME: Is this faster than mpn_mul_1 ? */
+         c = mpn_lshift (xp, lp, lsize, 2);
+         c += mpn_add_n (lp, lp, xp, lsize);
  #endif
-          ASSERT (lalloc >= lsize+1);
-          lp[lsize] = c;
-          lsize += (c != 0);
-
-          /* lp = lp - 4*(-1)^k */
-          if (n & 2)
-            {
-              /* no overflow, see comments above */
-              ASSERT (lp[0] <= MP_LIMB_T_MAX-4);
-              lp[0] += 4;
-            }
-          else
-            {
-              /* won't go negative */
-              MPN_DECR_U (lp, lsize, CNST_LIMB(4));
-            }
-
-          TRACE (mpn_trace ("  l",lp, lsize));
-          break;
-        }
+         ASSERT (lalloc >= lsize+1);
+         lp[lsize] = c;
+         lsize += (c != 0);
+
+         /* lp = lp - 4*(-1)^k */
+         if (n & 2)
+           {
+             /* no overflow, see comments above */
+             ASSERT (lp[0] <= MP_LIMB_T_MAX-4);
+             lp[0] += 4;
+           }
+         else
+           {
+             /* won't go negative */
+             MPN_DECR_U (lp, lsize, CNST_LIMB(4));
+           }
+
+         TRACE (mpn_trace ("  l",lp, lsize));
+         break;
+       }
  
        MP_PTR_SWAP (xp, lp); /* balance the swaps wanted in the L[2k] below */
        zeros++;
        n /= 2;
  
        if (n <= FIB_TABLE_LUCNUM_LIMIT)
-        {
-          /* L[n] = F[n] + 2F[n-1] */
-          lp[0] = FIB_TABLE (n) + 2 * FIB_TABLE ((int) n - 1);
-          lsize = 1;
-
-          TRACE (printf ("  initial small n=%lu\n", n);
-                 mpn_trace ("  l",lp, lsize));
-          break;
-        }
+       {
+         /* L[n] = F[n] + 2F[n-1] */
+         lp[0] = FIB_TABLE (n) + 2 * FIB_TABLE ((int) n - 1);
+         lsize = 1;
+
+         TRACE (printf ("  initial small n=%lu\n", n);
+                mpn_trace ("  l",lp, lsize));
+         break;
+       }
      }
  
    for ( ; zeros != 0; zeros--)
@@ -172,19 +172,19 @@ mpz_lucnum_ui (mpz_ptr ln, unsigned long n)
        lsize -= (xp[lsize-1] == 0);
  
        /* First time around the loop k==n determines (-1)^k, after that k is
-         always even and we set n=0 to indicate that.  */
+        always even and we set n=0 to indicate that.  */
        if (n & 1)
-        {
-          /* L[n]^2 == 0 or 1 mod 4, like all squares, so +2 gives no carry */
-          ASSERT (xp[0] <= MP_LIMB_T_MAX-2);
-          xp[0] += 2;
-          n = 0;
-        }
+       {
+         /* L[n]^2 == 0 or 1 mod 4, like all squares, so +2 gives no carry */
+         ASSERT (xp[0] <= MP_LIMB_T_MAX-2);
+         xp[0] += 2;
+         n = 0;
+       }
        else
-        {
-          /* won't go negative */
-          MPN_DECR_U (xp, lsize, CNST_LIMB(2));
-        }
+       {
+         /* won't go negative */
+         MPN_DECR_U (xp, lsize, CNST_LIMB(2));
+       }
  
        MP_PTR_SWAP (xp, lp);
        ASSERT (lp[lsize-1] != 0);
diff --git a/mpz/mfac_uiui.c b/mpz/mfac_uiui.c

new file mode 100644 (file)

index 0000000..da392ab
--- /dev/null
+++ b/mpz/mfac_uiui.c
@@ -0,0 +1,124 @@
+/* mpz_mfac_uiui(RESULT, N, M) -- Set RESULT to N!^(M) = N(N-M)(N-2M)...
+
+Contributed to the GNU project by Marco Bodrato.
+
+Copyright 2012 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+/*************************************************************/
+/* Section macros: common macros, for swing/fac/bin (&sieve) */
+/*************************************************************/
+
+#define FACTOR_LIST_STORE(P, PR, MAX_PR, VEC, I)               \
+  do {                                                         \
+    if ((PR) > (MAX_PR)) {                                     \
+      (VEC)[(I)++] = (PR);                                     \
+      (PR) = (P);                                              \
+    } else                                                     \
+      (PR) *= (P);                                             \
+  } while (0)
+
+/*********************************************************/
+/* Section other factorials:                             */
+/*********************************************************/
+
+/* mpz_mfac_uiui (x, n, m) computes x = n!^(m) = n*(n-m)*(n-2m)*...   */
+/* A common g = gcd(n,m) > 1 is mostly pulled out: g^sn * (n/g)!^(m/g).  */
+void
+mpz_mfac_uiui (mpz_ptr x, unsigned long n, unsigned long m)
+{
+  ASSERT (n <= GMP_NUMB_MAX);
+  ASSERT (m != 0);
+
+  if (n < 3 || n - 3 < m - 1) { /* (n < 3 || n - 1 <= m || m == 0) */
+    PTR (x)[0] = n + (n == 0); /* n itself, or 1 when n == 0 */
+    SIZ (x) = 1; /* NOTE(review): no realloc; assumes ALLOC (x) >= 1 */
+  } else { /* m < n - 1 < GMP_NUMB_MAX */
+    mp_limb_t g, sn;
+    mpz_t     t;
+
+    sn = n; /* one-limb copy of n to pass to mpn_gcd_1 */
+    g = mpn_gcd_1 (&sn, 1, m);
+    if (g != 1) { n/=g; m/=g; } /* strip the common factor */
+
+    if (m <= 2) { /* fac or 2fac */
+      if (m == 1) {
+       if (g > 2) {
+         mpz_init (t);
+         mpz_fac_ui (t, n);
+         sn = n; /* exponent of g in the final g^sn */
+       } else {
+         if (g == 2)
+           mpz_2fac_ui (x, n << 1); /* n << 1 undoes n/=g for g == 2 */
+         else
+           mpz_fac_ui (x, n);
+         return;
+       }
+      } else { /* m == 2 */
+       if (g != 1) {
+         mpz_init (t);
+         mpz_2fac_ui (t, n);
+         sn = n / 2 + 1; /* exponent of g in the final g^sn */
+       } else {
+         mpz_2fac_ui (x, n);
+         return;
+       }
+      }
+    } else { /* m >= 3, gcd(n,m) = 1 */
+      mp_limb_t *factors;
+      mp_limb_t prod, max_prod, j;
+      TMP_DECL;
+
+      sn = n / m + 1; /* sizes factors[]; also the exponent of g below */
+
+      j = 0;
+      prod = n;
+      n -= m;
+      max_prod = GMP_NUMB_MAX / n; /* prod <= max_prod: prod * n fits a limb */
+
+      TMP_MARK;
+      factors = TMP_ALLOC_LIMBS (sn / log_n_max (n) + 2);
+
+      for (; n > m; n -= m)
+       FACTOR_LIST_STORE (n, prod, max_prod, factors, j);
+
+      factors[j++] = n; /* last factor, n <= m here */
+      factors[j++] = prod; /* partial product not yet stored */
+
+      if (g > 1) { /* t gets multiplied by g^sn below */
+       mpz_init (t);
+       mpz_prodlimbs (t, factors, j);
+      } else
+       mpz_prodlimbs (x, factors, j);
+
+      TMP_FREE;
+    }
+
+    if (g > 1) {
+      mpz_t p;
+
+      mpz_init (p);
+      mpz_ui_pow_ui (p, g, sn); /* g^sn */
+      mpz_mul (x, p, t);
+      mpz_clear (p);
+      mpz_clear (t);
+    }
+  }
+}
diff --git a/mpz/millerrabin.c b/mpz/millerrabin.c

index f717278b0f6ae0bde8591c91b3cc47ce63fc95ee..bf6a9a649d48e44f77707c984c2a6125c77f0b35 100644 (file)
--- a/mpz/millerrabin.c
+++ b/mpz/millerrabin.c
@@ -30,9 +30,9 @@ along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
  #include "gmp.h"
  #include "gmp-impl.h"
  
-static int millerrabin __GMP_PROTO ((mpz_srcptr, mpz_srcptr,
-                                    mpz_ptr, mpz_ptr,
-                                    mpz_srcptr, unsigned long int));
+static int millerrabin (mpz_srcptr, mpz_srcptr,
+                       mpz_ptr, mpz_ptr,
+                       mpz_srcptr, unsigned long int);
  
  int
  mpz_millerrabin (mpz_srcptr n, int reps)
@@ -91,7 +91,7 @@ mpz_millerrabin (mpz_srcptr n, int reps)
  
  static int
  millerrabin (mpz_srcptr n, mpz_srcptr nm1, mpz_ptr x, mpz_ptr y,
-             mpz_srcptr q, unsigned long int k)
+            mpz_srcptr q, unsigned long int k)
  {
    unsigned long int i;
  
diff --git a/mpz/mod.c b/mpz/mod.c

index f7b84115a7b0ed2cc41864a9683d29be9c39d57f..cf0fe26d96b47b4ece1703095d69095d74858244 100644 (file)
--- a/mpz/mod.c
+++ b/mpz/mod.c
@@ -1,7 +1,7 @@
  /* mpz_mod -- The mathematical mod function.
  
-Copyright 1991, 1993, 1994, 1995, 1996, 2001, 2002, 2005 Free Software
-Foundation, Inc.
+Copyright 1991, 1993, 1994, 1995, 1996, 2001, 2002, 2005, 2010, 2012
+Free Software Foundation, Inc.
  
  This file is part of the GNU MP Library.
  
@@ -24,34 +24,34 @@ along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
  void
  mpz_mod (mpz_ptr rem, mpz_srcptr dividend, mpz_srcptr divisor)
  {
-  mp_size_t divisor_size = divisor->_mp_size;
-  mpz_t temp_divisor;          /* N.B.: lives until function returns! */
+  mp_size_t rn, bn;
+  mpz_t temp_divisor;
    TMP_DECL;
  
    TMP_MARK;
  
+  bn = ABSIZ(divisor);
+
    /* We need the original value of the divisor after the remainder has been
       preliminary calculated.  We have to copy it to temporary space if it's
       the same variable as REM.  */
    if (rem == divisor)
      {
-      MPZ_TMP_INIT (temp_divisor, ABS (divisor_size));
-      mpz_set (temp_divisor, divisor);
-      divisor = temp_divisor;
+      PTR(temp_divisor) = TMP_ALLOC_LIMBS (bn);
+      MPN_COPY (PTR(temp_divisor), PTR(divisor), bn);
+    }
+  else
+    {
+      PTR(temp_divisor) = PTR(divisor);
      }
+  SIZ(temp_divisor) = bn;
+  divisor = temp_divisor;
  
    mpz_tdiv_r (rem, dividend, divisor);
  
-  if (rem->_mp_size != 0)
-    {
-      if (dividend->_mp_size < 0)
-       {
-         if (divisor->_mp_size < 0)
-           mpz_sub (rem, rem, divisor);
-         else
-           mpz_add (rem, rem, divisor);
-       }
-    }
+  rn = SIZ (rem);
+  if (rn < 0)
+    mpz_add (rem, rem, divisor);
  
    TMP_FREE;
  }
diff --git a/mpz/mul.c b/mpz/mul.c

index ee49aeab899264191a373ccbfc12401ff063631b..71c62e06b976fb6e048c82739ed535ddb18bf017 100644 (file)
--- a/mpz/mul.c
+++ b/mpz/mul.c
@@ -1,6 +1,6 @@
  /* mpz_mul -- Multiply two integers.
  
-Copyright 1991, 1993, 1994, 1996, 2000, 2001, 2005, 2009, 2011 Free
+Copyright 1991, 1993, 1994, 1996, 2000, 2001, 2005, 2009, 2011, 2012 Free
  Software Foundation, Inc.
  
  This file is part of the GNU MP Library.
@@ -21,17 +21,10 @@ along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
  #include <stdio.h> /* for NULL */
  #include "gmp.h"
  #include "gmp-impl.h"
-#ifdef BERKELEY_MP
-#include "mp.h"
-#endif
  
  
  void
-#ifndef BERKELEY_MP
  mpz_mul (mpz_ptr w, mpz_srcptr u, mpz_srcptr v)
-#else /* BERKELEY_MP */
-mult (mpz_srcptr u, mpz_srcptr v, mpz_ptr w)
-#endif /* BERKELEY_MP */
  {
    mp_size_t usize;
    mp_size_t vsize;
@@ -58,61 +51,59 @@ mult (mpz_srcptr u, mpz_srcptr v, mpz_ptr w)
  
    if (vsize == 0)
      {
-      SIZ(w) = 0;
+      SIZ (w) = 0;
        return;
      }
  
  #if HAVE_NATIVE_mpn_mul_2
    if (vsize <= 2)
      {
-      MPZ_REALLOC (w, usize+vsize);
-      wp = PTR(w);
+      wp = MPZ_REALLOC (w, usize+vsize);
        if (vsize == 1)
-        cy_limb = mpn_mul_1 (wp, PTR(u), usize, PTR(v)[0]);
+       cy_limb = mpn_mul_1 (wp, PTR (u), usize, PTR (v)[0]);
        else
-        {
-          cy_limb = mpn_mul_2 (wp, PTR(u), usize, PTR(v));
-          usize++;
-        }
+       {
+         cy_limb = mpn_mul_2 (wp, PTR (u), usize, PTR (v));
+         usize++;
+       }
        wp[usize] = cy_limb;
        usize += (cy_limb != 0);
-      SIZ(w) = (sign_product >= 0 ? usize : -usize);
+      SIZ (w) = (sign_product >= 0 ? usize : -usize);
        return;
      }
  #else
    if (vsize == 1)
      {
-      MPZ_REALLOC (w, usize+1);
-      wp = PTR(w);
-      cy_limb = mpn_mul_1 (wp, PTR(u), usize, PTR(v)[0]);
+      wp = MPZ_REALLOC (w, usize+1);
+      cy_limb = mpn_mul_1 (wp, PTR (u), usize, PTR (v)[0]);
        wp[usize] = cy_limb;
        usize += (cy_limb != 0);
-      SIZ(w) = (sign_product >= 0 ? usize : -usize);
+      SIZ (w) = (sign_product >= 0 ? usize : -usize);
        return;
      }
  #endif
  
    TMP_MARK;
    free_me = NULL;
-  up = PTR(u);
-  vp = PTR(v);
-  wp = PTR(w);
+  up = PTR (u);
+  vp = PTR (v);
+  wp = PTR (w);
  
    /* Ensure W has space enough to store the result.  */
    wsize = usize + vsize;
-  if (ALLOC(w) < wsize)
+  if (ALLOC (w) < wsize)
      {
        if (wp == up || wp == vp)
         {
           free_me = wp;
-         free_me_size = ALLOC(w);
+         free_me_size = ALLOC (w);
         }
        else
-       (*__gmp_free_func) (wp, ALLOC(w) * BYTES_PER_MP_LIMB);
+       (*__gmp_free_func) (wp, ALLOC (w) * BYTES_PER_MP_LIMB);
  
-      ALLOC(w) = wsize;
+      ALLOC (w) = wsize;
        wp = (mp_ptr) (*__gmp_allocate_func) (wsize * BYTES_PER_MP_LIMB);
-      PTR(w) = wp;
+      PTR (w) = wp;
      }
    else
      {
@@ -148,7 +139,7 @@ mult (mpz_srcptr u, mpz_srcptr v, mpz_ptr w)
  
    wsize -= cy_limb == 0;
  
-  SIZ(w) = sign_product < 0 ? -wsize : wsize;
+  SIZ (w) = sign_product < 0 ? -wsize : wsize;
    if (free_me != NULL)
      (*__gmp_free_func) (free_me, free_me_size * BYTES_PER_MP_LIMB);
    TMP_FREE;
diff --git a/mpz/mul_2exp.c b/mpz/mul_2exp.c

index a1521816eacde345d720f2db64167af79a168d91..4e1f4e6bd5769b00ca8d404b746dd08c2083ab21 100644 (file)
--- a/mpz/mul_2exp.c
+++ b/mpz/mul_2exp.c
@@ -1,6 +1,7 @@
  /* mpz_mul_2exp -- Multiply a bignum by 2**CNT
  
-Copyright 1991, 1993, 1994, 1996, 2001, 2002 Free Software Foundation, Inc.
+Copyright 1991, 1993, 1994, 1996, 2001, 2002, 2012 Free Software Foundation,
+Inc.
  
  This file is part of the GNU MP Library.
  
@@ -21,47 +22,41 @@ along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
  #include "gmp-impl.h"
  
  void
-mpz_mul_2exp (mpz_ptr w, mpz_srcptr u, mp_bitcnt_t cnt)
+mpz_mul_2exp (mpz_ptr r, mpz_srcptr u, mp_bitcnt_t cnt)
  {
-  mp_size_t usize = u->_mp_size;
-  mp_size_t abs_usize = ABS (usize);
-  mp_size_t wsize;
+  mp_size_t un, rn;
    mp_size_t limb_cnt;
-  mp_ptr wp;
-  mp_limb_t wlimb;
-
-  if (usize == 0)
-    {
-      w->_mp_size = 0;
-      return;
-    }
+  mp_ptr rp;
+  mp_srcptr up;
+  mp_limb_t rlimb;
  
+  un = ABSIZ (u);
    limb_cnt = cnt / GMP_NUMB_BITS;
-  wsize = abs_usize + limb_cnt + 1;
-  if (w->_mp_alloc < wsize)
-    _mpz_realloc (w, wsize);
-
-  wp = w->_mp_d;
-  wsize = abs_usize + limb_cnt;
+  rn = un + limb_cnt;
  
-  cnt %= GMP_NUMB_BITS;
-  if (cnt != 0)
+  if (un == 0)
+    rn = 0;
+  else
      {
-      wlimb = mpn_lshift (wp + limb_cnt, u->_mp_d, abs_usize, cnt);
-      if (wlimb != 0)
+      rp = MPZ_REALLOC (r, rn + 1);
+      up = PTR(u);
+
+      cnt %= GMP_NUMB_BITS;
+      if (cnt != 0)
         {
-         wp[wsize] = wlimb;
-         wsize++;
+         rlimb = mpn_lshift (rp + limb_cnt, up, un, cnt);
+         rp[rn] = rlimb;
+         rn += (rlimb != 0);
+       }
+      else
+       {
+         MPN_COPY_DECR (rp + limb_cnt, up, un);
         }
-    }
-  else
-    {
-      MPN_COPY_DECR (wp + limb_cnt, u->_mp_d, abs_usize);
-    }
  
-  /* Zero all whole limbs at low end.  Do it here and not before calling
-     mpn_lshift, not to lose for U == W.  */
-  MPN_ZERO (wp, limb_cnt);
+      /* Zero all whole limbs at low end.  Do it here and not before calling
+        mpn_lshift, not to lose for U == R.  */
+      MPN_ZERO (rp, limb_cnt);
+    }
  
-  w->_mp_size = usize >= 0 ? wsize : -wsize;
+  SIZ(r) = SIZ(u) >= 0 ? rn : -rn;
  }
diff --git a/mpz/mul_i.h b/mpz/mul_i.h

index 2de3fe0b5e41ab323da5c050a9869be9115f2bc0..868c90d035622b870ab478b414142dac59b00801 100644 (file)
--- a/mpz/mul_i.h
+++ b/mpz/mul_i.h
@@ -1,8 +1,8 @@
  /* mpz_mul_ui/si (product, multiplier, small_multiplicand) -- Set PRODUCT to
     MULTIPLICATOR times SMALL_MULTIPLICAND.
  
-Copyright 1991, 1993, 1994, 1996, 2000, 2001, 2002, 2005, 2008 Free Software
-Foundation, Inc.
+Copyright 1991, 1993, 1994, 1996, 2000, 2001, 2002, 2005, 2008, 2012
+Free Software Foundation, Inc.
  
  This file is part of the GNU MP Library.
  
@@ -44,27 +44,27 @@ void
  FUNCTION (mpz_ptr prod, mpz_srcptr mult,
            MULTIPLICAND_UNSIGNED long int small_mult)
  {
-  mp_size_t size = SIZ(mult);
-  mp_size_t sign_product = size;
+  mp_size_t size;
+  mp_size_t sign_product;
    mp_limb_t sml;
    mp_limb_t cy;
    mp_ptr pp;
  
-  if (size == 0 || small_mult == 0)
+  sign_product = SIZ(mult);
+  if (sign_product == 0 || small_mult == 0)
      {
        SIZ(prod) = 0;
        return;
      }
  
-  size = ABS (size);
+  size = ABS (sign_product);
  
    sml = MULTIPLICAND_ABS (small_mult);
  
    if (sml <= GMP_NUMB_MAX)
      {
-      MPZ_REALLOC (prod, size + 1);
-      pp = PTR(prod);
-      cy = mpn_mul_1 (pp, PTR(mult), size, sml & GMP_NUMB_MASK);
+      pp = MPZ_REALLOC (prod, size + 1);
+      cy = mpn_mul_1 (pp, PTR(mult), size, sml);
        pp[size] = cy;
        size += cy != 0;
      }
@@ -79,14 +79,14 @@ FUNCTION (mpz_ptr prod, mpz_srcptr mult,
  
        tp = TMP_ALLOC_LIMBS (size + 2);
  
+      /* Use, maybe, mpn_mul_2? */
        cy = mpn_mul_1 (tp, PTR(mult), size, sml & GMP_NUMB_MASK);
        tp[size] = cy;
        cy = mpn_addmul_1 (tp + 1, PTR(mult), size, sml >> GMP_NUMB_BITS);
        tp[size + 1] = cy;
        size += 2;
        MPN_NORMALIZE_NOT_ZERO (tp, size); /* too general, need to trim one or two limb */
-      MPZ_REALLOC (prod, size);
-      pp = PTR(prod);
+      pp = MPZ_REALLOC (prod, size);
        MPN_COPY (pp, tp, size);
        TMP_FREE;
      }
diff --git a/mpz/n_pow_ui.c b/mpz/n_pow_ui.c

index 6d527c36ce0281d524a64d314050e33737150a63..1a457b63d66bdd12d2a53ebb9c19f2ee6a39375e 100644 (file)
--- a/mpz/n_pow_ui.c
+++ b/mpz/n_pow_ui.c
@@ -4,7 +4,7 @@
     CERTAIN TO BE SUBJECT TO INCOMPATIBLE CHANGES OR DISAPPEAR COMPLETELY IN
     FUTURE GNU MP RELEASES.
  
-Copyright 2001, 2002, 2005 Free Software Foundation, Inc.
+Copyright 2001, 2002, 2005, 2012 Free Software Foundation, Inc.
  
  This file is part of the GNU MP Library.
  
@@ -163,11 +163,11 @@ mpz_n_pow_ui (mpz_ptr r, mp_srcptr bp, mp_size_t bsize, unsigned long int e)
    TMP_DECL;
  
    TRACE (printf ("mpz_n_pow_ui rp=0x%lX bp=0x%lX bsize=%ld e=%lu (0x%lX)\n",
-                 PTR(r), bp, bsize, e, e);
-         mpn_trace ("b", bp, bsize));
+                PTR(r), bp, bsize, e, e);
+        mpn_trace ("b", bp, bsize));
  
    ASSERT (bsize == 0 || bp[ABS(bsize)-1] != 0);
-  ASSERT (MPN_SAME_OR_SEPARATE2_P (PTR(r), ABSIZ(r), bp, bsize));
+  ASSERT (MPN_SAME_OR_SEPARATE2_P (PTR(r), ALLOC(r), bp, ABS(bsize)));
  
    /* b^0 == 1, including 0^0 == 1 */
    if (e == 0)
@@ -207,7 +207,7 @@ mpz_n_pow_ui (mpz_ptr r, mp_srcptr bp, mp_size_t bsize, unsigned long int e)
    rtwos_limbs += rtwos_bits / GMP_NUMB_BITS;
    rtwos_bits %= GMP_NUMB_BITS;
    TRACE (printf ("trailing zero btwos=%d rtwos_limbs=%ld rtwos_bits=%lu\n",
-                 btwos, rtwos_limbs, rtwos_bits));
+                btwos, rtwos_limbs, rtwos_bits));
  
    TMP_MARK;
  
@@ -220,25 +220,25 @@ mpz_n_pow_ui (mpz_ptr r, mp_srcptr bp, mp_size_t bsize, unsigned long int e)
      {
      bsize_1:
        /* Power up as far as possible within blimb.  We start here with e!=0,
-         but if e is small then we might reach e==0 and the whole b^e in rl.
-         Notice this code works when blimb==1 too, reaching e==0.  */
+        but if e is small then we might reach e==0 and the whole b^e in rl.
+        Notice this code works when blimb==1 too, reaching e==0.  */
  
        while (blimb <= GMP_NUMB_HALFMAX)
-        {
-          TRACE (printf ("small e=0x%lX blimb=0x%lX rl=0x%lX\n",
-                         e, blimb, rl));
-          ASSERT (e != 0);
-          if ((e & 1) != 0)
-            rl *= blimb;
-          e >>= 1;
-          if (e == 0)
-            goto got_rl;
-          blimb *= blimb;
-        }
+       {
+         TRACE (printf ("small e=0x%lX blimb=0x%lX rl=0x%lX\n",
+                        e, blimb, rl));
+         ASSERT (e != 0);
+         if ((e & 1) != 0)
+           rl *= blimb;
+         e >>= 1;
+         if (e == 0)
+           goto got_rl;
+         blimb *= blimb;
+       }
  
  #if HAVE_NATIVE_mpn_mul_2
        TRACE (printf ("single power, e=0x%lX b=0x%lX rl=0x%lX\n",
-                     e, blimb, rl));
+                    e, blimb, rl));
  
        /* Can power b once more into blimb:blimb_low */
        bsize = 2;
@@ -254,62 +254,62 @@ mpz_n_pow_ui (mpz_ptr r, mp_srcptr bp, mp_size_t bsize, unsigned long int e)
  
      got_rl:
        TRACE (printf ("double power e=0x%lX blimb=0x%lX:0x%lX rl=0x%lX:%lX\n",
-                     e, blimb, blimb_low, rl_high, rl));
+                    e, blimb, blimb_low, rl_high, rl));
  
        /* Combine left-over rtwos_bits into rl_high:rl to be handled by the
-         final mul_1 or mul_2 rather than a separate lshift.
-         - rl_high:rl mustn't be 1 (since then there's no final mul)
-         - rl_high mustn't overflow
-         - rl_high mustn't change to non-zero, since mul_1+lshift is
-         probably faster than mul_2 (FIXME: is this true?)  */
+        final mul_1 or mul_2 rather than a separate lshift.
+        - rl_high:rl mustn't be 1 (since then there's no final mul)
+        - rl_high mustn't overflow
+        - rl_high mustn't change to non-zero, since mul_1+lshift is
+        probably faster than mul_2 (FIXME: is this true?)  */
  
        if (rtwos_bits != 0
-          && ! (rl_high == 0 && rl == 1)
-          && (rl_high >> (GMP_NUMB_BITS-rtwos_bits)) == 0)
-        {
-          mp_limb_t  new_rl_high = (rl_high << rtwos_bits)
-            | (rl >> (GMP_NUMB_BITS-rtwos_bits));
-          if (! (rl_high == 0 && new_rl_high != 0))
-            {
-              rl_high = new_rl_high;
-              rl <<= rtwos_bits;
-              rtwos_bits = 0;
-              TRACE (printf ("merged rtwos_bits, rl=0x%lX:%lX\n",
-                             rl_high, rl));
-            }
-        }
+         && ! (rl_high == 0 && rl == 1)
+         && (rl_high >> (GMP_NUMB_BITS-rtwos_bits)) == 0)
+       {
+         mp_limb_t  new_rl_high = (rl_high << rtwos_bits)
+           | (rl >> (GMP_NUMB_BITS-rtwos_bits));
+         if (! (rl_high == 0 && new_rl_high != 0))
+           {
+             rl_high = new_rl_high;
+             rl <<= rtwos_bits;
+             rtwos_bits = 0;
+             TRACE (printf ("merged rtwos_bits, rl=0x%lX:%lX\n",
+                            rl_high, rl));
+           }
+       }
  #else
      got_rl:
        TRACE (printf ("small power e=0x%lX blimb=0x%lX rl=0x%lX\n",
-                     e, blimb, rl));
+                    e, blimb, rl));
  
        /* Combine left-over rtwos_bits into rl to be handled by the final
-         mul_1 rather than a separate lshift.
-         - rl mustn't be 1 (since then there's no final mul)
-         - rl mustn't overflow  */
+        mul_1 rather than a separate lshift.
+        - rl mustn't be 1 (since then there's no final mul)
+        - rl mustn't overflow  */
  
        if (rtwos_bits != 0
-          && rl != 1
-          && (rl >> (GMP_NUMB_BITS-rtwos_bits)) == 0)
-        {
-          rl <<= rtwos_bits;
-          rtwos_bits = 0;
-          TRACE (printf ("merged rtwos_bits, rl=0x%lX\n", rl));
-        }
+         && rl != 1
+         && (rl >> (GMP_NUMB_BITS-rtwos_bits)) == 0)
+       {
+         rl <<= rtwos_bits;
+         rtwos_bits = 0;
+         TRACE (printf ("merged rtwos_bits, rl=0x%lX\n", rl));
+       }
  #endif
      }
    else if (bsize == 2)
      {
        mp_limb_t  bsecond = bp[1];
        if (btwos != 0)
-        blimb |= (bsecond << (GMP_NUMB_BITS - btwos)) & GMP_NUMB_MASK;
+       blimb |= (bsecond << (GMP_NUMB_BITS - btwos)) & GMP_NUMB_MASK;
        bsecond >>= btwos;
        if (bsecond == 0)
-        {
-          /* Two limbs became one after rshift. */
-          bsize = 1;
-          goto bsize_1;
-        }
+       {
+         /* Two limbs became one after rshift. */
+         bsize = 1;
+         goto bsize_1;
+       }
  
        TRACE (printf ("bsize==2 using b=0x%lX:%lX", bsecond, blimb));
  #if HAVE_NATIVE_mpn_mul_2
@@ -324,12 +324,12 @@ mpz_n_pow_ui (mpz_ptr r, mp_srcptr bp, mp_size_t bsize, unsigned long int e)
    else
      {
        if (r_bp_overlap || btwos != 0)
-        {
-          mp_ptr tp = TMP_ALLOC_LIMBS (bsize);
-          MPN_RSHIFT_OR_COPY (tp, bp, bsize, btwos);
-          bp = tp;
-          TRACE (printf ("rshift or copy bp,bsize, new bsize=%ld\n", bsize));
-        }
+       {
+         mp_ptr tp = TMP_ALLOC_LIMBS (bsize);
+         MPN_RSHIFT_OR_COPY (tp, bp, bsize, btwos);
+         bp = tp;
+         TRACE (printf ("rshift or copy bp,bsize, new bsize=%ld\n", bsize));
+       }
  #if HAVE_NATIVE_mpn_mul_2
        /* in case 3 limbs rshift to 2 and hence use the mul_2 loop below */
        blimb_low = bp[0];
@@ -337,7 +337,7 @@ mpz_n_pow_ui (mpz_ptr r, mp_srcptr bp, mp_size_t bsize, unsigned long int e)
        blimb = bp[bsize-1];
  
        TRACE (printf ("big bsize=%ld  ", bsize);
-             mpn_trace ("b", bp, bsize));
+            mpn_trace ("b", bp, bsize));
      }
  
    /* At this point blimb is the most significant limb of the base to use.
@@ -360,9 +360,8 @@ mpz_n_pow_ui (mpz_ptr r, mp_srcptr bp, mp_size_t bsize, unsigned long int e)
    count_leading_zeros (cnt, blimb);
    ralloc = (bsize*GMP_NUMB_BITS - cnt + GMP_NAIL_BITS) * e / GMP_NUMB_BITS + 5;
    TRACE (printf ("ralloc %ld, from bsize=%ld blimb=0x%lX cnt=%d\n",
-                 ralloc, bsize, blimb, cnt));
-  MPZ_REALLOC (r, ralloc + rtwos_limbs);
-  rp = PTR(r);
+                ralloc, bsize, blimb, cnt));
+  rp = MPZ_REALLOC (r, ralloc + rtwos_limbs);
  
    /* Low zero limbs resulting from powers of 2. */
    MPN_ZERO (rp, rtwos_limbs);
@@ -371,7 +370,7 @@ mpz_n_pow_ui (mpz_ptr r, mp_srcptr bp, mp_size_t bsize, unsigned long int e)
    if (e == 0)
      {
        /* Any e==0 other than via bsize==1 or bsize==2 is covered at the
-         start. */
+        start. */
        rp[0] = rl;
        rsize = 1;
  #if HAVE_NATIVE_mpn_mul_2
@@ -386,125 +385,125 @@ mpz_n_pow_ui (mpz_ptr r, mp_srcptr bp, mp_size_t bsize, unsigned long int e)
        mp_size_t  talloc;
  
        /* In the mpn_mul_1 or mpn_mul_2 loops or in the mpn_mul loop when the
-         low bit of e is zero, tp only has to hold the second last power
-         step, which is half the size of the final result.  There's no need
-         to round up the divide by 2, since ralloc includes a +2 for rl
-         which not needed by tp.  In the mpn_mul loop when the low bit of e
-         is 1, tp must hold nearly the full result, so just size it the same
-         as rp.  */
+        low bit of e is zero, tp only has to hold the second last power
+        step, which is half the size of the final result.  There's no need
+        to round up the divide by 2, since ralloc includes a +2 for rl
+        which not needed by tp.  In the mpn_mul loop when the low bit of e
+        is 1, tp must hold nearly the full result, so just size it the same
+        as rp.  */
  
        talloc = ralloc;
  #if HAVE_NATIVE_mpn_mul_2
        if (bsize <= 2 || (e & 1) == 0)
-        talloc /= 2;
+       talloc /= 2;
  #else
        if (bsize <= 1 || (e & 1) == 0)
-        talloc /= 2;
+       talloc /= 2;
  #endif
        TRACE (printf ("talloc %ld\n", talloc));
        tp = TMP_ALLOC_LIMBS (talloc);
  
        /* Go from high to low over the bits of e, starting with i pointing at
-         the bit below the highest 1 (which will mean i==-1 if e==1).  */
-      count_leading_zeros (cnt, e);
+        the bit below the highest 1 (which will mean i==-1 if e==1).  */
+      count_leading_zeros (cnt, (mp_limb_t) e);
        i = GMP_LIMB_BITS - cnt - 2;
  
  #if HAVE_NATIVE_mpn_mul_2
        if (bsize <= 2)
-        {
-          mp_limb_t  mult[2];
-
-          /* Any bsize==1 will have been powered above to be two limbs. */
-          ASSERT (bsize == 2);
-          ASSERT (blimb != 0);
-
-          /* Arrange the final result ends up in r, not in the temp space */
-          if ((i & 1) == 0)
-            SWAP_RP_TP;
-
-          rp[0] = blimb_low;
-          rp[1] = blimb;
-          rsize = 2;
-
-          mult[0] = blimb_low;
-          mult[1] = blimb;
-
-          for ( ; i >= 0; i--)
-            {
-              TRACE (printf ("mul_2 loop i=%d e=0x%lX, rsize=%ld ralloc=%ld talloc=%ld\n",
-                             i, e, rsize, ralloc, talloc);
-                     mpn_trace ("r", rp, rsize));
-
-              MPN_SQR (tp, talloc, rp, rsize);
-              SWAP_RP_TP;
-              if ((e & (1L << i)) != 0)
-                MPN_MUL_2 (rp, rsize, ralloc, mult);
-            }
-
-          TRACE (mpn_trace ("mul_2 before rl, r", rp, rsize));
-          if (rl_high != 0)
-            {
-              mult[0] = rl;
-              mult[1] = rl_high;
-              MPN_MUL_2 (rp, rsize, ralloc, mult);
-            }
-          else if (rl != 1)
-            MPN_MUL_1 (rp, rsize, ralloc, rl);
-        }
+       {
+         mp_limb_t  mult[2];
+
+         /* Any bsize==1 will have been powered above to be two limbs. */
+         ASSERT (bsize == 2);
+         ASSERT (blimb != 0);
+
+         /* Arrange the final result ends up in r, not in the temp space */
+         if ((i & 1) == 0)
+           SWAP_RP_TP;
+
+         rp[0] = blimb_low;
+         rp[1] = blimb;
+         rsize = 2;
+
+         mult[0] = blimb_low;
+         mult[1] = blimb;
+
+         for ( ; i >= 0; i--)
+           {
+             TRACE (printf ("mul_2 loop i=%d e=0x%lX, rsize=%ld ralloc=%ld talloc=%ld\n",
+                            i, e, rsize, ralloc, talloc);
+                    mpn_trace ("r", rp, rsize));
+
+             MPN_SQR (tp, talloc, rp, rsize);
+             SWAP_RP_TP;
+             if ((e & (1L << i)) != 0)
+               MPN_MUL_2 (rp, rsize, ralloc, mult);
+           }
+
+         TRACE (mpn_trace ("mul_2 before rl, r", rp, rsize));
+         if (rl_high != 0)
+           {
+             mult[0] = rl;
+             mult[1] = rl_high;
+             MPN_MUL_2 (rp, rsize, ralloc, mult);
+           }
+         else if (rl != 1)
+           MPN_MUL_1 (rp, rsize, ralloc, rl);
+       }
  #else
        if (bsize == 1)
-        {
-          /* Arrange the final result ends up in r, not in the temp space */
-          if ((i & 1) == 0)
-            SWAP_RP_TP;
-
-          rp[0] = blimb;
-          rsize = 1;
-
-          for ( ; i >= 0; i--)
-            {
-              TRACE (printf ("mul_1 loop i=%d e=0x%lX, rsize=%ld ralloc=%ld talloc=%ld\n",
-                             i, e, rsize, ralloc, talloc);
-                     mpn_trace ("r", rp, rsize));
-
-              MPN_SQR (tp, talloc, rp, rsize);
-              SWAP_RP_TP;
-              if ((e & (1L << i)) != 0)
-                MPN_MUL_1 (rp, rsize, ralloc, blimb);
-            }
-
-          TRACE (mpn_trace ("mul_1 before rl, r", rp, rsize));
-          if (rl != 1)
-            MPN_MUL_1 (rp, rsize, ralloc, rl);
-        }
+       {
+         /* Arrange the final result ends up in r, not in the temp space */
+         if ((i & 1) == 0)
+           SWAP_RP_TP;
+
+         rp[0] = blimb;
+         rsize = 1;
+
+         for ( ; i >= 0; i--)
+           {
+             TRACE (printf ("mul_1 loop i=%d e=0x%lX, rsize=%ld ralloc=%ld talloc=%ld\n",
+                            i, e, rsize, ralloc, talloc);
+                    mpn_trace ("r", rp, rsize));
+
+             MPN_SQR (tp, talloc, rp, rsize);
+             SWAP_RP_TP;
+             if ((e & (1L << i)) != 0)
+               MPN_MUL_1 (rp, rsize, ralloc, blimb);
+           }
+
+         TRACE (mpn_trace ("mul_1 before rl, r", rp, rsize));
+         if (rl != 1)
+           MPN_MUL_1 (rp, rsize, ralloc, rl);
+       }
  #endif
        else
-        {
-          int  parity;
-
-          /* Arrange the final result ends up in r, not in the temp space */
-          ULONG_PARITY (parity, e);
-          if (((parity ^ i) & 1) != 0)
-            SWAP_RP_TP;
-
-          MPN_COPY (rp, bp, bsize);
-          rsize = bsize;
-
-          for ( ; i >= 0; i--)
-            {
-              TRACE (printf ("mul loop i=%d e=0x%lX, rsize=%ld ralloc=%ld talloc=%ld\n",
-                             i, e, rsize, ralloc, talloc);
-                     mpn_trace ("r", rp, rsize));
-
-              MPN_SQR (tp, talloc, rp, rsize);
-              SWAP_RP_TP;
-              if ((e & (1L << i)) != 0)
-                {
-                  MPN_MUL (tp, talloc, rp, rsize, bp, bsize);
-                  SWAP_RP_TP;
-                }
-            }
-        }
+       {
+         int  parity;
+
+         /* Arrange the final result ends up in r, not in the temp space */
+         ULONG_PARITY (parity, e);
+         if (((parity ^ i) & 1) != 0)
+           SWAP_RP_TP;
+
+         MPN_COPY (rp, bp, bsize);
+         rsize = bsize;
+
+         for ( ; i >= 0; i--)
+           {
+             TRACE (printf ("mul loop i=%d e=0x%lX, rsize=%ld ralloc=%ld talloc=%ld\n",
+                            i, e, rsize, ralloc, talloc);
+                    mpn_trace ("r", rp, rsize));
+
+             MPN_SQR (tp, talloc, rp, rsize);
+             SWAP_RP_TP;
+             if ((e & (1L << i)) != 0)
+               {
+                 MPN_MUL (tp, talloc, rp, rsize, bp, bsize);
+                 SWAP_RP_TP;
+               }
+           }
+       }
      }
  
    ASSERT (rp == PTR(r) + rtwos_limbs);
diff --git a/mpz/neg.c b/mpz/neg.c

index 6d0f8a994d221ebe44f80d5c1fa05fbff74cb421..caf2df5d4cefad7e6904572d077c54291d2cafbe 100644 (file)
--- a/mpz/neg.c
+++ b/mpz/neg.c
@@ -1,6 +1,6 @@
  /* mpz_neg(mpz_ptr dst, mpz_ptr src) -- Assign the negated value of SRC to DST.
  
-Copyright 1991, 1993, 1994, 1995, 2001 Free Software Foundation, Inc.
+Copyright 1991, 1993, 1994, 1995, 2001, 2012 Free Software Foundation, Inc.
  
  This file is part of the GNU MP Library.
  
@@ -25,23 +25,22 @@ along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
  void
  mpz_neg (mpz_ptr w, mpz_srcptr u)
  {
-  mp_ptr wp, up;
+  mp_ptr wp;
+  mp_srcptr up;
    mp_size_t usize, size;
  
-  usize = u->_mp_size;
+  usize = SIZ (u);
  
    if (u != w)
      {
        size = ABS (usize);
  
-      if (w->_mp_alloc < size)
-       _mpz_realloc (w, size);
+      wp = MPZ_NEWALLOC (w, size);
  
-      wp = w->_mp_d;
-      up = u->_mp_d;
+      up = PTR (u);
  
        MPN_COPY (wp, up, size);
      }
  
-  w->_mp_size = -usize;
+  SIZ (w) = -usize;
  }
diff --git a/mpz/nextprime.c b/mpz/nextprime.c

index 9e68ea8a6616b27ea5133eb46bb0d6a1b9ffe5cb..c1af45ad0b2737f50f29f8318714e5209aa12ee4 100644 (file)
--- a/mpz/nextprime.c
+++ b/mpz/nextprime.c
@@ -1,6 +1,6 @@
  /* mpz_nextprime(p,t) - compute the next prime > t and store that in p.
  
-Copyright 1999, 2000, 2001, 2008, 2009 Free Software Foundation, Inc.
+Copyright 1999, 2000, 2001, 2008, 2009, 2012 Free Software Foundation, Inc.
  
  Contributed to the GNU project by Niels Möller and Torbjorn Granlund.
  
@@ -43,7 +43,6 @@ mpz_nextprime (mpz_ptr p, mpz_srcptr n)
    int i;
    unsigned prime_limit;
    unsigned long prime;
-  int cnt;
    mp_size_t pn;
    mp_bitcnt_t nbits;
    unsigned incr;
@@ -62,8 +61,7 @@ mpz_nextprime (mpz_ptr p, mpz_srcptr n)
      return;
  
    pn = SIZ(p);
-  count_leading_zeros (cnt, PTR(p)[pn - 1]);
-  nbits = pn * GMP_NUMB_BITS - (cnt - GMP_NAIL_BITS);
+  MPN_SIZEINBASE_2EXP(nbits, PTR(p), pn, 1);
    if (nbits / 2 >= NUMBER_OF_PRIMES)
      prime_limit = NUMBER_OF_PRIMES - 1;
    else
diff --git a/mpz/oddfac_1.c b/mpz/oddfac_1.c

new file mode 100644 (file)

index 0000000..e1ce119
--- /dev/null
+++ b/mpz/oddfac_1.c
@@ -0,0 +1,416 @@
+/* mpz_oddfac_1(RESULT, N) -- Set RESULT to the odd factor of N!.
+
+Contributed to the GNU project by Marco Bodrato.
+
+THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.
+IT IS ONLY SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.
+IN FACT, IT IS ALMOST GUARANTEED THAT IT WILL CHANGE OR
+DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2010, 2011, 2012 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+/* TODO:
+   - split this file into smaller parts with functions that can be recycled for different computations.
+ */
+
+/**************************************************************/
+/* Section macros: common macros, for mswing/fac/bin (&sieve) */
+/**************************************************************/
+
+#define FACTOR_LIST_APPEND(PR, MAX_PR, VEC, I)                 \
+  if ((PR) > (MAX_PR)) {                                       \
+    (VEC)[(I)++] = (PR);                                       \
+    (PR) = 1;                                                  \
+  }
+
+#define FACTOR_LIST_STORE(P, PR, MAX_PR, VEC, I)               \
+  do {                                                         \
+    if ((PR) > (MAX_PR)) {                                     \
+      (VEC)[(I)++] = (PR);                                     \
+      (PR) = (P);                                              \
+    } else                                                     \
+      (PR) *= (P);                                             \
+  } while (0)
+
+#define LOOP_ON_SIEVE_CONTINUE(prime,end,sieve)                        \
+    __max_i = (end);                                           \
+                                                               \
+    do {                                                       \
+      ++__i;                                                   \
+      if (((sieve)[__index] & __mask) == 0)                    \
+       {                                                       \
+         (prime) = id_to_n(__i)
+
+#define LOOP_ON_SIEVE_BEGIN(prime,start,end,off,sieve)         \
+  do {                                                         \
+    mp_limb_t __mask, __index, __max_i, __i;                   \
+                                                               \
+    __i = (start)-(off);                                       \
+    __index = __i / GMP_LIMB_BITS;                             \
+    __mask = CNST_LIMB(1) << (__i % GMP_LIMB_BITS);            \
+    __i += (off);                                              \
+                                                               \
+    LOOP_ON_SIEVE_CONTINUE(prime,end,sieve)
+
+#define LOOP_ON_SIEVE_STOP                                     \
+       }                                                       \
+      __mask = __mask << 1 | __mask >> (GMP_LIMB_BITS-1);      \
+      __index += __mask & 1;                                   \
+    }  while (__i <= __max_i)                                  \
+
+#define LOOP_ON_SIEVE_END                                      \
+    LOOP_ON_SIEVE_STOP;                                                \
+  } while (0)
+
+/*********************************************************/
+/* Section sieve: sieving functions and tools for primes */
+/*********************************************************/
+
+#if WANT_ASSERT
+static mp_limb_t
+bit_to_n (mp_limb_t bit) { return (bit*3+4)|1; }
+#endif
+
+/* id_to_n (x) = bit_to_n (x-1) = (x*3+1)|1 */
+static mp_limb_t
+id_to_n  (mp_limb_t id)  { return id*3+1+(id&1); }
+
+/* n_to_bit (n) = ((n-1)&(-CNST_LIMB(2)))/3U-1 */
+static mp_limb_t
+n_to_bit (mp_limb_t n) { return ((n-5)|1)/3U; }
+
+#if WANT_ASSERT
+static mp_size_t
+primesieve_size (mp_limb_t n) { return n_to_bit(n) / GMP_LIMB_BITS + 1; }
+#endif
+
+/*********************************************************/
+/* Section mswing: 2-multiswing factorial                 */
+/*********************************************************/
+
+/* Returns an approximation of the square root of x. *
+ * It gives: x <= limb_apprsqrt (x) ^ 2 < x * 9/4    */
+static mp_limb_t
+limb_apprsqrt (mp_limb_t x)
+{
+  int s;
+
+  ASSERT (x > 2);
+  count_leading_zeros (s, x - 1);
+  s = GMP_LIMB_BITS - 1 - s;
+  return (CNST_LIMB(1) << (s >> 1)) + (CNST_LIMB(1) << ((s - 1) >> 1));
+}
+
+#if 0
+/* A count-then-exponentiate variant for SWING_A_PRIME */
+#define SWING_A_PRIME(P, N, PR, MAX_PR, VEC, I)                \
+  do {                                                 \
+    mp_limb_t __q, __prime;                            \
+    int __exp;                                         \
+    __prime = (P);                                     \
+    __exp = 0;                                         \
+    __q = (N);                                         \
+    do {                                               \
+      __q /= __prime;                                  \
+      __exp += __q & 1;                                        \
+    } while (__q >= __prime);                          \
+    if (__exp) { /* Store $prime^{exp}$ */             \
+      for (__q = __prime; --__exp; __q *= __prime);    \
+      FACTOR_LIST_STORE(__q, PR, MAX_PR, VEC, I);      \
+    };                                                 \
+  } while (0)
+#else
+#define SWING_A_PRIME(P, N, PR, MAX_PR, VEC, I)        \
+  do {                                         \
+    mp_limb_t __q, __prime;                    \
+    __prime = (P);                             \
+    FACTOR_LIST_APPEND(PR, MAX_PR, VEC, I);    \
+    __q = (N);                                 \
+    do {                                       \
+      __q /= __prime;                          \
+      if ((__q & 1) != 0) (PR) *= __prime;     \
+    } while (__q >= __prime);                  \
+  } while (0)
+#endif
+
+#define SH_SWING_A_PRIME(P, N, PR, MAX_PR, VEC, I)     \
+  do {                                                 \
+    mp_limb_t __prime;                                 \
+    __prime = (P);                                     \
+    if ((((N) / __prime) & 1) != 0)                    \
+      FACTOR_LIST_STORE(__prime, PR, MAX_PR, VEC, I);  \
+  } while (0)
+
+/* mpz_2multiswing_1 computes the odd part of the 2-multiswing
+   factorial of the parameter n.  The result x is an odd positive
+   integer so that multiswing(n,2) = x 2^a.
+
+   Uses the algorithm described by Peter Luschny in "Divide, Swing and
+   Conquer the Factorial!".
+
+   The pointer sieve points to primesieve_size(n) limbs containing a
+   bit-array where primes are marked as 0.
+   Enough (FIXME: explain :-) limbs must be pointed to by factors.
+ */
+
+static void
+mpz_2multiswing_1 (mpz_ptr x, mp_limb_t n, mp_ptr sieve, mp_ptr factors)
+{
+  mp_limb_t prod, max_prod;
+  mp_size_t j;
+
+  ASSERT (n >= 26);
+
+  j = 0;
+  prod  = -(n & 1);
+  n &= ~ CNST_LIMB(1); /* n-1, if n is odd */
+
+  prod = (prod & n) + 1; /* the original n, if it was odd, 1 otherwise */
+  max_prod = GMP_NUMB_MAX / (n-1);
+
+  /* Handle prime = 3 separately. */
+  SWING_A_PRIME (3, n, prod, max_prod, factors, j);
+
+  /* Swing primes from 5 to n/3 */
+  {
+    mp_limb_t s;
+
+    {
+      mp_limb_t prime;
+
+      s = limb_apprsqrt(n);
+      ASSERT (s >= 5);
+      s = n_to_bit (s);
+      LOOP_ON_SIEVE_BEGIN (prime, n_to_bit (5), s, 0,sieve);
+      SWING_A_PRIME (prime, n, prod, max_prod, factors, j);
+      LOOP_ON_SIEVE_END;
+      s++;
+    }
+
+    ASSERT (max_prod <= GMP_NUMB_MAX / 3);
+    ASSERT (bit_to_n (s) * bit_to_n (s) > n);
+    ASSERT (s <= n_to_bit (n / 3));
+    {
+      mp_limb_t prime;
+      mp_limb_t l_max_prod = max_prod * 3;
+
+      LOOP_ON_SIEVE_BEGIN (prime, s, n_to_bit (n/3), 0, sieve);
+      SH_SWING_A_PRIME (prime, n, prod, l_max_prod, factors, j);
+      LOOP_ON_SIEVE_END;
+    }
+  }
+
+  /* Store primes from (n+1)/2 to n */
+  {
+    mp_limb_t prime;
+    LOOP_ON_SIEVE_BEGIN (prime, n_to_bit (n >> 1) + 1, n_to_bit (n), 0,sieve);
+    FACTOR_LIST_STORE (prime, prod, max_prod, factors, j);
+    LOOP_ON_SIEVE_END;
+  }
+
+  if (LIKELY (j != 0))
+    {
+      factors[j++] = prod;
+      mpz_prodlimbs (x, factors, j);
+    }
+  else
+    {
+      PTR (x)[0] = prod;
+      SIZ (x) = 1;
+    }
+}
+
+#undef SWING_A_PRIME
+#undef SH_SWING_A_PRIME
+#undef LOOP_ON_SIEVE_END
+#undef LOOP_ON_SIEVE_STOP
+#undef LOOP_ON_SIEVE_BEGIN
+#undef LOOP_ON_SIEVE_CONTINUE
+#undef FACTOR_LIST_APPEND
+
+/*********************************************************/
+/* Section oddfac: odd factorial, needed also by binomial*/
+/*********************************************************/
+
+#if TUNE_PROGRAM_BUILD
+#define FACTORS_PER_LIMB (GMP_NUMB_BITS / (LOG2C(FAC_DSC_THRESHOLD_LIMIT-1)+1))
+#else
+#define FACTORS_PER_LIMB (GMP_NUMB_BITS / (LOG2C(FAC_DSC_THRESHOLD-1)+1))
+#endif
+
+/* mpz_oddfac_1 computes the odd part of the factorial of the
+   parameter n.  I.e. n! = x 2^a, where x is the returned value: an
+   odd positive integer.
+
+   If flag != 0 a square is skipped in the DSC part, e.g.
+   if n is odd, n > FAC_DSC_THRESHOLD and flag = 1, x is set to n!!.
+
+   If n is too small, flag is ignored, and an ASSERT can be triggered.
+
+   TODO: FAC_DSC_THRESHOLD is used here with two different roles:
+    - to decide when prime factorisation is needed,
+    - to stop the recursion, once sieving is done.
+   Maybe two thresholds can do a better job.
+ */
+void
+mpz_oddfac_1 (mpz_ptr x, mp_limb_t n, unsigned flag)
+{
+  ASSERT (n <= GMP_NUMB_MAX);
+  ASSERT (flag == 0 || (flag == 1 && n > ODD_FACTORIAL_TABLE_LIMIT && ABOVE_THRESHOLD (n, FAC_DSC_THRESHOLD)));
+
+  if (n <= ODD_FACTORIAL_TABLE_LIMIT)
+    {
+      PTR (x)[0] = __gmp_oddfac_table[n];
+      SIZ (x) = 1;
+    }
+  else if (n <= ODD_DOUBLEFACTORIAL_TABLE_LIMIT + 1)
+    {
+      mp_ptr   px;
+
+      px = MPZ_NEWALLOC (x, 2);
+      umul_ppmm (px[1], px[0], __gmp_odd2fac_table[(n - 1) >> 1], __gmp_oddfac_table[n >> 1]);
+      SIZ (x) = 2;
+    }
+  else
+    {
+      unsigned s;
+      mp_ptr   factors;
+
+      s = 0;
+      {
+       mp_limb_t tn;
+       mp_limb_t prod, max_prod, i;
+       mp_size_t j;
+       TMP_SDECL;
+
+#if TUNE_PROGRAM_BUILD
+       ASSERT (FAC_DSC_THRESHOLD_LIMIT >= FAC_DSC_THRESHOLD);
+       ASSERT (FAC_DSC_THRESHOLD >= 2 * (ODD_DOUBLEFACTORIAL_TABLE_LIMIT + 2));
+#endif
+
+       /* Compute the number of recursive steps for the DSC algorithm. */
+       for (tn = n; ABOVE_THRESHOLD (tn, FAC_DSC_THRESHOLD); s++)
+         tn >>= 1;
+
+       j = 0;
+
+       TMP_SMARK;
+       factors = TMP_SALLOC_LIMBS (1 + tn / FACTORS_PER_LIMB);
+       ASSERT (tn >= FACTORS_PER_LIMB);
+
+       prod = 1;
+#if TUNE_PROGRAM_BUILD
+       max_prod = GMP_NUMB_MAX / FAC_DSC_THRESHOLD_LIMIT;
+#else
+       max_prod = GMP_NUMB_MAX / FAC_DSC_THRESHOLD;
+#endif
+
+       ASSERT (tn > ODD_DOUBLEFACTORIAL_TABLE_LIMIT + 1);
+       do {
+         i = ODD_DOUBLEFACTORIAL_TABLE_LIMIT + 2;
+         factors[j++] = ODD_DOUBLEFACTORIAL_TABLE_MAX;
+         do {
+           FACTOR_LIST_STORE (i, prod, max_prod, factors, j);
+           i += 2;
+         } while (i <= tn);
+         max_prod <<= 1;
+         tn >>= 1;
+       } while (tn > ODD_DOUBLEFACTORIAL_TABLE_LIMIT + 1);
+
+       factors[j++] = prod;
+       factors[j++] = __gmp_odd2fac_table[(tn - 1) >> 1];
+       factors[j++] = __gmp_oddfac_table[tn >> 1];
+       mpz_prodlimbs (x, factors, j);
+
+       TMP_SFREE;
+      }
+
+      if (s != 0)
+       /* Use the algorithm described by Peter Luschny in "Divide,
+          Swing and Conquer the Factorial!".
+
+          Improvement: there are two temporary buffers, factors and
+          square, that are never used together; with a good estimate
+          of the maximal needed size, they could share a single
+          allocation.
+       */
+       {
+         mpz_t mswing;
+         mp_ptr sieve;
+         mp_size_t size;
+         TMP_DECL;
+
+         TMP_MARK;
+
+         flag--;
+         size = n / GMP_NUMB_BITS + 4;
+         ASSERT (primesieve_size (n - 1) <= size - (size / 2 + 1));
+         /* 2-multiswing(n) < 2^(n-1)*sqrt(n/pi) < 2^(n+GMP_NUMB_BITS);
+            one more can be overwritten by mul, another for the sieve */
+         MPZ_TMP_INIT (mswing, size);
+         /* Initialize size, so that ASSERT can check it correctly. */
+         ASSERT_CODE (SIZ (mswing) = 0);
+
+         /* Put the sieve on the second half, it will be overwritten by the last mswing. */
+         sieve = PTR (mswing) + size / 2 + 1;
+
+         size = (gmp_primesieve (sieve, n - 1) + 1) / log_n_max (n) + 1;
+
+         factors = TMP_ALLOC_LIMBS (size);
+         do {
+           mp_ptr    square, px;
+           mp_size_t nx, ns;
+           mp_limb_t cy;
+           TMP_DECL;
+
+           s--;
+           ASSERT (ABSIZ (mswing) < ALLOC (mswing) / 2); /* Check: sieve has not been overwritten */
+           mpz_2multiswing_1 (mswing, n >> s, sieve, factors);
+
+           TMP_MARK;
+           nx = SIZ (x);
+           if (s == flag) {
+             size = nx;
+             square = TMP_ALLOC_LIMBS (size);
+             MPN_COPY (square, PTR (x), nx);
+           } else {
+             size = nx << 1;
+             square = TMP_ALLOC_LIMBS (size);
+             mpn_sqr (square, PTR (x), nx);
+             size -= (square[size - 1] == 0);
+           }
+           ns = SIZ (mswing);
+           nx = size + ns;
+           px = MPZ_NEWALLOC (x, nx);
+           ASSERT (ns <= size);
+           cy = mpn_mul (px, square, size, PTR(mswing), ns); /* n!= n$ * floor(n/2)!^2 */
+
+           TMP_FREE;
+           SIZ(x) = nx - (cy == 0);
+         } while (s != 0);
+         TMP_FREE;
+       }
+    }
+}
+
+#undef FACTORS_PER_LIMB
+#undef FACTOR_LIST_STORE
diff --git a/mpz/out_raw.c b/mpz/out_raw.c

index 3eb9fab50ea58411f7e3cc07225c96403b48386e..ed3330af5b5e665629c36f9262805c2559783f13 100644 (file)
--- a/mpz/out_raw.c
+++ b/mpz/out_raw.c
@@ -71,70 +71,70 @@ mpz_out_raw (FILE *fp, mpz_srcptr x)
        i = abs_xsize;
  
        if (GMP_NAIL_BITS == 0)
-        {
-          /* reverse limb order, and byte swap if necessary */
+       {
+         /* reverse limb order, and byte swap if necessary */
  #ifdef _CRAY
-          _Pragma ("_CRI ivdep");
+         _Pragma ("_CRI ivdep");
  #endif
-          do
-            {
-              bp -= BYTES_PER_MP_LIMB;
-              xlimb = *xp;
-              HTON_LIMB_STORE ((mp_ptr) bp, xlimb);
-              xp++;
-            }
-          while (--i > 0);
-
-          /* strip high zero bytes (without fetching from bp) */
-          count_leading_zeros (zeros, xlimb);
-          zeros /= 8;
-          bp += zeros;
-          bytes -= zeros;
-        }
+         do
+           {
+             bp -= BYTES_PER_MP_LIMB;
+             xlimb = *xp;
+             HTON_LIMB_STORE ((mp_ptr) bp, xlimb);
+             xp++;
+           }
+         while (--i > 0);
+
+         /* strip high zero bytes (without fetching from bp) */
+         count_leading_zeros (zeros, xlimb);
+         zeros /= 8;
+         bp += zeros;
+         bytes -= zeros;
+       }
        else
-        {
-          mp_limb_t  new_xlimb;
-          int        bits;
-          ASSERT_CODE (char *bp_orig = bp - bytes);
-
-          ASSERT_ALWAYS (GMP_NUMB_BITS >= 8);
-
-          bits = 0;
-          xlimb = 0;
-          for (;;)
-            {
-              while (bits >= 8)
-                {
-                  ASSERT (bp > bp_orig);
-                  *--bp = xlimb & 0xFF;
-                  xlimb >>= 8;
-                  bits -= 8;
-                }
-
-              if (i == 0)
-                break;
-
-              new_xlimb = *xp++;
-              i--;
-              ASSERT (bp > bp_orig);
-              *--bp = (xlimb | (new_xlimb << bits)) & 0xFF;
-              xlimb = new_xlimb >> (8 - bits);
-              bits += GMP_NUMB_BITS - 8;
-            }
-
-          if (bits != 0)
-            {
-              ASSERT (bp > bp_orig);
-              *--bp = xlimb;
-            }
-
-          ASSERT (bp == bp_orig);
-          while (*bp == 0)
-            {
-              bp++;
-              bytes--;
-            }
-        }
+       {
+         mp_limb_t  new_xlimb;
+         int        bits;
+         ASSERT_CODE (char *bp_orig = bp - bytes);
+
+         ASSERT_ALWAYS (GMP_NUMB_BITS >= 8);
+
+         bits = 0;
+         xlimb = 0;
+         for (;;)
+           {
+             while (bits >= 8)
+               {
+                 ASSERT (bp > bp_orig);
+                 *--bp = xlimb & 0xFF;
+                 xlimb >>= 8;
+                 bits -= 8;
+               }
+
+             if (i == 0)
+               break;
+
+             new_xlimb = *xp++;
+             i--;
+             ASSERT (bp > bp_orig);
+             *--bp = (xlimb | (new_xlimb << bits)) & 0xFF;
+             xlimb = new_xlimb >> (8 - bits);
+             bits += GMP_NUMB_BITS - 8;
+           }
+
+         if (bits != 0)
+           {
+             ASSERT (bp > bp_orig);
+             *--bp = xlimb;
+           }
+
+         ASSERT (bp == bp_orig);
+         while (*bp == 0)
+           {
+             bp++;
+             bytes--;
+           }
+       }
      }
  
    /* total bytes to be written */
diff --git a/mpz/out_str.c b/mpz/out_str.c

index 8643db865a66bf7c92067240ce41a886f7c82c53..3fd0666e6d1b036b20c7db28373c789cd24ce81b 100644 (file)
--- a/mpz/out_str.c
+++ b/mpz/out_str.c
@@ -1,7 +1,8 @@
  /* mpz_out_str(stream, base, integer) -- Output to STREAM the multi prec.
     integer INTEGER in base BASE.
  
-Copyright 1991, 1993, 1994, 1996, 2001, 2005 Free Software Foundation, Inc.
+Copyright 1991, 1993, 1994, 1996, 2001, 2005, 2011, 2012 Free Software
+Foundation, Inc.
  
  This file is part of the GNU MP Library.
  
@@ -21,17 +22,18 @@ along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
  #include <stdio.h>
  #include "gmp.h"
  #include "gmp-impl.h"
+#include "longlong.h"
  
  size_t
  mpz_out_str (FILE *stream, int base, mpz_srcptr x)
  {
    mp_ptr xp;
-  mp_size_t x_size = x->_mp_size;
+  mp_size_t x_size = SIZ (x);
    unsigned char *str;
    size_t str_size;
    size_t i;
    size_t written;
-  char *num_to_text;
+  const char *num_to_text;
    TMP_DECL;
  
    if (stream == 0)
@@ -40,7 +42,7 @@ mpz_out_str (FILE *stream, int base, mpz_srcptr x)
    if (base >= 0)
      {
        num_to_text = "0123456789abcdefghijklmnopqrstuvwxyz";
-      if (base == 0)
+      if (base <= 1)
         base = 10;
        else if (base > 36)
         {
@@ -52,15 +54,13 @@ mpz_out_str (FILE *stream, int base, mpz_srcptr x)
    else
      {
        base = -base;
+      if (base <= 1)
+       base = 10;
+      else if (base > 36)
+       return 0;
        num_to_text = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ";
      }
  
-  if (x_size == 0)
-    {
-      fputc ('0', stream);
-      return ferror (stream) ? 0 : 1;
-    }
-
    written = 0;
  
    if (x_size < 0)
@@ -71,25 +71,21 @@ mpz_out_str (FILE *stream, int base, mpz_srcptr x)
      }
  
    TMP_MARK;
-  str_size = ((size_t) (x_size * GMP_LIMB_BITS
-                       * mp_bases[base].chars_per_bit_exactly)) + 3;
-  str = (unsigned char *) TMP_ALLOC (str_size);
  
-  /* Move the number to convert into temporary space, since mpn_get_str
-     clobbers its argument + needs one extra high limb....  */
-  xp = TMP_ALLOC_LIMBS (x_size + 1);
-  MPN_COPY (xp, x->_mp_d, x_size);
-
-  str_size = mpn_get_str (str, base, xp, x_size);
+  DIGITS_IN_BASE_PER_LIMB (str_size, x_size, base);
+  str_size += 3;
+  str = (unsigned char *) TMP_ALLOC (str_size);
  
-  /* mpn_get_str might make some leading zeros.  Skip them.  */
-  while (*str == 0)
+  xp = PTR (x);
+  if (! POW2_P (base))
      {
-      str_size--;
-      str++;
+      xp = TMP_ALLOC_LIMBS (x_size | 1);  /* |1 in case x_size==0 */
+      MPN_COPY (xp, PTR (x), x_size);
      }
  
-  /* Translate to printable chars.  */
+  str_size = mpn_get_str (str, base, xp, x_size);
+
+  /* Convert result to printable chars.  */
    for (i = 0; i < str_size; i++)
      str[i] = num_to_text[str[i]];
    str[str_size] = 0;
diff --git a/mpz/powm.c b/mpz/powm.c

index 29b0132c46615dbb1fbe5623d681b97fcfc36f3c..6e33da12dc001f9e6b0736e9ee4df82325ec9b2f 100644 (file)
--- a/mpz/powm.c
+++ b/mpz/powm.c
@@ -2,8 +2,8 @@
  
     Contributed to the GNU project by Torbjorn Granlund.
  
-Copyright 1991, 1993, 1994, 1996, 1997, 2000, 2001, 2002, 2005, 2008, 2009
-Free Software Foundation, Inc.
+Copyright 1991, 1993, 1994, 1996, 1997, 2000, 2001, 2002, 2005, 2008,
+2009, 2011, 2012 Free Software Foundation, Inc.
  
  This file is part of the GNU MP Library.
  
@@ -24,9 +24,6 @@ along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
  #include "gmp.h"
  #include "gmp-impl.h"
  #include "longlong.h"
-#ifdef BERKELEY_MP
-#include "mp.h"
-#endif
  
  
  /* TODO
@@ -52,21 +49,18 @@ along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
  #define HANDLE_NEGATIVE_EXPONENT 1
  
  void
-#ifndef BERKELEY_MP
  mpz_powm (mpz_ptr r, mpz_srcptr b, mpz_srcptr e, mpz_srcptr m)
-#else /* BERKELEY_MP */
-pow (mpz_srcptr b, mpz_srcptr e, mpz_srcptr m, mpz_ptr r)
-#endif /* BERKELEY_MP */
  {
    mp_size_t n, nodd, ncnt;
    int cnt;
    mp_ptr rp, tp;
    mp_srcptr bp, ep, mp;
    mp_size_t rn, bn, es, en, itch;
+  mpz_t new_b;                 /* note: value lives long via 'b' */
    TMP_DECL;
  
    n = ABSIZ(m);
-  if (n == 0)
+  if (UNLIKELY (n == 0))
      DIVIDE_BY_ZERO;
  
    mp = PTR(m);
@@ -76,7 +70,6 @@ pow (mpz_srcptr b, mpz_srcptr e, mpz_srcptr m, mpz_ptr r)
    es = SIZ(e);
    if (UNLIKELY (es <= 0))
      {
-      mpz_t new_b;
        if (es == 0)
         {
           /* b^0 mod m,  b is anything and m is non-zero.
@@ -89,7 +82,7 @@ pow (mpz_srcptr b, mpz_srcptr e, mpz_srcptr m, mpz_ptr r)
  #if HANDLE_NEGATIVE_EXPONENT
        MPZ_TMP_INIT (new_b, n + 1);
  
-      if (! mpz_invert (new_b, b, m))
+      if (UNLIKELY (! mpz_invert (new_b, b, m)))
         DIVIDE_BY_ZERO;
        b = new_b;
        es = -es;
@@ -158,11 +151,11 @@ pow (mpz_srcptr b, mpz_srcptr e, mpz_srcptr m, mpz_ptr r)
    cnt = 0;
    if (mp[0] % 2 == 0)
      {
-      mp_ptr new = TMP_ALLOC_LIMBS (nodd);
+      mp_ptr newmp = TMP_ALLOC_LIMBS (nodd);
        count_trailing_zeros (cnt, mp[0]);
-      mpn_rshift (new, mp, nodd, cnt);
-      nodd -= new[nodd - 1] == 0;
-      mp = new;
+      mpn_rshift (newmp, mp, nodd, cnt);
+      nodd -= newmp[nodd - 1] == 0;
+      mp = newmp;
        ncnt++;
      }
  
@@ -197,10 +190,10 @@ pow (mpz_srcptr b, mpz_srcptr e, mpz_srcptr m, mpz_ptr r)
  
        if (bn < ncnt)
         {
-         mp_ptr new = TMP_ALLOC_LIMBS (ncnt);
-         MPN_COPY (new, bp, bn);
-         MPN_ZERO (new + bn, ncnt - bn);
-         bp = new;
+         mp_ptr newbp = TMP_ALLOC_LIMBS (ncnt);
+         MPN_COPY (newbp, bp, bn);
+         MPN_ZERO (newbp + bn, ncnt - bn);
+         bp = newbp;
         }
  
        r2 = tp;
@@ -232,10 +225,10 @@ pow (mpz_srcptr b, mpz_srcptr e, mpz_srcptr m, mpz_ptr r)
      zero:
        if (nodd < ncnt)
         {
-         mp_ptr new = TMP_ALLOC_LIMBS (ncnt);
-         MPN_COPY (new, mp, nodd);
-         MPN_ZERO (new + nodd, ncnt - nodd);
-         mp = new;
+         mp_ptr newmp = TMP_ALLOC_LIMBS (ncnt);
+         MPN_COPY (newmp, mp, nodd);
+         MPN_ZERO (newmp + nodd, ncnt - nodd);
+         mp = newmp;
         }
  
        odd_inv_2exp = tp + n;
diff --git a/mpz/powm_sec.c b/mpz/powm_sec.c

index 2432fe4989338c2400bfdd7be3db7a25efc00031..895ca8913fb97bde768cb74730b1ce0e43e97048 100644 (file)
--- a/mpz/powm_sec.c
+++ b/mpz/powm_sec.c
@@ -2,8 +2,8 @@
  
     Contributed to the GNU project by Torbjorn Granlund.
  
-Copyright 1991, 1993, 1994, 1996, 1997, 2000, 2001, 2002, 2005, 2008, 2009
-Free Software Foundation, Inc.
+Copyright 1991, 1993, 1994, 1996, 1997, 2000, 2001, 2002, 2005, 2008, 2009,
+2012 Free Software Foundation, Inc.
  
  This file is part of the GNU MP Library.
  
@@ -35,18 +35,15 @@ mpz_powm_sec (mpz_ptr r, mpz_srcptr b, mpz_srcptr e, mpz_srcptr m)
    TMP_DECL;
  
    n = ABSIZ(m);
-  if (n == 0)
-    DIVIDE_BY_ZERO;
  
    mp = PTR(m);
  
-  if (mp[0] % 2 == 0)
+  if (UNLIKELY ((n == 0) || (mp[0] % 2 == 0)))
      DIVIDE_BY_ZERO;
  
    es = SIZ(e);
    if (UNLIKELY (es <= 0))
      {
-      mpz_t new_b;
        if (es == 0)
         {
           /* b^0 mod m,  b is anything and m is non-zero.
diff --git a/mpz/powm_ui.c b/mpz/powm_ui.c

index 64615d1078c258718d98b5a869ca6e8555c86455..b8f81000b9c468375457dea7df62a88a337e897b 100644 (file)
--- a/mpz/powm_ui.c
+++ b/mpz/powm_ui.c
@@ -1,7 +1,9 @@
-/* mpz_powm_ui(res,base,exp,mod) -- Set RES to (base**exp) mod MOD.
+/* mpz_powm_ui(res,base,exp,mod) -- Set RES to (BASE^EXP) mod MOD.
  
-Copyright 1991, 1993, 1994, 1996, 1997, 2000, 2001, 2002, 2005 Free Software
-Foundation, Inc.
+   Contributed to the GNU project by Torbjorn Granlund.
+
+Copyright 1991, 1993, 1994, 1996, 1997, 2000, 2001, 2002, 2005, 2008, 2009,
+2011, 2012, 2013 Free Software Foundation, Inc.
  
  This file is part of the GNU MP Library.
  
@@ -23,173 +25,251 @@ along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
  #include "gmp-impl.h"
  #include "longlong.h"
  
-/* Compute t = a mod m, a is defined by (ap,an), m is defined by (mp,mn), and
-   t is defined by (tp,mn).  */
+
+/* This code is very old, and should be rewritten to current GMP standard.  It
+   is slower than mpz_powm for large exponents, but also for small exponents
+   when the mod argument is small.
+
+   As an intermediate solution, we now deflect to mpz_powm for exponents >= 20.
+*/
+
+/*
+  b ^ e mod m   res
+  0   0     0    ?
+  0   e     0    ?
+  0   0     m    ?
+  0   e     m    0
+  b   0     0    ?
+  b   e     0    ?
+  b   0     m    1 mod m
+  b   e     m    b^e mod m
+*/
+
  static void
-reduce (mp_ptr tp, mp_srcptr ap, mp_size_t an, mp_srcptr mp, mp_size_t mn)
+mod (mp_ptr np, mp_size_t nn, mp_srcptr dp, mp_size_t dn, gmp_pi1_t *dinv, mp_ptr tp)
  {
    mp_ptr qp;
    TMP_DECL;
-
    TMP_MARK;
-  qp = TMP_ALLOC_LIMBS (an - mn + 1);
  
-  mpn_tdiv_qr (qp, tp, 0L, ap, an, mp, mn);
+  qp = tp;
+
+  if (dn == 1)
+    np[0] = mpn_divrem_1 (qp, (mp_size_t) 0, np, nn, dp[0]);
+  else if (dn == 2)
+    mpn_div_qr_2n_pi1 (qp, np, np, nn, dp[1], dp[0], dinv->inv32);
+  else if (BELOW_THRESHOLD (dn, DC_DIV_QR_THRESHOLD) ||
+          BELOW_THRESHOLD (nn - dn, DC_DIV_QR_THRESHOLD))
+    mpn_sbpi1_div_qr (qp, np, nn, dp, dn, dinv->inv32);
+  else if (BELOW_THRESHOLD (dn, MUPI_DIV_QR_THRESHOLD) ||   /* fast condition */
+          BELOW_THRESHOLD (nn, 2 * MU_DIV_QR_THRESHOLD) || /* fast condition */
+          (double) (2 * (MU_DIV_QR_THRESHOLD - MUPI_DIV_QR_THRESHOLD)) * dn /* slow... */
+          + (double) MUPI_DIV_QR_THRESHOLD * nn > (double) dn * nn)    /* ...condition */
+    {
+      mpn_dcpi1_div_qr (qp, np, nn, dp, dn, dinv);
+    }
+  else
+    {
+      /* We need to allocate separate remainder area, since mpn_mu_div_qr does
+        not handle overlap between the numerator and remainder areas.
+        FIXME: Make it handle such overlap.  */
+      mp_ptr rp = TMP_ALLOC_LIMBS (dn);
+      mp_size_t itch = mpn_mu_div_qr_itch (nn, dn, 0);
+      mp_ptr scratch = TMP_ALLOC_LIMBS (itch);
+      mpn_mu_div_qr (qp, rp, np, nn, dp, dn, scratch);
+      MPN_COPY (np, rp, dn);
+    }
  
    TMP_FREE;
  }
  
-void
-mpz_powm_ui (mpz_ptr r, mpz_srcptr b, unsigned long int el, mpz_srcptr m)
+/* Compute t = a mod m, a is defined by (ap,an), m is defined by (mp,mn), and
+   t is defined by (tp,mn).  */
+static void
+reduce (mp_ptr tp, mp_srcptr ap, mp_size_t an, mp_srcptr mp, mp_size_t mn, gmp_pi1_t *dinv)
  {
-  mp_ptr xp, tp, qp, mp, bp;
-  mp_size_t xn, tn, mn, bn;
-  int m_zero_cnt;
-  int c;
-  mp_limb_t e;
+  mp_ptr rp, scratch;
    TMP_DECL;
+  TMP_MARK;
+
+  rp = TMP_ALLOC_LIMBS (an);
+  scratch = TMP_ALLOC_LIMBS (an - mn + 1);
+  MPN_COPY (rp, ap, an);
+  mod (rp, an, mp, mn, dinv, scratch);
+  MPN_COPY (tp, rp, mn);
  
-  mp = PTR(m);
-  mn = ABSIZ(m);
-  if (mn == 0)
-    DIVIDE_BY_ZERO;
+  TMP_FREE;
+}
  
-  if (el == 0)
+void
+mpz_powm_ui (mpz_ptr r, mpz_srcptr b, unsigned long int el, mpz_srcptr m)
+{
+  if (el < 20)
      {
-      /* Exponent is zero, result is 1 mod MOD, i.e., 1 or 0
-        depending on if MOD equals 1.  */
-      SIZ(r) = (mn == 1 && mp[0] == 1) ? 0 : 1;
-      PTR(r)[0] = 1;
-      return;
-    }
+      mp_ptr xp, tp, mp, bp, scratch;
+      mp_size_t xn, tn, mn, bn;
+      int m_zero_cnt;
+      int c;
+      mp_limb_t e, m2;
+      gmp_pi1_t dinv;
+      TMP_DECL;
  
-  TMP_MARK;
+      mp = PTR(m);
+      mn = ABSIZ(m);
+      if (UNLIKELY (mn == 0))
+       DIVIDE_BY_ZERO;
  
-  /* Normalize m (i.e. make its most significant bit set) as required by
-     division functions below.  */
-  count_leading_zeros (m_zero_cnt, mp[mn - 1]);
-  m_zero_cnt -= GMP_NAIL_BITS;
-  if (m_zero_cnt != 0)
-    {
-      mp_ptr new_mp = TMP_ALLOC_LIMBS (mn);
-      mpn_lshift (new_mp, mp, mn, m_zero_cnt);
-      mp = new_mp;
-    }
+      if (el == 0)
+       {
+         /* Exponent is zero, result is 1 mod M, i.e., 1 or 0 depending on if
+            M equals 1.  */
+         SIZ(r) = (mn == 1 && mp[0] == 1) ? 0 : 1;
+         PTR(r)[0] = 1;
+         return;
+       }
  
-  bn = ABSIZ(b);
-  bp = PTR(b);
-  if (bn > mn)
-    {
-      /* Reduce possibly huge base.  Use a function call to reduce, since we
-        don't want the quotient allocation to live until function return.  */
-      mp_ptr new_bp = TMP_ALLOC_LIMBS (mn);
-      reduce (new_bp, bp, bn, mp, mn);
-      bp = new_bp;
-      bn = mn;
-      /* Canonicalize the base, since we are potentially going to multiply with
-        it quite a few times.  */
-      MPN_NORMALIZE (bp, bn);
-    }
+      TMP_MARK;
  
-  if (bn == 0)
-    {
-      SIZ(r) = 0;
-      TMP_FREE;
-      return;
-    }
+      /* Normalize m (i.e. make its most significant bit set) as required by
+        division functions below.  */
+      count_leading_zeros (m_zero_cnt, mp[mn - 1]);
+      m_zero_cnt -= GMP_NAIL_BITS;
+      if (m_zero_cnt != 0)
+       {
+         mp_ptr new_mp = TMP_ALLOC_LIMBS (mn);
+         mpn_lshift (new_mp, mp, mn, m_zero_cnt);
+         mp = new_mp;
+       }
  
-  tp = TMP_ALLOC_LIMBS (2 * mn + 1);
-  xp = TMP_ALLOC_LIMBS (mn);
+      m2 = mn == 1 ? 0 : mp[mn - 2];
+      invert_pi1 (dinv, mp[mn - 1], m2);
  
-  qp = TMP_ALLOC_LIMBS (mn + 1);
+      bn = ABSIZ(b);
+      bp = PTR(b);
+      if (bn > mn)
+       {
+         /* Reduce possibly huge base.  Use a function call to reduce, since we
+            don't want the quotient allocation to live until function return.  */
+         mp_ptr new_bp = TMP_ALLOC_LIMBS (mn);
+         reduce (new_bp, bp, bn, mp, mn, &dinv);
+         bp = new_bp;
+         bn = mn;
+         /* Canonicalize the base, since we are potentially going to multiply with
+            it quite a few times.  */
+         MPN_NORMALIZE (bp, bn);
+       }
  
-  MPN_COPY (xp, bp, bn);
-  xn = bn;
+      if (bn == 0)
+       {
+         SIZ(r) = 0;
+         TMP_FREE;
+         return;
+       }
  
-  e = el;
-  count_leading_zeros (c, e);
-  e = (e << c) << 1;           /* shift the exp bits to the left, lose msb */
-  c = GMP_LIMB_BITS - 1 - c;
+      tp = TMP_ALLOC_LIMBS (2 * mn + 1);
+      xp = TMP_ALLOC_LIMBS (mn);
+      scratch = TMP_ALLOC_LIMBS (mn + 1);
  
-  /* Main loop. */
+      MPN_COPY (xp, bp, bn);
+      xn = bn;
  
-  /* If m is already normalized (high bit of high limb set), and b is the
-     same size, but a bigger value, and e==1, then there's no modular
-     reductions done and we can end up with a result out of range at the
-     end. */
-  if (c == 0)
-    {
-      if (xn == mn && mpn_cmp (xp, mp, mn) >= 0)
-        mpn_sub_n (xp, xp, mp, mn);
-      goto finishup;
-    }
+      e = el;
+      count_leading_zeros (c, e);
+      e = (e << c) << 1;               /* shift the exp bits to the left, lose msb */
+      c = GMP_LIMB_BITS - 1 - c;
  
-  while (c != 0)
-    {
-      mpn_sqr (tp, xp, xn);
-      tn = 2 * xn; tn -= tp[tn - 1] == 0;
-      if (tn < mn)
+      if (c == 0)
         {
-         MPN_COPY (xp, tp, tn);
-         xn = tn;
+         /* If m is already normalized (high bit of high limb set), and b is
+            the same size, but a bigger value, and e==1, then there's no
+            modular reductions done and we can end up with a result out of
+            range at the end. */
+         if (xn == mn && mpn_cmp (xp, mp, mn) >= 0)
+           mpn_sub_n (xp, xp, mp, mn);
         }
        else
         {
-         mpn_tdiv_qr (qp, xp, 0L, tp, tn, mp, mn);
-         xn = mn;
+         /* Main loop. */
+         do
+           {
+             mpn_sqr (tp, xp, xn);
+             tn = 2 * xn; tn -= tp[tn - 1] == 0;
+             if (tn < mn)
+               {
+                 MPN_COPY (xp, tp, tn);
+                 xn = tn;
+               }
+             else
+               {
+                 mod (tp, tn, mp, mn, &dinv, scratch);
+                 MPN_COPY (xp, tp, mn);
+                 xn = mn;
+               }
+
+             if ((mp_limb_signed_t) e < 0)
+               {
+                 mpn_mul (tp, xp, xn, bp, bn);
+                 tn = xn + bn; tn -= tp[tn - 1] == 0;
+                 if (tn < mn)
+                   {
+                     MPN_COPY (xp, tp, tn);
+                     xn = tn;
+                   }
+                 else
+                   {
+                     mod (tp, tn, mp, mn, &dinv, scratch);
+                     MPN_COPY (xp, tp, mn);
+                     xn = mn;
+                   }
+               }
+             e <<= 1;
+             c--;
+           }
+         while (c != 0);
         }
  
-      if ((mp_limb_signed_t) e < 0)
+      /* We shifted m left m_zero_cnt steps.  Adjust the result by reducing it
+        with the original M.  */
+      if (m_zero_cnt != 0)
         {
-         mpn_mul (tp, xp, xn, bp, bn);
-         tn = xn + bn; tn -= tp[tn - 1] == 0;
-         if (tn < mn)
+         mp_limb_t cy;
+         cy = mpn_lshift (tp, xp, xn, m_zero_cnt);
+         tp[xn] = cy; xn += cy != 0;
+
+         if (xn < mn)
             {
-             MPN_COPY (xp, tp, tn);
-             xn = tn;
+             MPN_COPY (xp, tp, xn);
             }
           else
             {
-             mpn_tdiv_qr (qp, xp, 0L, tp, tn, mp, mn);
+             mod (tp, xn, mp, mn, &dinv, scratch);
+             MPN_COPY (xp, tp, mn);
               xn = mn;
             }
+         mpn_rshift (xp, xp, xn, m_zero_cnt);
         }
-      e <<= 1;
-      c--;
-    }
-
- finishup:
-  /* We shifted m left m_zero_cnt steps.  Adjust the result by reducing
-     it with the original MOD.  */
-  if (m_zero_cnt != 0)
-    {
-      mp_limb_t cy;
-      cy = mpn_lshift (tp, xp, xn, m_zero_cnt);
-      tp[xn] = cy; xn += cy != 0;
+      MPN_NORMALIZE (xp, xn);
  
-      if (xn < mn)
+      if ((el & 1) != 0 && SIZ(b) < 0 && xn != 0)
         {
-         MPN_COPY (xp, tp, xn);
-       }
-      else
-       {
-         mpn_tdiv_qr (qp, xp, 0L, tp, xn, mp, mn);
+         mp = PTR(m);                  /* want original, unnormalized m */
+         mpn_sub (xp, mp, mn, xp, xn);
           xn = mn;
+         MPN_NORMALIZE (xp, xn);
         }
-      mpn_rshift (xp, xp, xn, m_zero_cnt);
-    }
-  MPN_NORMALIZE (xp, xn);
+      MPZ_REALLOC (r, xn);
+      SIZ (r) = xn;
+      MPN_COPY (PTR(r), xp, xn);
  
-  if ((el & 1) != 0 && SIZ(b) < 0 && xn != 0)
+      TMP_FREE;
+    }
+  else
      {
-      mp = PTR(m);                     /* want original, unnormalized m */
-      mpn_sub (xp, mp, mn, xp, xn);
-      xn = mn;
-      MPN_NORMALIZE (xp, xn);
+      /* For large exponents, fake a mpz_t exponent and deflect to the more
+        sophisticated mpz_powm.  */
+      mpz_t e;
+      mp_limb_t ep[LIMBS_PER_ULONG];
+      MPZ_FAKE_UI (e, ep, el);
+      mpz_powm (r, b, e, m);
      }
-  MPZ_REALLOC (r, xn);
-  SIZ (r) = xn;
-  MPN_COPY (PTR(r), xp, xn);
-
-  TMP_FREE;
  }
diff --git a/mpz/pprime_p.c b/mpz/pprime_p.c

index ce501a44b8f95cc5620634d069e994c8c60aa0f0..8a642df00b3c0c4878cadef97046fc2f467e1a81 100644 (file)
--- a/mpz/pprime_p.c
+++ b/mpz/pprime_p.c
@@ -28,7 +28,7 @@ along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
  #include "gmp-impl.h"
  #include "longlong.h"
  
-static int isprime __GMP_PROTO ((unsigned long int));
+static int isprime (unsigned long int);
  
  
  /* MPN_MOD_OR_MODEXACT_1_ODD can be used instead of mpn_mod_1 for the trial
@@ -65,7 +65,7 @@ mpz_probab_prime_p (mpz_srcptr n, int reps)
    /* Check if n has small factors.  */
  #if defined (PP_INVERTED)
    r = MPN_MOD_OR_PREINV_MOD_1 (PTR(n), (mp_size_t) SIZ(n), (mp_limb_t) PP,
-                               (mp_limb_t) PP_INVERTED);
+                              (mp_limb_t) PP_INVERTED);
  #else
    r = mpn_mod_1 (PTR(n), (mp_size_t) SIZ(n), (mp_limb_t) PP);
  #endif
diff --git a/mpz/primorial_ui.c b/mpz/primorial_ui.c

new file mode 100644 (file)

index 0000000..13c562f
--- /dev/null
+++ b/mpz/primorial_ui.c
@@ -0,0 +1,153 @@
+/* mpz_primorial_ui(RESULT, N) -- Set RESULT to N# the product of primes <= N.
+
+Contributed to the GNU project by Marco Bodrato.
+
+Copyright 2012 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+/* TODO: Remove duplicated constants / macros / static functions...
+ */
+
+/*************************************************************/
+/* Section macros: common macros, for swing/fac/bin (&sieve) */
+/*************************************************************/
+
+#define FACTOR_LIST_STORE(P, PR, MAX_PR, VEC, I)               \
+  do {                                                         \
+    if ((PR) > (MAX_PR)) {                                     \
+      (VEC)[(I)++] = (PR);                                     \
+      (PR) = (P);                                              \
+    } else                                                     \
+      (PR) *= (P);                                             \
+  } while (0)
+
+#define LOOP_ON_SIEVE_CONTINUE(prime,end,sieve)                        \
+    __max_i = (end);                                           \
+                                                               \
+    do {                                                       \
+      ++__i;                                                   \
+      if (((sieve)[__index] & __mask) == 0)                    \
+       {                                                       \
+         (prime) = id_to_n(__i)
+
+#define LOOP_ON_SIEVE_BEGIN(prime,start,end,off,sieve)         \
+  do {                                                         \
+    mp_limb_t __mask, __index, __max_i, __i;                   \
+                                                               \
+    __i = (start)-(off);                                       \
+    __index = __i / GMP_LIMB_BITS;                             \
+    __mask = CNST_LIMB(1) << (__i % GMP_LIMB_BITS);            \
+    __i += (off);                                              \
+                                                               \
+    LOOP_ON_SIEVE_CONTINUE(prime,end,sieve)
+
+#define LOOP_ON_SIEVE_STOP                                     \
+       }                                                       \
+      __mask = __mask << 1 | __mask >> (GMP_LIMB_BITS-1);      \
+      __index += __mask & 1;                                   \
+    }  while (__i <= __max_i)                                  \
+
+#define LOOP_ON_SIEVE_END                                      \
+    LOOP_ON_SIEVE_STOP;                                                \
+  } while (0)
+
+/*********************************************************/
+/* Section sieve: sieving functions and tools for primes */
+/*********************************************************/
+
+#if WANT_ASSERT
+static mp_limb_t
+bit_to_n (mp_limb_t bit) { return (bit*3+4)|1; }
+#endif
+
+/* id_to_n (id) = bit_to_n (id-1) = (id*3+1)|1 */
+static mp_limb_t
+id_to_n  (mp_limb_t id)  { return id*3+1+(id&1); }
+
+/* n_to_bit (n) = ((n-1)&(-CNST_LIMB(2)))/3U-1 */
+static mp_limb_t
+n_to_bit (mp_limb_t n) { return ((n-5)|1)/3U; }
+
+#if WANT_ASSERT
+static mp_size_t
+primesieve_size (mp_limb_t n) { return n_to_bit(n) / GMP_LIMB_BITS + 1; }
+#endif
+
+/*********************************************************/
+/* Section primorial: implementation                     */
+/*********************************************************/
+
+void
+mpz_primorial_ui (mpz_ptr x, unsigned long n)
+{
+  static const mp_limb_t table[] = { 1, 1, 2, 6, 6 };
+
+  ASSERT (n <= GMP_NUMB_MAX);
+
+  if (n < numberof (table))
+    {
+      PTR (x)[0] = table[n];
+      SIZ (x) = 1;
+    }
+  else
+    {
+      mp_limb_t *sieve, *factors;
+      mp_size_t size;
+      mp_limb_t prod;
+      mp_limb_t j;
+      TMP_DECL;
+
+      size = 1 + n / GMP_NUMB_BITS + n / (2*GMP_NUMB_BITS);
+      ASSERT (size >= primesieve_size (n));
+      sieve = MPZ_REALLOC (x, size);
+      size = (gmp_primesieve (sieve, n) + 1) / log_n_max (n) + 1;
+
+      TMP_MARK;
+      factors = TMP_ALLOC_LIMBS (size);
+
+      j = 0;
+
+      prod = table[numberof (table)-1];
+
+      /* Store primes from 5 to n */
+      {
+       mp_limb_t prime, max_prod;
+
+       max_prod = GMP_NUMB_MAX / n;
+
+       LOOP_ON_SIEVE_BEGIN (prime, n_to_bit(numberof (table)), n_to_bit (n), 0, sieve);
+       FACTOR_LIST_STORE (prime, prod, max_prod, factors, j);
+       LOOP_ON_SIEVE_END;
+      }
+
+      if (j != 0)
+       {
+         factors[j++] = prod;
+         mpz_prodlimbs (x, factors, j);
+       }
+      else
+       {
+         PTR (x)[0] = prod;
+         SIZ (x) = 1;
+       }
+
+      TMP_FREE;
+    }
+}
diff --git a/mpz/prodlimbs.c b/mpz/prodlimbs.c

new file mode 100644 (file)

index 0000000..8676887
--- /dev/null
+++ b/mpz/prodlimbs.c
@@ -0,0 +1,98 @@
+/* mpz_prodlimbs(RESULT, V, LEN) -- Set RESULT to V[0]*V[1]*...*V[LEN-1].
+
+Contributed to the GNU project by Marco Bodrato.
+
+THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.
+IT IS ONLY SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.
+IN FACT, IT IS ALMOST GUARANTEED THAT IT WILL CHANGE OR
+DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2010, 2011, 2012 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+/*********************************************************/
+/* Section list-prod: product of a list -> mpz_t         */
+/*********************************************************/
+
+/* FIXME: should be tuned */
+#ifndef RECURSIVE_PROD_THRESHOLD
+#define RECURSIVE_PROD_THRESHOLD (MUL_TOOM22_THRESHOLD)
+#endif
+
+/* Computes the product of the j>1 limbs pointed to by factors, puts the
+ * result in x. It assumes that all limbs are non-zero. Above
+ * Karatsuba's threshold it uses a binary splitting strategy, to gain
+ * speed from the asymptotically fast multiplication algorithms.
+ *
+ * The list in  {factors, j} is overwritten.
+ * Returns the size of the result
+ */
+
+mp_size_t
+mpz_prodlimbs (mpz_ptr x, mp_ptr factors, mp_size_t j)
+{
+  mp_limb_t cy;
+  mp_size_t size, i;
+  mp_ptr    prod;
+
+  ASSERT (j > 1);
+  ASSERT (RECURSIVE_PROD_THRESHOLD > 3);
+
+  if (BELOW_THRESHOLD (j, RECURSIVE_PROD_THRESHOLD)) {
+    j--;
+    size = 1;
+
+    for (i = 1; i < j; i++)
+      {
+       cy = mpn_mul_1 (factors, factors, size, factors[i]);
+       factors[size] = cy;
+       size += cy != 0;
+      };
+
+    prod = MPZ_NEWALLOC (x, size + 1);
+
+    cy = mpn_mul_1 (prod, factors, size, factors[i]);
+    prod[size] = cy;
+    return SIZ (x) = size + (cy != 0);
+  } else {
+    mpz_t x1, x2;
+    TMP_DECL;
+
+    i = j >> 1;
+    j -= i;
+    TMP_MARK;
+
+    MPZ_TMP_INIT (x2, j);
+
+    PTR (x1) = factors + i;
+    ALLOC (x1) = j;
+    j = mpz_prodlimbs (x2, factors + i, j);
+    i = mpz_prodlimbs (x1, factors, i);
+    size = i + j;
+    prod = MPZ_NEWALLOC (x, size);
+    if (i >= j)
+      cy = mpn_mul (prod, PTR(x1), i, PTR(x2), j);
+    else
+      cy = mpn_mul (prod, PTR(x2), j, PTR(x1), i);
+    TMP_FREE;
+
+    return SIZ (x) = size - (cy == 0);
+  }
+}
diff --git a/mpz/random2.c b/mpz/random2.c

index f3b85659d2f5490c7758148cf2837e4fcf029085..cc4cfef3126f6b1249fe1070518f7632784c70f4 100644 (file)
--- a/mpz/random2.c
+++ b/mpz/random2.c
@@ -2,7 +2,7 @@
     long runs of consecutive ones and zeros in the binary representation.
     Meant for testing of other MP routines.
  
-Copyright 1991, 1993, 1994, 1996, 2001 Free Software Foundation, Inc.
+Copyright 1991, 1993, 1994, 1996, 2001, 2012 Free Software Foundation, Inc.
  
  This file is part of the GNU MP Library.
  
@@ -26,15 +26,15 @@ void
  mpz_random2 (mpz_ptr x, mp_size_t size)
  {
    mp_size_t abs_size;
+  mp_ptr xp;
  
    abs_size = ABS (size);
    if (abs_size != 0)
      {
-      if (x->_mp_alloc < abs_size)
-       _mpz_realloc (x, abs_size);
+      xp = MPZ_REALLOC (x, abs_size);
  
-      mpn_random2 (x->_mp_d, abs_size);
+      mpn_random2 (xp, abs_size);
      }
  
-  x->_mp_size = size;
+  SIZ (x) = size;
  }
diff --git a/mpz/remove.c b/mpz/remove.c

index 21c002345f7d95fbd1ca91229878b771b916a40f..52afdec8252f456b84230d3501243f3711d84359 100644 (file)
--- a/mpz/remove.c
+++ b/mpz/remove.c
@@ -1,6 +1,6 @@
  /* mpz_remove -- divide out a factor and return its multiplicity.
  
-Copyright 1998, 1999, 2000, 2001, 2002 Free Software Foundation, Inc.
+Copyright 1998, 1999, 2000, 2001, 2002, 2012 Free Software Foundation, Inc.
  
  This file is part of the GNU MP Library.
  
@@ -23,70 +23,95 @@ along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
  mp_bitcnt_t
  mpz_remove (mpz_ptr dest, mpz_srcptr src, mpz_srcptr f)
  {
-  mpz_t fpow[GMP_LIMB_BITS];           /* Really MP_SIZE_T_BITS */
-  mpz_t x, rem;
    mp_bitcnt_t pwr;
-  int p;
-
-  if (mpz_cmp_ui (f, 1) <= 0)
-    DIVIDE_BY_ZERO;
-
-  if (SIZ (src) == 0)
+  mp_srcptr fp;
+  mp_size_t sn, fn, afn;
+  mp_limb_t fp0;
+
+  sn = SIZ (src);
+  fn = SIZ (f);
+  fp = PTR (f);
+  afn = ABS (fn);
+  fp0 = fp[0];
+
+  if (UNLIKELY ((afn <= (fp0 == 1)) /* mpz_cmpabs_ui (f, 1) <= 0 */
+               | (sn == 0)))
      {
-      if (src != dest)
-        mpz_set (dest, src);
+      /*  f = 0 or f = +- 1 or src = 0 */
+      if (afn == 0)
+       DIVIDE_BY_ZERO;
+      mpz_set (dest, src);
        return 0;
      }
  
-  if (mpz_cmp_ui (f, 2) == 0)
-    {
-      mp_bitcnt_t s0;
-      s0 = mpz_scan1 (src, 0);
-      mpz_div_2exp (dest, src, s0);
-      return s0;
-    }
-
-  /* We could perhaps compute mpz_scan1(src,0)/mpz_scan1(f,0).  It is an
-     upper bound of the result we're seeking.  We could also shift down the
-     operands so that they become odd, to make intermediate values smaller.  */
+  if ((fp0 & 1) != 0)
+    { /* f is odd */
+      mp_ptr dp;
+      mp_size_t dn;
  
-  mpz_init (rem);
-  mpz_init (x);
+      dn = ABS (sn);
+      dp = MPZ_REALLOC (dest, dn);
  
-  pwr = 0;
-  mpz_init (fpow[0]);
-  mpz_set (fpow[0], f);
-  mpz_set (dest, src);
+      pwr = mpn_remove (dp, &dn, PTR(src), dn, PTR(f), afn, ~(mp_bitcnt_t) 0);
  
-  /* Divide by f, f^2, ..., f^(2^k) until we get a remainder for f^(2^k).  */
-  for (p = 0;; p++)
-    {
-      mpz_tdiv_qr (x, rem, dest, fpow[p]);
-      if (SIZ (rem) != 0)
-       break;
-      mpz_init (fpow[p + 1]);
-      mpz_mul (fpow[p + 1], fpow[p], fpow[p]);
-      mpz_set (dest, x);
+      SIZ (dest) = ((pwr & (fn < 0)) ^ (sn < 0)) ? -dn : dn;
      }
+  else if (afn == (fp0 == 2))
+    { /* mpz_cmpabs_ui (f, 2) == 0 */
+      pwr = mpz_scan1 (src, 0);
+      mpz_div_2exp (dest, src, pwr);
+      if (pwr & (fn < 0)) /*((pwr % 2 == 1) && (SIZ (f) < 0))*/
+       mpz_neg (dest, dest);
+    }
+  else
+    { /* f != +-2 */
+      mpz_t fpow[GMP_LIMB_BITS];               /* Really MP_SIZE_T_BITS */
+      mpz_t x, rem;
+      int p;
+
+      /* We could perhaps compute mpz_scan1(src,0)/mpz_scan1(f,0).  It is an
+        upper bound of the result we're seeking.  We could also shift down the
+        operands so that they become odd, to make intermediate values
+        smaller.  */
+
+      mpz_init (rem);
+      mpz_init (x);
+
+      pwr = 0;
+      mpz_init_set (fpow[0], f);
+      mpz_set (dest, src);
+
+      /* Divide by f, f^2 ... f^(2^k) until we get a remainder for f^(2^k).  */
+      for (p = 0;; p++)
+       {
+         mpz_tdiv_qr (x, rem, dest, fpow[p]);
+         if (SIZ (rem) != 0)
+           break;
+         mpz_init (fpow[p + 1]);
+         mpz_mul (fpow[p + 1], fpow[p], fpow[p]);
+         mpz_set (dest, x);
+       }
  
-  pwr = (1L << p) - 1;
+      pwr = ((mp_bitcnt_t)1 << p) - 1;
  
-  mpz_clear (fpow[p]);
+      mpz_clear (fpow[p]);
  
-  /* Divide by f^(2^(k-1)), f^(2^(k-2)), ..., f for all divisors that give a
-     zero remainder.  */
-  while (--p >= 0)
-    {
-      mpz_tdiv_qr (x, rem, dest, fpow[p]);
-      if (SIZ (rem) == 0)
+      /* Divide by f^(2^(k-1)), f^(2^(k-2)), ..., f for all divisors that give
+        a zero remainder.  */
+      while (--p >= 0)
         {
-         pwr += 1L << p;
-         mpz_set (dest, x);
+         mpz_tdiv_qr (x, rem, dest, fpow[p]);
+         if (SIZ (rem) == 0)
+           {
+             pwr += (mp_bitcnt_t)1 << p;
+             mpz_set (dest, x);
+           }
+         mpz_clear (fpow[p]);
         }
-      mpz_clear (fpow[p]);
+
+      mpz_clear (x);
+      mpz_clear (rem);
      }
  
-  mpz_clear (x);
-  mpz_clear (rem);
    return pwr;
  }
diff --git a/mpz/root.c b/mpz/root.c

index ece0a997bbebd928429b0ea01e3ca7be8cf267ee..90c228f8825f54d8c5e5359c7a48e8024ed71549 100644 (file)
--- a/mpz/root.c
+++ b/mpz/root.c
@@ -1,7 +1,7 @@
  /* mpz_root(root, u, nth) --  Set ROOT to floor(U^(1/nth)).
     Return an indication if the result is exact.
  
-Copyright 1999, 2000, 2001, 2002, 2003, 2005 Free Software Foundation, Inc.
+Copyright 1999, 2000, 2001, 2002, 2003, 2005, 2012 Free Software Foundation, Inc.
  
  This file is part of the GNU MP Library.
  
@@ -32,12 +32,12 @@ mpz_root (mpz_ptr root, mpz_srcptr u, unsigned long int nth)
    us = SIZ(u);
  
    /* even roots of negatives provoke an exception */
-  if (us < 0 && (nth & 1) == 0)
+  if (UNLIKELY (us < 0 && (nth & 1) == 0))
      SQRT_OF_NEGATIVE;
  
    /* root extraction interpreted as c^(1/nth) means a zeroth root should
       provoke a divide by zero, do this even if c==0 */
-  if (nth == 0)
+  if (UNLIKELY (nth == 0))
      DIVIDE_BY_ZERO;
  
    if (us == 0)
diff --git a/mpz/rootrem.c b/mpz/rootrem.c

index 69988d6f25db47eaaa820f288ab779f02862bd1d..267fb33277cc9af1ec1c667f4fe417b7fb57e787 100644 (file)
--- a/mpz/rootrem.c
+++ b/mpz/rootrem.c
@@ -1,7 +1,7 @@
-/* mpz_rootrem(root, rem, u, nth) --  Set ROOT to floor(U^(1/nth)) and
+/* mpz_rootrem(root, rem, u, nth) --  Set ROOT to trunc(U^(1/nth)) and
     set REM to the remainder.
  
-Copyright 1999, 2000, 2001, 2002, 2003, 2005 Free Software Foundation, Inc.
+Copyright 1999, 2000, 2001, 2002, 2003, 2005, 2012 Free Software Foundation, Inc.
  
  This file is part of the GNU MP Library.
  
@@ -32,12 +32,12 @@ mpz_rootrem (mpz_ptr root, mpz_ptr rem, mpz_srcptr u, unsigned long int nth)
    us = SIZ(u);
  
    /* even roots of negatives provoke an exception */
-  if (us < 0 && (nth & 1) == 0)
+  if (UNLIKELY (us < 0 && (nth & 1) == 0))
      SQRT_OF_NEGATIVE;
  
    /* root extraction interpreted as c^(1/nth) means a zeroth root should
       provoke a divide by zero, do this even if c==0 */
-  if (nth == 0)
+  if (UNLIKELY (nth == 0))
      DIVIDE_BY_ZERO;
  
    if (us == 0)
@@ -81,10 +81,10 @@ mpz_rootrem (mpz_ptr root, mpz_ptr rem, mpz_srcptr u, unsigned long int nth)
        SIZ(root) = us >= 0 ? rootn : -rootn;
        if (u == root)
         MPN_COPY (up, rootp, rootn);
-      else if (u == rem)
-       MPN_COPY (up, remp, remn);
      }
  
-  SIZ(rem) = remn;
+  if (u == rem)
+    MPN_COPY (up, remp, remn);
+  SIZ(rem) = us >= 0 ? remn : -remn;
    TMP_FREE;
  }
diff --git a/mpz/rrandomb.c b/mpz/rrandomb.c

index ee8aa35822def5bd6f7334e3882760ad0b105685..3e06e2c70c649ed0e7db48154d4d0cd91aa51131 100644 (file)
--- a/mpz/rrandomb.c
+++ b/mpz/rrandomb.c
@@ -2,7 +2,7 @@
     long runs of consecutive ones and zeros in the binary representation.
     Meant for testing of other MP routines.
  
-Copyright 2000, 2001, 2002, 2004 Free Software Foundation, Inc.
+Copyright 2000, 2001, 2002, 2004, 2012 Free Software Foundation, Inc.
  
  This file is part of the GNU MP Library.
  
@@ -22,18 +22,19 @@ along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
  #include "gmp.h"
  #include "gmp-impl.h"
  
-static void gmp_rrandomb __GMP_PROTO ((mp_ptr, gmp_randstate_t, mp_bitcnt_t));
+static void gmp_rrandomb (mp_ptr, gmp_randstate_t, mp_bitcnt_t);
  
  void
  mpz_rrandomb (mpz_ptr x, gmp_randstate_t rstate, mp_bitcnt_t nbits)
  {
    mp_size_t nl;
+  mp_ptr xp;
  
    nl = (nbits + GMP_NUMB_BITS - 1) / GMP_NUMB_BITS;
    if (nbits != 0)
      {
-      MPZ_REALLOC (x, nl);
-      gmp_rrandomb (PTR(x), rstate, nbits);
+      xp = MPZ_REALLOC (x, nl);
+      gmp_rrandomb (xp, rstate, nbits);
      }
  
    SIZ(x) = nl;
diff --git a/mpz/scan1.c b/mpz/scan1.c

index e7e3c7f81699226c3f31b65c3d7f3ac7a3b66a4e..e9bfacaa07a1a23f90994cde8075baf3ebb484ca 100644 (file)
--- a/mpz/scan1.c
+++ b/mpz/scan1.c
@@ -33,7 +33,7 @@ mpz_scan1 (mpz_srcptr u, mp_bitcnt_t starting_bit) __GMP_NOTHROW
    mp_srcptr      u_ptr = PTR(u);
    mp_size_t      size = SIZ(u);
    mp_size_t      abs_size = ABS(size);
-  mp_srcptr      u_end = u_ptr + abs_size;
+  mp_srcptr      u_end = u_ptr + abs_size - 1;
    mp_size_t      starting_limb = starting_bit / GMP_NUMB_BITS;
    mp_srcptr      p = u_ptr + starting_limb;
    mp_limb_t      limb;
@@ -44,6 +44,10 @@ mpz_scan1 (mpz_srcptr u, mp_bitcnt_t starting_bit) __GMP_NOTHROW
    if (starting_limb >= abs_size)
      return (size >= 0 ? ~(mp_bitcnt_t) 0 : starting_bit);
  
+  /* This is an important case, where sign is not relevant! */
+  if (starting_bit == 0)
+    goto short_cut;
+
    limb = *p;
  
    if (size >= 0)
@@ -55,62 +59,35 @@ mpz_scan1 (mpz_srcptr u, mp_bitcnt_t starting_bit) __GMP_NOTHROW
         {
           /* If it's the high limb which is zero after masking, then there's
              no 1 bits after starting_bit.  */
-         p++;
           if (p == u_end)
             return ~(mp_bitcnt_t) 0;
  
           /* Otherwise search further for a non-zero limb.  The high limb is
              non-zero, if nothing else.  */
-         for (;;)
+       search_nonzero:
+         do
             {
-             limb = *p;
-             if (limb != 0)
-               break;
+             ASSERT (p != u_end);
               p++;
-             ASSERT (p < u_end);
+           short_cut:
+             limb = *p;
             }
+         while (limb == 0);
         }
      }
    else
      {
-      mp_srcptr  q;
-
        /* If there's a non-zero limb before ours then we're in the ones
-        complement region.  Search from *(p-1) downwards since that might
-        give better cache locality, and since a non-zero in the middle of a
-        number is perhaps a touch more likely than at the end.  */
-      q = p;
-      while (q != u_ptr)
-       {
-         q--;
-         if (*q != 0)
-           goto inverted;
-       }
-
-      if (limb == 0)
-       {
-         /* Skip zero limbs, to find the start of twos complement.  The
-            high limb is non-zero, if nothing else.  This search is
-            necessary so the -limb is applied at the right spot. */
-         do
-           {
-             p++;
-             ASSERT (p < u_end);
-             limb = *p;
-           }
-         while (limb == 0);
-
-         /* Apply twos complement, and look for a 1 bit in that.  Since
-            limb!=0 here, also have (-limb)!=0 so there's certainly a 1
-            bit.  */
-         limb = -limb;
-         goto got_limb;
-       }
+        complement region.  */
+      if (mpn_zero_p (u_ptr, starting_limb)) {
+       if (limb == 0)
+         /* Seeking for the first non-zero bit, it is the same for u and -u. */
+         goto search_nonzero;
  
-      /* Adjust so ~limb implied by searching for 0 bit becomes -limb.  */
-      limb--;
+       /* Adjust so ~limb implied by searching for 0 bit becomes -limb.  */
+       limb--;
+      }
  
-    inverted:
        /* Now seeking a 0 bit. */
  
        /* Mask to 1 all bits before starting_bit, thus ignoring them. */
@@ -120,9 +97,9 @@ mpz_scan1 (mpz_srcptr u, mp_bitcnt_t starting_bit) __GMP_NOTHROW
          then the zero immediately past the end is the result.  */
        while (limb == GMP_NUMB_MAX)
         {
-         p++;
           if (p == u_end)
             return (mp_bitcnt_t) abs_size * GMP_NUMB_BITS;
+         p++;
           limb = *p;
         }
  
@@ -130,7 +107,6 @@ mpz_scan1 (mpz_srcptr u, mp_bitcnt_t starting_bit) __GMP_NOTHROW
        limb = ~limb;
      }
  
- got_limb:
    ASSERT (limb != 0);
    count_trailing_zeros (cnt, limb);
    return (mp_bitcnt_t) (p - u_ptr) * GMP_NUMB_BITS + cnt;
diff --git a/mpz/set.c b/mpz/set.c

index d7366c837baf39bb99f62d5a86b26c9444d3cfab..20140609f67bec7fbfcaab7b510e7574ae5d52d6 100644 (file)
--- a/mpz/set.c
+++ b/mpz/set.c
@@ -1,6 +1,6 @@
  /* mpz_set (dest_integer, src_integer) -- Assign DEST_INTEGER from SRC_INTEGER.
  
-Copyright 1991, 1993, 1994, 1995, 2000 Free Software Foundation, Inc.
+Copyright 1991, 1993, 1994, 1995, 2000, 2012 Free Software Foundation, Inc.
  
  This file is part of the GNU MP Library.
  
@@ -21,33 +21,19 @@ along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
  #include "gmp-impl.h"
  
  
-#ifdef BERKELEY_MP
-#include "mp.h"
-#define FUNCTION   move
-#define ARGUMENTS  mpz_srcptr u, mpz_ptr w
-
-#else
-#define FUNCTION   mpz_set
-#define ARGUMENTS  mpz_ptr w, mpz_srcptr u
-
-#endif
-
-
  void
-FUNCTION (ARGUMENTS)
+mpz_set (mpz_ptr w, mpz_srcptr u)
  {
    mp_ptr wp, up;
    mp_size_t usize, size;
  
-  usize = u->_mp_size;
+  usize = SIZ(u);
    size = ABS (usize);
  
-  if (w->_mp_alloc < size)
-    _mpz_realloc (w, size);
+  wp = MPZ_REALLOC (w, size);
  
-  wp = w->_mp_d;
-  up = u->_mp_d;
+  up = PTR(u);
  
    MPN_COPY (wp, up, size);
-  w->_mp_size = usize;
+  SIZ(w) = usize;
  }
diff --git a/mpz/set_d.c b/mpz/set_d.c

index 2e7fce107f9353855a5b43c2cf34333d1b85abcc..b5f3b9385ce2e3f50cf40df85c3cb1aa38cb273f 100644 (file)
--- a/mpz/set_d.c
+++ b/mpz/set_d.c
@@ -44,8 +44,8 @@ mpz_set_d (mpz_ptr r, double d)
    mp_size_t rn;
  
    DOUBLE_NAN_INF_ACTION (d,
-                         __gmp_invalid_operation (),
-                         __gmp_invalid_operation ());
+                        __gmp_invalid_operation (),
+                        __gmp_invalid_operation ());
  
    negative = d < 0;
    d = ABS (d);
diff --git a/mpz/set_f.c b/mpz/set_f.c

index b939b662b58ff4103dd500db6cbf244345bef473..3ea14e31269f3ca872cdc81bf43c6f02520fddbe 100644 (file)
--- a/mpz/set_f.c
+++ b/mpz/set_f.c
@@ -1,6 +1,6 @@
  /* mpz_set_f (dest_integer, src_float) -- Assign DEST_INTEGER from SRC_FLOAT.
  
-Copyright 1996, 2001 Free Software Foundation, Inc.
+Copyright 1996, 2001, 2012 Free Software Foundation, Inc.
  
  This file is part of the GNU MP Library.
  
@@ -36,8 +36,7 @@ mpz_set_f (mpz_ptr w, mpf_srcptr u)
        return;
      }
  
-  MPZ_REALLOC (w, exp);
-  wp = PTR(w);
+  wp = MPZ_REALLOC (w, exp);
    up = PTR(u);
  
    size = SIZ (u);
diff --git a/mpz/set_si.c b/mpz/set_si.c

index bffb2ee3332c25792f09b8cd130f4776c233e224..7dcc5b93dd4219cab6c77a292453b1710377aa43 100644 (file)
--- a/mpz/set_si.c
+++ b/mpz/set_si.c
@@ -1,7 +1,7 @@
  /* mpz_set_si(dest,val) -- Assign DEST with a small value VAL.
  
-Copyright 1991, 1993, 1994, 1995, 2000, 2001, 2002 Free Software Foundation,
-Inc.
+Copyright 1991, 1993, 1994, 1995, 2000, 2001, 2002, 2012 Free Software
+Foundation, Inc.
  
  This file is part of the GNU MP Library.
  
@@ -29,17 +29,17 @@ mpz_set_si (mpz_ptr dest, signed long int val)
  
    vl = (mp_limb_t) ABS_CAST (unsigned long int, val);
  
-  dest->_mp_d[0] = vl & GMP_NUMB_MASK;
+  PTR (dest)[0] = vl & GMP_NUMB_MASK;
    size = vl != 0;
  
  #if GMP_NAIL_BITS != 0
    if (vl > GMP_NUMB_MAX)
      {
        MPZ_REALLOC (dest, 2);
-      dest->_mp_d[1] = vl >> GMP_NUMB_BITS;
+      PTR (dest)[1] = vl >> GMP_NUMB_BITS;
        size = 2;
      }
  #endif
  
-  dest->_mp_size = val >= 0 ? size : -size;
+  SIZ (dest) = val >= 0 ? size : -size;
  }
diff --git a/mpz/set_str.c b/mpz/set_str.c

index 550c4866b85d8fe71da144043a83c6ff98c6988c..f7ccd775ff9788e7f15e467df74d7a2dc8a43674 100644 (file)
--- a/mpz/set_str.c
+++ b/mpz/set_str.c
@@ -4,8 +4,8 @@
     the base in the C standard way, i.e.  0xhh...h means base 16,
     0oo...o means base 8, otherwise assume base 10.
  
-Copyright 1991, 1993, 1994, 1996, 1997, 1998, 2000, 2001, 2002, 2003, 2005
-Free Software Foundation, Inc.
+Copyright 1991, 1993, 1994, 1996, 1997, 1998, 2000, 2001, 2002, 2003, 2005,
+2011, 2012 Free Software Foundation, Inc.
  
  This file is part of the GNU MP Library.
  
@@ -26,8 +26,8 @@ along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
  #include <ctype.h>
  #include "gmp.h"
  #include "gmp-impl.h"
+#include "longlong.h"
  
-extern const unsigned char __gmp_digit_value_tab[];
  #define digit_value_tab __gmp_digit_value_tab
  
  int
@@ -95,7 +95,7 @@ mpz_set_str (mpz_ptr x, const char *str, int base)
    /* Make sure the string does not become empty, mpn_set_str would fail.  */
    if (c == 0)
      {
-      x->_mp_size = 0;
+      SIZ (x) = 0;
        return 0;
      }
  
@@ -122,13 +122,12 @@ mpz_set_str (mpz_ptr x, const char *str, int base)
  
    str_size = s - begs;
  
-  xsize = 2 + (mp_size_t)
-    (str_size / (GMP_NUMB_BITS * mp_bases[base].chars_per_bit_exactly));
+  LIMBS_PER_DIGIT_IN_BASE (xsize, str_size, base);
    MPZ_REALLOC (x, xsize);
  
    /* Convert the byte array in base BASE to our bignum format.  */
-  xsize = mpn_set_str (x->_mp_d, (unsigned char *) begs, str_size, base);
-  x->_mp_size = negative ? -xsize : xsize;
+  xsize = mpn_set_str (PTR (x), (unsigned char *) begs, str_size, base);
+  SIZ (x) = negative ? -xsize : xsize;
  
    TMP_FREE;
    return 0;
diff --git a/mpz/set_ui.c b/mpz/set_ui.c

index 13afc6a787588d4d0ef824beefaaaa3f591dc624..3c1931176b3a53f8f07bf29cb640968d7a05dddf 100644 (file)
--- a/mpz/set_ui.c
+++ b/mpz/set_ui.c
@@ -1,7 +1,7 @@
  /* mpz_set_ui(integer, val) -- Assign INTEGER with a small value VAL.
  
-Copyright 1991, 1993, 1994, 1995, 2001, 2002, 2004 Free Software Foundation,
-Inc.
+Copyright 1991, 1993, 1994, 1995, 2001, 2002, 2004, 2012 Free Software
+Foundation, Inc.
  
  This file is part of the GNU MP Library.
  
@@ -26,17 +26,17 @@ mpz_set_ui (mpz_ptr dest, unsigned long int val)
  {
    mp_size_t size;
  
-  dest->_mp_d[0] = val & GMP_NUMB_MASK;
+  PTR (dest)[0] = val & GMP_NUMB_MASK;
    size = val != 0;
  
  #if BITS_PER_ULONG > GMP_NUMB_BITS  /* avoid warnings about shift amount */
    if (val > GMP_NUMB_MAX)
      {
        MPZ_REALLOC (dest, 2);
-      dest->_mp_d[1] = val >> GMP_NUMB_BITS;
+      PTR (dest)[1] = val >> GMP_NUMB_BITS;
        size = 2;
      }
  #endif
  
-  dest->_mp_size = size;
+  SIZ (dest) = size;
  }
diff --git a/mpz/setbit.c b/mpz/setbit.c

index 6d9b4020d5668da3717ed65e82f12ab82bc99dff..63c86da6f0b0abfdeb3b821d04232472882349a0 100644 (file)
--- a/mpz/setbit.c
+++ b/mpz/setbit.c
@@ -1,6 +1,6 @@
  /* mpz_setbit -- set a specified bit.
  
-Copyright 1991, 1993, 1994, 1995, 1997, 1999, 2001, 2002 Free Software
+Copyright 1991, 1993, 1994, 1995, 1997, 1999, 2001, 2002, 2012 Free Software
  Foundation, Inc.
  
  This file is part of the GNU MP Library.
@@ -22,29 +22,29 @@ along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
  #include "gmp-impl.h"
  
  void
-mpz_setbit (mpz_ptr d, mp_bitcnt_t bit_index)
+mpz_setbit (mpz_ptr d, mp_bitcnt_t bit_idx)
  {
-  mp_size_t dsize = d->_mp_size;
-  mp_ptr dp = d->_mp_d;
-  mp_size_t limb_index;
+  mp_size_t dsize = SIZ (d);
+  mp_ptr dp = PTR (d);
+  mp_size_t limb_idx;
+  mp_limb_t mask;
  
-  limb_index = bit_index / GMP_NUMB_BITS;
+  limb_idx = bit_idx / GMP_NUMB_BITS;
+  mask = CNST_LIMB(1) << (bit_idx % GMP_NUMB_BITS);
    if (dsize >= 0)
      {
-      if (limb_index < dsize)
+      if (limb_idx < dsize)
         {
-         dp[limb_index] |= (mp_limb_t) 1 << (bit_index % GMP_NUMB_BITS);
-         d->_mp_size = dsize;
+         dp[limb_idx] |= mask;
         }
        else
         {
           /* Ugh.  The bit should be set outside of the end of the
              number.  We have to increase the size of the number.  */
-         if (UNLIKELY (d->_mp_alloc < limb_index + 1))
-            dp = _mpz_realloc (d, limb_index + 1);
-         MPN_ZERO (dp + dsize, limb_index - dsize);
-         dp[limb_index] = (mp_limb_t) 1 << (bit_index % GMP_NUMB_BITS);
-         d->_mp_size = limb_index + 1;
+         dp = MPZ_REALLOC (d, limb_idx + 1);
+         SIZ (d) = limb_idx + 1;
+         MPN_ZERO (dp + dsize, limb_idx - dsize);
+         dp[limb_idx] = mask;
         }
      }
    else
@@ -58,60 +58,40 @@ mpz_setbit (mpz_ptr d, mp_bitcnt_t bit_index)
  
        dsize = -dsize;
  
-      /* No upper bound on this loop, we're sure there's a non-zero limb
-        sooner ot later.  */
-      for (zero_bound = 0; ; zero_bound++)
-       if (dp[zero_bound] != 0)
-         break;
+      /* No index upper bound on this loop, we're sure there's a non-zero limb
+        sooner or later.  */
+      zero_bound = 0;
+      while (dp[zero_bound] == 0)
+       zero_bound++;
  
-      if (limb_index > zero_bound)
+      if (limb_idx > zero_bound)
         {
-         if (limb_index < dsize)
-            {
-              mp_limb_t  dlimb;
-              dlimb = dp[limb_index];
-              dlimb &= ~((mp_limb_t) 1 << (bit_index % GMP_NUMB_BITS));
-              dp[limb_index] = dlimb;
-
-              if (UNLIKELY (dlimb == 0 && limb_index == dsize-1))
-                {
-                  /* high limb became zero, must normalize */
-                  do {
-                    dsize--;
-                  } while (dsize > 0 && dp[dsize-1] == 0);
-                  d->_mp_size = -dsize;
-                }
-            }
-       }
-      else if (limb_index == zero_bound)
-       {
-         dp[limb_index] = ((dp[limb_index] - 1)
-                           & ~((mp_limb_t) 1 << (bit_index % GMP_NUMB_BITS))) + 1;
-         if (dp[limb_index] == 0)
+         if (limb_idx < dsize)
             {
-             mp_size_t i;
-             for (i = limb_index + 1; i < dsize; i++)
+             mp_limb_t  dlimb;
+             dlimb = dp[limb_idx] & ~mask;
+             dp[limb_idx] = dlimb;
+
+             if (UNLIKELY (dlimb == 0 && limb_idx == dsize-1))
                 {
-                 dp[i] += 1;
-                 if (dp[i] != 0)
-                   goto fin;
+                 /* high limb became zero, must normalize */
+                 do {
+                   dsize--;
+                 } while (dsize > 0 && dp[dsize-1] == 0);
+                 SIZ (d) = -dsize;
                 }
-             /* We got carry all way out beyond the end of D.  Increase
-                its size (and allocation if necessary).  */
-             dsize++;
-             if (UNLIKELY (d->_mp_alloc < dsize))
-                dp = _mpz_realloc (d, dsize);
-             dp[i] = 1;
-             d->_mp_size = -dsize;
-           fin:;
             }
         }
+      else if (limb_idx == zero_bound)
+       {
+         dp[limb_idx] = ((dp[limb_idx] - 1) & ~mask) + 1;
+         ASSERT (dp[limb_idx] != 0);
+       }
        else
         {
-         mpn_decr_u (dp + limb_index,
-                    (mp_limb_t) 1 << (bit_index % GMP_NUMB_BITS));
+         MPN_DECR_U (dp + limb_idx, dsize - limb_idx, mask);
           dsize -= dp[dsize - 1] == 0;
-         d->_mp_size = -dsize;
+         SIZ (d) = -dsize;
         }
      }
  }
diff --git a/mpz/sqrt.c b/mpz/sqrt.c

index 6de21209c2eb1b75cc1c3b7b4ba365d7b3a4bef3..9a4f8cbc150161e06748baf0f1a25e9132e14a24 100644 (file)
--- a/mpz/sqrt.c
+++ b/mpz/sqrt.c
@@ -1,7 +1,7 @@
  /* mpz_sqrt(root, u) --  Set ROOT to floor(sqrt(U)).
  
-Copyright 1991, 1993, 1994, 1996, 2000, 2001, 2005 Free Software Foundation,
-Inc.
+Copyright 1991, 1993, 1994, 1996, 2000, 2001, 2005, 2012 Free Software
+Foundation, Inc.
  
  This file is part of the GNU MP Library.
  
@@ -27,58 +27,40 @@ mpz_sqrt (mpz_ptr root, mpz_srcptr op)
  {
    mp_size_t op_size, root_size;
    mp_ptr root_ptr, op_ptr;
-  mp_ptr free_me = NULL;
-  mp_size_t free_me_size;
-  TMP_DECL;
  
-  TMP_MARK;
-  op_size = op->_mp_size;
-  if (op_size <= 0)
+  op_size = SIZ (op);
+  if (UNLIKELY (op_size <= 0))
      {
        if (op_size < 0)
-        SQRT_OF_NEGATIVE;
+       SQRT_OF_NEGATIVE;
        SIZ(root) = 0;
        return;
      }
  
    /* The size of the root is accurate after this simple calculation.  */
    root_size = (op_size + 1) / 2;
+  SIZ (root) = root_size;
  
-  root_ptr = root->_mp_d;
-  op_ptr = op->_mp_d;
+  op_ptr = PTR (op);
  
-  if (root->_mp_alloc < root_size)
+  if (root == op)
      {
-      if (root_ptr == op_ptr)
-       {
-         free_me = root_ptr;
-         free_me_size = root->_mp_alloc;
-       }
-      else
-       (*__gmp_free_func) (root_ptr, root->_mp_alloc * BYTES_PER_MP_LIMB);
+      /* Allocate temp space for the root, which we then copy to the
+        shared OP/ROOT variable.  */
+      TMP_DECL;
+      TMP_MARK;
  
-      root->_mp_alloc = root_size;
-      root_ptr = (mp_ptr) (*__gmp_allocate_func) (root_size * BYTES_PER_MP_LIMB);
-      root->_mp_d = root_ptr;
+      root_ptr = TMP_ALLOC_LIMBS (root_size);
+      mpn_sqrtrem (root_ptr, NULL, op_ptr, op_size);
+
+      MPN_COPY (op_ptr, root_ptr, root_size);
+
+      TMP_FREE;
      }
    else
      {
-      /* Make OP not overlap with ROOT.  */
-      if (root_ptr == op_ptr)
-       {
-         /* ROOT and OP are identical.  Allocate temporary space for OP.  */
-         op_ptr = TMP_ALLOC_LIMBS (op_size);
-         /* Copy to the temporary space.  Hack: Avoid temporary variable
-            by using ROOT_PTR.  */
-         MPN_COPY (op_ptr, root_ptr, op_size);
-       }
-    }
+      root_ptr = MPZ_REALLOC (root, root_size);
  
-  mpn_sqrtrem (root_ptr, NULL, op_ptr, op_size);
-
-  root->_mp_size = root_size;
-
-  if (free_me != NULL)
-    (*__gmp_free_func) (free_me, free_me_size * BYTES_PER_MP_LIMB);
-  TMP_FREE;
+      mpn_sqrtrem (root_ptr, NULL, op_ptr, op_size);
+    }
  }
diff --git a/mpz/sqrtrem.c b/mpz/sqrtrem.c

index ed8a85b6c6f076f0d9f2c20ceff58db7d4cdbe8e..76abdc0c29de41a1afd3afc947820eccddcab264 100644 (file)
--- a/mpz/sqrtrem.c
+++ b/mpz/sqrtrem.c
@@ -1,8 +1,8 @@
  /* mpz_sqrtrem(root,rem,x) -- Set ROOT to floor(sqrt(X)) and REM
     to the remainder, i.e. X - ROOT**2.
  
-Copyright 1991, 1993, 1994, 1996, 2000, 2001, 2005 Free Software Foundation,
-Inc.
+Copyright 1991, 1993, 1994, 1996, 2000, 2001, 2005, 2011, 2012 Free Software
+Foundation, Inc.
  
  This file is part of the GNU MP Library.
  
@@ -19,83 +19,56 @@ License for more details.
  You should have received a copy of the GNU Lesser General Public License
  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
  
-#include <stdio.h> /* for NULL */
  #include "gmp.h"
  #include "gmp-impl.h"
-#ifdef BERKELEY_MP
-#include "mp.h"
-#endif
  
  void
-#ifndef BERKELEY_MP
  mpz_sqrtrem (mpz_ptr root, mpz_ptr rem, mpz_srcptr op)
-#else /* BERKELEY_MP */
-msqrt (mpz_srcptr op, mpz_ptr root, mpz_ptr rem)
-#endif /* BERKELEY_MP */
  {
    mp_size_t op_size, root_size, rem_size;
-  mp_ptr root_ptr, op_ptr;
-  mp_ptr free_me = NULL;
-  mp_size_t free_me_size;
-  TMP_DECL;
-
-  TMP_MARK;
-  op_size = op->_mp_size;
-  if (op_size <= 0)
+  mp_ptr root_ptr, op_ptr, rem_ptr;
+
+  op_size = SIZ (op);
+  if (UNLIKELY (op_size <= 0))
      {
        if (op_size < 0)
-        SQRT_OF_NEGATIVE;
+       SQRT_OF_NEGATIVE;
        SIZ(root) = 0;
        SIZ(rem) = 0;
        return;
      }
  
-  if (rem->_mp_alloc < op_size)
-    _mpz_realloc (rem, op_size);
+  rem_ptr = MPZ_REALLOC (rem, op_size);
  
    /* The size of the root is accurate after this simple calculation.  */
    root_size = (op_size + 1) / 2;
+  SIZ (root) = root_size;
  
-  root_ptr = root->_mp_d;
-  op_ptr = op->_mp_d;
+  op_ptr = PTR (op);
  
-  if (root->_mp_alloc < root_size)
+  if (root == op)
      {
-      if (root_ptr == op_ptr)
-       {
-         free_me = root_ptr;
-         free_me_size = root->_mp_alloc;
-       }
-      else
-       (*__gmp_free_func) (root_ptr, root->_mp_alloc * BYTES_PER_MP_LIMB);
-
-      root->_mp_alloc = root_size;
-      root_ptr = (mp_ptr) (*__gmp_allocate_func) (root_size * BYTES_PER_MP_LIMB);
-      root->_mp_d = root_ptr;
+      /* Allocate temp space for the root, which we then copy to the
+        shared OP/ROOT variable.  */
+      TMP_DECL;
+      TMP_MARK;
+
+      root_ptr = TMP_ALLOC_LIMBS (root_size);
+      rem_size = mpn_sqrtrem (root_ptr, rem_ptr, op_ptr, op_size);
+
+      if (rem != root) /* Don't overwrite remainder */
+       MPN_COPY (op_ptr, root_ptr, root_size);
+
+      TMP_FREE;
      }
    else
      {
-      /* Make OP not overlap with ROOT.  */
-      if (root_ptr == op_ptr)
-       {
-         /* ROOT and OP are identical.  Allocate temporary space for OP.  */
-         op_ptr = TMP_ALLOC_LIMBS (op_size);
-         /* Copy to the temporary space.  Hack: Avoid temporary variable
-            by using ROOT_PTR.  */
-         MPN_COPY (op_ptr, root_ptr, op_size);
-       }
-    }
-
-  rem_size = mpn_sqrtrem (root_ptr, rem->_mp_d, op_ptr, op_size);
+      root_ptr = MPZ_REALLOC (root, root_size);
  
-  root->_mp_size = root_size;
-
-  /* Write remainder size last, to enable us to define this function to
-     give only the square root remainder, if the user calls if with
-     ROOT == REM.  */
-  rem->_mp_size = rem_size;
+      rem_size = mpn_sqrtrem (root_ptr, rem_ptr, op_ptr, op_size);
+    }
  
-  if (free_me != NULL)
-    (*__gmp_free_func) (free_me, free_me_size * BYTES_PER_MP_LIMB);
-  TMP_FREE;
+  /* Write remainder size last, to make this function give only the square root
+     remainder, when passed ROOT == REM.  */
+  SIZ (rem) = rem_size;
  }
diff --git a/mpz/swap.c b/mpz/swap.c

index de8195a7d8d984c40d33c6953d1e4ec3b0b5587c..73348fe16d029f5e0c19e4631128c0487b210501 100644 (file)
--- a/mpz/swap.c
+++ b/mpz/swap.c
@@ -1,6 +1,6 @@
  /* mpz_swap (dest_integer, src_integer) -- Swap U and V.
  
-Copyright 1997, 1998, 2001 Free Software Foundation, Inc.
+Copyright 1997, 1998, 2001, 2012 Free Software Foundation, Inc.
  
  This file is part of the GNU MP Library.
  
@@ -27,18 +27,18 @@ mpz_swap (mpz_ptr u, mpz_ptr v) __GMP_NOTHROW
    mp_size_t usize, vsize;
    mp_size_t ualloc, valloc;
  
-  ualloc = u->_mp_alloc;
-  valloc = v->_mp_alloc;
-  v->_mp_alloc = ualloc;
-  u->_mp_alloc = valloc;
+  ualloc = ALLOC (u);
+  valloc = ALLOC (v);
+  ALLOC (v) = ualloc;
+  ALLOC (u) = valloc;
  
-  usize = u->_mp_size;
-  vsize = v->_mp_size;
-  v->_mp_size = usize;
-  u->_mp_size = vsize;
+  usize = SIZ (u);
+  vsize = SIZ (v);
+  SIZ (v) = usize;
+  SIZ (u) = vsize;
  
-  up = u->_mp_d;
-  vp = v->_mp_d;
-  v->_mp_d = up;
-  u->_mp_d = vp;
+  up = PTR (u);
+  vp = PTR (v);
+  PTR (v) = up;
+  PTR (u) = vp;
  }
diff --git a/mpz/tdiv_q.c b/mpz/tdiv_q.c

index e78dd64dcefc02dfd95b89e21b2883ff26f28887..6c6b65c8529883089747e98850a69f222c032ca1 100644 (file)
--- a/mpz/tdiv_q.c
+++ b/mpz/tdiv_q.c
@@ -1,7 +1,7 @@
  /* mpz_tdiv_q -- divide two integers and produce a quotient.
  
-Copyright 1991, 1993, 1994, 1996, 2000, 2001, 2005, 2010 Free Software Foundation,
-Inc.
+Copyright 1991, 1993, 1994, 1996, 2000, 2001, 2005, 2010, 2012 Free Software
+Foundation, Inc.
  
  This file is part of the GNU MP Library.
  
@@ -36,7 +36,7 @@ mpz_tdiv_q (mpz_ptr quot, mpz_srcptr num, mpz_srcptr den)
    dl = ABS (ds);
    ql = nl - dl + 1;
  
-  if (dl == 0)
+  if (UNLIKELY (dl == 0))
      DIVIDE_BY_ZERO;
  
    if (ql <= 0)
@@ -45,10 +45,9 @@ mpz_tdiv_q (mpz_ptr quot, mpz_srcptr num, mpz_srcptr den)
        return;
      }
  
-  MPZ_REALLOC (quot, ql);
+  qp = MPZ_REALLOC (quot, ql);
  
    TMP_MARK;
-  qp = PTR (quot);
    np = PTR (num);
    dp = PTR (den);
  
diff --git a/mpz/tdiv_q_2exp.c b/mpz/tdiv_q_2exp.c

index 491d9d0fe43736e653a4c2397c1ab5428157ffae..88c4deda503b8e1216d1fe78dab18ea8547bab60 100644 (file)
--- a/mpz/tdiv_q_2exp.c
+++ b/mpz/tdiv_q_2exp.c
@@ -1,7 +1,8 @@
  /* mpz_tdiv_q_2exp -- Divide an integer by 2**CNT.  Round the quotient
     towards -infinity.
  
-Copyright 1991, 1993, 1994, 1996, 2001, 2002 Free Software Foundation, Inc.
+Copyright 1991, 1993, 1994, 1996, 2001, 2002, 2012 Free Software Foundation,
+Inc.
  
  This file is part of the GNU MP Library.
  
@@ -22,38 +23,35 @@ along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
  #include "gmp-impl.h"
  
  void
-mpz_tdiv_q_2exp (mpz_ptr w, mpz_srcptr u, mp_bitcnt_t cnt)
+mpz_tdiv_q_2exp (mpz_ptr r, mpz_srcptr u, mp_bitcnt_t cnt)
  {
-  mp_size_t usize, wsize;
+  mp_size_t un, rn;
    mp_size_t limb_cnt;
+  mp_ptr rp;
+  mp_srcptr up;
  
-  usize = u->_mp_size;
+  un = SIZ(u);
    limb_cnt = cnt / GMP_NUMB_BITS;
-  wsize = ABS (usize) - limb_cnt;
-  if (wsize <= 0)
-    w->_mp_size = 0;
+  rn = ABS (un) - limb_cnt;
+
+  if (rn <= 0)
+    rn = 0;
    else
      {
-      mp_ptr wp;
-      mp_srcptr up;
-
-      if (w->_mp_alloc < wsize)
-       _mpz_realloc (w, wsize);
-
-      wp = w->_mp_d;
-      up = u->_mp_d;
+      rp = MPZ_REALLOC (r, rn);
+      up = PTR(u) + limb_cnt;
  
        cnt %= GMP_NUMB_BITS;
        if (cnt != 0)
         {
-         mpn_rshift (wp, up + limb_cnt, wsize, cnt);
-         wsize -= wp[wsize - 1] == 0;
+         mpn_rshift (rp, up, rn, cnt);
+         rn -= rp[rn - 1] == 0;
         }
        else
         {
-         MPN_COPY_INCR (wp, up + limb_cnt, wsize);
+         MPN_COPY_INCR (rp, up, rn);
         }
-
-      w->_mp_size = usize >= 0 ? wsize : -wsize;
      }
+
+  SIZ(r) = un >= 0 ? rn : -rn;
  }
diff --git a/mpz/tdiv_q_ui.c b/mpz/tdiv_q_ui.c

index 50abb85f94418a305e74219ac73dd837dc2df9f0..6d287943dc2832c025a240f4f917076f88b75c5f 100644 (file)
--- a/mpz/tdiv_q_ui.c
+++ b/mpz/tdiv_q_ui.c
@@ -1,7 +1,7 @@
  /* mpz_tdiv_q_ui(quot, dividend, divisor_limb)
     -- Divide DIVIDEND by DIVISOR_LIMB and store the result in QUOT.
  
-Copyright 1991, 1993, 1994, 1996, 1998, 2001, 2002, 2004 Free Software
+Copyright 1991, 1993, 1994, 1996, 1998, 2001, 2002, 2004, 2012 Free Software
  Foundation, Inc.
  
  This file is part of the GNU MP Library.
@@ -29,7 +29,7 @@ mpz_tdiv_q_ui (mpz_ptr quot, mpz_srcptr dividend, unsigned long int divisor)
    mp_ptr np, qp;
    mp_limb_t rl;
  
-  if (divisor == 0)
+  if (UNLIKELY (divisor == 0))
      DIVIDE_BY_ZERO;
  
    ns = SIZ(dividend);
@@ -40,8 +40,7 @@ mpz_tdiv_q_ui (mpz_ptr quot, mpz_srcptr dividend, unsigned long int divisor)
      }
  
    nn = ABS(ns);
-  MPZ_REALLOC (quot, nn);
-  qp = PTR(quot);
+  qp = MPZ_REALLOC (quot, nn);
    np = PTR(dividend);
  
  #if BITS_PER_ULONG > GMP_NUMB_BITS  /* avoid warnings about shift amount */
diff --git a/mpz/tdiv_qr.c b/mpz/tdiv_qr.c

index 64b6e03f02b639bdbe16af39f903071cf372cc93..cfbc7d9bf21ac1326d4a255372f797bd50777b7a 100644 (file)
--- a/mpz/tdiv_qr.c
+++ b/mpz/tdiv_qr.c
@@ -1,7 +1,8 @@
  /* mpz_tdiv_qr(quot,rem,dividend,divisor) -- Set QUOT to DIVIDEND/DIVISOR,
     and REM to DIVIDEND mod DIVISOR.
  
-Copyright 1991, 1993, 1994, 2000, 2001, 2005 Free Software Foundation, Inc.
+Copyright 1991, 1993, 1994, 2000, 2001, 2005, 2011, 2012 Free Software
+Foundation, Inc.
  
  This file is part of the GNU MP Library.
  
@@ -21,16 +22,9 @@ along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
  #include "gmp.h"
  #include "gmp-impl.h"
  #include "longlong.h"
-#ifdef BERKELEY_MP
-#include "mp.h"
-#endif
  
  void
-#ifndef BERKELEY_MP
  mpz_tdiv_qr (mpz_ptr quot, mpz_ptr rem, mpz_srcptr num, mpz_srcptr den)
-#else /* BERKELEY_MP */
-mdiv (mpz_srcptr num, mpz_srcptr den, mpz_ptr quot, mpz_ptr rem)
-#endif /* BERKELEY_MP */
  {
    mp_size_t ql;
    mp_size_t ns, ds, nl, dl;
@@ -43,18 +37,16 @@ mdiv (mpz_srcptr num, mpz_srcptr den, mpz_ptr quot, mpz_ptr rem)
    dl = ABS (ds);
    ql = nl - dl + 1;
  
-  if (dl == 0)
+  if (UNLIKELY (dl == 0))
      DIVIDE_BY_ZERO;
  
-  MPZ_REALLOC (rem, dl);
+  rp = MPZ_REALLOC (rem, dl);
  
    if (ql <= 0)
      {
        if (num != rem)
         {
-         mp_ptr np, rp;
           np = PTR (num);
-         rp = PTR (rem);
           MPN_COPY (rp, np, nl);
           SIZ (rem) = SIZ (num);
         }
@@ -64,11 +56,9 @@ mdiv (mpz_srcptr num, mpz_srcptr den, mpz_ptr quot, mpz_ptr rem)
        return;
      }
  
-  MPZ_REALLOC (quot, ql);
+  qp = MPZ_REALLOC (quot, ql);
  
    TMP_MARK;
-  qp = PTR (quot);
-  rp = PTR (rem);
    np = PTR (num);
    dp = PTR (den);
  
diff --git a/mpz/tdiv_qr_ui.c b/mpz/tdiv_qr_ui.c

index 4f797b1801556a37c62f1900e5423446c0c37dee..361e5a485c51d0e1929aea44b48aab8d7fedab91 100644 (file)
--- a/mpz/tdiv_qr_ui.c
+++ b/mpz/tdiv_qr_ui.c
@@ -2,7 +2,7 @@
     Set QUOT to DIVIDEND / SHORT_DIVISOR
     and REM to DIVIDEND mod SHORT_DIVISOR.
  
-Copyright 1991, 1993, 1994, 1996, 1998, 2001, 2002, 2004 Free Software
+Copyright 1991, 1993, 1994, 1996, 1998, 2001, 2002, 2004, 2012 Free Software
  Foundation, Inc.
  
  This file is part of the GNU MP Library.
@@ -30,7 +30,7 @@ mpz_tdiv_qr_ui (mpz_ptr quot, mpz_ptr rem, mpz_srcptr dividend, unsigned long in
    mp_ptr np, qp;
    mp_limb_t rl;
  
-  if (divisor == 0)
+  if (UNLIKELY (divisor == 0))
      DIVIDE_BY_ZERO;
  
    ns = SIZ(dividend);
@@ -42,8 +42,7 @@ mpz_tdiv_qr_ui (mpz_ptr quot, mpz_ptr rem, mpz_srcptr dividend, unsigned long in
      }
  
    nn = ABS(ns);
-  MPZ_REALLOC (quot, nn);
-  qp = PTR(quot);
+  qp = MPZ_REALLOC (quot, nn);
    np = PTR(dividend);
  
  #if BITS_PER_ULONG > GMP_NUMB_BITS  /* avoid warnings about shift amount */
@@ -62,8 +61,7 @@ mpz_tdiv_qr_ui (mpz_ptr quot, mpz_ptr rem, mpz_srcptr dividend, unsigned long in
           return rl;
         }
  
-      MPZ_REALLOC (rem, 2);
-      rp = PTR(rem);
+      rp = MPZ_REALLOC (rem, 2);
  
        dp[0] = divisor & GMP_NUMB_MASK;
        dp[1] = divisor >> GMP_NUMB_BITS;
diff --git a/mpz/tdiv_r.c b/mpz/tdiv_r.c

index a3b008c5ed8fa60db98fc43fc97e1fc3b910cedd..13de14b1a829e46c3b13d6f4213361ab38a0c7db 100644 (file)
--- a/mpz/tdiv_r.c
+++ b/mpz/tdiv_r.c
@@ -1,6 +1,7 @@
  /* mpz_tdiv_r(rem, dividend, divisor) -- Set REM to DIVIDEND mod DIVISOR.
  
-Copyright 1991, 1993, 1994, 2000, 2001, 2005 Free Software Foundation, Inc.
+Copyright 1991, 1993, 1994, 2000, 2001, 2005, 2012 Free Software Foundation,
+Inc.
  
  This file is part of the GNU MP Library.
  
@@ -35,18 +36,16 @@ mpz_tdiv_r (mpz_ptr rem, mpz_srcptr num, mpz_srcptr den)
    dl = ABS (ds);
    ql = nl - dl + 1;
  
-  if (dl == 0)
+  if (UNLIKELY (dl == 0))
      DIVIDE_BY_ZERO;
  
-  MPZ_REALLOC (rem, dl);
+  rp = MPZ_REALLOC (rem, dl);
  
    if (ql <= 0)
      {
        if (num != rem)
         {
-         mp_ptr np, rp;
           np = PTR (num);
-         rp = PTR (rem);
           MPN_COPY (rp, np, nl);
           SIZ (rem) = SIZ (num);
         }
@@ -55,7 +54,6 @@ mpz_tdiv_r (mpz_ptr rem, mpz_srcptr num, mpz_srcptr den)
  
    TMP_MARK;
    qp = TMP_ALLOC_LIMBS (ql);
-  rp = PTR (rem);
    np = PTR (num);
    dp = PTR (den);
  
diff --git a/mpz/tdiv_r_2exp.c b/mpz/tdiv_r_2exp.c

index 3828ff1306c74e60d51f9f71ffd502c58b1dc13b..5ed9de8931241769902fda54ba73bf67f57fa989 100644 (file)
--- a/mpz/tdiv_r_2exp.c
+++ b/mpz/tdiv_r_2exp.c
@@ -1,6 +1,7 @@
-/* mpz_tdiv_r_2exp -- Divide a integer by 2**CNT and produce a remainder.
+/* mpz_tdiv_r_2exp -- Divide an integer by 2**CNT and produce a remainder.
  
-Copyright 1991, 1993, 1994, 1995, 2001, 2002 Free Software Foundation, Inc.
+Copyright 1991, 1993, 1994, 1995, 2001, 2002, 2012 Free Software Foundation,
+Inc.
  
  This file is part of the GNU MP Library.
  
@@ -23,10 +24,10 @@ along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
  void
  mpz_tdiv_r_2exp (mpz_ptr res, mpz_srcptr in, mp_bitcnt_t cnt)
  {
-  mp_size_t in_size = ABS (in->_mp_size);
+  mp_size_t in_size = ABSIZ (in);
    mp_size_t res_size;
    mp_size_t limb_cnt = cnt / GMP_NUMB_BITS;
-  mp_srcptr in_ptr = in->_mp_d;
+  mp_srcptr in_ptr = PTR (in);
  
    if (in_size > limb_cnt)
      {
@@ -37,18 +38,16 @@ mpz_tdiv_r_2exp (mpz_ptr res, mpz_srcptr in, mp_bitcnt_t cnt)
        if (x != 0)
         {
           res_size = limb_cnt + 1;
-         if (res->_mp_alloc < res_size)
-           _mpz_realloc (res, res_size);
+         MPZ_REALLOC (res, res_size);
  
-         res->_mp_d[limb_cnt] = x;
+         PTR (res)[limb_cnt] = x;
         }
        else
         {
           res_size = limb_cnt;
           MPN_NORMALIZE (in_ptr, res_size);
  
-         if (res->_mp_alloc < res_size)
-           _mpz_realloc (res, res_size);
+         MPZ_REALLOC (res, res_size);
  
           limb_cnt = res_size;
         }
@@ -58,13 +57,12 @@ mpz_tdiv_r_2exp (mpz_ptr res, mpz_srcptr in, mp_bitcnt_t cnt)
        /* The input operand is smaller than 2**CNT.  We perform a no-op,
          apart from that we might need to copy IN to RES.  */
        res_size = in_size;
-      if (res->_mp_alloc < res_size)
-       _mpz_realloc (res, res_size);
+      MPZ_REALLOC (res, res_size);
  
        limb_cnt = res_size;
      }
  
    if (res != in)
-    MPN_COPY (res->_mp_d, in->_mp_d, limb_cnt);
-  res->_mp_size = in->_mp_size >= 0 ? res_size : -res_size;
+    MPN_COPY (PTR (res), PTR (in), limb_cnt);
+  SIZ (res) = SIZ (in) >= 0 ? res_size : -res_size;
  }
diff --git a/mpz/tdiv_r_ui.c b/mpz/tdiv_r_ui.c

index 64a8b94d15264b2644543ed4dc8078a59ef72a3d..a206811bf65b1af37bf2c74924ea50c8d6792d07 100644 (file)
--- a/mpz/tdiv_r_ui.c
+++ b/mpz/tdiv_r_ui.c
@@ -1,8 +1,8 @@
  /* mpz_tdiv_r_ui(rem, dividend, divisor_limb)
     -- Set REM to DIVDEND mod DIVISOR_LIMB.
  
-Copyright 1991, 1993, 1994, 1996, 1998, 2001, 2002, 2004, 2005 Free Software
-Foundation, Inc.
+Copyright 1991, 1993, 1994, 1996, 1998, 2001, 2002, 2004, 2005, 2012 Free
+Software Foundation, Inc.
  
  This file is part of the GNU MP Library.
  
@@ -29,7 +29,7 @@ mpz_tdiv_r_ui (mpz_ptr rem, mpz_srcptr dividend, unsigned long int divisor)
    mp_ptr np;
    mp_limb_t rl;
  
-  if (divisor == 0)
+  if (UNLIKELY (divisor == 0))
      DIVIDE_BY_ZERO;
  
    ns = SIZ(dividend);
@@ -57,8 +57,7 @@ mpz_tdiv_r_ui (mpz_ptr rem, mpz_srcptr dividend, unsigned long int divisor)
           return rl;
         }
  
-      MPZ_REALLOC (rem, 2);
-      rp = PTR(rem);
+      rp = MPZ_REALLOC (rem, 2);
  
        TMP_MARK;
        dp[0] = divisor & GMP_NUMB_MASK;
diff --git a/mpz/tdiv_ui.c b/mpz/tdiv_ui.c

index fafd97ee0b878a8f33b813344a38f29c179382aa..6d834fc4e57b0f968511414074a69bffa02530fc 100644 (file)
--- a/mpz/tdiv_ui.c
+++ b/mpz/tdiv_ui.c
@@ -1,7 +1,7 @@
  /* mpz_tdiv_ui(dividend, divisor_limb) -- Return DIVDEND mod DIVISOR_LIMB.
  
-Copyright 1991, 1993, 1994, 1996, 1997, 1998, 2001, 2002, 2004, 2005 Free
-Software Foundation, Inc.
+Copyright 1991, 1993, 1994, 1996, 1997, 1998, 2001, 2002, 2004, 2005, 2012
+Free Software Foundation, Inc.
  
  This file is part of the GNU MP Library.
  
@@ -29,7 +29,7 @@ mpz_tdiv_ui (mpz_srcptr dividend, unsigned long int divisor)
    mp_ptr np;
    mp_limb_t rl;
  
-  if (divisor == 0)
+  if (UNLIKELY (divisor == 0))
      DIVIDE_BY_ZERO;
  
    ns = SIZ(dividend);
diff --git a/mpz/tstbit.c b/mpz/tstbit.c

index c3006c813b35a86ad8cb1ea38b3bd049024ddd30..595a7b72114cc0b06d309570f53366aa0b94345c 100644 (file)
--- a/mpz/tstbit.c
+++ b/mpz/tstbit.c
@@ -56,14 +56,14 @@ mpz_tstbit (mpz_srcptr u, mp_bitcnt_t bit_index) __GMP_NOTHROW
        limb = -limb;     /* twos complement */
  
        while (p != u_ptr)
-        {
-          p--;
-          if (*p != 0)
-            {
-              limb--;   /* make it a ones complement instead */
-              break;
-            }
-        }
+       {
+         p--;
+         if (*p != 0)
+           {
+             limb--;   /* make it a ones complement instead */
+             break;
+           }
+       }
      }
  
    return (limb >> (bit_index % GMP_NUMB_BITS)) & 1;
diff --git a/mpz/urandomm.c b/mpz/urandomm.c

index b8a6d6898c1090596b76da426b5aa59e0ba0ea31..6bf74603e9fb831462ca7776a5b022ca71e2b978 100644 (file)
--- a/mpz/urandomm.c
+++ b/mpz/urandomm.c
@@ -2,7 +2,7 @@
     integer in the range 0 to N-1, using STATE as the random state
     previously initialized by a call to gmp_randinit().
  
-Copyright 2000, 2002  Free Software Foundation, Inc.
+Copyright 2000, 2002, 2012 Free Software Foundation, Inc.
  
  This file is part of the GNU MP Library.
  
@@ -37,7 +37,7 @@ mpz_urandomm (mpz_ptr rop, gmp_randstate_t rstate, mpz_srcptr n)
    TMP_DECL;
  
    size = ABSIZ (n);
-  if (size == 0)
+  if (UNLIKELY (size == 0))
      DIVIDE_BY_ZERO;
  
    nlast = &PTR (n)[size - 1];
diff --git a/mpz/xor.c b/mpz/xor.c

index 18edd8129ed2b09eafdc036a2c89e26f0c6e72ee..66ff1c0fb4f679d37910873e58f1d280f897120d 100644 (file)
--- a/mpz/xor.c
+++ b/mpz/xor.c
@@ -1,7 +1,7 @@
  /* mpz_xor -- Logical xor.
  
-Copyright 1991, 1993, 1994, 1996, 1997, 2000, 2001, 2005 Free Software
-Foundation, Inc.
+Copyright 1991, 1993, 1994, 1996, 1997, 2000, 2001, 2005, 2012 Free
+Software Foundation, Inc.
  
  This file is part of the GNU MP Library.
  
@@ -28,7 +28,6 @@ mpz_xor (mpz_ptr res, mpz_srcptr op1, mpz_srcptr op2)
    mp_size_t op1_size, op2_size;
    mp_ptr res_ptr;
    mp_size_t res_size, res_alloc;
-  mp_size_t i;
    TMP_DECL;
  
    TMP_MARK;
@@ -56,8 +55,8 @@ mpz_xor (mpz_ptr res, mpz_srcptr op1, mpz_srcptr op2)
               if (res_ptr != op1_ptr)
                 MPN_COPY (res_ptr + op2_size, op1_ptr + op2_size,
                           op1_size - op2_size);
-             for (i = op2_size - 1; i >= 0; i--)
-               res_ptr[i] = op1_ptr[i] ^ op2_ptr[i];
+             if (LIKELY (op2_size != 0))
+               mpn_xor_n (res_ptr, op1_ptr, op2_ptr, op2_size);
               res_size = op1_size;
             }
           else
@@ -73,8 +72,8 @@ mpz_xor (mpz_ptr res, mpz_srcptr op1, mpz_srcptr op2)
               if (res_ptr != op2_ptr)
                 MPN_COPY (res_ptr + op1_size, op2_ptr + op1_size,
                           op2_size - op1_size);
-             for (i = op1_size - 1; i >= 0; i--)
-               res_ptr[i] = op1_ptr[i] ^ op2_ptr[i];
+             if (LIKELY (op1_size != 0))
+               mpn_xor_n (res_ptr, op1_ptr, op2_ptr, op1_size);
               res_size = op2_size;
             }
  
@@ -91,7 +90,7 @@ mpz_xor (mpz_ptr res, mpz_srcptr op1, mpz_srcptr op2)
      {
        if (op2_size < 0)
         {
-         mp_ptr opx;
+         mp_ptr opx, opy;
  
           /* Both operands are negative, the result will be positive.
               (-OP1) ^ (-OP2) =
@@ -103,38 +102,23 @@ mpz_xor (mpz_ptr res, mpz_srcptr op1, mpz_srcptr op2)
  
           /* Possible optimization: Decrease mpn_sub precision,
              as we won't use the entire res of both.  */
-         opx = TMP_ALLOC_LIMBS (op1_size);
+         TMP_ALLOC_LIMBS_2 (opx, op1_size, opy, op2_size);
           mpn_sub_1 (opx, op1_ptr, op1_size, (mp_limb_t) 1);
           op1_ptr = opx;
  
-         opx = TMP_ALLOC_LIMBS (op2_size);
-         mpn_sub_1 (opx, op2_ptr, op2_size, (mp_limb_t) 1);
-         op2_ptr = opx;
-
-         res_alloc = MAX (op1_size, op2_size);
-         if (ALLOC(res) < res_alloc)
-           {
-             _mpz_realloc (res, res_alloc);
-             res_ptr = PTR(res);
-             /* op1_ptr and op2_ptr point to temporary space.  */
-           }
+         mpn_sub_1 (opy, op2_ptr, op2_size, (mp_limb_t) 1);
+         op2_ptr = opy;
  
           if (op1_size > op2_size)
-           {
-             MPN_COPY (res_ptr + op2_size, op1_ptr + op2_size,
-                       op1_size - op2_size);
-             for (i = op2_size - 1; i >= 0; i--)
-               res_ptr[i] = op1_ptr[i] ^ op2_ptr[i];
-             res_size = op1_size;
-           }
-         else
-           {
-             MPN_COPY (res_ptr + op1_size, op2_ptr + op1_size,
-                       op2_size - op1_size);
-             for (i = op1_size - 1; i >= 0; i--)
-               res_ptr[i] = op1_ptr[i] ^ op2_ptr[i];
-             res_size = op2_size;
-           }
+           MPN_SRCPTR_SWAP (op1_ptr,op1_size, op2_ptr,op2_size);
+
+         res_alloc = op2_size;
+         res_ptr = MPZ_REALLOC (res, res_alloc);
+
+         MPN_COPY (res_ptr + op1_size, op2_ptr + op1_size,
+                   op2_size - op1_size);
+         mpn_xor_n (res_ptr, op1_ptr, op2_ptr, op1_size);
+         res_size = op2_size;
  
           MPN_NORMALIZE (res_ptr, res_size);
           SIZ(res) = res_size;
@@ -145,8 +129,8 @@ mpz_xor (mpz_ptr res, mpz_srcptr op1, mpz_srcptr op2)
         {
           /* We should compute -OP1 ^ OP2.  Swap OP1 and OP2 and fall
              through to the code that handles OP1 ^ -OP2.  */
-          MPZ_SRCPTR_SWAP (op1, op2);
-          MPN_SRCPTR_SWAP (op1_ptr,op1_size, op2_ptr,op2_size);
+         MPZ_SRCPTR_SWAP (op1, op2);
+         MPN_SRCPTR_SWAP (op1_ptr,op1_size, op2_ptr,op2_size);
         }
      }
  
@@ -177,24 +161,20 @@ mpz_xor (mpz_ptr res, mpz_srcptr op1, mpz_srcptr op2)
      if (op1_size > op2_size)
        {
         MPN_COPY (res_ptr + op2_size, op1_ptr + op2_size, op1_size - op2_size);
-       for (i = op2_size - 1; i >= 0; i--)
-         res_ptr[i] = op1_ptr[i] ^ op2_ptr[i];
+       mpn_xor_n (res_ptr, op1_ptr, op2_ptr, op2_size);
         res_size = op1_size;
        }
      else
        {
         MPN_COPY (res_ptr + op1_size, op2_ptr + op1_size, op2_size - op1_size);
-       for (i = op1_size - 1; i >= 0; i--)
-         res_ptr[i] = op1_ptr[i] ^ op2_ptr[i];
+       if (LIKELY (op1_size != 0))
+         mpn_xor_n (res_ptr, op1_ptr, op2_ptr, op1_size);
         res_size = op2_size;
        }
  
      cy = mpn_add_1 (res_ptr, res_ptr, res_size, (mp_limb_t) 1);
-    if (cy)
-      {
-       res_ptr[res_size] = cy;
-       res_size++;
-      }
+    res_ptr[res_size] = cy;
+    res_size += (cy != 0);
  
      MPN_NORMALIZE (res_ptr, res_size);
      SIZ(res) = -res_size;
diff --git a/nextprime.c b/nextprime.c

index f3e80f6ddc966095d8693360ab7fa8edbeefc21c..f0b01d625d3cf0b98ba8473ad9641c17d7dc96fa 100644 (file)
--- a/nextprime.c
+++ b/nextprime.c
@@ -23,7 +23,7 @@ or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
  License for more details.
  
  You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.  */
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
  
  /*
    Optimisation ideas:
diff --git a/primesieve.c b/primesieve.c

new file mode 100644 (file)

index 0000000..d1bf5a1
--- /dev/null
+++ b/primesieve.c
@@ -0,0 +1,284 @@
+/* gmp_primesieve (BIT_ARRAY, N) -- Fills the BIT_ARRAY with a mask for primes up to N.
+
+Contributed to the GNU project by Marco Bodrato.
+
+THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.
+IT IS ONLY SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.
+IN FACT, IT IS ALMOST GUARANTEED THAT IT WILL CHANGE OR
+DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2010, 2011, 2012 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+/**************************************************************/
+/* Section macros: common macros, for mswing/fac/bin (&sieve) */
+/**************************************************************/
+
+#define LOOP_ON_SIEVE_CONTINUE(prime,end,sieve)                        \
+    __max_i = (end);                                           \
+                                                               \
+    do {                                                       \
+      ++__i;                                                   \
+      if (((sieve)[__index] & __mask) == 0)                    \
+       {                                                       \
+         (prime) = id_to_n(__i)
+
+#define LOOP_ON_SIEVE_BEGIN(prime,start,end,off,sieve)         \
+  do {                                                         \
+    mp_limb_t __mask, __index, __max_i, __i;                   \
+                                                               \
+    __i = (start)-(off);                                       \
+    __index = __i / GMP_LIMB_BITS;                             \
+    __mask = CNST_LIMB(1) << (__i % GMP_LIMB_BITS);            \
+    __i += (off);                                              \
+                                                               \
+    LOOP_ON_SIEVE_CONTINUE(prime,end,sieve)
+
+#define LOOP_ON_SIEVE_STOP                                     \
+       }                                                       \
+      __mask = __mask << 1 | __mask >> (GMP_LIMB_BITS-1);      \
+      __index += __mask & 1;                                   \
+    }  while (__i <= __max_i)                                  \
+
+#define LOOP_ON_SIEVE_END                                      \
+    LOOP_ON_SIEVE_STOP;                                                \
+  } while (0)
+
+/*********************************************************/
+/* Section sieve: sieving functions and tools for primes */
+/*********************************************************/
+
+#if 0
+static mp_limb_t
+bit_to_n (mp_limb_t bit) { return (bit*3+4)|1; }
+#endif
+
+/* id_to_n (id) = bit_to_n (id-1) = (id*3+1)|1 */
+static mp_limb_t
+id_to_n  (mp_limb_t id)  { return id*3+1+(id&1); }
+
+/* n_to_bit (n) = ((n-1)&(-CNST_LIMB(2)))/3U-1 */
+static mp_limb_t
+n_to_bit (mp_limb_t n) { return ((n-5)|1)/3U; }
+
+#if 0
+static mp_size_t
+primesieve_size (mp_limb_t n) { return n_to_bit(n) / GMP_LIMB_BITS + 1; }
+#endif
+
+#if GMP_LIMB_BITS > 61
+#define SIEVE_SEED CNST_LIMB(0x3294C9E069128480)
+#define SEED_LIMIT 202
+#else
+#if GMP_LIMB_BITS > 30
+#define SIEVE_SEED CNST_LIMB(0x69128480)
+#define SEED_LIMIT 114
+#else
+#if GMP_LIMB_BITS > 15
+#define SIEVE_SEED CNST_LIMB(0x8480)
+#define SEED_LIMIT 54
+#else
+#if GMP_LIMB_BITS > 7
+#define SIEVE_SEED CNST_LIMB(0x80)
+#define SEED_LIMIT 34
+#else
+#define SIEVE_SEED CNST_LIMB(0x0)
+#define SEED_LIMIT 24
+#endif /* 7 */
+#endif /* 15 */
+#endif /* 30 */
+#endif /* 61 */
+
+static void
+first_block_primesieve (mp_ptr bit_array, mp_limb_t n)
+{
+  mp_size_t bits, limbs;
+
+  ASSERT (n > 4);
+
+  bits  = n_to_bit(n);
+  limbs = bits / GMP_LIMB_BITS + 1;
+
+  /* FIXME: We can skip 5 too, filling with a 5-part pattern. */
+  MPN_ZERO (bit_array, limbs);
+  bit_array[0] = SIEVE_SEED;
+
+  if ((bits + 1) % GMP_LIMB_BITS != 0)
+    bit_array[limbs-1] |= MP_LIMB_T_MAX << ((bits + 1) % GMP_LIMB_BITS);
+
+  if (n > SEED_LIMIT) {
+    mp_limb_t mask, index, i;
+
+    ASSERT (n > 49);
+
+    mask = 1;
+    index = 0;
+    i = 1;
+    do {
+      if ((bit_array[index] & mask) == 0)
+       {
+         mp_size_t step, lindex;
+         mp_limb_t lmask;
+         unsigned  maskrot;
+
+         step = id_to_n(i);
+/*       lindex = n_to_bit(id_to_n(i)*id_to_n(i)); */
+         lindex = i*(step+1)-1+(-(i&1)&(i+1));
+/*       lindex = i*(step+1+(i&1))-1+(i&1); */
+         if (lindex > bits)
+           break;
+
+         step <<= 1;
+         maskrot = step % GMP_LIMB_BITS;
+
+         lmask = CNST_LIMB(1) << (lindex % GMP_LIMB_BITS);
+         do {
+           bit_array[lindex / GMP_LIMB_BITS] |= lmask;
+           lmask = lmask << maskrot | lmask >> (GMP_LIMB_BITS - maskrot);
+           lindex += step;
+         } while (lindex <= bits);
+
+/*       lindex = n_to_bit(id_to_n(i)*bit_to_n(i)); */
+         lindex = i*(i*3+6)+(i&1);
+
+         lmask = CNST_LIMB(1) << (lindex % GMP_LIMB_BITS);
+         for ( ; lindex <= bits; lindex += step) {
+           bit_array[lindex / GMP_LIMB_BITS] |= lmask;
+           lmask = lmask << maskrot | lmask >> (GMP_LIMB_BITS - maskrot);
+         };
+       }
+      mask = mask << 1 | mask >> (GMP_LIMB_BITS-1);
+      index += mask & 1;
+      i++;
+    } while (1);
+  }
+}
+
+static void
+block_resieve (mp_ptr bit_array, mp_size_t limbs, mp_limb_t offset,
+                     mp_srcptr sieve, mp_limb_t sieve_bits)
+{
+  mp_size_t bits, step;
+
+  ASSERT (limbs > 0);
+
+  bits = limbs * GMP_LIMB_BITS - 1;
+
+  /* FIXME: We can skip 5 too, filling with a 5-part pattern. */
+  MPN_ZERO (bit_array, limbs);
+
+  LOOP_ON_SIEVE_BEGIN(step,0,sieve_bits,0,sieve);
+  {
+    mp_size_t lindex;
+    mp_limb_t lmask;
+    unsigned  maskrot;
+
+/*  lindex = n_to_bit(id_to_n(i)*id_to_n(i)); */
+    lindex = __i*(step+1)-1+(-(__i&1)&(__i+1));
+/*  lindex = __i*(step+1+(__i&1))-1+(__i&1); */
+    if (lindex > bits + offset)
+      break;
+
+    step <<= 1;
+    maskrot = step % GMP_LIMB_BITS;
+
+    if (lindex < offset)
+      lindex += step * ((offset - lindex - 1) / step + 1);
+
+    lindex -= offset;
+
+    lmask = CNST_LIMB(1) << (lindex % GMP_LIMB_BITS);
+    for ( ; lindex <= bits; lindex += step) {
+      bit_array[lindex / GMP_LIMB_BITS] |= lmask;
+      lmask = lmask << maskrot | lmask >> (GMP_LIMB_BITS - maskrot);
+    };
+
+/*  lindex = n_to_bit(id_to_n(i)*bit_to_n(i)); */
+    lindex = __i*(__i*3+6)+(__i&1);
+    if (lindex > bits + offset)
+      continue;
+
+    if (lindex < offset)
+      lindex += step * ((offset - lindex - 1) / step + 1);
+
+    lindex -= offset;
+
+    lmask = CNST_LIMB(1) << (lindex % GMP_LIMB_BITS);
+    for ( ; lindex <= bits; lindex += step) {
+      bit_array[lindex / GMP_LIMB_BITS] |= lmask;
+      lmask = lmask << maskrot | lmask >> (GMP_LIMB_BITS - maskrot);
+    };
+  }
+  LOOP_ON_SIEVE_END;
+}
+
+#define BLOCK_SIZE 2048
+
+/* Fills bit_array with the characteristic function of composite
+   numbers up to the parameter n. I.e. a bit set to "1" represents a
+   composite, a "0" represents a prime.
+
+   The primesieve_size(n) limbs pointed to by bit_array are
+   overwritten. The returned value counts prime integers in the
+   interval [4, n]. Note that n > 4.
+
+   Even numbers and multiples of 3 are excluded "a priori", only
+   numbers equivalent to +/- 1 mod 6 have their bit in the array.
+
+   Once sieved, if the bit b is ZERO it represents a prime, the
+   represented prime is bit_to_n(b), if the LSbit is bit 0, or
+   id_to_n(b), if you call "1" the first bit.
+ */
+
+mp_limb_t
+gmp_primesieve (mp_ptr bit_array, mp_limb_t n)
+{
+  mp_size_t size;
+  mp_limb_t bits;
+
+  ASSERT (n > 4);
+
+  bits = n_to_bit(n);
+  size = bits / GMP_LIMB_BITS + 1;
+
+  if (size > BLOCK_SIZE * 2) {
+    mp_size_t off;
+    off = BLOCK_SIZE + (size % BLOCK_SIZE);
+    first_block_primesieve (bit_array, id_to_n (off * GMP_LIMB_BITS));
+    for ( ; off < size; off += BLOCK_SIZE)
+      block_resieve (bit_array + off, BLOCK_SIZE, off * GMP_LIMB_BITS, bit_array, off * GMP_LIMB_BITS - 1);
+  } else {
+    first_block_primesieve (bit_array, n);
+  }
+
+  if ((bits + 1) % GMP_LIMB_BITS != 0)
+    bit_array[size-1] |= MP_LIMB_T_MAX << ((bits + 1) % GMP_LIMB_BITS);
+
+
+  return size * GMP_LIMB_BITS - mpn_popcount (bit_array, size);
+}
+
+#undef BLOCK_SIZE
+#undef SEED_LIMIT
+#undef SIEVE_SEED
+#undef LOOP_ON_SIEVE_END
+#undef LOOP_ON_SIEVE_STOP
+#undef LOOP_ON_SIEVE_BEGIN
+#undef LOOP_ON_SIEVE_CONTINUE
diff --git a/printf/Makefile.in b/printf/Makefile.in

index ea8543783da63aa278f703544012301614a9ebe2..9702b8073b338a9771bb711c71ee10a797e98ccf 100644 (file)
--- a/printf/Makefile.in
+++ b/printf/Makefile.in
@@ -1,9 +1,9 @@
-# Makefile.in generated by automake 1.11.1 from Makefile.am.
+# Makefile.in generated by automake 1.11.6 from Makefile.am.
  # @configure_input@
  
  # Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
-# 2003, 2004, 2005, 2006, 2007, 2008, 2009  Free Software Foundation,
-# Inc.
+# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software
+# Foundation, Inc.
  # This Makefile.in is free software; the Free Software Foundation
  # gives unlimited permission to copy and/or distribute it,
  # with or without modifications, as long as this notice is preserved.
@@ -33,6 +33,23 @@
  # along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
  
  VPATH = @srcdir@
+am__make_dryrun = \
+  { \
+    am__dry=no; \
+    case $$MAKEFLAGS in \
+      *\\[\ \  ]*) \
+        echo 'am--echo: ; @echo "AM"  OK' | $(MAKE) -f - 2>/dev/null \
+          | grep '^AM OK$$' >/dev/null || am__dry=yes;; \
+      *) \
+        for am__flg in $$MAKEFLAGS; do \
+          case $$am__flg in \
+            *=*|--*) ;; \
+            *n*) am__dry=yes; break;; \
+          esac; \
+        done;; \
+    esac; \
+    test $$am__dry = yes; \
+  }
  pkgdatadir = $(datadir)/@PACKAGE@
  pkgincludedir = $(includedir)/@PACKAGE@
  pkglibdir = $(libdir)/@PACKAGE@
@@ -51,12 +68,11 @@ PRE_UNINSTALL = :
  POST_UNINSTALL = :
  build_triplet = @build@
  host_triplet = @host@
-ANSI2KNR = $(top_builddir)/ansi2knr
  subdir = printf
  DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in
  ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
  am__aclocal_m4_deps = $(top_srcdir)/acinclude.m4 \
-       $(top_srcdir)/configure.in
+       $(top_srcdir)/configure.ac
  am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
         $(ACLOCAL_M4)
  mkinstalldirs = $(install_sh) -d
@@ -65,12 +81,12 @@ CONFIG_CLEAN_FILES =
  CONFIG_CLEAN_VPATH_FILES =
  LTLIBRARIES = $(noinst_LTLIBRARIES)
  libprintf_la_LIBADD =
-am_libprintf_la_OBJECTS = asprintf$U.lo asprntffuns$U.lo doprnt$U.lo \
-       doprntf$U.lo doprnti$U.lo fprintf$U.lo obprintf$U.lo \
-       obvprintf$U.lo obprntffuns$U.lo printf$U.lo printffuns$U.lo \
-       snprintf$U.lo snprntffuns$U.lo sprintf$U.lo sprintffuns$U.lo \
-       vasprintf$U.lo vfprintf$U.lo vprintf$U.lo vsnprintf$U.lo \
-       vsprintf$U.lo repl-vsnprintf$U.lo
+am_libprintf_la_OBJECTS = asprintf.lo asprntffuns.lo doprnt.lo \
+       doprntf.lo doprnti.lo fprintf.lo obprintf.lo obvprintf.lo \
+       obprntffuns.lo printf.lo printffuns.lo snprintf.lo \
+       snprntffuns.lo sprintf.lo sprintffuns.lo vasprintf.lo \
+       vfprintf.lo vprintf.lo vsnprintf.lo vsprintf.lo \
+       repl-vsnprintf.lo
  libprintf_la_OBJECTS = $(am_libprintf_la_OBJECTS)
  DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir)
  depcomp =
@@ -86,6 +102,11 @@ LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \
         $(LDFLAGS) -o $@
  SOURCES = $(libprintf_la_SOURCES)
  DIST_SOURCES = $(libprintf_la_SOURCES)
+am__can_run_installinfo = \
+  case $$AM_UPDATE_INFO_DIR in \
+    n|no|NO) false;; \
+    *) (install-info --version) >/dev/null 2>&1;; \
+  esac
  ETAGS = etags
  CTAGS = ctags
  DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
@@ -187,8 +208,8 @@ SHELL = @SHELL@
  SPEED_CYCLECOUNTER_OBJ = @SPEED_CYCLECOUNTER_OBJ@
  STRIP = @STRIP@
  TAL_OBJECT = @TAL_OBJECT@
+TUNE_LIBS = @TUNE_LIBS@
  TUNE_SQR_OBJ = @TUNE_SQR_OBJ@
-U = @U@
  U_FOR_BUILD = @U_FOR_BUILD@
  VERSION = @VERSION@
  WITH_READLINE_01 = @WITH_READLINE_01@
@@ -235,7 +256,6 @@ mandir = @mandir@
  mkdir_p = @mkdir_p@
  mpn_objects = @mpn_objects@
  mpn_objs_in_libgmp = @mpn_objs_in_libgmp@
-mpn_objs_in_libmp = @mpn_objs_in_libmp@
  oldincludedir = @oldincludedir@
  pdfdir = @pdfdir@
  prefix = @prefix@
@@ -301,7 +321,7 @@ clean-noinstLTLIBRARIES:
           echo "rm -f \"$${dir}/so_locations\""; \
           rm -f "$${dir}/so_locations"; \
         done
-libprintf.la: $(libprintf_la_OBJECTS) $(libprintf_la_DEPENDENCIES) 
+libprintf.la: $(libprintf_la_OBJECTS) $(libprintf_la_DEPENDENCIES) $(EXTRA_libprintf_la_DEPENDENCIES) 
         $(LINK)  $(libprintf_la_OBJECTS) $(libprintf_la_LIBADD) $(LIBS)
  
  mostlyclean-compile:
@@ -309,11 +329,6 @@ mostlyclean-compile:
  
  distclean-compile:
         -rm -f *.tab.c
-$(top_builddir)/ansi2knr:
-       $(am__cd) $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) ./ansi2knr
-
-mostlyclean-kr:
-       -test "$U" = "" || rm -f *_.c
  
  .c.o:
         $(COMPILE) -c $<
@@ -323,60 +338,6 @@ mostlyclean-kr:
  
  .c.lo:
         $(LTCOMPILE) -c -o $@ $<
-asprintf_.c: asprintf.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/asprintf.c; then echo $(srcdir)/asprintf.c; else echo asprintf.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-asprntffuns_.c: asprntffuns.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/asprntffuns.c; then echo $(srcdir)/asprntffuns.c; else echo asprntffuns.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-doprnt_.c: doprnt.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/doprnt.c; then echo $(srcdir)/doprnt.c; else echo doprnt.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-doprntf_.c: doprntf.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/doprntf.c; then echo $(srcdir)/doprntf.c; else echo doprntf.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-doprnti_.c: doprnti.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/doprnti.c; then echo $(srcdir)/doprnti.c; else echo doprnti.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-fprintf_.c: fprintf.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/fprintf.c; then echo $(srcdir)/fprintf.c; else echo fprintf.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-obprintf_.c: obprintf.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/obprintf.c; then echo $(srcdir)/obprintf.c; else echo obprintf.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-obprntffuns_.c: obprntffuns.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/obprntffuns.c; then echo $(srcdir)/obprntffuns.c; else echo obprntffuns.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-obvprintf_.c: obvprintf.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/obvprintf.c; then echo $(srcdir)/obvprintf.c; else echo obvprintf.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-printf_.c: printf.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/printf.c; then echo $(srcdir)/printf.c; else echo printf.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-printffuns_.c: printffuns.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/printffuns.c; then echo $(srcdir)/printffuns.c; else echo printffuns.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-repl-vsnprintf_.c: repl-vsnprintf.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/repl-vsnprintf.c; then echo $(srcdir)/repl-vsnprintf.c; else echo repl-vsnprintf.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-snprintf_.c: snprintf.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/snprintf.c; then echo $(srcdir)/snprintf.c; else echo snprintf.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-snprntffuns_.c: snprntffuns.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/snprntffuns.c; then echo $(srcdir)/snprntffuns.c; else echo snprntffuns.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-sprintf_.c: sprintf.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/sprintf.c; then echo $(srcdir)/sprintf.c; else echo sprintf.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-sprintffuns_.c: sprintffuns.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/sprintffuns.c; then echo $(srcdir)/sprintffuns.c; else echo sprintffuns.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-vasprintf_.c: vasprintf.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/vasprintf.c; then echo $(srcdir)/vasprintf.c; else echo vasprintf.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-vfprintf_.c: vfprintf.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/vfprintf.c; then echo $(srcdir)/vfprintf.c; else echo vfprintf.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-vprintf_.c: vprintf.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/vprintf.c; then echo $(srcdir)/vprintf.c; else echo vprintf.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-vsnprintf_.c: vsnprintf.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/vsnprintf.c; then echo $(srcdir)/vsnprintf.c; else echo vsnprintf.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-vsprintf_.c: vsprintf.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/vsprintf.c; then echo $(srcdir)/vsprintf.c; else echo vsprintf.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-asprintf_.$(OBJEXT) asprintf_.lo asprntffuns_.$(OBJEXT) \
-asprntffuns_.lo doprnt_.$(OBJEXT) doprnt_.lo doprntf_.$(OBJEXT) \
-doprntf_.lo doprnti_.$(OBJEXT) doprnti_.lo fprintf_.$(OBJEXT) \
-fprintf_.lo obprintf_.$(OBJEXT) obprintf_.lo obprntffuns_.$(OBJEXT) \
-obprntffuns_.lo obvprintf_.$(OBJEXT) obvprintf_.lo printf_.$(OBJEXT) \
-printf_.lo printffuns_.$(OBJEXT) printffuns_.lo \
-repl-vsnprintf_.$(OBJEXT) repl-vsnprintf_.lo snprintf_.$(OBJEXT) \
-snprintf_.lo snprntffuns_.$(OBJEXT) snprntffuns_.lo sprintf_.$(OBJEXT) \
-sprintf_.lo sprintffuns_.$(OBJEXT) sprintffuns_.lo \
-vasprintf_.$(OBJEXT) vasprintf_.lo vfprintf_.$(OBJEXT) vfprintf_.lo \
-vprintf_.$(OBJEXT) vprintf_.lo vsnprintf_.$(OBJEXT) vsnprintf_.lo \
-vsprintf_.$(OBJEXT) vsprintf_.lo : $(ANSI2KNR)
  
  mostlyclean-libtool:
         -rm -f *.lo
@@ -480,10 +441,15 @@ install-am: all-am
  
  installcheck: installcheck-am
  install-strip:
-       $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
-         install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
-         `test -z '$(STRIP)' || \
-           echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+       if test -z '$(STRIP)'; then \
+         $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+           install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+             install; \
+       else \
+         $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+           install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+           "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
+       fi
  mostlyclean-generic:
  
  clean-generic:
@@ -551,7 +517,7 @@ maintainer-clean-am: distclean-am maintainer-clean-generic
  
  mostlyclean: mostlyclean-am
  
-mostlyclean-am: mostlyclean-compile mostlyclean-generic mostlyclean-kr \
+mostlyclean-am: mostlyclean-compile mostlyclean-generic \
         mostlyclean-libtool
  
  pdf: pdf-am
@@ -564,7 +530,7 @@ ps-am:
  
  uninstall-am:
  
-.MAKE: $(top_builddir)/ansi2knr install-am install-strip
+.MAKE: install-am install-strip
  
  .PHONY: CTAGS GTAGS all all-am check check-am clean clean-generic \
         clean-libtool clean-noinstLTLIBRARIES ctags distclean \
@@ -576,9 +542,8 @@ uninstall-am:
         install-pdf install-pdf-am install-ps install-ps-am \
         install-strip installcheck installcheck-am installdirs \
         maintainer-clean maintainer-clean-generic mostlyclean \
-       mostlyclean-compile mostlyclean-generic mostlyclean-kr \
-       mostlyclean-libtool pdf pdf-am ps ps-am tags uninstall \
-       uninstall-am
+       mostlyclean-compile mostlyclean-generic mostlyclean-libtool \
+       pdf pdf-am ps ps-am tags uninstall uninstall-am
  
  
  # Tell versions [3.59,3.63) of GNU make to not export all variables.
diff --git a/printf/doprntf.c b/printf/doprntf.c

index 0a1928b26ecf09359faf8b2a57c85eb83a624e8d..49e701304d20e0e03429478a8fe10b4283bbb56e 100644 (file)
--- a/printf/doprntf.c
+++ b/printf/doprntf.c
@@ -36,6 +36,7 @@ along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
  
  #include "gmp.h"
  #include "gmp-impl.h"
+#include "longlong.h"
  
  
  /* change this to "#define TRACE(x) x" for diagnostics */
diff --git a/printf/printffuns.c b/printf/printffuns.c

index 4f4e74d989f0c0cb37b043a83ffb72bd509109f8..89c9589fce51a837b10f80ace80a980732653496 100644 (file)
--- a/printf/printffuns.c
+++ b/printf/printffuns.c
@@ -37,7 +37,7 @@ along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
  
  /* SunOS 4 stdio.h doesn't provide a prototype for this */
  #if ! HAVE_DECL_VFPRINTF
-int vfprintf __GMP_PROTO ((FILE *, const char *, va_list));
+int vfprintf (FILE *, const char *, va_list);
  #endif
  
  
diff --git a/rand.c b/rand.c

deleted file mode 100644 (file)

index 31969b2..0000000
--- a/rand.c
+++ /dev/null
@@ -1,64 +0,0 @@
-/* gmp_randinit (state, algorithm, ...) -- Initialize a random state.
-
-Copyright 1999, 2000, 2001, 2002 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-License for more details.
-
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
-
-#include "config.h"
-
-#include <stdio.h> /* for NULL */
-
-#if HAVE_STDARG
-#include <stdarg.h>
-#else
-#include <varargs.h>
-#endif
-
-#include "gmp.h"
-#include "gmp-impl.h"
-
-void
-#if HAVE_STDARG
-gmp_randinit (gmp_randstate_t rstate,
-             gmp_randalg_t alg,
-             ...)
-#else
-gmp_randinit (va_alist)
-     va_dcl
-#endif
-{
-  va_list ap;
-#if HAVE_STDARG
-  va_start (ap, alg);
-#else
-  __gmp_randstate_struct *rstate;
-  gmp_randalg_t alg;
-  va_start (ap);
-  rstate = va_arg (ap, __gmp_randstate_struct *);
-  alg = va_arg (ap, gmp_randalg_t);
-#endif
-
-  switch (alg) {
-  case GMP_RAND_ALG_LC:
-    if (! gmp_randinit_lc_2exp_size (rstate, va_arg (ap, unsigned long)))
-      gmp_errno |= GMP_ERROR_INVALID_ARGUMENT;
-    break;
-  default:
-    gmp_errno |= GMP_ERROR_UNSUPPORTED_ARGUMENT;
-    break;
-  }
-  va_end (ap);
-}
diff --git a/rand/Makefile.am b/rand/Makefile.am

new file mode 100644 (file)

index 0000000..844bd81
--- /dev/null
+++ b/rand/Makefile.am
@@ -0,0 +1,27 @@
+## Process this file with automake to generate Makefile.in
+
+# Copyright 2001, 2002, 2010 Free Software Foundation, Inc.
+#
+# This file is part of the GNU MP Library.
+#
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or (at your
+# option) any later version.
+#
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+# License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public License
+# along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+
+INCLUDES = -D__GMP_WITHIN_GMP -I$(top_srcdir)
+
+noinst_LTLIBRARIES = librandom.la
+
+librandom_la_SOURCES = randmt.h                                                \
+  rand.c randclr.c randdef.c randiset.c randlc2s.c randlc2x.c randmt.c \
+  randmts.c rands.c randsd.c randsdui.c randbui.c randmui.c
diff --git a/rand/Makefile.in b/rand/Makefile.in

new file mode 100644 (file)

index 0000000..b47b4b8
--- /dev/null
+++ b/rand/Makefile.in
@@ -0,0 +1,545 @@
+# Makefile.in generated by automake 1.11.6 from Makefile.am.
+# @configure_input@
+
+# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
+# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software
+# Foundation, Inc.
+# This Makefile.in is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
+# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+# PARTICULAR PURPOSE.
+
+@SET_MAKE@
+
+# Copyright 2001, 2002, 2010 Free Software Foundation, Inc.
+#
+# This file is part of the GNU MP Library.
+#
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or (at your
+# option) any later version.
+#
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+# License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public License
+# along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+VPATH = @srcdir@
+am__make_dryrun = \
+  { \
+    am__dry=no; \
+    case $$MAKEFLAGS in \
+      *\\[\ \  ]*) \
+        echo 'am--echo: ; @echo "AM"  OK' | $(MAKE) -f - 2>/dev/null \
+          | grep '^AM OK$$' >/dev/null || am__dry=yes;; \
+      *) \
+        for am__flg in $$MAKEFLAGS; do \
+          case $$am__flg in \
+            *=*|--*) ;; \
+            *n*) am__dry=yes; break;; \
+          esac; \
+        done;; \
+    esac; \
+    test $$am__dry = yes; \
+  }
+pkgdatadir = $(datadir)/@PACKAGE@
+pkgincludedir = $(includedir)/@PACKAGE@
+pkglibdir = $(libdir)/@PACKAGE@
+pkglibexecdir = $(libexecdir)/@PACKAGE@
+am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd
+install_sh_DATA = $(install_sh) -c -m 644
+install_sh_PROGRAM = $(install_sh) -c
+install_sh_SCRIPT = $(install_sh) -c
+INSTALL_HEADER = $(INSTALL_DATA)
+transform = $(program_transform_name)
+NORMAL_INSTALL = :
+PRE_INSTALL = :
+POST_INSTALL = :
+NORMAL_UNINSTALL = :
+PRE_UNINSTALL = :
+POST_UNINSTALL = :
+build_triplet = @build@
+host_triplet = @host@
+subdir = rand
+DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in
+ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
+am__aclocal_m4_deps = $(top_srcdir)/acinclude.m4 \
+       $(top_srcdir)/configure.ac
+am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
+       $(ACLOCAL_M4)
+mkinstalldirs = $(install_sh) -d
+CONFIG_HEADER = $(top_builddir)/config.h
+CONFIG_CLEAN_FILES =
+CONFIG_CLEAN_VPATH_FILES =
+LTLIBRARIES = $(noinst_LTLIBRARIES)
+librandom_la_LIBADD =
+am_librandom_la_OBJECTS = rand.lo randclr.lo randdef.lo randiset.lo \
+       randlc2s.lo randlc2x.lo randmt.lo randmts.lo rands.lo \
+       randsd.lo randsdui.lo randbui.lo randmui.lo
+librandom_la_OBJECTS = $(am_librandom_la_OBJECTS)
+DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir)
+depcomp =
+am__depfiles_maybe =
+COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \
+       $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
+LTCOMPILE = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \
+       --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \
+       $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
+CCLD = $(CC)
+LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \
+       --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) $(AM_LDFLAGS) \
+       $(LDFLAGS) -o $@
+SOURCES = $(librandom_la_SOURCES)
+DIST_SOURCES = $(librandom_la_SOURCES)
+am__can_run_installinfo = \
+  case $$AM_UPDATE_INFO_DIR in \
+    n|no|NO) false;; \
+    *) (install-info --version) >/dev/null 2>&1;; \
+  esac
+ETAGS = etags
+CTAGS = ctags
+DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
+ABI = @ABI@
+ACLOCAL = @ACLOCAL@
+AMTAR = @AMTAR@
+AR = @AR@
+AS = @AS@
+ASMFLAGS = @ASMFLAGS@
+AUTOCONF = @AUTOCONF@
+AUTOHEADER = @AUTOHEADER@
+AUTOMAKE = @AUTOMAKE@
+AWK = @AWK@
+CALLING_CONVENTIONS_OBJS = @CALLING_CONVENTIONS_OBJS@
+CC = @CC@
+CCAS = @CCAS@
+CC_FOR_BUILD = @CC_FOR_BUILD@
+CFLAGS = @CFLAGS@
+CPP = @CPP@
+CPPFLAGS = @CPPFLAGS@
+CPP_FOR_BUILD = @CPP_FOR_BUILD@
+CXX = @CXX@
+CXXCPP = @CXXCPP@
+CXXFLAGS = @CXXFLAGS@
+CYGPATH_W = @CYGPATH_W@
+DEFN_LONG_LONG_LIMB = @DEFN_LONG_LONG_LIMB@
+DEFS = @DEFS@
+DLLTOOL = @DLLTOOL@
+DSYMUTIL = @DSYMUTIL@
+DUMPBIN = @DUMPBIN@
+ECHO_C = @ECHO_C@
+ECHO_N = @ECHO_N@
+ECHO_T = @ECHO_T@
+EGREP = @EGREP@
+EXEEXT = @EXEEXT@
+EXEEXT_FOR_BUILD = @EXEEXT_FOR_BUILD@
+FGREP = @FGREP@
+GMP_LDFLAGS = @GMP_LDFLAGS@
+GMP_LIMB_BITS = @GMP_LIMB_BITS@
+GMP_NAIL_BITS = @GMP_NAIL_BITS@
+GREP = @GREP@
+HAVE_CLOCK_01 = @HAVE_CLOCK_01@
+HAVE_CPUTIME_01 = @HAVE_CPUTIME_01@
+HAVE_GETRUSAGE_01 = @HAVE_GETRUSAGE_01@
+HAVE_GETTIMEOFDAY_01 = @HAVE_GETTIMEOFDAY_01@
+HAVE_HOST_CPU_FAMILY_power = @HAVE_HOST_CPU_FAMILY_power@
+HAVE_HOST_CPU_FAMILY_powerpc = @HAVE_HOST_CPU_FAMILY_powerpc@
+HAVE_SIGACTION_01 = @HAVE_SIGACTION_01@
+HAVE_SIGALTSTACK_01 = @HAVE_SIGALTSTACK_01@
+HAVE_SIGSTACK_01 = @HAVE_SIGSTACK_01@
+HAVE_STACK_T_01 = @HAVE_STACK_T_01@
+HAVE_SYS_RESOURCE_H_01 = @HAVE_SYS_RESOURCE_H_01@
+INSTALL = @INSTALL@
+INSTALL_DATA = @INSTALL_DATA@
+INSTALL_PROGRAM = @INSTALL_PROGRAM@
+INSTALL_SCRIPT = @INSTALL_SCRIPT@
+INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@
+LD = @LD@
+LDFLAGS = @LDFLAGS@
+LEX = @LEX@
+LEXLIB = @LEXLIB@
+LEX_OUTPUT_ROOT = @LEX_OUTPUT_ROOT@
+LIBCURSES = @LIBCURSES@
+LIBGMPXX_LDFLAGS = @LIBGMPXX_LDFLAGS@
+LIBGMP_DLL = @LIBGMP_DLL@
+LIBGMP_LDFLAGS = @LIBGMP_LDFLAGS@
+LIBM = @LIBM@
+LIBM_FOR_BUILD = @LIBM_FOR_BUILD@
+LIBOBJS = @LIBOBJS@
+LIBREADLINE = @LIBREADLINE@
+LIBS = @LIBS@
+LIBTOOL = @LIBTOOL@
+LIPO = @LIPO@
+LN_S = @LN_S@
+LTLIBOBJS = @LTLIBOBJS@
+M4 = @M4@
+MAINT = @MAINT@
+MAKEINFO = @MAKEINFO@
+MANIFEST_TOOL = @MANIFEST_TOOL@
+MKDIR_P = @MKDIR_P@
+NM = @NM@
+NMEDIT = @NMEDIT@
+OBJDUMP = @OBJDUMP@
+OBJEXT = @OBJEXT@
+OTOOL = @OTOOL@
+OTOOL64 = @OTOOL64@
+PACKAGE = @PACKAGE@
+PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@
+PACKAGE_NAME = @PACKAGE_NAME@
+PACKAGE_STRING = @PACKAGE_STRING@
+PACKAGE_TARNAME = @PACKAGE_TARNAME@
+PACKAGE_URL = @PACKAGE_URL@
+PACKAGE_VERSION = @PACKAGE_VERSION@
+PATH_SEPARATOR = @PATH_SEPARATOR@
+RANLIB = @RANLIB@
+SED = @SED@
+SET_MAKE = @SET_MAKE@
+SHELL = @SHELL@
+SPEED_CYCLECOUNTER_OBJ = @SPEED_CYCLECOUNTER_OBJ@
+STRIP = @STRIP@
+TAL_OBJECT = @TAL_OBJECT@
+TUNE_LIBS = @TUNE_LIBS@
+TUNE_SQR_OBJ = @TUNE_SQR_OBJ@
+U_FOR_BUILD = @U_FOR_BUILD@
+VERSION = @VERSION@
+WITH_READLINE_01 = @WITH_READLINE_01@
+YACC = @YACC@
+YFLAGS = @YFLAGS@
+abs_builddir = @abs_builddir@
+abs_srcdir = @abs_srcdir@
+abs_top_builddir = @abs_top_builddir@
+abs_top_srcdir = @abs_top_srcdir@
+ac_ct_AR = @ac_ct_AR@
+ac_ct_CC = @ac_ct_CC@
+ac_ct_CXX = @ac_ct_CXX@
+ac_ct_DUMPBIN = @ac_ct_DUMPBIN@
+am__leading_dot = @am__leading_dot@
+am__tar = @am__tar@
+am__untar = @am__untar@
+bindir = @bindir@
+build = @build@
+build_alias = @build_alias@
+build_cpu = @build_cpu@
+build_os = @build_os@
+build_vendor = @build_vendor@
+builddir = @builddir@
+datadir = @datadir@
+datarootdir = @datarootdir@
+docdir = @docdir@
+dvidir = @dvidir@
+exec_prefix = @exec_prefix@
+gmp_srclinks = @gmp_srclinks@
+host = @host@
+host_alias = @host_alias@
+host_cpu = @host_cpu@
+host_os = @host_os@
+host_vendor = @host_vendor@
+htmldir = @htmldir@
+includedir = @includedir@
+infodir = @infodir@
+install_sh = @install_sh@
+libdir = @libdir@
+libexecdir = @libexecdir@
+localedir = @localedir@
+localstatedir = @localstatedir@
+mandir = @mandir@
+mkdir_p = @mkdir_p@
+mpn_objects = @mpn_objects@
+mpn_objs_in_libgmp = @mpn_objs_in_libgmp@
+oldincludedir = @oldincludedir@
+pdfdir = @pdfdir@
+prefix = @prefix@
+program_transform_name = @program_transform_name@
+psdir = @psdir@
+sbindir = @sbindir@
+sharedstatedir = @sharedstatedir@
+srcdir = @srcdir@
+sysconfdir = @sysconfdir@
+target_alias = @target_alias@
+top_build_prefix = @top_build_prefix@
+top_builddir = @top_builddir@
+top_srcdir = @top_srcdir@
+INCLUDES = -D__GMP_WITHIN_GMP -I$(top_srcdir)
+noinst_LTLIBRARIES = librandom.la
+librandom_la_SOURCES = randmt.h                                                \
+  rand.c randclr.c randdef.c randiset.c randlc2s.c randlc2x.c randmt.c \
+  randmts.c rands.c randsd.c randsdui.c randbui.c randmui.c
+
+all: all-am
+
+.SUFFIXES:
+.SUFFIXES: .c .lo .o .obj
+$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am  $(am__configure_deps)
+       @for dep in $?; do \
+         case '$(am__configure_deps)' in \
+           *$$dep*) \
+             ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \
+               && { if test -f $@; then exit 0; else break; fi; }; \
+             exit 1;; \
+         esac; \
+       done; \
+       echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnu --ignore-deps rand/Makefile'; \
+       $(am__cd) $(top_srcdir) && \
+         $(AUTOMAKE) --gnu --ignore-deps rand/Makefile
+.PRECIOUS: Makefile
+Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
+       @case '$?' in \
+         *config.status*) \
+           cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \
+         *) \
+           echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \
+           cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \
+       esac;
+
+$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)
+       cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+
+$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps)
+       cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps)
+       cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+$(am__aclocal_m4_deps):
+
+clean-noinstLTLIBRARIES:
+       -test -z "$(noinst_LTLIBRARIES)" || rm -f $(noinst_LTLIBRARIES)
+       @list='$(noinst_LTLIBRARIES)'; for p in $$list; do \
+         dir="`echo $$p | sed -e 's|/[^/]*$$||'`"; \
+         test "$$dir" != "$$p" || dir=.; \
+         echo "rm -f \"$${dir}/so_locations\""; \
+         rm -f "$${dir}/so_locations"; \
+       done
+librandom.la: $(librandom_la_OBJECTS) $(librandom_la_DEPENDENCIES) $(EXTRA_librandom_la_DEPENDENCIES) 
+       $(LINK)  $(librandom_la_OBJECTS) $(librandom_la_LIBADD) $(LIBS)
+
+mostlyclean-compile:
+       -rm -f *.$(OBJEXT)
+
+distclean-compile:
+       -rm -f *.tab.c
+
+.c.o:
+       $(COMPILE) -c $<
+
+.c.obj:
+       $(COMPILE) -c `$(CYGPATH_W) '$<'`
+
+.c.lo:
+       $(LTCOMPILE) -c -o $@ $<
+
+mostlyclean-libtool:
+       -rm -f *.lo
+
+clean-libtool:
+       -rm -rf .libs _libs
+
+ID: $(HEADERS) $(SOURCES) $(LISP) $(TAGS_FILES)
+       list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
+       unique=`for i in $$list; do \
+           if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+         done | \
+         $(AWK) '{ files[$$0] = 1; nonempty = 1; } \
+             END { if (nonempty) { for (i in files) print i; }; }'`; \
+       mkid -fID $$unique
+tags: TAGS
+
+TAGS:  $(HEADERS) $(SOURCES)  $(TAGS_DEPENDENCIES) \
+               $(TAGS_FILES) $(LISP)
+       set x; \
+       here=`pwd`; \
+       list='$(SOURCES) $(HEADERS)  $(LISP) $(TAGS_FILES)'; \
+       unique=`for i in $$list; do \
+           if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+         done | \
+         $(AWK) '{ files[$$0] = 1; nonempty = 1; } \
+             END { if (nonempty) { for (i in files) print i; }; }'`; \
+       shift; \
+       if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \
+         test -n "$$unique" || unique=$$empty_fix; \
+         if test $$# -gt 0; then \
+           $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
+             "$$@" $$unique; \
+         else \
+           $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
+             $$unique; \
+         fi; \
+       fi
+ctags: CTAGS
+CTAGS:  $(HEADERS) $(SOURCES)  $(TAGS_DEPENDENCIES) \
+               $(TAGS_FILES) $(LISP)
+       list='$(SOURCES) $(HEADERS)  $(LISP) $(TAGS_FILES)'; \
+       unique=`for i in $$list; do \
+           if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+         done | \
+         $(AWK) '{ files[$$0] = 1; nonempty = 1; } \
+             END { if (nonempty) { for (i in files) print i; }; }'`; \
+       test -z "$(CTAGS_ARGS)$$unique" \
+         || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \
+            $$unique
+
+GTAGS:
+       here=`$(am__cd) $(top_builddir) && pwd` \
+         && $(am__cd) $(top_srcdir) \
+         && gtags -i $(GTAGS_ARGS) "$$here"
+
+distclean-tags:
+       -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags
+
+distdir: $(DISTFILES)
+       @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
+       topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
+       list='$(DISTFILES)'; \
+         dist_files=`for file in $$list; do echo $$file; done | \
+         sed -e "s|^$$srcdirstrip/||;t" \
+             -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \
+       case $$dist_files in \
+         */*) $(MKDIR_P) `echo "$$dist_files" | \
+                          sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \
+                          sort -u` ;; \
+       esac; \
+       for file in $$dist_files; do \
+         if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \
+         if test -d $$d/$$file; then \
+           dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \
+           if test -d "$(distdir)/$$file"; then \
+             find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
+           fi; \
+           if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \
+             cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \
+             find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
+           fi; \
+           cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \
+         else \
+           test -f "$(distdir)/$$file" \
+           || cp -p $$d/$$file "$(distdir)/$$file" \
+           || exit 1; \
+         fi; \
+       done
+check-am: all-am
+check: check-am
+all-am: Makefile $(LTLIBRARIES)
+installdirs:
+install: install-am
+install-exec: install-exec-am
+install-data: install-data-am
+uninstall: uninstall-am
+
+install-am: all-am
+       @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am
+
+installcheck: installcheck-am
+install-strip:
+       if test -z '$(STRIP)'; then \
+         $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+           install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+             install; \
+       else \
+         $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+           install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+           "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
+       fi
+mostlyclean-generic:
+
+clean-generic:
+
+distclean-generic:
+       -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES)
+       -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES)
+
+maintainer-clean-generic:
+       @echo "This command is intended for maintainers to use"
+       @echo "it deletes files that may require special tools to rebuild."
+clean: clean-am
+
+clean-am: clean-generic clean-libtool clean-noinstLTLIBRARIES \
+       mostlyclean-am
+
+distclean: distclean-am
+       -rm -f Makefile
+distclean-am: clean-am distclean-compile distclean-generic \
+       distclean-tags
+
+dvi: dvi-am
+
+dvi-am:
+
+html: html-am
+
+html-am:
+
+info: info-am
+
+info-am:
+
+install-data-am:
+
+install-dvi: install-dvi-am
+
+install-dvi-am:
+
+install-exec-am:
+
+install-html: install-html-am
+
+install-html-am:
+
+install-info: install-info-am
+
+install-info-am:
+
+install-man:
+
+install-pdf: install-pdf-am
+
+install-pdf-am:
+
+install-ps: install-ps-am
+
+install-ps-am:
+
+installcheck-am:
+
+maintainer-clean: maintainer-clean-am
+       -rm -f Makefile
+maintainer-clean-am: distclean-am maintainer-clean-generic
+
+mostlyclean: mostlyclean-am
+
+mostlyclean-am: mostlyclean-compile mostlyclean-generic \
+       mostlyclean-libtool
+
+pdf: pdf-am
+
+pdf-am:
+
+ps: ps-am
+
+ps-am:
+
+uninstall-am:
+
+.MAKE: install-am install-strip
+
+.PHONY: CTAGS GTAGS all all-am check check-am clean clean-generic \
+       clean-libtool clean-noinstLTLIBRARIES ctags distclean \
+       distclean-compile distclean-generic distclean-libtool \
+       distclean-tags distdir dvi dvi-am html html-am info info-am \
+       install install-am install-data install-data-am install-dvi \
+       install-dvi-am install-exec install-exec-am install-html \
+       install-html-am install-info install-info-am install-man \
+       install-pdf install-pdf-am install-ps install-ps-am \
+       install-strip installcheck installcheck-am installdirs \
+       maintainer-clean maintainer-clean-generic mostlyclean \
+       mostlyclean-compile mostlyclean-generic mostlyclean-libtool \
+       pdf pdf-am ps ps-am tags uninstall uninstall-am
+
+
+# Tell versions [3.59,3.63) of GNU make to not export all variables.
+# Otherwise a system limit (for SysV at least) may be exceeded.
+.NOEXPORT:
diff --git a/rand/rand.c b/rand/rand.c

new file mode 100644 (file)

index 0000000..31969b2
--- /dev/null
+++ b/rand/rand.c
@@ -0,0 +1,64 @@
+/* gmp_randinit (state, algorithm, ...) -- Initialize a random state.
+
+Copyright 1999, 2000, 2001, 2002 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "config.h"
+
+#include <stdio.h> /* for NULL */
+
+#if HAVE_STDARG
+#include <stdarg.h>
+#else
+#include <varargs.h>
+#endif
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+void
+#if HAVE_STDARG
+gmp_randinit (gmp_randstate_t rstate,
+             gmp_randalg_t alg,
+             ...)
+#else
+gmp_randinit (va_alist)
+     va_dcl
+#endif
+{
+  va_list ap;
+#if HAVE_STDARG
+  va_start (ap, alg);
+#else
+  __gmp_randstate_struct *rstate;
+  gmp_randalg_t alg;
+  va_start (ap);
+  rstate = va_arg (ap, __gmp_randstate_struct *);
+  alg = va_arg (ap, gmp_randalg_t);
+#endif
+
+  switch (alg) {
+  case GMP_RAND_ALG_LC:
+    if (! gmp_randinit_lc_2exp_size (rstate, va_arg (ap, unsigned long)))
+      gmp_errno |= GMP_ERROR_INVALID_ARGUMENT;
+    break;
+  default:
+    gmp_errno |= GMP_ERROR_UNSUPPORTED_ARGUMENT;
+    break;
+  }
+  va_end (ap);
+}
diff --git a/rand/randbui.c b/rand/randbui.c

new file mode 100644 (file)

index 0000000..5fc49b2
--- /dev/null
+++ b/rand/randbui.c
@@ -0,0 +1,46 @@
+/* gmp_urandomb_ui -- random bits returned in a ulong.
+
+Copyright 2003, 2004 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+
+/* Currently bits>=BITS_PER_ULONG is quietly truncated to BITS_PER_ULONG,
+   maybe this should raise an exception or something.  */
+
+unsigned long
+gmp_urandomb_ui (gmp_randstate_ptr rstate, unsigned long bits)
+{
+  mp_limb_t  a[LIMBS_PER_ULONG];
+
+  /* start with zeros, since if bits==0 then _gmp_rand will store nothing at
+     all, or if bits <= GMP_NUMB_BITS then it will store only a[0] */
+  a[0] = 0;
+#if LIMBS_PER_ULONG > 1
+  a[1] = 0;
+#endif
+
+  _gmp_rand (a, rstate, MIN (bits, BITS_PER_ULONG));
+
+#if LIMBS_PER_ULONG == 1
+  return a[0];
+#else
+  return a[0] | (a[1] << GMP_NUMB_BITS);
+#endif
+}
diff --git a/rand/randclr.c b/rand/randclr.c

new file mode 100644 (file)

index 0000000..a4e8242
--- /dev/null
+++ b/rand/randclr.c
@@ -0,0 +1,27 @@
+/* gmp_randclear (state) -- Clear and deallocate random state STATE.
+
+Copyright 1999, 2000, 2001, 2002 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+void
+gmp_randclear (gmp_randstate_t rstate)
+{
+  (*((gmp_randfnptr_t *) RNG_FNPTR (rstate))->randclear_fn) (rstate);
+}
diff --git a/rand/randdef.c b/rand/randdef.c

new file mode 100644 (file)

index 0000000..171a0bd
--- /dev/null
+++ b/rand/randdef.c
@@ -0,0 +1,27 @@
+/* gmp_randinit_default -- initialize a random state with a default algorithm.
+
+Copyright 2001, 2002 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+void
+gmp_randinit_default (gmp_randstate_t rstate)
+{
+  gmp_randinit_mt (rstate);
+}
diff --git a/rand/randiset.c b/rand/randiset.c

new file mode 100644 (file)

index 0000000..f140a33
--- /dev/null
+++ b/rand/randiset.c
@@ -0,0 +1,28 @@
+/* gmp_randinit_set -- initialize with a copy of another gmp_randstate_t.
+
+Copyright 2003 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+
+void
+gmp_randinit_set (gmp_randstate_ptr dst, gmp_randstate_srcptr src)
+{
+  (*((gmp_randfnptr_t *) RNG_FNPTR (src))->randiset_fn) (dst, src);
+}
diff --git a/rand/randlc2s.c b/rand/randlc2s.c

new file mode 100644 (file)

index 0000000..4dcde73
--- /dev/null
+++ b/rand/randlc2s.c
@@ -0,0 +1,82 @@
+/* gmp_randinit_lc_2exp_size -- initialize a random state with a linear
+   congruential generator of a requested size.
+
+Copyright 1999, 2000, 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include <stdio.h> /* for NULL */
+#include "gmp.h"
+#include "gmp-impl.h"
+
+
+/* Array of LC-schemes, ordered in increasing order of the first
+   member (the 'm2exp' value).  The end of the array is indicated with
+   an entry containing all zeros.  */
+
+/* All multipliers lie between 0.01*m and 0.99*m, and are
+congruent to 5 (mod 8).
+They all pass the spectral test with Vt >= 2^(30/t) and merit >= 1.
+(Up to and including 196 bits, merit is >= 3.)  */
+
+/* One predefined linear congruential scheme.  The three members are the
+   values that gmp_randinit_lc_2exp_size hands to gmp_randinit_lc_2exp,
+   with the multiplier kept as a hexadecimal string so that it can be
+   wider than any native integer type.  */
+struct __gmp_rand_lc_scheme_struct
+{
+  unsigned long int m2exp;     /* Modulus is 2 ^ m2exp. */
+  const char *astr;            /* Multiplier in string form. */
+  unsigned long int c;         /* Addend. */
+};
+
+/* Table searched by gmp_randinit_lc_2exp_size.  The search takes the
+   first entry whose m2exp / 2 is at least the requested size, so entries
+   must stay sorted by increasing m2exp, and the all-zero entry must stay
+   last because m2exp == 0 is what stops the search.  */
+static const struct __gmp_rand_lc_scheme_struct __gmp_rand_lc_scheme[] =
+{
+  {32, "29CF535",           1},
+  {33, "51F666D",           1},
+  {34, "A3D73AD",           1},
+  {35, "147E5B85",          1},
+  {36, "28F725C5",          1},
+  {37, "51EE3105",          1},
+  {38, "A3DD5CDD",          1},
+  {39, "147AF833D",         1},
+  {40, "28F5DA175",         1},
+  {56, "AA7D735234C0DD",  1},
+  {64, "BAECD515DAF0B49D", 1},
+  {100, "292787EBD3329AD7E7575E2FD", 1},
+  {128, "48A74F367FA7B5C8ACBB36901308FA85", 1},
+  {156, "78A7FDDDC43611B527C3F1D760F36E5D7FC7C45", 1},
+  {196, "41BA2E104EE34C66B3520CE706A56498DE6D44721E5E24F5", 1},
+  {200, "4E5A24C38B981EAFE84CD9D0BEC48E83911362C114F30072C5", 1},
+  {256, "AF66BA932AAF58A071FD8F0742A99A0C76982D648509973DB802303128A14CB5", 1},
+  {0, NULL, 0}                 /* End of array. */
+};
+
+/* Initialize RSTATE as a linear congruential generator taken from the
+   predefined scheme table.  The scheme used is the first one whose
+   modulus exponent, halved, is at least SIZE.  Return 1 on success, or 0
+   (leaving RSTATE untouched) when no table entry is big enough.  */
+int
+gmp_randinit_lc_2exp_size (gmp_randstate_t rstate, mp_bitcnt_t size)
+{
+  const struct __gmp_rand_lc_scheme_struct *scheme = __gmp_rand_lc_scheme;
+  mpz_t mult;
+
+  /* Step past every scheme that is too small; stop at the terminator.  */
+  while (scheme->m2exp != 0 && scheme->m2exp / 2 < size)
+    scheme++;
+
+  /* Ran into the all-zero terminator: nothing suitable.  */
+  if (scheme->m2exp == 0)
+    return 0;
+
+  /* The multiplier is stored as a hex string; convert, install, discard.  */
+  mpz_init_set_str (mult, scheme->astr, 16);
+  gmp_randinit_lc_2exp (rstate, mult, scheme->c, scheme->m2exp);
+  mpz_clear (mult);
+  return 1;
+}
diff --git a/rand/randlc2x.c b/rand/randlc2x.c

new file mode 100644 (file)

index 0000000..de89a78
--- /dev/null
+++ b/rand/randlc2x.c
@@ -0,0 +1,322 @@
+/* Linear Congruential pseudo-random number generator functions.
+
+Copyright 1999, 2000, 2001, 2002, 2003, 2005 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+
+/* State structure for LC, the RNG_STATE() pointer in a gmp_randstate_t.
+
+   _mp_seed holds the current seed value, in the range 0 to 2^m2exp-1.
+   SIZ(_mp_seed) is fixed at BITS_TO_LIMBS(_mp_m2exp) and the value is
+   padded with high zero limbs if necessary.  ALLOC(_mp_seed) is the current
+   size of PTR(_mp_seed) in the usual way.  There only needs to be
+   BITS_TO_LIMBS(_mp_m2exp) allocated, but the mpz functions in the
+   initialization and seeding end up making it a bit more than this.
+
+   _mp_a is the "a" multiplier, in the range 0 to 2^m2exp-1.  SIZ(_mp_a) is
+   the size of the value in the normal way for an mpz_t, except that a value
+   of zero is held with SIZ(_mp_a)==1 and PTR(_mp_a)[0]==0.  This makes it
+   easy to call mpn_mul, and the case of a==0 is highly un-random and not
+   worth any trouble to optimize.
+
+   {_cp,_cn} is the "c" addend.  Normally _cn is 1, but when nails are in
+   use a ulong can be bigger than one limb, and in this case _cn is 2 if
+   necessary.  c==0 is stored as _cp[0]==0 and _cn==1, which makes it easy
+   to call __GMPN_ADD.  c==0 is fairly un-random so isn't worth optimizing.
+
+   _mp_m2exp gives the modulus, namely 2^m2exp.  We demand m2exp>=1, since
+   m2exp==0 would mean no bits at all out of each iteration, which makes no
+   sense.  */
+
+typedef struct {
+  mpz_t          _mp_seed;     /* Current seed, held at a fixed limb size.  */
+  mpz_t          _mp_a;        /* Multiplier "a"; its size is never zero.  */
+  mp_size_t      _cn;          /* Number of limbs of _cp in use.  */
+  mp_limb_t      _cp[LIMBS_PER_ULONG]; /* Addend "c" as limbs.  */
+  unsigned long  _mp_m2exp;    /* Modulus is 2^_mp_m2exp.  */
+} gmp_rand_lc_struct;
+
+
+/* lc (rp, state) -- Generate next number in LC sequence.  Return the
+   number of valid bits in the result.  Discards the lower half of the
+   result.
+
+   The new value (a * seed + c) mod 2^m2exp replaces the seed held in
+   RSTATE; the part of it above the low m2exp/2 bits is what gets written
+   at RP.  */
+
+static unsigned long int
+lc (mp_ptr rp, gmp_randstate_t rstate)
+{
+  mp_ptr tp, seedp, ap;
+  mp_size_t ta;
+  mp_size_t tn, seedn, an;
+  unsigned long int m2exp;
+  unsigned long int bits;
+  int cy;
+  mp_size_t xn;
+  gmp_rand_lc_struct *p;
+  TMP_DECL;
+
+  p = (gmp_rand_lc_struct *) RNG_STATE (rstate);
+
+  m2exp = p->_mp_m2exp;
+
+  seedp = PTR (p->_mp_seed);
+  seedn = SIZ (p->_mp_seed);
+
+  ap = PTR (p->_mp_a);
+  an = SIZ (p->_mp_a);
+
+  /* Allocate temporary storage.  Let there be room for calculation of
+     (A * seed + C) % M, or M if bigger than that.  */
+
+  TMP_MARK;
+
+  /* ta is the number of limbs allocated at tp, tn the number of limbs
+     needed to hold a value modulo 2^m2exp.  */
+  ta = an + seedn + 1;
+  tn = BITS_TO_LIMBS (m2exp);
+  if (ta <= tn) /* that is, if (ta < tn + 1) */
+    {
+      /* The product fills only an + seedn limbs, so the limbs above it,
+        up to the enlarged ta, have to be cleared by hand.  */
+      mp_size_t tmp = an + seedn;
+      ta = tn + 1;
+      tp = TMP_ALLOC_LIMBS (ta);
+      MPN_ZERO (&tp[tmp], ta - tmp); /* mpn_mul won't zero it out.  */
+    }
+  else
+    tp = TMP_ALLOC_LIMBS (ta);
+
+  /* t = a * seed.  NOTE: an is always > 0; see initialization.  */
+  ASSERT (seedn >= an && an > 0);
+  mpn_mul (tp, seedp, seedn, ap, an);
+
+  /* t = t + c.  NOTE: tn is always >= p->_cn (precondition for __GMPN_ADD);
+     see initialization.  The carry out lands in cy and is not used
+     afterwards.  */
+  ASSERT (tn >= p->_cn);
+  __GMPN_ADD (cy, tp, tp, tn, p->_cp, p->_cn);
+
+  /* t = t % m */
+  tp[m2exp / GMP_NUMB_BITS] &= (CNST_LIMB (1) << m2exp % GMP_NUMB_BITS) - 1;
+
+  /* Save result as next seed.  */
+  MPN_COPY (PTR (p->_mp_seed), tp, tn);
+
+  /* Discard the lower m2exp/2 of the result.  */
+  bits = m2exp / 2;
+  xn = bits / GMP_NUMB_BITS;
+
+  /* xn whole limbs are dropped outright; tn becomes the number of limbs
+     left above them, and cnt the remaining bit count to shift out.  */
+  tn -= xn;
+  if (tn > 0)
+    {
+      unsigned int cnt = bits % GMP_NUMB_BITS;
+      if (cnt != 0)
+       {
+         mpn_rshift (tp, tp + xn, tn, cnt);
+         /* NOTE(review): xn + 1 limbs are copied here although the shift
+            above produced tn limbs -- confirm this count is intended.  */
+         MPN_COPY_INCR (rp, tp, xn + 1);
+       }
+      else                     /* Even limb boundary.  */
+       MPN_COPY_INCR (rp, tp + xn, tn);
+    }
+
+  TMP_FREE;
+
+  /* Return number of valid bits in the result.  This rounds up when
+     m2exp is odd.  */
+  return (m2exp + 1) / 2;
+}
+
+
+/* Obtain a sequence of random numbers.
+
+   Fill RP with NBITS bits by calling lc repeatedly.  Each call advances
+   the output position by chunk_nbits = m2exp / 2 bits; rbitpos tracks how
+   many bits of RP have been produced so far.  Chunks that do not start on
+   a limb boundary are generated into temporary space and shifted into
+   place.  A final partial chunk is handled separately, after which any
+   bits above NBITS in the top limb are cleared.  */
+static void
+randget_lc (gmp_randstate_t rstate, mp_ptr rp, unsigned long int nbits)
+{
+  unsigned long int rbitpos;
+  int chunk_nbits;
+  mp_ptr tp;
+  mp_size_t tn;
+  gmp_rand_lc_struct *p;
+  TMP_DECL;
+
+  p = (gmp_rand_lc_struct *) RNG_STATE (rstate);
+
+  TMP_MARK;
+
+  /* NOTE(review): chunk_nbits is an int while _mp_m2exp is an unsigned
+     long -- presumably m2exp is always small enough; confirm.  */
+  chunk_nbits = p->_mp_m2exp / 2;
+  tn = BITS_TO_LIMBS (chunk_nbits);
+
+  tp = TMP_ALLOC_LIMBS (tn);
+
+  /* Whole chunks first.  */
+  rbitpos = 0;
+  while (rbitpos + chunk_nbits <= nbits)
+    {
+      /* Limb of RP in which the next chunk starts.  */
+      mp_ptr r2p = rp + rbitpos / GMP_NUMB_BITS;
+
+      if (rbitpos % GMP_NUMB_BITS != 0)
+       {
+         mp_limb_t savelimb, rcy;
+         /* Target of new chunk is not bit aligned.  Use temp space
+            and align things by shifting it up.  */
+         lc (tp, rstate);
+         /* The shift overwrites r2p[0], whose low bits belong to the
+            previous chunk, so they are saved and OR-ed back in.  */
+         savelimb = r2p[0];
+         rcy = mpn_lshift (r2p, tp, tn, rbitpos % GMP_NUMB_BITS);
+         r2p[0] |= savelimb;
+         /* bogus */
+         if ((chunk_nbits % GMP_NUMB_BITS + rbitpos % GMP_NUMB_BITS)
+             > GMP_NUMB_BITS)
+           r2p[tn] = rcy;
+       }
+      else
+       {
+         /* Target of new chunk is bit aligned.  Let `lc' put bits
+            directly into our target variable.  */
+         lc (r2p, rstate);
+       }
+      rbitpos += chunk_nbits;
+    }
+
+  /* Handle last [0..chunk_nbits) bits.  */
+  if (rbitpos != nbits)
+    {
+      mp_ptr r2p = rp + rbitpos / GMP_NUMB_BITS;
+      int last_nbits = nbits - rbitpos;
+      /* From here on tn counts only the limbs needed for the tail.  */
+      tn = BITS_TO_LIMBS (last_nbits);
+      lc (tp, rstate);
+      if (rbitpos % GMP_NUMB_BITS != 0)
+       {
+         mp_limb_t savelimb, rcy;
+         /* Target of new chunk is not bit aligned.  Use temp space
+            and align things by shifting it up.  */
+         savelimb = r2p[0];
+         rcy = mpn_lshift (r2p, tp, tn, rbitpos % GMP_NUMB_BITS);
+         r2p[0] |= savelimb;
+         /* Store the bits shifted out of the top only when the request
+            extends beyond the tn limbs just written.  */
+         if (rbitpos + tn * GMP_NUMB_BITS - rbitpos % GMP_NUMB_BITS < nbits)
+           r2p[tn] = rcy;
+       }
+      else
+       {
+         MPN_COPY (r2p, tp, tn);
+       }
+      /* Mask off top bits if needed.  */
+      if (nbits % GMP_NUMB_BITS != 0)
+       rp[nbits / GMP_NUMB_BITS]
+         &= ~(~CNST_LIMB (0) << nbits % GMP_NUMB_BITS);
+    }
+
+  TMP_FREE;
+}
+
+
+/* Seed the LC generator in RSTATE with SEED reduced modulo 2^m2exp.  The
+   stored seed is deliberately left unnormalized: it always occupies
+   BITS_TO_LIMBS (m2exp) limbs, padded with zero limbs at the top.  */
+static void
+randseed_lc (gmp_randstate_t rstate, mpz_srcptr seed)
+{
+  gmp_rand_lc_struct *lcp = (gmp_rand_lc_struct *) RNG_STATE (rstate);
+  mpz_ptr state = lcp->_mp_seed;
+  mp_size_t want = BITS_TO_LIMBS (lcp->_mp_m2exp);
+  mp_size_t used;
+
+  /* Reduce first; this leaves a normalized value of "used" limbs.  */
+  mpz_fdiv_r_2exp (state, seed, lcp->_mp_m2exp);
+  used = SIZ (state);
+
+  /* Pad up to the fixed size, which is never zero.  */
+  MPN_ZERO (PTR (state) + used, want - used);
+  SIZ (state) = want;
+}
+
+
+/* Release everything owned by the LC generator in RSTATE: the seed and
+   multiplier integers, then the state structure itself.  */
+static void
+randclear_lc (gmp_randstate_t rstate)
+{
+  gmp_rand_lc_struct *lcp;
+
+  lcp = (gmp_rand_lc_struct *) RNG_STATE (rstate);
+  mpz_clear (lcp->_mp_seed);
+  mpz_clear (lcp->_mp_a);
+  (*__gmp_free_func) (lcp, sizeof (*lcp));
+}
+
+/* Declared ahead of its definition because the function table below must
+   name it, while the function itself stores the table's address.  */
+static void randiset_lc (gmp_randstate_ptr, gmp_randstate_srcptr);
+
+/* Function table installed in every LC random state.  The entries are,
+   in order, the seeding, generating, clearing and copy-initializing
+   routines of this file.  */
+static const gmp_randfnptr_t Linear_Congruential_Generator = {
+  randseed_lc,
+  randget_lc,
+  randclear_lc,
+  randiset_lc
+};
+
+/* Initialize DST as an independent copy of the LC generator in SRC.  A
+   fresh state structure is allocated and every field is duplicated.  */
+static void
+randiset_lc (gmp_randstate_ptr dst, gmp_randstate_srcptr src)
+{
+  gmp_rand_lc_struct *from, *to;
+  mp_size_t i;
+
+  from = (gmp_rand_lc_struct *) RNG_STATE (src);
+  to = (gmp_rand_lc_struct *) (*__gmp_allocate_func) (sizeof (*to));
+
+  RNG_STATE (dst) = (mp_limb_t *) (void *) to;
+  RNG_FNPTR (dst) = (void *) &Linear_Congruential_Generator;
+
+  /* The seed and the multiplier may carry high zero limbs; mpz_init_set
+     copies them as they are, which is what is wanted here.  */
+  mpz_init_set (to->_mp_seed, from->_mp_seed);
+  mpz_init_set (to->_mp_a, from->_mp_a);
+
+  /* Addend: copy the limb count and the whole limb array (usually only
+     1 or 2 limbs).  */
+  to->_cn = from->_cn;
+  for (i = 0; i < LIMBS_PER_ULONG; i++)
+    to->_cp[i] = from->_cp[i];
+
+  to->_mp_m2exp = from->_mp_m2exp;
+}
+
+
+/* Initialize RSTATE as a linear congruential generator with multiplier A,
+   addend C and modulus 2^M2EXP.  M2EXP must be nonzero, which is checked
+   unconditionally.  A is reduced modulo 2^M2EXP, and the initial seed is
+   set to 1.  */
+void
+gmp_randinit_lc_2exp (gmp_randstate_t rstate,
+                     mpz_srcptr a,
+                     unsigned long int c,
+                     mp_bitcnt_t m2exp)
+{
+  gmp_rand_lc_struct *p;
+  /* Fixed limb size of the seed, enough for values up to 2^m2exp-1.  */
+  mp_size_t seedn = BITS_TO_LIMBS (m2exp);
+
+  ASSERT_ALWAYS (m2exp != 0);
+
+  p = __GMP_ALLOCATE_FUNC_TYPE (1, gmp_rand_lc_struct);
+  RNG_STATE (rstate) = (mp_limb_t *) (void *) p;
+  RNG_FNPTR (rstate) = (void *) &Linear_Congruential_Generator;
+
+  /* allocate m2exp bits of space for p->_mp_seed, and initial seed "1" */
+  mpz_init2 (p->_mp_seed, m2exp);
+  MPN_ZERO (PTR (p->_mp_seed), seedn);
+  SIZ (p->_mp_seed) = seedn;
+  PTR (p->_mp_seed)[0] = 1;
+
+  /* "a", forced to 0 to 2^m2exp-1 */
+  mpz_init (p->_mp_a);
+  mpz_fdiv_r_2exp (p->_mp_a, a, m2exp);
+
+  /* Avoid SIZ(a) == 0 to avoid checking for special case in lc().  */
+  if (SIZ (p->_mp_a) == 0)
+    {
+      SIZ (p->_mp_a) = 1;
+      PTR (p->_mp_a)[0] = CNST_LIMB (0);
+    }
+
+  /* Store c as limbs in _cp, with the limb count in _cn.  */
+  MPN_SET_UI (p->_cp, p->_cn, c);
+
+  /* Internally we may discard any bits of c above m2exp.  The following
+     code ensures that __GMPN_ADD in lc() will always work.  When c takes
+     more limbs than the seed has, only its low limb is kept, or no limb
+     at all if that low limb is zero.  */
+  if (seedn < p->_cn)
+    p->_cn = (p->_cp[0] != 0);
+
+  p->_mp_m2exp = m2exp;
+}
diff --git a/rand/randmt.c b/rand/randmt.c

new file mode 100644 (file)

index 0000000..3137bb5
--- /dev/null
+++ b/rand/randmt.c
@@ -0,0 +1,405 @@
+/* Mersenne Twister pseudo-random number generator functions.
+
+   THE FUNCTIONS IN THIS FILE ARE FOR INTERNAL USE ONLY.  THEY'RE ALMOST
+   CERTAIN TO BE SUBJECT TO INCOMPATIBLE CHANGES OR DISAPPEAR COMPLETELY IN
+   FUTURE GNU MP RELEASES.
+
+Copyright 2002, 2003, 2006 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include <stdio.h>   /* for NULL */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "randmt.h"
+
+
+/* This code implements the Mersenne Twister pseudorandom number generator
+   by Takuji Nishimura and Makoto Matsumoto.  The buffer initialization
+   function is different in order to permit seeds greater than 2^32-1.
+
+   This file contains a special __gmp_randinit_mt_noseed which excludes the
+   seeding function from the gmp_randfnptr_t routines.  This is for use by
+   mpn_random and mpn_random2 on the global random generator.  MT seeding
+   uses mpz functions, and we don't want mpn routines dragging mpz functions
+   into the link.  */
+
+
+/* Default seed to use when the generator is not initialized.  */
+#define DEFAULT_SEED 5489 /* was 4357 */
+
+/* Tempering masks.  NEXT_RANDOM in __gmp_randget_mt ANDs them with the
+   left-shifted output word (shifts of 7 and 15 bits respectively).  */
+#define MASK_1 0x9D2C5680
+#define MASK_2 0xEFC60000
+
+/* Initial state of buffer when initialized with default seed.  */
+static const gmp_uint_least32_t default_state[N] =
+{
+  0xD247B233,0x9E5AA8F1,0x0FFA981B,0x9DCB0980,0x74200F2B,0xA576D044,
+  0xE9F05ADF,0x1538BFF5,0x59818BBF,0xCF9E58D8,0x09FCE032,0x6A1C663F,
+  0x5116E78A,0x69B3E0FA,0x6D92D665,0xD0A8BE98,0xF669B734,0x41AC1B68,
+  0x630423F1,0x4B8D6B8A,0xC2C46DD7,0x5680747D,0x43703E8F,0x3B6103D2,
+  0x49E5EB3F,0xCBDAB4C1,0x9C988E23,0x747BEE0B,0x9111E329,0x9F031B5A,
+  0xECCA71B9,0x2AFE4EF8,0x8421C7ED,0xAC89AFF1,0xAED90DF3,0x2DD74F01,
+  0x14906A13,0x75873FA9,0xFF83F877,0x5028A0C9,0x11B4C41D,0x7CAEDBC4,
+  0x8672D0A7,0x48A7C109,0x8320E59F,0xBC0B3D5F,0x75A30886,0xF9E0D128,
+  0x41AF7580,0x239BB94D,0xC67A3C81,0x74EEBD6E,0xBC02B53C,0x727EA449,
+  0x6B8A2806,0x5853B0DA,0xBDE032F4,0xCE234885,0x320D6145,0x48CC053F,
+  0x00DBC4D2,0xD55A2397,0xE1059B6F,0x1C3E05D1,0x09657C64,0xD07CB661,
+  0x6E982E34,0x6DD1D777,0xEDED1071,0xD79DFD65,0xF816DDCE,0xB6FAF1E4,
+  0x1C771074,0x311835BD,0x18F952F7,0xF8F40350,0x4ECED354,0x7C8AC12B,
+  0x31A9994D,0x4FD47747,0xDC227A23,0x6DFAFDDF,0x6796E748,0x0C6F634F,
+  0xF992FA1D,0x4CF670C9,0x067DFD31,0xA7A3E1A5,0x8CD7D9DF,0x972CCB34,
+  0x67C82156,0xD548F6A8,0x045CEC21,0xF3240BFB,0xDEF656A7,0x43DE08C5,
+  0xDAD1F92F,0x3726C56B,0x1409F19A,0x942FD147,0xB926749C,0xADDC31B8,
+  0x53D0D869,0xD1BA52FE,0x6722DF8C,0x22D95A74,0x7DC1B52A,0x1DEC6FD5,
+  0x7262874D,0x0A725DC9,0xE6A8193D,0xA052835A,0xDC9AD928,0xE59EBB90,
+  0x70DBA9FF,0xD612749D,0x5A5A638C,0x6086EC37,0x2A579709,0x1449EA3A,
+  0xBC8E3C06,0x2F900666,0xFBE74FD1,0x6B35B911,0xF8335008,0xEF1E979D,
+  0x738AB29D,0xA2DC0FDC,0x7696305D,0xF5429DAC,0x8C41813B,0x8073E02E,
+  0xBEF83CCD,0x7B50A95A,0x05EE5862,0x00829ECE,0x8CA1958C,0xBE4EA2E2,
+  0x4293BB73,0x656F7B23,0x417316D8,0x4467D7CF,0x2200E63B,0x109050C8,
+  0x814CBE47,0x36B1D4A8,0x36AF9305,0x308327B3,0xEBCD7344,0xA738DE27,
+  0x5A10C399,0x4142371D,0x64A18528,0x0B31E8B2,0x641057B9,0x6AFC363B,
+  0x108AD953,0x9D4DA234,0x0C2D9159,0x1C8A1A1F,0x310C66BA,0x87AA1070,
+  0xDAC832FF,0x0A433422,0x7AF15812,0x2D8D9BD0,0x995A25E9,0x25326CAC,
+  0xA34384DB,0x4C8421CC,0x4F0315EC,0x29E8649E,0xA7732D6F,0x2E94D3E3,
+  0x7D98A340,0x397C4D74,0x659DB4DE,0x747D4E9A,0xD9DB8435,0x4659DBE9,
+  0x313E6DC5,0x29D104DC,0x9F226CBA,0x452F18B0,0xD0BC5068,0x844CA299,
+  0x782B294E,0x4AE2EB7B,0xA4C475F8,0x70A81311,0x4B3E8BCC,0x7E20D4BA,
+  0xABCA33C9,0x57BE2960,0x44F9B419,0x2E567746,0x72EB757A,0x102CC0E8,
+  0xB07F32B9,0xD0DABD59,0xBA85AD6B,0xF3E20667,0x98D77D81,0x197AFA47,
+  0x518EE9AC,0xE10CE5A2,0x01CF2C2A,0xD3A3AF3D,0x16DDFD65,0x669232F8,
+  0x1C50A301,0xB93D9151,0x9354D3F4,0x847D79D0,0xD5FE2EC6,0x1F7B0610,
+  0xFA6B90A5,0xC5879041,0x2E7DC05E,0x423F1F32,0xEF623DDB,0x49C13280,
+  0x98714E92,0xC7B6E4AD,0xC4318466,0x0737F312,0x4D3C003F,0x9ACC1F1F,
+  0x5F1C926D,0x085FA771,0x185A83A2,0xF9AA159D,0x0B0B0132,0xF98E7A43,
+  0xCD9EBDBE,0x0190CB29,0x10D93FB6,0x3B8A4D97,0x66A65A41,0xE43E766F,
+  0x77BE3C41,0xB9686364,0xCB36994D,0x6846A287,0x567E77F7,0x36178DD8,
+  0xBDE6B1F2,0xB6EFDC64,0x82950324,0x42053F47,0xC09BE51C,0x0942D762,
+  0x35F92C7F,0x367DEC61,0x6EE3D983,0xDBAAF78A,0x265D2C47,0x8EB4BF5C,
+  0x33B232D7,0xB0137E77,0x373C39A7,0x8D2B2E76,0xC7510F01,0x50F9E032,
+  0x7B1FDDDB,0x724C2AAE,0xB10ECB31,0xCCA3D1B8,0x7F0BCF10,0x4254BBBD,
+  0xE3F93B97,0x2305039B,0x53120E22,0x1A2F3B9A,0x0FDDBD97,0x0118561E,
+  0x0A798E13,0x9E0B3ACD,0xDB6C9F15,0xF512D0A2,0x9E8C3A28,0xEE2184AE,
+  0x0051EC2F,0x2432F74F,0xB0AA66EA,0x55128D88,0xF7D83A38,0x4DAE8E82,
+  0x3FDC98D6,0x5F0BD341,0x7244BE1D,0xC7B48E78,0x2D473053,0x43892E20,
+  0xBA0F1F2A,0x524D4895,0x2E10BCB1,0x4C372D81,0x5C3E50CD,0xCF61CC2E,
+  0x931709AB,0x81B3AEFC,0x39E9405E,0x7FFE108C,0x4FBB3FF8,0x06ABE450,
+  0x7F5BF51E,0xA4E3CDFD,0xDB0F6C6F,0x159A1227,0x3B9FED55,0xD20B6F7F,
+  0xFBE9CC83,0x64856619,0xBF52B8AF,0x9D7006B0,0x71165BC6,0xAE324AEE,
+  0x29D27F2C,0x794C2086,0x74445CE2,0x782915CC,0xD4CE6886,0x3289AE7C,
+  0x53DEF297,0x4185F7ED,0x88B72400,0x3C09DC11,0xBCE3AAB6,0x6A75934A,
+  0xB267E399,0x000DF1BF,0x193BA5E2,0xFA3E1977,0x179E14F6,0x1EEDE298,
+  0x691F0B06,0xB84F78AC,0xC1C15316,0xFFFF3AD6,0x0B457383,0x518CD612,
+  0x05A00F3E,0xD5B7D275,0x4C5ECCD7,0xE02CD0BE,0x5558E9F2,0x0C89BBF0,
+  0xA3D96227,0x2832D2B2,0xF667B897,0xD4556554,0xF9D2F01F,0xFA1E3FAE,
+  0x52C2E1EE,0xE5451F31,0x7E849729,0xDABDB67A,0x54BF5E7E,0xF831C271,
+  0x5F1A17E3,0x9D140AFE,0x92741C47,0x48CFABCE,0x9CBBE477,0x9C3EE57F,
+  0xB07D4C39,0xCC21BCE2,0x697708B1,0x58DA2A6B,0x2370DB16,0x6E641948,
+  0xACC5BD52,0x868F24CC,0xCA1DB0F5,0x4CADA492,0x3F443E54,0xC4A4D5E9,
+  0xF00AD670,0xE93C86E0,0xFE90651A,0xDDE532A3,0xA66458DF,0xAB7D7151,
+  0x0E2E775F,0xC9109F99,0x8D96D59F,0x73CEF14C,0xC74E88E9,0x02712DC0,
+  0x04F41735,0x2E5914A2,0x59F4B2FB,0x0287FC83,0x80BC0343,0xF6B32559,
+  0xC74178D4,0xF1D99123,0x383CCC07,0xACC0637D,0x0863A548,0xA6FCAC85,
+  0x2A13EFF0,0xAF2EEDB1,0x41E72750,0xE0C6B342,0x5DA22B46,0x635559E0,
+  0xD2EA40AC,0x10AA98C0,0x19096497,0x112C542B,0x2C85040C,0xA868E7D0,
+  0x6E260188,0xF596D390,0xC3BB5D7A,0x7A2AA937,0xDFD15032,0x6780AE3B,
+  0xDB5F9CD8,0x8BD266B0,0x7744AF12,0xB463B1B0,0x589629C9,0xE30DBC6E,
+  0x880F5569,0x209E6E16,0x9DECA50C,0x02987A57,0xBED3EA57,0xD3A678AA,
+  0x70DD030D,0x0CFD9C5D,0x92A18E99,0xF5740619,0x7F6F0A7D,0x134CAF9A,
+  0x70F5BAE4,0x23DCA7B5,0x4D788FCD,0xC7F07847,0xBCF77DA1,0x9071D568,
+  0xFC627EA1,0xAE004B77,0x66B54BCB,0x7EF2DAAC,0xDCD5AC30,0xB9BDF730,
+  0x505A97A7,0x9D881FD3,0xADB796CC,0x94A1D202,0x97535D7F,0x31EC20C0,
+  0xB1887A98,0xC1475069,0xA6F73AF3,0x71E4E067,0x46A569DE,0xD2ADE430,
+  0x6F0762C7,0xF50876F4,0x53510542,0x03741C3E,0x53502224,0xD8E54D60,
+  0x3C44AB1A,0x34972B46,0x74BFA89D,0xD7D768E0,0x37E605DC,0xE13D1BDF,
+  0x5051C421,0xB9E057BE,0xB717A14C,0xA1730C43,0xB99638BE,0xB5D5F36D,
+  0xE960D9EA,0x6B1388D3,0xECB6D3B6,0xBDBE8B83,0x2E29AFC5,0x764D71EC,
+  0x4B8F4F43,0xC21DDC00,0xA63F657F,0x82678130,0xDBF535AC,0xA594FC58,
+  0x942686BC,0xBD9B657B,0x4A0F9B61,0x44FF184F,0x38E10A2F,0x61910626,
+  0x5E247636,0x7106D137,0xC62802F0,0xBD1D1F00,0x7CC0DCB2,0xED634909,
+  0xDC13B24E,0x9799C499,0xD77E3D6A,0x14773B68,0x967A4FB7,0x35EECFB1,
+  0x2A5110B8,0xE2F0AF94,0x9D09DEA5,0x20255D27,0x5771D34B,0xE1089EE4,
+  0x246F330B,0x8F7CAEE5,0xD3064712,0x75CAFBEE,0xB94F7028,0xED953666,
+  0x5D1975B4,0x5AF81271,0x13BE2025,0x85194659,0x30805331,0xEC9D46C0,
+  0xBC027C36,0x2AF84188,0xC2141B80,0xC02B1E4A,0x04D36177,0xFC50E9D7,
+  0x39CE79DA,0x917E0A00,0xEF7A0BF4,0xA98BD8D1,0x19424DD2,0x9439DF1F,
+  0xC42AF746,0xADDBE83E,0x85221F0D,0x45563E90,0x9095EC52,0x77887B25,
+  0x8AE46064,0xBD43B71A,0xBB541956,0x7366CF9D,0xEE8E1737,0xB5A727C9,
+  0x5076B3E7,0xFC70BACA,0xCE135B75,0xC4E91AA3,0xF0341911,0x53430C3F,
+  0x886B0824,0x6BB5B8B7,0x33E21254,0xF193B456,0x5B09617F,0x215FFF50,
+  0x48D97EF1,0x356479AB,0x6EA9DDC4,0x0D352746,0xA2F5CE43,0xB226A1B3,
+  0x1329EA3C,0x7A337CC2,0xB5CCE13D,0x563E3B5B,0x534E8E8F,0x561399C9,
+  0xE1596392,0xB0F03125,0x4586645B,0x1F371847,0x94EAABD1,0x41F97EDD,
+  0xE3E5A39B,0x71C774E2,0x507296F4,0x5960133B,0x7852C494,0x3F5B2691,
+  0xA3F87774,0x5A7AF89E,0x17DA3F28,0xE9D9516D,0xFCC1C1D5,0xE4618628,
+  0x04081047,0xD8E4DB5F,0xDC380416,0x8C4933E2,0x95074D53,0xB1B0032D,
+  0xCC8102EA,0x71641243,0x98D6EB6A,0x90FEC945,0xA0914345,0x6FAB037D,
+  0x70F49C4D,0x05BF5B0E,0x927AAF7F,0xA1940F61,0xFEE0756F,0xF815369F,
+  0x5C00253B,0xF2B9762F,0x4AEB3CCC,0x1069F386,0xFBA4E7B9,0x70332665,
+  0x6BCA810E,0x85AB8058,0xAE4B2B2F,0x9D120712,0xBEE8EACB,0x776A1112
+};
+
+/* Regenerate all N words of the Mersenne Twister buffer MT in place.
+   Each word is rebuilt from the top bit of itself, the low 31 bits of its
+   successor, and a partner word M positions further on (wrapping round
+   the end of the buffer); MATRIX_A is folded in when the low bit of the
+   combined value is set.  */
+void
+__gmp_mt_recalc_buffer (gmp_uint_least32_t mt[])
+{
+  gmp_uint_least32_t mix;
+  int i = 0;
+
+  /* Words whose partner, M places ahead, is still inside the buffer.  */
+  while (i < N - M)
+    {
+      mix = (mt[i] & 0x80000000) | (mt[i + 1] & 0x7FFFFFFF);
+      mt[i] = mt[i + M] ^ (mix >> 1) ^ ((mix & 0x01) != 0 ? MATRIX_A : 0);
+      i++;
+    }
+
+  /* Words whose partner has wrapped round to the start of the buffer.  */
+  while (i < N - 1)
+    {
+      mix = (mt[i] & 0x80000000) | (mt[i + 1] & 0x7FFFFFFF);
+      mt[i] = mt[i - (N - M)] ^ (mix >> 1) ^ ((mix & 0x01) != 0 ? MATRIX_A : 0);
+      i++;
+    }
+
+  /* Last word: its successor is the first word of the buffer.  */
+  mix = (mt[N - 1] & 0x80000000) | (mt[0] & 0x7FFFFFFF);
+  mt[N - 1] = mt[M - 1] ^ (mix >> 1) ^ ((mix & 0x01) != 0 ? MATRIX_A : 0);
+}
+
+
+/* Get nbits bits of output from the generator into dest.
+   Note that Mersenne Twister is designed to produce outputs in
+   32-bit words.
+
+   Limbs are filled from dest[0] upwards.  When nbits is not a multiple
+   of GMP_NUMB_BITS a final partial limb dest[nlimbs] is written too,
+   with the bits above the requested ones left as zero.  */
+void
+__gmp_randget_mt (gmp_randstate_t rstate, mp_ptr dest, unsigned long int nbits)
+{
+  gmp_uint_least32_t y;
+  int rbits;
+  mp_size_t i;
+  mp_size_t nlimbs;
+  int *pmti;
+  gmp_uint_least32_t *mt;
+
+  pmti = &((gmp_rand_mt_struct *) RNG_STATE (rstate))->mti;
+  mt = ((gmp_rand_mt_struct *) RNG_STATE (rstate))->mt;
+
+  /* nlimbs whole limbs, then rbits further bits in one partial limb.  */
+  nlimbs = nbits / GMP_NUMB_BITS;
+  rbits = nbits % GMP_NUMB_BITS;
+
+/* Leave the next tempered output word in y.  The buffer is regenerated
+   first whenever all N of its words have been consumed.  */
+#define NEXT_RANDOM                    \
+  do                                   \
+    {                                  \
+      if (*pmti >= N)                  \
+       {                               \
+         __gmp_mt_recalc_buffer (mt);  \
+         *pmti = 0;                    \
+       }                               \
+      y = mt[(*pmti)++];               \
+      y ^= (y >> 11);                  \
+      y ^= (y << 7) & MASK_1;          \
+      y ^= (y << 15) & MASK_2;         \
+      y ^= (y >> 18);                  \
+    }                                  \
+  while (0)
+
+
+  /* Handle the common cases of 32- or 64-bit limbs with fast,
+     optimized routines, and the rest of cases with a general
+     routine.  In all cases, no more than 31 bits are rejected
+     for the last limb so that every version of the code is
+     consistent with the others.  */
+
+#if (GMP_NUMB_BITS == 32)
+
+  /* One output word per limb.  */
+  for (i = 0; i < nlimbs; i++)
+    {
+      NEXT_RANDOM;
+      dest[i] = (mp_limb_t) y;
+    }
+  if (rbits)
+    {
+      NEXT_RANDOM;
+      dest[nlimbs] = (mp_limb_t) (y & ~(ULONG_MAX << rbits));
+    }
+
+#else /* GMP_NUMB_BITS != 32 */
+#if (GMP_NUMB_BITS == 64)
+
+  /* Two output words per limb, the first one in the low half.  */
+  for (i = 0; i < nlimbs; i++)
+    {
+      NEXT_RANDOM;
+      dest[i] = (mp_limb_t) y;
+      NEXT_RANDOM;
+      dest[i] |= (mp_limb_t) y << 32;
+    }
+  if (rbits)
+    {
+      if (rbits < 32)
+       {
+         NEXT_RANDOM;
+         dest[nlimbs] = (mp_limb_t) (y & ~(ULONG_MAX << rbits));
+       }
+      else
+       {
+         /* A full low word, plus a masked second word only when more
+            than 32 bits are wanted.  */
+         NEXT_RANDOM;
+         dest[nlimbs] = (mp_limb_t) y;
+         if (rbits > 32)
+           {
+             NEXT_RANDOM;
+             dest[nlimbs] |=
+               ((mp_limb_t) (y & ~(ULONG_MAX << (rbits-32)))) << 32;
+           }
+       }
+    }
+
+#else /* GMP_NUMB_BITS != 64 */
+
+  {
+    /* Fall back to a general algorithm.  This algorithm works by
+       keeping a pool of up to 64 bits (2 outputs from MT) acting
+       as a shift register from which bits are consumed as needed.
+       Bits are consumed using the LSB bits of bitpool_l, and
+       inserted via bitpool_h and shifted to the right place.  */
+
+    gmp_uint_least32_t bitpool_h = 0;
+    gmp_uint_least32_t bitpool_l = 0;
+    int bits_in_pool = 0;      /* Holds number of valid bits in the pool.  */
+    int bits_to_fill;          /* Holds total number of bits to put in
+                                  destination.  */
+    int bitidx;                        /* Holds the destination bit position.  */
+    mp_size_t nlimbs2;         /* Number of whole+partial limbs to fill.  */
+
+    nlimbs2 = nlimbs + (rbits != 0);
+
+    for (i = 0; i < nlimbs2; i++)
+      {
+       bitidx = 0;
+       if (i < nlimbs)
+         bits_to_fill = GMP_NUMB_BITS;
+       else
+         bits_to_fill = rbits;
+
+       dest[i] = CNST_LIMB (0);
+       while (bits_to_fill >= 32) /* Process whole 32-bit blocks first.  */
+         {
+           if (bits_in_pool < 32)      /* Need more bits.  */
+             {
+               /* 64-bit right shift.  */
+               NEXT_RANDOM;
+               bitpool_h = y;
+               bitpool_l |= (bitpool_h << bits_in_pool) & 0xFFFFFFFF;
+               /* Tested separately so that a shift by 32 is never
+                  performed.  */
+               if (bits_in_pool == 0)
+                 bitpool_h = 0;
+               else
+                 bitpool_h >>= 32 - bits_in_pool;
+               bits_in_pool += 32;     /* We've got 32 more bits.  */
+             }
+
+           /* Fill a 32-bit chunk.  */
+           dest[i] |= ((mp_limb_t) bitpool_l) << bitidx;
+           bitpool_l = bitpool_h;
+           bits_in_pool -= 32;
+           bits_to_fill -= 32;
+           bitidx += 32;
+         }
+
+       /* Cover the case where GMP_NUMB_BITS is not a multiple of 32.  */
+       if (bits_to_fill != 0)
+         {
+           /* Same refill as above, needed only when the pool holds
+              fewer bits than are still wanted.  */
+           if (bits_in_pool < bits_to_fill)
+             {
+               NEXT_RANDOM;
+               bitpool_h = y;
+               bitpool_l |= (bitpool_h << bits_in_pool) & 0xFFFFFFFF;
+               if (bits_in_pool == 0)
+                 bitpool_h = 0;
+               else
+                 bitpool_h >>= 32 - bits_in_pool;
+               bits_in_pool += 32;
+             }
+
+           /* Take the low bits_to_fill bits, then shift the pool down
+              by that amount.  */
+           dest[i] |= (((mp_limb_t) bitpool_l
+                        & ~(~CNST_LIMB (0) << bits_to_fill))
+                       << bitidx);
+           bitpool_l = ((bitpool_l >> bits_to_fill)
+                        | (bitpool_h << (32 - bits_to_fill))) & 0xFFFFFFFF;
+           bitpool_h >>= bits_to_fill;
+           bits_in_pool -= bits_to_fill;
+         }
+      }
+  }
+
+#endif /* GMP_NUMB_BITS != 64 */
+#endif /* GMP_NUMB_BITS != 32 */
+}
+
+/* Free the Mersenne Twister state attached to RSTATE.  The size passed
+   to the free function is rebuilt from the limb count that the
+   initialization routines stored in the alloc field of _mp_seed.  */
+void
+__gmp_randclear_mt (gmp_randstate_t rstate)
+{
+  void *state = (void *) RNG_STATE (rstate);
+
+  (*__gmp_free_func) (state, ALLOC (rstate->_mp_seed) * BYTES_PER_MP_LIMB);
+}
+
+/* Declared here because the function table below refers to it before
+   its definition.  */
+void __gmp_randiset_mt (gmp_randstate_ptr, gmp_randstate_srcptr);
+
+/* Function table without a seeding routine: the first entry is NULL so
+   that users of this table do not pull the mpz-based MT seeding code
+   into the link.  */
+static const gmp_randfnptr_t Mersenne_Twister_Generator_Noseed = {
+  NULL,
+  __gmp_randget_mt,
+  __gmp_randclear_mt,
+  __gmp_randiset_mt
+};
+
+/* Initialize DST as a copy of the Mersenne Twister generator in SRC.
+   DST gets the no-seed function table and a newly allocated state block
+   holding the same buffer contents and buffer index as SRC.  */
+void
+__gmp_randiset_mt (gmp_randstate_ptr dst, gmp_randstate_srcptr src)
+{
+  /* Size of the state structure, rounded up to whole limbs.  */
+  const mp_size_t nlimbs = ((sizeof (gmp_rand_mt_struct) - 1) / BYTES_PER_MP_LIMB) + 1;
+  gmp_rand_mt_struct *to, *from;
+  mp_size_t k;
+
+  RNG_FNPTR (dst) = (void *) &Mersenne_Twister_Generator_Noseed;
+
+  /* Fresh MT state.  The limb count is recorded in the alloc field,
+     which __gmp_randclear_mt reads back when freeing.  */
+  to = (gmp_rand_mt_struct *) __GMP_ALLOCATE_FUNC_LIMBS (nlimbs);
+  RNG_STATE (dst) = (mp_ptr) to;
+  ALLOC (dst->_mp_seed) = nlimbs;
+
+  /* Duplicate the buffer and the position within it.  */
+  from = (gmp_rand_mt_struct *) RNG_STATE (src);
+  k = 0;
+  while (k < N)
+    {
+      to->mt[k] = from->mt[k];
+      k++;
+    }
+
+  to->mti = from->mti;
+}
+
+/* Initialize DST as a Mersenne Twister generator in its default-seed
+   state, installing the function table that has no seeding routine.  The
+   buffer is loaded from the precomputed default_state table.  */
+void
+__gmp_randinit_mt_noseed (gmp_randstate_ptr dst)
+{
+  /* Size of the state structure, rounded up to whole limbs.  */
+  const mp_size_t nlimbs = ((sizeof (gmp_rand_mt_struct) - 1) / BYTES_PER_MP_LIMB) + 1;
+  gmp_rand_mt_struct *state;
+  mp_size_t k;
+
+  RNG_FNPTR (dst) = (void *) &Mersenne_Twister_Generator_Noseed;
+
+  /* Fresh MT state.  The limb count is recorded in the alloc field,
+     which __gmp_randclear_mt reads back when freeing.  */
+  state = (gmp_rand_mt_struct *) __GMP_ALLOCATE_FUNC_LIMBS (nlimbs);
+  RNG_STATE (dst) = (mp_ptr) state;
+  ALLOC (dst->_mp_seed) = nlimbs;
+
+  /* Load the precomputed buffer for the default seed.  */
+  k = 0;
+  while (k < N)
+    {
+      state->mt[k] = default_state[k];
+      k++;
+    }
+
+  state->mti = WARM_UP % N;
+}
diff --git a/rand/randmt.h b/rand/randmt.h

new file mode 100644 (file)

index 0000000..2f26956
--- /dev/null
+++ b/rand/randmt.h
@@ -0,0 +1,40 @@
+/* Mersenne Twister pseudo-random number generator defines.
+
+Copyright 2002, 2003 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+
+/* Number of extractions used to warm the buffer up.  */
+#define WARM_UP 2000
+
+/* Period parameters.  N is the number of words in the state array and M
+   the distance to the partner word used when the array is regenerated.  */
+#define N 624
+#define M 397
+#define MATRIX_A 0x9908B0DF   /* Constant vector a.  */
+
+/* State structure for MT.  A random state's RNG_STATE pointer refers to
+   one of these when the Mersenne Twister routines are in use.  */
+typedef struct
+{
+  gmp_uint_least32_t mt[N];    /* State array.  */
+  int mti;                     /* Index of current value; once it
+                                  reaches N the array is regenerated
+                                  before the next extraction.  */
+} gmp_rand_mt_struct;
+
+
+/* Mersenne Twister routines defined in randmt.c: regenerate the state
+   array, extract bits, free the state, and copy-initialize a state.  */
+void __gmp_mt_recalc_buffer (gmp_uint_least32_t *);
+void __gmp_randget_mt (gmp_randstate_t, mp_ptr, unsigned long int);
+void __gmp_randclear_mt (gmp_randstate_t);
+void __gmp_randiset_mt (gmp_randstate_ptr, gmp_randstate_srcptr);
diff --git a/rand/randmts.c b/rand/randmts.c

new file mode 100644 (file)

index 0000000..e3b0338
--- /dev/null
+++ b/rand/randmts.c
@@ -0,0 +1,157 @@
+/* Mersenne Twister pseudo-random number generator functions.
+
+Copyright 2002, 2003 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "randmt.h"
+
+
+/* Calculate (b^e) mod (2^n-k) for e=1074888996, n=19937 and k=20023,
+   needed by the seeding function below.  */
+static void
+mangle_seed (mpz_ptr r, mpz_srcptr b_orig)
+{
+  mpz_t          t, b;
+  unsigned long  e = 0x40118124;       /* exponent, 1074888996 decimal */
+  unsigned long  bit = 0x20000000;     /* bit just below e's top set bit */
+
+  mpz_init (t);
+  mpz_init_set (b, b_orig);  /* in case r==b_orig */
+
+  mpz_set (r, b);            /* r = b covers the top bit of e */
+  do
+    {
+      mpz_mul (r, r, r);     /* square once per remaining exponent bit */
+
+    reduce:                  /* also re-entered after the multiply below */
+      for (;;)
+        {
+          mpz_tdiv_q_2exp (t, r, 19937L);      /* t = r >> 19937 */
+          if (mpz_sgn (t) == 0)
+            break;                             /* r now fits in 19937 bits */
+          mpz_tdiv_r_2exp (r, r, 19937L);      /* r = low 19937 bits */
+          mpz_addmul_ui (r, t, 20023L);        /* 2^19937 == 20023 mod 2^19937-20023 */
+        }
+
+      if ((e & bit) != 0)
+        {
+          e &= ~bit;         /* cleared so the test fails after re-reducing */
+          mpz_mul (r, r, b); /* exponent bit set: multiply by the base */
+          goto reduce;
+        }
+
+      bit >>= 1;
+    }
+  while (bit != 0);
+
+  mpz_clear (t);
+  mpz_clear (b);
+}
+
+
+/* Seeding function.  Uses powering modulo a non-Mersenne prime to obtain
+   a permutation of the input seed space.  The modulus is 2^19937-20023,
+   which is probably prime.  The power is 1074888996.  In order to avoid
+   seeds 0 and 1 generating invalid or strange output, the input seed is
+   first manipulated as follows:
+
+     seed1 = seed mod (2^19937-20027) + 2
+
+   so that seed1 lies between 2 and 2^19937-20026 inclusive. Then the
+   powering is performed as follows:
+
+     seed2 = (seed1^1074888996) mod (2^19937-20023)
+
+   and then seed2 is used to bootstrap the buffer.
+
+   This method aims to give guarantees that:
+     a) seed2 will never be zero,
+     b) seed2 will very seldom have a very low population of ones in its
+       binary representation, and
+     c) every seed between 0 and 2^19937-20028 (inclusive) will yield a
+       different sequence.
+
+   CAVEATS:
+
+   The period of the seeding function is 2^19937-20027.  This means that
+   with seeds 2^19937-20027, 2^19937-20026, ... the exact same sequences
+   are obtained as with seeds 0, 1, etc.; it also means that seed -1
+   produces the same sequence as seed 2^19937-20028, etc.
+ */
+
+static void
+randseed_mt (gmp_randstate_t rstate, mpz_srcptr seed)
+{
+  int i;        /* Warm-up pass counter.  */
+  size_t cnt;   /* Words written by mpz_export, then zero-fill index.  */
+
+  gmp_rand_mt_struct *p;
+  mpz_t mod;    /* Modulus.  */
+  mpz_t seed1;  /* Intermediate result.  */
+
+  p = (gmp_rand_mt_struct *) RNG_STATE (rstate);
+
+  mpz_init (mod);
+  mpz_init (seed1);
+
+  mpz_set_ui (mod, 0L);
+  mpz_setbit (mod, 19937L);    /* mod = 2^19937 */
+  mpz_sub_ui (mod, mod, 20027L);       /* mod = 2^19937 - 20027 */
+  mpz_mod (seed1, seed, mod);  /* Reduce `seed' modulo `mod'.  */
+  mpz_add_ui (seed1, seed1, 2L);       /* seed1 is now ready.  */
+  mangle_seed (seed1, seed1);  /* Perform the mangling by powering.  */
+
+  /* Copy the last bit into bit 31 of mt[0] and clear it.  */
+  p->mt[0] = (mpz_tstbit (seed1, 19936L) != 0) ? 0x80000000 : 0;
+  mpz_clrbit (seed1, 19936L);
+
+  /* Split seed1 into N-1 32-bit chunks.  */
+  mpz_export (&p->mt[1], &cnt, -1, sizeof (p->mt[1]), 0,
+              8 * sizeof (p->mt[1]) - 32, seed1);  /* nails leave 32 bits/word */
+  cnt++;                       /* count mt[0], which was set above */
+  ASSERT (cnt <= N);
+  while (cnt < N)
+    p->mt[cnt++] = 0;          /* zero the words mpz_export did not write */
+
+  mpz_clear (mod);
+  mpz_clear (seed1);
+
+  /* Warm the generator up if necessary.  */
+  if (WARM_UP != 0)
+    for (i = 0; i < WARM_UP / N; i++)
+      __gmp_mt_recalc_buffer (p->mt);
+
+  p->mti = WARM_UP % N;
+}
+
+
+static const gmp_randfnptr_t Mersenne_Twister_Generator = {
+  randseed_mt,                 /* randseed_fn, invoked by gmp_randseed */
+  __gmp_randget_mt,            /* declared in randmt.h */
+  __gmp_randclear_mt,          /* declared in randmt.h */
+  __gmp_randiset_mt            /* declared in randmt.h */
+};
+
+/* Initialize MT-specific data, then install the table with randseed_mt.  */
+void
+gmp_randinit_mt (gmp_randstate_t rstate)
+{
+  __gmp_randinit_mt_noseed (rstate);   /* state setup, defined elsewhere */
+  RNG_FNPTR (rstate) = (void *) &Mersenne_Twister_Generator;
+}
diff --git a/rand/randmui.c b/rand/randmui.c

new file mode 100644 (file)

index 0000000..f349d35
--- /dev/null
+++ b/rand/randmui.c
@@ -0,0 +1,75 @@
+/* gmp_urandomm_ui -- uniform random number 0 to N-1 for ulong N.
+
+Copyright 2003, 2004 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+
+/* If n is a power of 2 then the test ret<n is always true and the loop is
+   unnecessary, but there's no need to add special code for this.  Just get
+   the "bits" calculation correct and let it go through normally.
+
+   If n is 1 then we will have bits==0 and _gmp_rand will produce no output and
+   we always return 0.  Again there seems no need for a special case, just
+   initialize a[0]=0 and let it go through normally.  */
+
+#define MAX_URANDOMM_ITER  80
+
+unsigned long
+gmp_urandomm_ui (gmp_randstate_ptr rstate, unsigned long n)
+{
+  mp_limb_t      a[LIMBS_PER_ULONG];
+  unsigned long  ret, bits, leading;
+  int            i;
+
+  if (UNLIKELY (n == 0))
+    DIVIDE_BY_ZERO;            /* no valid range when n == 0 */
+
+  /* start with zeros, since if bits==0 then _gmp_rand will store nothing at
+     all (bits==0 arises when n==1), or if bits <= GMP_NUMB_BITS then it
+     will store only a[0].  */
+  a[0] = 0;
+#if LIMBS_PER_ULONG > 1
+  a[1] = 0;
+#endif
+
+  count_leading_zeros (leading, (mp_limb_t) n);
+  bits = GMP_LIMB_BITS - leading - (POW2_P(n) != 0);  /* one fewer if n is 2^k */
+
+  for (i = 0; i < MAX_URANDOMM_ITER; i++)
+    {
+      _gmp_rand (a, rstate, bits);     /* fresh candidate of `bits' bits */
+#if LIMBS_PER_ULONG == 1
+      ret = a[0];
+#else
+      ret = a[0] | (a[1] << GMP_NUMB_BITS);
+#endif
+      if (LIKELY (ret < n))   /* usually one iteration suffices */
+        goto done;
+    }
+
+  /* Too many iterations, there must be something degenerate about the
+     rstate algorithm.  Return ret%n, which here is just ret-n.  */
+  ret -= n;
+  ASSERT (ret < n);
+
+ done:
+  return ret;
+}
diff --git a/rand/rands.c b/rand/rands.c

new file mode 100644 (file)

index 0000000..93eb3e7
--- /dev/null
+++ b/rand/rands.c
@@ -0,0 +1,31 @@
+/* __gmp_rands -- global random state for old-style random functions.
+
+   EVERYTHING IN THIS FILE IS FOR INTERNAL USE ONLY.  IT'S ALMOST CERTAIN TO
+   BE SUBJECT TO INCOMPATIBLE CHANGES OR DISAPPEAR COMPLETELY IN FUTURE GNU
+   MP RELEASES.  */
+
+/*
+Copyright 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+
+/* Use this via the RANDS macro in gmp-impl.h */
+char             __gmp_rands_initialized = 0;  /* presumably set by RANDS -- confirm */
+gmp_randstate_t  __gmp_rands;                  /* the global state itself */
diff --git a/rand/randsd.c b/rand/randsd.c

new file mode 100644 (file)

index 0000000..077382e
--- /dev/null
+++ b/rand/randsd.c
@@ -0,0 +1,28 @@
+/* gmp_randseed (state, seed) -- Set initial seed SEED in random state STATE.
+
+Copyright 2000, 2002 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+void
+gmp_randseed (gmp_randstate_t rstate,
+             mpz_srcptr seed)
+{ /* Call the randseed_fn entry of the table RNG_FNPTR (rstate) points to.  */
+  (*((gmp_randfnptr_t *) RNG_FNPTR (rstate))->randseed_fn) (rstate, seed);
+}
diff --git a/rand/randsdui.c b/rand/randsdui.c

new file mode 100644 (file)

index 0000000..9039eda
--- /dev/null
+++ b/rand/randsdui.c
@@ -0,0 +1,33 @@
+/* gmp_randseed_ui (state, seed) -- Set initial seed SEED in random
+   state STATE.
+
+Copyright 2000, 2002 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+void
+gmp_randseed_ui (gmp_randstate_t rstate,
+                 unsigned long int seed)
+{
+  mpz_t zseed;
+  mp_limb_t zlimbs[LIMBS_PER_ULONG];   /* limb storage passed to MPZ_FAKE_UI */
+
+  MPZ_FAKE_UI (zseed, zlimbs, seed);   /* presumably zseed = seed, backed by zlimbs -- confirm */
+  gmp_randseed (rstate, zseed);        /* reuse the mpz seeding entry point */
+}
diff --git a/randbui.c b/randbui.c

deleted file mode 100644 (file)

index 5fc49b2..0000000
--- a/randbui.c
+++ /dev/null
@@ -1,46 +0,0 @@
-/* gmp_urandomb_ui -- random bits returned in a ulong.
-
-Copyright 2003, 2004 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-License for more details.
-
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
-
-#include "gmp.h"
-#include "gmp-impl.h"
-
-
-/* Currently bits>=BITS_PER_ULONG is quietly truncated to BITS_PER_ULONG,
-   maybe this should raise an exception or something.  */
-
-unsigned long
-gmp_urandomb_ui (gmp_randstate_ptr rstate, unsigned long bits)
-{
-  mp_limb_t  a[LIMBS_PER_ULONG];
-
-  /* start with zeros, since if bits==0 then _gmp_rand will store nothing at
-     all, or if bits <= GMP_NUMB_BITS then it will store only a[0] */
-  a[0] = 0;
-#if LIMBS_PER_ULONG > 1
-  a[1] = 0;
-#endif
-
-  _gmp_rand (a, rstate, MIN (bits, BITS_PER_ULONG));
-
-#if LIMBS_PER_ULONG == 1
-  return a[0];
-#else
-  return a[0] | (a[1] << GMP_NUMB_BITS);
-#endif
-}
diff --git a/randclr.c b/randclr.c

deleted file mode 100644 (file)

index a4e8242..0000000
--- a/randclr.c
+++ /dev/null
@@ -1,27 +0,0 @@
-/* gmp_randclear (state) -- Clear and deallocate random state STATE.
-
-Copyright 1999, 2000, 2001, 2002 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-License for more details.
-
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
-
-#include "gmp.h"
-#include "gmp-impl.h"
-
-void
-gmp_randclear (gmp_randstate_t rstate)
-{
-  (*((gmp_randfnptr_t *) RNG_FNPTR (rstate))->randclear_fn) (rstate);
-}
diff --git a/randdef.c b/randdef.c

deleted file mode 100644 (file)

index 171a0bd..0000000
--- a/randdef.c
+++ /dev/null
@@ -1,27 +0,0 @@
-/* gmp_randinit_default -- initialize a random state with a default algorithm.
-
-Copyright 2001, 2002 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-License for more details.
-
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
-
-#include "gmp.h"
-#include "gmp-impl.h"
-
-void
-gmp_randinit_default (gmp_randstate_t rstate)
-{
-  gmp_randinit_mt (rstate);
-}
diff --git a/randiset.c b/randiset.c

deleted file mode 100644 (file)

index f140a33..0000000
--- a/randiset.c
+++ /dev/null
@@ -1,28 +0,0 @@
-/* gmp_randinit_set -- initialize with a copy of another gmp_randstate_t.
-
-Copyright 2003 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-License for more details.
-
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
-
-#include "gmp.h"
-#include "gmp-impl.h"
-
-
-void
-gmp_randinit_set (gmp_randstate_ptr dst, gmp_randstate_srcptr src)
-{
-  (*((gmp_randfnptr_t *) RNG_FNPTR (src))->randiset_fn) (dst, src);
-}
diff --git a/randlc2s.c b/randlc2s.c

deleted file mode 100644 (file)

index 4dcde73..0000000
--- a/randlc2s.c
+++ /dev/null
@@ -1,82 +0,0 @@
-/* gmp_randinit_lc_2exp_size -- initialize a random state with a linear
-   congruential generator of a requested size.
-
-Copyright 1999, 2000, 2001 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-License for more details.
-
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
-
-#include <stdio.h> /* for NULL */
-#include "gmp.h"
-#include "gmp-impl.h"
-
-
-/* Array of LC-schemes, ordered in increasing order of the first
-   member (the 'm2exp' value).  The end of the array is indicated with
-   an entry containing all zeros.  */
-
-/* All multipliers are in the range 0.01*m and 0.99*m, and are
-congruent to 5 (mod 8).
-They all pass the spectral test with Vt >= 2^(30/t) and merit >= 1.
-(Up to and including 196 bits, merit is >= 3.)  */
-
-struct __gmp_rand_lc_scheme_struct
-{
-  unsigned long int m2exp;     /* Modulus is 2 ^ m2exp. */
-  const char *astr;            /* Multiplier in string form. */
-  unsigned long int c;         /* Addend. */
-};
-
-static const struct __gmp_rand_lc_scheme_struct __gmp_rand_lc_scheme[] =
-{
-  {32, "29CF535",           1},
-  {33, "51F666D",           1},
-  {34, "A3D73AD",           1},
-  {35, "147E5B85",          1},
-  {36, "28F725C5",          1},
-  {37, "51EE3105",          1},
-  {38, "A3DD5CDD",          1},
-  {39, "147AF833D",         1},
-  {40, "28F5DA175",         1},
-  {56, "AA7D735234C0DD",  1},
-  {64, "BAECD515DAF0B49D", 1},
-  {100, "292787EBD3329AD7E7575E2FD", 1},
-  {128, "48A74F367FA7B5C8ACBB36901308FA85", 1},
-  {156, "78A7FDDDC43611B527C3F1D760F36E5D7FC7C45", 1},
-  {196, "41BA2E104EE34C66B3520CE706A56498DE6D44721E5E24F5", 1},
-  {200, "4E5A24C38B981EAFE84CD9D0BEC48E83911362C114F30072C5", 1},
-  {256, "AF66BA932AAF58A071FD8F0742A99A0C76982D648509973DB802303128A14CB5", 1},
-  {0, NULL, 0}                 /* End of array. */
-};
-
-int
-gmp_randinit_lc_2exp_size (gmp_randstate_t rstate, mp_bitcnt_t size)
-{
-  const struct __gmp_rand_lc_scheme_struct *sp;
-  mpz_t a;
-
-  /* Pick a scheme.  */
-  for (sp = __gmp_rand_lc_scheme; sp->m2exp != 0; sp++)
-    if (sp->m2exp / 2 >= size)
-      goto found;
-  return 0;
-
- found:
-  /* Install scheme.  */
-  mpz_init_set_str (a, sp->astr, 16);
-  gmp_randinit_lc_2exp (rstate, a, sp->c, sp->m2exp);
-  mpz_clear (a);
-  return 1;
-}
diff --git a/randlc2x.c b/randlc2x.c

deleted file mode 100644 (file)

index ba45b60..0000000
--- a/randlc2x.c
+++ /dev/null
@@ -1,322 +0,0 @@
-/* Linear Congruential pseudo-random number generator functions.
-
-Copyright 1999, 2000, 2001, 2002, 2003, 2005 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-License for more details.
-
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
-
-#include "gmp.h"
-#include "gmp-impl.h"
-
-
-/* State structure for LC, the RNG_STATE() pointer in a gmp_randstate_t.
-
-   _mp_seed holds the current seed value, in the range 0 to 2^m2exp-1.
-   SIZ(_mp_seed) is fixed at BITS_TO_LIMBS(_mp_m2exp) and the value is
-   padded with high zero limbs if necessary.  ALLOC(_mp_seed) is the current
-   size of PTR(_mp_seed) in the usual way.  There only needs to be
-   BITS_TO_LIMBS(_mp_m2exp) allocated, but the mpz functions in the
-   initialization and seeding end up making it a bit more than this.
-
-   _mp_a is the "a" multiplier, in the range 0 to 2^m2exp-1.  SIZ(_mp_a) is
-   the size of the value in the normal way for an mpz_t, except that a value
-   of zero is held with SIZ(_mp_a)==1 and PTR(_mp_a)[0]==0.  This makes it
-   easy to call mpn_mul, and the case of a==0 is highly un-random and not
-   worth any trouble to optimize.
-
-   {_cp,_cn} is the "c" addend.  Normally _cn is 1, but when nails are in
-   use a ulong can be bigger than one limb, and in this case _cn is 2 if
-   necessary.  c==0 is stored as _cp[0]==0 and _cn==1, which makes it easy
-   to call __GMPN_ADD.  c==0 is fairly un-random so isn't worth optimizing.
-
-   _mp_m2exp gives the modulus, namely 2^m2exp.  We demand m2exp>=1, since
-   m2exp==0 would mean no bits at all out of each iteration, which makes no
-   sense.  */
-
-typedef struct {
-  mpz_t          _mp_seed;
-  mpz_t          _mp_a;
-  mp_size_t      _cn;
-  mp_limb_t      _cp[LIMBS_PER_ULONG];
-  unsigned long  _mp_m2exp;
-} gmp_rand_lc_struct;
-
-
-/* lc (rp, state) -- Generate next number in LC sequence.  Return the
-   number of valid bits in the result.  Discards the lower half of the
-   result.  */
-
-static unsigned long int
-lc (mp_ptr rp, gmp_randstate_t rstate)
-{
-  mp_ptr tp, seedp, ap;
-  mp_size_t ta;
-  mp_size_t tn, seedn, an;
-  unsigned long int m2exp;
-  unsigned long int bits;
-  int cy;
-  mp_size_t xn;
-  gmp_rand_lc_struct *p;
-  TMP_DECL;
-
-  p = (gmp_rand_lc_struct *) RNG_STATE (rstate);
-
-  m2exp = p->_mp_m2exp;
-
-  seedp = PTR (p->_mp_seed);
-  seedn = SIZ (p->_mp_seed);
-
-  ap = PTR (p->_mp_a);
-  an = SIZ (p->_mp_a);
-
-  /* Allocate temporary storage.  Let there be room for calculation of
-     (A * seed + C) % M, or M if bigger than that.  */
-
-  TMP_MARK;
-
-  ta = an + seedn + 1;
-  tn = BITS_TO_LIMBS (m2exp);
-  if (ta <= tn) /* that is, if (ta < tn + 1) */
-    {
-      mp_size_t tmp = an + seedn;
-      ta = tn + 1;
-      tp = TMP_ALLOC_LIMBS (ta);
-      MPN_ZERO (&tp[tmp], ta - tmp); /* mpn_mul won't zero it out.  */
-    }
-  else
-    tp = TMP_ALLOC_LIMBS (ta);
-
-  /* t = a * seed.  NOTE: an is always > 0; see initialization.  */
-  ASSERT (seedn >= an && an > 0);
-  mpn_mul (tp, seedp, seedn, ap, an);
-
-  /* t = t + c.  NOTE: tn is always >= p->_cn (precondition for __GMPN_ADD);
-     see initialization.  */
-  ASSERT (tn >= p->_cn);
-  __GMPN_ADD (cy, tp, tp, tn, p->_cp, p->_cn);
-
-  /* t = t % m */
-  tp[m2exp / GMP_NUMB_BITS] &= (CNST_LIMB (1) << m2exp % GMP_NUMB_BITS) - 1;
-
-  /* Save result as next seed.  */
-  MPN_COPY (PTR (p->_mp_seed), tp, tn);
-
-  /* Discard the lower m2exp/2 of the result.  */
-  bits = m2exp / 2;
-  xn = bits / GMP_NUMB_BITS;
-
-  tn -= xn;
-  if (tn > 0)
-    {
-      unsigned int cnt = bits % GMP_NUMB_BITS;
-      if (cnt != 0)
-       {
-         mpn_rshift (tp, tp + xn, tn, cnt);
-         MPN_COPY_INCR (rp, tp, xn + 1);
-       }
-      else                     /* Even limb boundary.  */
-       MPN_COPY_INCR (rp, tp + xn, tn);
-    }
-
-  TMP_FREE;
-
-  /* Return number of valid bits in the result.  */
-  return (m2exp + 1) / 2;
-}
-
-
-/* Obtain a sequence of random numbers.  */
-static void
-randget_lc (gmp_randstate_t rstate, mp_ptr rp, unsigned long int nbits)
-{
-  unsigned long int rbitpos;
-  int chunk_nbits;
-  mp_ptr tp;
-  mp_size_t tn;
-  gmp_rand_lc_struct *p;
-  TMP_DECL;
-
-  p = (gmp_rand_lc_struct *) RNG_STATE (rstate);
-
-  TMP_MARK;
-
-  chunk_nbits = p->_mp_m2exp / 2;
-  tn = BITS_TO_LIMBS (chunk_nbits);
-
-  tp = TMP_ALLOC_LIMBS (tn);
-
-  rbitpos = 0;
-  while (rbitpos + chunk_nbits <= nbits)
-    {
-      mp_ptr r2p = rp + rbitpos / GMP_NUMB_BITS;
-
-      if (rbitpos % GMP_NUMB_BITS != 0)
-       {
-         mp_limb_t savelimb, rcy;
-         /* Target of new chunk is not bit aligned.  Use temp space
-            and align things by shifting it up.  */
-         lc (tp, rstate);
-         savelimb = r2p[0];
-         rcy = mpn_lshift (r2p, tp, tn, rbitpos % GMP_NUMB_BITS);
-         r2p[0] |= savelimb;
-         /* bogus */
-         if ((chunk_nbits % GMP_NUMB_BITS + rbitpos % GMP_NUMB_BITS)
-             > GMP_NUMB_BITS)
-           r2p[tn] = rcy;
-       }
-      else
-       {
-         /* Target of new chunk is bit aligned.  Let `lc' put bits
-            directly into our target variable.  */
-         lc (r2p, rstate);
-       }
-      rbitpos += chunk_nbits;
-    }
-
-  /* Handle last [0..chunk_nbits) bits.  */
-  if (rbitpos != nbits)
-    {
-      mp_ptr r2p = rp + rbitpos / GMP_NUMB_BITS;
-      int last_nbits = nbits - rbitpos;
-      tn = BITS_TO_LIMBS (last_nbits);
-      lc (tp, rstate);
-      if (rbitpos % GMP_NUMB_BITS != 0)
-       {
-         mp_limb_t savelimb, rcy;
-         /* Target of new chunk is not bit aligned.  Use temp space
-            and align things by shifting it up.  */
-         savelimb = r2p[0];
-         rcy = mpn_lshift (r2p, tp, tn, rbitpos % GMP_NUMB_BITS);
-         r2p[0] |= savelimb;
-         if (rbitpos + tn * GMP_NUMB_BITS - rbitpos % GMP_NUMB_BITS < nbits)
-           r2p[tn] = rcy;
-       }
-      else
-       {
-         MPN_COPY (r2p, tp, tn);
-       }
-      /* Mask off top bits if needed.  */
-      if (nbits % GMP_NUMB_BITS != 0)
-       rp[nbits / GMP_NUMB_BITS]
-         &= ~(~CNST_LIMB (0) << nbits % GMP_NUMB_BITS);
-    }
-
-  TMP_FREE;
-}
-
-
-static void
-randseed_lc (gmp_randstate_t rstate, mpz_srcptr seed)
-{
-  gmp_rand_lc_struct *p = (gmp_rand_lc_struct *) RNG_STATE (rstate);
-  mpz_ptr seedz = p->_mp_seed;
-  mp_size_t seedn = BITS_TO_LIMBS (p->_mp_m2exp);
-
-  /* Store p->_mp_seed as an unnormalized integer with size enough
-     for numbers up to 2^m2exp-1.  That size can't be zero.  */
-  mpz_fdiv_r_2exp (seedz, seed, p->_mp_m2exp);
-  MPN_ZERO (&PTR (seedz)[SIZ (seedz)], seedn - SIZ (seedz));
-  SIZ (seedz) = seedn;
-}
-
-
-static void
-randclear_lc (gmp_randstate_t rstate)
-{
-  gmp_rand_lc_struct *p = (gmp_rand_lc_struct *) RNG_STATE (rstate);
-
-  mpz_clear (p->_mp_seed);
-  mpz_clear (p->_mp_a);
-  (*__gmp_free_func) (p, sizeof (gmp_rand_lc_struct));
-}
-
-static void randiset_lc __GMP_PROTO ((gmp_randstate_ptr dst, gmp_randstate_srcptr src));
-
-static const gmp_randfnptr_t Linear_Congruential_Generator = {
-  randseed_lc,
-  randget_lc,
-  randclear_lc,
-  randiset_lc
-};
-
-static void
-randiset_lc (gmp_randstate_ptr dst, gmp_randstate_srcptr src)
-{
-  gmp_rand_lc_struct *dstp, *srcp;
-
-  srcp = (gmp_rand_lc_struct *) RNG_STATE (src);
-  dstp = (*__gmp_allocate_func) (sizeof (gmp_rand_lc_struct));
-
-  RNG_STATE (dst) = (void *) dstp;
-  RNG_FNPTR (dst) = (void *) &Linear_Congruential_Generator;
-
-  /* _mp_seed and _mp_a might be unnormalized (high zero limbs), but
-     mpz_init_set won't worry about that */
-  mpz_init_set (dstp->_mp_seed, srcp->_mp_seed);
-  mpz_init_set (dstp->_mp_a,    srcp->_mp_a);
-
-  dstp->_cn = srcp->_cn;
-
-  dstp->_cp[0] = srcp->_cp[0];
-  if (LIMBS_PER_ULONG > 1)
-    dstp->_cp[1] = srcp->_cp[1];
-  if (LIMBS_PER_ULONG > 2)  /* usually there's only 1 or 2 */
-    MPN_COPY (dstp->_cp + 2, srcp->_cp + 2, LIMBS_PER_ULONG - 2);
-
-  dstp->_mp_m2exp = srcp->_mp_m2exp;
-}
-
-
-void
-gmp_randinit_lc_2exp (gmp_randstate_t rstate,
-                     mpz_srcptr a,
-                     unsigned long int c,
-                     mp_bitcnt_t m2exp)
-{
-  gmp_rand_lc_struct *p;
-  mp_size_t seedn = BITS_TO_LIMBS (m2exp);
-
-  ASSERT_ALWAYS (m2exp != 0);
-
-  p = __GMP_ALLOCATE_FUNC_TYPE (1, gmp_rand_lc_struct);
-  RNG_STATE (rstate) = (void *) p;
-  RNG_FNPTR (rstate) = (void *) &Linear_Congruential_Generator;
-
-  /* allocate m2exp bits of space for p->_mp_seed, and initial seed "1" */
-  mpz_init2 (p->_mp_seed, m2exp);
-  MPN_ZERO (PTR (p->_mp_seed), seedn);
-  SIZ (p->_mp_seed) = seedn;
-  PTR (p->_mp_seed)[0] = 1;
-
-  /* "a", forced to 0 to 2^m2exp-1 */
-  mpz_init (p->_mp_a);
-  mpz_fdiv_r_2exp (p->_mp_a, a, m2exp);
-
-  /* Avoid SIZ(a) == 0 to avoid checking for special case in lc().  */
-  if (SIZ (p->_mp_a) == 0)
-    {
-      SIZ (p->_mp_a) = 1;
-      PTR (p->_mp_a)[0] = CNST_LIMB (0);
-    }
-
-  MPN_SET_UI (p->_cp, p->_cn, c);
-
-  /* Internally we may discard any bits of c above m2exp.  The following
-     code ensures that __GMPN_ADD in lc() will always work.  */
-  if (seedn < p->_cn)
-    p->_cn = (p->_cp[0] != 0);
-
-  p->_mp_m2exp = m2exp;
-}
diff --git a/randmt.c b/randmt.c

deleted file mode 100644 (file)

index ccd4a11..0000000
--- a/randmt.c
+++ /dev/null
@@ -1,405 +0,0 @@
-/* Mersenne Twister pseudo-random number generator functions.
-
-   THE FUNCTIONS IN THIS FILE ARE FOR INTERNAL USE ONLY.  THEY'RE ALMOST
-   CERTAIN TO BE SUBJECT TO INCOMPATIBLE CHANGES OR DISAPPEAR COMPLETELY IN
-   FUTURE GNU MP RELEASES.
-
-Copyright 2002, 2003, 2006 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-License for more details.
-
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
-
-#include <stdio.h>   /* for NULL */
-
-#include "gmp.h"
-#include "gmp-impl.h"
-#include "randmt.h"
-
-
-/* This code implements the Mersenne Twister pseudorandom number generator
-   by Takuji Nishimura and Makoto Matsumoto.  The buffer initialization
-   function is different in order to permit seeds greater than 2^32-1.
-
-   This file contains a special __gmp_randinit_mt_noseed which excludes the
-   seeding function from the gmp_randfnptr_t routines.  This is for use by
-   mpn_random and mpn_random2 on the global random generator.  MT seeding
-   uses mpz functions, and we don't want mpn routines dragging mpz functions
-   into the link.  */
-
-
-/* Default seed to use when the generator is not initialized.  */
-#define DEFAULT_SEED 5489 /* was 4357 */
-
-/* Tempering masks.  */
-#define MASK_1 0x9D2C5680
-#define MASK_2 0xEFC60000
-
-/* Initial state of buffer when initialized with default seed.  */
-static const gmp_uint_least32_t default_state[N] =
-{
-  0xD247B233,0x9E5AA8F1,0x0FFA981B,0x9DCB0980,0x74200F2B,0xA576D044,
-  0xE9F05ADF,0x1538BFF5,0x59818BBF,0xCF9E58D8,0x09FCE032,0x6A1C663F,
-  0x5116E78A,0x69B3E0FA,0x6D92D665,0xD0A8BE98,0xF669B734,0x41AC1B68,
-  0x630423F1,0x4B8D6B8A,0xC2C46DD7,0x5680747D,0x43703E8F,0x3B6103D2,
-  0x49E5EB3F,0xCBDAB4C1,0x9C988E23,0x747BEE0B,0x9111E329,0x9F031B5A,
-  0xECCA71B9,0x2AFE4EF8,0x8421C7ED,0xAC89AFF1,0xAED90DF3,0x2DD74F01,
-  0x14906A13,0x75873FA9,0xFF83F877,0x5028A0C9,0x11B4C41D,0x7CAEDBC4,
-  0x8672D0A7,0x48A7C109,0x8320E59F,0xBC0B3D5F,0x75A30886,0xF9E0D128,
-  0x41AF7580,0x239BB94D,0xC67A3C81,0x74EEBD6E,0xBC02B53C,0x727EA449,
-  0x6B8A2806,0x5853B0DA,0xBDE032F4,0xCE234885,0x320D6145,0x48CC053F,
-  0x00DBC4D2,0xD55A2397,0xE1059B6F,0x1C3E05D1,0x09657C64,0xD07CB661,
-  0x6E982E34,0x6DD1D777,0xEDED1071,0xD79DFD65,0xF816DDCE,0xB6FAF1E4,
-  0x1C771074,0x311835BD,0x18F952F7,0xF8F40350,0x4ECED354,0x7C8AC12B,
-  0x31A9994D,0x4FD47747,0xDC227A23,0x6DFAFDDF,0x6796E748,0x0C6F634F,
-  0xF992FA1D,0x4CF670C9,0x067DFD31,0xA7A3E1A5,0x8CD7D9DF,0x972CCB34,
-  0x67C82156,0xD548F6A8,0x045CEC21,0xF3240BFB,0xDEF656A7,0x43DE08C5,
-  0xDAD1F92F,0x3726C56B,0x1409F19A,0x942FD147,0xB926749C,0xADDC31B8,
-  0x53D0D869,0xD1BA52FE,0x6722DF8C,0x22D95A74,0x7DC1B52A,0x1DEC6FD5,
-  0x7262874D,0x0A725DC9,0xE6A8193D,0xA052835A,0xDC9AD928,0xE59EBB90,
-  0x70DBA9FF,0xD612749D,0x5A5A638C,0x6086EC37,0x2A579709,0x1449EA3A,
-  0xBC8E3C06,0x2F900666,0xFBE74FD1,0x6B35B911,0xF8335008,0xEF1E979D,
-  0x738AB29D,0xA2DC0FDC,0x7696305D,0xF5429DAC,0x8C41813B,0x8073E02E,
-  0xBEF83CCD,0x7B50A95A,0x05EE5862,0x00829ECE,0x8CA1958C,0xBE4EA2E2,
-  0x4293BB73,0x656F7B23,0x417316D8,0x4467D7CF,0x2200E63B,0x109050C8,
-  0x814CBE47,0x36B1D4A8,0x36AF9305,0x308327B3,0xEBCD7344,0xA738DE27,
-  0x5A10C399,0x4142371D,0x64A18528,0x0B31E8B2,0x641057B9,0x6AFC363B,
-  0x108AD953,0x9D4DA234,0x0C2D9159,0x1C8A1A1F,0x310C66BA,0x87AA1070,
-  0xDAC832FF,0x0A433422,0x7AF15812,0x2D8D9BD0,0x995A25E9,0x25326CAC,
-  0xA34384DB,0x4C8421CC,0x4F0315EC,0x29E8649E,0xA7732D6F,0x2E94D3E3,
-  0x7D98A340,0x397C4D74,0x659DB4DE,0x747D4E9A,0xD9DB8435,0x4659DBE9,
-  0x313E6DC5,0x29D104DC,0x9F226CBA,0x452F18B0,0xD0BC5068,0x844CA299,
-  0x782B294E,0x4AE2EB7B,0xA4C475F8,0x70A81311,0x4B3E8BCC,0x7E20D4BA,
-  0xABCA33C9,0x57BE2960,0x44F9B419,0x2E567746,0x72EB757A,0x102CC0E8,
-  0xB07F32B9,0xD0DABD59,0xBA85AD6B,0xF3E20667,0x98D77D81,0x197AFA47,
-  0x518EE9AC,0xE10CE5A2,0x01CF2C2A,0xD3A3AF3D,0x16DDFD65,0x669232F8,
-  0x1C50A301,0xB93D9151,0x9354D3F4,0x847D79D0,0xD5FE2EC6,0x1F7B0610,
-  0xFA6B90A5,0xC5879041,0x2E7DC05E,0x423F1F32,0xEF623DDB,0x49C13280,
-  0x98714E92,0xC7B6E4AD,0xC4318466,0x0737F312,0x4D3C003F,0x9ACC1F1F,
-  0x5F1C926D,0x085FA771,0x185A83A2,0xF9AA159D,0x0B0B0132,0xF98E7A43,
-  0xCD9EBDBE,0x0190CB29,0x10D93FB6,0x3B8A4D97,0x66A65A41,0xE43E766F,
-  0x77BE3C41,0xB9686364,0xCB36994D,0x6846A287,0x567E77F7,0x36178DD8,
-  0xBDE6B1F2,0xB6EFDC64,0x82950324,0x42053F47,0xC09BE51C,0x0942D762,
-  0x35F92C7F,0x367DEC61,0x6EE3D983,0xDBAAF78A,0x265D2C47,0x8EB4BF5C,
-  0x33B232D7,0xB0137E77,0x373C39A7,0x8D2B2E76,0xC7510F01,0x50F9E032,
-  0x7B1FDDDB,0x724C2AAE,0xB10ECB31,0xCCA3D1B8,0x7F0BCF10,0x4254BBBD,
-  0xE3F93B97,0x2305039B,0x53120E22,0x1A2F3B9A,0x0FDDBD97,0x0118561E,
-  0x0A798E13,0x9E0B3ACD,0xDB6C9F15,0xF512D0A2,0x9E8C3A28,0xEE2184AE,
-  0x0051EC2F,0x2432F74F,0xB0AA66EA,0x55128D88,0xF7D83A38,0x4DAE8E82,
-  0x3FDC98D6,0x5F0BD341,0x7244BE1D,0xC7B48E78,0x2D473053,0x43892E20,
-  0xBA0F1F2A,0x524D4895,0x2E10BCB1,0x4C372D81,0x5C3E50CD,0xCF61CC2E,
-  0x931709AB,0x81B3AEFC,0x39E9405E,0x7FFE108C,0x4FBB3FF8,0x06ABE450,
-  0x7F5BF51E,0xA4E3CDFD,0xDB0F6C6F,0x159A1227,0x3B9FED55,0xD20B6F7F,
-  0xFBE9CC83,0x64856619,0xBF52B8AF,0x9D7006B0,0x71165BC6,0xAE324AEE,
-  0x29D27F2C,0x794C2086,0x74445CE2,0x782915CC,0xD4CE6886,0x3289AE7C,
-  0x53DEF297,0x4185F7ED,0x88B72400,0x3C09DC11,0xBCE3AAB6,0x6A75934A,
-  0xB267E399,0x000DF1BF,0x193BA5E2,0xFA3E1977,0x179E14F6,0x1EEDE298,
-  0x691F0B06,0xB84F78AC,0xC1C15316,0xFFFF3AD6,0x0B457383,0x518CD612,
-  0x05A00F3E,0xD5B7D275,0x4C5ECCD7,0xE02CD0BE,0x5558E9F2,0x0C89BBF0,
-  0xA3D96227,0x2832D2B2,0xF667B897,0xD4556554,0xF9D2F01F,0xFA1E3FAE,
-  0x52C2E1EE,0xE5451F31,0x7E849729,0xDABDB67A,0x54BF5E7E,0xF831C271,
-  0x5F1A17E3,0x9D140AFE,0x92741C47,0x48CFABCE,0x9CBBE477,0x9C3EE57F,
-  0xB07D4C39,0xCC21BCE2,0x697708B1,0x58DA2A6B,0x2370DB16,0x6E641948,
-  0xACC5BD52,0x868F24CC,0xCA1DB0F5,0x4CADA492,0x3F443E54,0xC4A4D5E9,
-  0xF00AD670,0xE93C86E0,0xFE90651A,0xDDE532A3,0xA66458DF,0xAB7D7151,
-  0x0E2E775F,0xC9109F99,0x8D96D59F,0x73CEF14C,0xC74E88E9,0x02712DC0,
-  0x04F41735,0x2E5914A2,0x59F4B2FB,0x0287FC83,0x80BC0343,0xF6B32559,
-  0xC74178D4,0xF1D99123,0x383CCC07,0xACC0637D,0x0863A548,0xA6FCAC85,
-  0x2A13EFF0,0xAF2EEDB1,0x41E72750,0xE0C6B342,0x5DA22B46,0x635559E0,
-  0xD2EA40AC,0x10AA98C0,0x19096497,0x112C542B,0x2C85040C,0xA868E7D0,
-  0x6E260188,0xF596D390,0xC3BB5D7A,0x7A2AA937,0xDFD15032,0x6780AE3B,
-  0xDB5F9CD8,0x8BD266B0,0x7744AF12,0xB463B1B0,0x589629C9,0xE30DBC6E,
-  0x880F5569,0x209E6E16,0x9DECA50C,0x02987A57,0xBED3EA57,0xD3A678AA,
-  0x70DD030D,0x0CFD9C5D,0x92A18E99,0xF5740619,0x7F6F0A7D,0x134CAF9A,
-  0x70F5BAE4,0x23DCA7B5,0x4D788FCD,0xC7F07847,0xBCF77DA1,0x9071D568,
-  0xFC627EA1,0xAE004B77,0x66B54BCB,0x7EF2DAAC,0xDCD5AC30,0xB9BDF730,
-  0x505A97A7,0x9D881FD3,0xADB796CC,0x94A1D202,0x97535D7F,0x31EC20C0,
-  0xB1887A98,0xC1475069,0xA6F73AF3,0x71E4E067,0x46A569DE,0xD2ADE430,
-  0x6F0762C7,0xF50876F4,0x53510542,0x03741C3E,0x53502224,0xD8E54D60,
-  0x3C44AB1A,0x34972B46,0x74BFA89D,0xD7D768E0,0x37E605DC,0xE13D1BDF,
-  0x5051C421,0xB9E057BE,0xB717A14C,0xA1730C43,0xB99638BE,0xB5D5F36D,
-  0xE960D9EA,0x6B1388D3,0xECB6D3B6,0xBDBE8B83,0x2E29AFC5,0x764D71EC,
-  0x4B8F4F43,0xC21DDC00,0xA63F657F,0x82678130,0xDBF535AC,0xA594FC58,
-  0x942686BC,0xBD9B657B,0x4A0F9B61,0x44FF184F,0x38E10A2F,0x61910626,
-  0x5E247636,0x7106D137,0xC62802F0,0xBD1D1F00,0x7CC0DCB2,0xED634909,
-  0xDC13B24E,0x9799C499,0xD77E3D6A,0x14773B68,0x967A4FB7,0x35EECFB1,
-  0x2A5110B8,0xE2F0AF94,0x9D09DEA5,0x20255D27,0x5771D34B,0xE1089EE4,
-  0x246F330B,0x8F7CAEE5,0xD3064712,0x75CAFBEE,0xB94F7028,0xED953666,
-  0x5D1975B4,0x5AF81271,0x13BE2025,0x85194659,0x30805331,0xEC9D46C0,
-  0xBC027C36,0x2AF84188,0xC2141B80,0xC02B1E4A,0x04D36177,0xFC50E9D7,
-  0x39CE79DA,0x917E0A00,0xEF7A0BF4,0xA98BD8D1,0x19424DD2,0x9439DF1F,
-  0xC42AF746,0xADDBE83E,0x85221F0D,0x45563E90,0x9095EC52,0x77887B25,
-  0x8AE46064,0xBD43B71A,0xBB541956,0x7366CF9D,0xEE8E1737,0xB5A727C9,
-  0x5076B3E7,0xFC70BACA,0xCE135B75,0xC4E91AA3,0xF0341911,0x53430C3F,
-  0x886B0824,0x6BB5B8B7,0x33E21254,0xF193B456,0x5B09617F,0x215FFF50,
-  0x48D97EF1,0x356479AB,0x6EA9DDC4,0x0D352746,0xA2F5CE43,0xB226A1B3,
-  0x1329EA3C,0x7A337CC2,0xB5CCE13D,0x563E3B5B,0x534E8E8F,0x561399C9,
-  0xE1596392,0xB0F03125,0x4586645B,0x1F371847,0x94EAABD1,0x41F97EDD,
-  0xE3E5A39B,0x71C774E2,0x507296F4,0x5960133B,0x7852C494,0x3F5B2691,
-  0xA3F87774,0x5A7AF89E,0x17DA3F28,0xE9D9516D,0xFCC1C1D5,0xE4618628,
-  0x04081047,0xD8E4DB5F,0xDC380416,0x8C4933E2,0x95074D53,0xB1B0032D,
-  0xCC8102EA,0x71641243,0x98D6EB6A,0x90FEC945,0xA0914345,0x6FAB037D,
-  0x70F49C4D,0x05BF5B0E,0x927AAF7F,0xA1940F61,0xFEE0756F,0xF815369F,
-  0x5C00253B,0xF2B9762F,0x4AEB3CCC,0x1069F386,0xFBA4E7B9,0x70332665,
-  0x6BCA810E,0x85AB8058,0xAE4B2B2F,0x9D120712,0xBEE8EACB,0x776A1112
-};
-
-void
-__gmp_mt_recalc_buffer (gmp_uint_least32_t mt[])
-{
-  gmp_uint_least32_t y;
-  int kk;
-
-  for (kk = 0; kk < N - M; kk++)
-    {
-      y = (mt[kk] & 0x80000000) | (mt[kk + 1] & 0x7FFFFFFF);
-      mt[kk] = mt[kk + M] ^ (y >> 1) ^ ((y & 0x01) != 0 ? MATRIX_A : 0);
-    }
-  for (; kk < N - 1; kk++)
-    {
-      y = (mt[kk] & 0x80000000) | (mt[kk + 1] & 0x7FFFFFFF);
-      mt[kk] = mt[kk - (N - M)] ^ (y >> 1) ^ ((y & 0x01) != 0 ? MATRIX_A : 0);
-    }
-
-  y = (mt[N - 1] & 0x80000000) | (mt[0] & 0x7FFFFFFF);
-  mt[N - 1] = mt[M - 1] ^ (y >> 1) ^ ((y & 0x01) != 0 ? MATRIX_A : 0);
-}
-
-
-/* Get nbits bits of output from the generator into dest.
-   Note that Mersenne Twister is designed to produce outputs in
-   32-bit words.  */
-void
-__gmp_randget_mt (gmp_randstate_t rstate, mp_ptr dest, unsigned long int nbits)
-{
-  gmp_uint_least32_t y;
-  int rbits;
-  mp_size_t i;
-  mp_size_t nlimbs;
-  int *pmti;
-  gmp_uint_least32_t *mt;
-
-  pmti = &((gmp_rand_mt_struct *) RNG_STATE (rstate))->mti;
-  mt = ((gmp_rand_mt_struct *) RNG_STATE (rstate))->mt;
-
-  nlimbs = nbits / GMP_NUMB_BITS;
-  rbits = nbits % GMP_NUMB_BITS;
-
-#define NEXT_RANDOM                    \
-  do                                   \
-    {                                  \
-      if (*pmti >= N)                  \
-       {                               \
-         __gmp_mt_recalc_buffer (mt);  \
-         *pmti = 0;                    \
-       }                               \
-      y = mt[(*pmti)++];               \
-      y ^= (y >> 11);                  \
-      y ^= (y << 7) & MASK_1;          \
-      y ^= (y << 15) & MASK_2;         \
-      y ^= (y >> 18);                  \
-    }                                  \
-  while (0)
-
-
-  /* Handle the common cases of 32- or 64-bit limbs with fast,
-     optimized routines, and the rest of cases with a general
-     routine.  In all cases, no more than 31 bits are rejected
-     for the last limb so that every version of the code is
-     consistent with the others.  */
-
-#if (GMP_NUMB_BITS == 32)
-
-  for (i = 0; i < nlimbs; i++)
-    {
-      NEXT_RANDOM;
-      dest[i] = (mp_limb_t) y;
-    }
-  if (rbits)
-    {
-      NEXT_RANDOM;
-      dest[nlimbs] = (mp_limb_t) (y & ~(ULONG_MAX << rbits));
-    }
-
-#else /* GMP_NUMB_BITS != 32 */
-#if (GMP_NUMB_BITS == 64)
-
-  for (i = 0; i < nlimbs; i++)
-    {
-      NEXT_RANDOM;
-      dest[i] = (mp_limb_t) y;
-      NEXT_RANDOM;
-      dest[i] |= (mp_limb_t) y << 32;
-    }
-  if (rbits)
-    {
-      if (rbits < 32)
-       {
-         NEXT_RANDOM;
-         dest[nlimbs] = (mp_limb_t) (y & ~(ULONG_MAX << rbits));
-       }
-      else
-       {
-         NEXT_RANDOM;
-         dest[nlimbs] = (mp_limb_t) y;
-         if (rbits > 32)
-           {
-             NEXT_RANDOM;
-             dest[nlimbs] |=
-               ((mp_limb_t) (y & ~(ULONG_MAX << (rbits-32)))) << 32;
-           }
-       }
-    }
-
-#else /* GMP_NUMB_BITS != 64 */
-
-  {
-    /* Fall back to a general algorithm.  This algorithm works by
-       keeping a pool of up to 64 bits (2 outputs from MT) acting
-       as a shift register from which bits are consumed as needed.
-       Bits are consumed using the LSB bits of bitpool_l, and
-       inserted via bitpool_h and shifted to the right place.  */
-
-    gmp_uint_least32_t bitpool_h = 0;
-    gmp_uint_least32_t bitpool_l = 0;
-    int bits_in_pool = 0;      /* Holds number of valid bits in the pool.  */
-    int bits_to_fill;          /* Holds total number of bits to put in
-                                  destination.  */
-    int bitidx;                        /* Holds the destination bit position.  */
-    mp_size_t nlimbs2;         /* Number of whole+partial limbs to fill.  */
-
-    nlimbs2 = nlimbs + (rbits != 0);
-
-    for (i = 0; i < nlimbs2; i++)
-      {
-       bitidx = 0;
-       if (i < nlimbs)
-         bits_to_fill = GMP_NUMB_BITS;
-       else
-         bits_to_fill = rbits;
-
-       dest[i] = CNST_LIMB (0);
-       while (bits_to_fill >= 32) /* Process whole 32-bit blocks first.  */
-         {
-           if (bits_in_pool < 32)      /* Need more bits.  */
-             {
-               /* 64-bit right shift.  */
-               NEXT_RANDOM;
-               bitpool_h = y;
-               bitpool_l |= (bitpool_h << bits_in_pool) & 0xFFFFFFFF;
-               if (bits_in_pool == 0)
-                 bitpool_h = 0;
-               else
-                 bitpool_h >>= 32 - bits_in_pool;
-               bits_in_pool += 32;     /* We've got 32 more bits.  */
-             }
-
-           /* Fill a 32-bit chunk.  */
-           dest[i] |= ((mp_limb_t) bitpool_l) << bitidx;
-           bitpool_l = bitpool_h;
-           bits_in_pool -= 32;
-           bits_to_fill -= 32;
-           bitidx += 32;
-         }
-
-       /* Cover the case where GMP_NUMB_BITS is not a multiple of 32.  */
-       if (bits_to_fill != 0)
-         {
-           if (bits_in_pool < bits_to_fill)
-             {
-               NEXT_RANDOM;
-               bitpool_h = y;
-               bitpool_l |= (bitpool_h << bits_in_pool) & 0xFFFFFFFF;
-               if (bits_in_pool == 0)
-                 bitpool_h = 0;
-               else
-                 bitpool_h >>= 32 - bits_in_pool;
-               bits_in_pool += 32;
-             }
-
-           dest[i] |= (((mp_limb_t) bitpool_l
-                        & ~(~CNST_LIMB (0) << bits_to_fill))
-                       << bitidx);
-           bitpool_l = ((bitpool_l >> bits_to_fill)
-                        | (bitpool_h << (32 - bits_to_fill))) & 0xFFFFFFFF;
-           bitpool_h >>= bits_to_fill;
-           bits_in_pool -= bits_to_fill;
-         }
-      }
-  }
-
-#endif /* GMP_NUMB_BITS != 64 */
-#endif /* GMP_NUMB_BITS != 32 */
-}
-
-void
-__gmp_randclear_mt (gmp_randstate_t rstate)
-{
-  (*__gmp_free_func) ((void *) RNG_STATE (rstate),
-                     ALLOC (rstate->_mp_seed) * BYTES_PER_MP_LIMB);
-}
-
-void __gmp_randiset_mt __GMP_PROTO ((gmp_randstate_ptr dst, gmp_randstate_srcptr src));
-
-static const gmp_randfnptr_t Mersenne_Twister_Generator_Noseed = {
-  NULL,
-  __gmp_randget_mt,
-  __gmp_randclear_mt,
-  __gmp_randiset_mt
-};
-
-void
-__gmp_randiset_mt (gmp_randstate_ptr dst, gmp_randstate_srcptr src)
-{
-  const mp_size_t sz = ((sizeof (gmp_rand_mt_struct) - 1) / BYTES_PER_MP_LIMB) + 1;
-  gmp_rand_mt_struct *dstp, *srcp;
-  mp_size_t i;
-
-  /* Set the generator functions.  */
-  RNG_FNPTR (dst) = (void *) &Mersenne_Twister_Generator_Noseed;
-
-  /* Allocate the MT-specific state.  */
-  dstp = (gmp_rand_mt_struct *) __GMP_ALLOCATE_FUNC_LIMBS (sz);
-  RNG_STATE (dst) = (mp_ptr) dstp;
-  ALLOC (dst->_mp_seed) = sz;     /* Initialize alloc field to placate Camm.  */
-
-  /* Copy state.  */
-  srcp = (gmp_rand_mt_struct *) RNG_STATE (src);
-  for (i = 0; i < N; i++)
-    dstp->mt[i] = srcp->mt[i];
-
-  dstp->mti = srcp->mti;
-}
-
-void
-__gmp_randinit_mt_noseed (gmp_randstate_ptr dst)
-{
-  const mp_size_t sz = ((sizeof (gmp_rand_mt_struct) - 1) / BYTES_PER_MP_LIMB) + 1;
-  gmp_rand_mt_struct *dstp;
-  mp_size_t i;
-
-  /* Set the generator functions.  */
-  RNG_FNPTR (dst) = (void *) &Mersenne_Twister_Generator_Noseed;
-
-  /* Allocate the MT-specific state.  */
-  dstp = (gmp_rand_mt_struct *) __GMP_ALLOCATE_FUNC_LIMBS (sz);
-  RNG_STATE (dst) = (mp_ptr) dstp;
-  ALLOC (dst->_mp_seed) = sz;     /* Initialize alloc field to placate Camm.  */
-
-  /* Set state for default seed.  */
-  for (i = 0; i < N; i++)
-    dstp->mt[i] = default_state[i];
-
-  dstp->mti = WARM_UP % N;
-}
diff --git a/randmt.h b/randmt.h

deleted file mode 100644 (file)

index fc23381..0000000
--- a/randmt.h
+++ /dev/null
@@ -1,40 +0,0 @@
-/* Mersenne Twister pseudo-random number generator defines.
-
-Copyright 2002, 2003 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-License for more details.
-
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
-
-
-/* Number of extractions used to warm the buffer up.  */
-#define WARM_UP 2000
-
-/* Period parameters.  */
-#define N 624
-#define M 397
-#define MATRIX_A 0x9908B0DF   /* Constant vector a.  */
-
-/* State structure for MT.  */
-typedef struct
-{
-  gmp_uint_least32_t mt[N];    /* State array.  */
-  int mti;                     /* Index of current value.  */
-} gmp_rand_mt_struct;
-
-
-void __gmp_mt_recalc_buffer __GMP_PROTO ((gmp_uint_least32_t *));
-void __gmp_randget_mt __GMP_PROTO ((gmp_randstate_t, mp_ptr, unsigned long int));
-void __gmp_randclear_mt __GMP_PROTO ((gmp_randstate_t rstate));
-void __gmp_randiset_mt __GMP_PROTO ((gmp_randstate_ptr, gmp_randstate_srcptr));
diff --git a/randmts.c b/randmts.c

deleted file mode 100644 (file)

index e3b0338..0000000
--- a/randmts.c
+++ /dev/null
@@ -1,157 +0,0 @@
-/* Mersenne Twister pseudo-random number generator functions.
-
-Copyright 2002, 2003 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-License for more details.
-
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
-
-#include "gmp.h"
-#include "gmp-impl.h"
-#include "randmt.h"
-
-
-/* Calculate (b^e) mod (2^n-k) for e=1074888996, n=19937 and k=20023,
-   needed by the seeding function below.  */
-static void
-mangle_seed (mpz_ptr r, mpz_srcptr b_orig)
-{
-  mpz_t          t, b;
-  unsigned long  e = 0x40118124;
-  unsigned long  bit = 0x20000000;
-
-  mpz_init (t);
-  mpz_init_set (b, b_orig);  /* in case r==b_orig */
-
-  mpz_set (r, b);
-  do
-    {
-      mpz_mul (r, r, r);
-
-    reduce:
-      for (;;)
-        {
-          mpz_tdiv_q_2exp (t, r, 19937L);
-          if (mpz_sgn (t) == 0)
-            break;
-          mpz_tdiv_r_2exp (r, r, 19937L);
-          mpz_addmul_ui (r, t, 20023L);
-        }
-
-      if ((e & bit) != 0)
-        {
-          e &= ~bit;
-          mpz_mul (r, r, b);
-          goto reduce;
-        }
-
-      bit >>= 1;
-    }
-  while (bit != 0);
-
-  mpz_clear (t);
-  mpz_clear (b);
-}
-
-
-/* Seeding function.  Uses powering modulo a non-Mersenne prime to obtain
-   a permutation of the input seed space.  The modulus is 2^19937-20023,
-   which is probably prime.  The power is 1074888996.  In order to avoid
-   seeds 0 and 1 generating invalid or strange output, the input seed is
-   first manipulated as follows:
-
-     seed1 = seed mod (2^19937-20027) + 2
-
-   so that seed1 lies between 2 and 2^19937-20026 inclusive. Then the
-   powering is performed as follows:
-
-     seed2 = (seed1^1074888996) mod (2^19937-20023)
-
-   and then seed2 is used to bootstrap the buffer.
-
-   This method aims to give guarantees that:
-     a) seed2 will never be zero,
-     b) seed2 will very seldom have a very low population of ones in its
-       binary representation, and
-     c) every seed between 0 and 2^19937-20028 (inclusive) will yield a
-       different sequence.
-
-   CAVEATS:
-
-   The period of the seeding function is 2^19937-20027.  This means that
-   with seeds 2^19937-20027, 2^19937-20026, ... the exact same sequences
-   are obtained as with seeds 0, 1, etc.; it also means that seed -1
-   produces the same sequence as seed 2^19937-20028, etc.
- */
-
-static void
-randseed_mt (gmp_randstate_t rstate, mpz_srcptr seed)
-{
-  int i;
-  size_t cnt;
-
-  gmp_rand_mt_struct *p;
-  mpz_t mod;    /* Modulus.  */
-  mpz_t seed1;  /* Intermediate result.  */
-
-  p = (gmp_rand_mt_struct *) RNG_STATE (rstate);
-
-  mpz_init (mod);
-  mpz_init (seed1);
-
-  mpz_set_ui (mod, 0L);
-  mpz_setbit (mod, 19937L);
-  mpz_sub_ui (mod, mod, 20027L);
-  mpz_mod (seed1, seed, mod);  /* Reduce `seed' modulo `mod'.  */
-  mpz_add_ui (seed1, seed1, 2L);       /* seed1 is now ready.  */
-  mangle_seed (seed1, seed1);  /* Perform the mangling by powering.  */
-
-  /* Copy the last bit into bit 31 of mt[0] and clear it.  */
-  p->mt[0] = (mpz_tstbit (seed1, 19936L) != 0) ? 0x80000000 : 0;
-  mpz_clrbit (seed1, 19936L);
-
-  /* Split seed1 into N-1 32-bit chunks.  */
-  mpz_export (&p->mt[1], &cnt, -1, sizeof (p->mt[1]), 0,
-              8 * sizeof (p->mt[1]) - 32, seed1);
-  cnt++;
-  ASSERT (cnt <= N);
-  while (cnt < N)
-    p->mt[cnt++] = 0;
-
-  mpz_clear (mod);
-  mpz_clear (seed1);
-
-  /* Warm the generator up if necessary.  */
-  if (WARM_UP != 0)
-    for (i = 0; i < WARM_UP / N; i++)
-      __gmp_mt_recalc_buffer (p->mt);
-
-  p->mti = WARM_UP % N;
-}
-
-
-static const gmp_randfnptr_t Mersenne_Twister_Generator = {
-  randseed_mt,
-  __gmp_randget_mt,
-  __gmp_randclear_mt,
-  __gmp_randiset_mt
-};
-
-/* Initialize MT-specific data.  */
-void
-gmp_randinit_mt (gmp_randstate_t rstate)
-{
-  __gmp_randinit_mt_noseed (rstate);
-  RNG_FNPTR (rstate) = (void *) &Mersenne_Twister_Generator;
-}
diff --git a/randmui.c b/randmui.c

deleted file mode 100644 (file)

index f349d35..0000000
--- a/randmui.c
+++ /dev/null
@@ -1,75 +0,0 @@
-/* gmp_urandomm_ui -- uniform random number 0 to N-1 for ulong N.
-
-Copyright 2003, 2004 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-License for more details.
-
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
-
-#include "gmp.h"
-#include "gmp-impl.h"
-#include "longlong.h"
-
-
-/* If n is a power of 2 then the test ret<n is always true and the loop is
-   unnecessary, but there's no need to add special code for this.  Just get
-   the "bits" calculation correct and let it go through normally.
-
-   If n is 1 then will have bits==0 and _gmp_rand will produce no output and
-   we always return 0.  Again there seems no need for a special case, just
-   initialize a[0]=0 and let it go through normally.  */
-
-#define MAX_URANDOMM_ITER  80
-
-unsigned long
-gmp_urandomm_ui (gmp_randstate_ptr rstate, unsigned long n)
-{
-  mp_limb_t      a[LIMBS_PER_ULONG];
-  unsigned long  ret, bits, leading;
-  int            i;
-
-  if (UNLIKELY (n == 0))
-    DIVIDE_BY_ZERO;
-
-  /* start with zeros, since if bits==0 then _gmp_rand will store nothing at
-     all (bits==0 arises when n==1), or if bits <= GMP_NUMB_BITS then it
-     will store only a[0].  */
-  a[0] = 0;
-#if LIMBS_PER_ULONG > 1
-  a[1] = 0;
-#endif
-
-  count_leading_zeros (leading, (mp_limb_t) n);
-  bits = GMP_LIMB_BITS - leading - (POW2_P(n) != 0);
-
-  for (i = 0; i < MAX_URANDOMM_ITER; i++)
-    {
-      _gmp_rand (a, rstate, bits);
-#if LIMBS_PER_ULONG == 1
-      ret = a[0];
-#else
-      ret = a[0] | (a[1] << GMP_NUMB_BITS);
-#endif
-      if (LIKELY (ret < n))   /* usually one iteration suffices */
-        goto done;
-    }
-
-  /* Too many iterations, there must be something degenerate about the
-     rstate algorithm.  Return r%n.  */
-  ret -= n;
-  ASSERT (ret < n);
-
- done:
-  return ret;
-}
diff --git a/rands.c b/rands.c

deleted file mode 100644 (file)

index 93eb3e7..0000000
--- a/rands.c
+++ /dev/null
@@ -1,31 +0,0 @@
-/* __gmp_rands -- global random state for old-style random functions.
-
-   EVERYTHING IN THIS FILE IS FOR INTERNAL USE ONLY.  IT'S ALMOST CERTAIN TO
-   BE SUBJECT TO INCOMPATIBLE CHANGES OR DISAPPEAR COMPLETELY IN FUTURE GNU
-   MP RELEASES.  */
-
-/*
-Copyright 2001 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-License for more details.
-
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
-
-#include "gmp.h"
-#include "gmp-impl.h"
-
-
-/* Use this via the RANDS macro in gmp-impl.h */
-char             __gmp_rands_initialized = 0;
-gmp_randstate_t  __gmp_rands;
diff --git a/randsd.c b/randsd.c

deleted file mode 100644 (file)

index 077382e..0000000
--- a/randsd.c
+++ /dev/null
@@ -1,28 +0,0 @@
-/* gmp_randseed (state, seed) -- Set initial seed SEED in random state STATE.
-
-Copyright 2000, 2002 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-License for more details.
-
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
-
-#include "gmp.h"
-#include "gmp-impl.h"
-
-void
-gmp_randseed (gmp_randstate_t rstate,
-             mpz_srcptr seed)
-{
-  (*((gmp_randfnptr_t *) RNG_FNPTR (rstate))->randseed_fn) (rstate, seed);
-}
diff --git a/randsdui.c b/randsdui.c

deleted file mode 100644 (file)

index 9039eda..0000000
--- a/randsdui.c
+++ /dev/null
@@ -1,33 +0,0 @@
-/* gmp_randseed_ui (state, seed) -- Set initial seed SEED in random
-   state STATE.
-
-Copyright 2000, 2002 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-License for more details.
-
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
-
-#include "gmp.h"
-#include "gmp-impl.h"
-
-void
-gmp_randseed_ui (gmp_randstate_t rstate,
-                 unsigned long int seed)
-{
-  mpz_t zseed;
-  mp_limb_t zlimbs[LIMBS_PER_ULONG];
-
-  MPZ_FAKE_UI (zseed, zlimbs, seed);
-  gmp_randseed (rstate, zseed);
-}
diff --git a/scanf/Makefile.in b/scanf/Makefile.in

index c95eea68592dad8644b0f7ceb00c72550456d70c..3f42a08c230ceeeeaac03c3c7c0983b614f6c6d0 100644 (file)
--- a/scanf/Makefile.in
+++ b/scanf/Makefile.in
@@ -1,9 +1,9 @@
-# Makefile.in generated by automake 1.11.1 from Makefile.am.
+# Makefile.in generated by automake 1.11.6 from Makefile.am.
  # @configure_input@
  
  # Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
-# 2003, 2004, 2005, 2006, 2007, 2008, 2009  Free Software Foundation,
-# Inc.
+# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software
+# Foundation, Inc.
  # This Makefile.in is free software; the Free Software Foundation
  # gives unlimited permission to copy and/or distribute it,
  # with or without modifications, as long as this notice is preserved.
@@ -33,6 +33,23 @@
  # along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
  
  VPATH = @srcdir@
+am__make_dryrun = \
+  { \
+    am__dry=no; \
+    case $$MAKEFLAGS in \
+      *\\[\ \  ]*) \
+        echo 'am--echo: ; @echo "AM"  OK' | $(MAKE) -f - 2>/dev/null \
+          | grep '^AM OK$$' >/dev/null || am__dry=yes;; \
+      *) \
+        for am__flg in $$MAKEFLAGS; do \
+          case $$am__flg in \
+            *=*|--*) ;; \
+            *n*) am__dry=yes; break;; \
+          esac; \
+        done;; \
+    esac; \
+    test $$am__dry = yes; \
+  }
  pkgdatadir = $(datadir)/@PACKAGE@
  pkgincludedir = $(includedir)/@PACKAGE@
  pkglibdir = $(libdir)/@PACKAGE@
@@ -51,12 +68,11 @@ PRE_UNINSTALL = :
  POST_UNINSTALL = :
  build_triplet = @build@
  host_triplet = @host@
-ANSI2KNR = $(top_builddir)/ansi2knr
  subdir = scanf
  DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in
  ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
  am__aclocal_m4_deps = $(top_srcdir)/acinclude.m4 \
-       $(top_srcdir)/configure.in
+       $(top_srcdir)/configure.ac
  am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
         $(ACLOCAL_M4)
  mkinstalldirs = $(install_sh) -d
@@ -65,9 +81,8 @@ CONFIG_CLEAN_FILES =
  CONFIG_CLEAN_VPATH_FILES =
  LTLIBRARIES = $(noinst_LTLIBRARIES)
  libscanf_la_LIBADD =
-am_libscanf_la_OBJECTS = doscan$U.lo fscanf$U.lo fscanffuns$U.lo \
-       scanf$U.lo sscanf$U.lo sscanffuns$U.lo vfscanf$U.lo \
-       vscanf$U.lo vsscanf$U.lo
+am_libscanf_la_OBJECTS = doscan.lo fscanf.lo fscanffuns.lo scanf.lo \
+       sscanf.lo sscanffuns.lo vfscanf.lo vscanf.lo vsscanf.lo
  libscanf_la_OBJECTS = $(am_libscanf_la_OBJECTS)
  DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir)
  depcomp =
@@ -83,6 +98,11 @@ LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \
         $(LDFLAGS) -o $@
  SOURCES = $(libscanf_la_SOURCES)
  DIST_SOURCES = $(libscanf_la_SOURCES)
+am__can_run_installinfo = \
+  case $$AM_UPDATE_INFO_DIR in \
+    n|no|NO) false;; \
+    *) (install-info --version) >/dev/null 2>&1;; \
+  esac
  ETAGS = etags
  CTAGS = ctags
  DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
@@ -184,8 +204,8 @@ SHELL = @SHELL@
  SPEED_CYCLECOUNTER_OBJ = @SPEED_CYCLECOUNTER_OBJ@
  STRIP = @STRIP@
  TAL_OBJECT = @TAL_OBJECT@
+TUNE_LIBS = @TUNE_LIBS@
  TUNE_SQR_OBJ = @TUNE_SQR_OBJ@
-U = @U@
  U_FOR_BUILD = @U_FOR_BUILD@
  VERSION = @VERSION@
  WITH_READLINE_01 = @WITH_READLINE_01@
@@ -232,7 +252,6 @@ mandir = @mandir@
  mkdir_p = @mkdir_p@
  mpn_objects = @mpn_objects@
  mpn_objs_in_libgmp = @mpn_objs_in_libgmp@
-mpn_objs_in_libmp = @mpn_objs_in_libmp@
  oldincludedir = @oldincludedir@
  pdfdir = @pdfdir@
  prefix = @prefix@
@@ -295,7 +314,7 @@ clean-noinstLTLIBRARIES:
           echo "rm -f \"$${dir}/so_locations\""; \
           rm -f "$${dir}/so_locations"; \
         done
-libscanf.la: $(libscanf_la_OBJECTS) $(libscanf_la_DEPENDENCIES) 
+libscanf.la: $(libscanf_la_OBJECTS) $(libscanf_la_DEPENDENCIES) $(EXTRA_libscanf_la_DEPENDENCIES) 
         $(LINK)  $(libscanf_la_OBJECTS) $(libscanf_la_LIBADD) $(LIBS)
  
  mostlyclean-compile:
@@ -303,11 +322,6 @@ mostlyclean-compile:
  
  distclean-compile:
         -rm -f *.tab.c
-$(top_builddir)/ansi2knr:
-       $(am__cd) $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) ./ansi2knr
-
-mostlyclean-kr:
-       -test "$U" = "" || rm -f *_.c
  
  .c.o:
         $(COMPILE) -c $<
@@ -317,29 +331,6 @@ mostlyclean-kr:
  
  .c.lo:
         $(LTCOMPILE) -c -o $@ $<
-doscan_.c: doscan.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/doscan.c; then echo $(srcdir)/doscan.c; else echo doscan.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-fscanf_.c: fscanf.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/fscanf.c; then echo $(srcdir)/fscanf.c; else echo fscanf.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-fscanffuns_.c: fscanffuns.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/fscanffuns.c; then echo $(srcdir)/fscanffuns.c; else echo fscanffuns.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-scanf_.c: scanf.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/scanf.c; then echo $(srcdir)/scanf.c; else echo scanf.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-sscanf_.c: sscanf.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/sscanf.c; then echo $(srcdir)/sscanf.c; else echo sscanf.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-sscanffuns_.c: sscanffuns.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/sscanffuns.c; then echo $(srcdir)/sscanffuns.c; else echo sscanffuns.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-vfscanf_.c: vfscanf.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/vfscanf.c; then echo $(srcdir)/vfscanf.c; else echo vfscanf.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-vscanf_.c: vscanf.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/vscanf.c; then echo $(srcdir)/vscanf.c; else echo vscanf.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-vsscanf_.c: vsscanf.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/vsscanf.c; then echo $(srcdir)/vsscanf.c; else echo vsscanf.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-doscan_.$(OBJEXT) doscan_.lo fscanf_.$(OBJEXT) fscanf_.lo \
-fscanffuns_.$(OBJEXT) fscanffuns_.lo scanf_.$(OBJEXT) scanf_.lo \
-sscanf_.$(OBJEXT) sscanf_.lo sscanffuns_.$(OBJEXT) sscanffuns_.lo \
-vfscanf_.$(OBJEXT) vfscanf_.lo vscanf_.$(OBJEXT) vscanf_.lo \
-vsscanf_.$(OBJEXT) vsscanf_.lo : $(ANSI2KNR)
  
  mostlyclean-libtool:
         -rm -f *.lo
@@ -443,10 +434,15 @@ install-am: all-am
  
  installcheck: installcheck-am
  install-strip:
-       $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
-         install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
-         `test -z '$(STRIP)' || \
-           echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+       if test -z '$(STRIP)'; then \
+         $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+           install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+             install; \
+       else \
+         $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+           install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+           "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
+       fi
  mostlyclean-generic:
  
  clean-generic:
@@ -514,7 +510,7 @@ maintainer-clean-am: distclean-am maintainer-clean-generic
  
  mostlyclean: mostlyclean-am
  
-mostlyclean-am: mostlyclean-compile mostlyclean-generic mostlyclean-kr \
+mostlyclean-am: mostlyclean-compile mostlyclean-generic \
         mostlyclean-libtool
  
  pdf: pdf-am
@@ -527,7 +523,7 @@ ps-am:
  
  uninstall-am:
  
-.MAKE: $(top_builddir)/ansi2knr install-am install-strip
+.MAKE: install-am install-strip
  
  .PHONY: CTAGS GTAGS all all-am check check-am clean clean-generic \
         clean-libtool clean-noinstLTLIBRARIES ctags distclean \
@@ -539,9 +535,8 @@ uninstall-am:
         install-pdf install-pdf-am install-ps install-ps-am \
         install-strip installcheck installcheck-am installdirs \
         maintainer-clean maintainer-clean-generic mostlyclean \
-       mostlyclean-compile mostlyclean-generic mostlyclean-kr \
-       mostlyclean-libtool pdf pdf-am ps ps-am tags uninstall \
-       uninstall-am
+       mostlyclean-compile mostlyclean-generic mostlyclean-libtool \
+       pdf pdf-am ps ps-am tags uninstall uninstall-am
  
  
  # Tell versions [3.59,3.63) of GNU make to not export all variables.
diff --git a/scanf/fscanffuns.c b/scanf/fscanffuns.c

index 236aec663f815c22fcb15fd5211e8a54d5603e23..74590232d7bb6f2de381c6bb8ae6097d83cdca47 100644 (file)
--- a/scanf/fscanffuns.c
+++ b/scanf/fscanffuns.c
@@ -28,13 +28,13 @@ along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
  
  /* SunOS 4 stdio.h doesn't provide prototypes for these */
  #if ! HAVE_DECL_FGETC
-int fgetc __GMP_PROTO ((FILE *fp));
+int fgetc (FILE *);
  #endif
  #if ! HAVE_DECL_FSCANF
-int fscanf __GMP_PROTO ((FILE *fp, const char *fmt, ...));
+int fscanf (FILE *, const char *, ...);
  #endif
  #if ! HAVE_DECL_UNGETC
-int ungetc __GMP_PROTO ((int c, FILE *fp));
+int ungetc (int, FILE *);
  #endif
  
  
diff --git a/tal-reent.c b/tal-reent.c

index 32db9342c920e2fecfa4395bd65edf980dbe7d51..87da7f3c8e654fb27ba6579588ae6a4e605e8f3d 100644 (file)
--- a/tal-reent.c
+++ b/tal-reent.c
@@ -50,7 +50,7 @@ __gmp_tmp_reentrant_alloc (struct tmp_reentrant_t **markp, size_t size)
  #define P   ((struct tmp_reentrant_t *) p)
  
    total_size = size + HSIZ;
-  p = (*__gmp_allocate_func) (total_size);
+  p = (char *) (*__gmp_allocate_func) (total_size);
    P->size = total_size;
    P->next = *markp;
    *markp = P;
diff --git a/tests/Makefile.am b/tests/Makefile.am

index 30975cbf432f972e8fc8435e6daf275702559a8f..dc85df36ca69305e0bc6592acf11194dbb2e7ece 100644 (file)
--- a/tests/Makefile.am
+++ b/tests/Makefile.am
@@ -1,24 +1,24 @@
  ## Process this file with automake to generate Makefile.in
  
-# Copyright 2000, 2001, 2002, 2003, 2004 Free Software Foundation, Inc.
+# Copyright 2000, 2001, 2002, 2003, 2004, 2013 Free Software Foundation, Inc.
  #
-# This file is part of the GNU MP Library.
+# This file is part of the GNU MP Library test suite.
  #
-# The GNU MP Library is free software; you can redistribute it and/or modify
-# it under the terms of the GNU Lesser General Public License as published by
-# the Free Software Foundation; either version 3 of the License, or (at your
-# option) any later version.
+# The GNU MP Library test suite is free software; you can redistribute it
+# and/or modify it under the terms of the GNU General Public License as
+# published by the Free Software Foundation; either version 3 of the License,
+# or (at your option) any later version.
  #
-# The GNU MP Library is distributed in the hope that it will be useful, but
-# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-# License for more details.
+# The GNU MP Library test suite is distributed in the hope that it will be
+# useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+# Public License for more details.
  #
-# You should have received a copy of the GNU Lesser General Public License
-# along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+# You should have received a copy of the GNU General Public License along with
+# the GNU MP Library test suite.  If not, see http://www.gnu.org/licenses/.
  
  
-SUBDIRS = . devel mpn mpz mpq mpf rand misc cxx mpbsd
+SUBDIRS = . devel mpn mpz mpq mpf rand misc cxx
  
  include ../mpn/Makeasm.am
  
@@ -27,7 +27,8 @@ LDADD = libtests.la $(top_builddir)/libgmp.la
  
  check_LTLIBRARIES = libtests.la
  
-EXTRA_libtests_la_SOURCES = amd64call.asm amd64check.c x86call.asm x86check.c
+EXTRA_libtests_la_SOURCES = amd64call.asm amd64check.c x86call.asm x86check.c \
+    arm32call.asm arm32check.c
  libtests_la_SOURCES = tests.h \
    memory.c misc.c refmpf.c refmpn.c refmpq.c refmpz.c spinner.c trace.c
  libtests_la_DEPENDENCIES = @CALLING_CONVENTIONS_OBJS@
diff --git a/tests/Makefile.in b/tests/Makefile.in

index 76ac384528a4d5c0e507fd112ecae37406b738b7..bf49e60c9e207f931f394027a156c3bc7691a396 100644 (file)
--- a/tests/Makefile.in
+++ b/tests/Makefile.in
@@ -1,9 +1,9 @@
-# Makefile.in generated by automake 1.11.1 from Makefile.am.
+# Makefile.in generated by automake 1.11.6 from Makefile.am.
  # @configure_input@
  
  # Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
-# 2003, 2004, 2005, 2006, 2007, 2008, 2009  Free Software Foundation,
-# Inc.
+# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software
+# Foundation, Inc.
  # This Makefile.in is free software; the Free Software Foundation
  # gives unlimited permission to copy and/or distribute it,
  # with or without modifications, as long as this notice is preserved.
@@ -15,22 +15,22 @@
  
  @SET_MAKE@
  
-# Copyright 2000, 2001, 2002, 2003, 2004 Free Software Foundation, Inc.
+# Copyright 2000, 2001, 2002, 2003, 2004, 2013 Free Software Foundation, Inc.
  #
-# This file is part of the GNU MP Library.
+# This file is part of the GNU MP Library test suite.
  #
-# The GNU MP Library is free software; you can redistribute it and/or modify
-# it under the terms of the GNU Lesser General Public License as published by
-# the Free Software Foundation; either version 3 of the License, or (at your
-# option) any later version.
+# The GNU MP Library test suite is free software; you can redistribute it
+# and/or modify it under the terms of the GNU General Public License as
+# published by the Free Software Foundation; either version 3 of the License,
+# or (at your option) any later version.
  #
-# The GNU MP Library is distributed in the hope that it will be useful, but
-# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-# License for more details.
+# The GNU MP Library test suite is distributed in the hope that it will be
+# useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+# Public License for more details.
  #
-# You should have received a copy of the GNU Lesser General Public License
-# along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+# You should have received a copy of the GNU General Public License along with
+# the GNU MP Library test suite.  If not, see http://www.gnu.org/licenses/.
  
  # Copyright 1996, 1998, 1999, 2000, 2001, 2002 Free Software Foundation,
  # Inc.
@@ -50,6 +50,23 @@
  # You should have received a copy of the GNU Lesser General Public License
  # along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
  VPATH = @srcdir@
+am__make_dryrun = \
+  { \
+    am__dry=no; \
+    case $$MAKEFLAGS in \
+      *\\[\ \  ]*) \
+        echo 'am--echo: ; @echo "AM"  OK' | $(MAKE) -f - 2>/dev/null \
+          | grep '^AM OK$$' >/dev/null || am__dry=yes;; \
+      *) \
+        for am__flg in $$MAKEFLAGS; do \
+          case $$am__flg in \
+            *=*|--*) ;; \
+            *n*) am__dry=yes; break;; \
+          esac; \
+        done;; \
+    esac; \
+    test $$am__dry = yes; \
+  }
  pkgdatadir = $(datadir)/@PACKAGE@
  pkgincludedir = $(includedir)/@PACKAGE@
  pkglibdir = $(libdir)/@PACKAGE@
@@ -68,7 +85,6 @@ PRE_UNINSTALL = :
  POST_UNINSTALL = :
  build_triplet = @build@
  host_triplet = @host@
-ANSI2KNR = $(top_builddir)/ansi2knr
  DIST_COMMON = $(srcdir)/../mpn/Makeasm.am $(srcdir)/Makefile.am \
         $(srcdir)/Makefile.in
  check_PROGRAMS = t-bswap$(EXEEXT) t-constants$(EXEEXT) \
@@ -78,7 +94,7 @@ check_PROGRAMS = t-bswap$(EXEEXT) t-constants$(EXEEXT) \
  subdir = tests
  ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
  am__aclocal_m4_deps = $(top_srcdir)/acinclude.m4 \
-       $(top_srcdir)/configure.in
+       $(top_srcdir)/configure.ac
  am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
         $(ACLOCAL_M4)
  mkinstalldirs = $(install_sh) -d
@@ -86,43 +102,43 @@ CONFIG_HEADER = $(top_builddir)/config.h
  CONFIG_CLEAN_FILES =
  CONFIG_CLEAN_VPATH_FILES =
  am__DEPENDENCIES_1 =
-am_libtests_la_OBJECTS = memory$U.lo misc$U.lo refmpf$U.lo refmpn$U.lo \
-       refmpq$U.lo refmpz$U.lo spinner$U.lo trace$U.lo
+am_libtests_la_OBJECTS = memory.lo misc.lo refmpf.lo refmpn.lo \
+       refmpq.lo refmpz.lo spinner.lo trace.lo
  libtests_la_OBJECTS = $(am_libtests_la_OBJECTS)
  t_bswap_SOURCES = t-bswap.c
-t_bswap_OBJECTS = t-bswap$U.$(OBJEXT)
+t_bswap_OBJECTS = t-bswap.$(OBJEXT)
  t_bswap_LDADD = $(LDADD)
  t_bswap_DEPENDENCIES = libtests.la $(top_builddir)/libgmp.la
  t_constants_SOURCES = t-constants.c
-t_constants_OBJECTS = t-constants$U.$(OBJEXT)
+t_constants_OBJECTS = t-constants.$(OBJEXT)
  t_constants_LDADD = $(LDADD)
  t_constants_DEPENDENCIES = libtests.la $(top_builddir)/libgmp.la
  t_count_zeros_SOURCES = t-count_zeros.c
-t_count_zeros_OBJECTS = t-count_zeros$U.$(OBJEXT)
+t_count_zeros_OBJECTS = t-count_zeros.$(OBJEXT)
  t_count_zeros_LDADD = $(LDADD)
  t_count_zeros_DEPENDENCIES = libtests.la $(top_builddir)/libgmp.la
  t_gmpmax_SOURCES = t-gmpmax.c
-t_gmpmax_OBJECTS = t-gmpmax$U.$(OBJEXT)
+t_gmpmax_OBJECTS = t-gmpmax.$(OBJEXT)
  t_gmpmax_LDADD = $(LDADD)
  t_gmpmax_DEPENDENCIES = libtests.la $(top_builddir)/libgmp.la
  t_hightomask_SOURCES = t-hightomask.c
-t_hightomask_OBJECTS = t-hightomask$U.$(OBJEXT)
+t_hightomask_OBJECTS = t-hightomask.$(OBJEXT)
  t_hightomask_LDADD = $(LDADD)
  t_hightomask_DEPENDENCIES = libtests.la $(top_builddir)/libgmp.la
  t_modlinv_SOURCES = t-modlinv.c
-t_modlinv_OBJECTS = t-modlinv$U.$(OBJEXT)
+t_modlinv_OBJECTS = t-modlinv.$(OBJEXT)
  t_modlinv_LDADD = $(LDADD)
  t_modlinv_DEPENDENCIES = libtests.la $(top_builddir)/libgmp.la
  t_parity_SOURCES = t-parity.c
-t_parity_OBJECTS = t-parity$U.$(OBJEXT)
+t_parity_OBJECTS = t-parity.$(OBJEXT)
  t_parity_LDADD = $(LDADD)
  t_parity_DEPENDENCIES = libtests.la $(top_builddir)/libgmp.la
  t_popc_SOURCES = t-popc.c
-t_popc_OBJECTS = t-popc$U.$(OBJEXT)
+t_popc_OBJECTS = t-popc.$(OBJEXT)
  t_popc_LDADD = $(LDADD)
  t_popc_DEPENDENCIES = libtests.la $(top_builddir)/libgmp.la
  t_sub_SOURCES = t-sub.c
-t_sub_OBJECTS = t-sub$U.$(OBJEXT)
+t_sub_OBJECTS = t-sub.$(OBJEXT)
  t_sub_LDADD = $(LDADD)
  t_sub_DEPENDENCIES = libtests.la $(top_builddir)/libgmp.la
  DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir)
@@ -150,6 +166,11 @@ RECURSIVE_TARGETS = all-recursive check-recursive dvi-recursive \
         install-pdf-recursive install-ps-recursive install-recursive \
         installcheck-recursive installdirs-recursive pdf-recursive \
         ps-recursive uninstall-recursive
+am__can_run_installinfo = \
+  case $$AM_UPDATE_INFO_DIR in \
+    n|no|NO) false;; \
+    *) (install-info --version) >/dev/null 2>&1;; \
+  esac
  RECURSIVE_CLEAN_TARGETS = mostlyclean-recursive clean-recursive        \
    distclean-recursive maintainer-clean-recursive
  AM_RECURSIVE_TARGETS = $(RECURSIVE_TARGETS:-recursive=) \
@@ -284,8 +305,8 @@ SHELL = @SHELL@
  SPEED_CYCLECOUNTER_OBJ = @SPEED_CYCLECOUNTER_OBJ@
  STRIP = @STRIP@
  TAL_OBJECT = @TAL_OBJECT@
+TUNE_LIBS = @TUNE_LIBS@
  TUNE_SQR_OBJ = @TUNE_SQR_OBJ@
-U = @U@
  U_FOR_BUILD = @U_FOR_BUILD@
  VERSION = @VERSION@
  WITH_READLINE_01 = @WITH_READLINE_01@
@@ -332,7 +353,6 @@ mandir = @mandir@
  mkdir_p = @mkdir_p@
  mpn_objects = @mpn_objects@
  mpn_objs_in_libgmp = @mpn_objs_in_libgmp@
-mpn_objs_in_libmp = @mpn_objs_in_libmp@
  oldincludedir = @oldincludedir@
  pdfdir = @pdfdir@
  prefix = @prefix@
@@ -346,7 +366,7 @@ target_alias = @target_alias@
  top_build_prefix = @top_build_prefix@
  top_builddir = @top_builddir@
  top_srcdir = @top_srcdir@
-SUBDIRS = . devel mpn mpz mpq mpf rand misc cxx mpbsd
+SUBDIRS = . devel mpn mpz mpq mpf rand misc cxx
  
  # COMPILE minus CC.
  #
@@ -379,7 +399,9 @@ RM_TMP = rm -f
  INCLUDES = -I$(top_srcdir)
  LDADD = libtests.la $(top_builddir)/libgmp.la
  check_LTLIBRARIES = libtests.la
-EXTRA_libtests_la_SOURCES = amd64call.asm amd64check.c x86call.asm x86check.c
+EXTRA_libtests_la_SOURCES = amd64call.asm amd64check.c x86call.asm x86check.c \
+    arm32call.asm arm32check.c
+
  libtests_la_SOURCES = tests.h \
    memory.c misc.c refmpf.c refmpn.c refmpq.c refmpz.c spinner.c trace.c
  
@@ -411,6 +433,7 @@ Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
             echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \
             cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \
         esac;
+$(srcdir)/../mpn/Makeasm.am:
  
  $(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)
         cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
@@ -429,7 +452,7 @@ clean-checkLTLIBRARIES:
           echo "rm -f \"$${dir}/so_locations\""; \
           rm -f "$${dir}/so_locations"; \
         done
-libtests.la: $(libtests_la_OBJECTS) $(libtests_la_DEPENDENCIES) 
+libtests.la: $(libtests_la_OBJECTS) $(libtests_la_DEPENDENCIES) $(EXTRA_libtests_la_DEPENDENCIES) 
         $(LINK)  $(libtests_la_OBJECTS) $(libtests_la_LIBADD) $(LIBS)
  
  clean-checkPROGRAMS:
@@ -440,31 +463,31 @@ clean-checkPROGRAMS:
         list=`for p in $$list; do echo "$$p"; done | sed 's/$(EXEEXT)$$//'`; \
         echo " rm -f" $$list; \
         rm -f $$list
-t-bswap$(EXEEXT): $(t_bswap_OBJECTS) $(t_bswap_DEPENDENCIES) 
+t-bswap$(EXEEXT): $(t_bswap_OBJECTS) $(t_bswap_DEPENDENCIES) $(EXTRA_t_bswap_DEPENDENCIES) 
         @rm -f t-bswap$(EXEEXT)
         $(LINK) $(t_bswap_OBJECTS) $(t_bswap_LDADD) $(LIBS)
-t-constants$(EXEEXT): $(t_constants_OBJECTS) $(t_constants_DEPENDENCIES) 
+t-constants$(EXEEXT): $(t_constants_OBJECTS) $(t_constants_DEPENDENCIES) $(EXTRA_t_constants_DEPENDENCIES) 
         @rm -f t-constants$(EXEEXT)
         $(LINK) $(t_constants_OBJECTS) $(t_constants_LDADD) $(LIBS)
-t-count_zeros$(EXEEXT): $(t_count_zeros_OBJECTS) $(t_count_zeros_DEPENDENCIES) 
+t-count_zeros$(EXEEXT): $(t_count_zeros_OBJECTS) $(t_count_zeros_DEPENDENCIES) $(EXTRA_t_count_zeros_DEPENDENCIES) 
         @rm -f t-count_zeros$(EXEEXT)
         $(LINK) $(t_count_zeros_OBJECTS) $(t_count_zeros_LDADD) $(LIBS)
-t-gmpmax$(EXEEXT): $(t_gmpmax_OBJECTS) $(t_gmpmax_DEPENDENCIES) 
+t-gmpmax$(EXEEXT): $(t_gmpmax_OBJECTS) $(t_gmpmax_DEPENDENCIES) $(EXTRA_t_gmpmax_DEPENDENCIES) 
         @rm -f t-gmpmax$(EXEEXT)
         $(LINK) $(t_gmpmax_OBJECTS) $(t_gmpmax_LDADD) $(LIBS)
-t-hightomask$(EXEEXT): $(t_hightomask_OBJECTS) $(t_hightomask_DEPENDENCIES) 
+t-hightomask$(EXEEXT): $(t_hightomask_OBJECTS) $(t_hightomask_DEPENDENCIES) $(EXTRA_t_hightomask_DEPENDENCIES) 
         @rm -f t-hightomask$(EXEEXT)
         $(LINK) $(t_hightomask_OBJECTS) $(t_hightomask_LDADD) $(LIBS)
-t-modlinv$(EXEEXT): $(t_modlinv_OBJECTS) $(t_modlinv_DEPENDENCIES) 
+t-modlinv$(EXEEXT): $(t_modlinv_OBJECTS) $(t_modlinv_DEPENDENCIES) $(EXTRA_t_modlinv_DEPENDENCIES) 
         @rm -f t-modlinv$(EXEEXT)
         $(LINK) $(t_modlinv_OBJECTS) $(t_modlinv_LDADD) $(LIBS)
-t-parity$(EXEEXT): $(t_parity_OBJECTS) $(t_parity_DEPENDENCIES) 
+t-parity$(EXEEXT): $(t_parity_OBJECTS) $(t_parity_DEPENDENCIES) $(EXTRA_t_parity_DEPENDENCIES) 
         @rm -f t-parity$(EXEEXT)
         $(LINK) $(t_parity_OBJECTS) $(t_parity_LDADD) $(LIBS)
-t-popc$(EXEEXT): $(t_popc_OBJECTS) $(t_popc_DEPENDENCIES) 
+t-popc$(EXEEXT): $(t_popc_OBJECTS) $(t_popc_DEPENDENCIES) $(EXTRA_t_popc_DEPENDENCIES) 
         @rm -f t-popc$(EXEEXT)
         $(LINK) $(t_popc_OBJECTS) $(t_popc_LDADD) $(LIBS)
-t-sub$(EXEEXT): $(t_sub_OBJECTS) $(t_sub_DEPENDENCIES) 
+t-sub$(EXEEXT): $(t_sub_OBJECTS) $(t_sub_DEPENDENCIES) $(EXTRA_t_sub_DEPENDENCIES) 
         @rm -f t-sub$(EXEEXT)
         $(LINK) $(t_sub_OBJECTS) $(t_sub_LDADD) $(LIBS)
  
@@ -473,11 +496,6 @@ mostlyclean-compile:
  
  distclean-compile:
         -rm -f *.tab.c
-$(top_builddir)/ansi2knr:
-       $(am__cd) $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) ./ansi2knr
-
-mostlyclean-kr:
-       -test "$U" = "" || rm -f *_.c
  
  .c.o:
         $(COMPILE) -c $<
@@ -487,55 +505,6 @@ mostlyclean-kr:
  
  .c.lo:
         $(LTCOMPILE) -c -o $@ $<
-amd64check_.c: amd64check.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/amd64check.c; then echo $(srcdir)/amd64check.c; else echo amd64check.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-memory_.c: memory.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/memory.c; then echo $(srcdir)/memory.c; else echo memory.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-misc_.c: misc.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/misc.c; then echo $(srcdir)/misc.c; else echo misc.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-refmpf_.c: refmpf.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/refmpf.c; then echo $(srcdir)/refmpf.c; else echo refmpf.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-refmpn_.c: refmpn.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/refmpn.c; then echo $(srcdir)/refmpn.c; else echo refmpn.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-refmpq_.c: refmpq.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/refmpq.c; then echo $(srcdir)/refmpq.c; else echo refmpq.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-refmpz_.c: refmpz.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/refmpz.c; then echo $(srcdir)/refmpz.c; else echo refmpz.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-spinner_.c: spinner.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/spinner.c; then echo $(srcdir)/spinner.c; else echo spinner.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-bswap_.c: t-bswap.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-bswap.c; then echo $(srcdir)/t-bswap.c; else echo t-bswap.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-constants_.c: t-constants.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-constants.c; then echo $(srcdir)/t-constants.c; else echo t-constants.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-count_zeros_.c: t-count_zeros.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-count_zeros.c; then echo $(srcdir)/t-count_zeros.c; else echo t-count_zeros.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-gmpmax_.c: t-gmpmax.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-gmpmax.c; then echo $(srcdir)/t-gmpmax.c; else echo t-gmpmax.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-hightomask_.c: t-hightomask.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-hightomask.c; then echo $(srcdir)/t-hightomask.c; else echo t-hightomask.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-modlinv_.c: t-modlinv.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-modlinv.c; then echo $(srcdir)/t-modlinv.c; else echo t-modlinv.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-parity_.c: t-parity.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-parity.c; then echo $(srcdir)/t-parity.c; else echo t-parity.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-popc_.c: t-popc.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-popc.c; then echo $(srcdir)/t-popc.c; else echo t-popc.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-sub_.c: t-sub.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-sub.c; then echo $(srcdir)/t-sub.c; else echo t-sub.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-trace_.c: trace.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/trace.c; then echo $(srcdir)/trace.c; else echo trace.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-x86check_.c: x86check.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/x86check.c; then echo $(srcdir)/x86check.c; else echo x86check.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-amd64check_.$(OBJEXT) amd64check_.lo memory_.$(OBJEXT) memory_.lo \
-misc_.$(OBJEXT) misc_.lo refmpf_.$(OBJEXT) refmpf_.lo \
-refmpn_.$(OBJEXT) refmpn_.lo refmpq_.$(OBJEXT) refmpq_.lo \
-refmpz_.$(OBJEXT) refmpz_.lo spinner_.$(OBJEXT) spinner_.lo \
-t-bswap_.$(OBJEXT) t-bswap_.lo t-constants_.$(OBJEXT) t-constants_.lo \
-t-count_zeros_.$(OBJEXT) t-count_zeros_.lo t-gmpmax_.$(OBJEXT) \
-t-gmpmax_.lo t-hightomask_.$(OBJEXT) t-hightomask_.lo \
-t-modlinv_.$(OBJEXT) t-modlinv_.lo t-parity_.$(OBJEXT) t-parity_.lo \
-t-popc_.$(OBJEXT) t-popc_.lo t-sub_.$(OBJEXT) t-sub_.lo \
-trace_.$(OBJEXT) trace_.lo x86check_.$(OBJEXT) x86check_.lo : \
-$(ANSI2KNR)
  
  mostlyclean-libtool:
         -rm -f *.lo
@@ -759,14 +728,15 @@ check-TESTS: $(TESTS)
           fi; \
           dashes=`echo "$$dashes" | sed s/./=/g`; \
           if test "$$failed" -eq 0; then \
-           echo "$$grn$$dashes"; \
+           col="$$grn"; \
           else \
-           echo "$$red$$dashes"; \
+           col="$$red"; \
           fi; \
-         echo "$$banner"; \
-         test -z "$$skipped" || echo "$$skipped"; \
-         test -z "$$report" || echo "$$report"; \
-         echo "$$dashes$$std"; \
+         echo "$${col}$$dashes$${std}"; \
+         echo "$${col}$$banner$${std}"; \
+         test -z "$$skipped" || echo "$${col}$$skipped$${std}"; \
+         test -z "$$report" || echo "$${col}$$report$${std}"; \
+         echo "$${col}$$dashes$${std}"; \
           test "$$failed" -eq 0; \
         else :; fi
  
@@ -802,13 +772,10 @@ distdir: $(DISTFILES)
         done
         @list='$(DIST_SUBDIRS)'; for subdir in $$list; do \
           if test "$$subdir" = .; then :; else \
-           test -d "$(distdir)/$$subdir" \
-           || $(MKDIR_P) "$(distdir)/$$subdir" \
-           || exit 1; \
-         fi; \
-       done
-       @list='$(DIST_SUBDIRS)'; for subdir in $$list; do \
-         if test "$$subdir" = .; then :; else \
+           $(am__make_dryrun) \
+             || test -d "$(distdir)/$$subdir" \
+             || $(MKDIR_P) "$(distdir)/$$subdir" \
+             || exit 1; \
             dir1=$$subdir; dir2="$(distdir)/$$subdir"; \
             $(am__relativize); \
             new_distdir=$$reldir; \
@@ -845,10 +812,15 @@ install-am: all-am
  
  installcheck: installcheck-recursive
  install-strip:
-       $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
-         install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
-         `test -z '$(STRIP)' || \
-           echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+       if test -z '$(STRIP)'; then \
+         $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+           install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+             install; \
+       else \
+         $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+           install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+           "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
+       fi
  mostlyclean-generic:
  
  clean-generic:
@@ -916,7 +888,7 @@ maintainer-clean-am: distclean-am maintainer-clean-generic
  
  mostlyclean: mostlyclean-recursive
  
-mostlyclean-am: mostlyclean-compile mostlyclean-generic mostlyclean-kr \
+mostlyclean-am: mostlyclean-compile mostlyclean-generic \
         mostlyclean-libtool
  
  pdf: pdf-recursive
@@ -929,9 +901,8 @@ ps-am:
  
  uninstall-am:
  
-.MAKE: $(RECURSIVE_CLEAN_TARGETS) $(RECURSIVE_TARGETS) \
-       $(top_builddir)/ansi2knr check-am ctags-recursive install-am \
-       install-strip tags-recursive
+.MAKE: $(RECURSIVE_CLEAN_TARGETS) $(RECURSIVE_TARGETS) check-am \
+       ctags-recursive install-am install-strip tags-recursive
  
  .PHONY: $(RECURSIVE_CLEAN_TARGETS) $(RECURSIVE_TARGETS) CTAGS GTAGS \
         all all-am check check-TESTS check-am clean \
@@ -946,8 +917,8 @@ uninstall-am:
         install-strip installcheck installcheck-am installdirs \
         installdirs-am maintainer-clean maintainer-clean-generic \
         mostlyclean mostlyclean-compile mostlyclean-generic \
-       mostlyclean-kr mostlyclean-libtool pdf pdf-am ps ps-am tags \
-       tags-recursive uninstall uninstall-am
+       mostlyclean-libtool pdf pdf-am ps ps-am tags tags-recursive \
+       uninstall uninstall-am
  
  
  # .s assembler, no preprocessing.
diff --git a/tests/amd64call.asm b/tests/amd64call.asm

index f23f4768a7cfedf01cb573d13acb32bcc02937a3..05daceb8aa72bcdb90aca39c8c25429d53c79f52 100644 (file)
--- a/tests/amd64call.asm
+++ b/tests/amd64call.asm
@@ -1,23 +1,27 @@
  dnl  AMD64 calling conventions checking.
  
-dnl  Copyright 2000, 2003, 2004, 2006, 2007 Free Software Foundation, Inc.
-dnl
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or
-dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  Copyright 2000, 2003, 2004, 2006, 2007, 2010 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library test suite.
+
+dnl  The GNU MP Library test suite is free software; you can redistribute it
+dnl  and/or modify it under the terms of the GNU General Public License as
  dnl  published by the Free Software Foundation; either version 3 of the
  dnl  License, or (at your option) any later version.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful,
-dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
-dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-dnl  Lesser General Public License for more details.
-dnl
-dnl  You should have received a copy of the GNU Lesser General Public License
-dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+dnl  The GNU MP Library test suite is distributed in the hope that it will be
+dnl  useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+dnl  Public License for more details.
+
+dnl  You should have received a copy of the GNU General Public License along
+dnl  with the GNU MP Library test suite.  If not, see
+dnl  http://www.gnu.org/licenses/.
  
  
+dnl  The current version of the code attempts to keep the call/return
+dnl  prediction stack valid, by matching calls and returns.
+
  include(`../config.m4')
  
  
@@ -37,17 +41,17 @@ C
  C Execute an fstcw, returning the current x87 control word.
  
  PROLOGUE(x86_fstcw)
-        movq   $0, -8(%rsp)
-        fstcw  -8(%rsp)
-        movq   -8(%rsp), %rax
+       movq    $0, -8(%rsp)
+       fstcw   -8(%rsp)
+       movq    -8(%rsp), %rax
         ret
  EPILOGUE()
  
  
-dnl  Instrumented profiling won't come out quite right below, since we don't
-dnl  do an actual "ret".  There's only a few instructions here, so there's
-dnl  no great need to get them separately accounted, just let them get
-dnl  attributed to the caller.
+dnl  Instrumented profiling won't come out quite right below, since we don't do
+dnl  an actual "ret".  There's only a few instructions here, so there's no
+dnl  great need to get them separately accounted, just let them get attributed
+dnl  to the caller.  FIXME this comment might no longer be true.
  
  ifelse(WANT_PROFILING,instrument,
  `define(`WANT_PROFILING',no)')
@@ -97,69 +101,67 @@ m4_assert_numargs(1)
         TEXT
         ALIGN(32)
  PROLOGUE(calling_conventions)
-       push    %rdi
-       movq    G(calling_conventions_values)@GOTPCREL(%rip), %rdi
-
-       movq    8(%rsp), %rax
-       movq    %rax, RETADDR(%rdi)
-
-       leaq    L(return)(%rip), %rax
-       movq    %rax, 8(%rsp)
-
-       movq    %rbx, SAVE_RBX(%rdi)
-       movq    %rbp, SAVE_RBP(%rdi)
-       movq    %r12, SAVE_R12(%rdi)
-       movq    %r13, SAVE_R13(%rdi)
-       movq    %r14, SAVE_R14(%rdi)
-       movq    %r15, SAVE_R15(%rdi)
-
-       C values we expect to see unchanged, as per amd64check.c
-       movq    WANT_RBX(%rdi), %rbx
-       movq    WANT_RBP(%rdi), %rbp
-       movq    WANT_R12(%rdi), %r12
-       movq    WANT_R13(%rdi), %r13
-       movq    WANT_R14(%rdi), %r14
-       movq    WANT_R15(%rdi), %r15
-
-       C Try to provoke a problem by starting with junk in the registers,
-       C especially %rax which will be the return value.
-       C
-       C ENHANCE-ME: If we knew how many of the parameter registers were
-       C actually being used we could put junk in the rest.  Maybe we could
-       C get try.c to communicate this to us.
-C      movq    JUNK_RAX(%rdi), %rax            C overwritten below anyway
-       movq    JUNK_R10(%rdi), %r10
-       movq    JUNK_R11(%rdi), %r11
+       movq    G(calling_conventions_values)@GOTPCREL(%rip), %rax
+       popq    RETADDR(%rax)
+
+       movq    %rbx, SAVE_RBX(%rax)
+       movq    %rbp, SAVE_RBP(%rax)
+       movq    %r12, SAVE_R12(%rax)
+       movq    %r13, SAVE_R13(%rax)
+       movq    %r14, SAVE_R14(%rax)
+       movq    %r15, SAVE_R15(%rax)
+
+       C Values we expect to see unchanged, as per amd64check.c
+       movq    WANT_RBX(%rax), %rbx
+       movq    WANT_RBP(%rax), %rbp
+       movq    WANT_R12(%rax), %r12
+       movq    WANT_R13(%rax), %r13
+       movq    WANT_R14(%rax), %r14
+       movq    WANT_R15(%rax), %r15
+
+       C Try to provoke a problem by starting with junk in the caller-saves
+       C registers, especially %rax which will be the return value.
+C      movq    JUNK_RAX(%rax), %rax            C overwritten below anyway
+       movq    JUNK_R10(%rax), %r10
+       movq    JUNK_R11(%rax), %r11
  
         movq    G(calling_conventions_function)@GOTPCREL(%rip), %rax
-       pop     %rdi
-       jmp     *(%rax)
+       call    *(%rax)
  
-L(return):
-       movq    G(calling_conventions_values)@GOTPCREL(%rip), %rdi
+       movq    G(calling_conventions_values)@GOTPCREL(%rip), %rcx
  
-       movq    %rbx, RBX(%rdi)
-       movq    %rbp, RBP(%rdi)
-       movq    %r12, R12(%rdi)
-       movq    %r13, R13(%rdi)
-       movq    %r14, R14(%rdi)
-       movq    %r15, R15(%rdi)
+       movq    %rbx, RBX(%rcx)
+       movq    %rbp, RBP(%rcx)
+       movq    %r12, R12(%rcx)
+       movq    %r13, R13(%rcx)
+       movq    %r14, R14(%rcx)
+       movq    %r15, R15(%rcx)
  
         pushfq
         popq    %rbx
-       movq    %rbx, RFLAGS(%rdi)
-
-       movq    G(calling_conventions_fenv)@GOTPCREL(%rip), %rbx
-       fstenv  (%rbx)
+       movq    %rbx, RFLAGS(%rcx)
+
+       movq    SAVE_RBX(%rcx), %rbx
+       movq    SAVE_RBP(%rcx), %rbp
+       movq    SAVE_R12(%rcx), %r12
+       movq    SAVE_R13(%rcx), %r13
+       movq    SAVE_R14(%rcx), %r14
+       movq    SAVE_R15(%rcx), %r15
+
+       C Overwrite parameter registers
+C      mov     JUNK_R9(%rcx), %r9
+C      mov     JUNK_R8(%rcx), %r8
+C      mov     JUNK_RCX(%rcx), %rcx
+C      mov     JUNK_RDX(%rcx), %rdx
+C      mov     JUNK_RSI(%rcx), %rsi
+C      mov     JUNK_RDI(%rcx), %rdi
+
+       pushq   RETADDR(%rcx)
+
+       movq    G(calling_conventions_fenv)@GOTPCREL(%rip), %rcx
+       fstenv  (%rcx)
         finit
  
-       movq    SAVE_RBX(%rdi), %rbx
-       movq    SAVE_RBP(%rdi), %rbp
-       movq    SAVE_R12(%rdi), %r12
-       movq    SAVE_R13(%rdi), %r13
-       movq    SAVE_R14(%rdi), %r14
-       movq    SAVE_R15(%rdi), %r15
-
-       jmp     *RETADDR(%rdi)
+       ret
  
  EPILOGUE()
diff --git a/tests/amd64check.c b/tests/amd64check.c

index 46eaae0d850c71981bc27f00ddd7a365c62c1b5c..7c313f3d3b168ac82b102166f53d92406bbd05cb 100644 (file)
--- a/tests/amd64check.c
+++ b/tests/amd64check.c
@@ -2,20 +2,20 @@
  
  Copyright 2000, 2001, 2004, 2007 Free Software Foundation, Inc.
  
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
  
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
  
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
  
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see http://www.gnu.org/licenses/.  */
  
  #include <stdio.h>
  #include "gmp.h"
@@ -58,7 +58,7 @@ struct {
  } calling_conventions_fenv;
  
  
-char *regname[6] = {"rbx", "rbp", "r12", "r13", "r14", "r15"};
+const char *regname[6] = {"rbx", "rbp", "r12", "r13", "r14", "r15"};
  
  #define DIR_BIT(rflags)   (((rflags) & (1<<10)) != 0)
  
diff --git a/tests/arm32call.asm b/tests/arm32call.asm

new file mode 100644 (file)

index 0000000..167ee9b
--- /dev/null
+++ b/tests/arm32call.asm
@@ -0,0 +1,83 @@
+dnl  ARM32 calling conventions checking.
+
+dnl  Copyright 2000, 2003, 2004, 2006, 2007, 2010, 2013 Free Software
+dnl  Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library test suite.
+
+dnl  The GNU MP Library test suite is free software; you can redistribute it
+dnl  and/or modify it under the terms of the GNU General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
+
+dnl  The GNU MP Library test suite is distributed in the hope that it will be
+dnl  useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+dnl  Public License for more details.
+
+dnl  You should have received a copy of the GNU General Public License along
+dnl  with the GNU MP Library test suite.  If not, see
+dnl  http://www.gnu.org/licenses/.
+
+
+dnl  The current version of the code attempts to keep the call/return
+dnl  prediction stack valid, by matching calls and returns.
+
+include(`../config.m4')
+
+
+C int calling_conventions (...);
+C
+C The global variable "calling_conventions_function" is the function to
+C call, with the arguments as passed here.
+
+define(`WANT_CALLEE_SAVES',    eval(4*0))
+define(`SAVE_CALLEE_SAVES',    eval(4*8))
+define(`RETADDR',              eval(4*16))
+define(`GOT_CALLEE_SAVES',     eval(4*17))
+define(`JUNK_PARAMS',          eval(4*25))
+
+       TEXT
+       ALIGN(32)
+PROLOGUE(calling_conventions)
+       LEA(    r12, calling_conventions_values)
+
+       C Preserve callee-saves registers, including the link register r14
+       add     r12, r12, #SAVE_CALLEE_SAVES
+       stm     r12, {r4-r11,r14}
+       sub     r12, r12, #SAVE_CALLEE_SAVES
+
+       C Put chosen junk into callee-saves registers
+       add     r12, r12, #WANT_CALLEE_SAVES
+       ldm     r12, {r4-r11}
+       sub     r12, r12, #WANT_CALLEE_SAVES
+
+       C No caller-saves registers on arm except r12 and parameter registers
+       C
+
+       C Make the actual call
+       LEA(    r12, calling_conventions_function)
+       ldr     r12, [r12]
+       mov     r14, pc
+       bx      r12
+
+       LEA(    r12, calling_conventions_values)
+
+       C Save callee-saves registers after call
+       add     r12, r12, #GOT_CALLEE_SAVES
+       stm     r12, {r4-r11}
+       sub     r12, r12, #GOT_CALLEE_SAVES
+
+       C Restore callee-saves registers, including the link register r14
+       add     r12, r12, #SAVE_CALLEE_SAVES
+       ldm     r12, {r4-r11,r14}
+       sub     r12, r12, #SAVE_CALLEE_SAVES
+
+       C Overwrite parameter registers.  Note that we overwrite r1, which
+       C could hold one half of a 64-bit return value, since we don't use that
+       C in GMP.
+       add     r12, r12, #JUNK_PARAMS
+       ldm     r12, {r1-r3}
+
+       bx      r14
+EPILOGUE()
diff --git a/tests/arm32check.c b/tests/arm32check.c

new file mode 100644 (file)

index 0000000..5e8f837
--- /dev/null
+++ b/tests/arm32check.c
@@ -0,0 +1,96 @@
+/* ARM32 calling conventions checking.
+
+Copyright 2000, 2001, 2004, 2007 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see http://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "tests.h"
+
+
+/* Vector of constants and register values.  */
+mp_limb_t calling_conventions_values[29] =
+{
+  0x12345678,  /*  0 want_r4 */
+  0x87654321,  /*  1 want_r5 */
+  0x89ABCDEF,  /*  2 want_r6 */
+  0xFEDCBA98,  /*  3 want_r7 */
+  0xDEADBEEF,  /*  4 want_r8 */
+  0xBADECAFE,  /*  5 want_r9 */
+  0xFFEEDDCC,  /*  6 want_r10 */
+  0xBBAA9988,  /*  7 want_r11 */
+
+  0x00000000,  /*  8 save_r4 */
+  0x00000000,  /*  9 save_r5 */
+  0x00000000,  /* 10 save_r6 */
+  0x00000000,  /* 11 save_r7 */
+  0x00000000,  /* 12 save_r8 */
+  0x00000000,  /* 13 save_r9 */
+  0x00000000,  /* 14 save_r10 */
+  0x00000000,  /* 15 save_r11 */
+  0x00000000,  /* 16 save_r14 */
+
+  0x00000000,  /* 17 got_r4 */
+  0x00000000,  /* 18 got_r5 */
+  0x00000000,  /* 19 got_r6 */
+  0x00000000,  /* 20 got_r7 */
+  0x00000000,  /* 21 got_r8 */
+  0x00000000,  /* 22 got_r9 */
+  0x00000000,  /* 23 got_r10 */
+  0x00000000,  /* 24 got_r11 */
+
+  0x00112233,  /* 25 junk_r0 */
+  0x44556677,  /* 26 junk_r1 */
+  0x12344321,  /* 27 junk_r2 */
+  0x56788765,  /* 28 junk_r3 */
+};
+
+/* Index starts for various regions in above vector.  */
+#define WANT_CALLEE_SAVES      0
+#define SAVE_CALLEE_SAVES      8
+#define RETADDR                        16
+#define GOT_CALLEE_SAVES       17
+#define JUNK_PARAMS            25
+
+/* Compare the got_r4..got_r11 entries of calling_conventions_values with the
+   want_r4..want_r11 entries, printing each mismatch.  Return 1 if ok, 0 if not */
+int
+calling_conventions_check (void)
+{
+  const char  *header = "Violated calling conventions:\n";
+  int  ret = 1;
+  int i;
+
+#define CHECK(callreg, regnum, value)                                  \
+  if (callreg != value)                                                        \
+    {                                                                  \
+      printf ("%s   r%d        got 0x%08lX want 0x%08lX\n",                    \
+             header, regnum, callreg, value);                          \
+      header = "";                                                     \
+      ret = 0;                                                         \
+    }
+
+  for (i = 0; i < 8; i++)
+    {
+      CHECK (calling_conventions_values[GOT_CALLEE_SAVES + i],
+            i + 4,
+            calling_conventions_values[WANT_CALLEE_SAVES + i]);
+    }
+
+  return ret;
+}
diff --git a/tests/cxx/Makefile.am b/tests/cxx/Makefile.am

index 41764ee6cc1b74e6dc4d78fc5affea0ba9571d81..dfcb1ea343f6c58fc954135318aaf47cb99afdb2 100644 (file)
--- a/tests/cxx/Makefile.am
+++ b/tests/cxx/Makefile.am
@@ -2,36 +2,42 @@
  
  # Copyright 2001, 2002, 2003, 2004 Free Software Foundation, Inc.
  #
-# This file is part of the GNU MP Library.
+# This file is part of the GNU MP Library test suite.
  #
-# The GNU MP Library is free software; you can redistribute it and/or modify
-# it under the terms of the GNU Lesser General Public License as published by
-# the Free Software Foundation; either version 3 of the License, or (at your
-# option) any later version.
+# The GNU MP Library test suite is free software; you can redistribute it
+# and/or modify it under the terms of the GNU General Public License as
+# published by the Free Software Foundation; either version 3 of the License,
+# or (at your option) any later version.
  #
-# The GNU MP Library is distributed in the hope that it will be useful, but
-# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-# License for more details.
+# The GNU MP Library test suite is distributed in the hope that it will be
+# useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+# Public License for more details.
  #
-# You should have received a copy of the GNU Lesser General Public License
-# along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+# You should have received a copy of the GNU General Public License along with
+# the GNU MP Library test suite.  If not, see http://www.gnu.org/licenses/.
  
  
  # LDADD has an explicit -L of $(top_builddir)/.libs for the benefit of gcc
  # 3.2 on itanium2-hp-hpux11.22.  Without this option, the libgmp.sl.6
  # required by libgmpxx.sl (ie. in its NEEDED records) is not found by the
  # linker.  FIXME: Presumably libtool should do something about this itself.
+# -lm is needed for t-ops2 which compares the results of trunc and mpf_trunc.
  #
  INCLUDES = -I$(top_srcdir) -I$(top_srcdir)/tests
  LDADD = -L$(top_builddir)/.libs \
    $(top_builddir)/tests/libtests.la \
    $(top_builddir)/libgmpxx.la \
-  $(top_builddir)/libgmp.la
+  $(top_builddir)/libgmp.la \
+  -lm
  
  if WANT_CXX
-check_PROGRAMS = t-assign t-binary t-cast t-constr t-headers \
-  t-istream t-locale t-misc t-ops t-ostream t-prec t-rand t-ternary t-unary
+check_PROGRAMS = t-binary t-cast t-cxx11 \
+  t-headers t-iostream t-istream t-locale t-misc t-mix \
+  t-ops t-ops2 t-ops3 t-ostream t-prec \
+  t-ternary t-unary \
+  t-do-exceptions-work-at-all-with-this-compiler \
+  t-assign t-constr t-rand
  TESTS = $(check_PROGRAMS)
  endif
  
@@ -39,16 +45,23 @@ t_assign_SOURCES  = t-assign.cc
  t_binary_SOURCES  = t-binary.cc
  t_cast_SOURCES    = t-cast.cc
  t_constr_SOURCES  = t-constr.cc
+t_cxx11_SOURCES   = t-cxx11.cc
  t_headers_SOURCES = t-headers.cc
+t_iostream_SOURCES= t-iostream.cc
  t_istream_SOURCES = t-istream.cc
  t_locale_SOURCES  = t-locale.cc clocale.c
  t_misc_SOURCES    = t-misc.cc
+t_mix_SOURCES     = t-mix.cc
  t_ops_SOURCES     = t-ops.cc
+t_ops2_SOURCES    = t-ops2.cc
+t_ops3_SOURCES    = t-ops3.cc
  t_ostream_SOURCES = t-ostream.cc
  t_prec_SOURCES    = t-prec.cc
  t_rand_SOURCES    = t-rand.cc
  t_ternary_SOURCES = t-ternary.cc
  t_unary_SOURCES   = t-unary.cc
+t_do_exceptions_work_at_all_with_this_compiler_SOURCES = \
+  t-do-exceptions-work-at-all-with-this-compiler.cc
  
  $(top_builddir)/tests/libtests.la:
         cd $(top_builddir)/tests; $(MAKE) $(AM_MAKEFLAGS) libtests.la
diff --git a/tests/cxx/Makefile.in b/tests/cxx/Makefile.in

index 86592f9928e1cc6e03ca3337b8bb745e9d1fdd3a..9b56844618f5ba84e15a56cd381c9f1b1c3a5598 100644 (file)
--- a/tests/cxx/Makefile.in
+++ b/tests/cxx/Makefile.in
@@ -1,9 +1,9 @@
-# Makefile.in generated by automake 1.11.1 from Makefile.am.
+# Makefile.in generated by automake 1.11.6 from Makefile.am.
  # @configure_input@
  
  # Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
-# 2003, 2004, 2005, 2006, 2007, 2008, 2009  Free Software Foundation,
-# Inc.
+# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software
+# Foundation, Inc.
  # This Makefile.in is free software; the Free Software Foundation
  # gives unlimited permission to copy and/or distribute it,
  # with or without modifications, as long as this notice is preserved.
@@ -17,21 +17,38 @@
  
  # Copyright 2001, 2002, 2003, 2004 Free Software Foundation, Inc.
  #
-# This file is part of the GNU MP Library.
+# This file is part of the GNU MP Library test suite.
  #
-# The GNU MP Library is free software; you can redistribute it and/or modify
-# it under the terms of the GNU Lesser General Public License as published by
-# the Free Software Foundation; either version 3 of the License, or (at your
-# option) any later version.
+# The GNU MP Library test suite is free software; you can redistribute it
+# and/or modify it under the terms of the GNU General Public License as
+# published by the Free Software Foundation; either version 3 of the License,
+# or (at your option) any later version.
  #
-# The GNU MP Library is distributed in the hope that it will be useful, but
-# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-# License for more details.
+# The GNU MP Library test suite is distributed in the hope that it will be
+# useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+# Public License for more details.
  #
-# You should have received a copy of the GNU Lesser General Public License
-# along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+# You should have received a copy of the GNU General Public License along with
+# the GNU MP Library test suite.  If not, see http://www.gnu.org/licenses/.
  VPATH = @srcdir@
+am__make_dryrun = \
+  { \
+    am__dry=no; \
+    case $$MAKEFLAGS in \
+      *\\[\ \  ]*) \
+        echo 'am--echo: ; @echo "AM"  OK' | $(MAKE) -f - 2>/dev/null \
+          | grep '^AM OK$$' >/dev/null || am__dry=yes;; \
+      *) \
+        for am__flg in $$MAKEFLAGS; do \
+          case $$am__flg in \
+            *=*|--*) ;; \
+            *n*) am__dry=yes; break;; \
+          esac; \
+        done;; \
+    esac; \
+    test $$am__dry = yes; \
+  }
  pkgdatadir = $(datadir)/@PACKAGE@
  pkgincludedir = $(includedir)/@PACKAGE@
  pkglibdir = $(libdir)/@PACKAGE@
@@ -50,19 +67,22 @@ PRE_UNINSTALL = :
  POST_UNINSTALL = :
  build_triplet = @build@
  host_triplet = @host@
-ANSI2KNR = $(top_builddir)/ansi2knr
-@WANT_CXX_TRUE@check_PROGRAMS = t-assign$(EXEEXT) t-binary$(EXEEXT) \
-@WANT_CXX_TRUE@        t-cast$(EXEEXT) t-constr$(EXEEXT) \
-@WANT_CXX_TRUE@        t-headers$(EXEEXT) t-istream$(EXEEXT) \
+@WANT_CXX_TRUE@check_PROGRAMS = t-binary$(EXEEXT) t-cast$(EXEEXT) \
+@WANT_CXX_TRUE@        t-cxx11$(EXEEXT) t-headers$(EXEEXT) \
+@WANT_CXX_TRUE@        t-iostream$(EXEEXT) t-istream$(EXEEXT) \
  @WANT_CXX_TRUE@        t-locale$(EXEEXT) t-misc$(EXEEXT) \
-@WANT_CXX_TRUE@        t-ops$(EXEEXT) t-ostream$(EXEEXT) \
-@WANT_CXX_TRUE@        t-prec$(EXEEXT) t-rand$(EXEEXT) \
-@WANT_CXX_TRUE@        t-ternary$(EXEEXT) t-unary$(EXEEXT)
+@WANT_CXX_TRUE@        t-mix$(EXEEXT) t-ops$(EXEEXT) t-ops2$(EXEEXT) \
+@WANT_CXX_TRUE@        t-ops3$(EXEEXT) t-ostream$(EXEEXT) \
+@WANT_CXX_TRUE@        t-prec$(EXEEXT) t-ternary$(EXEEXT) \
+@WANT_CXX_TRUE@        t-unary$(EXEEXT) \
+@WANT_CXX_TRUE@        t-do-exceptions-work-at-all-with-this-compiler$(EXEEXT) \
+@WANT_CXX_TRUE@        t-assign$(EXEEXT) t-constr$(EXEEXT) \
+@WANT_CXX_TRUE@        t-rand$(EXEEXT)
  subdir = tests/cxx
  DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in
  ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
  am__aclocal_m4_deps = $(top_srcdir)/acinclude.m4 \
-       $(top_srcdir)/configure.in
+       $(top_srcdir)/configure.ac
  am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
         $(ACLOCAL_M4)
  mkinstalldirs = $(install_sh) -d
@@ -89,17 +109,35 @@ t_constr_OBJECTS = $(am_t_constr_OBJECTS)
  t_constr_LDADD = $(LDADD)
  t_constr_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
         $(top_builddir)/libgmpxx.la $(top_builddir)/libgmp.la
+am_t_cxx11_OBJECTS = t-cxx11.$(OBJEXT)
+t_cxx11_OBJECTS = $(am_t_cxx11_OBJECTS)
+t_cxx11_LDADD = $(LDADD)
+t_cxx11_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
+       $(top_builddir)/libgmpxx.la $(top_builddir)/libgmp.la
+am_t_do_exceptions_work_at_all_with_this_compiler_OBJECTS =  \
+       t-do-exceptions-work-at-all-with-this-compiler.$(OBJEXT)
+t_do_exceptions_work_at_all_with_this_compiler_OBJECTS =  \
+       $(am_t_do_exceptions_work_at_all_with_this_compiler_OBJECTS)
+t_do_exceptions_work_at_all_with_this_compiler_LDADD = $(LDADD)
+t_do_exceptions_work_at_all_with_this_compiler_DEPENDENCIES =  \
+       $(top_builddir)/tests/libtests.la $(top_builddir)/libgmpxx.la \
+       $(top_builddir)/libgmp.la
  am_t_headers_OBJECTS = t-headers.$(OBJEXT)
  t_headers_OBJECTS = $(am_t_headers_OBJECTS)
  t_headers_LDADD = $(LDADD)
  t_headers_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
         $(top_builddir)/libgmpxx.la $(top_builddir)/libgmp.la
+am_t_iostream_OBJECTS = t-iostream.$(OBJEXT)
+t_iostream_OBJECTS = $(am_t_iostream_OBJECTS)
+t_iostream_LDADD = $(LDADD)
+t_iostream_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
+       $(top_builddir)/libgmpxx.la $(top_builddir)/libgmp.la
  am_t_istream_OBJECTS = t-istream.$(OBJEXT)
  t_istream_OBJECTS = $(am_t_istream_OBJECTS)
  t_istream_LDADD = $(LDADD)
  t_istream_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
         $(top_builddir)/libgmpxx.la $(top_builddir)/libgmp.la
-am_t_locale_OBJECTS = t-locale.$(OBJEXT) clocale$U.$(OBJEXT)
+am_t_locale_OBJECTS = t-locale.$(OBJEXT) clocale.$(OBJEXT)
  t_locale_OBJECTS = $(am_t_locale_OBJECTS)
  t_locale_LDADD = $(LDADD)
  t_locale_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
@@ -109,11 +147,26 @@ t_misc_OBJECTS = $(am_t_misc_OBJECTS)
  t_misc_LDADD = $(LDADD)
  t_misc_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
         $(top_builddir)/libgmpxx.la $(top_builddir)/libgmp.la
+am_t_mix_OBJECTS = t-mix.$(OBJEXT)
+t_mix_OBJECTS = $(am_t_mix_OBJECTS)
+t_mix_LDADD = $(LDADD)
+t_mix_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
+       $(top_builddir)/libgmpxx.la $(top_builddir)/libgmp.la
  am_t_ops_OBJECTS = t-ops.$(OBJEXT)
  t_ops_OBJECTS = $(am_t_ops_OBJECTS)
  t_ops_LDADD = $(LDADD)
  t_ops_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
         $(top_builddir)/libgmpxx.la $(top_builddir)/libgmp.la
+am_t_ops2_OBJECTS = t-ops2.$(OBJEXT)
+t_ops2_OBJECTS = $(am_t_ops2_OBJECTS)
+t_ops2_LDADD = $(LDADD)
+t_ops2_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
+       $(top_builddir)/libgmpxx.la $(top_builddir)/libgmp.la
+am_t_ops3_OBJECTS = t-ops3.$(OBJEXT)
+t_ops3_OBJECTS = $(am_t_ops3_OBJECTS)
+t_ops3_LDADD = $(LDADD)
+t_ops3_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
+       $(top_builddir)/libgmpxx.la $(top_builddir)/libgmp.la
  am_t_ostream_OBJECTS = t-ostream.$(OBJEXT)
  t_ostream_OBJECTS = $(am_t_ostream_OBJECTS)
  t_ostream_LDADD = $(LDADD)
@@ -161,15 +214,26 @@ CXXLINK = $(LIBTOOL) --tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \
         --mode=link $(CXXLD) $(AM_CXXFLAGS) $(CXXFLAGS) $(AM_LDFLAGS) \
         $(LDFLAGS) -o $@
  SOURCES = $(t_assign_SOURCES) $(t_binary_SOURCES) $(t_cast_SOURCES) \
-       $(t_constr_SOURCES) $(t_headers_SOURCES) $(t_istream_SOURCES) \
-       $(t_locale_SOURCES) $(t_misc_SOURCES) $(t_ops_SOURCES) \
-       $(t_ostream_SOURCES) $(t_prec_SOURCES) $(t_rand_SOURCES) \
-       $(t_ternary_SOURCES) $(t_unary_SOURCES)
+       $(t_constr_SOURCES) $(t_cxx11_SOURCES) \
+       $(t_do_exceptions_work_at_all_with_this_compiler_SOURCES) \
+       $(t_headers_SOURCES) $(t_iostream_SOURCES) \
+       $(t_istream_SOURCES) $(t_locale_SOURCES) $(t_misc_SOURCES) \
+       $(t_mix_SOURCES) $(t_ops_SOURCES) $(t_ops2_SOURCES) \
+       $(t_ops3_SOURCES) $(t_ostream_SOURCES) $(t_prec_SOURCES) \
+       $(t_rand_SOURCES) $(t_ternary_SOURCES) $(t_unary_SOURCES)
  DIST_SOURCES = $(t_assign_SOURCES) $(t_binary_SOURCES) \
-       $(t_cast_SOURCES) $(t_constr_SOURCES) $(t_headers_SOURCES) \
+       $(t_cast_SOURCES) $(t_constr_SOURCES) $(t_cxx11_SOURCES) \
+       $(t_do_exceptions_work_at_all_with_this_compiler_SOURCES) \
+       $(t_headers_SOURCES) $(t_iostream_SOURCES) \
         $(t_istream_SOURCES) $(t_locale_SOURCES) $(t_misc_SOURCES) \
-       $(t_ops_SOURCES) $(t_ostream_SOURCES) $(t_prec_SOURCES) \
+       $(t_mix_SOURCES) $(t_ops_SOURCES) $(t_ops2_SOURCES) \
+       $(t_ops3_SOURCES) $(t_ostream_SOURCES) $(t_prec_SOURCES) \
         $(t_rand_SOURCES) $(t_ternary_SOURCES) $(t_unary_SOURCES)
+am__can_run_installinfo = \
+  case $$AM_UPDATE_INFO_DIR in \
+    n|no|NO) false;; \
+    *) (install-info --version) >/dev/null 2>&1;; \
+  esac
  ETAGS = etags
  CTAGS = ctags
  am__tty_colors = \
@@ -273,8 +337,8 @@ SHELL = @SHELL@
  SPEED_CYCLECOUNTER_OBJ = @SPEED_CYCLECOUNTER_OBJ@
  STRIP = @STRIP@
  TAL_OBJECT = @TAL_OBJECT@
+TUNE_LIBS = @TUNE_LIBS@
  TUNE_SQR_OBJ = @TUNE_SQR_OBJ@
-U = @U@
  U_FOR_BUILD = @U_FOR_BUILD@
  VERSION = @VERSION@
  WITH_READLINE_01 = @WITH_READLINE_01@
@@ -321,7 +385,6 @@ mandir = @mandir@
  mkdir_p = @mkdir_p@
  mpn_objects = @mpn_objects@
  mpn_objs_in_libgmp = @mpn_objs_in_libgmp@
-mpn_objs_in_libmp = @mpn_objs_in_libmp@
  oldincludedir = @oldincludedir@
  pdfdir = @pdfdir@
  prefix = @prefix@
@@ -340,28 +403,38 @@ top_srcdir = @top_srcdir@
  # 3.2 on itanium2-hp-hpux11.22.  Without this option, the libgmp.sl.6
  # required by libgmpxx.sl (ie. in its NEEDED records) is not found by the
  # linker.  FIXME: Presumably libtool should do something about this itself.
+# -lm is needed for t-ops2 which compares the results of trunc and mpf_trunc.
  #
  INCLUDES = -I$(top_srcdir) -I$(top_srcdir)/tests
  LDADD = -L$(top_builddir)/.libs \
    $(top_builddir)/tests/libtests.la \
    $(top_builddir)/libgmpxx.la \
-  $(top_builddir)/libgmp.la
+  $(top_builddir)/libgmp.la \
+  -lm
  
  @WANT_CXX_TRUE@TESTS = $(check_PROGRAMS)
  t_assign_SOURCES = t-assign.cc
  t_binary_SOURCES = t-binary.cc
  t_cast_SOURCES = t-cast.cc
  t_constr_SOURCES = t-constr.cc
+t_cxx11_SOURCES = t-cxx11.cc
  t_headers_SOURCES = t-headers.cc
+t_iostream_SOURCES = t-iostream.cc
  t_istream_SOURCES = t-istream.cc
  t_locale_SOURCES = t-locale.cc clocale.c
  t_misc_SOURCES = t-misc.cc
+t_mix_SOURCES = t-mix.cc
  t_ops_SOURCES = t-ops.cc
+t_ops2_SOURCES = t-ops2.cc
+t_ops3_SOURCES = t-ops3.cc
  t_ostream_SOURCES = t-ostream.cc
  t_prec_SOURCES = t-prec.cc
  t_rand_SOURCES = t-rand.cc
  t_ternary_SOURCES = t-ternary.cc
  t_unary_SOURCES = t-unary.cc
+t_do_exceptions_work_at_all_with_this_compiler_SOURCES = \
+  t-do-exceptions-work-at-all-with-this-compiler.cc
+
  
  # Libtool (1.5) somehow botches its uninstalled shared library setups on
  # OpenBSD 3.2, making the C++ test programs here fail.  libgmpxx.so ends up
@@ -418,46 +491,64 @@ clean-checkPROGRAMS:
         list=`for p in $$list; do echo "$$p"; done | sed 's/$(EXEEXT)$$//'`; \
         echo " rm -f" $$list; \
         rm -f $$list
-t-assign$(EXEEXT): $(t_assign_OBJECTS) $(t_assign_DEPENDENCIES) 
+t-assign$(EXEEXT): $(t_assign_OBJECTS) $(t_assign_DEPENDENCIES) $(EXTRA_t_assign_DEPENDENCIES) 
         @rm -f t-assign$(EXEEXT)
         $(CXXLINK) $(t_assign_OBJECTS) $(t_assign_LDADD) $(LIBS)
-t-binary$(EXEEXT): $(t_binary_OBJECTS) $(t_binary_DEPENDENCIES) 
+t-binary$(EXEEXT): $(t_binary_OBJECTS) $(t_binary_DEPENDENCIES) $(EXTRA_t_binary_DEPENDENCIES) 
         @rm -f t-binary$(EXEEXT)
         $(CXXLINK) $(t_binary_OBJECTS) $(t_binary_LDADD) $(LIBS)
-t-cast$(EXEEXT): $(t_cast_OBJECTS) $(t_cast_DEPENDENCIES) 
+t-cast$(EXEEXT): $(t_cast_OBJECTS) $(t_cast_DEPENDENCIES) $(EXTRA_t_cast_DEPENDENCIES) 
         @rm -f t-cast$(EXEEXT)
         $(CXXLINK) $(t_cast_OBJECTS) $(t_cast_LDADD) $(LIBS)
-t-constr$(EXEEXT): $(t_constr_OBJECTS) $(t_constr_DEPENDENCIES) 
+t-constr$(EXEEXT): $(t_constr_OBJECTS) $(t_constr_DEPENDENCIES) $(EXTRA_t_constr_DEPENDENCIES) 
         @rm -f t-constr$(EXEEXT)
         $(CXXLINK) $(t_constr_OBJECTS) $(t_constr_LDADD) $(LIBS)
-t-headers$(EXEEXT): $(t_headers_OBJECTS) $(t_headers_DEPENDENCIES) 
+t-cxx11$(EXEEXT): $(t_cxx11_OBJECTS) $(t_cxx11_DEPENDENCIES) $(EXTRA_t_cxx11_DEPENDENCIES) 
+       @rm -f t-cxx11$(EXEEXT)
+       $(CXXLINK) $(t_cxx11_OBJECTS) $(t_cxx11_LDADD) $(LIBS)
+t-do-exceptions-work-at-all-with-this-compiler$(EXEEXT): $(t_do_exceptions_work_at_all_with_this_compiler_OBJECTS) $(t_do_exceptions_work_at_all_with_this_compiler_DEPENDENCIES) $(EXTRA_t_do_exceptions_work_at_all_with_this_compiler_DEPENDENCIES) 
+       @rm -f t-do-exceptions-work-at-all-with-this-compiler$(EXEEXT)
+       $(CXXLINK) $(t_do_exceptions_work_at_all_with_this_compiler_OBJECTS) $(t_do_exceptions_work_at_all_with_this_compiler_LDADD) $(LIBS)
+t-headers$(EXEEXT): $(t_headers_OBJECTS) $(t_headers_DEPENDENCIES) $(EXTRA_t_headers_DEPENDENCIES) 
         @rm -f t-headers$(EXEEXT)
         $(CXXLINK) $(t_headers_OBJECTS) $(t_headers_LDADD) $(LIBS)
-t-istream$(EXEEXT): $(t_istream_OBJECTS) $(t_istream_DEPENDENCIES) 
+t-iostream$(EXEEXT): $(t_iostream_OBJECTS) $(t_iostream_DEPENDENCIES) $(EXTRA_t_iostream_DEPENDENCIES) 
+       @rm -f t-iostream$(EXEEXT)
+       $(CXXLINK) $(t_iostream_OBJECTS) $(t_iostream_LDADD) $(LIBS)
+t-istream$(EXEEXT): $(t_istream_OBJECTS) $(t_istream_DEPENDENCIES) $(EXTRA_t_istream_DEPENDENCIES) 
         @rm -f t-istream$(EXEEXT)
         $(CXXLINK) $(t_istream_OBJECTS) $(t_istream_LDADD) $(LIBS)
-t-locale$(EXEEXT): $(t_locale_OBJECTS) $(t_locale_DEPENDENCIES) 
+t-locale$(EXEEXT): $(t_locale_OBJECTS) $(t_locale_DEPENDENCIES) $(EXTRA_t_locale_DEPENDENCIES) 
         @rm -f t-locale$(EXEEXT)
         $(CXXLINK) $(t_locale_OBJECTS) $(t_locale_LDADD) $(LIBS)
-t-misc$(EXEEXT): $(t_misc_OBJECTS) $(t_misc_DEPENDENCIES) 
+t-misc$(EXEEXT): $(t_misc_OBJECTS) $(t_misc_DEPENDENCIES) $(EXTRA_t_misc_DEPENDENCIES) 
         @rm -f t-misc$(EXEEXT)
         $(CXXLINK) $(t_misc_OBJECTS) $(t_misc_LDADD) $(LIBS)
-t-ops$(EXEEXT): $(t_ops_OBJECTS) $(t_ops_DEPENDENCIES) 
+t-mix$(EXEEXT): $(t_mix_OBJECTS) $(t_mix_DEPENDENCIES) $(EXTRA_t_mix_DEPENDENCIES) 
+       @rm -f t-mix$(EXEEXT)
+       $(CXXLINK) $(t_mix_OBJECTS) $(t_mix_LDADD) $(LIBS)
+t-ops$(EXEEXT): $(t_ops_OBJECTS) $(t_ops_DEPENDENCIES) $(EXTRA_t_ops_DEPENDENCIES) 
         @rm -f t-ops$(EXEEXT)
         $(CXXLINK) $(t_ops_OBJECTS) $(t_ops_LDADD) $(LIBS)
-t-ostream$(EXEEXT): $(t_ostream_OBJECTS) $(t_ostream_DEPENDENCIES) 
+t-ops2$(EXEEXT): $(t_ops2_OBJECTS) $(t_ops2_DEPENDENCIES) $(EXTRA_t_ops2_DEPENDENCIES) 
+       @rm -f t-ops2$(EXEEXT)
+       $(CXXLINK) $(t_ops2_OBJECTS) $(t_ops2_LDADD) $(LIBS)
+t-ops3$(EXEEXT): $(t_ops3_OBJECTS) $(t_ops3_DEPENDENCIES) $(EXTRA_t_ops3_DEPENDENCIES) 
+       @rm -f t-ops3$(EXEEXT)
+       $(CXXLINK) $(t_ops3_OBJECTS) $(t_ops3_LDADD) $(LIBS)
+t-ostream$(EXEEXT): $(t_ostream_OBJECTS) $(t_ostream_DEPENDENCIES) $(EXTRA_t_ostream_DEPENDENCIES) 
         @rm -f t-ostream$(EXEEXT)
         $(CXXLINK) $(t_ostream_OBJECTS) $(t_ostream_LDADD) $(LIBS)
-t-prec$(EXEEXT): $(t_prec_OBJECTS) $(t_prec_DEPENDENCIES) 
+t-prec$(EXEEXT): $(t_prec_OBJECTS) $(t_prec_DEPENDENCIES) $(EXTRA_t_prec_DEPENDENCIES) 
         @rm -f t-prec$(EXEEXT)
         $(CXXLINK) $(t_prec_OBJECTS) $(t_prec_LDADD) $(LIBS)
-t-rand$(EXEEXT): $(t_rand_OBJECTS) $(t_rand_DEPENDENCIES) 
+t-rand$(EXEEXT): $(t_rand_OBJECTS) $(t_rand_DEPENDENCIES) $(EXTRA_t_rand_DEPENDENCIES) 
         @rm -f t-rand$(EXEEXT)
         $(CXXLINK) $(t_rand_OBJECTS) $(t_rand_LDADD) $(LIBS)
-t-ternary$(EXEEXT): $(t_ternary_OBJECTS) $(t_ternary_DEPENDENCIES) 
+t-ternary$(EXEEXT): $(t_ternary_OBJECTS) $(t_ternary_DEPENDENCIES) $(EXTRA_t_ternary_DEPENDENCIES) 
         @rm -f t-ternary$(EXEEXT)
         $(CXXLINK) $(t_ternary_OBJECTS) $(t_ternary_LDADD) $(LIBS)
-t-unary$(EXEEXT): $(t_unary_OBJECTS) $(t_unary_DEPENDENCIES) 
+t-unary$(EXEEXT): $(t_unary_OBJECTS) $(t_unary_DEPENDENCIES) $(EXTRA_t_unary_DEPENDENCIES) 
         @rm -f t-unary$(EXEEXT)
         $(CXXLINK) $(t_unary_OBJECTS) $(t_unary_LDADD) $(LIBS)
  
@@ -466,11 +557,6 @@ mostlyclean-compile:
  
  distclean-compile:
         -rm -f *.tab.c
-$(top_builddir)/ansi2knr:
-       $(am__cd) $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) ./ansi2knr
-
-mostlyclean-kr:
-       -test "$U" = "" || rm -f *_.c
  
  .c.o:
         $(COMPILE) -c $<
@@ -480,9 +566,6 @@ mostlyclean-kr:
  
  .c.lo:
         $(LTCOMPILE) -c -o $@ $<
-clocale_.c: clocale.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/clocale.c; then echo $(srcdir)/clocale.c; else echo clocale.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-clocale_.$(OBJEXT) clocale_.lo : $(ANSI2KNR)
  
  .cc.o:
         $(CXXCOMPILE) -c -o $@ $<
@@ -632,14 +715,15 @@ check-TESTS: $(TESTS)
           fi; \
           dashes=`echo "$$dashes" | sed s/./=/g`; \
           if test "$$failed" -eq 0; then \
-           echo "$$grn$$dashes"; \
+           col="$$grn"; \
           else \
-           echo "$$red$$dashes"; \
+           col="$$red"; \
           fi; \
-         echo "$$banner"; \
-         test -z "$$skipped" || echo "$$skipped"; \
-         test -z "$$report" || echo "$$report"; \
-         echo "$$dashes$$std"; \
+         echo "$${col}$$dashes$${std}"; \
+         echo "$${col}$$banner$${std}"; \
+         test -z "$$skipped" || echo "$${col}$$skipped$${std}"; \
+         test -z "$$report" || echo "$${col}$$report$${std}"; \
+         echo "$${col}$$dashes$${std}"; \
           test "$$failed" -eq 0; \
         else :; fi
  
@@ -689,10 +773,15 @@ install-am: all-am
  
  installcheck: installcheck-am
  install-strip:
-       $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
-         install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
-         `test -z '$(STRIP)' || \
-           echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+       if test -z '$(STRIP)'; then \
+         $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+           install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+             install; \
+       else \
+         $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+           install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+           "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
+       fi
  mostlyclean-generic:
  
  clean-generic:
@@ -760,7 +849,7 @@ maintainer-clean-am: distclean-am maintainer-clean-generic
  
  mostlyclean: mostlyclean-am
  
-mostlyclean-am: mostlyclean-compile mostlyclean-generic mostlyclean-kr \
+mostlyclean-am: mostlyclean-compile mostlyclean-generic \
         mostlyclean-libtool
  
  pdf: pdf-am
@@ -773,7 +862,7 @@ ps-am:
  
  uninstall-am:
  
-.MAKE: $(top_builddir)/ansi2knr check-am install-am install-strip
+.MAKE: check-am install-am install-strip
  
  .PHONY: CTAGS GTAGS all all-am check check-TESTS check-am clean \
         clean-checkPROGRAMS clean-generic clean-libtool ctags \
@@ -786,8 +875,8 @@ uninstall-am:
         install-ps install-ps-am install-strip installcheck \
         installcheck-am installdirs maintainer-clean \
         maintainer-clean-generic mostlyclean mostlyclean-compile \
-       mostlyclean-generic mostlyclean-kr mostlyclean-libtool pdf \
-       pdf-am ps ps-am tags uninstall uninstall-am
+       mostlyclean-generic mostlyclean-libtool pdf pdf-am ps ps-am \
+       tags uninstall uninstall-am
  
  
  $(top_builddir)/tests/libtests.la:
diff --git a/tests/cxx/clocale.c b/tests/cxx/clocale.c

index 9c942b176d48723dcf560e195f03f13c87273a63..9e93280b909e743b144ebc497db23ce512001ad7 100644 (file)
--- a/tests/cxx/clocale.c
+++ b/tests/cxx/clocale.c
@@ -2,20 +2,20 @@
  
  Copyright 2001, 2002 Free Software Foundation, Inc.
  
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
  
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
  
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
  
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see http://www.gnu.org/licenses/.  */
  
  #include "config.h"
  
diff --git a/tests/cxx/t-assign.cc b/tests/cxx/t-assign.cc

index 407f7262658d5d6dc67c7516efbf77dbe6399e6b..2127b567999a64b0a1756b6d426eecb12d836c6d 100644 (file)
--- a/tests/cxx/t-assign.cc
+++ b/tests/cxx/t-assign.cc
@@ -2,20 +2,20 @@
  
  Copyright 2001, 2002, 2003 Free Software Foundation, Inc.
  
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
  
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
  
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
  
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see http://www.gnu.org/licenses/.  */
  
  #include "config.h"
  
@@ -27,7 +27,8 @@ along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
  #include "gmp-impl.h"
  #include "tests.h"
  
-using namespace std;
+using std::string;
+using std::invalid_argument;
  
  
  void
@@ -185,6 +186,35 @@ check_mpz (void)
      } catch (invalid_argument) {
      }
    }
+
+  // swap(mpz_class &)
+  {
+    mpz_class a(123);
+    mpz_class b(456);
+    a.swap(b);
+    a.swap(a);
+    ASSERT_ALWAYS(a == 456);
+    ASSERT_ALWAYS(b == 123);
+  }
+
+  // swap(mpz_class &, mpz_class &)
+  {
+    mpz_class a(123);
+    mpz_class b(456);
+    ::swap(a, b);
+    ::swap(a, a);
+    ASSERT_ALWAYS(a == 456);
+    ASSERT_ALWAYS(b == 123);
+  }
+  {
+    using std::swap;
+    mpz_class a(123);
+    mpz_class b(456);
+    swap(a, b);
+    swap(a, a);
+    ASSERT_ALWAYS(a == 456);
+    ASSERT_ALWAYS(b == 123);
+  }
  }
  
  void
@@ -257,7 +287,7 @@ check_mpq (void)
    // operator=(unsigned short int)
    {
      unsigned short int a = 54321u;
-    mpz_class b;
+    mpq_class b;
      b = a; ASSERT_ALWAYS(b == 54321u);
    }
  
@@ -342,6 +372,35 @@ check_mpq (void)
      } catch (invalid_argument) {
      }
    }
+
+  // swap(mpq_class &)
+  {
+    mpq_class a(3, 2);
+    mpq_class b(-1, 4);
+    a.swap(b);
+    a.swap(a);
+    ASSERT_ALWAYS(a == -.25);
+    ASSERT_ALWAYS(b == 1.5);
+  }
+
+  // swap(mpq_class &, mpq_class &)
+  {
+    mpq_class a(3, 2);
+    mpq_class b(-1, 4);
+    ::swap(a, b);
+    ::swap(a, a);
+    ASSERT_ALWAYS(a == -.25);
+    ASSERT_ALWAYS(b == 1.5);
+  }
+  {
+    using std::swap;
+    mpq_class a(3, 2);
+    mpq_class b(-1, 4);
+    swap(a, b);
+    swap(a, a);
+    ASSERT_ALWAYS(a == -.25);
+    ASSERT_ALWAYS(b == 1.5);
+  }
  }
  
  void
@@ -499,6 +558,35 @@ check_mpf (void)
      } catch (invalid_argument) {
      }
    }
+
+  // swap(mpf_class &)
+  {
+    mpf_class a(123);
+    mpf_class b(456);
+    a.swap(b);
+    a.swap(a);
+    ASSERT_ALWAYS(a == 456);
+    ASSERT_ALWAYS(b == 123);
+  }
+
+  // swap(mpf_class &, mpf_class &)
+  {
+    mpf_class a(123);
+    mpf_class b(456);
+    ::swap(a, b);
+    ::swap(a, a);
+    ASSERT_ALWAYS(a == 456);
+    ASSERT_ALWAYS(b == 123);
+  }
+  {
+    using std::swap;
+    mpf_class a(123);
+    mpf_class b(456);
+    swap(a, b);
+    swap(a, a);
+    ASSERT_ALWAYS(a == 456);
+    ASSERT_ALWAYS(b == 123);
+  }
  }
  
  
diff --git a/tests/cxx/t-binary.cc b/tests/cxx/t-binary.cc

index 80996626989157c3083674e435494f39ac05bbbc..04c4627f726db43ba31a26276e85fbc96dc08e08 100644 (file)
--- a/tests/cxx/t-binary.cc
+++ b/tests/cxx/t-binary.cc
@@ -1,21 +1,21 @@
  /* Test mp*_class binary expressions.
  
-Copyright 2001, 2002, 2003, 2008 Free Software Foundation, Inc.
+Copyright 2001, 2002, 2003, 2008, 2012 Free Software Foundation, Inc.
  
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
  
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
  
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
  
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see http://www.gnu.org/licenses/.  */
  
  #include "config.h"
  
@@ -94,6 +94,7 @@ check_mpz (void)
    {
      mpz_class a(3), b(4);
      mpz_class c(a * (-b)); ASSERT_ALWAYS(c == -12);
+    c = c * (-b); ASSERT_ALWAYS(c == 48);
    }
  
    // template <class T, class U, class Op>
@@ -102,6 +103,7 @@ check_mpz (void)
      mpz_class a(3), b(2), c(1);
      mpz_class d;
      d = (a % b) + c; ASSERT_ALWAYS(d == 2);
+    d = (a % b) + d; ASSERT_ALWAYS(d == 3);
    }
  
    // template <class T, class U, class V, class Op>
@@ -146,6 +148,8 @@ check_mpz (void)
      mpz_class a(3), b(5), c(7);
      mpz_class d;
      d = (a - b) * (-c); ASSERT_ALWAYS(d == 14);
+    d = (b - d) * (-a); ASSERT_ALWAYS(d == 27);
+    d = (a - b) * (-d); ASSERT_ALWAYS(d == 54);
    }
  
    {
@@ -215,6 +219,7 @@ check_mpq (void)
      double c = 2.0;
      mpq_class d;
      d = a * (b + c); ASSERT_ALWAYS(d == 2);
+    d = d * (b + c); ASSERT_ALWAYS(d == 6);
    }
  
    // template <class T, class U, class V, class Op>
@@ -229,6 +234,7 @@ check_mpq (void)
      mpz_class b(1), c(4);
      mpq_class d;
      d = (b - c) * a; ASSERT_ALWAYS(d == -2);
+    d = (b - c) * d; ASSERT_ALWAYS(d == 6);
    }
  
    // template <class T, class U, class Op>
@@ -237,6 +243,7 @@ check_mpq (void)
      mpq_class a(1, 3), b(3, 4);
      mpq_class c;
      c = a * (-b); ASSERT_ALWAYS(c == -0.25);
+    a = a * (-b); ASSERT_ALWAYS(a == -0.25);
    }
  
    // template <class T, class U, class Op>
@@ -244,6 +251,7 @@ check_mpq (void)
    {
      mpq_class a(1, 3), b(2, 3), c(1, 4);
      mpq_class d((a / b) + c); ASSERT_ALWAYS(d == 0.75);
+    c = (a / b) + c; ASSERT_ALWAYS(c == 0.75);
    }
  
    // template <class T, class U, class V, class Op>
@@ -269,12 +277,17 @@ check_mpq (void)
      mpq_class a(1, 2), b(1, 4);
      mpz_class c(1);
      mpq_class d((a + b) - c); ASSERT_ALWAYS(d == -0.25);
+    d = (a + d) - c; ASSERT_ALWAYS(d == -0.75);
+    d = (a + d) - d.get_num(); ASSERT_ALWAYS(d == 2.75);
+    d = (2 * d) * d.get_den(); ASSERT_ALWAYS(d == 22);
+    d = (b * d) / -d.get_num(); ASSERT_ALWAYS(d == -0.25);
    }
    {
      mpq_class a(1, 3), b(3, 2);
      mpz_class c(2), d(4);
      mpq_class e;
      e = (a * b) / (c - d); ASSERT_ALWAYS(e == -0.25);
+    e = (2 * e) / (c - d); ASSERT_ALWAYS(e ==  0.25);
    }
  
    // template <class T, class U, class V, class W, class Op>
@@ -290,6 +303,7 @@ check_mpq (void)
      signed int d = 4;
      mpq_class e;
      e = (c % d) / (a * b); ASSERT_ALWAYS(e == 10);
+    e = (e.get_num() % d) / (2 / e); ASSERT_ALWAYS(e == 10);
    }
  
    // template <class T, class U, class V, class Op>
@@ -298,6 +312,8 @@ check_mpq (void)
      mpq_class a(1, 3), b(3, 4), c(2, 5);
      mpq_class d;
      d = (a * b) / (-c); ASSERT_ALWAYS(d == -0.625);
+    d = (c * d) / (-b); ASSERT_ALWAYS(3 * d == 1);
+    d = (a * c) / (-d); ASSERT_ALWAYS(5 * d == -2);
    }
  }
  
@@ -350,6 +366,7 @@ check_mpf (void)
      mpz_class b(2), c(1);
      mpf_class d;
      d = a * (b + c); ASSERT_ALWAYS(d == 9);
+    a = a * (b + c); ASSERT_ALWAYS(a == 9);
    }
  
    // template <class T, class U, class V, class Op>
@@ -366,6 +383,7 @@ check_mpf (void)
      mpf_class a(2), b(-3);
      mpf_class c;
      c = a * (-b); ASSERT_ALWAYS(c == 6);
+    c = c * (-b); ASSERT_ALWAYS(c == 18);
    }
  
    // template <class T, class U, class Op>
@@ -374,6 +392,7 @@ check_mpf (void)
      mpf_class a(3), b(4), c(5);
      mpf_class d;
      d = (a / b) - c; ASSERT_ALWAYS(d == -4.25);
+    c = (a / b) - c; ASSERT_ALWAYS(c == -4.25);
    }
  
    // template <class T, class U, class V, class Op>
diff --git a/tests/cxx/t-cast.cc b/tests/cxx/t-cast.cc

index 58fda585a1606e011c895aae8a684d626cceacdc..a546d6bb8ed7f704f3ac9cbd9a2e931c36909d22 100644 (file)
--- a/tests/cxx/t-cast.cc
+++ b/tests/cxx/t-cast.cc
@@ -2,20 +2,20 @@
  
  Copyright 2003 Free Software Foundation, Inc.
  
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
  
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
  
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
  
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see http://www.gnu.org/licenses/.  */
  
  #include "gmp.h"
  #include "gmpxx.h"
diff --git a/tests/cxx/t-constr.cc b/tests/cxx/t-constr.cc

index 6d588d31d39c40ce0fda0755efe68a565ce8d3b6..261005d8676acbcf774b27b3d1cc796071739984 100644 (file)
--- a/tests/cxx/t-constr.cc
+++ b/tests/cxx/t-constr.cc
@@ -2,20 +2,20 @@
  
  Copyright 2001, 2002, 2003 Free Software Foundation, Inc.
  
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
  
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
  
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
  
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see http://www.gnu.org/licenses/.  */
  
  #include "config.h"
  
@@ -320,6 +320,7 @@ check_mpq (void)
      const char *a = "FFFF";
      int base = 16;
      mpq_class b(a, base); ASSERT_ALWAYS(b == 65535u);
+    mpq_class c(0, 1); ASSERT_ALWAYS(c == 0);
    }
  
    // mpq_class(const std::string &)
diff --git a/tests/cxx/t-cxx11.cc b/tests/cxx/t-cxx11.cc

new file mode 100644 (file)

index 0000000..c9ae822
--- /dev/null
+++ b/tests/cxx/t-cxx11.cc
@@ -0,0 +1,220 @@
+/* Test C++11 features
+
+Copyright 2011, 2012 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "config.h"
+
+#include "gmp.h"
+#include "gmpxx.h"
+#include "gmp-impl.h"
+#include "tests.h"
+
+#if __GMPXX_USE_CXX11
+
+#include <utility>
+#include <type_traits>
+
+void check_noexcept ()  // compile-time only: every check in the body is a static_assert
+{
+  mpz_class z1, z2;
+  mpq_class q1, q2;
+  mpf_class f1, f2;
+  static_assert(noexcept(z1 = std::move(z2)), "sorry");  // mpz_class assigned from an mpz_class rvalue
+  static_assert(noexcept(q1 = std::move(q2)), "sorry");  // mpq_class assigned from an mpq_class rvalue
+  static_assert(noexcept(f1 = std::move(f2)), "sorry");  // mpf_class assigned from an mpf_class rvalue
+  static_assert(noexcept(q1 = std::move(z1)), "sorry");  // mixed types: mpq_class assigned from an mpz_class rvalue
+}
+
+void check_common_type ()  /* compile-time only: each CHECK_COMMON_TYPE expands to a static_assert */
+{
+#define CHECK_COMMON_TYPE(T, U, Res) \
+  static_assert(std::is_same<std::common_type<T, U>::type, Res>::value, "sorry")
+#define CHECK_COMMON_TYPE_BUILTIN1(T, Res) \
+  CHECK_COMMON_TYPE(  signed char , T, Res); \
+  CHECK_COMMON_TYPE(unsigned char , T, Res); \
+  CHECK_COMMON_TYPE(  signed short, T, Res); \
+  CHECK_COMMON_TYPE(unsigned short, T, Res); \
+  CHECK_COMMON_TYPE(  signed int  , T, Res); \
+  CHECK_COMMON_TYPE(unsigned int  , T, Res); \
+  CHECK_COMMON_TYPE(  signed long , T, Res); \
+  CHECK_COMMON_TYPE(unsigned long , T, Res); \
+  CHECK_COMMON_TYPE(float , T, Res); \
+  CHECK_COMMON_TYPE(double, T, Res)  /* BUILTIN1: builtin type is the first argument */
+#define CHECK_COMMON_TYPE_BUILTIN2(T, Res) \
+  CHECK_COMMON_TYPE(T,   signed char , Res); \
+  CHECK_COMMON_TYPE(T, unsigned char , Res); \
+  CHECK_COMMON_TYPE(T,   signed short, Res); \
+  CHECK_COMMON_TYPE(T, unsigned short, Res); \
+  CHECK_COMMON_TYPE(T,   signed int  , Res); \
+  CHECK_COMMON_TYPE(T, unsigned int  , Res); \
+  CHECK_COMMON_TYPE(T,   signed long , Res); \
+  CHECK_COMMON_TYPE(T, unsigned long , Res); \
+  CHECK_COMMON_TYPE(T, float , Res); \
+  CHECK_COMMON_TYPE(T, double, Res)  /* BUILTIN2: builtin type is the second argument */
+#define CHECK_COMMON_TYPE_BUILTIN(T, Res) \
+  CHECK_COMMON_TYPE_BUILTIN1(T, Res); \
+  CHECK_COMMON_TYPE_BUILTIN2(T, Res)
+  /* These would just work with implicit conversions */
+  CHECK_COMMON_TYPE (mpz_class, mpq_class, mpq_class);
+  CHECK_COMMON_TYPE (mpz_class, mpf_class, mpf_class);
+  CHECK_COMMON_TYPE (mpf_class, mpq_class, mpf_class);
+
+  CHECK_COMMON_TYPE_BUILTIN (mpz_class, mpz_class);
+  CHECK_COMMON_TYPE_BUILTIN (mpq_class, mpq_class);
+  CHECK_COMMON_TYPE_BUILTIN (mpf_class, mpf_class);
+
+  mpz_class z; mpq_class q; mpf_class f;  /* only used inside decltype below, to name expression types */
+
+  CHECK_COMMON_TYPE (decltype(-z), mpz_class, mpz_class);
+  CHECK_COMMON_TYPE (decltype(-q), mpq_class, mpq_class);
+  CHECK_COMMON_TYPE (decltype(-f), mpf_class, mpf_class);
+
+  CHECK_COMMON_TYPE (decltype(-z), mpq_class, mpq_class);
+  CHECK_COMMON_TYPE (decltype(-z), mpf_class, mpf_class);
+  CHECK_COMMON_TYPE (decltype(-q), mpf_class, mpf_class);
+
+  /* These require a common_type specialization */
+  CHECK_COMMON_TYPE (decltype(-z), decltype(z+z), mpz_class);
+  CHECK_COMMON_TYPE (decltype(-q), decltype(q+q), mpq_class);
+  CHECK_COMMON_TYPE (decltype(-f), decltype(f+f), mpf_class);
+
+  CHECK_COMMON_TYPE (decltype(-q), mpz_class, mpq_class);
+  CHECK_COMMON_TYPE (decltype(-f), mpz_class, mpf_class);
+  CHECK_COMMON_TYPE (decltype(-f), mpq_class, mpf_class);
+
+  CHECK_COMMON_TYPE (decltype(-z), decltype(-q), mpq_class);
+  CHECK_COMMON_TYPE (decltype(-z), decltype(-f), mpf_class);
+  CHECK_COMMON_TYPE (decltype(-q), decltype(-f), mpf_class);
+
+  /* These could be broken by a naive common_type specialization: a negation's type paired with itself must stay that same type */
+  CHECK_COMMON_TYPE (decltype(-z), decltype(-z), decltype(-z));
+  CHECK_COMMON_TYPE (decltype(-q), decltype(-q), decltype(-q));
+  CHECK_COMMON_TYPE (decltype(-f), decltype(-f), decltype(-f));
+
+  /* Painful: the type of a negation paired with each builtin type listed in the macros above, in both argument orders */
+  CHECK_COMMON_TYPE_BUILTIN (decltype(-z), mpz_class);
+  CHECK_COMMON_TYPE_BUILTIN (decltype(-q), mpq_class);
+  CHECK_COMMON_TYPE_BUILTIN (decltype(-f), mpf_class);
+}
+
+template<class T, class U = T>
+void check_move_init ()  // initialise a U from an rvalue T and check both objects afterwards
+{
+  {
+    // Delete moved-from x1: it is destroyed at the end of this scope without being touched again
+    T x1 = 3;
+    U x2 = std::move(x1);
+    ASSERT_ALWAYS (x2 == 3);
+  }
+  {
+    // Assign to moved-from x1: it must accept a fresh value after being moved from
+    T x1 = 2;
+    U x2 = std::move(x1);
+    x1 = -7;
+    ASSERT_ALWAYS (x1 == -7);
+    ASSERT_ALWAYS (x2 == 2);  // x2 still holds the value it was initialised with
+  }
+}
+
+template<class T, class U = T>
+void check_move_assign ()  // assign an rvalue T to an existing U and check both objects afterwards
+{
+  {
+    // Delete moved-from x1: it is destroyed at the end of this scope without being touched again
+    T x1 = 3; U x2;
+    x2 = std::move(x1);
+    ASSERT_ALWAYS (x2 == 3);
+  }
+  {
+    // Assign to moved-from x1: it must accept a fresh value after being moved from
+    T x1 = 2; U x2;
+    x2 = std::move(x1);
+    x1 = -7;
+    ASSERT_ALWAYS (x1 == -7);
+    ASSERT_ALWAYS (x2 == 2);  // x2 still holds the value that was moved into it
+  }
+  {
+    // Self move-assign (not necessary, but it happens to work...)
+    T x = 4;
+    x = std::move(x);
+    ASSERT_ALWAYS (x == 4);  // value must survive the self-assignment
+  }
+}
+
+void check_user_defined_literal ()  // exercises the _mpz, _mpq and _mpf literal suffixes
+{
+  ASSERT_ALWAYS (123_mpz % 5 == 3);
+  ASSERT_ALWAYS (-11_mpq / 22 == -.5);  // unary minus is applied to the literal 11_mpq
+  ASSERT_ALWAYS (112.5e-1_mpf * 4 == 45);  // 112.5e-1 is 11.25, and 11.25 * 4 is 45
+  {
+    mpz_class ref ( "123456789abcdef0123456789abcdef0123", 16);  // same digits parsed from a string in base 16
+    ASSERT_ALWAYS (0x123456789abcdef0123456789abcdef0123_mpz == ref);  // hexadecimal literal with 35 hex digits
+  }
+}
+
+// Check for explicit conversion to bool: an implicit one would make overload resolution pick implicit_bool(bool)
+void implicit_bool(bool);  // declaration only, returns void
+int implicit_bool(...);  // declaration only, variadic fallback returning int
+
+void check_bool_conversion ()
+{
+  const mpz_class zn = -2;
+  const mpq_class qn = -2;
+  const mpf_class fn = -2;
+  const mpz_class z0 =  0;
+  const mpq_class q0 =  0;
+  const mpf_class f0 =  0;
+  const mpz_class zp = +2;
+  const mpq_class qp = +2;
+  const mpf_class fp = +2;
+  if (zn && qn && fn && zp && qp && fp && !z0 && !q0 && !f0)  // nonzero of either sign must be true, zero must be false
+    {
+      if (z0 || q0 || f0) ASSERT_ALWAYS(false);
+    }
+  else ASSERT_ALWAYS(false);
+  decltype(implicit_bool(zn)) zi = 1;  // well-formed only if the int overload was chosen; a void variable would not compile
+  decltype(implicit_bool(qn)) qi = 1;
+  decltype(implicit_bool(fn)) fi = 1;
+  (void)(zi+qi+fi);  // presumably here to silence unused-variable warnings
+}
+
+int
+main (void)  // entry point when __GMPXX_USE_CXX11 is nonzero: runs every check defined in this file
+{
+  tests_start();
+
+  check_noexcept();
+  check_common_type();
+  check_move_init<mpz_class>();
+  check_move_init<mpq_class>();
+  check_move_init<mpf_class>();
+  check_move_assign<mpz_class>();
+  check_move_assign<mpq_class>();
+  check_move_assign<mpf_class>();
+  check_move_init<mpz_class,mpq_class>();  // mixed types: T = mpz_class moved into U = mpq_class
+  check_move_assign<mpz_class,mpq_class>();  // mixed types: T = mpz_class moved into U = mpq_class
+  check_user_defined_literal();
+  check_bool_conversion();
+
+  tests_end();
+  return 0;
+}
+
+#else  /* __GMPXX_USE_CXX11 is zero or undefined */
+int main () { return 0; }  // nothing to test in this configuration; exit with status 0
+#endif  /* __GMPXX_USE_CXX11 */
diff --git a/tests/cxx/t-do-exceptions-work-at-all-with-this-compiler.cc b/tests/cxx/t-do-exceptions-work-at-all-with-this-compiler.cc

new file mode 100644 (file)

index 0000000..7244c32
--- /dev/null
+++ b/tests/cxx/t-do-exceptions-work-at-all-with-this-compiler.cc
@@ -0,0 +1,38 @@
+/* Test if the compiler has working try / throw / catch.
+
+Copyright 2013 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see http://www.gnu.org/licenses/.  */
+
+#include <stdexcept>
+
+inline void
+throw_expr ()  // unconditionally throws std::invalid_argument
+{
+  throw std::invalid_argument ("Test");
+}
+
+using namespace std;
+
+int
+main ()  // throws once and catches it; no return statement, so reaching the end of main yields exit status 0
+{
+  try
+  {
+    throw_expr();  // always throws std::invalid_argument
+  }
+  catch (invalid_argument) { }  // caught by value and deliberately ignored
+}
diff --git a/tests/cxx/t-headers.cc b/tests/cxx/t-headers.cc

index 0f7b74f2c79585e4ee1c86b9122a875aaa8cd7f3..19be2014e34e9e51716b0e026cb0e9b02378a213 100644 (file)
--- a/tests/cxx/t-headers.cc
+++ b/tests/cxx/t-headers.cc
@@ -2,20 +2,20 @@
  
  Copyright 2001, 2002 Free Software Foundation, Inc.
  
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
  
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
  
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
  
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see http://www.gnu.org/licenses/.  */
  
  #include "gmpxx.h"
  
diff --git a/tests/cxx/t-iostream.cc b/tests/cxx/t-iostream.cc

new file mode 100644 (file)

index 0000000..1fe83b3
--- /dev/null
+++ b/tests/cxx/t-iostream.cc
@@ -0,0 +1,107 @@
+/* Test stream formatted input and output on mp*_class
+
+Copyright 2011 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see http://www.gnu.org/licenses/.  */
+
+#include <sstream>
+
+#include "gmp.h"
+#include "gmpxx.h"
+#include "gmp-impl.h"
+#include "tests.h"
+
+using namespace std;
+
+// The tests are extremely basic. These functions just forward to the
+// ones tested in t-istream.cc and t-ostream.cc; we rely on those for
+// advanced tests and only check the syntax here.
+
+void
+checki ()  // formatted extraction (operator>>) into each mp*_class
+{
+  {
+    istringstream src("123");  // integer text
+    mpz_class z;
+    src >> z;
+    ASSERT_ALWAYS (z == 123);
+  }
+  {
+    istringstream src("3/4");  // fraction text
+    mpq_class q;
+    src >> q;
+    ASSERT_ALWAYS (q == .75);
+  }
+  {
+    istringstream src("1.5");  // decimal text
+    mpf_class f;
+    src >> f;
+    ASSERT_ALWAYS (f == 1.5);
+  }
+}
+
+void
+checko ()  // formatted insertion (operator<<) of objects and of expressions
+{
+  {
+    ostringstream out;
+    mpz_class z=123;
+    out << z;
+    ASSERT_ALWAYS (out.str() == "123");
+  }
+  {
+    ostringstream out;
+    mpz_class z=123;
+    out << (z+1);  // an expression rather than a named object
+    ASSERT_ALWAYS (out.str() == "124");
+  }
+  {
+    ostringstream out;
+    mpq_class q(3,4);
+    out << q;
+    ASSERT_ALWAYS (out.str() == "3/4");
+  }
+  {
+    ostringstream out;
+    mpq_class q(3,4);
+    out << (q+1);  // an expression rather than a named object
+    ASSERT_ALWAYS (out.str() == "7/4");
+  }
+  {
+    ostringstream out;
+    mpf_class f=1.5;
+    out << f;
+    ASSERT_ALWAYS (out.str() == "1.5");
+  }
+  {
+    ostringstream out;
+    mpf_class f=1.5;
+    out << (f+1);  // an expression rather than a named object
+    ASSERT_ALWAYS (out.str() == "2.5");
+  }
+}
+
+int
+main (void)  // no command-line options are read, so argc/argv are not declared
+{
+  tests_start ();
+
+  checki ();  // operator>> checks
+  checko ();  // operator<< checks
+
+  tests_end ();
+  return 0;
+}
diff --git a/tests/cxx/t-istream.cc b/tests/cxx/t-istream.cc

index 59600be16695e21c37f33c39d334787c9359aa5a..6cd806bbd0d7e14854efdac34dfdbe8f7462f5d6 100644 (file)
--- a/tests/cxx/t-istream.cc
+++ b/tests/cxx/t-istream.cc
@@ -2,20 +2,20 @@
  
  Copyright 2001, 2002, 2003, 2004 Free Software Foundation, Inc.
  
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
  
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
  
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
  
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see http://www.gnu.org/licenses/.  */
  
  #include <iostream>
  #include <cstdlib>
@@ -37,7 +37,7 @@ using namespace std;
  // since there's no mantissa digits, but g++ reads the whole thing and only
  // then decides it's bad.
  
-int   option_check_standard = 0;
+bool option_check_standard = false;
  
  
  // On some versions of g++ 2.96 it's been observed that putback() may leave
@@ -45,7 +45,7 @@ int   option_check_standard = 0;
  // result of a bug, since for instance it's ok in g++ 2.95 and g++ 3.3.  We
  // detect the problem at runtime and disable affected checks.
  
-int putback_tellg_works = 1;
+bool putback_tellg_works = true;
  
  void
  check_putback_tellg (void)
@@ -63,7 +63,7 @@ check_putback_tellg (void)
      {
        cout << "Warning, istringstream has a bug: putback() doesn't update tellg().\n";;
        cout << "Tests on tellg() will be skipped.\n";
-      putback_tellg_works = 0;
+      putback_tellg_works = false;
      }
  }
  
@@ -132,7 +132,8 @@ check_mpz (void)
    };
  
    mpz_t      got, want;
-  int        got_ok, want_ok;
+  bool       got_ok, want_ok;
+  bool       got_eof, want_eof;
    long       got_si, want_si;
    streampos  init_tellg, got_pos, want_pos;
  
@@ -141,8 +142,10 @@ check_mpz (void)
  
    for (size_t i = 0; i < numberof (data); i++)
      {
+      size_t input_length = strlen (data[i].input);
        want_pos = (data[i].want_pos == -1
-                  ? strlen (data[i].input) : data[i].want_pos);
+                  ? input_length : data[i].want_pos);
+      want_eof = (want_pos == streampos(input_length));
  
        want_ok = (data[i].want != NULL);
  
@@ -159,7 +162,8 @@ check_mpz (void)
            want_si = mpz_get_si (want);
  
            input >> got_si;
-          got_ok = (input ? 1 : 0);
+          got_ok = !input.fail();
+          got_eof = input.eof();
            input.clear();
            got_pos = input.tellg() - init_tellg;
  
@@ -175,6 +179,12 @@ check_mpz (void)
                cout << "  got_si:  " << got_si << "\n";
                cout << "  want_si: " << want_si << "\n";
              }
+          if (want_ok && got_eof != want_eof)
+            {
+              WRONG ("stdc++ operator>> wrong EOF state, check_mpz");
+              cout << "  got_eof:  " << got_eof << "\n";
+              cout << "  want_eof: " << want_eof << "\n";
+            }
            if (putback_tellg_works && got_pos != want_pos)
              {
                WRONG ("stdc++ operator>> wrong position, check_mpz");
@@ -190,7 +200,8 @@ check_mpz (void)
  
          mpz_set_ui (got, 0xDEAD);
          input >> got;
-        got_ok = (input ? 1 : 0);
+        got_ok = !input.fail();
+       got_eof = input.eof();
          input.clear();
          got_pos = input.tellg() - init_tellg;
  
@@ -208,6 +219,13 @@ check_mpz (void)
              mpz_trace ("  want", want);
              abort ();
            }
+        if (want_ok && got_eof != want_eof)
+          {
+            WRONG ("mpz operator>> wrong EOF state");
+            cout << "  want_eof: " << want_eof << "\n";
+            cout << "  got_eof:  " << got_eof << "\n";
+            abort ();
+          }
          if (putback_tellg_works && got_pos != want_pos)
            {
              WRONG ("mpz operator>> wrong position");
@@ -271,10 +289,17 @@ check_mpq (void)
  
      { " 123",  0, NULL,  (ios::fmtflags) 0 },   // not without skipws
      { " 123", -1, "123", ios::skipws },
+
+    { "123 /456",    3, "123",  (ios::fmtflags) 0 },
+    { "123/ 456",    4,  NULL,  (ios::fmtflags) 0 },
+    { "123/"    ,   -1,  NULL,  (ios::fmtflags) 0 },
+    { "123 /456",    3, "123",  ios::skipws },
+    { "123/ 456",    4,  NULL,  ios::skipws },
    };
  
    mpq_t      got, want;
-  int        got_ok, want_ok;
+  bool       got_ok, want_ok;
+  bool       got_eof, want_eof;
    long       got_si, want_si;
    streampos  init_tellg, got_pos, want_pos;
  
@@ -283,8 +308,10 @@ check_mpq (void)
  
    for (size_t i = 0; i < numberof (data); i++)
      {
+      size_t input_length = strlen (data[i].input);
        want_pos = (data[i].want_pos == -1
-                  ? strlen (data[i].input) : data[i].want_pos);
+                  ? input_length : data[i].want_pos);
+      want_eof = (want_pos == streampos(input_length));
  
        want_ok = (data[i].want != NULL);
  
@@ -295,7 +322,8 @@ check_mpq (void)
  
        if (option_check_standard
            && mpz_fits_slong_p (mpq_numref(want))
-          && mpz_cmp_ui (mpq_denref(want), 1L) == 0)
+          && mpz_cmp_ui (mpq_denref(want), 1L) == 0
+          && strchr (data[i].input, '/') == NULL)
          {
            istringstream  input (data[i].input);
            input.flags (data[i].flags);
@@ -303,7 +331,8 @@ check_mpq (void)
            want_si = mpz_get_si (mpq_numref(want));
  
            input >> got_si;
-          got_ok = (input ? 1 : 0);
+          got_ok = !input.fail();
+          got_eof = input.eof();
            input.clear();
            got_pos = input.tellg() - init_tellg;
  
@@ -319,6 +348,12 @@ check_mpq (void)
                cout << "  got_si:  " << got_si << "\n";
                cout << "  want_si: " << want_si << "\n";
              }
+          if (want_ok && got_eof != want_eof)
+            {
+              WRONG ("stdc++ operator>> wrong EOF state, check_mpq");
+              cout << "  got_eof:  " << got_eof << "\n";
+              cout << "  want_eof: " << want_eof << "\n";
+            }
            if (putback_tellg_works && got_pos != want_pos)
              {
                WRONG ("stdc++ operator>> wrong position, check_mpq");
@@ -334,7 +369,8 @@ check_mpq (void)
          mpq_set_si (got, 0xDEAD, 0xBEEF);
  
          input >> got;
-        got_ok = (input ? 1 : 0);
+        got_ok = !input.fail();
+       got_eof = input.eof();
          input.clear();
          got_pos = input.tellg() - init_tellg;
  
@@ -355,6 +391,13 @@ check_mpq (void)
              mpq_trace ("  want", want);
              abort ();
            }
+        if (want_ok && got_eof != want_eof)
+          {
+            WRONG ("mpq operator>> wrong EOF state");
+            cout << "  want_eof: " << want_eof << "\n";
+            cout << "  got_eof:  " << got_eof << "\n";
+            abort ();
+          }
          if (putback_tellg_works && got_pos != want_pos)
            {
              WRONG ("mpq operator>> wrong position");
@@ -428,7 +471,8 @@ check_mpf (void)
    };
  
    mpf_t      got, want;
-  int        got_ok, want_ok;
+  bool       got_ok, want_ok;
+  bool       got_eof, want_eof;
    double     got_d, want_d;
    streampos  init_tellg, got_pos, want_pos;
  
@@ -437,8 +481,10 @@ check_mpf (void)
  
    for (size_t i = 0; i < numberof (data); i++)
      {
+      size_t input_length = strlen (data[i].input);
        want_pos = (data[i].want_pos == -1
-                  ? strlen (data[i].input) : data[i].want_pos);
+                  ? input_length : data[i].want_pos);
+      want_eof = (want_pos == streampos(input_length));
  
        want_ok = (data[i].want != NULL);
  
@@ -455,7 +501,8 @@ check_mpf (void)
            init_tellg = input.tellg();
  
            input >> got_d;
-          got_ok = (input ? 1 : 0);
+          got_ok = !input.fail();
+          got_eof = input.eof();
            input.clear();
            got_pos = input.tellg() - init_tellg;
  
@@ -471,6 +518,12 @@ check_mpf (void)
                cout << "  got:   " << got_d << "\n";
                cout << "  want:  " << want_d << "\n";
              }
+          if (want_ok && got_eof != want_eof)
+            {
+              WRONG ("stdc++ operator>> wrong EOF state, check_mpf");
+              cout << "  got_eof:  " << got_eof << "\n";
+              cout << "  want_eof: " << want_eof << "\n";
+            }
            if (putback_tellg_works && got_pos != want_pos)
              {
                WRONG ("stdc++ operator>> wrong position, check_mpf");
@@ -486,7 +539,8 @@ check_mpf (void)
  
          mpf_set_ui (got, 0xDEAD);
          input >> got;
-        got_ok = (input ? 1 : 0);
+        got_ok = !input.fail();
+       got_eof = input.eof();
          input.clear();
          got_pos = input.tellg() - init_tellg;
  
@@ -504,6 +558,13 @@ check_mpf (void)
              mpf_trace ("  want", want);
              abort ();
            }
+        if (want_ok && got_eof != want_eof)
+          {
+            WRONG ("mpf operator>> wrong EOF state");
+            cout << "  want_eof: " << want_eof << "\n";
+            cout << "  got_eof:  " << got_eof << "\n";
+            abort ();
+          }
          if (putback_tellg_works && got_pos != want_pos)
            {
              WRONG ("mpf operator>> wrong position");
@@ -524,7 +585,7 @@ int
  main (int argc, char *argv[])
  {
    if (argc > 1 && strcmp (argv[1], "-s") == 0)
-    option_check_standard = 1;
+    option_check_standard = true;
  
    tests_start ();
  
diff --git a/tests/cxx/t-locale.cc b/tests/cxx/t-locale.cc

index 24e08ff730d5876c93ea676f57dea85e14b987ae..b649d95577f556ce62e3c2de68f8a8a016d0561f 100644 (file)
--- a/tests/cxx/t-locale.cc
+++ b/tests/cxx/t-locale.cc
@@ -2,20 +2,20 @@
  
  Copyright 2001, 2002, 2003, 2007 Free Software Foundation, Inc.
  
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
  
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
  
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
  
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see http://www.gnu.org/licenses/.  */
  
  #include <clocale>
  #include <iostream>
diff --git a/tests/cxx/t-misc.cc b/tests/cxx/t-misc.cc

index 19c54992babeaeef10769146a056c292576bd959..08a34c1ce391d36b7c14029ae1bb8e7733184ae6 100644 (file)
--- a/tests/cxx/t-misc.cc
+++ b/tests/cxx/t-misc.cc
@@ -2,20 +2,20 @@
  
  Copyright 2002, 2003 Free Software Foundation, Inc.
  
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
  
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
  
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
  
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see http://www.gnu.org/licenses/.  */
  
  
  /* Note that we don't use <climits> for LONG_MIN, but instead our own
@@ -41,7 +41,7 @@ check_mpz (void)
  {
    // mpz_class::fits_sint_p
    {
-    int        fits;
+    bool       fits;
      mpz_class  z;
      z = INT_MIN; fits = z.fits_sint_p(); ASSERT_ALWAYS (fits);
      z--;         fits = z.fits_sint_p(); ASSERT_ALWAYS (! fits);
@@ -51,7 +51,7 @@ check_mpz (void)
  
    // mpz_class::fits_uint_p
    {
-    int        fits;
+    bool       fits;
      mpz_class  z;
      z = 0;        fits = z.fits_uint_p(); ASSERT_ALWAYS (fits);
      z--;          fits = z.fits_uint_p(); ASSERT_ALWAYS (! fits);
@@ -61,7 +61,7 @@ check_mpz (void)
  
    // mpz_class::fits_slong_p
    {
-    int        fits;
+    bool       fits;
      mpz_class  z;
      z = LONG_MIN; fits = z.fits_slong_p(); ASSERT_ALWAYS (fits);
      z--;          fits = z.fits_slong_p(); ASSERT_ALWAYS (! fits);
@@ -71,7 +71,7 @@ check_mpz (void)
  
    // mpz_class::fits_ulong_p
    {
-    int        fits;
+    bool       fits;
      mpz_class  z;
      z = 0;         fits = z.fits_ulong_p(); ASSERT_ALWAYS (fits);
      z--;           fits = z.fits_ulong_p(); ASSERT_ALWAYS (! fits);
@@ -81,7 +81,7 @@ check_mpz (void)
  
    // mpz_class::fits_sshort_p
    {
-    int        fits;
+    bool       fits;
      mpz_class  z;
      z = SHRT_MIN; fits = z.fits_sshort_p(); ASSERT_ALWAYS (fits);
      z--;          fits = z.fits_sshort_p(); ASSERT_ALWAYS (! fits);
@@ -91,7 +91,7 @@ check_mpz (void)
  
    // mpz_class::fits_ushort_p
    {
-    int        fits;
+    bool       fits;
      mpz_class  z;
      z = 0;         fits = z.fits_ushort_p(); ASSERT_ALWAYS (fits);
      z--;           fits = z.fits_ushort_p(); ASSERT_ALWAYS (! fits);
@@ -186,7 +186,7 @@ check_mpq (void)
  
    // mpq_class::get_num, mpq_class::get_den
    {
-    mpq_class  q(4,5);
+    const mpq_class  q(4,5);
      mpz_class  z;
      z = q.get_num(); ASSERT_ALWAYS (z == 4);
      z = q.get_den(); ASSERT_ALWAYS (z == 5);
@@ -200,7 +200,7 @@ check_mpq (void)
      p = q.get_den_mpz_t(); ASSERT_ALWAYS (mpz_cmp_ui (p, 5) == 0);
    }
    {
-    mpq_class  q(4,5);
+    const mpq_class  q(4,5);
      mpz_srcptr p;
      p = q.get_num_mpz_t(); ASSERT_ALWAYS (mpz_cmp_ui (p, 4) == 0);
      p = q.get_den_mpz_t(); ASSERT_ALWAYS (mpz_cmp_ui (p, 5) == 0);
@@ -241,7 +241,7 @@ check_mpf (void)
  {
    // mpf_class::fits_sint_p
    {
-    int        fits;
+    bool       fits;
      mpf_class  f (0, 2*8*sizeof(int));
      f = INT_MIN; fits = f.fits_sint_p(); ASSERT_ALWAYS (fits);
      f--;         fits = f.fits_sint_p(); ASSERT_ALWAYS (! fits);
@@ -251,7 +251,7 @@ check_mpf (void)
  
    // mpf_class::fits_uint_p
    {
-    int        fits;
+    bool       fits;
      mpf_class  f (0, 2*8*sizeof(int));
      f = 0;        fits = f.fits_uint_p(); ASSERT_ALWAYS (fits);
      f--;          fits = f.fits_uint_p(); ASSERT_ALWAYS (! fits);
@@ -261,7 +261,7 @@ check_mpf (void)
  
    // mpf_class::fits_slong_p
    {
-    int        fits;
+    bool       fits;
      mpf_class  f (0, 2*8*sizeof(long));
      f = LONG_MIN; fits = f.fits_slong_p(); ASSERT_ALWAYS (fits);
      f--;          fits = f.fits_slong_p(); ASSERT_ALWAYS (! fits);
@@ -271,7 +271,7 @@ check_mpf (void)
  
    // mpf_class::fits_ulong_p
    {
-    int        fits;
+    bool       fits;
      mpf_class  f (0, 2*8*sizeof(long));
      f = 0;         fits = f.fits_ulong_p(); ASSERT_ALWAYS (fits);
      f--;           fits = f.fits_ulong_p(); ASSERT_ALWAYS (! fits);
@@ -281,7 +281,7 @@ check_mpf (void)
  
    // mpf_class::fits_sshort_p
    {
-    int        fits;
+    bool       fits;
      mpf_class  f (0, 2*8*sizeof(short));
      f = SHRT_MIN; fits = f.fits_sshort_p(); ASSERT_ALWAYS (fits);
      f--;          fits = f.fits_sshort_p(); ASSERT_ALWAYS (! fits);
@@ -291,7 +291,7 @@ check_mpf (void)
  
    // mpf_class::fits_ushort_p
    {
-    int        fits;
+    bool       fits;
      mpf_class  f (0, 2*8*sizeof(short));
      f = 0;         fits = f.fits_ushort_p(); ASSERT_ALWAYS (fits);
      f--;           fits = f.fits_ushort_p(); ASSERT_ALWAYS (! fits);
@@ -370,6 +370,18 @@ check_mpf (void)
    }
  }
  
+// std::numeric_limits for mpz_class, mpq_class and mpf_class
+void
+check_limits (void)
+{
+  // Reading the members checks that they are accessible (not private).
+  ASSERT_ALWAYS ( std::numeric_limits<mpz_class>::is_integer);
+  ASSERT_ALWAYS (!std::numeric_limits<mpf_class>::is_integer);
+  // Check that symbols are emitted: taking the address of a static member
+  // needs its definition, so a missing one shows up as a link error.
+  ASSERT_ALWAYS (&std::numeric_limits<mpz_class>::is_integer
+             != &std::numeric_limits<mpq_class>::is_integer);
+}
  
  int
  main (void)
@@ -379,6 +391,7 @@ main (void)
    check_mpz();
    check_mpq();
    check_mpf();
+  check_limits();
  
    tests_end();
    return 0;
diff --git a/tests/cxx/t-mix.cc b/tests/cxx/t-mix.cc

new file mode 100644 (file)

index 0000000..5ac3b49
--- /dev/null
+++ b/tests/cxx/t-mix.cc
@@ -0,0 +1,83 @@
+/* Test legality of conversion between the different mp*_class
+
+Copyright 2011 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "config.h"
+
+#include "gmp.h"
+#include "gmpxx.h"
+#include "gmp-impl.h"
+#include "tests.h"
+
+int f_z  (mpz_class){return 0;}  // return codes: 0 = mpz_class parameter,
+int f_q  (mpq_class){return 1;}  //               1 = mpq_class parameter,
+int f_f  (mpf_class){return 2;}  //               2 = mpf_class parameter
+int f_zq (mpz_class){return 0;}  // f_zq: overloaded on mpz_class / mpq_class
+int f_zq (mpq_class){return 1;}
+int f_zf (mpz_class){return 0;}  // f_zf: overloaded on mpz_class / mpf_class
+int f_zf (mpf_class){return 2;}
+int f_qf (mpq_class){return 1;}  // f_qf: overloaded on mpq_class / mpf_class
+int f_qf (mpf_class){return 2;}
+int f_zqf(mpz_class){return 0;}  // f_zqf: overloaded on all three classes
+int f_zqf(mpq_class){return 1;}
+int f_zqf(mpf_class){return 2;}
+
+void
+check (void)  // each assertion pins which f_* overload a given argument selects
+{
+  mpz_class z=42;
+  mpq_class q=33;
+  mpf_class f=18;
+  // Arguments passed directly; expected values are the f_* return codes (0/1/2).
+  ASSERT_ALWAYS(f_z  (z)==0); ASSERT_ALWAYS(f_z  (-z)==0);
+  ASSERT_ALWAYS(f_q  (z)==1); ASSERT_ALWAYS(f_q  (-z)==1);
+  ASSERT_ALWAYS(f_q  (q)==1); ASSERT_ALWAYS(f_q  (-q)==1);
+  ASSERT_ALWAYS(f_f  (z)==2); ASSERT_ALWAYS(f_f  (-z)==2);
+  ASSERT_ALWAYS(f_f  (q)==2); ASSERT_ALWAYS(f_f  (-q)==2);
+  ASSERT_ALWAYS(f_f  (f)==2); ASSERT_ALWAYS(f_f  (-f)==2);
+  ASSERT_ALWAYS(f_zq (z)==0);
+  ASSERT_ALWAYS(f_zq (q)==1); ASSERT_ALWAYS(f_zq (-q)==1);
+  ASSERT_ALWAYS(f_zf (z)==0);
+  ASSERT_ALWAYS(f_zf (f)==2); ASSERT_ALWAYS(f_zf (-f)==2);
+  ASSERT_ALWAYS(f_qf (q)==1);
+  ASSERT_ALWAYS(f_qf (f)==2); ASSERT_ALWAYS(f_qf (-f)==2);
+  ASSERT_ALWAYS(f_zqf(z)==0);
+  ASSERT_ALWAYS(f_zqf(q)==1);
+  ASSERT_ALWAYS(f_zqf(f)==2); ASSERT_ALWAYS(f_zqf(-f)==2);
+  // Explicit conversions: the constructed class alone decides the f_zqf overload.
+  ASSERT_ALWAYS(f_zqf(mpz_class(z))==0); ASSERT_ALWAYS(f_zqf(mpz_class(-z))==0);
+  ASSERT_ALWAYS(f_zqf(mpz_class(q))==0); ASSERT_ALWAYS(f_zqf(mpz_class(-q))==0);
+  ASSERT_ALWAYS(f_zqf(mpz_class(f))==0); ASSERT_ALWAYS(f_zqf(mpz_class(-f))==0);
+  ASSERT_ALWAYS(f_zqf(mpq_class(z))==1); ASSERT_ALWAYS(f_zqf(mpq_class(-z))==1);
+  ASSERT_ALWAYS(f_zqf(mpq_class(q))==1); ASSERT_ALWAYS(f_zqf(mpq_class(-q))==1);
+  ASSERT_ALWAYS(f_zqf(mpq_class(f))==1); ASSERT_ALWAYS(f_zqf(mpq_class(-f))==1);
+  ASSERT_ALWAYS(f_zqf(mpf_class(z))==2); ASSERT_ALWAYS(f_zqf(mpf_class(-z))==2);
+  ASSERT_ALWAYS(f_zqf(mpf_class(q))==2); ASSERT_ALWAYS(f_zqf(mpf_class(-q))==2);
+  ASSERT_ALWAYS(f_zqf(mpf_class(f))==2); ASSERT_ALWAYS(f_zqf(mpf_class(-f))==2);
+}
+
+int
+main (void)  // test driver: runs check() between tests_start() and tests_end()
+{
+  tests_start();
+
+  check();  // overload-selection assertions
+
+  tests_end();
+  return 0;
+}
diff --git a/tests/cxx/t-ops.cc b/tests/cxx/t-ops.cc

index 01fe3b0fbc79da3eddc8658e8457b94369617b04..f9563fff26f13ac7f8ee6ada88eab2fe1d47ad25 100644 (file)
--- a/tests/cxx/t-ops.cc
+++ b/tests/cxx/t-ops.cc
@@ -2,20 +2,20 @@
  
  Copyright 2001, 2002, 2003 Free Software Foundation, Inc.
  
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
  
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
  
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
  
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see http://www.gnu.org/licenses/.  */
  
  #include "config.h"
  
@@ -60,6 +60,8 @@ check_mpz (void)
      mpz_class a(-123);
      mpz_class b;
      b = abs(a); ASSERT_ALWAYS(b == 123);
+    a <<= 300;
+    b = abs(a); ASSERT_ALWAYS(a + b == 0);
    }
  
    // sqrt
@@ -492,6 +494,7 @@ check_mpq (void)
      mpq_class a(1, 3), b(3, 4);
      mpq_class c;
      c = a * b; ASSERT_ALWAYS(c == 0.25);
+    c = b * b; ASSERT_ALWAYS(c == 0.5625);
    }
  
    // operator/
diff --git a/tests/cxx/t-ops2.cc b/tests/cxx/t-ops2.cc

new file mode 100644 (file)

index 0000000..3fc472b
--- /dev/null
+++ b/tests/cxx/t-ops2.cc
@@ -0,0 +1,246 @@
+/* Test mp*_class operators and functions.
+
+Copyright 2011, 2012 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "config.h"
+
+#include <math.h>
+
+#include "gmp.h"
+#include "gmpxx.h"
+#include "gmp-impl.h"
+#include "tests.h"
+
+
+#define CHECK1(Type,a,fun) /* fun on (Type)(a) must equal fun on the plain a */ \
+  ASSERT_ALWAYS(fun((Type)(a))==fun(a))
+#define CHECK(Type1,Type2,a,b,op) /* op on the casted operands must equal op on the plain operands */ \
+  ASSERT_ALWAYS(((Type1)(a) op (Type2)(b))==((a) op (b)))
+#define CHECK_G(Type,a,b,op) /* both operands cast to Type */ \
+  CHECK(Type,Type,a,b,op)
+#define CHECK_UI(Type,a,b,op) /* Type with unsigned long, both operand orders; expands to two statements */ \
+  CHECK(Type,unsigned long,a,b,op); \
+  CHECK(unsigned long,Type,a,b,op)
+#define CHECK_SI(Type,a,b,op) /* Type with long, both operand orders */ \
+  CHECK(Type,long,a,b,op); \
+  CHECK(long,Type,a,b,op)
+#define CHECK_D(Type,a,b,op) /* Type with double, both operand orders */ \
+  CHECK(Type,double,a,b,op); \
+  CHECK(double,Type,a,b,op)
+#define CHECK_MPZ(Type,a,b,op) /* Type with mpz_class, both operand orders */ \
+  CHECK(Type,mpz_class,a,b,op); \
+  CHECK(mpz_class,Type,a,b,op)
+#define CHECK_MPQ(Type,a,b,op) /* Type with mpq_class, both operand orders */ \
+  CHECK(Type,mpq_class,a,b,op); \
+  CHECK(mpq_class,Type,a,b,op)
+#define CHECK_ALL_SIGNED(Type,a,b,op) /* same-type, long and double variants */ \
+  CHECK_G(Type,a,b,op); \
+  CHECK_SI(Type,a,b,op); \
+  CHECK_D(Type,a,b,op)
+#define CHECK_ALL_SIGNS(Type,a,b,op) /* CHECK_ALL_SIGNED for all four sign combinations of a and b */ \
+  CHECK_ALL_SIGNED(Type,a,b,op); \
+  CHECK_ALL_SIGNED(Type,-(a),b,op); \
+  CHECK_ALL_SIGNED(Type,a,-(b),op); \
+  CHECK_ALL_SIGNED(Type,-(a),-(b),op)
+#define CHECK_ALL(Type,a,b,op) /* CHECK_ALL_SIGNED plus the unsigned long variants */ \
+  CHECK_ALL_SIGNED(Type,a,b,op); \
+  CHECK_UI(Type,a,b,op)
+#define CHECK_ALL_SIGNED_COMPARISONS(Type,a,b) /* CHECK_ALL_SIGNED for each of the six comparison operators */ \
+  CHECK_ALL_SIGNED(Type,a,b,<); \
+  CHECK_ALL_SIGNED(Type,a,b,>); \
+  CHECK_ALL_SIGNED(Type,a,b,<=); \
+  CHECK_ALL_SIGNED(Type,a,b,>=); \
+  CHECK_ALL_SIGNED(Type,a,b,==); \
+  CHECK_ALL_SIGNED(Type,a,b,!=)
+#define CHECK_ALL_SIGNS_COMPARISONS(Type,a,b) /* CHECK_ALL_SIGNS for each of the six comparison operators */ \
+  CHECK_ALL_SIGNS(Type,a,b,<); \
+  CHECK_ALL_SIGNS(Type,a,b,>); \
+  CHECK_ALL_SIGNS(Type,a,b,<=); \
+  CHECK_ALL_SIGNS(Type,a,b,>=); \
+  CHECK_ALL_SIGNS(Type,a,b,==); \
+  CHECK_ALL_SIGNS(Type,a,b,!=)
+#define CHECK_ALL_COMPARISONS(Type,a,b) /* CHECK_ALL for each of the six comparison operators */ \
+  CHECK_ALL(Type,a,b,<); \
+  CHECK_ALL(Type,a,b,>); \
+  CHECK_ALL(Type,a,b,<=); \
+  CHECK_ALL(Type,a,b,>=); \
+  CHECK_ALL(Type,a,b,==); \
+  CHECK_ALL(Type,a,b,!=)
+
+
+void checkz (){ // mpz_class checks: each result is compared with the same expression on plain literals
+  CHECK_ALL(mpz_class,5,2,+);
+  CHECK_ALL(mpz_class,5,2,-);
+  CHECK_ALL(mpz_class,5,2,*);
+  CHECK_ALL(mpz_class,5,2,/);
+  CHECK_ALL(mpz_class,5,2,%);
+  CHECK_ALL_COMPARISONS(mpz_class,5,2);
+  CHECK_ALL_SIGNS(mpz_class,11,3,+); // all four sign combinations of 11 and 3
+  CHECK_ALL_SIGNS(mpz_class,11,3,-);
+  CHECK_ALL_SIGNS(mpz_class,11,3,*);
+  CHECK_ALL_SIGNS(mpz_class,11,3,/);
+  CHECK_ALL_SIGNS(mpz_class,11,3,%);
+  CHECK_ALL_SIGNS(mpz_class,17,2,*);
+  CHECK_ALL_SIGNS(mpz_class,17,2,/);
+  CHECK_ALL_SIGNS(mpz_class,17,2,%);
+  CHECK(unsigned long,mpz_class,5,-2,/); // unsigned long lhs with a negative mpz_class rhs
+  CHECK(unsigned long,mpz_class,5,-2,%);
+  ASSERT_ALWAYS(7ul/mpz_class(1e35)==0); // divisor built from the double 1e35: quotient 0, remainder 7 expected
+  ASSERT_ALWAYS(7ul%mpz_class(1e35)==7);
+  ASSERT_ALWAYS(7ul/mpz_class(-1e35)==0); // same with a negative divisor
+  ASSERT_ALWAYS(7ul%mpz_class(-1e35)==7);
+  CHECK_ALL_SIGNS_COMPARISONS(mpz_class,11,3);
+  CHECK_ALL(mpz_class,6,3,&); // bitwise operators
+  CHECK_ALL(mpz_class,6,3,|);
+  CHECK_ALL(mpz_class,6,3,^);
+  CHECK(mpz_class,unsigned long,6,2,<<); // shifts: mpz_class lhs, unsigned long shift count
+  CHECK(mpz_class,unsigned long,6,2,>>);
+  CHECK(mpz_class,unsigned long,-13,2,<<);
+  CHECK(mpz_class,unsigned long,-13,2,>>);
+  ASSERT_ALWAYS(++mpz_class(7)==8); // increment/decrement applied to temporaries
+  ASSERT_ALWAYS(++mpz_class(-8)==-7);
+  ASSERT_ALWAYS(--mpz_class(8)==7);
+  ASSERT_ALWAYS(--mpz_class(-7)==-8);
+  ASSERT_ALWAYS(~mpz_class(7)==-8); // complement: ~7 is expected to be -8 and ~(-8) to be 7
+  ASSERT_ALWAYS(~mpz_class(-8)==7);
+  ASSERT_ALWAYS(+mpz_class(7)==7); // unary plus and minus
+  ASSERT_ALWAYS(+mpz_class(-8)==-8);
+  ASSERT_ALWAYS(-mpz_class(7)==-7);
+  ASSERT_ALWAYS(-mpz_class(-8)==8);
+  ASSERT_ALWAYS(abs(mpz_class(7))==7);
+  ASSERT_ALWAYS(abs(mpz_class(-8))==8);
+  ASSERT_ALWAYS(sqrt(mpz_class(7))==2); // integer result expected: sqrt of 7 must compare equal to 2
+  ASSERT_ALWAYS(sqrt(mpz_class(0))==0);
+  ASSERT_ALWAYS(sgn(mpz_class(0))==0); // sgn expected to yield 0, 1 or -1
+  ASSERT_ALWAYS(sgn(mpz_class(9))==1);
+  ASSERT_ALWAYS(sgn(mpz_class(-17))==-1);
+}
+
+template<class T> // main instantiates this with mpq_class and mpf_class
+void checkqf (){ // checks shared by both instantiations: arithmetic, comparisons, shifts, unary operators
+  CHECK_ALL(T,5.,2,+); CHECK_MPZ(T,5.,2,+); // built-in operand types first, then mpz_class operands
+  CHECK_ALL(T,5.,2,-); CHECK_MPZ(T,5.,2,-);
+  CHECK_ALL(T,5.,2,*); CHECK_MPZ(T,5.,2,*);
+  CHECK_ALL(T,5.,2,/); CHECK_MPZ(T,5.,2,/);
+  CHECK_ALL(T,0.,2,/); // zero numerator
+  CHECK_ALL_SIGNS(T,11.,3,+);
+  CHECK_ALL_SIGNS(T,11.,3,-);
+  CHECK_ALL_SIGNS(T,11.,3,*);
+  CHECK_ALL_SIGNS(T,11.,4,/); // NOTE(review): divisor 4 presumably chosen so 11/4 = 2.75 is exact in binary - confirm
+  CHECK_SI(T,LONG_MIN,1,*); // most negative long as an operand
+  CHECK_SI(T,0,3,*);
+  CHECK_ALL_COMPARISONS(T,5.,2);
+  CHECK_ALL_SIGNS_COMPARISONS(T,11.,3);
+  CHECK_MPZ(T,5,-2,<); // comparisons against mpz_class, both operand orders
+  CHECK_MPZ(T,5,-2,>);
+  CHECK_MPZ(T,5,-2,<=);
+  CHECK_MPZ(T,5,-2,>=);
+  CHECK_MPZ(T,5,-2,==);
+  CHECK_MPZ(T,5,-2,!=);
+  CHECK_MPZ(T,0,0,<); // same comparisons with both operands zero
+  CHECK_MPZ(T,0,0,>);
+  CHECK_MPZ(T,0,0,<=);
+  CHECK_MPZ(T,0,0,>=);
+  CHECK_MPZ(T,0,0,==);
+  CHECK_MPZ(T,0,0,!=);
+  ASSERT_ALWAYS(T(6)<<2==6.*4); // shift by 2 must equal multiplying/dividing by 4, fractional part kept
+  ASSERT_ALWAYS(T(6)>>2==6./4);
+  ASSERT_ALWAYS(T(-13)<<2==-13.*4);
+  ASSERT_ALWAYS(T(-13)>>2==-13./4);
+  ASSERT_ALWAYS(++T(7)==8); // increment/decrement applied to temporaries
+  ASSERT_ALWAYS(++T(-8)==-7);
+  ASSERT_ALWAYS(--T(8)==7);
+  ASSERT_ALWAYS(--T(-7)==-8);
+  ASSERT_ALWAYS(+T(7)==7); // unary plus and minus
+  ASSERT_ALWAYS(+T(-8)==-8);
+  ASSERT_ALWAYS(-T(7)==-7);
+  ASSERT_ALWAYS(-T(-8)==8);
+  ASSERT_ALWAYS(abs(T(7))==7);
+  ASSERT_ALWAYS(abs(T(-8))==8);
+  ASSERT_ALWAYS(sgn(T(0))==0); // sgn expected to yield 0, 1 or -1
+  ASSERT_ALWAYS(sgn(T(9))==1);
+  ASSERT_ALWAYS(sgn(T(-17))==-1);
+}
+
+void checkf (){ // mpf_class-only checks: sqrt, trunc/floor/ceil, hypot, and mixed mpf_class/mpq_class operations
+  ASSERT_ALWAYS(sqrt(mpf_class(7))>2.64); // result is bracketed rather than tested for equality
+  ASSERT_ALWAYS(sqrt(mpf_class(7))<2.65);
+  ASSERT_ALWAYS(sqrt(mpf_class(0))==0);
+  // TODO: add some consistency checks, as described in
+  // http://gmplib.org/list-archives/gmp-bugs/2013-February/002940.html
+  CHECK1(mpf_class,1.9,trunc); // each rounding function on mpf_class must match the same call on the plain double
+  CHECK1(mpf_class,1.9,floor);
+  CHECK1(mpf_class,1.9,ceil);
+  CHECK1(mpf_class,4.3,trunc);
+  CHECK1(mpf_class,4.3,floor);
+  CHECK1(mpf_class,4.3,ceil);
+  CHECK1(mpf_class,-7.1,trunc); // negative inputs
+  CHECK1(mpf_class,-7.1,floor);
+  CHECK1(mpf_class,-7.1,ceil);
+  CHECK1(mpf_class,-2.8,trunc);
+  CHECK1(mpf_class,-2.8,floor);
+  CHECK1(mpf_class,-2.8,ceil);
+  CHECK1(mpf_class,-1.5,trunc); // inputs exactly halfway between two integers
+  CHECK1(mpf_class,-1.5,floor);
+  CHECK1(mpf_class,-1.5,ceil);
+  CHECK1(mpf_class,2.5,trunc);
+  CHECK1(mpf_class,2.5,floor);
+  CHECK1(mpf_class,2.5,ceil);
+  ASSERT_ALWAYS(hypot(mpf_class(-3),mpf_class(4))>4.9); // 3-4-5 triangle, result bracketed by 4.9 and 5.1
+  ASSERT_ALWAYS(hypot(mpf_class(-3),mpf_class(4))<5.1);
+  ASSERT_ALWAYS(hypot(mpf_class(-3),4.)>4.9); // mpf_class mixed with double
+  ASSERT_ALWAYS(hypot(-3.,mpf_class(4))<5.1);
+  ASSERT_ALWAYS(hypot(mpf_class(-3),4l)>4.9); // mpf_class mixed with long
+  ASSERT_ALWAYS(hypot(-3l,mpf_class(4))<5.1);
+  ASSERT_ALWAYS(hypot(mpf_class(-3),4ul)>4.9); // mpf_class mixed with unsigned long
+  ASSERT_ALWAYS(hypot(3ul,mpf_class(4))<5.1);
+  CHECK(mpf_class,mpq_class,1.5,2.25,+); // mpf_class lhs, mpq_class rhs; operands are dyadic fractions
+  CHECK(mpf_class,mpq_class,1.5,2.25,-);
+  CHECK(mpf_class,mpq_class,1.5,-2.25,*);
+  CHECK(mpf_class,mpq_class,1.5,-2,/);
+  CHECK_MPQ(mpf_class,-5.5,-2.25,+); // mpf_class with mpq_class, both operand orders
+  CHECK_MPQ(mpf_class,-5.5,-2.25,-);
+  CHECK_MPQ(mpf_class,-5.5,-2.25,*);
+  CHECK_MPQ(mpf_class,-5.25,-0.5,/);
+  CHECK_MPQ(mpf_class,5,-2,<); // comparisons between mpf_class and mpq_class
+  CHECK_MPQ(mpf_class,5,-2,>);
+  CHECK_MPQ(mpf_class,5,-2,<=);
+  CHECK_MPQ(mpf_class,5,-2,>=);
+  CHECK_MPQ(mpf_class,5,-2,==);
+  CHECK_MPQ(mpf_class,5,-2,!=);
+  CHECK_MPQ(mpf_class,0,0,<); // same comparisons with both operands zero
+  CHECK_MPQ(mpf_class,0,0,>);
+  CHECK_MPQ(mpf_class,0,0,<=);
+  CHECK_MPQ(mpf_class,0,0,>=);
+  CHECK_MPQ(mpf_class,0,0,==);
+  CHECK_MPQ(mpf_class,0,0,!=);
+}
+
+int
+main (void) // test driver: runs every check function between tests_start() and tests_end()
+{
+  tests_start();
+
+  checkz(); // mpz_class
+  checkqf<mpq_class>(); // shared checks, rational instantiation
+  checkqf<mpf_class>(); // shared checks, floating-point instantiation
+  checkf(); // mpf_class-only checks
+
+  tests_end();
+  return 0;
+}
diff --git a/tests/cxx/t-ops3.cc b/tests/cxx/t-ops3.cc

new file mode 100644 (file)

index 0000000..b5273c5
--- /dev/null
+++ b/tests/cxx/t-ops3.cc
@@ -0,0 +1,133 @@
+/* Test mp*_class assignment operators (+=, -=, etc)
+
+Copyright 2011 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "config.h"
+
+#include "gmp.h"
+#include "gmpxx.h"
+#include "gmp-impl.h"
+#include "tests.h"
+
+using namespace std;
+
+#define FOR_ALL_SIGNED_BUILTIN(F) /* applies F to each listed signed integer type, float and double */ \
+       F(signed char) \
+       F(signed short) \
+       F(signed int) \
+       F(signed long) \
+       F(float) \
+       F(double)
+
+#define FOR_ALL_BUILTIN(F) /* the signed list plus plain char and the listed unsigned integer types */ \
+       FOR_ALL_SIGNED_BUILTIN(F) \
+       F(char) \
+       F(unsigned char) \
+       F(unsigned short) \
+       F(unsigned int) \
+       F(unsigned long)
+
+#define FOR_ALL_GMPXX(F) /* applies F to the three gmpxx classes */ \
+       F(mpz_class) \
+       F(mpq_class) \
+       F(mpf_class)
+
+template<class T,class U> void f(T t, U u){ // checks +=, -=, *=, /=, <<=, >>= on a copy of t with operand u
+  T a=t;
+  ASSERT_ALWAYS((a+=u)==(t+u)); ASSERT_ALWAYS(a==(t+u)); // each line tests the returned value, then the updated variable
+  ASSERT_ALWAYS((a-=u)==t); ASSERT_ALWAYS(a==t); // -= must undo the preceding +=
+  ASSERT_ALWAYS((a*=u)==(t*u)); ASSERT_ALWAYS(a==(t*u));
+  ASSERT_ALWAYS((a/=u)==t); ASSERT_ALWAYS(a==t); // /= must undo the preceding *=
+  ASSERT_ALWAYS((a<<=5)==(t<<5)); ASSERT_ALWAYS(a==(t<<5));
+  ASSERT_ALWAYS((a>>=5)==t); ASSERT_ALWAYS(a==t); // >>= must undo the preceding <<=
+}
+
+template<class T,class U> void g(T t, U u){ // checks %=, &=, |=, ^= against the corresponding binary operators
+  T a=t;
+  ASSERT_ALWAYS((a%=u)==(t%u)); ASSERT_ALWAYS(a==(t%u)); // each line tests the returned value, then the updated variable
+  a=t; // restore the starting value before the next operator
+  ASSERT_ALWAYS((a&=u)==(t&u)); ASSERT_ALWAYS(a==(t&u));
+  a=t;
+  ASSERT_ALWAYS((a|=u)==(t|u)); ASSERT_ALWAYS(a==(t|u));
+  a=t;
+  ASSERT_ALWAYS((a^=u)==(t^u)); ASSERT_ALWAYS(a==(t^u));
+}
+
+template<class T> void h(T t){ // shift-assignment round trip on a copy of t
+  T a=t;
+  ASSERT_ALWAYS((a<<=5)==(t<<5)); ASSERT_ALWAYS(a==(t<<5)); // returned value, then the updated variable
+  ASSERT_ALWAYS((a>>=5)==t); ASSERT_ALWAYS(a==t); // >>= must restore the original value
+}
+
+template<class T, class U> void ffs(T t, U u){ // signed-only variant of ff; check() calls it with a negative u
+#define F(V) f(t,(V)u); // run f with u cast to V
+       FOR_ALL_SIGNED_BUILTIN(F)
+       FOR_ALL_GMPXX(F)
+#undef F
+#define F(V) f(t,-(V)u); // run f with the negated value of u cast to V
+       FOR_ALL_GMPXX(F) // negation is applied to the gmpxx classes only
+#undef F
+}
+
+template<class T, class U> void ff(T t, U u){ // runs f(t, ...) with u cast to every built-in type and every gmpxx class
+#define F(V) f(t,(V)u); // run f with u cast to V
+       FOR_ALL_BUILTIN(F)
+       FOR_ALL_GMPXX(F)
+#undef F
+#define F(V) f(t,-(V)u); // run f with the negated value of u cast to V
+       FOR_ALL_GMPXX(F) // negation is applied to the gmpxx classes only
+#undef F
+}
+
+template<class U> void ggs(mpz_class t, U u){ // signed-only variant of gg; check() calls it with a negative u
+#define F(V) g(t,(V)u); // run g with u cast to V
+       FOR_ALL_SIGNED_BUILTIN(F)
+#undef F
+       g(t,(mpz_class)u); // mpz_class operand
+       g(t,-(mpz_class)u); // negated mpz_class operand
+}
+
+template<class U> void gg(mpz_class t, U u){ // runs g(t, ...) with u cast to every built-in type, then as mpz_class
+#define F(V) g(t,(V)u); // run g with u cast to V
+       FOR_ALL_BUILTIN(F)
+#undef F
+       g(t,(mpz_class)u); // mpz_class operand
+       g(t,-(mpz_class)u); // negated mpz_class operand
+}
+
+void check(){ // drives all helpers with one mpz_class, one mpq_class and one mpf_class value
+       mpz_class z=18;
+       mpq_class q(7,2); // numerator 7, denominator 2
+       mpf_class d=3.375;
+       h(z); h(q); h(d); // shift-assignment round trip for each class
+       ff(z,13); ff(q,13); ff(d,13); // positive operand: all built-in types
+       ffs(z,-42); ffs(q,-42); ffs(d,-42); // negative operand: signed built-in types only
+       gg(z,33); ggs(z,-22); // %=, &=, |=, ^= are exercised on mpz_class only
+}
+
+
+int
+main (void) // test driver: runs check() between tests_start() and tests_end()
+{
+  tests_start();
+
+  check();
+
+  tests_end();
+  return 0;
+}
diff --git a/tests/cxx/t-ostream.cc b/tests/cxx/t-ostream.cc

index 0eeb7be62be3ac328abe4e440cf96b476ecb816f..9899f5fcbae57d51081a288c0723d007337d5f55 100644 (file)
--- a/tests/cxx/t-ostream.cc
+++ b/tests/cxx/t-ostream.cc
@@ -2,20 +2,20 @@
  
  Copyright 2001, 2002 Free Software Foundation, Inc.
  
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
  
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
  
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
  
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see http://www.gnu.org/licenses/.  */
  
  #include <iostream>
  #include <cstdlib>
@@ -27,7 +27,7 @@ along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
  using namespace std;
  
  
-int   option_check_standard = 0;
+bool option_check_standard = false;
  
  
  #define CALL(expr)                                                     \
@@ -437,7 +437,7 @@ int
  main (int argc, char *argv[])
  {
    if (argc > 1 && strcmp (argv[1], "-s") == 0)
-    option_check_standard = 1;
+    option_check_standard = true;
  
    tests_start ();
  
diff --git a/tests/cxx/t-prec.cc b/tests/cxx/t-prec.cc

index e9f4e4813938fc14bb5913a3bd11175e57151361..28d68d47e2ae5fb734337aa10b3940cb20dad9bd 100644 (file)
--- a/tests/cxx/t-prec.cc
+++ b/tests/cxx/t-prec.cc
@@ -2,20 +2,20 @@
  
  Copyright 2001, 2002, 2003, 2008 Free Software Foundation, Inc.
  
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
  
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
  
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
  
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see http://www.gnu.org/licenses/.  */
  
  #include "config.h"
  
diff --git a/tests/cxx/t-rand.cc b/tests/cxx/t-rand.cc

index 506a13789f74cd68e4f534a2849487a9ed8741cf..5fd8cf72f4c12827159614b747155eb60118fc60 100644 (file)
--- a/tests/cxx/t-rand.cc
+++ b/tests/cxx/t-rand.cc
@@ -2,20 +2,20 @@
  
  Copyright 2002, 2003 Free Software Foundation, Inc.
  
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
  
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
  
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
  
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see http://www.gnu.org/licenses/.  */
  
  #include "gmp.h"
  #include "gmpxx.h"
@@ -113,13 +113,24 @@ check_mpf (void)
      r.seed(a);
      mpf_class b;
      b = r.get_f();
+    mpf_class c(r.get_f());
+    ASSERT_ALWAYS (c.get_prec() == mpf_get_default_prec());
+    mpf_class d(r.get_f(),212);
+    ASSERT_ALWAYS (d.get_prec() >= 212);
    }
    {
      gmp_randclass r(gmp_randinit_default);
-    int a = 123, b = 128;
+    int a = 123, b = 198;
      r.seed(a);
      mpf_class c;
      c = r.get_f(b);
+    ASSERT_ALWAYS (c.get_prec() == mpf_get_default_prec());
+    mpf_class d(r.get_f(b));
+    ASSERT_ALWAYS (d.get_prec() >= 198);
+    mpf_class e(r.get_f(b)-r.get_f());
+    ASSERT_ALWAYS (e.get_prec() >= 198);
+    mpf_class f(r.get_f(60),300);
+    ASSERT_ALWAYS (f.get_prec() >= 300);
    }
  }
  
diff --git a/tests/cxx/t-ternary.cc b/tests/cxx/t-ternary.cc

index 39b734782bcafc94d98a312ba4bf98207a8ffd0b..c2152cc76568fd2873521395692d28fe532bc457 100644 (file)
--- a/tests/cxx/t-ternary.cc
+++ b/tests/cxx/t-ternary.cc
@@ -2,20 +2,20 @@
  
  Copyright 2001, 2002, 2003 Free Software Foundation, Inc.
  
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
  
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
  
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
  
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see http://www.gnu.org/licenses/.  */
  
  #include "config.h"
  
diff --git a/tests/cxx/t-unary.cc b/tests/cxx/t-unary.cc

index eda437c5b5c7350300daf0d1655db110c6296824..4c4aa24adf6cd2e3298e5f42a49dfcb08a570465 100644 (file)
--- a/tests/cxx/t-unary.cc
+++ b/tests/cxx/t-unary.cc
@@ -2,20 +2,20 @@
  
  Copyright 2001, 2002, 2003 Free Software Foundation, Inc.
  
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
  
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
  
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
  
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see http://www.gnu.org/licenses/.  */
  
  #include "config.h"
  
diff --git a/tests/devel/Makefile.am b/tests/devel/Makefile.am

index 5eada53f0f3353a344b2a2efd63b8ebc880e4798..6eb362d6ad3ae1783aa84d975ddc4279c8be6377 100644 (file)
--- a/tests/devel/Makefile.am
+++ b/tests/devel/Makefile.am
@@ -2,20 +2,20 @@
  
  # Copyright 2000, 2001, 2002 Free Software Foundation, Inc.
  #
-# This file is part of the GNU MP Library.
+# This file is part of the GNU MP Library test suite.
  #
-# The GNU MP Library is free software; you can redistribute it and/or modify
-# it under the terms of the GNU Lesser General Public License as published by
-# the Free Software Foundation; either version 3 of the License, or (at your
-# option) any later version.
+# The GNU MP Library test suite is free software; you can redistribute it
+# and/or modify it under the terms of the GNU General Public License as
+# published by the Free Software Foundation; either version 3 of the License,
+# or (at your option) any later version.
  #
-# The GNU MP Library is distributed in the hope that it will be useful, but
-# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-# License for more details.
+# The GNU MP Library test suite is distributed in the hope that it will be
+# useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+# Public License for more details.
  #
-# You should have received a copy of the GNU Lesser General Public License
-# along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+# You should have received a copy of the GNU General Public License along with
+# the GNU MP Library test suite.  If not, see http://www.gnu.org/licenses/.
  
  
  INCLUDES = -I$(top_srcdir) -I$(top_srcdir)/tests
diff --git a/tests/devel/Makefile.in b/tests/devel/Makefile.in

index 23d443e1ba5015d1b4363ea53db1d70f58c16c87..d70849ad09bbbcdf2a2d07318168b0e3dfc2a11a 100644 (file)
--- a/tests/devel/Makefile.in
+++ b/tests/devel/Makefile.in
@@ -1,9 +1,9 @@
-# Makefile.in generated by automake 1.11.1 from Makefile.am.
+# Makefile.in generated by automake 1.11.6 from Makefile.am.
  # @configure_input@
  
  # Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
-# 2003, 2004, 2005, 2006, 2007, 2008, 2009  Free Software Foundation,
-# Inc.
+# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software
+# Foundation, Inc.
  # This Makefile.in is free software; the Free Software Foundation
  # gives unlimited permission to copy and/or distribute it,
  # with or without modifications, as long as this notice is preserved.
@@ -17,21 +17,38 @@
  
  # Copyright 2000, 2001, 2002 Free Software Foundation, Inc.
  #
-# This file is part of the GNU MP Library.
+# This file is part of the GNU MP Library test suite.
  #
-# The GNU MP Library is free software; you can redistribute it and/or modify
-# it under the terms of the GNU Lesser General Public License as published by
-# the Free Software Foundation; either version 3 of the License, or (at your
-# option) any later version.
+# The GNU MP Library test suite is free software; you can redistribute it
+# and/or modify it under the terms of the GNU General Public License as
+# published by the Free Software Foundation; either version 3 of the License,
+# or (at your option) any later version.
  #
-# The GNU MP Library is distributed in the hope that it will be useful, but
-# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-# License for more details.
+# The GNU MP Library test suite is distributed in the hope that it will be
+# useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+# Public License for more details.
  #
-# You should have received a copy of the GNU Lesser General Public License
-# along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+# You should have received a copy of the GNU General Public License along with
+# the GNU MP Library test suite.  If not, see http://www.gnu.org/licenses/.
  VPATH = @srcdir@
+am__make_dryrun = \
+  { \
+    am__dry=no; \
+    case $$MAKEFLAGS in \
+      *\\[\ \  ]*) \
+        echo 'am--echo: ; @echo "AM"  OK' | $(MAKE) -f - 2>/dev/null \
+          | grep '^AM OK$$' >/dev/null || am__dry=yes;; \
+      *) \
+        for am__flg in $$MAKEFLAGS; do \
+          case $$am__flg in \
+            *=*|--*) ;; \
+            *n*) am__dry=yes; break;; \
+          esac; \
+        done;; \
+    esac; \
+    test $$am__dry = yes; \
+  }
  pkgdatadir = $(datadir)/@PACKAGE@
  pkgincludedir = $(includedir)/@PACKAGE@
  pkglibdir = $(libdir)/@PACKAGE@
@@ -50,7 +67,6 @@ PRE_UNINSTALL = :
  POST_UNINSTALL = :
  build_triplet = @build@
  host_triplet = @host@
-ANSI2KNR = $(top_builddir)/ansi2knr
  EXTRA_PROGRAMS = aors_n$(EXEEXT) anymul_1$(EXEEXT) copy$(EXEEXT) \
         divmod_1$(EXEEXT) divrem$(EXEEXT) shift$(EXEEXT) \
         logops_n$(EXEEXT) tst-addsub$(EXEEXT) try$(EXEEXT)
@@ -58,7 +74,7 @@ subdir = tests/devel
  DIST_COMMON = README $(srcdir)/Makefile.am $(srcdir)/Makefile.in
  ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
  am__aclocal_m4_deps = $(top_srcdir)/acinclude.m4 \
-       $(top_srcdir)/configure.in
+       $(top_srcdir)/configure.ac
  am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
         $(ACLOCAL_M4)
  mkinstalldirs = $(install_sh) -d
@@ -66,47 +82,47 @@ CONFIG_HEADER = $(top_builddir)/config.h
  CONFIG_CLEAN_FILES =
  CONFIG_CLEAN_VPATH_FILES =
  anymul_1_SOURCES = anymul_1.c
-anymul_1_OBJECTS = anymul_1$U.$(OBJEXT)
+anymul_1_OBJECTS = anymul_1.$(OBJEXT)
  anymul_1_LDADD = $(LDADD)
  anymul_1_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
         $(top_builddir)/libgmp.la
  aors_n_SOURCES = aors_n.c
-aors_n_OBJECTS = aors_n$U.$(OBJEXT)
+aors_n_OBJECTS = aors_n.$(OBJEXT)
  aors_n_LDADD = $(LDADD)
  aors_n_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
         $(top_builddir)/libgmp.la
  copy_SOURCES = copy.c
-copy_OBJECTS = copy$U.$(OBJEXT)
+copy_OBJECTS = copy.$(OBJEXT)
  copy_LDADD = $(LDADD)
  copy_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
         $(top_builddir)/libgmp.la
  divmod_1_SOURCES = divmod_1.c
-divmod_1_OBJECTS = divmod_1$U.$(OBJEXT)
+divmod_1_OBJECTS = divmod_1.$(OBJEXT)
  divmod_1_LDADD = $(LDADD)
  divmod_1_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
         $(top_builddir)/libgmp.la
  divrem_SOURCES = divrem.c
-divrem_OBJECTS = divrem$U.$(OBJEXT)
+divrem_OBJECTS = divrem.$(OBJEXT)
  divrem_LDADD = $(LDADD)
  divrem_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
         $(top_builddir)/libgmp.la
  logops_n_SOURCES = logops_n.c
-logops_n_OBJECTS = logops_n$U.$(OBJEXT)
+logops_n_OBJECTS = logops_n.$(OBJEXT)
  logops_n_LDADD = $(LDADD)
  logops_n_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
         $(top_builddir)/libgmp.la
  shift_SOURCES = shift.c
-shift_OBJECTS = shift$U.$(OBJEXT)
+shift_OBJECTS = shift.$(OBJEXT)
  shift_LDADD = $(LDADD)
  shift_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
         $(top_builddir)/libgmp.la
  try_SOURCES = try.c
-try_OBJECTS = try$U.$(OBJEXT)
+try_OBJECTS = try.$(OBJEXT)
  try_LDADD = $(LDADD)
  try_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
         $(top_builddir)/libgmp.la
  tst_addsub_SOURCES = tst-addsub.c
-tst_addsub_OBJECTS = tst-addsub$U.$(OBJEXT)
+tst_addsub_OBJECTS = tst-addsub.$(OBJEXT)
  tst_addsub_LDADD = $(LDADD)
  tst_addsub_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
         $(top_builddir)/libgmp.la
@@ -126,6 +142,11 @@ SOURCES = anymul_1.c aors_n.c copy.c divmod_1.c divrem.c logops_n.c \
         shift.c try.c tst-addsub.c
  DIST_SOURCES = anymul_1.c aors_n.c copy.c divmod_1.c divrem.c \
         logops_n.c shift.c try.c tst-addsub.c
+am__can_run_installinfo = \
+  case $$AM_UPDATE_INFO_DIR in \
+    n|no|NO) false;; \
+    *) (install-info --version) >/dev/null 2>&1;; \
+  esac
  ETAGS = etags
  CTAGS = ctags
  DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
@@ -227,8 +248,8 @@ SHELL = @SHELL@
  SPEED_CYCLECOUNTER_OBJ = @SPEED_CYCLECOUNTER_OBJ@
  STRIP = @STRIP@
  TAL_OBJECT = @TAL_OBJECT@
+TUNE_LIBS = @TUNE_LIBS@
  TUNE_SQR_OBJ = @TUNE_SQR_OBJ@
-U = @U@
  U_FOR_BUILD = @U_FOR_BUILD@
  VERSION = @VERSION@
  WITH_READLINE_01 = @WITH_READLINE_01@
@@ -275,7 +296,6 @@ mandir = @mandir@
  mkdir_p = @mkdir_p@
  mpn_objects = @mpn_objects@
  mpn_objs_in_libgmp = @mpn_objs_in_libgmp@
-mpn_objs_in_libmp = @mpn_objs_in_libmp@
  oldincludedir = @oldincludedir@
  pdfdir = @pdfdir@
  prefix = @prefix@
@@ -326,31 +346,31 @@ $(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps)
  $(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps)
         cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
  $(am__aclocal_m4_deps):
-anymul_1$(EXEEXT): $(anymul_1_OBJECTS) $(anymul_1_DEPENDENCIES) 
+anymul_1$(EXEEXT): $(anymul_1_OBJECTS) $(anymul_1_DEPENDENCIES) $(EXTRA_anymul_1_DEPENDENCIES) 
         @rm -f anymul_1$(EXEEXT)
         $(LINK) $(anymul_1_OBJECTS) $(anymul_1_LDADD) $(LIBS)
-aors_n$(EXEEXT): $(aors_n_OBJECTS) $(aors_n_DEPENDENCIES) 
+aors_n$(EXEEXT): $(aors_n_OBJECTS) $(aors_n_DEPENDENCIES) $(EXTRA_aors_n_DEPENDENCIES) 
         @rm -f aors_n$(EXEEXT)
         $(LINK) $(aors_n_OBJECTS) $(aors_n_LDADD) $(LIBS)
-copy$(EXEEXT): $(copy_OBJECTS) $(copy_DEPENDENCIES) 
+copy$(EXEEXT): $(copy_OBJECTS) $(copy_DEPENDENCIES) $(EXTRA_copy_DEPENDENCIES) 
         @rm -f copy$(EXEEXT)
         $(LINK) $(copy_OBJECTS) $(copy_LDADD) $(LIBS)
-divmod_1$(EXEEXT): $(divmod_1_OBJECTS) $(divmod_1_DEPENDENCIES) 
+divmod_1$(EXEEXT): $(divmod_1_OBJECTS) $(divmod_1_DEPENDENCIES) $(EXTRA_divmod_1_DEPENDENCIES) 
         @rm -f divmod_1$(EXEEXT)
         $(LINK) $(divmod_1_OBJECTS) $(divmod_1_LDADD) $(LIBS)
-divrem$(EXEEXT): $(divrem_OBJECTS) $(divrem_DEPENDENCIES) 
+divrem$(EXEEXT): $(divrem_OBJECTS) $(divrem_DEPENDENCIES) $(EXTRA_divrem_DEPENDENCIES) 
         @rm -f divrem$(EXEEXT)
         $(LINK) $(divrem_OBJECTS) $(divrem_LDADD) $(LIBS)
-logops_n$(EXEEXT): $(logops_n_OBJECTS) $(logops_n_DEPENDENCIES) 
+logops_n$(EXEEXT): $(logops_n_OBJECTS) $(logops_n_DEPENDENCIES) $(EXTRA_logops_n_DEPENDENCIES) 
         @rm -f logops_n$(EXEEXT)
         $(LINK) $(logops_n_OBJECTS) $(logops_n_LDADD) $(LIBS)
-shift$(EXEEXT): $(shift_OBJECTS) $(shift_DEPENDENCIES) 
+shift$(EXEEXT): $(shift_OBJECTS) $(shift_DEPENDENCIES) $(EXTRA_shift_DEPENDENCIES) 
         @rm -f shift$(EXEEXT)
         $(LINK) $(shift_OBJECTS) $(shift_LDADD) $(LIBS)
-try$(EXEEXT): $(try_OBJECTS) $(try_DEPENDENCIES) 
+try$(EXEEXT): $(try_OBJECTS) $(try_DEPENDENCIES) $(EXTRA_try_DEPENDENCIES) 
         @rm -f try$(EXEEXT)
         $(LINK) $(try_OBJECTS) $(try_LDADD) $(LIBS)
-tst-addsub$(EXEEXT): $(tst_addsub_OBJECTS) $(tst_addsub_DEPENDENCIES) 
+tst-addsub$(EXEEXT): $(tst_addsub_OBJECTS) $(tst_addsub_DEPENDENCIES) $(EXTRA_tst_addsub_DEPENDENCIES) 
         @rm -f tst-addsub$(EXEEXT)
         $(LINK) $(tst_addsub_OBJECTS) $(tst_addsub_LDADD) $(LIBS)
  
@@ -359,11 +379,6 @@ mostlyclean-compile:
  
  distclean-compile:
         -rm -f *.tab.c
-$(top_builddir)/ansi2knr:
-       $(am__cd) $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) ./ansi2knr
-
-mostlyclean-kr:
-       -test "$U" = "" || rm -f *_.c
  
  .c.o:
         $(COMPILE) -c $<
@@ -373,29 +388,6 @@ mostlyclean-kr:
  
  .c.lo:
         $(LTCOMPILE) -c -o $@ $<
-anymul_1_.c: anymul_1.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/anymul_1.c; then echo $(srcdir)/anymul_1.c; else echo anymul_1.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-aors_n_.c: aors_n.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/aors_n.c; then echo $(srcdir)/aors_n.c; else echo aors_n.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-copy_.c: copy.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/copy.c; then echo $(srcdir)/copy.c; else echo copy.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-divmod_1_.c: divmod_1.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/divmod_1.c; then echo $(srcdir)/divmod_1.c; else echo divmod_1.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-divrem_.c: divrem.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/divrem.c; then echo $(srcdir)/divrem.c; else echo divrem.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-logops_n_.c: logops_n.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/logops_n.c; then echo $(srcdir)/logops_n.c; else echo logops_n.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-shift_.c: shift.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/shift.c; then echo $(srcdir)/shift.c; else echo shift.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-try_.c: try.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/try.c; then echo $(srcdir)/try.c; else echo try.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-tst-addsub_.c: tst-addsub.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/tst-addsub.c; then echo $(srcdir)/tst-addsub.c; else echo tst-addsub.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-anymul_1_.$(OBJEXT) anymul_1_.lo aors_n_.$(OBJEXT) aors_n_.lo \
-copy_.$(OBJEXT) copy_.lo divmod_1_.$(OBJEXT) divmod_1_.lo \
-divrem_.$(OBJEXT) divrem_.lo logops_n_.$(OBJEXT) logops_n_.lo \
-shift_.$(OBJEXT) shift_.lo try_.$(OBJEXT) try_.lo \
-tst-addsub_.$(OBJEXT) tst-addsub_.lo : $(ANSI2KNR)
  
  mostlyclean-libtool:
         -rm -f *.lo
@@ -499,10 +491,15 @@ install-am: all-am
  
  installcheck: installcheck-am
  install-strip:
-       $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
-         install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
-         `test -z '$(STRIP)' || \
-           echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+       if test -z '$(STRIP)'; then \
+         $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+           install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+             install; \
+       else \
+         $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+           install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+           "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
+       fi
  mostlyclean-generic:
  
  clean-generic:
@@ -570,7 +567,7 @@ maintainer-clean-am: distclean-am maintainer-clean-generic
  
  mostlyclean: mostlyclean-am
  
-mostlyclean-am: mostlyclean-compile mostlyclean-generic mostlyclean-kr \
+mostlyclean-am: mostlyclean-compile mostlyclean-generic \
         mostlyclean-libtool
  
  pdf: pdf-am
@@ -583,7 +580,7 @@ ps-am:
  
  uninstall-am:
  
-.MAKE: $(top_builddir)/ansi2knr install-am install-strip
+.MAKE: install-am install-strip
  
  .PHONY: CTAGS GTAGS all all-am check check-am clean clean-generic \
         clean-libtool ctags distclean distclean-compile \
@@ -595,8 +592,8 @@ uninstall-am:
         install-pdf-am install-ps install-ps-am install-strip \
         installcheck installcheck-am installdirs maintainer-clean \
         maintainer-clean-generic mostlyclean mostlyclean-compile \
-       mostlyclean-generic mostlyclean-kr mostlyclean-libtool pdf \
-       pdf-am ps ps-am tags uninstall uninstall-am
+       mostlyclean-generic mostlyclean-libtool pdf pdf-am ps ps-am \
+       tags uninstall uninstall-am
  
  
  allprogs: $(EXTRA_PROGRAMS)
diff --git a/tests/devel/README b/tests/devel/README

index d224f1ac704458f2333ffe6200ca7e8d0b0c0fe7..2ae30102507a3e969b16411880ffa08f04b09d8d 100644 (file)
--- a/tests/devel/README
+++ b/tests/devel/README
@@ -1,19 +1,19 @@
  Copyright 2000, 2001 Free Software Foundation, Inc.
  
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
  
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
  
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
  
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see http://www.gnu.org/licenses/.
  
  
  
diff --git a/tests/devel/anymul_1.c b/tests/devel/anymul_1.c

index 27a1a7464e1bc509b31e5beadcb10348c47453d3..65d766d2b54a258deac1bc7abd82114ff0940841 100644 (file)
--- a/tests/devel/anymul_1.c
+++ b/tests/devel/anymul_1.c
@@ -2,20 +2,20 @@
  Copyright 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2004, 2006, 2007, 2008
  Free Software Foundation, Inc.
  
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
  
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
  
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
  
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see http://www.gnu.org/licenses/.  */
  
  #include <stdlib.h>
  #include <string.h>
@@ -23,7 +23,7 @@ along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
  #include "gmp.h"
  #include "gmp-impl.h"
  #include "longlong.h"
-#include "tests.h"
+#include "tests/tests.h"
  
  #ifdef OPERATION_mul_1
  #define func __gmpn_mul_1
@@ -120,6 +120,12 @@ main (int argc, char **argv)
         }
  #endif
  
+#ifdef PLAIN_RANDOM
+#define MPN_RANDOM mpn_random
+#else
+#define MPN_RANDOM mpn_random2
+#endif
+
  #ifdef RANDOM
        size = random () % SIZE + 1;
  #else
@@ -132,7 +138,7 @@ main (int argc, char **argv)
  #ifdef FIXED_XLIMB
        xlimb = FIXED_XLIMB;
  #else
-      mpn_random2 (&xlimb, 1);
+      MPN_RANDOM (&xlimb, 1);
  #endif
  
  #if TIMES != 1
@@ -151,11 +157,11 @@ main (int argc, char **argv)
  #endif
  
  #ifndef NOCHECK
-      mpn_random2 (s1, size);
+      MPN_RANDOM (s1, size);
  #ifdef ZERO
        memset (rp, 0, size * sizeof *rp);
  #else
-      mpn_random2 (rp, size);
+      MPN_RANDOM (rp, size);
  #endif
  #if defined (PRINT) || defined (XPRINT)
        printf ("xlimb=");
diff --git a/tests/devel/aors_n.c b/tests/devel/aors_n.c

index 99069f2ed256842797085d2212e29349872f0f0f..978a769a0b54dfac6b09b54c3c49f3a8c4db728f 100644 (file)
--- a/tests/devel/aors_n.c
+++ b/tests/devel/aors_n.c
@@ -1,23 +1,24 @@
  /*
-Copyright 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2009
+Copyright 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2009, 2011
  Free Software Foundation, Inc.
  
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
  
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
  
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
  
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see http://www.gnu.org/licenses/.  */
  
  #include <stdlib.h>
+#include <string.h>
  #include <stdio.h>
  #include "gmp.h"
  #include "gmp-impl.h"
@@ -84,8 +85,10 @@ cputime ()
  }
  #endif
  
+static void print_posneg (mp_limb_t);
  static void mpn_print (mp_ptr, mp_size_t);
  
+#define LXW ((int) (2 * sizeof (mp_limb_t)))
  #define M * 1000000
  
  #ifndef CLOCK
@@ -186,11 +189,26 @@ main (int argc, char **argv)
        if (cyx != cyy || mpn_cmp (dx, dy, size+2) != 0
           || dx[0] != 0x87654321 || dx[size+1] != 0x12345678)
         {
+         mp_size_t s, e;
+         for (s = 0;; s++)
+           if ((unsigned long long) (dx+1)[s] != (unsigned long long) (dy+1)[s])
+             break;
+         for (e = size - 1;; e--)
+           if ((unsigned long long) (dx+1)[e] != (unsigned long long) (dy+1)[e])
+             break;
  #ifndef PRINT
-         mpn_print (&cyx, 1);
-         mpn_print (dx+1, size);
-         mpn_print (&cyy, 1);
-         mpn_print (dy+1, size);
+         for (i = s; i <= e; i++)
+           {
+             printf ("%6d: ", i);
+             printf ("%0*llX ", LXW, (unsigned long long) (dx+1)[i]);
+             printf ("%0*llX ", LXW, (unsigned long long) (dy+1)[i]);
+             print_posneg ((dy+1)[i] - (dx+1)[i]);
+             printf ("\n");
+           }
+         printf ("%6s: ", "retval");
+         printf ("%0*llX ", LXW, (unsigned long long) cyx);
+         printf ("%0*llX ", LXW, (unsigned long long) cyy);
+         print_posneg (cyx - cyy);
  #endif
           printf ("\n");
           if (dy[0] != 0x87654321)
@@ -205,6 +223,24 @@ main (int argc, char **argv)
    exit (0);
  }
  
+static void
+print_posneg (mp_limb_t d)
+{
+  char buf[LXW + 2];
+  if (d == 0)
+    printf (" %*X", LXW, 0);
+  else if (-d < d)
+    {
+      sprintf (buf, "%llX", (unsigned long long) -d);
+      printf ("%*s-%s", LXW - (int) strlen (buf), "", buf);
+    }
+  else
+    {
+      sprintf (buf, "%llX", (unsigned long long) d);
+      printf ("%*s+%s", LXW - (int) strlen (buf), "", buf);
+    }
+}
+
  static void
  mpn_print (mp_ptr p, mp_size_t size)
  {
@@ -215,7 +251,7 @@ mpn_print (mp_ptr p, mp_size_t size)
  #ifdef _LONG_LONG_LIMB
        printf ("%0*lX%0*lX", (int) (sizeof(mp_limb_t)),
               (unsigned long) (p[i] >> (GMP_LIMB_BITS/2)),
-              (int) (sizeof(mp_limb_t)), (unsigned long) (p[i]));
+             (int) (sizeof(mp_limb_t)), (unsigned long) (p[i]));
  #else
        printf ("%0*lX", (int) (2 * sizeof(mp_limb_t)), p[i]);
  #endif
diff --git a/tests/devel/copy.c b/tests/devel/copy.c

index d8cbbd96837cfdb38f0289819ed5ff89c30eccda..88b56b5de204e97d162149634e42e50a36c12c4e 100644 (file)
--- a/tests/devel/copy.c
+++ b/tests/devel/copy.c
@@ -1,22 +1,23 @@
  /*
-Copyright 1999, 2000, 2001, 2004, 2009 Free Software Foundation, Inc.
+Copyright 1999, 2000, 2001, 2004, 2009, 2011 Free Software Foundation, Inc.
  
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
  
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
  
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
  
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see http://www.gnu.org/licenses/.  */
  
  #include <stdlib.h>
+#include <string.h>
  #include <stdio.h>
  #include "gmp.h"
  #include "gmp-impl.h"
@@ -59,8 +60,10 @@ cputime ()
  }
  #endif
  
+static void print_posneg (mp_limb_t);
  static void mpn_print (mp_ptr, mp_size_t);
  
+#define LXW ((int) (2 * sizeof (mp_limb_t)))
  #define M * 1000000
  
  #ifndef CLOCK
@@ -68,7 +71,7 @@ static void mpn_print (mp_ptr, mp_size_t);
  #endif
  
  #ifndef OPS
-#define OPS (CLOCK/2)
+#define OPS (CLOCK/5)
  #endif
  #ifndef SIZE
  #define SIZE 496
@@ -152,9 +155,22 @@ main (int argc, char **argv)
        if (mpn_cmp (dx, dy, size+2) != 0
           || dx[0] != 0x87654321 || dx[size+1] != 0x12345678)
         {
+         mp_size_t s, e;
+         for (s = 0;; s++)
+           if ((unsigned long long) (dx+1)[s] != (unsigned long long) (dy+1)[s])
+             break;
+         for (e = size - 1;; e--)
+           if ((unsigned long long) (dx+1)[e] != (unsigned long long) (dy+1)[e])
+             break;
  #ifndef PRINT
-         mpn_print (dx+1, size);
-         mpn_print (dy+1, size);
+         for (i = s; i <= e; i++)
+           {
+             printf ("%6d: ", i);
+             printf ("%0*llX ", LXW, (unsigned long long) (dx+1)[i]);
+             printf ("%0*llX ", LXW, (unsigned long long) (dy+1)[i]);
+             print_posneg ((dy+1)[i] - (dx+1)[i]);
+             printf ("\n");
+           }
  #endif
           printf ("\n");
           if (dy[0] != 0x87654321)
@@ -169,6 +185,24 @@ main (int argc, char **argv)
    exit (0);
  }
  
+static void
+print_posneg (mp_limb_t d)
+{
+  char buf[LXW + 2];
+  if (d == 0)
+    printf (" %*X", LXW, 0);
+  else if (-d < d)
+    {
+      sprintf (buf, "%llX", (unsigned long long) -d);
+      printf ("%*s-%s", LXW - (int) strlen (buf), "", buf);
+    }
+  else
+    {
+      sprintf (buf, "%llX", (unsigned long long) d);
+      printf ("%*s+%s", LXW - (int) strlen (buf), "", buf);
+    }
+}
+
  static void
  mpn_print (mp_ptr p, mp_size_t size)
  {
@@ -179,7 +213,7 @@ mpn_print (mp_ptr p, mp_size_t size)
  #ifdef _LONG_LONG_LIMB
        printf ("%0*lX%0*lX", (int) (sizeof(mp_limb_t)),
               (unsigned long) (p[i] >> (GMP_LIMB_BITS/2)),
-              (int) (sizeof(mp_limb_t)), (unsigned long) (p[i]));
+             (int) (sizeof(mp_limb_t)), (unsigned long) (p[i]));
  #else
        printf ("%0*lX", (int) (2 * sizeof(mp_limb_t)), p[i]);
  #endif
diff --git a/tests/devel/divmod_1.c b/tests/devel/divmod_1.c

index 8487775b001d2b2472615d9635a4d976be2201d7..6426913108333f7762f201e6112a5494dcfabca6 100644 (file)
--- a/tests/devel/divmod_1.c
+++ b/tests/devel/divmod_1.c
@@ -1,20 +1,20 @@
  /*
  Copyright 1996, 1998, 2000, 2001, 2007 Free Software Foundation, Inc.
  
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
  
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
  
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
  
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see http://www.gnu.org/licenses/.  */
  
  #include <stdio.h>
  #include <stdlib.h>
diff --git a/tests/devel/divrem.c b/tests/devel/divrem.c

index 8d7622764763a0f7709d82cfd449a807ebe76817..62e0955264766f566a0114baa1b9d672eb95123b 100644 (file)
--- a/tests/devel/divrem.c
+++ b/tests/devel/divrem.c
@@ -2,20 +2,20 @@
  Copyright 1996, 1997, 1998, 2000, 2001, 2007, 2009 Free Software Foundation,
  Inc.
  
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
  
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
  
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
  
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see http://www.gnu.org/licenses/.  */
  
  #include <stdio.h>
  #include <stdlib.h>
diff --git a/tests/devel/logops_n.c b/tests/devel/logops_n.c

index db0be2c8670b1dda249e55ec704e726d380e1928..ef21c1dac55ee3534a300ea3b4ebecb18b6d2b70 100644 (file)
--- a/tests/devel/logops_n.c
+++ b/tests/devel/logops_n.c
@@ -2,20 +2,20 @@
  Copyright 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2009 Free
  Software Foundation, Inc.
  
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
  
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
  
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
  
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see http://www.gnu.org/licenses/.  */
  
  #include <stdlib.h>
  #include <stdio.h>
diff --git a/tests/devel/shift.c b/tests/devel/shift.c

index 089edd3ac7517b464b081eef234e19dcf2ff7d63..0a575e3f9bebf16e156a22f5365fefe47717e950 100644 (file)
--- a/tests/devel/shift.c
+++ b/tests/devel/shift.c
@@ -1,23 +1,24 @@
  /*
-Copyright 1996, 1998, 1999, 2000, 2001, 2004, 2007, 2009 Free Software
+Copyright 1996, 1998, 1999, 2000, 2001, 2004, 2007, 2009, 2011 Free Software
  Foundation, Inc.
  
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
  
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
  
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
  
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see http://www.gnu.org/licenses/.  */
  
  #include <stdlib.h>
+#include <string.h>
  #include <stdio.h>
  #include "gmp.h"
  #include "gmp-impl.h"
@@ -60,8 +61,10 @@ cputime ()
  }
  #endif
  
+static void print_posneg (mp_limb_t);
  static void mpn_print (mp_ptr, mp_size_t);
  
+#define LXW ((int) (2 * sizeof (mp_limb_t)))
  #define M * 1000000
  
  #ifndef CLOCK
@@ -166,11 +169,27 @@ main (int argc, char **argv)
        if (cyx != cyy || mpn_cmp (dx, dy, size+2) != 0
           || dx[0] != 0x87654321 || dx[size+1] != 0x12345678)
         {
+         mp_size_t s, e;
+         for (s = 0;; s++)
+           if ((unsigned long long) (dx+1)[s] != (unsigned long long) (dy+1)[s])
+             break;
+         for (e = size - 1;; e--)
+           if ((unsigned long long) (dx+1)[e] != (unsigned long long) (dy+1)[e])
+             break;
  #ifndef PRINT
-         mpn_print (&cyx, 1);
-         mpn_print (dx+1, size);
-         mpn_print (&cyy, 1);
-         mpn_print (dy+1, size);
+         printf ("cnt=%-*d\n", (int) (2 * sizeof(mp_limb_t)) - 4, cnt);
+         for (i = s; i <= e; i++)
+           {
+             printf ("%6d: ", i);
+             printf ("%0*llX ", LXW, (unsigned long long) (dx+1)[i]);
+             printf ("%0*llX ", LXW, (unsigned long long) (dy+1)[i]);
+             print_posneg ((dy+1)[i] - (dx+1)[i]);
+             printf ("\n");
+           }
+         printf ("%6s: ", "retval");
+         printf ("%0*llX ", LXW, (unsigned long long) cyx);
+         printf ("%0*llX ", LXW, (unsigned long long) cyy);
+         print_posneg (cyx - cyy);
  #endif
           printf ("\n");
           if (dy[0] != 0x87654321)
@@ -185,6 +204,24 @@ main (int argc, char **argv)
    exit (0);
  }
  
+static void
+print_posneg (mp_limb_t d)
+{
+  char buf[LXW + 2];
+  if (d == 0)
+    printf (" %*X", LXW, 0);
+  else if (-d < d)
+    {
+      sprintf (buf, "%llX", (unsigned long long) -d);
+      printf ("%*s-%s", LXW - (int) strlen (buf), "", buf);
+    }
+  else
+    {
+      sprintf (buf, "%llX", (unsigned long long) d);
+      printf ("%*s+%s", LXW - (int) strlen (buf), "", buf);
+    }
+}
+
  static void
  mpn_print (mp_ptr p, mp_size_t size)
  {
diff --git a/tests/devel/try.c b/tests/devel/try.c

index f8d1b0d3c70c8a810998bd8a5061ebf8ab38aba5..a4101cb1c54db6c2ddc13120fdd670bf683b98e6 100644 (file)
--- a/tests/devel/try.c
+++ b/tests/devel/try.c
@@ -3,23 +3,23 @@
     THIS IS A TEST PROGRAM USED ONLY FOR DEVELOPMENT.  IT'S ALMOST CERTAIN TO
     BE SUBJECT TO INCOMPATIBLE CHANGES IN FUTURE VERSIONS OF GMP.
  
-Copyright 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2008, 2009 Free Software
-Foundation, Inc.
+Copyright 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2008, 2009, 2011, 2012
+Free Software Foundation, Inc.
  
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
  
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
  
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
  
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see http://www.gnu.org/licenses/.  */
  
  
  /* Usage: try [options] <function>...
@@ -240,7 +240,7 @@ struct region_t {
  int trap_location = TRAP_NOWHERE;
  
  
-#define NUM_SOURCES  2
+#define NUM_SOURCES  5
  #define NUM_DESTS    2
  
  struct source_t {
@@ -289,7 +289,7 @@ struct each_t  fun = { "Fun" };
  
  #define SRC_SIZE(n)  ((n) == 1 && tr->size2 ? size2 : size)
  
-void validate_fail __GMP_PROTO ((void));
+void validate_fail (void);
  
  
  #if HAVE_TRY_NEW_C
@@ -297,37 +297,41 @@ void validate_fail __GMP_PROTO ((void));
  #endif
  
  
-typedef mp_limb_t (*tryfun_t) __GMP_PROTO ((ANYARGS));
+typedef mp_limb_t (*tryfun_t) (ANYARGS);
  
  struct try_t {
    char  retval;
  
-  char  src[2];
-  char  dst[2];
+  char  src[NUM_SOURCES];
+  char  dst[NUM_DESTS];
  
  #define SIZE_YES          1
  #define SIZE_ALLOW_ZERO   2
  #define SIZE_1            3  /* 1 limb  */
  #define SIZE_2            4  /* 2 limbs */
  #define SIZE_3            5  /* 3 limbs */
-#define SIZE_FRACTION     6  /* size2 is fraction for divrem etc */
-#define SIZE_SIZE2        7
-#define SIZE_PLUS_1       8
-#define SIZE_SUM          9
-#define SIZE_DIFF        10
-#define SIZE_DIFF_PLUS_1 11
-#define SIZE_RETVAL      12
-#define SIZE_CEIL_HALF   13
-#define SIZE_GET_STR     14
-#define SIZE_PLUS_MSIZE_SUB_1 15  /* size+msize-1 */
+#define SIZE_4            6  /* 4 limbs */
+#define SIZE_6            7  /* 6 limbs */
+#define SIZE_FRACTION     8  /* size2 is fraction for divrem etc */
+#define SIZE_SIZE2        9
+#define SIZE_PLUS_1      10
+#define SIZE_SUM         11
+#define SIZE_DIFF        12
+#define SIZE_DIFF_PLUS_1 13
+#define SIZE_DIFF_PLUS_3 14
+#define SIZE_RETVAL      15
+#define SIZE_CEIL_HALF   16
+#define SIZE_GET_STR     17
+#define SIZE_PLUS_MSIZE_SUB_1 18  /* size+msize-1 */
+#define SIZE_ODD         19
    char  size;
    char  size2;
-  char  dst_size[2];
+  char  dst_size[NUM_DESTS];
  
    /* multiplier_N size in limbs */
    mp_size_t  msize;
  
-  char  dst_bytes[2];
+  char  dst_bytes[NUM_DESTS];
  
    char  dst0_from_src1;
  
@@ -354,9 +358,10 @@ struct try_t {
  #define DATA_SRC0_ODD         3
  #define DATA_SRC0_HIGHBIT     4
  #define DATA_SRC1_ODD         5
-#define DATA_SRC1_HIGHBIT     6
-#define DATA_MULTIPLE_DIVISOR 7
-#define DATA_UDIV_QRNND       8
+#define DATA_SRC1_ODD_PRIME   6
+#define DATA_SRC1_HIGHBIT     7
+#define DATA_MULTIPLE_DIVISOR 8
+#define DATA_UDIV_QRNND       9
    char  data;
  
  /* Default is allow full overlap. */
@@ -365,12 +370,13 @@ struct try_t {
  #define OVERLAP_HIGH_TO_LOW  3
  #define OVERLAP_NOT_SRCS     4
  #define OVERLAP_NOT_SRC2     8
+#define OVERLAP_NOT_DST2     16
    char  overlap;
  
    tryfun_t    reference;
    const char  *reference_name;
  
-  void        (*validate) __GMP_PROTO ((void));
+  void        (*validate) (void);
    const char  *validate_name;
  };
  
@@ -438,6 +444,36 @@ validate_divexact_1 (void)
      validate_fail ();
  }
  
+void
+validate_bdiv_q_1
+ (void)
+{
+  mp_srcptr  src = s[0].p;
+  mp_srcptr  dst = fun.d[0].p;
+  int  error = 0;
+
+  ASSERT (size >= 1);
+
+  {
+    mp_ptr     tp = refmpn_malloc_limbs (size + 1);
+
+    refmpn_mul_1 (tp, dst, size, divisor);
+    /* Set ignored low bits */
+    tp[0] |= (src[0] & LOW_ZEROS_MASK (divisor));
+    if (! refmpn_equal_anynail (tp, src, size))
+      {
+       printf ("Bdiv wrong: res * divisor != src (mod B^size)\n");
+       mpn_trace ("res ", dst, size);
+       mpn_trace ("src ", src, size);
+       error = 1;
+      }
+    free (tp);
+  }
+
+  if (error)
+    validate_fail ();
+}
+
  
  void
  validate_modexact_1c_odd (void)
@@ -553,115 +589,68 @@ validate_sqrtrem (void)
     as they're all distinct and within the size of param[].  Renumber
     whenever necessary or desired.  */
  
-#define TYPE_ADD               1
-#define TYPE_ADD_N             2
-#define TYPE_ADD_NC            3
-#define TYPE_SUB               4
-#define TYPE_SUB_N             5
-#define TYPE_SUB_NC            6
-
-#define TYPE_MUL_1             7
-#define TYPE_MUL_1C            8
-
-#define TYPE_MUL_2             9
-#define TYPE_MUL_3             92
-#define TYPE_MUL_4             93
-
-#define TYPE_ADDMUL_1         10
-#define TYPE_ADDMUL_1C        11
-#define TYPE_SUBMUL_1         12
-#define TYPE_SUBMUL_1C        13
-
-#define TYPE_ADDMUL_2         14
-#define TYPE_ADDMUL_3         15
-#define TYPE_ADDMUL_4         16
-#define TYPE_ADDMUL_5         17
-#define TYPE_ADDMUL_6         18
-#define TYPE_ADDMUL_7         19
-#define TYPE_ADDMUL_8         20
-
-#define TYPE_ADDSUB_N         21
-#define TYPE_ADDSUB_NC        22
-
-#define TYPE_RSHIFT           23
-#define TYPE_LSHIFT           24
-#define TYPE_LSHIFTC          25
-
-#define TYPE_COPY             26
-#define TYPE_COPYI            27
-#define TYPE_COPYD            28
-#define TYPE_COM              29
-
-#define TYPE_ADDLSH1_N        30
-#define TYPE_ADDLSH2_N        48
-#define TYPE_ADDLSH_N         49
-#define TYPE_SUBLSH1_N        31
-#define TYPE_SUBLSH_N        130
-#define TYPE_RSBLSH1_N        34
-#define TYPE_RSBLSH2_N        46
-#define TYPE_RSBLSH_N         47
-#define TYPE_RSH1ADD_N        32
-#define TYPE_RSH1SUB_N        33
-
-#define TYPE_MOD_1            35
-#define TYPE_MOD_1C           36
-#define TYPE_DIVMOD_1         37
-#define TYPE_DIVMOD_1C        38
-#define TYPE_DIVREM_1         39
-#define TYPE_DIVREM_1C        40
-#define TYPE_PREINV_DIVREM_1  41
-#define TYPE_PREINV_MOD_1     42
-#define TYPE_MOD_34LSUB1      43
-#define TYPE_UDIV_QRNND       44
-#define TYPE_UDIV_QRNND_R     45
-
-#define TYPE_DIVEXACT_1       50
-#define TYPE_DIVEXACT_BY3     51
-#define TYPE_DIVEXACT_BY3C    52
-#define TYPE_MODEXACT_1_ODD   53
-#define TYPE_MODEXACT_1C_ODD  54
-
-#define TYPE_INVERT           55
-#define TYPE_BINVERT          56
-
-#define TYPE_GCD              60
-#define TYPE_GCD_1            61
-#define TYPE_GCD_FINDA        62
-#define TYPE_MPZ_JACOBI       63
-#define TYPE_MPZ_KRONECKER    64
-#define TYPE_MPZ_KRONECKER_UI 65
-#define TYPE_MPZ_KRONECKER_SI 66
-#define TYPE_MPZ_UI_KRONECKER 67
-#define TYPE_MPZ_SI_KRONECKER 68
-
-#define TYPE_AND_N            70
-#define TYPE_NAND_N           71
-#define TYPE_ANDN_N           72
-#define TYPE_IOR_N            73
-#define TYPE_IORN_N           74
-#define TYPE_NIOR_N           75
-#define TYPE_XOR_N            76
-#define TYPE_XNOR_N           77
-
-#define TYPE_MUL_MN           80
-#define TYPE_MUL_N            81
-#define TYPE_SQR              82
-#define TYPE_UMUL_PPMM        83
-#define TYPE_UMUL_PPMM_R      84
-#define TYPE_MULLO_N          85
-
-#define TYPE_SBPI1_DIV_QR     90
-#define TYPE_TDIV_QR          91
-
-#define TYPE_SQRTREM          100
-#define TYPE_ZERO             101
-#define TYPE_GET_STR          102
-#define TYPE_POPCOUNT         103
-#define TYPE_HAMDIST          104
-
-#define TYPE_EXTRA            110
-
-struct try_t  param[150];
+enum {
+  TYPE_ADD = 1, TYPE_ADD_N, TYPE_ADD_NC, TYPE_SUB, TYPE_SUB_N, TYPE_SUB_NC,
+
+  TYPE_ADD_ERR1_N, TYPE_ADD_ERR2_N, TYPE_ADD_ERR3_N,
+  TYPE_SUB_ERR1_N, TYPE_SUB_ERR2_N, TYPE_SUB_ERR3_N,
+
+  TYPE_MUL_1, TYPE_MUL_1C,
+
+  TYPE_MUL_2, TYPE_MUL_3, TYPE_MUL_4, TYPE_MUL_5, TYPE_MUL_6,
+
+  TYPE_ADDMUL_1, TYPE_ADDMUL_1C, TYPE_SUBMUL_1, TYPE_SUBMUL_1C,
+
+  TYPE_ADDMUL_2, TYPE_ADDMUL_3, TYPE_ADDMUL_4, TYPE_ADDMUL_5, TYPE_ADDMUL_6,
+  TYPE_ADDMUL_7, TYPE_ADDMUL_8,
+
+  TYPE_ADDSUB_N, TYPE_ADDSUB_NC,
+
+  TYPE_RSHIFT, TYPE_LSHIFT, TYPE_LSHIFTC,
+
+  TYPE_COPY, TYPE_COPYI, TYPE_COPYD, TYPE_COM,
+
+  TYPE_ADDLSH1_N, TYPE_ADDLSH2_N, TYPE_ADDLSH_N,
+  TYPE_ADDLSH1_N_IP1, TYPE_ADDLSH2_N_IP1, TYPE_ADDLSH_N_IP1,
+  TYPE_ADDLSH1_N_IP2, TYPE_ADDLSH2_N_IP2, TYPE_ADDLSH_N_IP2,
+  TYPE_SUBLSH1_N, TYPE_SUBLSH2_N, TYPE_SUBLSH_N,
+  TYPE_SUBLSH1_N_IP1, TYPE_SUBLSH2_N_IP1, TYPE_SUBLSH_N_IP1,
+  TYPE_RSBLSH1_N, TYPE_RSBLSH2_N, TYPE_RSBLSH_N,
+  TYPE_RSH1ADD_N, TYPE_RSH1SUB_N,
+
+  TYPE_ADDLSH1_NC, TYPE_ADDLSH2_NC, TYPE_ADDLSH_NC,
+  TYPE_SUBLSH1_NC, TYPE_SUBLSH2_NC, TYPE_SUBLSH_NC,
+  TYPE_RSBLSH1_NC, TYPE_RSBLSH2_NC, TYPE_RSBLSH_NC,
+
+  TYPE_ADDCND_N, TYPE_SUBCND_N,
+
+  TYPE_MOD_1, TYPE_MOD_1C, TYPE_DIVMOD_1, TYPE_DIVMOD_1C, TYPE_DIVREM_1,
+  TYPE_DIVREM_1C, TYPE_PREINV_DIVREM_1, TYPE_DIVREM_2, TYPE_PREINV_MOD_1,
+  TYPE_MOD_34LSUB1, TYPE_UDIV_QRNND, TYPE_UDIV_QRNND_R,
+
+  TYPE_DIVEXACT_1, TYPE_BDIV_Q_1, TYPE_DIVEXACT_BY3, TYPE_DIVEXACT_BY3C,
+  TYPE_MODEXACT_1_ODD, TYPE_MODEXACT_1C_ODD,
+
+  TYPE_INVERT, TYPE_BINVERT,
+
+  TYPE_GCD, TYPE_GCD_1, TYPE_GCD_FINDA, TYPE_MPZ_JACOBI, TYPE_MPZ_KRONECKER,
+  TYPE_MPZ_KRONECKER_UI, TYPE_MPZ_KRONECKER_SI, TYPE_MPZ_UI_KRONECKER,
+  TYPE_MPZ_SI_KRONECKER, TYPE_MPZ_LEGENDRE,
+
+  TYPE_AND_N, TYPE_NAND_N, TYPE_ANDN_N, TYPE_IOR_N, TYPE_IORN_N, TYPE_NIOR_N,
+  TYPE_XOR_N, TYPE_XNOR_N,
+
+  TYPE_MUL_MN, TYPE_MUL_N, TYPE_SQR, TYPE_UMUL_PPMM, TYPE_UMUL_PPMM_R,
+  TYPE_MULLO_N, TYPE_MULMID_MN, TYPE_MULMID_N,
+
+  TYPE_SBPI1_DIV_QR, TYPE_TDIV_QR,
+
+  TYPE_SQRTREM, TYPE_ZERO, TYPE_GET_STR, TYPE_POPCOUNT, TYPE_HAMDIST,
+
+  TYPE_EXTRA
+};
+
+struct try_t  param[TYPE_EXTRA];
  
  
  void
@@ -719,6 +708,53 @@ param_init (void)
    REFERENCE (refmpn_sub);
  
  
+  p = &param[TYPE_ADD_ERR1_N];
+  p->retval = 1;
+  p->dst[0] = 1;
+  p->dst[1] = 1;
+  p->src[0] = 1;
+  p->src[1] = 1;
+  p->src[2] = 1;
+  p->dst_size[1] = SIZE_2;
+  p->carry = CARRY_BIT;
+  p->overlap = OVERLAP_NOT_DST2;
+  REFERENCE (refmpn_add_err1_n);
+
+  p = &param[TYPE_SUB_ERR1_N];
+  COPY (TYPE_ADD_ERR1_N);
+  REFERENCE (refmpn_sub_err1_n);
+
+  p = &param[TYPE_ADD_ERR2_N];
+  COPY (TYPE_ADD_ERR1_N);
+  p->src[3] = 1;
+  p->dst_size[1] = SIZE_4;
+  REFERENCE (refmpn_add_err2_n);
+
+  p = &param[TYPE_SUB_ERR2_N];
+  COPY (TYPE_ADD_ERR2_N);
+  REFERENCE (refmpn_sub_err2_n);
+
+  p = &param[TYPE_ADD_ERR3_N];
+  COPY (TYPE_ADD_ERR2_N);
+  p->src[4] = 1;
+  p->dst_size[1] = SIZE_6;
+  REFERENCE (refmpn_add_err3_n);
+
+  p = &param[TYPE_SUB_ERR3_N];
+  COPY (TYPE_ADD_ERR3_N);
+  REFERENCE (refmpn_sub_err3_n);
+
+  p = &param[TYPE_ADDCND_N];
+  COPY (TYPE_ADD_N);
+  p->carry = CARRY_BIT;
+  REFERENCE (refmpn_addcnd_n);
+
+  p = &param[TYPE_SUBCND_N];
+  COPY (TYPE_ADD_N);
+  p->carry = CARRY_BIT;
+  REFERENCE (refmpn_subcnd_n);
+
+
    p = &param[TYPE_MUL_1];
    p->retval = 1;
    p->dst[0] = 1;
@@ -753,6 +789,16 @@ param_init (void)
    p->msize = 4;
    REFERENCE (refmpn_mul_4);
  
+  p = &param[TYPE_MUL_5];
+  COPY (TYPE_MUL_2);
+  p->msize = 5;
+  REFERENCE (refmpn_mul_5);
+
+  p = &param[TYPE_MUL_6];
+  COPY (TYPE_MUL_2);
+  p->msize = 6;
+  REFERENCE (refmpn_mul_6);
+
  
    p = &param[TYPE_ADDMUL_1];
    p->retval = 1;
@@ -784,7 +830,7 @@ param_init (void)
    p->src[1] = 1;
    p->msize = 2;
    p->dst0_from_src1 = 1;
-  p->overlap = OVERLAP_NOT_SRC2;
+  p->overlap = OVERLAP_NONE;
    REFERENCE (refmpn_addmul_2);
  
    p = &param[TYPE_ADDMUL_3];
@@ -907,14 +953,58 @@ param_init (void)
    p->shift = 1;
    REFERENCE (refmpn_addlsh_n);
  
+  p = &param[TYPE_ADDLSH1_N_IP1];
+  p->retval = 1;
+  p->dst[0] = 1;
+  p->src[0] = 1;
+  p->dst0_from_src1 = 1;
+  REFERENCE (refmpn_addlsh1_n_ip1);
+
+  p = &param[TYPE_ADDLSH2_N_IP1];
+  COPY (TYPE_ADDLSH1_N_IP1);
+  REFERENCE (refmpn_addlsh2_n_ip1);
+
+  p = &param[TYPE_ADDLSH_N_IP1];
+  COPY (TYPE_ADDLSH1_N_IP1);
+  p->shift = 1;
+  REFERENCE (refmpn_addlsh_n_ip1);
+
+  p = &param[TYPE_ADDLSH1_N_IP2];
+  COPY (TYPE_ADDLSH1_N_IP1);
+  REFERENCE (refmpn_addlsh1_n_ip2);
+
+  p = &param[TYPE_ADDLSH2_N_IP2];
+  COPY (TYPE_ADDLSH1_N_IP1);
+  REFERENCE (refmpn_addlsh2_n_ip2);
+
+  p = &param[TYPE_ADDLSH_N_IP2];
+  COPY (TYPE_ADDLSH_N_IP1);
+  REFERENCE (refmpn_addlsh_n_ip2);
+
    p = &param[TYPE_SUBLSH1_N];
    COPY (TYPE_ADD_N);
    REFERENCE (refmpn_sublsh1_n);
  
+  p = &param[TYPE_SUBLSH2_N];
+  COPY (TYPE_ADD_N);
+  REFERENCE (refmpn_sublsh2_n);
+
    p = &param[TYPE_SUBLSH_N];
    COPY (TYPE_ADDLSH_N);
    REFERENCE (refmpn_sublsh_n);
  
+  p = &param[TYPE_SUBLSH1_N_IP1];
+  COPY (TYPE_ADDLSH1_N_IP1);
+  REFERENCE (refmpn_sublsh1_n_ip1);
+
+  p = &param[TYPE_SUBLSH2_N_IP1];
+  COPY (TYPE_ADDLSH1_N_IP1);
+  REFERENCE (refmpn_sublsh2_n_ip1);
+
+  p = &param[TYPE_SUBLSH_N_IP1];
+  COPY (TYPE_ADDLSH_N_IP1);
+  REFERENCE (refmpn_sublsh_n_ip1);
+
    p = &param[TYPE_RSBLSH1_N];
    COPY (TYPE_ADD_N);
    REFERENCE (refmpn_rsblsh1_n);
@@ -936,6 +1026,49 @@ param_init (void)
    REFERENCE (refmpn_rsh1sub_n);
  
  
+  p = &param[TYPE_ADDLSH1_NC];
+  COPY (TYPE_ADDLSH1_N);
+  p->carry = CARRY_3;
+  REFERENCE (refmpn_addlsh1_nc);
+
+  p = &param[TYPE_ADDLSH2_NC];
+  COPY (TYPE_ADDLSH2_N);
+  p->carry = CARRY_4; /* FIXME */
+  REFERENCE (refmpn_addlsh2_nc);
+
+  p = &param[TYPE_ADDLSH_NC];
+  COPY (TYPE_ADDLSH_N);
+  p->carry = CARRY_BIT; /* FIXME */
+  REFERENCE (refmpn_addlsh_nc);
+
+  p = &param[TYPE_SUBLSH1_NC];
+  COPY (TYPE_ADDLSH1_NC);
+  REFERENCE (refmpn_sublsh1_nc);
+
+  p = &param[TYPE_SUBLSH2_NC];
+  COPY (TYPE_ADDLSH2_NC);
+  REFERENCE (refmpn_sublsh2_nc);
+
+  p = &param[TYPE_SUBLSH_NC];
+  COPY (TYPE_ADDLSH_NC);
+  REFERENCE (refmpn_sublsh_nc);
+
+  p = &param[TYPE_RSBLSH1_NC];
+  COPY (TYPE_RSBLSH1_N);
+  p->carry = CARRY_BIT; /* FIXME */
+  REFERENCE (refmpn_rsblsh1_nc);
+
+  p = &param[TYPE_RSBLSH2_NC];
+  COPY (TYPE_RSBLSH2_N);
+  p->carry = CARRY_4; /* FIXME */
+  REFERENCE (refmpn_rsblsh2_nc);
+
+  p = &param[TYPE_RSBLSH_NC];
+  COPY (TYPE_RSBLSH_N);
+  p->carry = CARRY_BIT; /* FIXME */
+  REFERENCE (refmpn_rsblsh_nc);
+
+
    p = &param[TYPE_MOD_1];
    p->retval = 1;
    p->src[0] = 1;
@@ -1008,6 +1141,11 @@ param_init (void)
    VALIDATE (validate_divexact_1);
    REFERENCE (refmpn_divmod_1);
  
+  p = &param[TYPE_BDIV_Q_1];
+  p->dst[0] = 1;
+  p->src[0] = 1;
+  p->divisor = DIVISOR_LIMB;
+  VALIDATE (validate_bdiv_q_1);
  
    p = &param[TYPE_DIVEXACT_BY3];
    p->retval = 1;
@@ -1052,6 +1190,17 @@ param_init (void)
    REFERENCE (refmpn_gcd);
  
  
+  p = &param[TYPE_MPZ_LEGENDRE];
+  p->retval = 1;
+  p->src[0] = 1;
+  p->size = SIZE_ALLOW_ZERO;
+  p->src[1] = 1;
+  p->data = DATA_SRC1_ODD_PRIME;
+  p->size2 = 1;
+  p->carry = CARRY_BIT;
+  p->carry_sign = 1;
+  REFERENCE (refmpz_legendre);
+
    p = &param[TYPE_MPZ_JACOBI];
    p->retval = 1;
    p->src[0] = 1;
@@ -1059,13 +1208,19 @@ param_init (void)
    p->src[1] = 1;
    p->data = DATA_SRC1_ODD;
    p->size2 = 1;
-  p->carry = CARRY_4;
+  p->carry = CARRY_BIT;
    p->carry_sign = 1;
    REFERENCE (refmpz_jacobi);
  
    p = &param[TYPE_MPZ_KRONECKER];
-  COPY (TYPE_MPZ_JACOBI);
-  p->data = 0;                 /* clear inherited DATA_SRC1_ODD */
+  p->retval = 1;
+  p->src[0] = 1;
+  p->size = SIZE_ALLOW_ZERO;
+  p->src[1] = 1;
+  p->data = 0;
+  p->size2 = 1;
+  p->carry = CARRY_4;
+  p->carry_sign = 1;
    REFERENCE (refmpz_kronecker);
  
  
@@ -1112,6 +1267,18 @@ param_init (void)
    p->size2 = 1;
    REFERENCE (refmpn_mul_basecase);
  
+  p = &param[TYPE_MULMID_MN];
+  COPY (TYPE_MUL_MN);
+  p->dst_size[0] = SIZE_DIFF_PLUS_3;
+  REFERENCE (refmpn_mulmid_basecase);
+
+  p = &param[TYPE_MULMID_N];
+  COPY (TYPE_MUL_N);
+  p->size = SIZE_ODD;
+  p->size2 = SIZE_CEIL_HALF;
+  p->dst_size[0] = SIZE_DIFF_PLUS_3;
+  REFERENCE (refmpn_mulmid_n);
+
    p = &param[TYPE_UMUL_PPMM];
    p->retval = 1;
    p->src[0] = 1;
@@ -1300,6 +1467,70 @@ mpn_divexact_by3_fun (mp_ptr rp, mp_srcptr sp, mp_size_t size)
    return mpn_divexact_by3 (rp, sp, size);
  }
  
+#if HAVE_NATIVE_mpn_addlsh1_n_ip1
+mp_limb_t
+mpn_addlsh1_n_ip1_fun (mp_ptr rp, mp_srcptr sp, mp_size_t size)
+{
+  return mpn_addlsh1_n_ip1 (rp, sp, size);
+}
+#endif
+#if HAVE_NATIVE_mpn_addlsh2_n_ip1
+mp_limb_t
+mpn_addlsh2_n_ip1_fun (mp_ptr rp, mp_srcptr sp, mp_size_t size)
+{
+  return mpn_addlsh2_n_ip1 (rp, sp, size);
+}
+#endif
+#if HAVE_NATIVE_mpn_addlsh_n_ip1
+mp_limb_t
+mpn_addlsh_n_ip1_fun (mp_ptr rp, mp_srcptr sp, mp_size_t size, unsigned int sh)
+{
+  return mpn_addlsh_n_ip1 (rp, sp, size, sh);
+}
+#endif
+#if HAVE_NATIVE_mpn_addlsh1_n_ip2
+mp_limb_t
+mpn_addlsh1_n_ip2_fun (mp_ptr rp, mp_srcptr sp, mp_size_t size)
+{
+  return mpn_addlsh1_n_ip2 (rp, sp, size);
+}
+#endif
+#if HAVE_NATIVE_mpn_addlsh2_n_ip2
+mp_limb_t
+mpn_addlsh2_n_ip2_fun (mp_ptr rp, mp_srcptr sp, mp_size_t size)
+{
+  return mpn_addlsh2_n_ip2 (rp, sp, size);
+}
+#endif
+#if HAVE_NATIVE_mpn_addlsh_n_ip2
+mp_limb_t
+mpn_addlsh_n_ip2_fun (mp_ptr rp, mp_srcptr sp, mp_size_t size, unsigned int sh)
+{
+  return mpn_addlsh_n_ip2 (rp, sp, size, sh);
+}
+#endif
+#if HAVE_NATIVE_mpn_sublsh1_n_ip1
+mp_limb_t
+mpn_sublsh1_n_ip1_fun (mp_ptr rp, mp_srcptr sp, mp_size_t size)
+{
+  return mpn_sublsh1_n_ip1 (rp, sp, size);
+}
+#endif
+#if HAVE_NATIVE_mpn_sublsh2_n_ip1
+mp_limb_t
+mpn_sublsh2_n_ip1_fun (mp_ptr rp, mp_srcptr sp, mp_size_t size)
+{
+  return mpn_sublsh2_n_ip1 (rp, sp, size);
+}
+#endif
+#if HAVE_NATIVE_mpn_sublsh_n_ip1
+mp_limb_t
+mpn_sublsh_n_ip1_fun (mp_ptr rp, mp_srcptr sp, mp_size_t size, unsigned int sh)
+{
+  return mpn_sublsh_n_ip1 (rp, sp, size, sh);
+}
+#endif
+
  mp_limb_t
  mpn_modexact_1_odd_fun (mp_srcptr ptr, mp_size_t size, mp_limb_t divisor)
  {
@@ -1367,6 +1598,19 @@ mpn_toom4_sqr_fun (mp_ptr dst, mp_srcptr src, mp_size_t size)
    TMP_FREE;
  }
  
+void
+mpn_toom42_mulmid_fun (mp_ptr dst, mp_srcptr src1, mp_srcptr src2,
+                      mp_size_t size)
+{
+  mp_ptr  tspace;
+  mp_size_t n;
+  TMP_DECL;
+  TMP_MARK;
+  tspace = TMP_ALLOC_LIMBS (mpn_toom42_mulmid_itch (size));
+  mpn_toom42_mulmid (dst, src1, src2, size, tspace);
+  TMP_FREE;
+}
+
  mp_limb_t
  umul_ppmm_fun (mp_limb_t *lowptr, mp_limb_t m1, mp_limb_t m2)
  {
@@ -1416,6 +1660,13 @@ const struct choice_t choice_array[] = {
    { TRY(mpn_add_n_sub_nc), TYPE_ADDSUB_NC },
  #endif
  
+  { TRY(mpn_add_err1_n),  TYPE_ADD_ERR1_N  },
+  { TRY(mpn_sub_err1_n),  TYPE_SUB_ERR1_N  },
+  { TRY(mpn_add_err2_n),  TYPE_ADD_ERR2_N  },
+  { TRY(mpn_sub_err2_n),  TYPE_SUB_ERR2_N  },
+  { TRY(mpn_add_err3_n),  TYPE_ADD_ERR3_N  },
+  { TRY(mpn_sub_err3_n),  TYPE_SUB_ERR3_N  },
+
    { TRY(mpn_addmul_1),  TYPE_ADDMUL_1  },
    { TRY(mpn_submul_1),  TYPE_SUBMUL_1  },
  #if HAVE_NATIVE_mpn_addmul_1c
@@ -1465,6 +1716,8 @@ const struct choice_t choice_array[] = {
    { TRY(mpn_copyd), TYPE_COPYD },
  #endif
  
+  { TRY(mpn_addcnd_n), TYPE_ADDCND_N },
+  { TRY(mpn_subcnd_n), TYPE_SUBCND_N },
  #if HAVE_NATIVE_mpn_addlsh1_n
    { TRY(mpn_addlsh1_n), TYPE_ADDLSH1_N },
  #endif
@@ -1474,12 +1727,42 @@ const struct choice_t choice_array[] = {
  #if HAVE_NATIVE_mpn_addlsh_n
    { TRY(mpn_addlsh_n), TYPE_ADDLSH_N },
  #endif
+#if HAVE_NATIVE_mpn_addlsh1_n_ip1
+  { TRY_FUNFUN(mpn_addlsh1_n_ip1), TYPE_ADDLSH1_N_IP1 },
+#endif
+#if HAVE_NATIVE_mpn_addlsh2_n_ip1
+  { TRY_FUNFUN(mpn_addlsh2_n_ip1), TYPE_ADDLSH2_N_IP1 },
+#endif
+#if HAVE_NATIVE_mpn_addlsh_n_ip1
+  { TRY_FUNFUN(mpn_addlsh_n_ip1), TYPE_ADDLSH_N_IP1 },
+#endif
+#if HAVE_NATIVE_mpn_addlsh1_n_ip2
+  { TRY_FUNFUN(mpn_addlsh1_n_ip2), TYPE_ADDLSH1_N_IP2 },
+#endif
+#if HAVE_NATIVE_mpn_addlsh2_n_ip2
+  { TRY_FUNFUN(mpn_addlsh2_n_ip2), TYPE_ADDLSH2_N_IP2 },
+#endif
+#if HAVE_NATIVE_mpn_addlsh_n_ip2
+  { TRY_FUNFUN(mpn_addlsh_n_ip2), TYPE_ADDLSH_N_IP2 },
+#endif
  #if HAVE_NATIVE_mpn_sublsh1_n
    { TRY(mpn_sublsh1_n), TYPE_SUBLSH1_N },
  #endif
+#if HAVE_NATIVE_mpn_sublsh2_n
+  { TRY(mpn_sublsh2_n), TYPE_SUBLSH2_N },
+#endif
  #if HAVE_NATIVE_mpn_sublsh_n
    { TRY(mpn_sublsh_n), TYPE_SUBLSH_N },
  #endif
+#if HAVE_NATIVE_mpn_sublsh1_n_ip1
+  { TRY_FUNFUN(mpn_sublsh1_n_ip1), TYPE_SUBLSH1_N_IP1 },
+#endif
+#if HAVE_NATIVE_mpn_sublsh2_n_ip1
+  { TRY_FUNFUN(mpn_sublsh2_n_ip1), TYPE_SUBLSH2_N_IP1 },
+#endif
+#if HAVE_NATIVE_mpn_sublsh_n_ip1
+  { TRY_FUNFUN(mpn_sublsh_n_ip1), TYPE_SUBLSH_N_IP1 },
+#endif
  #if HAVE_NATIVE_mpn_rsblsh1_n
    { TRY(mpn_rsblsh1_n), TYPE_RSBLSH1_N },
  #endif
@@ -1496,6 +1779,34 @@ const struct choice_t choice_array[] = {
    { TRY(mpn_rsh1sub_n), TYPE_RSH1SUB_N },
  #endif
  
+#if HAVE_NATIVE_mpn_addlsh1_nc
+  { TRY(mpn_addlsh1_nc), TYPE_ADDLSH1_NC },
+#endif
+#if HAVE_NATIVE_mpn_addlsh2_nc
+  { TRY(mpn_addlsh2_nc), TYPE_ADDLSH2_NC },
+#endif
+#if HAVE_NATIVE_mpn_addlsh_nc
+  { TRY(mpn_addlsh_nc), TYPE_ADDLSH_NC },
+#endif
+#if HAVE_NATIVE_mpn_sublsh1_nc
+  { TRY(mpn_sublsh1_nc), TYPE_SUBLSH1_NC },
+#endif
+#if HAVE_NATIVE_mpn_sublsh2_nc
+  { TRY(mpn_sublsh2_nc), TYPE_SUBLSH2_NC },
+#endif
+#if HAVE_NATIVE_mpn_sublsh_nc
+  { TRY(mpn_sublsh_nc), TYPE_SUBLSH_NC },
+#endif
+#if HAVE_NATIVE_mpn_rsblsh1_nc
+  { TRY(mpn_rsblsh1_nc), TYPE_RSBLSH1_NC },
+#endif
+#if HAVE_NATIVE_mpn_rsblsh2_nc
+  { TRY(mpn_rsblsh2_nc), TYPE_RSBLSH2_NC },
+#endif
+#if HAVE_NATIVE_mpn_rsblsh_nc
+  { TRY(mpn_rsblsh_nc), TYPE_RSBLSH_NC },
+#endif
+
    { TRY_FUNFUN(mpn_and_n),  TYPE_AND_N  },
    { TRY_FUNFUN(mpn_andn_n), TYPE_ANDN_N },
    { TRY_FUNFUN(mpn_nand_n), TYPE_NAND_N },
@@ -1532,6 +1843,7 @@ const struct choice_t choice_array[] = {
  #endif
  
    { TRY(mpn_divexact_1),          TYPE_DIVEXACT_1 },
+  { TRY(mpn_bdiv_q_1),            TYPE_BDIV_Q_1 },
    { TRY_FUNFUN(mpn_divexact_by3), TYPE_DIVEXACT_BY3 },
    { TRY(mpn_divexact_by3c),       TYPE_DIVEXACT_BY3C },
  
@@ -1555,6 +1867,12 @@ const struct choice_t choice_array[] = {
  #if HAVE_NATIVE_mpn_mul_4
    { TRY(mpn_mul_4),      TYPE_MUL_4, 4 },
  #endif
+#if HAVE_NATIVE_mpn_mul_5
+  { TRY(mpn_mul_5),      TYPE_MUL_5, 5 },
+#endif
+#if HAVE_NATIVE_mpn_mul_6
+  { TRY(mpn_mul_6),      TYPE_MUL_6, 6 },
+#endif
  
    { TRY(mpn_rshift),     TYPE_RSHIFT },
    { TRY(mpn_lshift),     TYPE_LSHIFT },
@@ -1562,6 +1880,7 @@ const struct choice_t choice_array[] = {
  
  
    { TRY(mpn_mul_basecase), TYPE_MUL_MN },
+  { TRY(mpn_mulmid_basecase), TYPE_MULMID_MN },
    { TRY(mpn_mullo_basecase), TYPE_MULLO_N },
  #if SQR_TOOM2_THRESHOLD > 0
    { TRY(mpn_sqr_basecase), TYPE_SQR },
@@ -1586,9 +1905,16 @@ const struct choice_t choice_array[] = {
    { TRY_FUNFUN(mpn_toom44_mul),  TYPE_MUL_N,  MPN_TOOM44_MUL_MINSIZE },
    { TRY_FUNFUN(mpn_toom4_sqr),   TYPE_SQR,    MPN_TOOM4_SQR_MINSIZE },
  
+  { TRY(mpn_mulmid_n),  TYPE_MULMID_N, 1 },
+  { TRY(mpn_mulmid),  TYPE_MULMID_MN, 1 },
+  { TRY_FUNFUN(mpn_toom42_mulmid),  TYPE_MULMID_N,
+    (2 * MPN_TOOM42_MULMID_MINSIZE - 1) },
+
    { TRY(mpn_gcd_1),        TYPE_GCD_1            },
    { TRY(mpn_gcd),          TYPE_GCD              },
+  { TRY(mpz_legendre),     TYPE_MPZ_LEGENDRE     },
    { TRY(mpz_jacobi),       TYPE_MPZ_JACOBI       },
+  { TRY(mpz_kronecker),    TYPE_MPZ_KRONECKER    },
    { TRY(mpz_kronecker_ui), TYPE_MPZ_KRONECKER_UI },
    { TRY(mpz_kronecker_si), TYPE_MPZ_KRONECKER_SI },
    { TRY(mpz_ui_kronecker), TYPE_MPZ_UI_KRONECKER },
@@ -1680,7 +2006,7 @@ malloc_region (struct region_t *r, mp_size_t n)
  
  #if HAVE_MMAP && defined (MAP_ANON)
    /* note must pass fd=-1 for MAP_ANON on BSD */
-  p = mmap (NULL, nbytes, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANON, -1, 0);
+  p = (mp_ptr) mmap (NULL, nbytes, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANON, -1, 0);
    if (p == (void *) -1)
      {
        fprintf (stderr, "Cannot mmap %#x anon bytes: %s\n",
@@ -1692,7 +2018,7 @@ malloc_region (struct region_t *r, mp_size_t n)
    ASSERT_ALWAYS (p != NULL);
  #endif
  
-  p = align_pointer (p, pagesize);
+  p = (mp_ptr) align_pointer (p, pagesize);
  
    mprotect_maybe (p, REDZONE_BYTES, PROT_NONE);
    p += REDZONE_LIMBS;
@@ -1803,15 +2129,15 @@ int        divisor_index;
  struct overlap_t {
    int  s[NUM_SOURCES];
  } overlap_array[] = {
-  { { -1, -1 } },
-  { {  0, -1 } },
-  { { -1,  0 } },
-  { {  0,  0 } },
-  { {  1, -1 } },
-  { { -1,  1 } },
-  { {  1,  1 } },
-  { {  0,  1 } },
-  { {  1,  0 } },
+  { { -1, -1, -1, -1, -1 } },
+  { {  0, -1, -1, -1, -1 } },
+  { { -1,  0, -1, -1, -1 } },
+  { {  0,  0, -1, -1, -1 } },
+  { {  1, -1, -1, -1, -1 } },
+  { { -1,  1, -1, -1, -1 } },
+  { {  1,  1, -1, -1, -1 } },
+  { {  0,  1, -1, -1, -1 } },
+  { {  1,  0, -1, -1, -1 } },
  };
  
  struct overlap_t  *overlap, *overlap_limit;
@@ -1820,6 +2146,7 @@ struct overlap_t  *overlap, *overlap_limit;
    (tr->overlap & OVERLAP_NONE       ? 1 \
     : tr->overlap & OVERLAP_NOT_SRCS ? 3 \
     : tr->overlap & OVERLAP_NOT_SRC2 ? 2 \
+   : tr->overlap & OVERLAP_NOT_DST2 ? 4        \
     : tr->dst[1]                     ? 9 \
     : tr->src[1]                     ? 4 \
     : tr->dst[0]                     ? 2 \
@@ -2054,6 +2381,7 @@ call (struct each_t *e, tryfun_t function)
    case TYPE_ADDLSH1_N:
    case TYPE_ADDLSH2_N:
    case TYPE_SUBLSH1_N:
+  case TYPE_SUBLSH2_N:
    case TYPE_RSBLSH1_N:
    case TYPE_RSBLSH2_N:
    case TYPE_RSH1ADD_N:
@@ -2067,11 +2395,40 @@ call (struct each_t *e, tryfun_t function)
      e->retval = CALLING_CONVENTIONS (function)
        (e->d[0].p, e->s[0].p, e->s[1].p, size, shift);
      break;
+  case TYPE_ADDLSH_NC:
+  case TYPE_SUBLSH_NC:
+  case TYPE_RSBLSH_NC:
+    e->retval = CALLING_CONVENTIONS (function)
+      (e->d[0].p, e->s[0].p, e->s[1].p, size, shift, carry);
+    break;
+  case TYPE_ADDLSH1_NC:
+  case TYPE_ADDLSH2_NC:
+  case TYPE_SUBLSH1_NC:
+  case TYPE_SUBLSH2_NC:
+  case TYPE_RSBLSH1_NC:
+  case TYPE_RSBLSH2_NC:
    case TYPE_ADD_NC:
    case TYPE_SUB_NC:
+  case TYPE_ADDCND_N:
+  case TYPE_SUBCND_N:
      e->retval = CALLING_CONVENTIONS (function)
        (e->d[0].p, e->s[0].p, e->s[1].p, size, carry);
      break;
+  case TYPE_ADD_ERR1_N:
+  case TYPE_SUB_ERR1_N:
+    e->retval = CALLING_CONVENTIONS (function)
+      (e->d[0].p, e->s[0].p, e->s[1].p, e->d[1].p, e->s[2].p, size, carry);
+    break;
+  case TYPE_ADD_ERR2_N:
+  case TYPE_SUB_ERR2_N:
+    e->retval = CALLING_CONVENTIONS (function)
+      (e->d[0].p, e->s[0].p, e->s[1].p, e->d[1].p, e->s[2].p, e->s[3].p, size, carry);
+    break;
+  case TYPE_ADD_ERR3_N:
+  case TYPE_SUB_ERR3_N:
+    e->retval = CALLING_CONVENTIONS (function)
+      (e->d[0].p, e->s[0].p, e->s[1].p, e->d[1].p, e->s[2].p, e->s[3].p, e->s[4].p, size, carry);
+    break;
  
    case TYPE_MUL_1:
    case TYPE_ADDMUL_1:
@@ -2089,6 +2446,8 @@ call (struct each_t *e, tryfun_t function)
    case TYPE_MUL_2:
    case TYPE_MUL_3:
    case TYPE_MUL_4:
+  case TYPE_MUL_5:
+  case TYPE_MUL_6:
      if (size == 1)
        abort ();
      e->retval = CALLING_CONVENTIONS (function)
@@ -2135,7 +2494,12 @@ call (struct each_t *e, tryfun_t function)
      CALLING_CONVENTIONS (function) (e->d[0].p, e->s[0].p, size);
      break;
  
-
+  case TYPE_ADDLSH1_N_IP1:
+  case TYPE_ADDLSH2_N_IP1:
+  case TYPE_ADDLSH1_N_IP2:
+  case TYPE_ADDLSH2_N_IP2:
+  case TYPE_SUBLSH1_N_IP1:
+  case TYPE_SUBLSH2_N_IP1:
    case TYPE_DIVEXACT_BY3:
      e->retval = CALLING_CONVENTIONS (function) (e->d[0].p, e->s[0].p, size);
      break;
@@ -2147,6 +2511,7 @@ call (struct each_t *e, tryfun_t function)
  
    case TYPE_DIVMOD_1:
    case TYPE_DIVEXACT_1:
+  case TYPE_BDIV_Q_1:
      e->retval = CALLING_CONVENTIONS (function)
        (e->d[0].p, e->s[0].p, size, divisor);
      break;
@@ -2259,7 +2624,15 @@ call (struct each_t *e, tryfun_t function)
      }
      break;
  
+  case TYPE_MPZ_LEGENDRE:
    case TYPE_MPZ_JACOBI:
+    {
+      mpz_t  a, b;
+      PTR(a) = e->s[0].p; SIZ(a) = (carry==0 ? size : -size);
+      PTR(b) = e->s[1].p; SIZ(b) = size2;
+      e->retval = CALLING_CONVENTIONS (function) (a, b);
+    }
+    break;
    case TYPE_MPZ_KRONECKER:
      {
        mpz_t  a, b;
@@ -2298,6 +2671,7 @@ call (struct each_t *e, tryfun_t function)
      break;
  
    case TYPE_MUL_MN:
+  case TYPE_MULMID_MN:
      CALLING_CONVENTIONS (function)
        (e->d[0].p, e->s[0].p, size, e->s[1].p, size2);
      break;
@@ -2305,6 +2679,10 @@ call (struct each_t *e, tryfun_t function)
    case TYPE_MULLO_N:
      CALLING_CONVENTIONS (function) (e->d[0].p, e->s[0].p, e->s[1].p, size);
      break;
+  case TYPE_MULMID_N:
+    CALLING_CONVENTIONS (function) (e->d[0].p, e->s[0].p, e->s[1].p,
+                                   (size + 1) / 2);
+    break;
    case TYPE_SQR:
      CALLING_CONVENTIONS (function) (e->d[0].p, e->s[0].p, size);
      break;
@@ -2318,6 +2696,9 @@ call (struct each_t *e, tryfun_t function)
        (e->s[0].p[0], e->s[0].p[1], e->d[0].p);
      break;
  
+  case TYPE_ADDLSH_N_IP1:
+  case TYPE_ADDLSH_N_IP2:
+  case TYPE_SUBLSH_N_IP1:
    case TYPE_LSHIFT:
    case TYPE_LSHIFTC:
    case TYPE_RSHIFT:
@@ -2430,6 +2811,12 @@ pointer_setup (struct each_t *e)
        case SIZE_3:
         d[i].size = 3;
         break;
+      case SIZE_4:
+       d[i].size = 4;
+       break;
+      case SIZE_6:
+       d[i].size = 6;
+       break;
  
        case SIZE_PLUS_1:
         d[i].size = size+1;
@@ -2457,6 +2844,10 @@ pointer_setup (struct each_t *e)
         d[i].size = size - size2 + 1;
         break;
  
+      case SIZE_DIFF_PLUS_3:
+       d[i].size = size - size2 + 3;
+       break;
+
        case SIZE_CEIL_HALF:
         d[i].size = (size+1)/2;
         break;
@@ -2634,6 +3025,36 @@ try_one (void)
           s[i].p[0] |= 1;
         break;
  
+      case DATA_SRC1_ODD_PRIME:
+       if (i == 1)
+         {
+           if (refmpn_zero_p (s[i].p+1, SRC_SIZE(i)-1)
+               && s[i].p[0] <=3)
+             s[i].p[0] = 3;
+           else
+             {
+               mpz_t p;
+               mpz_init (p);
+               for (;;)
+                 {
+                   _mpz_realloc (p, SRC_SIZE(i));
+                   MPN_COPY (PTR(p), s[i].p, SRC_SIZE(i));
+                   SIZ(p) = SRC_SIZE(i);
+                   MPN_NORMALIZE (PTR(p), SIZ(p));
+                   mpz_nextprime (p, p);
+                   if (mpz_size (p) <= SRC_SIZE(i))
+                     break;
+
+                   t_random (s[i].p, SRC_SIZE(i));
+                 }
+               MPN_COPY (s[i].p, PTR(p), SIZ(p));
+               if (SIZ(p) < SRC_SIZE(i))
+                 MPN_ZERO (s[i].p + SIZ(p), SRC_SIZE(i) - SIZ(p));
+               mpz_clear (p);
+             }
+         }
+       break;
+
        case DATA_SRC1_HIGHBIT:
         if (i == 1)
           {
@@ -2646,9 +3067,9 @@ try_one (void)
  
        case DATA_SRC0_HIGHBIT:
         if (i == 0)
-         {
-           s[i].p[size-1] |= GMP_NUMB_HIGHBIT;
-         }
+        {
+          s[i].p[size-1] |= GMP_NUMB_HIGHBIT;
+        }
         break;
  
        case DATA_UDIV_QRNND:
@@ -2736,13 +3157,15 @@ try_one (void)
  #define SIZE_ITERATION                                          \
    for (size = MAX3 (option_firstsize,                           \
                     choice->minsize,                            \
-                   (tr->size == SIZE_ALLOW_ZERO) ? 0 : 1);     \
+                   (tr->size == SIZE_ALLOW_ZERO) ? 0 : 1),     \
+        size += (tr->size == SIZE_ODD) && !(size & 1);         \
         size <= option_lastsize;                                 \
-       size++)
+       size += (tr->size == SIZE_ODD) ? 2 : 1)
  
  #define SIZE2_FIRST                                     \
    (tr->size2 == SIZE_2 ? 2                              \
     : tr->size2 == SIZE_FRACTION ? option_firstsize2     \
+   : tr->size2 == SIZE_CEIL_HALF ? ((size + 1) / 2)    \
     : tr->size2 ?                                        \
     MAX (choice->minsize, (option_firstsize2 != 0        \
                           ? option_firstsize2 : 1))     \
@@ -2751,6 +3174,7 @@ try_one (void)
  #define SIZE2_LAST                                      \
    (tr->size2 == SIZE_2 ? 2                              \
     : tr->size2 == SIZE_FRACTION ? FRACTION_COUNT-1      \
+   : tr->size2 == SIZE_CEIL_HALF ? ((size + 1) / 2)    \
     : tr->size2 ? size                                   \
     : 0)
  
diff --git a/tests/devel/tst-addsub.c b/tests/devel/tst-addsub.c

index 5b819e9b34ab9100194644e3b2b04760b638af77..37d7f06d53d87dd1da30622050f8889d3eec3a60 100644 (file)
--- a/tests/devel/tst-addsub.c
+++ b/tests/devel/tst-addsub.c
@@ -1,19 +1,19 @@
  /* Copyright 1996, 2001 Free Software Foundation, Inc.
  
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
  
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
  
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
  
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see http://www.gnu.org/licenses/.  */
  
  #include <stdio.h>
  #include <stdlib.h>
diff --git a/tests/memory.c b/tests/memory.c

index ea123810109d806415c4f44ed5bb76542e838622..1a723fe3bf6bd3ad953ebb1af58df723be7b1f3a 100644 (file)
--- a/tests/memory.c
+++ b/tests/memory.c
@@ -1,27 +1,28 @@
  /* Memory allocation used during tests.
  
-Copyright 2001, 2002, 2007 Free Software Foundation, Inc.
+Copyright 2001, 2002, 2007, 2013 Free Software Foundation, Inc.
  
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
  
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
  
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
  
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see http://www.gnu.org/licenses/.  */
  
  #include <stdio.h>
  #include <stdlib.h>            /* for abort */
  #include <string.h>            /* for memcpy, memcmp */
  #include "gmp.h"
  #include "gmp-impl.h"
+#include "tests.h"
  
  #if GMP_LIMB_BITS == 64
  #define PATTERN1 CNST_LIMB(0xcafebabedeadbeef)
@@ -31,6 +32,12 @@ along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
  #define PATTERN2 CNST_LIMB(0xdeadbeef)
  #endif
  
+#if HAVE_INTPTR_T  /* PTRLIMB: pointer value as a limb, mixed into the redzone patterns */
+#define PTRLIMB(p)  ((mp_limb_t) (intptr_t) (p))  /* argument parenthesized so the cast covers a compound expression */
+#else  /* no intptr_t available: go through size_t instead */
+#define PTRLIMB(p)  ((mp_limb_t) (size_t) (p))
+#endif
+
  /* Each block allocated is a separate malloc, for the benefit of a redzoning
     malloc debugger during development or when bug hunting.
  
@@ -91,8 +98,8 @@ tests_allocate (size_t size)
    ptr = (void *) ((gmp_intptr_t) rptr + sizeof (mp_limb_t));
  
    *((mp_limb_t *) ((gmp_intptr_t) ptr - sizeof (mp_limb_t)))
-    = PATTERN1 - ((mp_limb_t) ptr);
-  PATTERN2_var = PATTERN2 - ((mp_limb_t) ptr);
+    = PATTERN1 - PTRLIMB (ptr);
+  PATTERN2_var = PATTERN2 - PTRLIMB (ptr);
    memcpy ((void *) ((gmp_intptr_t) ptr + size), &PATTERN2_var, sizeof (mp_limb_t));
  
    h->size = size;
@@ -131,12 +138,12 @@ tests_reallocate (void *ptr, size_t old_size, size_t new_size)
      }
  
    if (*((mp_limb_t *) ((gmp_intptr_t) ptr - sizeof (mp_limb_t)))
-      != PATTERN1 - ((mp_limb_t) ptr))
+      != PATTERN1 - PTRLIMB (ptr))
      {
        fprintf (stderr, "in realloc: redzone clobbered before block\n");
        abort ();
      }
-  PATTERN2_var = PATTERN2 - ((mp_limb_t) ptr);
+  PATTERN2_var = PATTERN2 - PTRLIMB (ptr);
    if (memcmp ((void *) ((gmp_intptr_t) ptr + h->size), &PATTERN2_var, sizeof (mp_limb_t)))
      {
        fprintf (stderr, "in realloc: redzone clobbered after block\n");
@@ -149,8 +156,8 @@ tests_reallocate (void *ptr, size_t old_size, size_t new_size)
    ptr = (void *) ((gmp_intptr_t) rptr + sizeof (mp_limb_t));
  
    *((mp_limb_t *) ((gmp_intptr_t) ptr - sizeof (mp_limb_t)))
-    = PATTERN1 - ((mp_limb_t) ptr);
-  PATTERN2_var = PATTERN2 - ((mp_limb_t) ptr);
+    = PATTERN1 - PTRLIMB (ptr);
+  PATTERN2_var = PATTERN2 - PTRLIMB (ptr);
    memcpy ((void *) ((gmp_intptr_t) ptr + new_size), &PATTERN2_var, sizeof (mp_limb_t));
  
    h->size = new_size;
@@ -181,12 +188,12 @@ tests_free_nosize (void *ptr)
    *hp = h->next;  /* unlink */
  
    if (*((mp_limb_t *) ((gmp_intptr_t) ptr - sizeof (mp_limb_t)))
-      != PATTERN1 - ((mp_limb_t) ptr))
+      != PATTERN1 - PTRLIMB (ptr))
      {
        fprintf (stderr, "in free: redzone clobbered before block\n");
        abort ();
      }
-  PATTERN2_var = PATTERN2 - ((mp_limb_t) ptr);
+  PATTERN2_var = PATTERN2 - PTRLIMB (ptr);
    if (memcmp ((void *) ((gmp_intptr_t) ptr + h->size), &PATTERN2_var, sizeof (mp_limb_t)))
      {
        fprintf (stderr, "in free: redzone clobbered after block\n");
diff --git a/tests/misc.c b/tests/misc.c

index 3ad27e5d23271de5ca1d4e873a043d8abab9d0d8..31b1e152c0f5c33b1868dc4f46ddb26f960a333b 100644 (file)
--- a/tests/misc.c
+++ b/tests/misc.c
@@ -1,21 +1,21 @@
  /* Miscellaneous test program support routines.
  
-Copyright 2000, 2001, 2002, 2003, 2005 Free Software Foundation, Inc.
+Copyright 2000, 2001, 2002, 2003, 2005, 2013 Free Software Foundation, Inc.
  
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
  
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
  
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
  
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see http://www.gnu.org/licenses/.  */
  
  #include "config.h"
  
@@ -123,7 +123,7 @@ tests_rand_end (void)
  
  
  /* Only used if CPU calling conventions checking is available. */
-mp_limb_t (*calling_conventions_function) __GMP_PROTO ((ANYARGS));
+mp_limb_t (*calling_conventions_function) (ANYARGS);
  
  
  /* Return p advanced to the next multiple of "align" bytes.  "align" must be
@@ -163,7 +163,7 @@ __gmp_allocate_strdup (const char *s)
    size_t  len;
    char    *t;
    len = strlen (s);
-  t = (*__gmp_allocate_func) (len+1);
+  t = (char *) (*__gmp_allocate_func) (len+1);
    memcpy (t, s, len+1);
    return t;
  }
@@ -382,7 +382,7 @@ urandom (void)
  
  /* Call (*func)() with various random number generators. */
  void
-call_rand_algs (void (*func) __GMP_PROTO ((const char *, gmp_randstate_ptr)))
+call_rand_algs (void (*func) (const char *, gmp_randstate_ptr))
  {
    gmp_randstate_t  rstate;
    mpz_t            a;
@@ -465,7 +465,7 @@ tests_isinf (double d)
  int
  tests_hardware_setround (int mode)
  {
-#if HAVE_HOST_CPU_FAMILY_x86
+#if WANT_ASSEMBLY && HAVE_HOST_CPU_FAMILY_x86
    int  rc;
    switch (mode) {
    case 0: rc = 0; break;  /* nearest */
@@ -486,7 +486,7 @@ tests_hardware_setround (int mode)
  int
  tests_hardware_getround (void)
  {
-#if HAVE_HOST_CPU_FAMILY_x86
+#if WANT_ASSEMBLY && HAVE_HOST_CPU_FAMILY_x86
    switch ((x86_fstcw () & ~0xC00) >> 10) {
    case 0: return 0; break;  /* nearest */
    case 1: return 3; break;  /* down    */
diff --git a/tests/misc/Makefile.am b/tests/misc/Makefile.am

index 588652ea2fc003ab97476fe014c4aae5ccf5be8f..6a96ec3bb7590630e2da6b0074eb835ef87f7f16 100644 (file)
--- a/tests/misc/Makefile.am
+++ b/tests/misc/Makefile.am
@@ -2,20 +2,20 @@
  
  # Copyright 2001, 2002 Free Software Foundation, Inc.
  #
-# This file is part of the GNU MP Library.
+# This file is part of the GNU MP Library test suite.
  #
-# The GNU MP Library is free software; you can redistribute it and/or modify
-# it under the terms of the GNU Lesser General Public License as published by
-# the Free Software Foundation; either version 3 of the License, or (at your
-# option) any later version.
+# The GNU MP Library test suite is free software; you can redistribute it
+# and/or modify it under the terms of the GNU General Public License as
+# published by the Free Software Foundation; either version 3 of the License,
+# or (at your option) any later version.
  #
-# The GNU MP Library is distributed in the hope that it will be useful, but
-# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-# License for more details.
+# The GNU MP Library test suite is distributed in the hope that it will be
+# useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+# Public License for more details.
  #
-# You should have received a copy of the GNU Lesser General Public License
-# along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+# You should have received a copy of the GNU General Public License along with
+# the GNU MP Library test suite.  If not, see http://www.gnu.org/licenses/.
  
  
  INCLUDES = -I$(top_srcdir) -I$(top_srcdir)/tests
diff --git a/tests/misc/Makefile.in b/tests/misc/Makefile.in

index 3ab448ec358ef4171881f88cfab7a48e06ccb817..3acb88af4607a711952ec2b1bdafd86024ac3817 100644 (file)
--- a/tests/misc/Makefile.in
+++ b/tests/misc/Makefile.in
@@ -1,9 +1,9 @@
-# Makefile.in generated by automake 1.11.1 from Makefile.am.
+# Makefile.in generated by automake 1.11.6 from Makefile.am.
  # @configure_input@
  
  # Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
-# 2003, 2004, 2005, 2006, 2007, 2008, 2009  Free Software Foundation,
-# Inc.
+# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software
+# Foundation, Inc.
  # This Makefile.in is free software; the Free Software Foundation
  # gives unlimited permission to copy and/or distribute it,
  # with or without modifications, as long as this notice is preserved.
@@ -17,21 +17,38 @@
  
  # Copyright 2001, 2002 Free Software Foundation, Inc.
  #
-# This file is part of the GNU MP Library.
+# This file is part of the GNU MP Library test suite.
  #
-# The GNU MP Library is free software; you can redistribute it and/or modify
-# it under the terms of the GNU Lesser General Public License as published by
-# the Free Software Foundation; either version 3 of the License, or (at your
-# option) any later version.
+# The GNU MP Library test suite is free software; you can redistribute it
+# and/or modify it under the terms of the GNU General Public License as
+# published by the Free Software Foundation; either version 3 of the License,
+# or (at your option) any later version.
  #
-# The GNU MP Library is distributed in the hope that it will be useful, but
-# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-# License for more details.
+# The GNU MP Library test suite is distributed in the hope that it will be
+# useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+# Public License for more details.
  #
-# You should have received a copy of the GNU Lesser General Public License
-# along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+# You should have received a copy of the GNU General Public License along with
+# the GNU MP Library test suite.  If not, see http://www.gnu.org/licenses/.
  VPATH = @srcdir@
+am__make_dryrun = \
+  { \
+    am__dry=no; \
+    case $$MAKEFLAGS in \
+      *\\[\ \  ]*) \
+        echo 'am--echo: ; @echo "AM"  OK' | $(MAKE) -f - 2>/dev/null \
+          | grep '^AM OK$$' >/dev/null || am__dry=yes;; \
+      *) \
+        for am__flg in $$MAKEFLAGS; do \
+          case $$am__flg in \
+            *=*|--*) ;; \
+            *n*) am__dry=yes; break;; \
+          esac; \
+        done;; \
+    esac; \
+    test $$am__dry = yes; \
+  }
  pkgdatadir = $(datadir)/@PACKAGE@
  pkgincludedir = $(includedir)/@PACKAGE@
  pkglibdir = $(libdir)/@PACKAGE@
@@ -50,13 +67,12 @@ PRE_UNINSTALL = :
  POST_UNINSTALL = :
  build_triplet = @build@
  host_triplet = @host@
-ANSI2KNR = $(top_builddir)/ansi2knr
  check_PROGRAMS = t-printf$(EXEEXT) t-scanf$(EXEEXT) t-locale$(EXEEXT)
  subdir = tests/misc
  DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in
  ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
  am__aclocal_m4_deps = $(top_srcdir)/acinclude.m4 \
-       $(top_srcdir)/configure.in
+       $(top_srcdir)/configure.ac
  am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
         $(ACLOCAL_M4)
  mkinstalldirs = $(install_sh) -d
@@ -64,17 +80,17 @@ CONFIG_HEADER = $(top_builddir)/config.h
  CONFIG_CLEAN_FILES =
  CONFIG_CLEAN_VPATH_FILES =
  t_locale_SOURCES = t-locale.c
-t_locale_OBJECTS = t-locale$U.$(OBJEXT)
+t_locale_OBJECTS = t-locale.$(OBJEXT)
  t_locale_LDADD = $(LDADD)
  t_locale_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
         $(top_builddir)/libgmp.la
  t_printf_SOURCES = t-printf.c
-t_printf_OBJECTS = t-printf$U.$(OBJEXT)
+t_printf_OBJECTS = t-printf.$(OBJEXT)
  t_printf_LDADD = $(LDADD)
  t_printf_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
         $(top_builddir)/libgmp.la
  t_scanf_SOURCES = t-scanf.c
-t_scanf_OBJECTS = t-scanf$U.$(OBJEXT)
+t_scanf_OBJECTS = t-scanf.$(OBJEXT)
  t_scanf_LDADD = $(LDADD)
  t_scanf_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
         $(top_builddir)/libgmp.la
@@ -92,6 +108,11 @@ LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \
         $(LDFLAGS) -o $@
  SOURCES = t-locale.c t-printf.c t-scanf.c
  DIST_SOURCES = t-locale.c t-printf.c t-scanf.c
+am__can_run_installinfo = \
+  case $$AM_UPDATE_INFO_DIR in \
+    n|no|NO) false;; \
+    *) (install-info --version) >/dev/null 2>&1;; \
+  esac
  ETAGS = etags
  CTAGS = ctags
  am__tty_colors = \
@@ -195,8 +216,8 @@ SHELL = @SHELL@
  SPEED_CYCLECOUNTER_OBJ = @SPEED_CYCLECOUNTER_OBJ@
  STRIP = @STRIP@
  TAL_OBJECT = @TAL_OBJECT@
+TUNE_LIBS = @TUNE_LIBS@
  TUNE_SQR_OBJ = @TUNE_SQR_OBJ@
-U = @U@
  U_FOR_BUILD = @U_FOR_BUILD@
  VERSION = @VERSION@
  WITH_READLINE_01 = @WITH_READLINE_01@
@@ -243,7 +264,6 @@ mandir = @mandir@
  mkdir_p = @mkdir_p@
  mpn_objects = @mpn_objects@
  mpn_objs_in_libgmp = @mpn_objs_in_libgmp@
-mpn_objs_in_libmp = @mpn_objs_in_libmp@
  oldincludedir = @oldincludedir@
  pdfdir = @pdfdir@
  prefix = @prefix@
@@ -308,13 +328,13 @@ clean-checkPROGRAMS:
         list=`for p in $$list; do echo "$$p"; done | sed 's/$(EXEEXT)$$//'`; \
         echo " rm -f" $$list; \
         rm -f $$list
-t-locale$(EXEEXT): $(t_locale_OBJECTS) $(t_locale_DEPENDENCIES) 
+t-locale$(EXEEXT): $(t_locale_OBJECTS) $(t_locale_DEPENDENCIES) $(EXTRA_t_locale_DEPENDENCIES) 
         @rm -f t-locale$(EXEEXT)
         $(LINK) $(t_locale_OBJECTS) $(t_locale_LDADD) $(LIBS)
-t-printf$(EXEEXT): $(t_printf_OBJECTS) $(t_printf_DEPENDENCIES) 
+t-printf$(EXEEXT): $(t_printf_OBJECTS) $(t_printf_DEPENDENCIES) $(EXTRA_t_printf_DEPENDENCIES) 
         @rm -f t-printf$(EXEEXT)
         $(LINK) $(t_printf_OBJECTS) $(t_printf_LDADD) $(LIBS)
-t-scanf$(EXEEXT): $(t_scanf_OBJECTS) $(t_scanf_DEPENDENCIES) 
+t-scanf$(EXEEXT): $(t_scanf_OBJECTS) $(t_scanf_DEPENDENCIES) $(EXTRA_t_scanf_DEPENDENCIES) 
         @rm -f t-scanf$(EXEEXT)
         $(LINK) $(t_scanf_OBJECTS) $(t_scanf_LDADD) $(LIBS)
  
@@ -323,11 +343,6 @@ mostlyclean-compile:
  
  distclean-compile:
         -rm -f *.tab.c
-$(top_builddir)/ansi2knr:
-       $(am__cd) $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) ./ansi2knr
-
-mostlyclean-kr:
-       -test "$U" = "" || rm -f *_.c
  
  .c.o:
         $(COMPILE) -c $<
@@ -337,14 +352,6 @@ mostlyclean-kr:
  
  .c.lo:
         $(LTCOMPILE) -c -o $@ $<
-t-locale_.c: t-locale.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-locale.c; then echo $(srcdir)/t-locale.c; else echo t-locale.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-printf_.c: t-printf.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-printf.c; then echo $(srcdir)/t-printf.c; else echo t-printf.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-scanf_.c: t-scanf.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-scanf.c; then echo $(srcdir)/t-scanf.c; else echo t-scanf.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-locale_.$(OBJEXT) t-locale_.lo t-printf_.$(OBJEXT) t-printf_.lo \
-t-scanf_.$(OBJEXT) t-scanf_.lo : $(ANSI2KNR)
  
  mostlyclean-libtool:
         -rm -f *.lo
@@ -485,14 +492,15 @@ check-TESTS: $(TESTS)
           fi; \
           dashes=`echo "$$dashes" | sed s/./=/g`; \
           if test "$$failed" -eq 0; then \
-           echo "$$grn$$dashes"; \
+           col="$$grn"; \
           else \
-           echo "$$red$$dashes"; \
+           col="$$red"; \
           fi; \
-         echo "$$banner"; \
-         test -z "$$skipped" || echo "$$skipped"; \
-         test -z "$$report" || echo "$$report"; \
-         echo "$$dashes$$std"; \
+         echo "$${col}$$dashes$${std}"; \
+         echo "$${col}$$banner$${std}"; \
+         test -z "$$skipped" || echo "$${col}$$skipped$${std}"; \
+         test -z "$$report" || echo "$${col}$$report$${std}"; \
+         echo "$${col}$$dashes$${std}"; \
           test "$$failed" -eq 0; \
         else :; fi
  
@@ -542,10 +550,15 @@ install-am: all-am
  
  installcheck: installcheck-am
  install-strip:
-       $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
-         install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
-         `test -z '$(STRIP)' || \
-           echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+       if test -z '$(STRIP)'; then \
+         $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+           install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+             install; \
+       else \
+         $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+           install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+           "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
+       fi
  mostlyclean-generic:
  
  clean-generic:
@@ -614,7 +627,7 @@ maintainer-clean-am: distclean-am maintainer-clean-generic
  
  mostlyclean: mostlyclean-am
  
-mostlyclean-am: mostlyclean-compile mostlyclean-generic mostlyclean-kr \
+mostlyclean-am: mostlyclean-compile mostlyclean-generic \
         mostlyclean-libtool
  
  pdf: pdf-am
@@ -627,7 +640,7 @@ ps-am:
  
  uninstall-am:
  
-.MAKE: $(top_builddir)/ansi2knr check-am install-am install-strip
+.MAKE: check-am install-am install-strip
  
  .PHONY: CTAGS GTAGS all all-am check check-TESTS check-am clean \
         clean-checkPROGRAMS clean-generic clean-libtool ctags \
@@ -640,8 +653,8 @@ uninstall-am:
         install-ps install-ps-am install-strip installcheck \
         installcheck-am installdirs maintainer-clean \
         maintainer-clean-generic mostlyclean mostlyclean-compile \
-       mostlyclean-generic mostlyclean-kr mostlyclean-libtool pdf \
-       pdf-am ps ps-am tags uninstall uninstall-am
+       mostlyclean-generic mostlyclean-libtool pdf pdf-am ps ps-am \
+       tags uninstall uninstall-am
  
  
  $(top_builddir)/tests/libtests.la:
diff --git a/tests/misc/t-locale.c b/tests/misc/t-locale.c

index 09819fa0560243da515e689b92b63e36d78de6d1..724b01070665890362bd1bc40a75c465be180cfe 100644 (file)
--- a/tests/misc/t-locale.c
+++ b/tests/misc/t-locale.c
@@ -1,21 +1,21 @@
  /* Test locale support, or attempt to do so.
  
-Copyright 2001, 2002 Free Software Foundation, Inc.
+Copyright 2001, 2002, 2011 Free Software Foundation, Inc.
  
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
  
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
  
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
  
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see http://www.gnu.org/licenses/.  */
  
  #define _GNU_SOURCE    /* for DECIMAL_POINT in glibc langinfo.h */
  
@@ -49,7 +49,7 @@ main (void)
  }
  #else
  
-char *decimal_point;
+const char *decimal_point;
  
  /* Replace the libc localeconv with one we can manipulate. */
  #if HAVE_LOCALECONV
@@ -57,7 +57,7 @@ struct lconv *
  localeconv (void)
  {
    static struct lconv  l;
-  l.decimal_point = decimal_point;
+  l.decimal_point = (char *) decimal_point;
    return &l;
  }
  #endif
@@ -69,20 +69,20 @@ nl_langinfo (nl_item n)
  {
  #if defined (DECIMAL_POINT)
    if (n == DECIMAL_POINT)
-    return decimal_point;
+    return (char *) decimal_point;
  #endif
  #if defined (RADIXCHAR)
    if (n == RADIXCHAR)
-    return decimal_point;
+    return (char *) decimal_point;
  #endif
-  return "";
+  return (char *) "";
  }
  #endif
  
  void
  check_input (void)
  {
-  static char *point[] = {
+  static const char *point[] = {
      ".", ",", "WU", "STR", "ZTV***"
    };
  
@@ -113,7 +113,7 @@ check_input (void)
  
    for (i = 0; i < numberof (point); i++)
      {
-      decimal_point = point[i];
+      decimal_point = (const char *) point[i];
  
        for (neg = 0; neg <= 1; neg++)
          {
diff --git a/tests/misc/t-printf.c b/tests/misc/t-printf.c

index d6026b2d02f15cbe271296feb5630455e0ba7f8c..25b016179dbf54aae72d425094d19e70415720a3 100644 (file)
--- a/tests/misc/t-printf.c
+++ b/tests/misc/t-printf.c
@@ -2,20 +2,20 @@
  
  Copyright 2001, 2002, 2003 Free Software Foundation, Inc.
  
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
  
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
  
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
  
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see http://www.gnu.org/licenses/.  */
  
  
  /* Usage: t-printf [-s]
@@ -100,7 +100,7 @@ check_plain (va_alist)
      return;
  
    fmtsize = strlen (fmt_orig) + 1;
-  fmt = (*__gmp_allocate_func) (fmtsize);
+  fmt = (char *) (*__gmp_allocate_func) (fmtsize);
  
    for (p = fmt_orig, q = fmt; *p != '\0'; p++)
      {
@@ -296,7 +296,7 @@ check_obstack_vprintf (const char *want, const char *fmt, va_list ap)
  
    obstack_init (&ob);
    got_len = gmp_obstack_vprintf (&ob, fmt, ap);
-  got = obstack_base (&ob);
+  got = (char *) obstack_base (&ob);
    ob_len = obstack_object_size (&ob);
  
    if (got_len != want_len
diff --git a/tests/misc/t-scanf.c b/tests/misc/t-scanf.c

index 5a9eda1fc955370bde3feaa64197d7305535ad7b..b8d833867f2344a2aafc4108910c02f0a47b04ec 100644 (file)
--- a/tests/misc/t-scanf.c
+++ b/tests/misc/t-scanf.c
@@ -2,20 +2,20 @@
  
  Copyright 2001, 2002, 2003, 2004 Free Software Foundation, Inc.
  
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
  
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
  
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
  
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see http://www.gnu.org/licenses/.  */
  
  
  /* Usage: t-scanf [-s]
@@ -63,7 +63,7 @@ along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
  
  int   option_libc_scanf = 0;
  
-typedef int (*fun_t) __GMP_PROTO ((const char *, const char *, void *, void *));
+typedef int (*fun_t) (const char *, const char *, void *, void *);
  
  
  /* This problem was seen on powerpc7450-apple-darwin7.0.0, sscanf returns 0
@@ -229,7 +229,7 @@ fun_sscanf (const char *input, const char *fmt, void *a1, void *a2)
    int     ret;
  
    size = strlen (input) + 1;
-  input_writable = (*__gmp_allocate_func) (size);
+  input_writable = (char *) (*__gmp_allocate_func) (size);
    memcpy (input_writable, input, size);
  
    if (a2 == NULL)
diff --git a/tests/mpbsd/Makefile.am b/tests/mpbsd/Makefile.am

deleted file mode 100644 (file)

index f609a6a..0000000
--- a/tests/mpbsd/Makefile.am
+++ /dev/null
@@ -1,35 +0,0 @@
-## Process this file with automake to generate Makefile.in
-
-# Copyright 2000, 2001, 2002 Free Software Foundation, Inc.
-#
-# This file is part of the GNU MP Library.
-#
-# The GNU MP Library is free software; you can redistribute it and/or modify
-# it under the terms of the GNU Lesser General Public License as published by
-# the Free Software Foundation; either version 3 of the License, or (at your
-# option) any later version.
-#
-# The GNU MP Library is distributed in the hope that it will be useful, but
-# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-# License for more details.
-#
-# You should have received a copy of the GNU Lesser General Public License
-# along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
-
-
-INCLUDES = -I$(top_srcdir) -I$(top_srcdir)/tests
-LDADD = $(top_builddir)/tests/libtests.la $(top_builddir)/libmp.la
-
-if WANT_MPBSD
-MPBSD_check_OPTION = allfuns t-itom t-mtox
-endif
-
-check_PROGRAMS = $(MPBSD_check_OPTION)
-TESTS = $(check_PROGRAMS)
-
-# check linking only against libmp
-allfuns_LDADD = $(top_builddir)/libmp.la
-
-$(top_builddir)/tests/libtests.la:
-       cd $(top_builddir)/tests; $(MAKE) $(AM_MAKEFLAGS) libtests.la
diff --git a/tests/mpbsd/Makefile.in b/tests/mpbsd/Makefile.in

deleted file mode 100644 (file)

index 81107ec..0000000
--- a/tests/mpbsd/Makefile.in
+++ /dev/null
@@ -1,650 +0,0 @@
-# Makefile.in generated by automake 1.11.1 from Makefile.am.
-# @configure_input@
-
-# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
-# 2003, 2004, 2005, 2006, 2007, 2008, 2009  Free Software Foundation,
-# Inc.
-# This Makefile.in is free software; the Free Software Foundation
-# gives unlimited permission to copy and/or distribute it,
-# with or without modifications, as long as this notice is preserved.
-
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
-# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
-# PARTICULAR PURPOSE.
-
-@SET_MAKE@
-
-# Copyright 2000, 2001, 2002 Free Software Foundation, Inc.
-#
-# This file is part of the GNU MP Library.
-#
-# The GNU MP Library is free software; you can redistribute it and/or modify
-# it under the terms of the GNU Lesser General Public License as published by
-# the Free Software Foundation; either version 3 of the License, or (at your
-# option) any later version.
-#
-# The GNU MP Library is distributed in the hope that it will be useful, but
-# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-# License for more details.
-#
-# You should have received a copy of the GNU Lesser General Public License
-# along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
-VPATH = @srcdir@
-pkgdatadir = $(datadir)/@PACKAGE@
-pkgincludedir = $(includedir)/@PACKAGE@
-pkglibdir = $(libdir)/@PACKAGE@
-pkglibexecdir = $(libexecdir)/@PACKAGE@
-am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd
-install_sh_DATA = $(install_sh) -c -m 644
-install_sh_PROGRAM = $(install_sh) -c
-install_sh_SCRIPT = $(install_sh) -c
-INSTALL_HEADER = $(INSTALL_DATA)
-transform = $(program_transform_name)
-NORMAL_INSTALL = :
-PRE_INSTALL = :
-POST_INSTALL = :
-NORMAL_UNINSTALL = :
-PRE_UNINSTALL = :
-POST_UNINSTALL = :
-build_triplet = @build@
-host_triplet = @host@
-ANSI2KNR = $(top_builddir)/ansi2knr
-check_PROGRAMS = $(am__EXEEXT_1)
-subdir = tests/mpbsd
-DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in
-ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
-am__aclocal_m4_deps = $(top_srcdir)/acinclude.m4 \
-       $(top_srcdir)/configure.in
-am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
-       $(ACLOCAL_M4)
-mkinstalldirs = $(install_sh) -d
-CONFIG_HEADER = $(top_builddir)/config.h
-CONFIG_CLEAN_FILES =
-CONFIG_CLEAN_VPATH_FILES =
-@WANT_MPBSD_TRUE@am__EXEEXT_1 = allfuns$(EXEEXT) t-itom$(EXEEXT) \
-@WANT_MPBSD_TRUE@      t-mtox$(EXEEXT)
-allfuns_SOURCES = allfuns.c
-allfuns_OBJECTS = allfuns$U.$(OBJEXT)
-allfuns_DEPENDENCIES = $(top_builddir)/libmp.la
-t_itom_SOURCES = t-itom.c
-t_itom_OBJECTS = t-itom$U.$(OBJEXT)
-t_itom_LDADD = $(LDADD)
-t_itom_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
-       $(top_builddir)/libmp.la
-t_mtox_SOURCES = t-mtox.c
-t_mtox_OBJECTS = t-mtox$U.$(OBJEXT)
-t_mtox_LDADD = $(LDADD)
-t_mtox_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
-       $(top_builddir)/libmp.la
-DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir)
-depcomp =
-am__depfiles_maybe =
-COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \
-       $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
-LTCOMPILE = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \
-       --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \
-       $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
-CCLD = $(CC)
-LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \
-       --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) $(AM_LDFLAGS) \
-       $(LDFLAGS) -o $@
-SOURCES = allfuns.c t-itom.c t-mtox.c
-DIST_SOURCES = allfuns.c t-itom.c t-mtox.c
-ETAGS = etags
-CTAGS = ctags
-am__tty_colors = \
-red=; grn=; lgn=; blu=; std=
-DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
-ABI = @ABI@
-ACLOCAL = @ACLOCAL@
-AMTAR = @AMTAR@
-AR = @AR@
-AS = @AS@
-ASMFLAGS = @ASMFLAGS@
-AUTOCONF = @AUTOCONF@
-AUTOHEADER = @AUTOHEADER@
-AUTOMAKE = @AUTOMAKE@
-AWK = @AWK@
-CALLING_CONVENTIONS_OBJS = @CALLING_CONVENTIONS_OBJS@
-CC = @CC@
-CCAS = @CCAS@
-CC_FOR_BUILD = @CC_FOR_BUILD@
-CFLAGS = @CFLAGS@
-CPP = @CPP@
-CPPFLAGS = @CPPFLAGS@
-CPP_FOR_BUILD = @CPP_FOR_BUILD@
-CXX = @CXX@
-CXXCPP = @CXXCPP@
-CXXFLAGS = @CXXFLAGS@
-CYGPATH_W = @CYGPATH_W@
-DEFN_LONG_LONG_LIMB = @DEFN_LONG_LONG_LIMB@
-DEFS = @DEFS@
-DLLTOOL = @DLLTOOL@
-DSYMUTIL = @DSYMUTIL@
-DUMPBIN = @DUMPBIN@
-ECHO_C = @ECHO_C@
-ECHO_N = @ECHO_N@
-ECHO_T = @ECHO_T@
-EGREP = @EGREP@
-EXEEXT = @EXEEXT@
-EXEEXT_FOR_BUILD = @EXEEXT_FOR_BUILD@
-FGREP = @FGREP@
-GMP_LDFLAGS = @GMP_LDFLAGS@
-GMP_LIMB_BITS = @GMP_LIMB_BITS@
-GMP_NAIL_BITS = @GMP_NAIL_BITS@
-GREP = @GREP@
-HAVE_CLOCK_01 = @HAVE_CLOCK_01@
-HAVE_CPUTIME_01 = @HAVE_CPUTIME_01@
-HAVE_GETRUSAGE_01 = @HAVE_GETRUSAGE_01@
-HAVE_GETTIMEOFDAY_01 = @HAVE_GETTIMEOFDAY_01@
-HAVE_HOST_CPU_FAMILY_power = @HAVE_HOST_CPU_FAMILY_power@
-HAVE_HOST_CPU_FAMILY_powerpc = @HAVE_HOST_CPU_FAMILY_powerpc@
-HAVE_SIGACTION_01 = @HAVE_SIGACTION_01@
-HAVE_SIGALTSTACK_01 = @HAVE_SIGALTSTACK_01@
-HAVE_SIGSTACK_01 = @HAVE_SIGSTACK_01@
-HAVE_STACK_T_01 = @HAVE_STACK_T_01@
-HAVE_SYS_RESOURCE_H_01 = @HAVE_SYS_RESOURCE_H_01@
-INSTALL = @INSTALL@
-INSTALL_DATA = @INSTALL_DATA@
-INSTALL_PROGRAM = @INSTALL_PROGRAM@
-INSTALL_SCRIPT = @INSTALL_SCRIPT@
-INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@
-LD = @LD@
-LDFLAGS = @LDFLAGS@
-LEX = @LEX@
-LEXLIB = @LEXLIB@
-LEX_OUTPUT_ROOT = @LEX_OUTPUT_ROOT@
-LIBCURSES = @LIBCURSES@
-LIBGMPXX_LDFLAGS = @LIBGMPXX_LDFLAGS@
-LIBGMP_DLL = @LIBGMP_DLL@
-LIBGMP_LDFLAGS = @LIBGMP_LDFLAGS@
-LIBM = @LIBM@
-LIBM_FOR_BUILD = @LIBM_FOR_BUILD@
-LIBOBJS = @LIBOBJS@
-LIBREADLINE = @LIBREADLINE@
-LIBS = @LIBS@
-LIBTOOL = @LIBTOOL@
-LIPO = @LIPO@
-LN_S = @LN_S@
-LTLIBOBJS = @LTLIBOBJS@
-M4 = @M4@
-MAINT = @MAINT@
-MAKEINFO = @MAKEINFO@
-MANIFEST_TOOL = @MANIFEST_TOOL@
-MKDIR_P = @MKDIR_P@
-NM = @NM@
-NMEDIT = @NMEDIT@
-OBJDUMP = @OBJDUMP@
-OBJEXT = @OBJEXT@
-OTOOL = @OTOOL@
-OTOOL64 = @OTOOL64@
-PACKAGE = @PACKAGE@
-PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@
-PACKAGE_NAME = @PACKAGE_NAME@
-PACKAGE_STRING = @PACKAGE_STRING@
-PACKAGE_TARNAME = @PACKAGE_TARNAME@
-PACKAGE_URL = @PACKAGE_URL@
-PACKAGE_VERSION = @PACKAGE_VERSION@
-PATH_SEPARATOR = @PATH_SEPARATOR@
-RANLIB = @RANLIB@
-SED = @SED@
-SET_MAKE = @SET_MAKE@
-SHELL = @SHELL@
-SPEED_CYCLECOUNTER_OBJ = @SPEED_CYCLECOUNTER_OBJ@
-STRIP = @STRIP@
-TAL_OBJECT = @TAL_OBJECT@
-TUNE_SQR_OBJ = @TUNE_SQR_OBJ@
-U = @U@
-U_FOR_BUILD = @U_FOR_BUILD@
-VERSION = @VERSION@
-WITH_READLINE_01 = @WITH_READLINE_01@
-YACC = @YACC@
-YFLAGS = @YFLAGS@
-abs_builddir = @abs_builddir@
-abs_srcdir = @abs_srcdir@
-abs_top_builddir = @abs_top_builddir@
-abs_top_srcdir = @abs_top_srcdir@
-ac_ct_AR = @ac_ct_AR@
-ac_ct_CC = @ac_ct_CC@
-ac_ct_CXX = @ac_ct_CXX@
-ac_ct_DUMPBIN = @ac_ct_DUMPBIN@
-am__leading_dot = @am__leading_dot@
-am__tar = @am__tar@
-am__untar = @am__untar@
-bindir = @bindir@
-build = @build@
-build_alias = @build_alias@
-build_cpu = @build_cpu@
-build_os = @build_os@
-build_vendor = @build_vendor@
-builddir = @builddir@
-datadir = @datadir@
-datarootdir = @datarootdir@
-docdir = @docdir@
-dvidir = @dvidir@
-exec_prefix = @exec_prefix@
-gmp_srclinks = @gmp_srclinks@
-host = @host@
-host_alias = @host_alias@
-host_cpu = @host_cpu@
-host_os = @host_os@
-host_vendor = @host_vendor@
-htmldir = @htmldir@
-includedir = @includedir@
-infodir = @infodir@
-install_sh = @install_sh@
-libdir = @libdir@
-libexecdir = @libexecdir@
-localedir = @localedir@
-localstatedir = @localstatedir@
-mandir = @mandir@
-mkdir_p = @mkdir_p@
-mpn_objects = @mpn_objects@
-mpn_objs_in_libgmp = @mpn_objs_in_libgmp@
-mpn_objs_in_libmp = @mpn_objs_in_libmp@
-oldincludedir = @oldincludedir@
-pdfdir = @pdfdir@
-prefix = @prefix@
-program_transform_name = @program_transform_name@
-psdir = @psdir@
-sbindir = @sbindir@
-sharedstatedir = @sharedstatedir@
-srcdir = @srcdir@
-sysconfdir = @sysconfdir@
-target_alias = @target_alias@
-top_build_prefix = @top_build_prefix@
-top_builddir = @top_builddir@
-top_srcdir = @top_srcdir@
-INCLUDES = -I$(top_srcdir) -I$(top_srcdir)/tests
-LDADD = $(top_builddir)/tests/libtests.la $(top_builddir)/libmp.la
-@WANT_MPBSD_TRUE@MPBSD_check_OPTION = allfuns t-itom t-mtox
-TESTS = $(check_PROGRAMS)
-
-# check linking only against libmp
-allfuns_LDADD = $(top_builddir)/libmp.la
-all: all-am
-
-.SUFFIXES:
-.SUFFIXES: .c .lo .o .obj
-$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am  $(am__configure_deps)
-       @for dep in $?; do \
-         case '$(am__configure_deps)' in \
-           *$$dep*) \
-             ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \
-               && { if test -f $@; then exit 0; else break; fi; }; \
-             exit 1;; \
-         esac; \
-       done; \
-       echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnu --ignore-deps tests/mpbsd/Makefile'; \
-       $(am__cd) $(top_srcdir) && \
-         $(AUTOMAKE) --gnu --ignore-deps tests/mpbsd/Makefile
-.PRECIOUS: Makefile
-Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
-       @case '$?' in \
-         *config.status*) \
-           cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \
-         *) \
-           echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \
-           cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \
-       esac;
-
-$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)
-       cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
-
-$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps)
-       cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
-$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps)
-       cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
-$(am__aclocal_m4_deps):
-
-clean-checkPROGRAMS:
-       @list='$(check_PROGRAMS)'; test -n "$$list" || exit 0; \
-       echo " rm -f" $$list; \
-       rm -f $$list || exit $$?; \
-       test -n "$(EXEEXT)" || exit 0; \
-       list=`for p in $$list; do echo "$$p"; done | sed 's/$(EXEEXT)$$//'`; \
-       echo " rm -f" $$list; \
-       rm -f $$list
-allfuns$(EXEEXT): $(allfuns_OBJECTS) $(allfuns_DEPENDENCIES) 
-       @rm -f allfuns$(EXEEXT)
-       $(LINK) $(allfuns_OBJECTS) $(allfuns_LDADD) $(LIBS)
-t-itom$(EXEEXT): $(t_itom_OBJECTS) $(t_itom_DEPENDENCIES) 
-       @rm -f t-itom$(EXEEXT)
-       $(LINK) $(t_itom_OBJECTS) $(t_itom_LDADD) $(LIBS)
-t-mtox$(EXEEXT): $(t_mtox_OBJECTS) $(t_mtox_DEPENDENCIES) 
-       @rm -f t-mtox$(EXEEXT)
-       $(LINK) $(t_mtox_OBJECTS) $(t_mtox_LDADD) $(LIBS)
-
-mostlyclean-compile:
-       -rm -f *.$(OBJEXT)
-
-distclean-compile:
-       -rm -f *.tab.c
-$(top_builddir)/ansi2knr:
-       $(am__cd) $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) ./ansi2knr
-
-mostlyclean-kr:
-       -test "$U" = "" || rm -f *_.c
-
-.c.o:
-       $(COMPILE) -c $<
-
-.c.obj:
-       $(COMPILE) -c `$(CYGPATH_W) '$<'`
-
-.c.lo:
-       $(LTCOMPILE) -c -o $@ $<
-allfuns_.c: allfuns.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/allfuns.c; then echo $(srcdir)/allfuns.c; else echo allfuns.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-itom_.c: t-itom.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-itom.c; then echo $(srcdir)/t-itom.c; else echo t-itom.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-mtox_.c: t-mtox.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-mtox.c; then echo $(srcdir)/t-mtox.c; else echo t-mtox.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-allfuns_.$(OBJEXT) allfuns_.lo t-itom_.$(OBJEXT) t-itom_.lo \
-t-mtox_.$(OBJEXT) t-mtox_.lo : $(ANSI2KNR)
-
-mostlyclean-libtool:
-       -rm -f *.lo
-
-clean-libtool:
-       -rm -rf .libs _libs
-
-ID: $(HEADERS) $(SOURCES) $(LISP) $(TAGS_FILES)
-       list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
-       unique=`for i in $$list; do \
-           if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
-         done | \
-         $(AWK) '{ files[$$0] = 1; nonempty = 1; } \
-             END { if (nonempty) { for (i in files) print i; }; }'`; \
-       mkid -fID $$unique
-tags: TAGS
-
-TAGS:  $(HEADERS) $(SOURCES)  $(TAGS_DEPENDENCIES) \
-               $(TAGS_FILES) $(LISP)
-       set x; \
-       here=`pwd`; \
-       list='$(SOURCES) $(HEADERS)  $(LISP) $(TAGS_FILES)'; \
-       unique=`for i in $$list; do \
-           if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
-         done | \
-         $(AWK) '{ files[$$0] = 1; nonempty = 1; } \
-             END { if (nonempty) { for (i in files) print i; }; }'`; \
-       shift; \
-       if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \
-         test -n "$$unique" || unique=$$empty_fix; \
-         if test $$# -gt 0; then \
-           $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
-             "$$@" $$unique; \
-         else \
-           $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
-             $$unique; \
-         fi; \
-       fi
-ctags: CTAGS
-CTAGS:  $(HEADERS) $(SOURCES)  $(TAGS_DEPENDENCIES) \
-               $(TAGS_FILES) $(LISP)
-       list='$(SOURCES) $(HEADERS)  $(LISP) $(TAGS_FILES)'; \
-       unique=`for i in $$list; do \
-           if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
-         done | \
-         $(AWK) '{ files[$$0] = 1; nonempty = 1; } \
-             END { if (nonempty) { for (i in files) print i; }; }'`; \
-       test -z "$(CTAGS_ARGS)$$unique" \
-         || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \
-            $$unique
-
-GTAGS:
-       here=`$(am__cd) $(top_builddir) && pwd` \
-         && $(am__cd) $(top_srcdir) \
-         && gtags -i $(GTAGS_ARGS) "$$here"
-
-distclean-tags:
-       -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags
-
-check-TESTS: $(TESTS)
-       @failed=0; all=0; xfail=0; xpass=0; skip=0; \
-       srcdir=$(srcdir); export srcdir; \
-       list=' $(TESTS) '; \
-       $(am__tty_colors); \
-       if test -n "$$list"; then \
-         for tst in $$list; do \
-           if test -f ./$$tst; then dir=./; \
-           elif test -f $$tst; then dir=; \
-           else dir="$(srcdir)/"; fi; \
-           if $(TESTS_ENVIRONMENT) $${dir}$$tst; then \
-             all=`expr $$all + 1`; \
-             case " $(XFAIL_TESTS) " in \
-             *[\ \     ]$$tst[\ \      ]*) \
-               xpass=`expr $$xpass + 1`; \
-               failed=`expr $$failed + 1`; \
-               col=$$red; res=XPASS; \
-             ;; \
-             *) \
-               col=$$grn; res=PASS; \
-             ;; \
-             esac; \
-           elif test $$? -ne 77; then \
-             all=`expr $$all + 1`; \
-             case " $(XFAIL_TESTS) " in \
-             *[\ \     ]$$tst[\ \      ]*) \
-               xfail=`expr $$xfail + 1`; \
-               col=$$lgn; res=XFAIL; \
-             ;; \
-             *) \
-               failed=`expr $$failed + 1`; \
-               col=$$red; res=FAIL; \
-             ;; \
-             esac; \
-           else \
-             skip=`expr $$skip + 1`; \
-             col=$$blu; res=SKIP; \
-           fi; \
-           echo "$${col}$$res$${std}: $$tst"; \
-         done; \
-         if test "$$all" -eq 1; then \
-           tests="test"; \
-           All=""; \
-         else \
-           tests="tests"; \
-           All="All "; \
-         fi; \
-         if test "$$failed" -eq 0; then \
-           if test "$$xfail" -eq 0; then \
-             banner="$$All$$all $$tests passed"; \
-           else \
-             if test "$$xfail" -eq 1; then failures=failure; else failures=failures; fi; \
-             banner="$$All$$all $$tests behaved as expected ($$xfail expected $$failures)"; \
-           fi; \
-         else \
-           if test "$$xpass" -eq 0; then \
-             banner="$$failed of $$all $$tests failed"; \
-           else \
-             if test "$$xpass" -eq 1; then passes=pass; else passes=passes; fi; \
-             banner="$$failed of $$all $$tests did not behave as expected ($$xpass unexpected $$passes)"; \
-           fi; \
-         fi; \
-         dashes="$$banner"; \
-         skipped=""; \
-         if test "$$skip" -ne 0; then \
-           if test "$$skip" -eq 1; then \
-             skipped="($$skip test was not run)"; \
-           else \
-             skipped="($$skip tests were not run)"; \
-           fi; \
-           test `echo "$$skipped" | wc -c` -le `echo "$$banner" | wc -c` || \
-             dashes="$$skipped"; \
-         fi; \
-         report=""; \
-         if test "$$failed" -ne 0 && test -n "$(PACKAGE_BUGREPORT)"; then \
-           report="Please report to $(PACKAGE_BUGREPORT)"; \
-           test `echo "$$report" | wc -c` -le `echo "$$banner" | wc -c` || \
-             dashes="$$report"; \
-         fi; \
-         dashes=`echo "$$dashes" | sed s/./=/g`; \
-         if test "$$failed" -eq 0; then \
-           echo "$$grn$$dashes"; \
-         else \
-           echo "$$red$$dashes"; \
-         fi; \
-         echo "$$banner"; \
-         test -z "$$skipped" || echo "$$skipped"; \
-         test -z "$$report" || echo "$$report"; \
-         echo "$$dashes$$std"; \
-         test "$$failed" -eq 0; \
-       else :; fi
-
-distdir: $(DISTFILES)
-       @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
-       topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
-       list='$(DISTFILES)'; \
-         dist_files=`for file in $$list; do echo $$file; done | \
-         sed -e "s|^$$srcdirstrip/||;t" \
-             -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \
-       case $$dist_files in \
-         */*) $(MKDIR_P) `echo "$$dist_files" | \
-                          sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \
-                          sort -u` ;; \
-       esac; \
-       for file in $$dist_files; do \
-         if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \
-         if test -d $$d/$$file; then \
-           dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \
-           if test -d "$(distdir)/$$file"; then \
-             find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
-           fi; \
-           if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \
-             cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \
-             find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
-           fi; \
-           cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \
-         else \
-           test -f "$(distdir)/$$file" \
-           || cp -p $$d/$$file "$(distdir)/$$file" \
-           || exit 1; \
-         fi; \
-       done
-check-am: all-am
-       $(MAKE) $(AM_MAKEFLAGS) $(check_PROGRAMS)
-       $(MAKE) $(AM_MAKEFLAGS) check-TESTS
-check: check-am
-all-am: Makefile
-installdirs:
-install: install-am
-install-exec: install-exec-am
-install-data: install-data-am
-uninstall: uninstall-am
-
-install-am: all-am
-       @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am
-
-installcheck: installcheck-am
-install-strip:
-       $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
-         install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
-         `test -z '$(STRIP)' || \
-           echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
-mostlyclean-generic:
-
-clean-generic:
-
-distclean-generic:
-       -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES)
-       -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES)
-
-maintainer-clean-generic:
-       @echo "This command is intended for maintainers to use"
-       @echo "it deletes files that may require special tools to rebuild."
-clean: clean-am
-
-clean-am: clean-checkPROGRAMS clean-generic clean-libtool \
-       mostlyclean-am
-
-distclean: distclean-am
-       -rm -f Makefile
-distclean-am: clean-am distclean-compile distclean-generic \
-       distclean-tags
-
-dvi: dvi-am
-
-dvi-am:
-
-html: html-am
-
-html-am:
-
-info: info-am
-
-info-am:
-
-install-data-am:
-
-install-dvi: install-dvi-am
-
-install-dvi-am:
-
-install-exec-am:
-
-install-html: install-html-am
-
-install-html-am:
-
-install-info: install-info-am
-
-install-info-am:
-
-install-man:
-
-install-pdf: install-pdf-am
-
-install-pdf-am:
-
-install-ps: install-ps-am
-
-install-ps-am:
-
-installcheck-am:
-
-maintainer-clean: maintainer-clean-am
-       -rm -f Makefile
-maintainer-clean-am: distclean-am maintainer-clean-generic
-
-mostlyclean: mostlyclean-am
-
-mostlyclean-am: mostlyclean-compile mostlyclean-generic mostlyclean-kr \
-       mostlyclean-libtool
-
-pdf: pdf-am
-
-pdf-am:
-
-ps: ps-am
-
-ps-am:
-
-uninstall-am:
-
-.MAKE: $(top_builddir)/ansi2knr check-am install-am install-strip
-
-.PHONY: CTAGS GTAGS all all-am check check-TESTS check-am clean \
-       clean-checkPROGRAMS clean-generic clean-libtool ctags \
-       distclean distclean-compile distclean-generic \
-       distclean-libtool distclean-tags distdir dvi dvi-am html \
-       html-am info info-am install install-am install-data \
-       install-data-am install-dvi install-dvi-am install-exec \
-       install-exec-am install-html install-html-am install-info \
-       install-info-am install-man install-pdf install-pdf-am \
-       install-ps install-ps-am install-strip installcheck \
-       installcheck-am installdirs maintainer-clean \
-       maintainer-clean-generic mostlyclean mostlyclean-compile \
-       mostlyclean-generic mostlyclean-kr mostlyclean-libtool pdf \
-       pdf-am ps ps-am tags uninstall uninstall-am
-
-
-$(top_builddir)/tests/libtests.la:
-       cd $(top_builddir)/tests; $(MAKE) $(AM_MAKEFLAGS) libtests.la
-
-# Tell versions [3.59,3.63) of GNU make to not export all variables.
-# Otherwise a system limit (for SysV at least) may be exceeded.
-.NOEXPORT:
diff --git a/tests/mpbsd/allfuns.c b/tests/mpbsd/allfuns.c

deleted file mode 100644 (file)

index 2aa25d3..0000000
--- a/tests/mpbsd/allfuns.c
+++ /dev/null
@@ -1,56 +0,0 @@
-/* A test program doing nothing really, just linking to all the BSD MP
-   functions that're supposed to exist.
-
-Copyright 2000, 2001 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-License for more details.
-
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
-
-#include <stdio.h>
-#include <stdlib.h>
-#include "mp.h"
-
-int
-main (int argc, char *argv[])
-{
-  MINT *a, *b, *c, *d;
-  short  h;
-
-  mp_set_memory_functions (NULL, NULL, NULL);
-  a = itom (123);
-  b = xtom ("DEADBEEF");
-  c = itom (0);
-  d = itom (0);
-  move (a, b);
-  madd (a, b, c);
-  msub (a, b, c);
-  mult (a, b, c);
-  mdiv (b, a, c, d);
-  sdiv (b, 2, c, &h);
-  msqrt (a, c, d);
-  pow (b, a, a, c);
-  rpow (a, 3, c);
-  gcd (a, b, c);
-  mcmp (a, b);
-  if (argc > 1)
-    {
-      min (c);
-      mout (a);
-    }
-  mtox (b);
-  mfree(a);
-
-  exit (0);
-}
diff --git a/tests/mpbsd/t-itom.c b/tests/mpbsd/t-itom.c

deleted file mode 100644 (file)

index 9b49cb0..0000000
--- a/tests/mpbsd/t-itom.c
+++ /dev/null
@@ -1,74 +0,0 @@
-/* Test itom.
-
-Copyright 2000, 2001 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-License for more details.
-
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
-
-#include <stdio.h>
-#include <stdlib.h>
-#include "gmp.h"
-#include "gmp-impl.h"
-#include "mp.h"
-#include "tests.h"
-
-#define SGN(x)       ((x) < 0 ? -1 : (x) == 0 ? 0 : 1)
-
-
-void
-check_data (void)
-{
-  static const struct {
-    short      m;
-    mp_size_t  want_size;
-    mp_limb_t  want_limb;
-  } data[] = {
-
-    {  0L,  0 },
-    {  1L,  1, 1 },
-    { -1L, -1, 1 },
-
-    {  SHRT_MAX,  1,  SHRT_MAX },
-    { -SHRT_MAX, -1,  SHRT_MAX },
-    {  SHRT_MIN, -1, -SHRT_MIN },
-  };
-
-  MINT  *m;
-  int   i;
-
-  for (i = 0; i < numberof (data); i++)
-    {
-      m = itom (data[i].m);
-      if (m->_mp_size != data[i].want_size
-         || (m->_mp_size != 0 && m->_mp_d[0] != data[i].want_limb))
-       {
-         printf ("itom wrong on data[%d]\n", i);
-         abort();
-       }
-      mfree (m);
-    }
-}
-
-
-int
-main (void)
-{
-  tests_start ();
-
-  check_data ();
-
-  tests_end ();
-  exit (0);
-}
diff --git a/tests/mpbsd/t-mtox.c b/tests/mpbsd/t-mtox.c

deleted file mode 100644 (file)

index 1138e76..0000000
--- a/tests/mpbsd/t-mtox.c
+++ /dev/null
@@ -1,86 +0,0 @@
-/* Test mtox.
-
-Copyright 2002 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-License for more details.
-
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
-
-#include <string.h>            /* for strcmp, strlen */
-#include <stdlib.h>            /* for abort */
-#include <stdio.h>
-#include "gmp.h"
-#include "gmp-impl.h"
-#include "mp.h"
-#include "tests.h"
-
-
-void
-check_random (void)
-{
-  mpz_t  z;
-  int    i;
-  char   *got, *want;
-  gmp_randstate_ptr  rands = RANDS;
-
-  mpz_init (z);
-
-  for (i = 0; i < 1000; i++)
-    {
-      mpz_erandomb (z, rands, 6 * GMP_LIMB_BITS);
-      got = mtox (z);
-      want = mpz_get_str (NULL, 16, z);
-      if (strcmp (got, want) != 0)
-        {
-          printf ("mtox wrong result\n");
-          printf ("  got  \"%s\"\n", got);
-          printf ("  want \"%s\"\n", want);
-          abort ();
-        }
-      (*__gmp_free_func) (got, strlen (got) + 1);
-      (*__gmp_free_func) (want, strlen (want) + 1);
-    }
-
-  mpz_clear (z);
-}
-
-void
-check_mem (void)
-{
-  MINT  *m;
-  char  *s;
-
-  m = itom (0);
-  s = mtox (m);
-  if (! tests_memory_valid (s))
-    {
-      printf ("Skipping t-mtox, cannot test libgmp and libmp memory together\n");
-      exit (0);
-    }
-  mfree (m);
-  (*__gmp_free_func) (s, strlen (s) + 1);
-}
-
-
-int
-main (void)
-{
-  tests_start ();
-
-  check_mem ();
-  check_random ();
-
-  tests_end ();
-  exit (0);
-}
diff --git a/tests/mpf/Makefile.am b/tests/mpf/Makefile.am

index dece8f71b017b1dd693af6e61cae80eac5570d2f..05e9812844e0aa5fd081413915f88ea844b5b81f 100644 (file)
--- a/tests/mpf/Makefile.am
+++ b/tests/mpf/Makefile.am
@@ -3,20 +3,20 @@
  # Copyright 1996, 1999, 2000, 2001, 2002, 2003, 2004 Free Software
  # Foundation, Inc.
  #
-# This file is part of the GNU MP Library.
+# This file is part of the GNU MP Library test suite.
  #
-# The GNU MP Library is free software; you can redistribute it and/or modify
-# it under the terms of the GNU Lesser General Public License as published by
-# the Free Software Foundation; either version 3 of the License, or (at your
-# option) any later version.
+# The GNU MP Library test suite is free software; you can redistribute it
+# and/or modify it under the terms of the GNU General Public License as
+# published by the Free Software Foundation; either version 3 of the License,
+# or (at your option) any later version.
  #
-# The GNU MP Library is distributed in the hope that it will be useful, but
-# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-# License for more details.
+# The GNU MP Library test suite is distributed in the hope that it will be
+# useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+# Public License for more details.
  #
-# You should have received a copy of the GNU Lesser General Public License
-# along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+# You should have received a copy of the GNU General Public License along with
+# the GNU MP Library test suite.  If not, see http://www.gnu.org/licenses/.
  
  
  INCLUDES = -I$(top_srcdir) -I$(top_srcdir)/tests
diff --git a/tests/mpf/Makefile.in b/tests/mpf/Makefile.in

index f56e7deb2e1f7ae4c60185acdd24bac5d799d2a2..e451aa5e20bdcee5bfd1877ef88b713d4b905868 100644 (file)
--- a/tests/mpf/Makefile.in
+++ b/tests/mpf/Makefile.in
@@ -1,9 +1,9 @@
-# Makefile.in generated by automake 1.11.1 from Makefile.am.
+# Makefile.in generated by automake 1.11.6 from Makefile.am.
  # @configure_input@
  
  # Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
-# 2003, 2004, 2005, 2006, 2007, 2008, 2009  Free Software Foundation,
-# Inc.
+# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software
+# Foundation, Inc.
  # This Makefile.in is free software; the Free Software Foundation
  # gives unlimited permission to copy and/or distribute it,
  # with or without modifications, as long as this notice is preserved.
@@ -18,21 +18,38 @@
  # Copyright 1996, 1999, 2000, 2001, 2002, 2003, 2004 Free Software
  # Foundation, Inc.
  #
-# This file is part of the GNU MP Library.
+# This file is part of the GNU MP Library test suite.
  #
-# The GNU MP Library is free software; you can redistribute it and/or modify
-# it under the terms of the GNU Lesser General Public License as published by
-# the Free Software Foundation; either version 3 of the License, or (at your
-# option) any later version.
+# The GNU MP Library test suite is free software; you can redistribute it
+# and/or modify it under the terms of the GNU General Public License as
+# published by the Free Software Foundation; either version 3 of the License,
+# or (at your option) any later version.
  #
-# The GNU MP Library is distributed in the hope that it will be useful, but
-# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-# License for more details.
+# The GNU MP Library test suite is distributed in the hope that it will be
+# useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+# Public License for more details.
  #
-# You should have received a copy of the GNU Lesser General Public License
-# along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+# You should have received a copy of the GNU General Public License along with
+# the GNU MP Library test suite.  If not, see http://www.gnu.org/licenses/.
  VPATH = @srcdir@
+am__make_dryrun = \
+  { \
+    am__dry=no; \
+    case $$MAKEFLAGS in \
+      *\\[\ \  ]*) \
+        echo 'am--echo: ; @echo "AM"  OK' | $(MAKE) -f - 2>/dev/null \
+          | grep '^AM OK$$' >/dev/null || am__dry=yes;; \
+      *) \
+        for am__flg in $$MAKEFLAGS; do \
+          case $$am__flg in \
+            *=*|--*) ;; \
+            *n*) am__dry=yes; break;; \
+          esac; \
+        done;; \
+    esac; \
+    test $$am__dry = yes; \
+  }
  pkgdatadir = $(datadir)/@PACKAGE@
  pkgincludedir = $(includedir)/@PACKAGE@
  pkglibdir = $(libdir)/@PACKAGE@
@@ -51,7 +68,6 @@ PRE_UNINSTALL = :
  POST_UNINSTALL = :
  build_triplet = @build@
  host_triplet = @host@
-ANSI2KNR = $(top_builddir)/ansi2knr
  check_PROGRAMS = t-add$(EXEEXT) t-sub$(EXEEXT) t-conv$(EXEEXT) \
         t-sqrt$(EXEEXT) t-sqrt_ui$(EXEEXT) t-muldiv$(EXEEXT) \
         t-dm2exp$(EXEEXT) reuse$(EXEEXT) t-cmp_d$(EXEEXT) \
@@ -65,7 +81,7 @@ subdir = tests/mpf
  DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in
  ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
  am__aclocal_m4_deps = $(top_srcdir)/acinclude.m4 \
-       $(top_srcdir)/configure.in
+       $(top_srcdir)/configure.ac
  am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
         $(ACLOCAL_M4)
  mkinstalldirs = $(install_sh) -d
@@ -73,137 +89,137 @@ CONFIG_HEADER = $(top_builddir)/config.h
  CONFIG_CLEAN_FILES =
  CONFIG_CLEAN_VPATH_FILES =
  reuse_SOURCES = reuse.c
-reuse_OBJECTS = reuse$U.$(OBJEXT)
+reuse_OBJECTS = reuse.$(OBJEXT)
  reuse_LDADD = $(LDADD)
  reuse_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
         $(top_builddir)/libgmp.la
  t_add_SOURCES = t-add.c
-t_add_OBJECTS = t-add$U.$(OBJEXT)
+t_add_OBJECTS = t-add.$(OBJEXT)
  t_add_LDADD = $(LDADD)
  t_add_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
         $(top_builddir)/libgmp.la
  t_cmp_d_SOURCES = t-cmp_d.c
-t_cmp_d_OBJECTS = t-cmp_d$U.$(OBJEXT)
+t_cmp_d_OBJECTS = t-cmp_d.$(OBJEXT)
  t_cmp_d_LDADD = $(LDADD)
  t_cmp_d_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
         $(top_builddir)/libgmp.la
  t_cmp_si_SOURCES = t-cmp_si.c
-t_cmp_si_OBJECTS = t-cmp_si$U.$(OBJEXT)
+t_cmp_si_OBJECTS = t-cmp_si.$(OBJEXT)
  t_cmp_si_LDADD = $(LDADD)
  t_cmp_si_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
         $(top_builddir)/libgmp.la
  t_conv_SOURCES = t-conv.c
-t_conv_OBJECTS = t-conv$U.$(OBJEXT)
+t_conv_OBJECTS = t-conv.$(OBJEXT)
  t_conv_LDADD = $(LDADD)
  t_conv_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
         $(top_builddir)/libgmp.la
  t_div_SOURCES = t-div.c
-t_div_OBJECTS = t-div$U.$(OBJEXT)
+t_div_OBJECTS = t-div.$(OBJEXT)
  t_div_LDADD = $(LDADD)
  t_div_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
         $(top_builddir)/libgmp.la
  t_dm2exp_SOURCES = t-dm2exp.c
-t_dm2exp_OBJECTS = t-dm2exp$U.$(OBJEXT)
+t_dm2exp_OBJECTS = t-dm2exp.$(OBJEXT)
  t_dm2exp_LDADD = $(LDADD)
  t_dm2exp_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
         $(top_builddir)/libgmp.la
  t_eq_SOURCES = t-eq.c
-t_eq_OBJECTS = t-eq$U.$(OBJEXT)
+t_eq_OBJECTS = t-eq.$(OBJEXT)
  t_eq_LDADD = $(LDADD)
  t_eq_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
         $(top_builddir)/libgmp.la
  t_fits_SOURCES = t-fits.c
-t_fits_OBJECTS = t-fits$U.$(OBJEXT)
+t_fits_OBJECTS = t-fits.$(OBJEXT)
  t_fits_LDADD = $(LDADD)
  t_fits_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
         $(top_builddir)/libgmp.la
  t_get_d_SOURCES = t-get_d.c
-t_get_d_OBJECTS = t-get_d$U.$(OBJEXT)
+t_get_d_OBJECTS = t-get_d.$(OBJEXT)
  t_get_d_LDADD = $(LDADD)
  t_get_d_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
         $(top_builddir)/libgmp.la
  t_get_d_2exp_SOURCES = t-get_d_2exp.c
-t_get_d_2exp_OBJECTS = t-get_d_2exp$U.$(OBJEXT)
+t_get_d_2exp_OBJECTS = t-get_d_2exp.$(OBJEXT)
  t_get_d_2exp_LDADD = $(LDADD)
  t_get_d_2exp_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
         $(top_builddir)/libgmp.la
  t_get_si_SOURCES = t-get_si.c
-t_get_si_OBJECTS = t-get_si$U.$(OBJEXT)
+t_get_si_OBJECTS = t-get_si.$(OBJEXT)
  t_get_si_LDADD = $(LDADD)
  t_get_si_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
         $(top_builddir)/libgmp.la
  t_get_ui_SOURCES = t-get_ui.c
-t_get_ui_OBJECTS = t-get_ui$U.$(OBJEXT)
+t_get_ui_OBJECTS = t-get_ui.$(OBJEXT)
  t_get_ui_LDADD = $(LDADD)
  t_get_ui_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
         $(top_builddir)/libgmp.la
  t_gsprec_SOURCES = t-gsprec.c
-t_gsprec_OBJECTS = t-gsprec$U.$(OBJEXT)
+t_gsprec_OBJECTS = t-gsprec.$(OBJEXT)
  t_gsprec_LDADD = $(LDADD)
  t_gsprec_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
         $(top_builddir)/libgmp.la
  t_inp_str_SOURCES = t-inp_str.c
-t_inp_str_OBJECTS = t-inp_str$U.$(OBJEXT)
+t_inp_str_OBJECTS = t-inp_str.$(OBJEXT)
  t_inp_str_LDADD = $(LDADD)
  t_inp_str_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
         $(top_builddir)/libgmp.la
  t_int_p_SOURCES = t-int_p.c
-t_int_p_OBJECTS = t-int_p$U.$(OBJEXT)
+t_int_p_OBJECTS = t-int_p.$(OBJEXT)
  t_int_p_LDADD = $(LDADD)
  t_int_p_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
         $(top_builddir)/libgmp.la
  t_mul_ui_SOURCES = t-mul_ui.c
-t_mul_ui_OBJECTS = t-mul_ui$U.$(OBJEXT)
+t_mul_ui_OBJECTS = t-mul_ui.$(OBJEXT)
  t_mul_ui_LDADD = $(LDADD)
  t_mul_ui_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
         $(top_builddir)/libgmp.la
  t_muldiv_SOURCES = t-muldiv.c
-t_muldiv_OBJECTS = t-muldiv$U.$(OBJEXT)
+t_muldiv_OBJECTS = t-muldiv.$(OBJEXT)
  t_muldiv_LDADD = $(LDADD)
  t_muldiv_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
         $(top_builddir)/libgmp.la
  t_set_SOURCES = t-set.c
-t_set_OBJECTS = t-set$U.$(OBJEXT)
+t_set_OBJECTS = t-set.$(OBJEXT)
  t_set_LDADD = $(LDADD)
  t_set_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
         $(top_builddir)/libgmp.la
  t_set_q_SOURCES = t-set_q.c
-t_set_q_OBJECTS = t-set_q$U.$(OBJEXT)
+t_set_q_OBJECTS = t-set_q.$(OBJEXT)
  t_set_q_LDADD = $(LDADD)
  t_set_q_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
         $(top_builddir)/libgmp.la
  t_set_si_SOURCES = t-set_si.c
-t_set_si_OBJECTS = t-set_si$U.$(OBJEXT)
+t_set_si_OBJECTS = t-set_si.$(OBJEXT)
  t_set_si_LDADD = $(LDADD)
  t_set_si_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
         $(top_builddir)/libgmp.la
  t_set_ui_SOURCES = t-set_ui.c
-t_set_ui_OBJECTS = t-set_ui$U.$(OBJEXT)
+t_set_ui_OBJECTS = t-set_ui.$(OBJEXT)
  t_set_ui_LDADD = $(LDADD)
  t_set_ui_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
         $(top_builddir)/libgmp.la
  t_sqrt_SOURCES = t-sqrt.c
-t_sqrt_OBJECTS = t-sqrt$U.$(OBJEXT)
+t_sqrt_OBJECTS = t-sqrt.$(OBJEXT)
  t_sqrt_LDADD = $(LDADD)
  t_sqrt_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
         $(top_builddir)/libgmp.la
  t_sqrt_ui_SOURCES = t-sqrt_ui.c
-t_sqrt_ui_OBJECTS = t-sqrt_ui$U.$(OBJEXT)
+t_sqrt_ui_OBJECTS = t-sqrt_ui.$(OBJEXT)
  t_sqrt_ui_LDADD = $(LDADD)
  t_sqrt_ui_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
         $(top_builddir)/libgmp.la
  t_sub_SOURCES = t-sub.c
-t_sub_OBJECTS = t-sub$U.$(OBJEXT)
+t_sub_OBJECTS = t-sub.$(OBJEXT)
  t_sub_LDADD = $(LDADD)
  t_sub_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
         $(top_builddir)/libgmp.la
  t_trunc_SOURCES = t-trunc.c
-t_trunc_OBJECTS = t-trunc$U.$(OBJEXT)
+t_trunc_OBJECTS = t-trunc.$(OBJEXT)
  t_trunc_LDADD = $(LDADD)
  t_trunc_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
         $(top_builddir)/libgmp.la
  t_ui_div_SOURCES = t-ui_div.c
-t_ui_div_OBJECTS = t-ui_div$U.$(OBJEXT)
+t_ui_div_OBJECTS = t-ui_div.$(OBJEXT)
  t_ui_div_LDADD = $(LDADD)
  t_ui_div_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
         $(top_builddir)/libgmp.la
@@ -229,6 +245,11 @@ DIST_SOURCES = reuse.c t-add.c t-cmp_d.c t-cmp_si.c t-conv.c t-div.c \
         t-get_ui.c t-gsprec.c t-inp_str.c t-int_p.c t-mul_ui.c \
         t-muldiv.c t-set.c t-set_q.c t-set_si.c t-set_ui.c t-sqrt.c \
         t-sqrt_ui.c t-sub.c t-trunc.c t-ui_div.c
+am__can_run_installinfo = \
+  case $$AM_UPDATE_INFO_DIR in \
+    n|no|NO) false;; \
+    *) (install-info --version) >/dev/null 2>&1;; \
+  esac
  ETAGS = etags
  CTAGS = ctags
  am__tty_colors = \
@@ -332,8 +353,8 @@ SHELL = @SHELL@
  SPEED_CYCLECOUNTER_OBJ = @SPEED_CYCLECOUNTER_OBJ@
  STRIP = @STRIP@
  TAL_OBJECT = @TAL_OBJECT@
+TUNE_LIBS = @TUNE_LIBS@
  TUNE_SQR_OBJ = @TUNE_SQR_OBJ@
-U = @U@
  U_FOR_BUILD = @U_FOR_BUILD@
  VERSION = @VERSION@
  WITH_READLINE_01 = @WITH_READLINE_01@
@@ -380,7 +401,6 @@ mandir = @mandir@
  mkdir_p = @mkdir_p@
  mpn_objects = @mpn_objects@
  mpn_objs_in_libgmp = @mpn_objs_in_libgmp@
-mpn_objs_in_libmp = @mpn_objs_in_libmp@
  oldincludedir = @oldincludedir@
  pdfdir = @pdfdir@
  prefix = @prefix@
@@ -440,85 +460,85 @@ clean-checkPROGRAMS:
         list=`for p in $$list; do echo "$$p"; done | sed 's/$(EXEEXT)$$//'`; \
         echo " rm -f" $$list; \
         rm -f $$list
-reuse$(EXEEXT): $(reuse_OBJECTS) $(reuse_DEPENDENCIES) 
+reuse$(EXEEXT): $(reuse_OBJECTS) $(reuse_DEPENDENCIES) $(EXTRA_reuse_DEPENDENCIES) 
         @rm -f reuse$(EXEEXT)
         $(LINK) $(reuse_OBJECTS) $(reuse_LDADD) $(LIBS)
-t-add$(EXEEXT): $(t_add_OBJECTS) $(t_add_DEPENDENCIES) 
+t-add$(EXEEXT): $(t_add_OBJECTS) $(t_add_DEPENDENCIES) $(EXTRA_t_add_DEPENDENCIES) 
         @rm -f t-add$(EXEEXT)
         $(LINK) $(t_add_OBJECTS) $(t_add_LDADD) $(LIBS)
-t-cmp_d$(EXEEXT): $(t_cmp_d_OBJECTS) $(t_cmp_d_DEPENDENCIES) 
+t-cmp_d$(EXEEXT): $(t_cmp_d_OBJECTS) $(t_cmp_d_DEPENDENCIES) $(EXTRA_t_cmp_d_DEPENDENCIES) 
         @rm -f t-cmp_d$(EXEEXT)
         $(LINK) $(t_cmp_d_OBJECTS) $(t_cmp_d_LDADD) $(LIBS)
-t-cmp_si$(EXEEXT): $(t_cmp_si_OBJECTS) $(t_cmp_si_DEPENDENCIES) 
+t-cmp_si$(EXEEXT): $(t_cmp_si_OBJECTS) $(t_cmp_si_DEPENDENCIES) $(EXTRA_t_cmp_si_DEPENDENCIES) 
         @rm -f t-cmp_si$(EXEEXT)
         $(LINK) $(t_cmp_si_OBJECTS) $(t_cmp_si_LDADD) $(LIBS)
-t-conv$(EXEEXT): $(t_conv_OBJECTS) $(t_conv_DEPENDENCIES) 
+t-conv$(EXEEXT): $(t_conv_OBJECTS) $(t_conv_DEPENDENCIES) $(EXTRA_t_conv_DEPENDENCIES) 
         @rm -f t-conv$(EXEEXT)
         $(LINK) $(t_conv_OBJECTS) $(t_conv_LDADD) $(LIBS)
-t-div$(EXEEXT): $(t_div_OBJECTS) $(t_div_DEPENDENCIES) 
+t-div$(EXEEXT): $(t_div_OBJECTS) $(t_div_DEPENDENCIES) $(EXTRA_t_div_DEPENDENCIES) 
         @rm -f t-div$(EXEEXT)
         $(LINK) $(t_div_OBJECTS) $(t_div_LDADD) $(LIBS)
-t-dm2exp$(EXEEXT): $(t_dm2exp_OBJECTS) $(t_dm2exp_DEPENDENCIES) 
+t-dm2exp$(EXEEXT): $(t_dm2exp_OBJECTS) $(t_dm2exp_DEPENDENCIES) $(EXTRA_t_dm2exp_DEPENDENCIES) 
         @rm -f t-dm2exp$(EXEEXT)
         $(LINK) $(t_dm2exp_OBJECTS) $(t_dm2exp_LDADD) $(LIBS)
-t-eq$(EXEEXT): $(t_eq_OBJECTS) $(t_eq_DEPENDENCIES) 
+t-eq$(EXEEXT): $(t_eq_OBJECTS) $(t_eq_DEPENDENCIES) $(EXTRA_t_eq_DEPENDENCIES) 
         @rm -f t-eq$(EXEEXT)
         $(LINK) $(t_eq_OBJECTS) $(t_eq_LDADD) $(LIBS)
-t-fits$(EXEEXT): $(t_fits_OBJECTS) $(t_fits_DEPENDENCIES) 
+t-fits$(EXEEXT): $(t_fits_OBJECTS) $(t_fits_DEPENDENCIES) $(EXTRA_t_fits_DEPENDENCIES) 
         @rm -f t-fits$(EXEEXT)
         $(LINK) $(t_fits_OBJECTS) $(t_fits_LDADD) $(LIBS)
-t-get_d$(EXEEXT): $(t_get_d_OBJECTS) $(t_get_d_DEPENDENCIES) 
+t-get_d$(EXEEXT): $(t_get_d_OBJECTS) $(t_get_d_DEPENDENCIES) $(EXTRA_t_get_d_DEPENDENCIES) 
         @rm -f t-get_d$(EXEEXT)
         $(LINK) $(t_get_d_OBJECTS) $(t_get_d_LDADD) $(LIBS)
-t-get_d_2exp$(EXEEXT): $(t_get_d_2exp_OBJECTS) $(t_get_d_2exp_DEPENDENCIES) 
+t-get_d_2exp$(EXEEXT): $(t_get_d_2exp_OBJECTS) $(t_get_d_2exp_DEPENDENCIES) $(EXTRA_t_get_d_2exp_DEPENDENCIES) 
         @rm -f t-get_d_2exp$(EXEEXT)
         $(LINK) $(t_get_d_2exp_OBJECTS) $(t_get_d_2exp_LDADD) $(LIBS)
-t-get_si$(EXEEXT): $(t_get_si_OBJECTS) $(t_get_si_DEPENDENCIES) 
+t-get_si$(EXEEXT): $(t_get_si_OBJECTS) $(t_get_si_DEPENDENCIES) $(EXTRA_t_get_si_DEPENDENCIES) 
         @rm -f t-get_si$(EXEEXT)
         $(LINK) $(t_get_si_OBJECTS) $(t_get_si_LDADD) $(LIBS)
-t-get_ui$(EXEEXT): $(t_get_ui_OBJECTS) $(t_get_ui_DEPENDENCIES) 
+t-get_ui$(EXEEXT): $(t_get_ui_OBJECTS) $(t_get_ui_DEPENDENCIES) $(EXTRA_t_get_ui_DEPENDENCIES) 
         @rm -f t-get_ui$(EXEEXT)
         $(LINK) $(t_get_ui_OBJECTS) $(t_get_ui_LDADD) $(LIBS)
-t-gsprec$(EXEEXT): $(t_gsprec_OBJECTS) $(t_gsprec_DEPENDENCIES) 
+t-gsprec$(EXEEXT): $(t_gsprec_OBJECTS) $(t_gsprec_DEPENDENCIES) $(EXTRA_t_gsprec_DEPENDENCIES) 
         @rm -f t-gsprec$(EXEEXT)
         $(LINK) $(t_gsprec_OBJECTS) $(t_gsprec_LDADD) $(LIBS)
-t-inp_str$(EXEEXT): $(t_inp_str_OBJECTS) $(t_inp_str_DEPENDENCIES) 
+t-inp_str$(EXEEXT): $(t_inp_str_OBJECTS) $(t_inp_str_DEPENDENCIES) $(EXTRA_t_inp_str_DEPENDENCIES) 
         @rm -f t-inp_str$(EXEEXT)
         $(LINK) $(t_inp_str_OBJECTS) $(t_inp_str_LDADD) $(LIBS)
-t-int_p$(EXEEXT): $(t_int_p_OBJECTS) $(t_int_p_DEPENDENCIES) 
+t-int_p$(EXEEXT): $(t_int_p_OBJECTS) $(t_int_p_DEPENDENCIES) $(EXTRA_t_int_p_DEPENDENCIES) 
         @rm -f t-int_p$(EXEEXT)
         $(LINK) $(t_int_p_OBJECTS) $(t_int_p_LDADD) $(LIBS)
-t-mul_ui$(EXEEXT): $(t_mul_ui_OBJECTS) $(t_mul_ui_DEPENDENCIES) 
+t-mul_ui$(EXEEXT): $(t_mul_ui_OBJECTS) $(t_mul_ui_DEPENDENCIES) $(EXTRA_t_mul_ui_DEPENDENCIES) 
         @rm -f t-mul_ui$(EXEEXT)
         $(LINK) $(t_mul_ui_OBJECTS) $(t_mul_ui_LDADD) $(LIBS)
-t-muldiv$(EXEEXT): $(t_muldiv_OBJECTS) $(t_muldiv_DEPENDENCIES) 
+t-muldiv$(EXEEXT): $(t_muldiv_OBJECTS) $(t_muldiv_DEPENDENCIES) $(EXTRA_t_muldiv_DEPENDENCIES) 
         @rm -f t-muldiv$(EXEEXT)
         $(LINK) $(t_muldiv_OBJECTS) $(t_muldiv_LDADD) $(LIBS)
-t-set$(EXEEXT): $(t_set_OBJECTS) $(t_set_DEPENDENCIES) 
+t-set$(EXEEXT): $(t_set_OBJECTS) $(t_set_DEPENDENCIES) $(EXTRA_t_set_DEPENDENCIES) 
         @rm -f t-set$(EXEEXT)
         $(LINK) $(t_set_OBJECTS) $(t_set_LDADD) $(LIBS)
-t-set_q$(EXEEXT): $(t_set_q_OBJECTS) $(t_set_q_DEPENDENCIES) 
+t-set_q$(EXEEXT): $(t_set_q_OBJECTS) $(t_set_q_DEPENDENCIES) $(EXTRA_t_set_q_DEPENDENCIES) 
         @rm -f t-set_q$(EXEEXT)
         $(LINK) $(t_set_q_OBJECTS) $(t_set_q_LDADD) $(LIBS)
-t-set_si$(EXEEXT): $(t_set_si_OBJECTS) $(t_set_si_DEPENDENCIES) 
+t-set_si$(EXEEXT): $(t_set_si_OBJECTS) $(t_set_si_DEPENDENCIES) $(EXTRA_t_set_si_DEPENDENCIES) 
         @rm -f t-set_si$(EXEEXT)
         $(LINK) $(t_set_si_OBJECTS) $(t_set_si_LDADD) $(LIBS)
-t-set_ui$(EXEEXT): $(t_set_ui_OBJECTS) $(t_set_ui_DEPENDENCIES) 
+t-set_ui$(EXEEXT): $(t_set_ui_OBJECTS) $(t_set_ui_DEPENDENCIES) $(EXTRA_t_set_ui_DEPENDENCIES) 
         @rm -f t-set_ui$(EXEEXT)
         $(LINK) $(t_set_ui_OBJECTS) $(t_set_ui_LDADD) $(LIBS)
-t-sqrt$(EXEEXT): $(t_sqrt_OBJECTS) $(t_sqrt_DEPENDENCIES) 
+t-sqrt$(EXEEXT): $(t_sqrt_OBJECTS) $(t_sqrt_DEPENDENCIES) $(EXTRA_t_sqrt_DEPENDENCIES) 
         @rm -f t-sqrt$(EXEEXT)
         $(LINK) $(t_sqrt_OBJECTS) $(t_sqrt_LDADD) $(LIBS)
-t-sqrt_ui$(EXEEXT): $(t_sqrt_ui_OBJECTS) $(t_sqrt_ui_DEPENDENCIES) 
+t-sqrt_ui$(EXEEXT): $(t_sqrt_ui_OBJECTS) $(t_sqrt_ui_DEPENDENCIES) $(EXTRA_t_sqrt_ui_DEPENDENCIES) 
         @rm -f t-sqrt_ui$(EXEEXT)
         $(LINK) $(t_sqrt_ui_OBJECTS) $(t_sqrt_ui_LDADD) $(LIBS)
-t-sub$(EXEEXT): $(t_sub_OBJECTS) $(t_sub_DEPENDENCIES) 
+t-sub$(EXEEXT): $(t_sub_OBJECTS) $(t_sub_DEPENDENCIES) $(EXTRA_t_sub_DEPENDENCIES) 
         @rm -f t-sub$(EXEEXT)
         $(LINK) $(t_sub_OBJECTS) $(t_sub_LDADD) $(LIBS)
-t-trunc$(EXEEXT): $(t_trunc_OBJECTS) $(t_trunc_DEPENDENCIES) 
+t-trunc$(EXEEXT): $(t_trunc_OBJECTS) $(t_trunc_DEPENDENCIES) $(EXTRA_t_trunc_DEPENDENCIES) 
         @rm -f t-trunc$(EXEEXT)
         $(LINK) $(t_trunc_OBJECTS) $(t_trunc_LDADD) $(LIBS)
-t-ui_div$(EXEEXT): $(t_ui_div_OBJECTS) $(t_ui_div_DEPENDENCIES) 
+t-ui_div$(EXEEXT): $(t_ui_div_OBJECTS) $(t_ui_div_DEPENDENCIES) $(EXTRA_t_ui_div_DEPENDENCIES) 
         @rm -f t-ui_div$(EXEEXT)
         $(LINK) $(t_ui_div_OBJECTS) $(t_ui_div_LDADD) $(LIBS)
  
@@ -527,11 +547,6 @@ mostlyclean-compile:
  
  distclean-compile:
         -rm -f *.tab.c
-$(top_builddir)/ansi2knr:
-       $(am__cd) $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) ./ansi2knr
-
-mostlyclean-kr:
-       -test "$U" = "" || rm -f *_.c
  
  .c.o:
         $(COMPILE) -c $<
@@ -541,74 +556,6 @@ mostlyclean-kr:
  
  .c.lo:
         $(LTCOMPILE) -c -o $@ $<
-reuse_.c: reuse.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/reuse.c; then echo $(srcdir)/reuse.c; else echo reuse.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-add_.c: t-add.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-add.c; then echo $(srcdir)/t-add.c; else echo t-add.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-cmp_d_.c: t-cmp_d.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-cmp_d.c; then echo $(srcdir)/t-cmp_d.c; else echo t-cmp_d.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-cmp_si_.c: t-cmp_si.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-cmp_si.c; then echo $(srcdir)/t-cmp_si.c; else echo t-cmp_si.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-conv_.c: t-conv.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-conv.c; then echo $(srcdir)/t-conv.c; else echo t-conv.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-div_.c: t-div.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-div.c; then echo $(srcdir)/t-div.c; else echo t-div.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-dm2exp_.c: t-dm2exp.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-dm2exp.c; then echo $(srcdir)/t-dm2exp.c; else echo t-dm2exp.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-eq_.c: t-eq.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-eq.c; then echo $(srcdir)/t-eq.c; else echo t-eq.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-fits_.c: t-fits.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-fits.c; then echo $(srcdir)/t-fits.c; else echo t-fits.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-get_d_.c: t-get_d.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-get_d.c; then echo $(srcdir)/t-get_d.c; else echo t-get_d.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-get_d_2exp_.c: t-get_d_2exp.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-get_d_2exp.c; then echo $(srcdir)/t-get_d_2exp.c; else echo t-get_d_2exp.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-get_si_.c: t-get_si.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-get_si.c; then echo $(srcdir)/t-get_si.c; else echo t-get_si.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-get_ui_.c: t-get_ui.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-get_ui.c; then echo $(srcdir)/t-get_ui.c; else echo t-get_ui.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-gsprec_.c: t-gsprec.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-gsprec.c; then echo $(srcdir)/t-gsprec.c; else echo t-gsprec.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-inp_str_.c: t-inp_str.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-inp_str.c; then echo $(srcdir)/t-inp_str.c; else echo t-inp_str.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-int_p_.c: t-int_p.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-int_p.c; then echo $(srcdir)/t-int_p.c; else echo t-int_p.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-mul_ui_.c: t-mul_ui.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-mul_ui.c; then echo $(srcdir)/t-mul_ui.c; else echo t-mul_ui.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-muldiv_.c: t-muldiv.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-muldiv.c; then echo $(srcdir)/t-muldiv.c; else echo t-muldiv.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-set_.c: t-set.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-set.c; then echo $(srcdir)/t-set.c; else echo t-set.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-set_q_.c: t-set_q.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-set_q.c; then echo $(srcdir)/t-set_q.c; else echo t-set_q.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-set_si_.c: t-set_si.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-set_si.c; then echo $(srcdir)/t-set_si.c; else echo t-set_si.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-set_ui_.c: t-set_ui.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-set_ui.c; then echo $(srcdir)/t-set_ui.c; else echo t-set_ui.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-sqrt_.c: t-sqrt.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-sqrt.c; then echo $(srcdir)/t-sqrt.c; else echo t-sqrt.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-sqrt_ui_.c: t-sqrt_ui.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-sqrt_ui.c; then echo $(srcdir)/t-sqrt_ui.c; else echo t-sqrt_ui.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-sub_.c: t-sub.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-sub.c; then echo $(srcdir)/t-sub.c; else echo t-sub.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-trunc_.c: t-trunc.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-trunc.c; then echo $(srcdir)/t-trunc.c; else echo t-trunc.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-ui_div_.c: t-ui_div.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-ui_div.c; then echo $(srcdir)/t-ui_div.c; else echo t-ui_div.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-reuse_.$(OBJEXT) reuse_.lo t-add_.$(OBJEXT) t-add_.lo \
-t-cmp_d_.$(OBJEXT) t-cmp_d_.lo t-cmp_si_.$(OBJEXT) t-cmp_si_.lo \
-t-conv_.$(OBJEXT) t-conv_.lo t-div_.$(OBJEXT) t-div_.lo \
-t-dm2exp_.$(OBJEXT) t-dm2exp_.lo t-eq_.$(OBJEXT) t-eq_.lo \
-t-fits_.$(OBJEXT) t-fits_.lo t-get_d_.$(OBJEXT) t-get_d_.lo \
-t-get_d_2exp_.$(OBJEXT) t-get_d_2exp_.lo t-get_si_.$(OBJEXT) \
-t-get_si_.lo t-get_ui_.$(OBJEXT) t-get_ui_.lo t-gsprec_.$(OBJEXT) \
-t-gsprec_.lo t-inp_str_.$(OBJEXT) t-inp_str_.lo t-int_p_.$(OBJEXT) \
-t-int_p_.lo t-mul_ui_.$(OBJEXT) t-mul_ui_.lo t-muldiv_.$(OBJEXT) \
-t-muldiv_.lo t-set_.$(OBJEXT) t-set_.lo t-set_q_.$(OBJEXT) t-set_q_.lo \
-t-set_si_.$(OBJEXT) t-set_si_.lo t-set_ui_.$(OBJEXT) t-set_ui_.lo \
-t-sqrt_.$(OBJEXT) t-sqrt_.lo t-sqrt_ui_.$(OBJEXT) t-sqrt_ui_.lo \
-t-sub_.$(OBJEXT) t-sub_.lo t-trunc_.$(OBJEXT) t-trunc_.lo \
-t-ui_div_.$(OBJEXT) t-ui_div_.lo : $(ANSI2KNR)
  
  mostlyclean-libtool:
         -rm -f *.lo
@@ -749,14 +696,15 @@ check-TESTS: $(TESTS)
           fi; \
           dashes=`echo "$$dashes" | sed s/./=/g`; \
           if test "$$failed" -eq 0; then \
-           echo "$$grn$$dashes"; \
+           col="$$grn"; \
           else \
-           echo "$$red$$dashes"; \
+           col="$$red"; \
           fi; \
-         echo "$$banner"; \
-         test -z "$$skipped" || echo "$$skipped"; \
-         test -z "$$report" || echo "$$report"; \
-         echo "$$dashes$$std"; \
+         echo "$${col}$$dashes$${std}"; \
+         echo "$${col}$$banner$${std}"; \
+         test -z "$$skipped" || echo "$${col}$$skipped$${std}"; \
+         test -z "$$report" || echo "$${col}$$report$${std}"; \
+         echo "$${col}$$dashes$${std}"; \
           test "$$failed" -eq 0; \
         else :; fi
  
@@ -806,10 +754,15 @@ install-am: all-am
  
  installcheck: installcheck-am
  install-strip:
-       $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
-         install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
-         `test -z '$(STRIP)' || \
-           echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+       if test -z '$(STRIP)'; then \
+         $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+           install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+             install; \
+       else \
+         $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+           install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+           "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
+       fi
  mostlyclean-generic:
  
  clean-generic:
@@ -877,7 +830,7 @@ maintainer-clean-am: distclean-am maintainer-clean-generic
  
  mostlyclean: mostlyclean-am
  
-mostlyclean-am: mostlyclean-compile mostlyclean-generic mostlyclean-kr \
+mostlyclean-am: mostlyclean-compile mostlyclean-generic \
         mostlyclean-libtool
  
  pdf: pdf-am
@@ -890,7 +843,7 @@ ps-am:
  
  uninstall-am:
  
-.MAKE: $(top_builddir)/ansi2knr check-am install-am install-strip
+.MAKE: check-am install-am install-strip
  
  .PHONY: CTAGS GTAGS all all-am check check-TESTS check-am clean \
         clean-checkPROGRAMS clean-generic clean-libtool ctags \
@@ -903,8 +856,8 @@ uninstall-am:
         install-ps install-ps-am install-strip installcheck \
         installcheck-am installdirs maintainer-clean \
         maintainer-clean-generic mostlyclean mostlyclean-compile \
-       mostlyclean-generic mostlyclean-kr mostlyclean-libtool pdf \
-       pdf-am ps ps-am tags uninstall uninstall-am
+       mostlyclean-generic mostlyclean-libtool pdf pdf-am ps ps-am \
+       tags uninstall uninstall-am
  
  
  $(top_builddir)/tests/libtests.la:
diff --git a/tests/mpf/reuse.c b/tests/mpf/reuse.c

index f373ac700ea6170e9cb9a0bfdf58f36376c460ca..53f973c2fbb1d73e90aea37d2c0244f6407ff9af 100644 (file)
--- a/tests/mpf/reuse.c
+++ b/tests/mpf/reuse.c
@@ -1,21 +1,21 @@
  /* Test that routines allow reusing a source variable as destination.
  
-Copyright 1996, 2000, 2001, 2002 Free Software Foundation, Inc.
+Copyright 1996, 2000, 2001, 2002, 2012 Free Software Foundation, Inc.
  
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
  
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
  
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
  
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see http://www.gnu.org/licenses/.  */
  
  #include <stdio.h>
  #include <stdlib.h>
@@ -50,42 +50,42 @@ main (void)
  #define EXPO 32
  #endif
  
-void dump_abort __GMP_PROTO ((char *, mpf_t, mpf_t));
+void dump_abort (const char *, mpf_t, mpf_t);
  
-typedef void (*dss_func) __GMP_PROTO ((mpf_ptr, mpf_srcptr, mpf_srcptr));
+typedef void (*dss_func) (mpf_ptr, mpf_srcptr, mpf_srcptr);
  
  dss_func dss_funcs[] =
  {
    mpf_div, mpf_add, mpf_mul, mpf_sub,
  };
  
-char *dss_func_names[] =
+const char *dss_func_names[] =
  {
    "mpf_div", "mpf_add", "mpf_mul", "mpf_sub",
  };
  
-typedef void (*dsi_func) __GMP_PROTO ((mpf_ptr, mpf_srcptr, unsigned long int));
+typedef void (*dsi_func) (mpf_ptr, mpf_srcptr, unsigned long int);
  
  dsi_func dsi_funcs[] =
  {
    mpf_div_ui, mpf_add_ui, mpf_mul_ui, mpf_sub_ui,
-  mpf_mul_2exp, mpf_div_2exp
+  mpf_mul_2exp, mpf_div_2exp, mpf_pow_ui
  };
  
-char *dsi_func_names[] =
+const char *dsi_func_names[] =
  {
    "mpf_div_ui", "mpf_add_ui", "mpf_mul_ui", "mpf_sub_ui",
-  "mpf_mul_2exp", "mpf_div_2exp"
+  "mpf_mul_2exp", "mpf_div_2exp", "mpf_pow_ui"
  };
  
-typedef void (*dis_func) __GMP_PROTO ((mpf_ptr, unsigned long int, mpf_srcptr));
+typedef void (*dis_func) (mpf_ptr, unsigned long int, mpf_srcptr);
  
  dis_func dis_funcs[] =
  {
    mpf_ui_div, mpf_ui_sub,
  };
  
-char *dis_func_names[] =
+const char *dis_func_names[] =
  {
    "mpf_ui_div", "mpf_ui_sub",
  };
@@ -194,7 +194,7 @@ main (int argc, char **argv)
  }
  
  void
-dump_abort (char *name, mpf_t res1, mpf_t res2)
+dump_abort (const char *name, mpf_t res1, mpf_t res2)
  {
    printf ("failure in %s:\n", name);
    mpf_dump (res1);
@@ -203,9 +203,9 @@ dump_abort (char *name, mpf_t res1, mpf_t res2)
  }
  
  #if 0
-void mpf_abs           __GMP_PROTO ((mpf_ptr, mpf_srcptr));
-void mpf_sqrt          __GMP_PROTO ((mpf_ptr, mpf_srcptr));
-void mpf_neg           __GMP_PROTO ((mpf_ptr, mpf_srcptr));
+void mpf_abs           (mpf_ptr, mpf_srcptr);
+void mpf_sqrt          (mpf_ptr, mpf_srcptr);
+void mpf_neg           (mpf_ptr, mpf_srcptr);
  #endif
  
  #endif /* ! DLL_EXPORT */
diff --git a/tests/mpf/t-add.c b/tests/mpf/t-add.c

index db0dbe5b508e07629bed9e30b8f9f7ff91e73d77..bb19de813e9c30d37a48a9f86996362ac754de0d 100644 (file)
--- a/tests/mpf/t-add.c
+++ b/tests/mpf/t-add.c
@@ -2,20 +2,20 @@
  
  Copyright 1996, 2001 Free Software Foundation, Inc.
  
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
  
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
  
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
  
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see http://www.gnu.org/licenses/.  */
  
  #include <stdio.h>
  #include <stdlib.h>
diff --git a/tests/mpf/t-cmp_d.c b/tests/mpf/t-cmp_d.c

index 6b5385f6059348d55f9a9d6bc55b5d5be69bc0af..dd3920e3710f3261faf5e3878ccc207651c6e5a9 100644 (file)
--- a/tests/mpf/t-cmp_d.c
+++ b/tests/mpf/t-cmp_d.c
@@ -2,20 +2,20 @@
  
  Copyright 2001, 2003, 2003, 2005 Free Software Foundation, Inc.
  
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
  
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
  
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
  
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see http://www.gnu.org/licenses/.  */
  
  #include <stdio.h>
  #include <stdlib.h>
diff --git a/tests/mpf/t-cmp_si.c b/tests/mpf/t-cmp_si.c

index e4b9514dc5d112625b98d0f41df8ed54fa9cbfb7..e328541b71513419057eab23a9fee979e5435424 100644 (file)
--- a/tests/mpf/t-cmp_si.c
+++ b/tests/mpf/t-cmp_si.c
@@ -2,20 +2,20 @@
  
  Copyright 2000, 2001, 2004 Free Software Foundation, Inc.
  
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
  
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
  
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
  
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see http://www.gnu.org/licenses/.  */
  
  #include <stdio.h>
  #include <stdlib.h>
diff --git a/tests/mpf/t-conv.c b/tests/mpf/t-conv.c

index c151174173457fbb18edc1e6fa1887f8069f1843..74ddb7c6d65054f8a7e6808f78cc3827cfd31c8e 100644 (file)
--- a/tests/mpf/t-conv.c
+++ b/tests/mpf/t-conv.c
@@ -2,20 +2,20 @@
  
  Copyright 1996, 2000, 2001, 2008 Free Software Foundation, Inc.
  
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
  
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
  
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
  
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see http://www.gnu.org/licenses/.  */
  
  #include <stdio.h>
  #include <stdlib.h>
diff --git a/tests/mpf/t-div.c b/tests/mpf/t-div.c

index a88f1cc5b1009cae03ce5cbf038d117d2dc68aa5..00fd76e0989eea6fed834b8c55658d2e9b1787e0 100644 (file)
--- a/tests/mpf/t-div.c
+++ b/tests/mpf/t-div.c
@@ -2,20 +2,20 @@
  
  Copyright 2004 Free Software Foundation, Inc.
  
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
  
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
  
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
  
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see http://www.gnu.org/licenses/.  */
  
  #include <stdio.h>
  #include <stdlib.h>
diff --git a/tests/mpf/t-dm2exp.c b/tests/mpf/t-dm2exp.c

index da43e28ac3b8b7aab4932b232790f0f56df4874a..e6a09d9001123ec07e22893b3333af4e1d856431 100644 (file)
--- a/tests/mpf/t-dm2exp.c
+++ b/tests/mpf/t-dm2exp.c
@@ -2,20 +2,20 @@
  
  Copyright 1996, 2000, 2001 Free Software Foundation, Inc.
  
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
  
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
  
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
  
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see http://www.gnu.org/licenses/.  */
  
  #include <stdio.h>
  #include <stdlib.h>
diff --git a/tests/mpf/t-eq.c b/tests/mpf/t-eq.c

index 73fde44eed3988a492c8edc600a116fe92fd8dfa..8e645b690a340f04cfd8102fdc7db6661c149e7b 100644 (file)
--- a/tests/mpf/t-eq.c
+++ b/tests/mpf/t-eq.c
@@ -1,21 +1,21 @@
  /* Test mpf_eq.
  
-Copyright 2009 Free Software Foundation, Inc.
+Copyright 2009, 2012 Free Software Foundation, Inc.
  
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
  
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
  
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
  
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see http://www.gnu.org/licenses/.  */
  
  #include <stdio.h>
  #include <stdlib.h>
@@ -30,12 +30,79 @@ void insert_random_low_zero_limbs (mpf_t, gmp_randstate_ptr);
  void dump_abort (mpf_t, mpf_t, int, int, int, int, int, long);
  void hexdump (mpf_t);
  
-int
-main (int argc, char **argv)
+void
+check_data (void)
+{
+  static const struct
+  {
+    struct {
+      int        exp, size;
+      mp_limb_t  d[10];
+    } x, y;
+    mp_bitcnt_t bits;
+    int want;
+
+  } data[] = {
+    { { 0, 0, { 0 } },             { 0, 0, { 0 } },    0, 1 },
+
+    { { 0, 1, { 7 } },             { 0, 1, { 7 } },    0, 1 },
+    { { 0, 1, { 7 } },             { 0, 1, { 7 } },   17, 1 },
+    { { 0, 1, { 7 } },             { 0, 1, { 7 } }, 4711, 1 },
+
+    { { 0, 1, { 7 } },             { 0, 1, { 6 } },    0, 1 },
+    { { 0, 1, { 7 } },             { 0, 1, { 6 } },    2, 1 },
+    { { 0, 1, { 7 } },             { 0, 1, { 6 } },    3, 0 },
+
+    { { 0, 0, { 0 } },             { 0, 1, { 1 } },    0, 0 },
+    { { 0, 1, { 1 } },             { 0,-1 ,{ 1 } },    0, 0 },
+    { { 1, 1, { 1 } },             { 0, 1, { 1 } },    0, 0 },
+
+    { { 0, 1, { 8 } },             { 0, 1, { 4 } },    0, 0 },
+
+    { { 0, 2, { 0, 3 } },          { 0, 1, { 3 } }, 1000, 1 },
+  };
+
+  mpf_t  x, y;
+  int got, got_swapped;
+  int i;
+  mp_trace_base = 16;
+
+  for (i = 0; i < numberof (data); i++)
+    {
+      PTR(x) = (mp_ptr) data[i].x.d;
+      SIZ(x) = data[i].x.size;
+      EXP(x) = data[i].x.exp;
+      PREC(x) = numberof (data[i].x.d);
+      MPF_CHECK_FORMAT (x);
+
+      PTR(y) = (mp_ptr) data[i].y.d;
+      SIZ(y) = data[i].y.size;
+      EXP(y) = data[i].y.exp;
+      PREC(y) = numberof (data[i].y.d);
+      MPF_CHECK_FORMAT (y);
+
+      got         = mpf_eq (x, y, data[i].bits);
+      got_swapped = mpf_eq (y, x, data[i].bits);
+
+      if (got != got_swapped || got != data[i].want)
+       {
+         printf ("check_data() wrong result at data[%d]\n", i);
+         mpf_trace ("x   ", x);
+         mpf_trace ("y   ", y);
+         printf ("got         %d\n", got);
+         printf ("got_swapped %d\n", got_swapped);
+         printf ("want        %d\n", data[i].want);
+         abort ();
+        }
+    }
+}
+
+void
+check_random (long reps)
  {
-  unsigned long test, reps = 10000;
+  unsigned long test;
+  gmp_randstate_ptr rands = RANDS;
    mpf_t a, b, x;
-  gmp_randstate_ptr rands;
    mpz_t ds;
    int hibits, lshift1, lshift2;
    int xtra;
@@ -44,13 +111,6 @@ main (int argc, char **argv)
  #define LSHIFT1 10
  #define LSHIFT2 10
  
-  if (argc > 1)
-    reps = strtol (argv[1], 0, 0);
-
-  tests_start ();
-
-  rands = RANDS;
-
    mpf_set_default_prec ((1 << HIBITS) + (1 << LSHIFT1) + (1 << LSHIFT2));
  
    mpz_init (ds);
@@ -83,12 +143,14 @@ main (int argc, char **argv)
        insert_random_low_zero_limbs (a, rands);
        insert_random_low_zero_limbs (b, rands);
  
-      if (mpf_eq (a, b, lshift1 + hibits) == 0)
+      if (mpf_eq (a, b, lshift1 + hibits) == 0 ||
+         mpf_eq (b, a, lshift1 + hibits) == 0)
         {
           dump_abort (a, b, lshift1 + hibits, lshift1, lshift2, hibits, 1, test);
         }
        for (xtra = 1; xtra < 100; xtra++)
-       if (mpf_eq (a, b, lshift1 + hibits + xtra) != 0)
+       if (mpf_eq (a, b, lshift1 + hibits + xtra) != 0 ||
+           mpf_eq (b, a, lshift1 + hibits + xtra) != 0)
           {
             dump_abort (a, b, lshift1 + hibits + xtra, lshift1, lshift2, hibits, 0, test);
           }
@@ -96,8 +158,6 @@ main (int argc, char **argv)
  
    mpf_clears (a, b, x, NULL);
    mpz_clear (ds);
-  tests_end ();
-  exit (0);
  }
  
  void
@@ -139,3 +199,20 @@ hexdump (mpf_t x)
         printf (" ");
      }
  }
+
+int
+main (int argc, char *argv[])
+{
+  long reps = 10000;
+
+  if (argc == 2)
+    reps = strtol (argv[1], 0, 0);
+
+  tests_start ();
+
+  check_data ();
+  check_random (reps);
+
+  tests_end ();
+  exit (0);
+}
diff --git a/tests/mpf/t-fits.c b/tests/mpf/t-fits.c

index 5318e4767bb2b8f0617764f44ef450a24e1cf320..0f473c90e6b4a1ca1be5fd1d667f0294d51643ee 100644 (file)
--- a/tests/mpf/t-fits.c
+++ b/tests/mpf/t-fits.c
@@ -2,20 +2,20 @@
  
  Copyright 2001, 2002 Free Software Foundation, Inc.
  
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
  
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
  
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
  
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see http://www.gnu.org/licenses/.  */
  
  #include <stdio.h>
  #include <stdlib.h>
diff --git a/tests/mpf/t-get_d.c b/tests/mpf/t-get_d.c

index 8f18f44d060d8cd657adfd99112bac20e5ecf874..c86a87a18ce05144fdae47aad71319f12e67e9bc 100644 (file)
--- a/tests/mpf/t-get_d.c
+++ b/tests/mpf/t-get_d.c
@@ -1,28 +1,28 @@
  /* Test mpf_get_d and mpf_set_d.
  
-   Copyright 1996, 1999, 2000, 2001, 2009 Free Software Foundation, Inc.
+Copyright 1996, 1999, 2000, 2001, 2009 Free Software Foundation, Inc.
  
-   This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
  
-   The GNU MP Library is free software; you can redistribute it and/or modify
-   it under the terms of the GNU Lesser General Public License as published by
-   the Free Software Foundation; either version 3 of the License, or (at your
-   option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
  
-   The GNU MP Library is distributed in the hope that it will be useful, but
-   WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-   License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
  
-   You should have received a copy of the GNU Lesser General Public License
-   along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see http://www.gnu.org/licenses/.  */
  
  #include <stdio.h>
  #include <stdlib.h>
  #include "gmp.h"
  #include "tests.h"
  
-#if defined (__vax__)
+#if defined (__vax) || defined (__vax__)
  #define LOW_BOUND 1e-38
  #define HIGH_BOUND 8e37
  #endif
diff --git a/tests/mpf/t-get_d_2exp.c b/tests/mpf/t-get_d_2exp.c

index 91ab97ee511cd4988bb71eb9860e4454e4263b5f..8de1bbbeea53d122bbda81ee1afb9294d85dbaf2 100644 (file)
--- a/tests/mpf/t-get_d_2exp.c
+++ b/tests/mpf/t-get_d_2exp.c
@@ -2,20 +2,20 @@
  
  Copyright 2002, 2003 Free Software Foundation, Inc.
  
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
  
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
  
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
  
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see http://www.gnu.org/licenses/.  */
  
  #include <stdio.h>
  #include <stdlib.h>
diff --git a/tests/mpf/t-get_si.c b/tests/mpf/t-get_si.c

index c5080411d2baea82ddc7a24c87158a5b8845ae37..785d22bb5608fafedecf750a5b506720c23da947 100644 (file)
--- a/tests/mpf/t-get_si.c
+++ b/tests/mpf/t-get_si.c
@@ -2,20 +2,20 @@
  
  Copyright 2000, 2001 Free Software Foundation, Inc.
  
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
  
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
  
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
  
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see http://www.gnu.org/licenses/.  */
  
  #include <stdio.h>
  #include <stdlib.h>
diff --git a/tests/mpf/t-get_ui.c b/tests/mpf/t-get_ui.c

index f5795134d7d400fe3b12cc4ec6580ad23a418498..1a4428ab55f130dfe2c1c39408f69f0d016e0822 100644 (file)
--- a/tests/mpf/t-get_ui.c
+++ b/tests/mpf/t-get_ui.c
@@ -2,20 +2,20 @@
  
  Copyright 2004 Free Software Foundation, Inc.
  
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
  
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
  
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
  
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see http://www.gnu.org/licenses/.  */
  
  #include <stdio.h>
  #include <stdlib.h>
diff --git a/tests/mpf/t-gsprec.c b/tests/mpf/t-gsprec.c

index d0d7abadbb0bf235542af06ba66a5aa0e650cc12..cb5df4fc7af98c031f33fcd51a1a4cce0b9f9b3b 100644 (file)
--- a/tests/mpf/t-gsprec.c
+++ b/tests/mpf/t-gsprec.c
@@ -2,20 +2,20 @@
  
  Copyright 2000, 2001 Free Software Foundation, Inc.
  
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
  
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
  
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
  
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see http://www.gnu.org/licenses/.  */
  
  #include <stdio.h>
  #include <stdlib.h>
diff --git a/tests/mpf/t-inp_str.c b/tests/mpf/t-inp_str.c

index 8a50816643a32e8fbf12e6cfce822e61c4663ec6..72d9b1553dd89e4b2781c357e75d161cb104dd47 100644 (file)
--- a/tests/mpf/t-inp_str.c
+++ b/tests/mpf/t-inp_str.c
@@ -2,20 +2,20 @@
  
  Copyright 2001, 2002 Free Software Foundation, Inc.
  
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
  
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
  
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
  
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see http://www.gnu.org/licenses/.  */
  
  #include "config.h"
  
@@ -52,6 +52,7 @@ check_data (void)
  
      { "125",    10, "125",  3 },
      { "125e1",  10, "1250", 5 },
+    { "12e+2",  10, "1200", 5 },
      { "125e-1", 10, "12.5", 6 },
  
      {  "ff", 16,  "255", 2 },
diff --git a/tests/mpf/t-int_p.c b/tests/mpf/t-int_p.c

index 11bc90f840166fa34a3cd545e479c3f4340356f6..c9b6ee0b9257f5e9cdc0381774a4ce43ebea4e1a 100644 (file)
--- a/tests/mpf/t-int_p.c
+++ b/tests/mpf/t-int_p.c
@@ -2,20 +2,20 @@
  
  Copyright 2001 Free Software Foundation, Inc.
  
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
  
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
  
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
  
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see http://www.gnu.org/licenses/.  */
  
  #include <stdio.h>
  #include <stdlib.h>
diff --git a/tests/mpf/t-mul_ui.c b/tests/mpf/t-mul_ui.c

index a4fd75cf79e79378a2e0b36c779146443dcf41e1..41cf5e7d14d69505cebd01d1f45b76d866016c1c 100644 (file)
--- a/tests/mpf/t-mul_ui.c
+++ b/tests/mpf/t-mul_ui.c
@@ -2,20 +2,20 @@
  
  Copyright 2004 Free Software Foundation, Inc.
  
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
  
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
  
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
  
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see http://www.gnu.org/licenses/.  */
  
  #include <stdio.h>
  #include <stdlib.h>
@@ -118,7 +118,7 @@ void
  check_various (void)
  {
    mpf_t  u, got, want;
-  char   *s;
+  const char   *s;
  
    mpf_init2 (u,    2*8*sizeof(long));
    mpf_init2 (got,  2*8*sizeof(long));
diff --git a/tests/mpf/t-muldiv.c b/tests/mpf/t-muldiv.c

index 3ce1292c43a45b4a91166b43d6ecb9c1856fb951..10762c92234930e5932f681d2905f8c26d286f5d 100644 (file)
--- a/tests/mpf/t-muldiv.c
+++ b/tests/mpf/t-muldiv.c
@@ -2,20 +2,20 @@
  
  Copyright 1996, 2000, 2001, 2003 Free Software Foundation, Inc.
  
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
  
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
  
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
  
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see http://www.gnu.org/licenses/.  */
  
  #include <stdio.h>
  #include <stdlib.h>
diff --git a/tests/mpf/t-set.c b/tests/mpf/t-set.c

index 48336a890c86bdc4e5b13179476b55a08b6db77e..21013a4c40272f812770acda0b0351472bbaf18e 100644 (file)
--- a/tests/mpf/t-set.c
+++ b/tests/mpf/t-set.c
@@ -1,21 +1,21 @@
-/* Test mpf_set.
+/* Test mpf_set, mpf_init_set.
  
-Copyright 2004 Free Software Foundation, Inc.
+Copyright 2004, 2012 Free Software Foundation, Inc.
  
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
  
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
  
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
  
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see http://www.gnu.org/licenses/.  */
  
  #include <stdio.h>
  #include <stdlib.h>
@@ -40,12 +40,73 @@ check_reuse (void)
    mpf_clear (f);
  }
  
+void
+check_random (long reps)
+{
+  unsigned long test;
+  gmp_randstate_ptr rands;
+  mpf_t a, b;
+  mpz_t z;
+  int precbits;
+
+#define PRECBITS 10
+
+  rands = RANDS;
+
+  mpz_init (z);
+  mpf_init2 (a, 1 << PRECBITS);
+
+  for (test = 0; test < reps; test++)
+    {
+      mpz_urandomb (z, rands, PRECBITS + 1);
+      precbits = mpz_get_ui (z) + 1;
+      mpz_urandomb (z, rands, precbits);
+      mpz_setbit (z, precbits  - 1);   /* make sure msb is set */
+      mpf_set_z (a, z);
+      if (precbits & 1)
+       mpf_neg (a, a);
+      mpz_urandomb (z, rands, PRECBITS);
+      mpf_div_2exp (a, a, mpz_get_ui (z) + 1);
+      mpz_urandomb (z, rands, PRECBITS);
+      precbits -= mpz_get_ui (z);
+      if (precbits <= 0)
+       precbits = 1 - precbits;
+      mpf_set_default_prec (precbits);
+
+      mpf_init_set (b, a);
+      MPF_CHECK_FORMAT (b);
+      if (!mpf_eq (a, b, precbits))
+       {
+         printf ("mpf_init_set wrong.\n");
+         abort();
+       }
+
+      mpf_set_ui (b, 0);
+      mpf_set (b, a);
+      MPF_CHECK_FORMAT (b);
+      if (!mpf_eq (a, b, precbits))
+       {
+         printf ("mpf_set wrong.\n");
+         abort();
+       }
+
+      mpf_clear (b);
+    }
+
+  mpf_clear (a);
+  mpz_clear (z);
+}
+
  int
-main (void)
+main (int argc, char *argv[])
  {
+  long reps = 10000;
+
    tests_start ();
+  TESTS_REPS (reps, argv, argc);
  
    check_reuse ();
+  check_random (reps);
  
    tests_end ();
    exit (0);
diff --git a/tests/mpf/t-set_q.c b/tests/mpf/t-set_q.c

index 9dfa04fa5be38dc6f15568400621b9e94cbe1721..56eb75e072b30b0fcb88db53b2fe514eb3abe466 100644 (file)
--- a/tests/mpf/t-set_q.c
+++ b/tests/mpf/t-set_q.c
@@ -2,20 +2,20 @@
  
  Copyright 2004 Free Software Foundation, Inc.
  
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
  
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
  
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
  
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see http://www.gnu.org/licenses/.  */
  
  #include <stdio.h>
  #include <stdlib.h>
diff --git a/tests/mpf/t-set_si.c b/tests/mpf/t-set_si.c

index b9519ee9134548196f0234dab5d3116b0737f3a7..e89d6c5a49843494d1d3f2395df9bb99c5b095c3 100644 (file)
--- a/tests/mpf/t-set_si.c
+++ b/tests/mpf/t-set_si.c
@@ -2,20 +2,20 @@
  
  Copyright 2000, 2001, 2003 Free Software Foundation, Inc.
  
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
  
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
  
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
  
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see http://www.gnu.org/licenses/.  */
  
  #include <stdio.h>
  #include <stdlib.h>
diff --git a/tests/mpf/t-set_ui.c b/tests/mpf/t-set_ui.c

index fef529b7fd41b3ddc5a8a859b7ed714706e8c133..c931228f8596afd6460b57d4ec700468d8383a82 100644 (file)
--- a/tests/mpf/t-set_ui.c
+++ b/tests/mpf/t-set_ui.c
@@ -2,20 +2,20 @@
  
  Copyright 2000, 2001, 2003 Free Software Foundation, Inc.
  
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
  
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
  
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
  
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see http://www.gnu.org/licenses/.  */
  
  #include <stdio.h>
  #include <stdlib.h>
@@ -41,7 +41,7 @@ check_data (void)
  #else
      { ULONG_MAX,     2, { ULONG_MAX & GMP_NUMB_MASK,
                            ULONG_MAX >> GMP_NUMB_BITS } },
-    { LONG_HIGHBIT,  2, { 0,
+    { ULONG_HIGHBIT, 2, { 0,
                            ULONG_HIGHBIT >> GMP_NUMB_BITS } },
  #endif
    };
diff --git a/tests/mpf/t-sqrt.c b/tests/mpf/t-sqrt.c

index 2df7bb22d9b2e02f4fe3fb06af370ce247913131..fb1b85bb69b37da94aeb5d4060fb43381d176b33 100644 (file)
--- a/tests/mpf/t-sqrt.c
+++ b/tests/mpf/t-sqrt.c
@@ -2,20 +2,20 @@
  
  Copyright 1996, 2001, 2004 Free Software Foundation, Inc.
  
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
  
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
  
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
  
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see http://www.gnu.org/licenses/.  */
  
  #include <stdio.h>
  #include <stdlib.h>
diff --git a/tests/mpf/t-sqrt_ui.c b/tests/mpf/t-sqrt_ui.c

index 7ae572bb7aa81eddf7a2ce3750a3b93dd407e8a3..0e19a44a400d5ffa083c1f57201be4ef76c6c937 100644 (file)
--- a/tests/mpf/t-sqrt_ui.c
+++ b/tests/mpf/t-sqrt_ui.c
@@ -2,20 +2,20 @@
  
  Copyright 2004 Free Software Foundation, Inc.
  
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
  
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
  
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
  
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see http://www.gnu.org/licenses/.  */
  
  #include <stdio.h>
  #include <stdlib.h>
diff --git a/tests/mpf/t-sub.c b/tests/mpf/t-sub.c

index 92d4f05fc65fc11ad648774b83e2629c57bacd71..8cd1e2c1ba6284c51e48ea35c1b92e67027dfa8e 100644 (file)
--- a/tests/mpf/t-sub.c
+++ b/tests/mpf/t-sub.c
@@ -2,20 +2,20 @@
  
  Copyright 1996, 2001, 2004 Free Software Foundation, Inc.
  
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
  
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
  
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
  
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see http://www.gnu.org/licenses/.  */
  
  #include <stdio.h>
  #include <stdlib.h>
diff --git a/tests/mpf/t-trunc.c b/tests/mpf/t-trunc.c

index 30e3703b0ea56c4fdbe959b6f8d05f79dc12b492..0a23b63ab71ae05e3f6655d2cd8d3e3784afaabc 100644 (file)
--- a/tests/mpf/t-trunc.c
+++ b/tests/mpf/t-trunc.c
@@ -2,20 +2,20 @@
  
  Copyright 2001, 2002 Free Software Foundation, Inc.
  
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
  
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
  
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
  
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see http://www.gnu.org/licenses/.  */
  
  #include <stdio.h>
  #include <stdlib.h>
diff --git a/tests/mpf/t-ui_div.c b/tests/mpf/t-ui_div.c

index 542ecb109919b10363a472e4c58173a34ede653f..e23f91d8f197061c814156aab7b8b07cedd6fe17 100644 (file)
--- a/tests/mpf/t-ui_div.c
+++ b/tests/mpf/t-ui_div.c
@@ -2,20 +2,20 @@
  
  Copyright 2004 Free Software Foundation, Inc.
  
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
  
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
  
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
  
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see http://www.gnu.org/licenses/.  */
  
  #include <stdio.h>
  #include <stdlib.h>
diff --git a/tests/mpn/Makefile.am b/tests/mpn/Makefile.am

index e3a43aec89e0b11d7ff59e14f043c89d1bbd6356..d0541e33e50325dd72d4222ed092112607fe8ec4 100644 (file)
--- a/tests/mpn/Makefile.am
+++ b/tests/mpn/Makefile.am
@@ -1,21 +1,22 @@
  ## Process this file with automake to generate Makefile.in
  
-# Copyright 2001, 2002, 2003, 2009, 2010, 2012 Free Software Foundation, Inc.
+# Copyright 2001, 2002, 2003, 2009, 2010, 2011, 2012 Free Software Foundation,
+# Inc.
  #
-# This file is part of the GNU MP Library.
+# This file is part of the GNU MP Library test suite.
  #
-# The GNU MP Library is free software; you can redistribute it and/or modify
-# it under the terms of the GNU Lesser General Public License as published by
-# the Free Software Foundation; either version 3 of the License, or (at your
-# option) any later version.
+# The GNU MP Library test suite is free software; you can redistribute it
+# and/or modify it under the terms of the GNU General Public License as
+# published by the Free Software Foundation; either version 3 of the License,
+# or (at your option) any later version.
  #
-# The GNU MP Library is distributed in the hope that it will be useful, but
-# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-# License for more details.
+# The GNU MP Library test suite is distributed in the hope that it will be
+# useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+# Public License for more details.
  #
-# You should have received a copy of the GNU Lesser General Public License
-# along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+# You should have received a copy of the GNU General Public License along with
+# the GNU MP Library test suite.  If not, see http://www.gnu.org/licenses/.
  
  
  INCLUDES = -I$(top_srcdir) -I$(top_srcdir)/tests
@@ -24,11 +25,13 @@ LDADD = $(top_builddir)/tests/libtests.la $(top_builddir)/libgmp.la
  check_PROGRAMS = t-asmtype t-aors_1 t-divrem_1 t-mod_1 t-fat t-get_d   \
    t-instrument t-iord_u t-mp_bases t-perfsqr t-scan logic              \
    t-toom22 t-toom32 t-toom33 t-toom42 t-toom43 t-toom44                        \
-  t-toom52 t-toom53 t-toom62 t-toom63 t-toom6h t-toom8h                        \
-  t-mul t-mullo t-mulmod_bnm1 t-sqrmod_bnm1                            \
-  t-hgcd t-matrix22 t-invert t-div t-bdiv
+  t-toom52 t-toom53 t-toom54 t-toom62 t-toom63 t-toom6h t-toom8h       \
+  t-toom2-sqr t-toom3-sqr t-toom4-sqr t-toom6-sqr t-toom8-sqr          \
+  t-mul t-mullo t-mulmod_bnm1 t-sqrmod_bnm1 t-mulmid                   \
+  t-hgcd t-hgcd_appr t-matrix22 t-invert t-div t-bdiv                  \
+  t-broot t-brootinv
  
-EXTRA_DIST = toom-shared.h
+EXTRA_DIST = toom-shared.h toom-sqr-shared.h
  
  TESTS = $(check_PROGRAMS)
  
diff --git a/tests/mpn/Makefile.in b/tests/mpn/Makefile.in

index dde45b441214877074f54fc0fa2a95d2e809232d..71ee9b1d1b259f4feae12f98ad38d2c8c49c20aa 100644 (file)
--- a/tests/mpn/Makefile.in
+++ b/tests/mpn/Makefile.in
@@ -1,9 +1,9 @@
-# Makefile.in generated by automake 1.11.1 from Makefile.am.
+# Makefile.in generated by automake 1.11.6 from Makefile.am.
  # @configure_input@
  
  # Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
-# 2003, 2004, 2005, 2006, 2007, 2008, 2009  Free Software Foundation,
-# Inc.
+# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software
+# Foundation, Inc.
  # This Makefile.in is free software; the Free Software Foundation
  # gives unlimited permission to copy and/or distribute it,
  # with or without modifications, as long as this notice is preserved.
@@ -15,23 +15,41 @@
  
  @SET_MAKE@
  
-# Copyright 2001, 2002, 2003, 2009, 2010, 2012 Free Software Foundation, Inc.
+# Copyright 2001, 2002, 2003, 2009, 2010, 2011, 2012 Free Software Foundation,
+# Inc.
  #
-# This file is part of the GNU MP Library.
+# This file is part of the GNU MP Library test suite.
  #
-# The GNU MP Library is free software; you can redistribute it and/or modify
-# it under the terms of the GNU Lesser General Public License as published by
-# the Free Software Foundation; either version 3 of the License, or (at your
-# option) any later version.
+# The GNU MP Library test suite is free software; you can redistribute it
+# and/or modify it under the terms of the GNU General Public License as
+# published by the Free Software Foundation; either version 3 of the License,
+# or (at your option) any later version.
  #
-# The GNU MP Library is distributed in the hope that it will be useful, but
-# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-# License for more details.
+# The GNU MP Library test suite is distributed in the hope that it will be
+# useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+# Public License for more details.
  #
-# You should have received a copy of the GNU Lesser General Public License
-# along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+# You should have received a copy of the GNU General Public License along with
+# the GNU MP Library test suite.  If not, see http://www.gnu.org/licenses/.
  VPATH = @srcdir@
+am__make_dryrun = \
+  { \
+    am__dry=no; \
+    case $$MAKEFLAGS in \
+      *\\[\ \  ]*) \
+        echo 'am--echo: ; @echo "AM"  OK' | $(MAKE) -f - 2>/dev/null \
+          | grep '^AM OK$$' >/dev/null || am__dry=yes;; \
+      *) \
+        for am__flg in $$MAKEFLAGS; do \
+          case $$am__flg in \
+            *=*|--*) ;; \
+            *n*) am__dry=yes; break;; \
+          esac; \
+        done;; \
+    esac; \
+    test $$am__dry = yes; \
+  }
  pkgdatadir = $(datadir)/@PACKAGE@
  pkgincludedir = $(includedir)/@PACKAGE@
  pkglibdir = $(libdir)/@PACKAGE@
@@ -50,7 +68,6 @@ PRE_UNINSTALL = :
  POST_UNINSTALL = :
  build_triplet = @build@
  host_triplet = @host@
-ANSI2KNR = $(top_builddir)/ansi2knr
  check_PROGRAMS = t-asmtype$(EXEEXT) t-aors_1$(EXEEXT) \
         t-divrem_1$(EXEEXT) t-mod_1$(EXEEXT) t-fat$(EXEEXT) \
         t-get_d$(EXEEXT) t-instrument$(EXEEXT) t-iord_u$(EXEEXT) \
@@ -58,16 +75,19 @@ check_PROGRAMS = t-asmtype$(EXEEXT) t-aors_1$(EXEEXT) \
         logic$(EXEEXT) t-toom22$(EXEEXT) t-toom32$(EXEEXT) \
         t-toom33$(EXEEXT) t-toom42$(EXEEXT) t-toom43$(EXEEXT) \
         t-toom44$(EXEEXT) t-toom52$(EXEEXT) t-toom53$(EXEEXT) \
-       t-toom62$(EXEEXT) t-toom63$(EXEEXT) t-toom6h$(EXEEXT) \
-       t-toom8h$(EXEEXT) t-mul$(EXEEXT) t-mullo$(EXEEXT) \
-       t-mulmod_bnm1$(EXEEXT) t-sqrmod_bnm1$(EXEEXT) t-hgcd$(EXEEXT) \
+       t-toom54$(EXEEXT) t-toom62$(EXEEXT) t-toom63$(EXEEXT) \
+       t-toom6h$(EXEEXT) t-toom8h$(EXEEXT) t-toom2-sqr$(EXEEXT) \
+       t-toom3-sqr$(EXEEXT) t-toom4-sqr$(EXEEXT) t-toom6-sqr$(EXEEXT) \
+       t-toom8-sqr$(EXEEXT) t-mul$(EXEEXT) t-mullo$(EXEEXT) \
+       t-mulmod_bnm1$(EXEEXT) t-sqrmod_bnm1$(EXEEXT) \
+       t-mulmid$(EXEEXT) t-hgcd$(EXEEXT) t-hgcd_appr$(EXEEXT) \
         t-matrix22$(EXEEXT) t-invert$(EXEEXT) t-div$(EXEEXT) \
-       t-bdiv$(EXEEXT)
+       t-bdiv$(EXEEXT) t-broot$(EXEEXT) t-brootinv$(EXEEXT)
  subdir = tests/mpn
  DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in
  ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
  am__aclocal_m4_deps = $(top_srcdir)/acinclude.m4 \
-       $(top_srcdir)/configure.in
+       $(top_srcdir)/configure.ac
  am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
         $(ACLOCAL_M4)
  mkinstalldirs = $(install_sh) -d
@@ -75,167 +95,217 @@ CONFIG_HEADER = $(top_builddir)/config.h
  CONFIG_CLEAN_FILES =
  CONFIG_CLEAN_VPATH_FILES =
  logic_SOURCES = logic.c
-logic_OBJECTS = logic$U.$(OBJEXT)
+logic_OBJECTS = logic.$(OBJEXT)
  logic_LDADD = $(LDADD)
  logic_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
         $(top_builddir)/libgmp.la
  t_aors_1_SOURCES = t-aors_1.c
-t_aors_1_OBJECTS = t-aors_1$U.$(OBJEXT)
+t_aors_1_OBJECTS = t-aors_1.$(OBJEXT)
  t_aors_1_LDADD = $(LDADD)
  t_aors_1_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
         $(top_builddir)/libgmp.la
  t_asmtype_SOURCES = t-asmtype.c
-t_asmtype_OBJECTS = t-asmtype$U.$(OBJEXT)
+t_asmtype_OBJECTS = t-asmtype.$(OBJEXT)
  t_asmtype_LDADD = $(LDADD)
  t_asmtype_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
         $(top_builddir)/libgmp.la
  t_bdiv_SOURCES = t-bdiv.c
-t_bdiv_OBJECTS = t-bdiv$U.$(OBJEXT)
+t_bdiv_OBJECTS = t-bdiv.$(OBJEXT)
  t_bdiv_LDADD = $(LDADD)
  t_bdiv_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
         $(top_builddir)/libgmp.la
+t_broot_SOURCES = t-broot.c
+t_broot_OBJECTS = t-broot.$(OBJEXT)
+t_broot_LDADD = $(LDADD)
+t_broot_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
+       $(top_builddir)/libgmp.la
+t_brootinv_SOURCES = t-brootinv.c
+t_brootinv_OBJECTS = t-brootinv.$(OBJEXT)
+t_brootinv_LDADD = $(LDADD)
+t_brootinv_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
+       $(top_builddir)/libgmp.la
  t_div_SOURCES = t-div.c
-t_div_OBJECTS = t-div$U.$(OBJEXT)
+t_div_OBJECTS = t-div.$(OBJEXT)
  t_div_LDADD = $(LDADD)
  t_div_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
         $(top_builddir)/libgmp.la
  t_divrem_1_SOURCES = t-divrem_1.c
-t_divrem_1_OBJECTS = t-divrem_1$U.$(OBJEXT)
+t_divrem_1_OBJECTS = t-divrem_1.$(OBJEXT)
  t_divrem_1_LDADD = $(LDADD)
  t_divrem_1_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
         $(top_builddir)/libgmp.la
  t_fat_SOURCES = t-fat.c
-t_fat_OBJECTS = t-fat$U.$(OBJEXT)
+t_fat_OBJECTS = t-fat.$(OBJEXT)
  t_fat_LDADD = $(LDADD)
  t_fat_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
         $(top_builddir)/libgmp.la
  t_get_d_SOURCES = t-get_d.c
-t_get_d_OBJECTS = t-get_d$U.$(OBJEXT)
+t_get_d_OBJECTS = t-get_d.$(OBJEXT)
  t_get_d_LDADD = $(LDADD)
  t_get_d_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
         $(top_builddir)/libgmp.la
  t_hgcd_SOURCES = t-hgcd.c
-t_hgcd_OBJECTS = t-hgcd$U.$(OBJEXT)
+t_hgcd_OBJECTS = t-hgcd.$(OBJEXT)
  t_hgcd_LDADD = $(LDADD)
  t_hgcd_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
         $(top_builddir)/libgmp.la
+t_hgcd_appr_SOURCES = t-hgcd_appr.c
+t_hgcd_appr_OBJECTS = t-hgcd_appr.$(OBJEXT)
+t_hgcd_appr_LDADD = $(LDADD)
+t_hgcd_appr_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
+       $(top_builddir)/libgmp.la
  t_instrument_SOURCES = t-instrument.c
-t_instrument_OBJECTS = t-instrument$U.$(OBJEXT)
+t_instrument_OBJECTS = t-instrument.$(OBJEXT)
  t_instrument_LDADD = $(LDADD)
  t_instrument_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
         $(top_builddir)/libgmp.la
  t_invert_SOURCES = t-invert.c
-t_invert_OBJECTS = t-invert$U.$(OBJEXT)
+t_invert_OBJECTS = t-invert.$(OBJEXT)
  t_invert_LDADD = $(LDADD)
  t_invert_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
         $(top_builddir)/libgmp.la
  t_iord_u_SOURCES = t-iord_u.c
-t_iord_u_OBJECTS = t-iord_u$U.$(OBJEXT)
+t_iord_u_OBJECTS = t-iord_u.$(OBJEXT)
  t_iord_u_LDADD = $(LDADD)
  t_iord_u_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
         $(top_builddir)/libgmp.la
  t_matrix22_SOURCES = t-matrix22.c
-t_matrix22_OBJECTS = t-matrix22$U.$(OBJEXT)
+t_matrix22_OBJECTS = t-matrix22.$(OBJEXT)
  t_matrix22_LDADD = $(LDADD)
  t_matrix22_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
         $(top_builddir)/libgmp.la
  t_mod_1_SOURCES = t-mod_1.c
-t_mod_1_OBJECTS = t-mod_1$U.$(OBJEXT)
+t_mod_1_OBJECTS = t-mod_1.$(OBJEXT)
  t_mod_1_LDADD = $(LDADD)
  t_mod_1_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
         $(top_builddir)/libgmp.la
  t_mp_bases_SOURCES = t-mp_bases.c
-t_mp_bases_OBJECTS = t-mp_bases$U.$(OBJEXT)
+t_mp_bases_OBJECTS = t-mp_bases.$(OBJEXT)
  t_mp_bases_LDADD = $(LDADD)
  t_mp_bases_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
         $(top_builddir)/libgmp.la
  t_mul_SOURCES = t-mul.c
-t_mul_OBJECTS = t-mul$U.$(OBJEXT)
+t_mul_OBJECTS = t-mul.$(OBJEXT)
  t_mul_LDADD = $(LDADD)
  t_mul_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
         $(top_builddir)/libgmp.la
  t_mullo_SOURCES = t-mullo.c
-t_mullo_OBJECTS = t-mullo$U.$(OBJEXT)
+t_mullo_OBJECTS = t-mullo.$(OBJEXT)
  t_mullo_LDADD = $(LDADD)
  t_mullo_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
         $(top_builddir)/libgmp.la
+t_mulmid_SOURCES = t-mulmid.c
+t_mulmid_OBJECTS = t-mulmid.$(OBJEXT)
+t_mulmid_LDADD = $(LDADD)
+t_mulmid_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
+       $(top_builddir)/libgmp.la
  t_mulmod_bnm1_SOURCES = t-mulmod_bnm1.c
-t_mulmod_bnm1_OBJECTS = t-mulmod_bnm1$U.$(OBJEXT)
+t_mulmod_bnm1_OBJECTS = t-mulmod_bnm1.$(OBJEXT)
  t_mulmod_bnm1_LDADD = $(LDADD)
  t_mulmod_bnm1_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
         $(top_builddir)/libgmp.la
  t_perfsqr_SOURCES = t-perfsqr.c
-t_perfsqr_OBJECTS = t-perfsqr$U.$(OBJEXT)
+t_perfsqr_OBJECTS = t-perfsqr.$(OBJEXT)
  t_perfsqr_LDADD = $(LDADD)
  t_perfsqr_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
         $(top_builddir)/libgmp.la
  t_scan_SOURCES = t-scan.c
-t_scan_OBJECTS = t-scan$U.$(OBJEXT)
+t_scan_OBJECTS = t-scan.$(OBJEXT)
  t_scan_LDADD = $(LDADD)
  t_scan_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
         $(top_builddir)/libgmp.la
  t_sqrmod_bnm1_SOURCES = t-sqrmod_bnm1.c
-t_sqrmod_bnm1_OBJECTS = t-sqrmod_bnm1$U.$(OBJEXT)
+t_sqrmod_bnm1_OBJECTS = t-sqrmod_bnm1.$(OBJEXT)
  t_sqrmod_bnm1_LDADD = $(LDADD)
  t_sqrmod_bnm1_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
         $(top_builddir)/libgmp.la
+t_toom2_sqr_SOURCES = t-toom2-sqr.c
+t_toom2_sqr_OBJECTS = t-toom2-sqr.$(OBJEXT)
+t_toom2_sqr_LDADD = $(LDADD)
+t_toom2_sqr_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
+       $(top_builddir)/libgmp.la
  t_toom22_SOURCES = t-toom22.c
-t_toom22_OBJECTS = t-toom22$U.$(OBJEXT)
+t_toom22_OBJECTS = t-toom22.$(OBJEXT)
  t_toom22_LDADD = $(LDADD)
  t_toom22_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
         $(top_builddir)/libgmp.la
+t_toom3_sqr_SOURCES = t-toom3-sqr.c
+t_toom3_sqr_OBJECTS = t-toom3-sqr.$(OBJEXT)
+t_toom3_sqr_LDADD = $(LDADD)
+t_toom3_sqr_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
+       $(top_builddir)/libgmp.la
  t_toom32_SOURCES = t-toom32.c
-t_toom32_OBJECTS = t-toom32$U.$(OBJEXT)
+t_toom32_OBJECTS = t-toom32.$(OBJEXT)
  t_toom32_LDADD = $(LDADD)
  t_toom32_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
         $(top_builddir)/libgmp.la
  t_toom33_SOURCES = t-toom33.c
-t_toom33_OBJECTS = t-toom33$U.$(OBJEXT)
+t_toom33_OBJECTS = t-toom33.$(OBJEXT)
  t_toom33_LDADD = $(LDADD)
  t_toom33_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
         $(top_builddir)/libgmp.la
+t_toom4_sqr_SOURCES = t-toom4-sqr.c
+t_toom4_sqr_OBJECTS = t-toom4-sqr.$(OBJEXT)
+t_toom4_sqr_LDADD = $(LDADD)
+t_toom4_sqr_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
+       $(top_builddir)/libgmp.la
  t_toom42_SOURCES = t-toom42.c
-t_toom42_OBJECTS = t-toom42$U.$(OBJEXT)
+t_toom42_OBJECTS = t-toom42.$(OBJEXT)
  t_toom42_LDADD = $(LDADD)
  t_toom42_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
         $(top_builddir)/libgmp.la
  t_toom43_SOURCES = t-toom43.c
-t_toom43_OBJECTS = t-toom43$U.$(OBJEXT)
+t_toom43_OBJECTS = t-toom43.$(OBJEXT)
  t_toom43_LDADD = $(LDADD)
  t_toom43_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
         $(top_builddir)/libgmp.la
  t_toom44_SOURCES = t-toom44.c
-t_toom44_OBJECTS = t-toom44$U.$(OBJEXT)
+t_toom44_OBJECTS = t-toom44.$(OBJEXT)
  t_toom44_LDADD = $(LDADD)
  t_toom44_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
         $(top_builddir)/libgmp.la
  t_toom52_SOURCES = t-toom52.c
-t_toom52_OBJECTS = t-toom52$U.$(OBJEXT)
+t_toom52_OBJECTS = t-toom52.$(OBJEXT)
  t_toom52_LDADD = $(LDADD)
  t_toom52_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
         $(top_builddir)/libgmp.la
  t_toom53_SOURCES = t-toom53.c
-t_toom53_OBJECTS = t-toom53$U.$(OBJEXT)
+t_toom53_OBJECTS = t-toom53.$(OBJEXT)
  t_toom53_LDADD = $(LDADD)
  t_toom53_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
         $(top_builddir)/libgmp.la
+t_toom54_SOURCES = t-toom54.c
+t_toom54_OBJECTS = t-toom54.$(OBJEXT)
+t_toom54_LDADD = $(LDADD)
+t_toom54_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
+       $(top_builddir)/libgmp.la
+t_toom6_sqr_SOURCES = t-toom6-sqr.c
+t_toom6_sqr_OBJECTS = t-toom6-sqr.$(OBJEXT)
+t_toom6_sqr_LDADD = $(LDADD)
+t_toom6_sqr_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
+       $(top_builddir)/libgmp.la
  t_toom62_SOURCES = t-toom62.c
-t_toom62_OBJECTS = t-toom62$U.$(OBJEXT)
+t_toom62_OBJECTS = t-toom62.$(OBJEXT)
  t_toom62_LDADD = $(LDADD)
  t_toom62_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
         $(top_builddir)/libgmp.la
  t_toom63_SOURCES = t-toom63.c
-t_toom63_OBJECTS = t-toom63$U.$(OBJEXT)
+t_toom63_OBJECTS = t-toom63.$(OBJEXT)
  t_toom63_LDADD = $(LDADD)
  t_toom63_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
         $(top_builddir)/libgmp.la
  t_toom6h_SOURCES = t-toom6h.c
-t_toom6h_OBJECTS = t-toom6h$U.$(OBJEXT)
+t_toom6h_OBJECTS = t-toom6h.$(OBJEXT)
  t_toom6h_LDADD = $(LDADD)
  t_toom6h_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
         $(top_builddir)/libgmp.la
+t_toom8_sqr_SOURCES = t-toom8-sqr.c
+t_toom8_sqr_OBJECTS = t-toom8-sqr.$(OBJEXT)
+t_toom8_sqr_LDADD = $(LDADD)
+t_toom8_sqr_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
+       $(top_builddir)/libgmp.la
  t_toom8h_SOURCES = t-toom8h.c
-t_toom8h_OBJECTS = t-toom8h$U.$(OBJEXT)
+t_toom8h_OBJECTS = t-toom8h.$(OBJEXT)
  t_toom8h_LDADD = $(LDADD)
  t_toom8h_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
         $(top_builddir)/libgmp.la
@@ -251,20 +321,29 @@ CCLD = $(CC)
  LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \
         --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) $(AM_LDFLAGS) \
         $(LDFLAGS) -o $@
-SOURCES = logic.c t-aors_1.c t-asmtype.c t-bdiv.c t-div.c t-divrem_1.c \
-       t-fat.c t-get_d.c t-hgcd.c t-instrument.c t-invert.c \
-       t-iord_u.c t-matrix22.c t-mod_1.c t-mp_bases.c t-mul.c \
-       t-mullo.c t-mulmod_bnm1.c t-perfsqr.c t-scan.c t-sqrmod_bnm1.c \
-       t-toom22.c t-toom32.c t-toom33.c t-toom42.c t-toom43.c \
-       t-toom44.c t-toom52.c t-toom53.c t-toom62.c t-toom63.c \
-       t-toom6h.c t-toom8h.c
-DIST_SOURCES = logic.c t-aors_1.c t-asmtype.c t-bdiv.c t-div.c \
-       t-divrem_1.c t-fat.c t-get_d.c t-hgcd.c t-instrument.c \
-       t-invert.c t-iord_u.c t-matrix22.c t-mod_1.c t-mp_bases.c \
-       t-mul.c t-mullo.c t-mulmod_bnm1.c t-perfsqr.c t-scan.c \
-       t-sqrmod_bnm1.c t-toom22.c t-toom32.c t-toom33.c t-toom42.c \
-       t-toom43.c t-toom44.c t-toom52.c t-toom53.c t-toom62.c \
-       t-toom63.c t-toom6h.c t-toom8h.c
+SOURCES = logic.c t-aors_1.c t-asmtype.c t-bdiv.c t-broot.c \
+       t-brootinv.c t-div.c t-divrem_1.c t-fat.c t-get_d.c t-hgcd.c \
+       t-hgcd_appr.c t-instrument.c t-invert.c t-iord_u.c \
+       t-matrix22.c t-mod_1.c t-mp_bases.c t-mul.c t-mullo.c \
+       t-mulmid.c t-mulmod_bnm1.c t-perfsqr.c t-scan.c \
+       t-sqrmod_bnm1.c t-toom2-sqr.c t-toom22.c t-toom3-sqr.c \
+       t-toom32.c t-toom33.c t-toom4-sqr.c t-toom42.c t-toom43.c \
+       t-toom44.c t-toom52.c t-toom53.c t-toom54.c t-toom6-sqr.c \
+       t-toom62.c t-toom63.c t-toom6h.c t-toom8-sqr.c t-toom8h.c
+DIST_SOURCES = logic.c t-aors_1.c t-asmtype.c t-bdiv.c t-broot.c \
+       t-brootinv.c t-div.c t-divrem_1.c t-fat.c t-get_d.c t-hgcd.c \
+       t-hgcd_appr.c t-instrument.c t-invert.c t-iord_u.c \
+       t-matrix22.c t-mod_1.c t-mp_bases.c t-mul.c t-mullo.c \
+       t-mulmid.c t-mulmod_bnm1.c t-perfsqr.c t-scan.c \
+       t-sqrmod_bnm1.c t-toom2-sqr.c t-toom22.c t-toom3-sqr.c \
+       t-toom32.c t-toom33.c t-toom4-sqr.c t-toom42.c t-toom43.c \
+       t-toom44.c t-toom52.c t-toom53.c t-toom54.c t-toom6-sqr.c \
+       t-toom62.c t-toom63.c t-toom6h.c t-toom8-sqr.c t-toom8h.c
+am__can_run_installinfo = \
+  case $$AM_UPDATE_INFO_DIR in \
+    n|no|NO) false;; \
+    *) (install-info --version) >/dev/null 2>&1;; \
+  esac
  ETAGS = etags
  CTAGS = ctags
  am__tty_colors = \
@@ -368,8 +447,8 @@ SHELL = @SHELL@
  SPEED_CYCLECOUNTER_OBJ = @SPEED_CYCLECOUNTER_OBJ@
  STRIP = @STRIP@
  TAL_OBJECT = @TAL_OBJECT@
+TUNE_LIBS = @TUNE_LIBS@
  TUNE_SQR_OBJ = @TUNE_SQR_OBJ@
-U = @U@
  U_FOR_BUILD = @U_FOR_BUILD@
  VERSION = @VERSION@
  WITH_READLINE_01 = @WITH_READLINE_01@
@@ -416,7 +495,6 @@ mandir = @mandir@
  mkdir_p = @mkdir_p@
  mpn_objects = @mpn_objects@
  mpn_objs_in_libgmp = @mpn_objs_in_libgmp@
-mpn_objs_in_libmp = @mpn_objs_in_libmp@
  oldincludedir = @oldincludedir@
  pdfdir = @pdfdir@
  prefix = @prefix@
@@ -432,7 +510,7 @@ top_builddir = @top_builddir@
  top_srcdir = @top_srcdir@
  INCLUDES = -I$(top_srcdir) -I$(top_srcdir)/tests
  LDADD = $(top_builddir)/tests/libtests.la $(top_builddir)/libgmp.la
-EXTRA_DIST = toom-shared.h
+EXTRA_DIST = toom-shared.h toom-sqr-shared.h
  TESTS = $(check_PROGRAMS)
  all: all-am
  
@@ -477,103 +555,133 @@ clean-checkPROGRAMS:
         list=`for p in $$list; do echo "$$p"; done | sed 's/$(EXEEXT)$$//'`; \
         echo " rm -f" $$list; \
         rm -f $$list
-logic$(EXEEXT): $(logic_OBJECTS) $(logic_DEPENDENCIES) 
+logic$(EXEEXT): $(logic_OBJECTS) $(logic_DEPENDENCIES) $(EXTRA_logic_DEPENDENCIES) 
         @rm -f logic$(EXEEXT)
         $(LINK) $(logic_OBJECTS) $(logic_LDADD) $(LIBS)
-t-aors_1$(EXEEXT): $(t_aors_1_OBJECTS) $(t_aors_1_DEPENDENCIES) 
+t-aors_1$(EXEEXT): $(t_aors_1_OBJECTS) $(t_aors_1_DEPENDENCIES) $(EXTRA_t_aors_1_DEPENDENCIES) 
         @rm -f t-aors_1$(EXEEXT)
         $(LINK) $(t_aors_1_OBJECTS) $(t_aors_1_LDADD) $(LIBS)
-t-asmtype$(EXEEXT): $(t_asmtype_OBJECTS) $(t_asmtype_DEPENDENCIES) 
+t-asmtype$(EXEEXT): $(t_asmtype_OBJECTS) $(t_asmtype_DEPENDENCIES) $(EXTRA_t_asmtype_DEPENDENCIES) 
         @rm -f t-asmtype$(EXEEXT)
         $(LINK) $(t_asmtype_OBJECTS) $(t_asmtype_LDADD) $(LIBS)
-t-bdiv$(EXEEXT): $(t_bdiv_OBJECTS) $(t_bdiv_DEPENDENCIES) 
+t-bdiv$(EXEEXT): $(t_bdiv_OBJECTS) $(t_bdiv_DEPENDENCIES) $(EXTRA_t_bdiv_DEPENDENCIES) 
         @rm -f t-bdiv$(EXEEXT)
         $(LINK) $(t_bdiv_OBJECTS) $(t_bdiv_LDADD) $(LIBS)
-t-div$(EXEEXT): $(t_div_OBJECTS) $(t_div_DEPENDENCIES) 
+t-broot$(EXEEXT): $(t_broot_OBJECTS) $(t_broot_DEPENDENCIES) $(EXTRA_t_broot_DEPENDENCIES) 
+       @rm -f t-broot$(EXEEXT)
+       $(LINK) $(t_broot_OBJECTS) $(t_broot_LDADD) $(LIBS)
+t-brootinv$(EXEEXT): $(t_brootinv_OBJECTS) $(t_brootinv_DEPENDENCIES) $(EXTRA_t_brootinv_DEPENDENCIES) 
+       @rm -f t-brootinv$(EXEEXT)
+       $(LINK) $(t_brootinv_OBJECTS) $(t_brootinv_LDADD) $(LIBS)
+t-div$(EXEEXT): $(t_div_OBJECTS) $(t_div_DEPENDENCIES) $(EXTRA_t_div_DEPENDENCIES) 
         @rm -f t-div$(EXEEXT)
         $(LINK) $(t_div_OBJECTS) $(t_div_LDADD) $(LIBS)
-t-divrem_1$(EXEEXT): $(t_divrem_1_OBJECTS) $(t_divrem_1_DEPENDENCIES) 
+t-divrem_1$(EXEEXT): $(t_divrem_1_OBJECTS) $(t_divrem_1_DEPENDENCIES) $(EXTRA_t_divrem_1_DEPENDENCIES) 
         @rm -f t-divrem_1$(EXEEXT)
         $(LINK) $(t_divrem_1_OBJECTS) $(t_divrem_1_LDADD) $(LIBS)
-t-fat$(EXEEXT): $(t_fat_OBJECTS) $(t_fat_DEPENDENCIES) 
+t-fat$(EXEEXT): $(t_fat_OBJECTS) $(t_fat_DEPENDENCIES) $(EXTRA_t_fat_DEPENDENCIES) 
         @rm -f t-fat$(EXEEXT)
         $(LINK) $(t_fat_OBJECTS) $(t_fat_LDADD) $(LIBS)
-t-get_d$(EXEEXT): $(t_get_d_OBJECTS) $(t_get_d_DEPENDENCIES) 
+t-get_d$(EXEEXT): $(t_get_d_OBJECTS) $(t_get_d_DEPENDENCIES) $(EXTRA_t_get_d_DEPENDENCIES) 
         @rm -f t-get_d$(EXEEXT)
         $(LINK) $(t_get_d_OBJECTS) $(t_get_d_LDADD) $(LIBS)
-t-hgcd$(EXEEXT): $(t_hgcd_OBJECTS) $(t_hgcd_DEPENDENCIES) 
+t-hgcd$(EXEEXT): $(t_hgcd_OBJECTS) $(t_hgcd_DEPENDENCIES) $(EXTRA_t_hgcd_DEPENDENCIES) 
         @rm -f t-hgcd$(EXEEXT)
         $(LINK) $(t_hgcd_OBJECTS) $(t_hgcd_LDADD) $(LIBS)
-t-instrument$(EXEEXT): $(t_instrument_OBJECTS) $(t_instrument_DEPENDENCIES) 
+t-hgcd_appr$(EXEEXT): $(t_hgcd_appr_OBJECTS) $(t_hgcd_appr_DEPENDENCIES) $(EXTRA_t_hgcd_appr_DEPENDENCIES) 
+       @rm -f t-hgcd_appr$(EXEEXT)
+       $(LINK) $(t_hgcd_appr_OBJECTS) $(t_hgcd_appr_LDADD) $(LIBS)
+t-instrument$(EXEEXT): $(t_instrument_OBJECTS) $(t_instrument_DEPENDENCIES) $(EXTRA_t_instrument_DEPENDENCIES) 
         @rm -f t-instrument$(EXEEXT)
         $(LINK) $(t_instrument_OBJECTS) $(t_instrument_LDADD) $(LIBS)
-t-invert$(EXEEXT): $(t_invert_OBJECTS) $(t_invert_DEPENDENCIES) 
+t-invert$(EXEEXT): $(t_invert_OBJECTS) $(t_invert_DEPENDENCIES) $(EXTRA_t_invert_DEPENDENCIES) 
         @rm -f t-invert$(EXEEXT)
         $(LINK) $(t_invert_OBJECTS) $(t_invert_LDADD) $(LIBS)
-t-iord_u$(EXEEXT): $(t_iord_u_OBJECTS) $(t_iord_u_DEPENDENCIES) 
+t-iord_u$(EXEEXT): $(t_iord_u_OBJECTS) $(t_iord_u_DEPENDENCIES) $(EXTRA_t_iord_u_DEPENDENCIES) 
         @rm -f t-iord_u$(EXEEXT)
         $(LINK) $(t_iord_u_OBJECTS) $(t_iord_u_LDADD) $(LIBS)
-t-matrix22$(EXEEXT): $(t_matrix22_OBJECTS) $(t_matrix22_DEPENDENCIES) 
+t-matrix22$(EXEEXT): $(t_matrix22_OBJECTS) $(t_matrix22_DEPENDENCIES) $(EXTRA_t_matrix22_DEPENDENCIES) 
         @rm -f t-matrix22$(EXEEXT)
         $(LINK) $(t_matrix22_OBJECTS) $(t_matrix22_LDADD) $(LIBS)
-t-mod_1$(EXEEXT): $(t_mod_1_OBJECTS) $(t_mod_1_DEPENDENCIES) 
+t-mod_1$(EXEEXT): $(t_mod_1_OBJECTS) $(t_mod_1_DEPENDENCIES) $(EXTRA_t_mod_1_DEPENDENCIES) 
         @rm -f t-mod_1$(EXEEXT)
         $(LINK) $(t_mod_1_OBJECTS) $(t_mod_1_LDADD) $(LIBS)
-t-mp_bases$(EXEEXT): $(t_mp_bases_OBJECTS) $(t_mp_bases_DEPENDENCIES) 
+t-mp_bases$(EXEEXT): $(t_mp_bases_OBJECTS) $(t_mp_bases_DEPENDENCIES) $(EXTRA_t_mp_bases_DEPENDENCIES) 
         @rm -f t-mp_bases$(EXEEXT)
         $(LINK) $(t_mp_bases_OBJECTS) $(t_mp_bases_LDADD) $(LIBS)
-t-mul$(EXEEXT): $(t_mul_OBJECTS) $(t_mul_DEPENDENCIES) 
+t-mul$(EXEEXT): $(t_mul_OBJECTS) $(t_mul_DEPENDENCIES) $(EXTRA_t_mul_DEPENDENCIES) 
         @rm -f t-mul$(EXEEXT)
         $(LINK) $(t_mul_OBJECTS) $(t_mul_LDADD) $(LIBS)
-t-mullo$(EXEEXT): $(t_mullo_OBJECTS) $(t_mullo_DEPENDENCIES) 
+t-mullo$(EXEEXT): $(t_mullo_OBJECTS) $(t_mullo_DEPENDENCIES) $(EXTRA_t_mullo_DEPENDENCIES) 
         @rm -f t-mullo$(EXEEXT)
         $(LINK) $(t_mullo_OBJECTS) $(t_mullo_LDADD) $(LIBS)
-t-mulmod_bnm1$(EXEEXT): $(t_mulmod_bnm1_OBJECTS) $(t_mulmod_bnm1_DEPENDENCIES) 
+t-mulmid$(EXEEXT): $(t_mulmid_OBJECTS) $(t_mulmid_DEPENDENCIES) $(EXTRA_t_mulmid_DEPENDENCIES) 
+       @rm -f t-mulmid$(EXEEXT)
+       $(LINK) $(t_mulmid_OBJECTS) $(t_mulmid_LDADD) $(LIBS)
+t-mulmod_bnm1$(EXEEXT): $(t_mulmod_bnm1_OBJECTS) $(t_mulmod_bnm1_DEPENDENCIES) $(EXTRA_t_mulmod_bnm1_DEPENDENCIES) 
         @rm -f t-mulmod_bnm1$(EXEEXT)
         $(LINK) $(t_mulmod_bnm1_OBJECTS) $(t_mulmod_bnm1_LDADD) $(LIBS)
-t-perfsqr$(EXEEXT): $(t_perfsqr_OBJECTS) $(t_perfsqr_DEPENDENCIES) 
+t-perfsqr$(EXEEXT): $(t_perfsqr_OBJECTS) $(t_perfsqr_DEPENDENCIES) $(EXTRA_t_perfsqr_DEPENDENCIES) 
         @rm -f t-perfsqr$(EXEEXT)
         $(LINK) $(t_perfsqr_OBJECTS) $(t_perfsqr_LDADD) $(LIBS)
-t-scan$(EXEEXT): $(t_scan_OBJECTS) $(t_scan_DEPENDENCIES) 
+t-scan$(EXEEXT): $(t_scan_OBJECTS) $(t_scan_DEPENDENCIES) $(EXTRA_t_scan_DEPENDENCIES) 
         @rm -f t-scan$(EXEEXT)
         $(LINK) $(t_scan_OBJECTS) $(t_scan_LDADD) $(LIBS)
-t-sqrmod_bnm1$(EXEEXT): $(t_sqrmod_bnm1_OBJECTS) $(t_sqrmod_bnm1_DEPENDENCIES) 
+t-sqrmod_bnm1$(EXEEXT): $(t_sqrmod_bnm1_OBJECTS) $(t_sqrmod_bnm1_DEPENDENCIES) $(EXTRA_t_sqrmod_bnm1_DEPENDENCIES) 
         @rm -f t-sqrmod_bnm1$(EXEEXT)
         $(LINK) $(t_sqrmod_bnm1_OBJECTS) $(t_sqrmod_bnm1_LDADD) $(LIBS)
-t-toom22$(EXEEXT): $(t_toom22_OBJECTS) $(t_toom22_DEPENDENCIES) 
+t-toom2-sqr$(EXEEXT): $(t_toom2_sqr_OBJECTS) $(t_toom2_sqr_DEPENDENCIES) $(EXTRA_t_toom2_sqr_DEPENDENCIES) 
+       @rm -f t-toom2-sqr$(EXEEXT)
+       $(LINK) $(t_toom2_sqr_OBJECTS) $(t_toom2_sqr_LDADD) $(LIBS)
+t-toom22$(EXEEXT): $(t_toom22_OBJECTS) $(t_toom22_DEPENDENCIES) $(EXTRA_t_toom22_DEPENDENCIES) 
         @rm -f t-toom22$(EXEEXT)
         $(LINK) $(t_toom22_OBJECTS) $(t_toom22_LDADD) $(LIBS)
-t-toom32$(EXEEXT): $(t_toom32_OBJECTS) $(t_toom32_DEPENDENCIES) 
+t-toom3-sqr$(EXEEXT): $(t_toom3_sqr_OBJECTS) $(t_toom3_sqr_DEPENDENCIES) $(EXTRA_t_toom3_sqr_DEPENDENCIES) 
+       @rm -f t-toom3-sqr$(EXEEXT)
+       $(LINK) $(t_toom3_sqr_OBJECTS) $(t_toom3_sqr_LDADD) $(LIBS)
+t-toom32$(EXEEXT): $(t_toom32_OBJECTS) $(t_toom32_DEPENDENCIES) $(EXTRA_t_toom32_DEPENDENCIES) 
         @rm -f t-toom32$(EXEEXT)
         $(LINK) $(t_toom32_OBJECTS) $(t_toom32_LDADD) $(LIBS)
-t-toom33$(EXEEXT): $(t_toom33_OBJECTS) $(t_toom33_DEPENDENCIES) 
+t-toom33$(EXEEXT): $(t_toom33_OBJECTS) $(t_toom33_DEPENDENCIES) $(EXTRA_t_toom33_DEPENDENCIES) 
         @rm -f t-toom33$(EXEEXT)
         $(LINK) $(t_toom33_OBJECTS) $(t_toom33_LDADD) $(LIBS)
-t-toom42$(EXEEXT): $(t_toom42_OBJECTS) $(t_toom42_DEPENDENCIES) 
+t-toom4-sqr$(EXEEXT): $(t_toom4_sqr_OBJECTS) $(t_toom4_sqr_DEPENDENCIES) $(EXTRA_t_toom4_sqr_DEPENDENCIES) 
+       @rm -f t-toom4-sqr$(EXEEXT)
+       $(LINK) $(t_toom4_sqr_OBJECTS) $(t_toom4_sqr_LDADD) $(LIBS)
+t-toom42$(EXEEXT): $(t_toom42_OBJECTS) $(t_toom42_DEPENDENCIES) $(EXTRA_t_toom42_DEPENDENCIES) 
         @rm -f t-toom42$(EXEEXT)
         $(LINK) $(t_toom42_OBJECTS) $(t_toom42_LDADD) $(LIBS)
-t-toom43$(EXEEXT): $(t_toom43_OBJECTS) $(t_toom43_DEPENDENCIES) 
+t-toom43$(EXEEXT): $(t_toom43_OBJECTS) $(t_toom43_DEPENDENCIES) $(EXTRA_t_toom43_DEPENDENCIES) 
         @rm -f t-toom43$(EXEEXT)
         $(LINK) $(t_toom43_OBJECTS) $(t_toom43_LDADD) $(LIBS)
-t-toom44$(EXEEXT): $(t_toom44_OBJECTS) $(t_toom44_DEPENDENCIES) 
+t-toom44$(EXEEXT): $(t_toom44_OBJECTS) $(t_toom44_DEPENDENCIES) $(EXTRA_t_toom44_DEPENDENCIES) 
         @rm -f t-toom44$(EXEEXT)
         $(LINK) $(t_toom44_OBJECTS) $(t_toom44_LDADD) $(LIBS)
-t-toom52$(EXEEXT): $(t_toom52_OBJECTS) $(t_toom52_DEPENDENCIES) 
+t-toom52$(EXEEXT): $(t_toom52_OBJECTS) $(t_toom52_DEPENDENCIES) $(EXTRA_t_toom52_DEPENDENCIES) 
         @rm -f t-toom52$(EXEEXT)
         $(LINK) $(t_toom52_OBJECTS) $(t_toom52_LDADD) $(LIBS)
-t-toom53$(EXEEXT): $(t_toom53_OBJECTS) $(t_toom53_DEPENDENCIES) 
+t-toom53$(EXEEXT): $(t_toom53_OBJECTS) $(t_toom53_DEPENDENCIES) $(EXTRA_t_toom53_DEPENDENCIES) 
         @rm -f t-toom53$(EXEEXT)
         $(LINK) $(t_toom53_OBJECTS) $(t_toom53_LDADD) $(LIBS)
-t-toom62$(EXEEXT): $(t_toom62_OBJECTS) $(t_toom62_DEPENDENCIES) 
+t-toom54$(EXEEXT): $(t_toom54_OBJECTS) $(t_toom54_DEPENDENCIES) $(EXTRA_t_toom54_DEPENDENCIES) 
+       @rm -f t-toom54$(EXEEXT)
+       $(LINK) $(t_toom54_OBJECTS) $(t_toom54_LDADD) $(LIBS)
+t-toom6-sqr$(EXEEXT): $(t_toom6_sqr_OBJECTS) $(t_toom6_sqr_DEPENDENCIES) $(EXTRA_t_toom6_sqr_DEPENDENCIES) 
+       @rm -f t-toom6-sqr$(EXEEXT)
+       $(LINK) $(t_toom6_sqr_OBJECTS) $(t_toom6_sqr_LDADD) $(LIBS)
+t-toom62$(EXEEXT): $(t_toom62_OBJECTS) $(t_toom62_DEPENDENCIES) $(EXTRA_t_toom62_DEPENDENCIES) 
         @rm -f t-toom62$(EXEEXT)
         $(LINK) $(t_toom62_OBJECTS) $(t_toom62_LDADD) $(LIBS)
-t-toom63$(EXEEXT): $(t_toom63_OBJECTS) $(t_toom63_DEPENDENCIES) 
+t-toom63$(EXEEXT): $(t_toom63_OBJECTS) $(t_toom63_DEPENDENCIES) $(EXTRA_t_toom63_DEPENDENCIES) 
         @rm -f t-toom63$(EXEEXT)
         $(LINK) $(t_toom63_OBJECTS) $(t_toom63_LDADD) $(LIBS)
-t-toom6h$(EXEEXT): $(t_toom6h_OBJECTS) $(t_toom6h_DEPENDENCIES) 
+t-toom6h$(EXEEXT): $(t_toom6h_OBJECTS) $(t_toom6h_DEPENDENCIES) $(EXTRA_t_toom6h_DEPENDENCIES) 
         @rm -f t-toom6h$(EXEEXT)
         $(LINK) $(t_toom6h_OBJECTS) $(t_toom6h_LDADD) $(LIBS)
-t-toom8h$(EXEEXT): $(t_toom8h_OBJECTS) $(t_toom8h_DEPENDENCIES) 
+t-toom8-sqr$(EXEEXT): $(t_toom8_sqr_OBJECTS) $(t_toom8_sqr_DEPENDENCIES) $(EXTRA_t_toom8_sqr_DEPENDENCIES) 
+       @rm -f t-toom8-sqr$(EXEEXT)
+       $(LINK) $(t_toom8_sqr_OBJECTS) $(t_toom8_sqr_LDADD) $(LIBS)
+t-toom8h$(EXEEXT): $(t_toom8h_OBJECTS) $(t_toom8h_DEPENDENCIES) $(EXTRA_t_toom8h_DEPENDENCIES) 
         @rm -f t-toom8h$(EXEEXT)
         $(LINK) $(t_toom8h_OBJECTS) $(t_toom8h_LDADD) $(LIBS)
  
@@ -582,11 +690,6 @@ mostlyclean-compile:
  
  distclean-compile:
         -rm -f *.tab.c
-$(top_builddir)/ansi2knr:
-       $(am__cd) $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) ./ansi2knr
-
-mostlyclean-kr:
-       -test "$U" = "" || rm -f *_.c
  
  .c.o:
         $(COMPILE) -c $<
@@ -596,90 +699,6 @@ mostlyclean-kr:
  
  .c.lo:
         $(LTCOMPILE) -c -o $@ $<
-logic_.c: logic.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/logic.c; then echo $(srcdir)/logic.c; else echo logic.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-aors_1_.c: t-aors_1.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-aors_1.c; then echo $(srcdir)/t-aors_1.c; else echo t-aors_1.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-asmtype_.c: t-asmtype.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-asmtype.c; then echo $(srcdir)/t-asmtype.c; else echo t-asmtype.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-bdiv_.c: t-bdiv.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-bdiv.c; then echo $(srcdir)/t-bdiv.c; else echo t-bdiv.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-div_.c: t-div.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-div.c; then echo $(srcdir)/t-div.c; else echo t-div.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-divrem_1_.c: t-divrem_1.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-divrem_1.c; then echo $(srcdir)/t-divrem_1.c; else echo t-divrem_1.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-fat_.c: t-fat.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-fat.c; then echo $(srcdir)/t-fat.c; else echo t-fat.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-get_d_.c: t-get_d.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-get_d.c; then echo $(srcdir)/t-get_d.c; else echo t-get_d.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-hgcd_.c: t-hgcd.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-hgcd.c; then echo $(srcdir)/t-hgcd.c; else echo t-hgcd.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-instrument_.c: t-instrument.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-instrument.c; then echo $(srcdir)/t-instrument.c; else echo t-instrument.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-invert_.c: t-invert.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-invert.c; then echo $(srcdir)/t-invert.c; else echo t-invert.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-iord_u_.c: t-iord_u.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-iord_u.c; then echo $(srcdir)/t-iord_u.c; else echo t-iord_u.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-matrix22_.c: t-matrix22.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-matrix22.c; then echo $(srcdir)/t-matrix22.c; else echo t-matrix22.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-mod_1_.c: t-mod_1.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-mod_1.c; then echo $(srcdir)/t-mod_1.c; else echo t-mod_1.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-mp_bases_.c: t-mp_bases.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-mp_bases.c; then echo $(srcdir)/t-mp_bases.c; else echo t-mp_bases.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-mul_.c: t-mul.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-mul.c; then echo $(srcdir)/t-mul.c; else echo t-mul.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-mullo_.c: t-mullo.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-mullo.c; then echo $(srcdir)/t-mullo.c; else echo t-mullo.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-mulmod_bnm1_.c: t-mulmod_bnm1.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-mulmod_bnm1.c; then echo $(srcdir)/t-mulmod_bnm1.c; else echo t-mulmod_bnm1.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-perfsqr_.c: t-perfsqr.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-perfsqr.c; then echo $(srcdir)/t-perfsqr.c; else echo t-perfsqr.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-scan_.c: t-scan.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-scan.c; then echo $(srcdir)/t-scan.c; else echo t-scan.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-sqrmod_bnm1_.c: t-sqrmod_bnm1.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-sqrmod_bnm1.c; then echo $(srcdir)/t-sqrmod_bnm1.c; else echo t-sqrmod_bnm1.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-toom22_.c: t-toom22.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-toom22.c; then echo $(srcdir)/t-toom22.c; else echo t-toom22.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-toom32_.c: t-toom32.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-toom32.c; then echo $(srcdir)/t-toom32.c; else echo t-toom32.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-toom33_.c: t-toom33.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-toom33.c; then echo $(srcdir)/t-toom33.c; else echo t-toom33.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-toom42_.c: t-toom42.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-toom42.c; then echo $(srcdir)/t-toom42.c; else echo t-toom42.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-toom43_.c: t-toom43.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-toom43.c; then echo $(srcdir)/t-toom43.c; else echo t-toom43.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-toom44_.c: t-toom44.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-toom44.c; then echo $(srcdir)/t-toom44.c; else echo t-toom44.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-toom52_.c: t-toom52.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-toom52.c; then echo $(srcdir)/t-toom52.c; else echo t-toom52.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-toom53_.c: t-toom53.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-toom53.c; then echo $(srcdir)/t-toom53.c; else echo t-toom53.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-toom62_.c: t-toom62.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-toom62.c; then echo $(srcdir)/t-toom62.c; else echo t-toom62.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-toom63_.c: t-toom63.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-toom63.c; then echo $(srcdir)/t-toom63.c; else echo t-toom63.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-toom6h_.c: t-toom6h.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-toom6h.c; then echo $(srcdir)/t-toom6h.c; else echo t-toom6h.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-toom8h_.c: t-toom8h.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-toom8h.c; then echo $(srcdir)/t-toom8h.c; else echo t-toom8h.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-logic_.$(OBJEXT) logic_.lo t-aors_1_.$(OBJEXT) t-aors_1_.lo \
-t-asmtype_.$(OBJEXT) t-asmtype_.lo t-bdiv_.$(OBJEXT) t-bdiv_.lo \
-t-div_.$(OBJEXT) t-div_.lo t-divrem_1_.$(OBJEXT) t-divrem_1_.lo \
-t-fat_.$(OBJEXT) t-fat_.lo t-get_d_.$(OBJEXT) t-get_d_.lo \
-t-hgcd_.$(OBJEXT) t-hgcd_.lo t-instrument_.$(OBJEXT) t-instrument_.lo \
-t-invert_.$(OBJEXT) t-invert_.lo t-iord_u_.$(OBJEXT) t-iord_u_.lo \
-t-matrix22_.$(OBJEXT) t-matrix22_.lo t-mod_1_.$(OBJEXT) t-mod_1_.lo \
-t-mp_bases_.$(OBJEXT) t-mp_bases_.lo t-mul_.$(OBJEXT) t-mul_.lo \
-t-mullo_.$(OBJEXT) t-mullo_.lo t-mulmod_bnm1_.$(OBJEXT) \
-t-mulmod_bnm1_.lo t-perfsqr_.$(OBJEXT) t-perfsqr_.lo t-scan_.$(OBJEXT) \
-t-scan_.lo t-sqrmod_bnm1_.$(OBJEXT) t-sqrmod_bnm1_.lo \
-t-toom22_.$(OBJEXT) t-toom22_.lo t-toom32_.$(OBJEXT) t-toom32_.lo \
-t-toom33_.$(OBJEXT) t-toom33_.lo t-toom42_.$(OBJEXT) t-toom42_.lo \
-t-toom43_.$(OBJEXT) t-toom43_.lo t-toom44_.$(OBJEXT) t-toom44_.lo \
-t-toom52_.$(OBJEXT) t-toom52_.lo t-toom53_.$(OBJEXT) t-toom53_.lo \
-t-toom62_.$(OBJEXT) t-toom62_.lo t-toom63_.$(OBJEXT) t-toom63_.lo \
-t-toom6h_.$(OBJEXT) t-toom6h_.lo t-toom8h_.$(OBJEXT) t-toom8h_.lo : \
-$(ANSI2KNR)
  
  mostlyclean-libtool:
         -rm -f *.lo
@@ -820,14 +839,15 @@ check-TESTS: $(TESTS)
           fi; \
           dashes=`echo "$$dashes" | sed s/./=/g`; \
           if test "$$failed" -eq 0; then \
-           echo "$$grn$$dashes"; \
+           col="$$grn"; \
           else \
-           echo "$$red$$dashes"; \
+           col="$$red"; \
           fi; \
-         echo "$$banner"; \
-         test -z "$$skipped" || echo "$$skipped"; \
-         test -z "$$report" || echo "$$report"; \
-         echo "$$dashes$$std"; \
+         echo "$${col}$$dashes$${std}"; \
+         echo "$${col}$$banner$${std}"; \
+         test -z "$$skipped" || echo "$${col}$$skipped$${std}"; \
+         test -z "$$report" || echo "$${col}$$report$${std}"; \
+         echo "$${col}$$dashes$${std}"; \
           test "$$failed" -eq 0; \
         else :; fi
  
@@ -877,10 +897,15 @@ install-am: all-am
  
  installcheck: installcheck-am
  install-strip:
-       $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
-         install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
-         `test -z '$(STRIP)' || \
-           echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+       if test -z '$(STRIP)'; then \
+         $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+           install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+             install; \
+       else \
+         $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+           install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+           "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
+       fi
  mostlyclean-generic:
  
  clean-generic:
@@ -948,7 +973,7 @@ maintainer-clean-am: distclean-am maintainer-clean-generic
  
  mostlyclean: mostlyclean-am
  
-mostlyclean-am: mostlyclean-compile mostlyclean-generic mostlyclean-kr \
+mostlyclean-am: mostlyclean-compile mostlyclean-generic \
         mostlyclean-libtool
  
  pdf: pdf-am
@@ -961,7 +986,7 @@ ps-am:
  
  uninstall-am:
  
-.MAKE: $(top_builddir)/ansi2knr check-am install-am install-strip
+.MAKE: check-am install-am install-strip
  
  .PHONY: CTAGS GTAGS all all-am check check-TESTS check-am clean \
         clean-checkPROGRAMS clean-generic clean-libtool ctags \
@@ -974,8 +999,8 @@ uninstall-am:
         install-ps install-ps-am install-strip installcheck \
         installcheck-am installdirs maintainer-clean \
         maintainer-clean-generic mostlyclean mostlyclean-compile \
-       mostlyclean-generic mostlyclean-kr mostlyclean-libtool pdf \
-       pdf-am ps ps-am tags uninstall uninstall-am
+       mostlyclean-generic mostlyclean-libtool pdf pdf-am ps ps-am \
+       tags uninstall uninstall-am
  
  
  $(top_builddir)/tests/libtests.la:
diff --git a/tests/mpn/logic.c b/tests/mpn/logic.c

index f6bfe7ff6ce39c773cdebaeab13d20a3344a3d8b..3077a75d88d0b6785c342a2abbd34264887ce62a 100644 (file)
--- a/tests/mpn/logic.c
+++ b/tests/mpn/logic.c
@@ -1,37 +1,51 @@
  /* Test mpn_and, mpn_ior, mpn_xor, mpn_andn, mpn_iorn, mpn_xnor, mpn_nand, and
     mpn_nior.
  
-Copyright 2011, 2012 Free Software Foundation, Inc.
+Copyright 2011, 2012, 2013 Free Software Foundation, Inc.
  
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
  
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
  
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
  
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see http://www.gnu.org/licenses/.  */
  
  
  #include <stdlib.h>
  #include <stdio.h>
  
+/* Pretend that native versions of the tested operations exist, so that we
+   actually test the compiled functions, i.e., the ones which users will reach.
+   The inlined variants will be tested through tests/mpz/logic.c.  */
+#define HAVE_NATIVE_mpn_com    1
+#define HAVE_NATIVE_mpn_and_n  1
+#define HAVE_NATIVE_mpn_andn_n 1
+#define HAVE_NATIVE_mpn_nand_n 1
+#define HAVE_NATIVE_mpn_ior_n  1
+#define HAVE_NATIVE_mpn_iorn_n 1
+#define HAVE_NATIVE_mpn_nior_n 1
+#define HAVE_NATIVE_mpn_xor_n  1
+#define HAVE_NATIVE_mpn_xnor_n 1
+
  #include "gmp.h"
  #include "gmp-impl.h"
  #include "tests.h"
  
+
  void
  check_one (mp_srcptr refp, mp_srcptr rp, mp_srcptr ap, mp_srcptr bp, mp_size_t n, char *funcname)
  {
    if (mpn_cmp (refp, rp, n))
      {
-      printf ("ERROR in mpn_%s_n\n", funcname);
+      printf ("ERROR in mpn_%s\n", funcname);
        printf ("a: "); mpn_dump (ap, n);
        printf ("b: "); mpn_dump (bp, n);
        printf ("r:   "); mpn_dump (rp, n);
@@ -71,35 +85,39 @@ main (int argc, char **argv)
  
           refmpn_and_n (refp, ap, bp, n);
           mpn_and_n (rp, ap, bp, n);
-         check_one (refp, rp, ap, bp, n, "and");
+         check_one (refp, rp, ap, bp, n, "and_n");
  
           refmpn_ior_n (refp, ap, bp, n);
           mpn_ior_n (rp, ap, bp, n);
-         check_one (refp, rp, ap, bp, n, "ior");
+         check_one (refp, rp, ap, bp, n, "ior_n");
  
           refmpn_xor_n (refp, ap, bp, n);
           mpn_xor_n (rp, ap, bp, n);
-         check_one (refp, rp, ap, bp, n, "xor");
+         check_one (refp, rp, ap, bp, n, "xor_n");
  
           refmpn_andn_n (refp, ap, bp, n);
           mpn_andn_n (rp, ap, bp, n);
-         check_one (refp, rp, ap, bp, n, "andn");
+         check_one (refp, rp, ap, bp, n, "andn_n");
  
           refmpn_iorn_n (refp, ap, bp, n);
           mpn_iorn_n (rp, ap, bp, n);
-         check_one (refp, rp, ap, bp, n, "iorn");
+         check_one (refp, rp, ap, bp, n, "iorn_n");
  
           refmpn_nand_n (refp, ap, bp, n);
           mpn_nand_n (rp, ap, bp, n);
-         check_one (refp, rp, ap, bp, n, "nand");
+         check_one (refp, rp, ap, bp, n, "nand_n");
  
           refmpn_nior_n (refp, ap, bp, n);
           mpn_nior_n (rp, ap, bp, n);
-         check_one (refp, rp, ap, bp, n, "nior");
+         check_one (refp, rp, ap, bp, n, "nior_n");
  
           refmpn_xnor_n (refp, ap, bp, n);
           mpn_xnor_n (rp, ap, bp, n);
-         check_one (refp, rp, ap, bp, n, "xnor");
+         check_one (refp, rp, ap, bp, n, "xnor_n");
+
+         refmpn_com (refp, ap, n);
+         mpn_com (rp, ap, n);
+         check_one (refp, rp, ap, bp, n, "com");
         }
      }
  
diff --git a/tests/mpn/t-aors_1.c b/tests/mpn/t-aors_1.c

index a1878bb845a85c99129cc3eb13378c0b9c419617..422f9c40e166bc754b906e99b578a902f76d0c89 100644 (file)
--- a/tests/mpn/t-aors_1.c
+++ b/tests/mpn/t-aors_1.c
@@ -2,20 +2,20 @@
  
  Copyright 2001, 2002 Free Software Foundation, Inc.
  
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
  
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
  
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
  
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see http://www.gnu.org/licenses/.  */
  
  #include <stdio.h>
  #include <stdlib.h>
@@ -48,9 +48,8 @@ along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
              got, data[i].want, data[i].size);   \
    } while (0)
  
-typedef mp_limb_t (*mpn_aors_1_t)
-     __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_limb_t));
-mpn_aors_1_t fudge __GMP_PROTO ((mpn_aors_1_t));
+typedef mp_limb_t (*mpn_aors_1_t) (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t);
+mpn_aors_1_t fudge (mpn_aors_1_t);
  
  
  void
diff --git a/tests/mpn/t-asmtype.c b/tests/mpn/t-asmtype.c

index 4ee5a7ab2f46ab5720104eab324445982ba39bed..56444fb6c6ac843a5a4946d6cb54eb268b4c9198 100644 (file)
--- a/tests/mpn/t-asmtype.c
+++ b/tests/mpn/t-asmtype.c
@@ -2,20 +2,20 @@
  
  Copyright 2001 Free Software Foundation, Inc.
  
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
  
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
  
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
  
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see http://www.gnu.org/licenses/.  */
  
  #include <stdio.h>
  #include <stdlib.h>
diff --git a/tests/mpn/t-bdiv.c b/tests/mpn/t-bdiv.c

index 4d56bfba743c131870cbf09c6a6b4b93f079d8c0..0feca7e3d604562362af6304abbba37b2e7b6f19 100644 (file)
--- a/tests/mpn/t-bdiv.c
+++ b/tests/mpn/t-bdiv.c
@@ -1,16 +1,19 @@
  /* Copyright 2006, 2007, 2009, 2010 Free Software Foundation, Inc.
  
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; either version 3 of the License, or (at your option) any later
-version.
+This file is part of the GNU MP Library test suite.
  
-This program is distributed in the hope that it will be useful, but WITHOUT ANY
-WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
-PARTICULAR PURPOSE.  See the GNU General Public License for more details.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
  
  You should have received a copy of the GNU General Public License along with
-this program.  If not, see http://www.gnu.org/licenses/.  */
+the GNU MP Library test suite.  If not, see http://www.gnu.org/licenses/.  */
  
  
  #include <stdlib.h>            /* for strtol */
@@ -55,7 +58,7 @@ static unsigned long test;
  
  void
  check_one (mp_ptr qp, mp_srcptr rp, mp_limb_t rh,
-          mp_srcptr np, mp_size_t nn, mp_srcptr dp, mp_size_t dn, char *fname)
+          mp_srcptr np, mp_size_t nn, mp_srcptr dp, mp_size_t dn, const char *fname)
  {
    mp_size_t qn;
    int cmp;
@@ -296,6 +299,27 @@ main (int argc, char **argv)
           check_one (qp, NULL, 0, np, nn, dp, dn, "mpn_dcpi1_bdiv_q");
         }
  
+      if (nn > dn)
+       {
+         /* Test mpn_bdiv_qr */
+         itch = mpn_bdiv_qr_itch (nn, dn);
+         if (itch + 1 > alloc)
+           {
+             scratch = __GMP_REALLOCATE_FUNC_LIMBS (scratch, alloc, itch + 1);
+             alloc = itch + 1;
+           }
+         scratch[itch] = ran;
+         MPN_ZERO (qp, nn - dn);
+         MPN_ZERO (rp, dn);
+         rp[dn] = rran1;
+         rh = mpn_bdiv_qr (qp, rp, np, nn, dp, dn, scratch);
+         ASSERT_ALWAYS (ran == scratch[itch]);
+         ASSERT_ALWAYS (qp[-1] == qran0);  ASSERT_ALWAYS (qp[nn - dn + 1] == qran1);
+         ASSERT_ALWAYS (rp[-1] == rran0);  ASSERT_ALWAYS (rp[dn] == rran1);
+
+         check_one (qp, rp, rh, np, nn, dp, dn, "mpn_bdiv_qr");
+       }
+
        if (nn - dn < 2 || dn < 2)
         continue;
  
diff --git a/tests/mpn/t-broot.c b/tests/mpn/t-broot.c

new file mode 100644 (file)

index 0000000..beb9973
--- /dev/null
+++ b/tests/mpn/t-broot.c
@@ -0,0 +1,105 @@
+/* Copyright 2012 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see http://www.gnu.org/licenses/.  */
+
+
+#include <stdlib.h>            /* for strtol */
+#include <stdio.h>             /* for printf */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+#include "tests/tests.h"
+
+#define MAX_LIMBS 150
+#define COUNT 500
+
+int
+main (int argc, char **argv)
+{
+  gmp_randstate_ptr rands;
+
+  mp_ptr ap, rp, pp, scratch;
+  int count = COUNT;
+  unsigned i;
+  TMP_DECL;
+
+  TMP_MARK;
+
+  if (argc > 1)
+    {
+      char *end;
+      count = strtol (argv[1], &end, 0);
+      if (*end || count <= 0)
+       {
+         fprintf (stderr, "Invalid test count: %s.\n", argv[1]);
+         return 1;
+       }
+    }
+
+  tests_start ();
+  rands = RANDS;
+
+  ap = TMP_ALLOC_LIMBS (MAX_LIMBS);
+  rp = TMP_ALLOC_LIMBS (MAX_LIMBS);
+  pp = TMP_ALLOC_LIMBS (MAX_LIMBS);
+  scratch = TMP_ALLOC_LIMBS (3*MAX_LIMBS); /* For mpn_powlo */
+
+  for (i = 0; i < count; i++)
+    {
+      mp_size_t n;
+      mp_limb_t k;
+      int c;
+
+      n = 1 + gmp_urandomm_ui (rands, MAX_LIMBS);
+
+      if (i & 1)
+       mpn_random2 (ap, n);
+      else
+       mpn_random (ap, n);
+
+      ap[0] |= 1;
+
+      if (i < 100)
+       k = 3 + 2*i;
+      else
+       {
+         mpn_random (&k, 1);
+         if (k < 3)
+           k = 3;
+         else
+           k |= 1;
+       }
+      mpn_broot (rp, ap, n, k);
+      mpn_powlo (pp, rp, &k, 1, n, scratch);
+
+      MPN_CMP (c, ap, pp, n);
+      if (c != 0)
+       {
+         gmp_fprintf (stderr,
+                      "mpn_broot returned bad result: %u limbs\n",
+                      (unsigned) n);
+         gmp_fprintf (stderr, "k   = %Mx\n", k);
+         gmp_fprintf (stderr, "a   = %Nx\n", ap, n);
+         gmp_fprintf (stderr, "r   = %Nx\n", rp, n);
+         gmp_fprintf (stderr, "r^n = %Nx\n", pp, n);
+         abort ();
+       }
+    }
+  TMP_FREE;
+  tests_end ();
+  return 0;
+}
diff --git a/tests/mpn/t-brootinv.c b/tests/mpn/t-brootinv.c

new file mode 100644 (file)

index 0000000..78b2515
--- /dev/null
+++ b/tests/mpn/t-brootinv.c
@@ -0,0 +1,107 @@
+/* Copyright 2012 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see http://www.gnu.org/licenses/.  */
+
+
+#include <stdlib.h>            /* for strtol */
+#include <stdio.h>             /* for printf */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+#include "tests/tests.h"
+
+#define MAX_LIMBS 150
+#define COUNT 500
+
+int
+main (int argc, char **argv)
+{
+  gmp_randstate_ptr rands;
+
+  mp_ptr ap, rp, pp, app, scratch;
+  int count = COUNT;
+  unsigned i;
+  TMP_DECL;
+
+  TMP_MARK;
+
+  if (argc > 1)
+    {
+      char *end;
+      count = strtol (argv[1], &end, 0);
+      if (*end || count <= 0)
+       {
+         fprintf (stderr, "Invalid test count: %s.\n", argv[1]);
+         return 1;
+       }
+    }
+
+  tests_start ();
+  rands = RANDS;
+
+  ap = TMP_ALLOC_LIMBS (MAX_LIMBS);
+  rp = TMP_ALLOC_LIMBS (MAX_LIMBS);
+  pp = TMP_ALLOC_LIMBS (MAX_LIMBS);
+  app = TMP_ALLOC_LIMBS (MAX_LIMBS);
+  scratch = TMP_ALLOC_LIMBS (5*MAX_LIMBS);
+
+  for (i = 0; i < count; i++)
+    {
+      mp_size_t n;
+      mp_limb_t k;
+      int c;
+
+      n = 1 + gmp_urandomm_ui (rands, MAX_LIMBS);
+
+      if (i & 1)
+       mpn_random2 (ap, n);
+      else
+       mpn_random (ap, n);
+
+      ap[0] |= 1;
+
+      if (i < 100)
+       k = 3 + 2*i;
+      else
+       {
+         mpn_random (&k, 1);
+         if (k < 3)
+           k = 3;
+         else
+           k |= 1;
+       }
+      mpn_brootinv (rp, ap, n, k, scratch);
+      mpn_powlo (pp, rp, &k, 1, n, scratch);
+      mpn_mullo_n (app, ap, pp, n);
+
+      if (app[0] != 1 || !mpn_zero_p (app+1, n-1))
+       {
+         gmp_fprintf (stderr,
+                      "mpn_brootinv returned bad result: %u limbs\n",
+                      (unsigned) n);
+         gmp_fprintf (stderr, "k     = %Mx\n", k);
+         gmp_fprintf (stderr, "a     = %Nx\n", ap, n);
+         gmp_fprintf (stderr, "r     = %Nx\n", rp, n);
+         gmp_fprintf (stderr, "r^n   = %Nx\n", pp, n);
+         gmp_fprintf (stderr, "a r^n = %Nx\n", app, n);
+         abort ();
+       }
+    }
+  TMP_FREE;
+  tests_end ();
+  return 0;
+}
diff --git a/tests/mpn/t-div.c b/tests/mpn/t-div.c

index 5ef0ec6598fb566b2c70301792cc60d0618432a7..a497244560d2fe9a54942b6189b59b4a25551d18 100644 (file)
--- a/tests/mpn/t-div.c
+++ b/tests/mpn/t-div.c
@@ -1,16 +1,19 @@
-/* Copyright 2006, 2007, 2009, 2010 Free Software Foundation, Inc.
+/* Copyright 2006, 2007, 2009, 2010, 2013 Free Software Foundation, Inc.
  
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; either version 3 of the License, or (at your option) any later
-version.
+This file is part of the GNU MP Library test suite.
  
-This program is distributed in the hope that it will be useful, but WITHOUT ANY
-WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
-PARTICULAR PURPOSE.  See the GNU General Public License for more details.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
  
  You should have received a copy of the GNU General Public License along with
-this program.  If not, see http://www.gnu.org/licenses/.  */
+the GNU MP Library test suite.  If not, see http://www.gnu.org/licenses/.  */
  
  
  #include <stdlib.h>            /* for strtol */
@@ -51,12 +54,12 @@ dumpy (mp_srcptr p, mp_size_t n)
    puts ("");
  }
  
-static unsigned long test;
+static signed long test;
  
  static void
  check_one (mp_ptr qp, mp_srcptr rp,
            mp_srcptr np, mp_size_t nn, mp_srcptr dp, mp_size_t dn,
-          char *fname, mp_limb_t q_allowed_err)
+          const char *fname, mp_limb_t q_allowed_err)
  {
    mp_size_t qn = nn - dn + 1;
    mp_ptr tp;
@@ -81,7 +84,7 @@ check_one (mp_ptr qp, mp_srcptr rp,
        tvalue = "Q*D";
      error:
        printf ("\r*******************************************************************************\n");
-      printf ("%s failed test %lu: %s\n", fname, test, msg);
+      printf ("%s failed test %ld: %s\n", fname, test, msg);
        printf ("N=    "); dumpy (np, nn);
        printf ("D=    "); dumpy (dp, dn);
        printf ("Q=    "); dumpy (qp, qn);
@@ -139,12 +142,12 @@ main (int argc, char **argv)
  {
    gmp_randstate_ptr rands;
    unsigned long maxnbits, maxdbits, nbits, dbits;
-  mpz_t n, d, q, r, tz;
+  mpz_t n, d, q, r, tz, junk;
    mp_size_t maxnn, maxdn, nn, dn, clearn, i;
-  mp_ptr np, dp, qp, rp;
+  mp_ptr np, dup, dnp, qp, rp, junkp;
    mp_limb_t t;
    gmp_pi1_t dinv;
-  int count = COUNT;
+  long count = COUNT;
    mp_ptr scratch;
    mp_limb_t ran;
    mp_size_t alloc, itch;
@@ -162,7 +165,6 @@ main (int argc, char **argv)
         }
      }
  
-
    maxdbits = MAX_DN;
    maxnbits = MAX_NN;
  
@@ -174,6 +176,7 @@ main (int argc, char **argv)
    mpz_init (q);
    mpz_init (r);
    mpz_init (tz);
+  mpz_init (junk);
  
    maxnn = maxnbits / GMP_NUMB_BITS + 1;
    maxdn = maxdbits / GMP_NUMB_BITS + 1;
@@ -182,21 +185,19 @@ main (int argc, char **argv)
  
    qp = TMP_ALLOC_LIMBS (maxnn + 2) + 1;
    rp = TMP_ALLOC_LIMBS (maxnn + 2) + 1;
+  dnp = TMP_ALLOC_LIMBS (maxdn);
  
    alloc = 1;
    scratch = __GMP_ALLOCATE_FUNC_LIMBS (alloc);
  
-  for (test = 0; test < count;)
+  for (test = -300; test < count; test++)
      {
-      do
-       {
-         nbits = random_word (rands) % (maxnbits - GMP_NUMB_BITS) + 2 * GMP_NUMB_BITS;
-         if (maxdbits > nbits)
-           dbits = random_word (rands) % nbits + 1;
-         else
-           dbits = random_word (rands) % maxdbits + 1;
-       }
-      while (nbits < dbits);
+      nbits = random_word (rands) % (maxnbits - GMP_NUMB_BITS) + 2 * GMP_NUMB_BITS;
+
+      if (test < 0)
+       dbits = (test + 300) % (nbits - 1) + 1;
+      else
+       dbits = random_word (rands) % (nbits - 1) % maxdbits + 1;
  
  #if RAND_UNIFORM
  #define RANDFUNC mpz_urandomb
@@ -208,8 +209,9 @@ main (int argc, char **argv)
         RANDFUNC (d, rands, dbits);
        while (mpz_sgn (d) == 0);
        dn = SIZ (d);
-      dp = PTR (d);
-      dp[dn - 1] |= GMP_NUMB_HIGHBIT;
+      dup = PTR (d);
+      MPN_COPY (dnp, dup, dn);
+      dnp[dn - 1] |= GMP_NUMB_HIGHBIT;
  
        if (test % 2 == 0)
         {
@@ -233,13 +235,19 @@ main (int argc, char **argv)
        ASSERT_ALWAYS (nn <= maxnn);
        ASSERT_ALWAYS (dn <= maxdn);
  
+      mpz_urandomb (junk, rands, nbits);
+      junkp = PTR (junk);
+
        np = PTR (n);
  
        mpz_urandomb (tz, rands, 32);
        t = mpz_get_ui (tz);
  
        if (t % 17 == 0)
-       dp[dn - 1] = GMP_NUMB_MAX;
+       {
+         dnp[dn - 1] = GMP_NUMB_MAX;
+         dup[dn - 1] = GMP_NUMB_MAX;
+       }
  
        switch ((int) t % 16)
         {
@@ -249,16 +257,15 @@ main (int argc, char **argv)
             np[i] = 0;
           break;
         case 1:
-         mpn_sub_1 (np + nn - dn, dp, dn, random_word (rands));
+         mpn_sub_1 (np + nn - dn, dnp, dn, random_word (rands));
           break;
         case 2:
-         mpn_add_1 (np + nn - dn, dp, dn, random_word (rands));
+         mpn_add_1 (np + nn - dn, dnp, dn, random_word (rands));
           break;
         }
  
-      test++;
-
-      invert_pi1 (dinv, dp[dn - 1], dp[dn - 2]);
+      if (dn >= 2)
+       invert_pi1 (dinv, dnp[dn - 1], dnp[dn - 2]);
  
        rran0 = random_word (rands);
        rran1 = random_word (rands);
@@ -278,9 +285,9 @@ main (int argc, char **argv)
             {
               MPN_COPY (rp, np, nn);
               if (nn > dn)
-               MPN_ZERO (qp, nn - dn);
-             qp[nn - dn] = mpn_sbpi1_div_qr (qp, rp, nn, dp, dn, dinv.inv32);
-             check_one (qp, rp, np, nn, dp, dn, "mpn_sbpi1_div_qr", 0);
+               MPN_COPY (qp, junkp, nn - dn);
+             qp[nn - dn] = mpn_sbpi1_div_qr (qp, rp, nn, dnp, dn, dinv.inv32);
+             check_one (qp, rp, np, nn, dnp, dn, "mpn_sbpi1_div_qr", 0);
             }
  
           /* Test mpn_sbpi1_divappr_q */
@@ -288,9 +295,9 @@ main (int argc, char **argv)
             {
               MPN_COPY (rp, np, nn);
               if (nn > dn)
-               MPN_ZERO (qp, nn - dn);
-             qp[nn - dn] = mpn_sbpi1_divappr_q (qp, rp, nn, dp, dn, dinv.inv32);
-             check_one (qp, NULL, np, nn, dp, dn, "mpn_sbpi1_divappr_q", 1);
+               MPN_COPY (qp, junkp, nn - dn);
+             qp[nn - dn] = mpn_sbpi1_divappr_q (qp, rp, nn, dnp, dn, dinv.inv32);
+             check_one (qp, NULL, np, nn, dnp, dn, "mpn_sbpi1_divappr_q", 1);
             }
  
           /* Test mpn_sbpi1_div_q */
@@ -298,10 +305,40 @@ main (int argc, char **argv)
             {
               MPN_COPY (rp, np, nn);
               if (nn > dn)
-               MPN_ZERO (qp, nn - dn);
-             qp[nn - dn] = mpn_sbpi1_div_q (qp, rp, nn, dp, dn, dinv.inv32);
-             check_one (qp, NULL, np, nn, dp, dn, "mpn_sbpi1_div_q", 0);
+               MPN_COPY (qp, junkp, nn - dn);
+             qp[nn - dn] = mpn_sbpi1_div_q (qp, rp, nn, dnp, dn, dinv.inv32);
+             check_one (qp, NULL, np, nn, dnp, dn, "mpn_sbpi1_div_q", 0);
             }
+
+         /* Test mpn_sb_div_qr_sec */
+         itch = 3 * nn + 4;
+         if (itch + 1 > alloc)
+           {
+             scratch = __GMP_REALLOCATE_FUNC_LIMBS (scratch, alloc, itch + 1);
+             alloc = itch + 1;
+           }
+         scratch[itch] = ran;
+         MPN_COPY (rp, np, nn);
+         if (nn >= dn)
+           MPN_COPY (qp, junkp, nn - dn + 1);
+         mpn_sb_div_qr_sec (qp, rp, nn, dup, dn, scratch);
+         ASSERT_ALWAYS (ran == scratch[itch]);
+         check_one (qp, rp, np, nn, dup, dn, "mpn_sb_div_qr_sec", 0);
+
+         /* Test mpn_sb_div_r_sec */
+         itch = nn + 2 * dn + 2;
+         if (itch + 1 > alloc)
+           {
+             scratch = __GMP_REALLOCATE_FUNC_LIMBS (scratch, alloc, itch + 1);
+             alloc = itch + 1;
+           }
+         scratch[itch] = ran;
+         MPN_COPY (rp, np, nn);
+         mpn_sb_div_r_sec (rp, nn, dup, dn, scratch);
+         ASSERT_ALWAYS (ran == scratch[itch]);
+         /* Note: Since check_one cannot cope with remainder-only functions, we
+            pass qp[] from the previous function, mpn_sb_div_qr_sec.  */
+         check_one (qp, rp, np, nn, dup, dn, "mpn_sb_div_r_sec", 0);
         }
  
        /* Test mpn_dcpi1_div_qr */
@@ -309,11 +346,11 @@ main (int argc, char **argv)
         {
           MPN_COPY (rp, np, nn);
           if (nn > dn)
-           MPN_ZERO (qp, nn - dn);
-         qp[nn - dn] = mpn_dcpi1_div_qr (qp, rp, nn, dp, dn, &dinv);
+           MPN_COPY (qp, junkp, nn - dn);
+         qp[nn - dn] = mpn_dcpi1_div_qr (qp, rp, nn, dnp, dn, &dinv);
           ASSERT_ALWAYS (qp[-1] == qran0);  ASSERT_ALWAYS (qp[nn - dn + 1] == qran1);
           ASSERT_ALWAYS (rp[-1] == rran0);
-         check_one (qp, rp, np, nn, dp, dn, "mpn_dcpi1_div_qr", 0);
+         check_one (qp, rp, np, nn, dnp, dn, "mpn_dcpi1_div_qr", 0);
         }
  
        /* Test mpn_dcpi1_divappr_q */
@@ -321,11 +358,11 @@ main (int argc, char **argv)
         {
           MPN_COPY (rp, np, nn);
           if (nn > dn)
-           MPN_ZERO (qp, nn - dn);
-         qp[nn - dn] = mpn_dcpi1_divappr_q (qp, rp, nn, dp, dn, &dinv);
+           MPN_COPY (qp, junkp, nn - dn);
+         qp[nn - dn] = mpn_dcpi1_divappr_q (qp, rp, nn, dnp, dn, &dinv);
           ASSERT_ALWAYS (qp[-1] == qran0);  ASSERT_ALWAYS (qp[nn - dn + 1] == qran1);
           ASSERT_ALWAYS (rp[-1] == rran0);
-         check_one (qp, NULL, np, nn, dp, dn, "mpn_dcpi1_divappr_q", 1);
+         check_one (qp, NULL, np, nn, dnp, dn, "mpn_dcpi1_divappr_q", 1);
         }
  
        /* Test mpn_dcpi1_div_q */
@@ -333,11 +370,11 @@ main (int argc, char **argv)
         {
           MPN_COPY (rp, np, nn);
           if (nn > dn)
-           MPN_ZERO (qp, nn - dn);
-         qp[nn - dn] = mpn_dcpi1_div_q (qp, rp, nn, dp, dn, &dinv);
+           MPN_COPY (qp, junkp, nn - dn);
+         qp[nn - dn] = mpn_dcpi1_div_q (qp, rp, nn, dnp, dn, &dinv);
           ASSERT_ALWAYS (qp[-1] == qran0);  ASSERT_ALWAYS (qp[nn - dn + 1] == qran1);
           ASSERT_ALWAYS (rp[-1] == rran0);
-         check_one (qp, NULL, np, nn, dp, dn, "mpn_dcpi1_div_q", 0);
+         check_one (qp, NULL, np, nn, dnp, dn, "mpn_dcpi1_div_q", 0);
         }
  
       /* Test mpn_mu_div_qr */
@@ -350,14 +387,14 @@ main (int argc, char **argv)
               alloc = itch + 1;
             }
           scratch[itch] = ran;
-         MPN_ZERO (qp, nn - dn);
+         MPN_COPY (qp, junkp, nn - dn);
           MPN_ZERO (rp, dn);
           rp[dn] = rran1;
-         qp[nn - dn] = mpn_mu_div_qr (qp, rp, np, nn, dp, dn, scratch);
+         qp[nn - dn] = mpn_mu_div_qr (qp, rp, np, nn, dnp, dn, scratch);
           ASSERT_ALWAYS (ran == scratch[itch]);
           ASSERT_ALWAYS (qp[-1] == qran0);  ASSERT_ALWAYS (qp[nn - dn + 1] == qran1);
           ASSERT_ALWAYS (rp[-1] == rran0);  ASSERT_ALWAYS (rp[dn] == rran1);
-         check_one (qp, rp, np, nn, dp, dn, "mpn_mu_div_qr", 0);
+         check_one (qp, rp, np, nn, dnp, dn, "mpn_mu_div_qr", 0);
         }
  
        /* Test mpn_mu_divappr_q */
@@ -370,11 +407,11 @@ main (int argc, char **argv)
               alloc = itch + 1;
             }
           scratch[itch] = ran;
-         MPN_ZERO (qp, nn - dn);
-         qp[nn - dn] = mpn_mu_divappr_q (qp, np, nn, dp, dn, scratch);
+         MPN_COPY (qp, junkp, nn - dn);
+         qp[nn - dn] = mpn_mu_divappr_q (qp, np, nn, dnp, dn, scratch);
           ASSERT_ALWAYS (ran == scratch[itch]);
           ASSERT_ALWAYS (qp[-1] == qran0);  ASSERT_ALWAYS (qp[nn - dn + 1] == qran1);
-         check_one (qp, NULL, np, nn, dp, dn, "mpn_mu_divappr_q", 4);
+         check_one (qp, NULL, np, nn, dnp, dn, "mpn_mu_divappr_q", 4);
         }
  
        /* Test mpn_mu_div_q */
@@ -387,14 +424,13 @@ main (int argc, char **argv)
               alloc = itch + 1;
             }
           scratch[itch] = ran;
-         MPN_ZERO (qp, nn - dn);
-         qp[nn - dn] = mpn_mu_div_q (qp, np, nn, dp, dn, scratch);
+         MPN_COPY (qp, junkp, nn - dn);
+         qp[nn - dn] = mpn_mu_div_q (qp, np, nn, dnp, dn, scratch);
           ASSERT_ALWAYS (ran == scratch[itch]);
           ASSERT_ALWAYS (qp[-1] == qran0);  ASSERT_ALWAYS (qp[nn - dn + 1] == qran1);
-         check_one (qp, NULL, np, nn, dp, dn, "mpn_mu_div_q", 0);
+         check_one (qp, NULL, np, nn, dnp, dn, "mpn_mu_div_q", 0);
         }
  
-
        if (1)
         {
           itch = nn + 1;
@@ -404,28 +440,37 @@ main (int argc, char **argv)
               alloc = itch + 1;
             }
           scratch[itch] = ran;
-         mpn_div_q (qp, np, nn, dp, dn, scratch);
+         mpn_div_q (qp, np, nn, dup, dn, scratch);
           ASSERT_ALWAYS (ran == scratch[itch]);
           ASSERT_ALWAYS (qp[-1] == qran0);  ASSERT_ALWAYS (qp[nn - dn + 1] == qran1);
-         check_one (qp, NULL, np, nn, dp, dn, "mpn_div_q", 0);
+         check_one (qp, NULL, np, nn, dup, dn, "mpn_div_q", 0);
         }
  
-      /* Finally, test mpn_div_q without msb set.  */
-      dp[dn - 1] &= ~GMP_NUMB_HIGHBIT;
-      if (dp[dn - 1] == 0)
-       continue;
-
-      itch = nn + 1;
-      if (itch + 1> alloc)
+      if (dn >= 2 && nn >= 2)
         {
-         scratch = __GMP_REALLOCATE_FUNC_LIMBS (scratch, alloc, itch + 1);
-         alloc = itch + 1;
+         mp_limb_t qh;
+
+         /* mpn_divrem_2 */
+         MPN_COPY (rp, np, nn);
+         qp[nn - 2] = qp[nn-1] = qran1;
+
+         qh = mpn_divrem_2 (qp, 0, rp, nn, dnp + dn - 2);
+         ASSERT_ALWAYS (qp[nn - 2] == qran1);
+         ASSERT_ALWAYS (qp[-1] == qran0);  ASSERT_ALWAYS (qp[nn - 1] == qran1);
+         qp[nn - 2] = qh;
+         check_one (qp, rp, np, nn, dnp + dn - 2, 2, "mpn_divrem_2", 0);
+
+         /* Missing: divrem_2 with fraction limbs. */
+
+         /* mpn_div_qr_2 */
+         qp[nn - 2] = qran1;
+
+         qh = mpn_div_qr_2 (qp, rp, np, nn, dup + dn - 2);
+         ASSERT_ALWAYS (qp[nn - 2] == qran1);
+         ASSERT_ALWAYS (qp[-1] == qran0);  ASSERT_ALWAYS (qp[nn - 1] == qran1);
+         qp[nn - 2] = qh;
+         check_one (qp, rp, np, nn, dup + dn - 2, 2, "mpn_div_qr_2", 0);
         }
-      scratch[itch] = ran;
-      mpn_div_q (qp, np, nn, dp, dn, scratch);
-      ASSERT_ALWAYS (ran == scratch[itch]);
-      ASSERT_ALWAYS (qp[-1] == qran0);  ASSERT_ALWAYS (qp[nn - dn + 1] == qran1);
-      check_one (qp, NULL, np, nn, dp, dn, "mpn_div_q", 0);
      }
  
    __GMP_FREE_FUNC_LIMBS (scratch, alloc);
@@ -437,6 +482,7 @@ main (int argc, char **argv)
    mpz_clear (q);
    mpz_clear (r);
    mpz_clear (tz);
+  mpz_clear (junk);
  
    tests_end ();
    return 0;
diff --git a/tests/mpn/t-divrem_1.c b/tests/mpn/t-divrem_1.c

index 27eba9c0b0bfd80118f47d9979784ea333d0d3aa..745b177df5e41d6a407ca0ec0fccbcce91abb3f9 100644 (file)
--- a/tests/mpn/t-divrem_1.c
+++ b/tests/mpn/t-divrem_1.c
@@ -2,20 +2,20 @@
  
  Copyright 2003 Free Software Foundation, Inc.
  
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
  
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
  
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
  
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see http://www.gnu.org/licenses/.  */
  
  #include <stdio.h>
  #include <stdlib.h>
@@ -42,6 +42,11 @@ check_data (void)
      { { 5 }, 1, 2, 0,
        { 2 }, 1},
  
+    /* Exercises the q update in the nl == constant 0 case of
+       udiv_qrnnd_preinv3. Test case copied from t-fat.c. */
+    { { 287 }, 1, 7, 1,
+      { 0, 41 }, 0 },
+
  #if GMP_NUMB_BITS == 32
      { { 0x3C }, 1, 0xF2, 1,
        { 0x3F789854, 0 }, 0x98 },
diff --git a/tests/mpn/t-fat.c b/tests/mpn/t-fat.c

index 9e208245fef732dacec268711486ae14f8b73270..fd71e891e7c9e048923f340a85c40b6a57c296f0 100644 (file)
--- a/tests/mpn/t-fat.c
+++ b/tests/mpn/t-fat.c
@@ -1,21 +1,21 @@
  /* Test fat binary setups.
  
-Copyright 2003 Free Software Foundation, Inc.
+Copyright 2003, 2012 Free Software Foundation, Inc.
  
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
  
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
  
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
  
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see http://www.gnu.org/licenses/.  */
  
  #include <stdio.h>
  #include <stdlib.h>
@@ -66,7 +66,7 @@ along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
  /* dummies when not a fat binary */
  #if ! WANT_FAT_BINARY
  struct cpuvec_t {
-  int  initialized;
+  int  dummy;
  };
  struct cpuvec_t __gmpn_cpuvec;
  #define ITERATE_FAT_THRESHOLDS()  do { } while (0)
@@ -207,6 +207,15 @@ check_functions (void)
        ASSERT_ALWAYS (wp[1] == 0);
      }
  
+  memcpy (&__gmpn_cpuvec, &initial_cpuvec, sizeof (__gmpn_cpuvec));
+  for (i = 0; i < 2; i++)
+    {
+      xp[0] = 5;
+      yp[0] = 7;
+      mpn_mullo_basecase (wp, xp, yp, (mp_size_t) 1);
+      ASSERT_ALWAYS (wp[0] == 35);
+    }
+
  #if HAVE_NATIVE_mpn_preinv_divrem_1 && GMP_NAIL_BITS == 0
    memcpy (&__gmpn_cpuvec, &initial_cpuvec, sizeof (__gmpn_cpuvec));
    for (i = 0; i < 2; i++)
@@ -269,17 +278,18 @@ check_functions (void)
      }
  }
  
-/* Expect the first use of a each fat threshold to invoke the necessary
+/* Expect the first use of each fat threshold to invoke the necessary
     initialization.  */
  void
  check_thresholds (void)
  {
  #define ITERATE(name,field)                                             \
    do {                                                                  \
+    __gmpn_cpuvec_initialized = 0;                                     \
      memcpy (&__gmpn_cpuvec, &initial_cpuvec, sizeof (__gmpn_cpuvec));   \
      ASSERT_ALWAYS (name != 0);                                          \
      ASSERT_ALWAYS (name == __gmpn_cpuvec.field);                        \
-    ASSERT_ALWAYS (__gmpn_cpuvec.initialized);                          \
+    ASSERT_ALWAYS (__gmpn_cpuvec_initialized);                          \
    } while (0)
  
    ITERATE_FAT_THRESHOLDS ();
diff --git a/tests/mpn/t-get_d.c b/tests/mpn/t-get_d.c

index c3999e2a7c5eb09664f84cb645890e1a8d1cce48..a98472fad8bb7ce8c479f7e34c1c8705f5840632 100644 (file)
--- a/tests/mpn/t-get_d.c
+++ b/tests/mpn/t-get_d.c
@@ -2,20 +2,20 @@
  
  Copyright 2002, 2003, 2004 Free Software Foundation, Inc.
  
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
  
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
  
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
  
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see http://www.gnu.org/licenses/.  */
  
  /* Note that we don't use <limits.h> for LONG_MIN, but instead our own
     definition in gmp-impl.h.  In gcc 2.95.4 (debian 3.0) under
@@ -71,7 +71,7 @@ check_onebit (void)
    };
  
    /* FIXME: It'd be better to base this on the float format. */
-#ifdef __vax
+#if defined (__vax) || defined (__vax__)
    int     limit = 127;  /* vax fp numbers have limited range */
  #else
    int     limit = 511;
@@ -96,7 +96,7 @@ check_onebit (void)
            exp = exp_table[exp_i];
  
            want_bit = bit + exp;
-          if (want_bit > limit || want_bit < -limit)
+          if (want_bit >= limit || want_bit <= -limit)
              continue;
  
            want = 1.0;
@@ -495,7 +495,9 @@ main (void)
    check_ieee_denorm ();
    check_ieee_overflow ();
    check_0x81c25113 ();
+#if ! (defined (__vax) || defined (__vax__))
    check_rand ();
+#endif
  
    tests_end ();
    exit (0);
diff --git a/tests/mpn/t-hgcd.c b/tests/mpn/t-hgcd.c

index 60615cec24a7d22f7b6034dfd3ff85c732488eea..99357eb5611e1771e2b8735f44a1922a2f33e759 100644 (file)
--- a/tests/mpn/t-hgcd.c
+++ b/tests/mpn/t-hgcd.c
@@ -1,22 +1,22 @@
-/* Test mpz_gcd, mpz_gcdext, and mpz_gcd_ui.
+/* Test mpn_hgcd.
  
  Copyright 1991, 1993, 1994, 1996, 1997, 2000, 2001, 2002, 2003, 2004 Free
  Software Foundation, Inc.
  
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
  
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
  
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
  
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see http://www.gnu.org/licenses/.  */
  
  #include <stdio.h>
  #include <stdlib.h>
@@ -25,8 +25,8 @@ along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
  #include "gmp-impl.h"
  #include "tests.h"
  
-static mp_size_t one_test __GMP_PROTO ((mpz_t, mpz_t, int));
-static void debug_mp __GMP_PROTO ((mpz_t, int));
+static mp_size_t one_test (mpz_t, mpz_t, int);
+static void debug_mp (mpz_t, int);
  
  #define MIN_OPERAND_SIZE 2
  
@@ -50,10 +50,10 @@ struct hgcd_ref
    mpz_t m[2][2];
  };
  
-static void hgcd_ref_init __GMP_PROTO ((struct hgcd_ref *hgcd));
-static void hgcd_ref_clear __GMP_PROTO ((struct hgcd_ref *hgcd));
-static int hgcd_ref __GMP_PROTO ((struct hgcd_ref *hgcd, mpz_t a, mpz_t b));
-static int hgcd_ref_equal __GMP_PROTO ((const struct hgcd_matrix *hgcd, const struct hgcd_ref *ref));
+static void hgcd_ref_init (struct hgcd_ref *);
+static void hgcd_ref_clear (struct hgcd_ref *);
+static int hgcd_ref (struct hgcd_ref *, mpz_t, mpz_t);
+static int hgcd_ref_equal (const struct hgcd_matrix *, const struct hgcd_ref *);
  
  int
  main (int argc, char **argv)
@@ -97,9 +97,7 @@ main (int argc, char **argv)
      {
        /* Generate plain operands with unknown gcd.  These types of operands
          have proven to trigger certain bugs in development versions of the
-        gcd code.  The "hgcd->row[3].rsize > M" ASSERT is not triggered by
-        the division chain code below, but that is most likely just a result
-        of that other ASSERTs are triggered before it.  */
+        gcd code. */
  
        mpz_urandomb (bs, rands, 32);
        size_range = mpz_get_ui (bs) % 13 + 2;
diff --git a/tests/mpn/t-hgcd_appr.c b/tests/mpn/t-hgcd_appr.c

new file mode 100644 (file)

index 0000000..186dfab
--- /dev/null
+++ b/tests/mpn/t-hgcd_appr.c
@@ -0,0 +1,577 @@
+/* Test mpn_hgcd_appr.
+
+Copyright 1991, 1993, 1994, 1996, 1997, 2000, 2001, 2002, 2003, 2004, 2011 Free
+Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see http://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "tests.h"
+
+static mp_size_t one_test (mpz_t, mpz_t, int);
+static void debug_mp (mpz_t, int);
+
+#define MIN_OPERAND_SIZE 2
+
+/* 2x2 matrix of mpz_t entries.  It is filled in by the reference
+   implementation hgcd_ref, and hgcd_appr_valid_p uses a second instance
+   to hold a copy of the matrix produced by mpn_hgcd_appr.  */
+struct hgcd_ref
+{
+  mpz_t m[2][2];
+};
+
+static void hgcd_ref_init (struct hgcd_ref *hgcd);
+static void hgcd_ref_clear (struct hgcd_ref *hgcd);
+static int hgcd_ref (struct hgcd_ref *hgcd, mpz_t a, mpz_t b);
+static int hgcd_ref_equal (const struct hgcd_ref *, const struct hgcd_ref *);
+static int hgcd_appr_valid_p (mpz_t, mpz_t, mp_size_t, struct hgcd_ref *,
+                             mpz_t, mpz_t, mp_size_t, struct hgcd_matrix *);
+
+static int verbose_flag = 0;
+
+/* Test driver.  The only accepted command line argument is "-v", which
+   sets verbose_flag; any other argument prints an error and returns 1.
+   Runs 15 rounds; each round calls one_test on a pair of plain random
+   operands and then on a pair built by the backwards division chain
+   below.  */
+int
+main (int argc, char **argv)
+{
+  mpz_t op1, op2, temp1, temp2;
+  int i, j, chain_len;
+  gmp_randstate_ptr rands;
+  mpz_t bs;
+  unsigned long size_range;
+
+  if (argc > 1)
+    {
+      if (strcmp (argv[1], "-v") == 0)
+       verbose_flag = 1;
+      else
+       {
+         fprintf (stderr, "Invalid argument.\n");
+         return 1;
+       }
+    }
+
+  tests_start ();
+  rands = RANDS;
+
+  mpz_init (bs);
+  mpz_init (op1);
+  mpz_init (op2);
+  mpz_init (temp1);
+  mpz_init (temp2);
+
+  for (i = 0; i < 15; i++)
+    {
+      /* Generate plain operands with unknown gcd.  These types of operands
+        have proven to trigger certain bugs in development versions of the
+        gcd code. */
+
+      mpz_urandomb (bs, rands, 32);
+      size_range = mpz_get_ui (bs) % 13 + 2;
+
+      mpz_urandomb (bs, rands, size_range);
+      mpz_urandomb (op1, rands, mpz_get_ui (bs) + MIN_OPERAND_SIZE);
+      mpz_urandomb (bs, rands, size_range);
+      mpz_urandomb (op2, rands, mpz_get_ui (bs) + MIN_OPERAND_SIZE);
+
+      /* one_test asserts asize >= bsize, so put the larger value first.  */
+      if (mpz_cmp (op1, op2) < 0)
+       mpz_swap (op1, op2);
+
+      /* Skip the round when the larger operand is zero.  */
+      if (mpz_size (op1) > 0)
+       one_test (op1, op2, i);
+
+      /* Generate a division chain backwards, allowing otherwise
+        unlikely huge quotients.  */
+
+      mpz_set_ui (op1, 0);
+      mpz_urandomb (bs, rands, 32);
+      mpz_urandomb (bs, rands, mpz_get_ui (bs) % 16 + 1);
+      mpz_rrandomb (op2, rands, mpz_get_ui (bs));
+      mpz_add_ui (op2, op2, 1);
+
+#if 0
+      chain_len = 1000000;
+#else
+      mpz_urandomb (bs, rands, 32);
+      chain_len = mpz_get_ui (bs) % (GMP_NUMB_BITS * GCD_DC_THRESHOLD / 256);
+#endif
+
+      for (j = 0; j < chain_len; j++)
+       {
+         /* op1 += op2 * temp2, where temp2 is a random value plus one.  */
+         mpz_urandomb (bs, rands, 32);
+         mpz_urandomb (bs, rands, mpz_get_ui (bs) % 12 + 1);
+         mpz_rrandomb (temp2, rands, mpz_get_ui (bs) + 1);
+         mpz_add_ui (temp2, temp2, 1);
+         mpz_mul (temp1, op2, temp2);
+         mpz_add (op1, op1, temp1);
+
+         /* Don't generate overly huge operands.  */
+         if (SIZ (op1) > 3 * GCD_DC_THRESHOLD)
+           break;
+
+         /* op2 += op1 * temp2, the same step with the roles swapped.  */
+         mpz_urandomb (bs, rands, 32);
+         mpz_urandomb (bs, rands, mpz_get_ui (bs) % 12 + 1);
+         mpz_rrandomb (temp2, rands, mpz_get_ui (bs) + 1);
+         mpz_add_ui (temp2, temp2, 1);
+         mpz_mul (temp1, op1, temp2);
+         mpz_add (op2, op2, temp1);
+
+         /* Don't generate overly huge operands.  */
+         if (SIZ (op2) > 3 * GCD_DC_THRESHOLD)
+           break;
+       }
+      /* Same ordering and non-zero guard as for the plain operands.  */
+      if (mpz_cmp (op1, op2) < 0)
+       mpz_swap (op1, op2);
+
+      if (mpz_size (op1) > 0)
+       one_test (op1, op2, i);
+    }
+
+  mpz_clear (bs);
+  mpz_clear (op1);
+  mpz_clear (op2);
+  mpz_clear (temp1);
+  mpz_clear (temp2);
+
+  tests_end ();
+  exit (0);
+}
+
+/* Write X to stderr in the given BASE, followed by a newline.  */
+static void
+debug_mp (mpz_t x, int base)
+{
+  mpz_out_str (stderr, base, x);
+  fputc ('\n', stderr);
+}
+
+static int
+mpz_mpn_equal (const mpz_t a, mp_srcptr bp, mp_size_t bsize);
+
+/* Run mpn_hgcd_appr on the operands A and B and validate the outcome
+   against the reference implementation hgcd_ref by means of
+   hgcd_appr_valid_p.  Marker limbs are stored just before and just after
+   both scratch areas; the test aborts if any marker has changed or if
+   the result is judged invalid.  I is the test index and is only used in
+   diagnostics.  Returns the value returned by hgcd_ref.  */
+static mp_size_t
+one_test (mpz_t a, mpz_t b, int i)
+{
+  struct hgcd_matrix hgcd;
+  struct hgcd_ref ref;
+
+  mpz_t ref_r0;
+  mpz_t ref_r1;
+  mpz_t hgcd_r0;
+  mpz_t hgcd_r1;
+
+  int res[2];
+  mp_size_t asize;
+  mp_size_t bsize;
+
+  mp_size_t hgcd_init_scratch;
+  mp_size_t hgcd_scratch;
+
+  mp_ptr hgcd_init_tp;
+  mp_ptr hgcd_tp;
+  mp_limb_t marker[4];
+
+  asize = a->_mp_size;
+  bsize = b->_mp_size;
+
+  ASSERT (asize >= bsize);
+
+  /* Each scratch area is allocated with two extra limbs and the pointer
+     is advanced by one, so that index -1 and index <scratch size> are
+     valid positions for the markers.  */
+  hgcd_init_scratch = MPN_HGCD_MATRIX_INIT_ITCH (asize);
+  hgcd_init_tp = refmpn_malloc_limbs (hgcd_init_scratch + 2) + 1;
+  mpn_hgcd_matrix_init (&hgcd, asize, hgcd_init_tp);
+
+  hgcd_scratch = mpn_hgcd_appr_itch (asize);
+  hgcd_tp = refmpn_malloc_limbs (hgcd_scratch + 2) + 1;
+
+  mpn_random (marker, 4);
+
+  hgcd_init_tp[-1] = marker[0];
+  hgcd_init_tp[hgcd_init_scratch] = marker[1];
+  hgcd_tp[-1] = marker[2];
+  hgcd_tp[hgcd_scratch] = marker[3];
+
+  /* Disabled debug output of the operands.  */
+#if 0
+  fprintf (stderr,
+          "one_test: i = %d asize = %d, bsize = %d\n",
+          i, a->_mp_size, b->_mp_size);
+
+  gmp_fprintf (stderr,
+              "one_test: i = %d\n"
+              "  a = %Zx\n"
+              "  b = %Zx\n",
+              i, a, b);
+#endif
+  hgcd_ref_init (&ref);
+
+  /* The reference works on copies; ref_r0 and ref_r1 are modified by
+     hgcd_ref.  */
+  mpz_init_set (ref_r0, a);
+  mpz_init_set (ref_r1, b);
+  res[0] = hgcd_ref (&ref, ref_r0, ref_r1);
+
+  mpz_init_set (hgcd_r0, a);
+  mpz_init_set (hgcd_r1, b);
+  /* mpn_hgcd_appr is given a single size (asize) for both operands, so
+     extend the copy of B with zero limbs up to that size.  */
+  if (bsize < asize)
+    {
+      _mpz_realloc (hgcd_r1, asize);
+      MPN_ZERO (hgcd_r1->_mp_d + bsize, asize - bsize);
+    }
+  res[1] = mpn_hgcd_appr (hgcd_r0->_mp_d,
+                         hgcd_r1->_mp_d,
+                         asize,
+                         &hgcd, hgcd_tp);
+
+  /* Any changed marker means a write outside one of the scratch areas.  */
+  if (hgcd_init_tp[-1] != marker[0]
+      || hgcd_init_tp[hgcd_init_scratch] != marker[1]
+      || hgcd_tp[-1] != marker[2]
+      || hgcd_tp[hgcd_scratch] != marker[3])
+    {
+      fprintf (stderr, "ERROR in test %d\n", i);
+      fprintf (stderr, "scratch space overwritten!\n");
+
+      if (hgcd_init_tp[-1] != marker[0])
+       gmp_fprintf (stderr,
+                    "before init_tp: %Mx\n"
+                    "expected: %Mx\n",
+                    hgcd_init_tp[-1], marker[0]);
+      if (hgcd_init_tp[hgcd_init_scratch] != marker[1])
+       gmp_fprintf (stderr,
+                    "after init_tp: %Mx\n"
+                    "expected: %Mx\n",
+                    hgcd_init_tp[hgcd_init_scratch], marker[1]);
+      if (hgcd_tp[-1] != marker[2])
+       gmp_fprintf (stderr,
+                    "before tp: %Mx\n"
+                    "expected: %Mx\n",
+                    hgcd_tp[-1], marker[2]);
+      if (hgcd_tp[hgcd_scratch] != marker[3])
+       gmp_fprintf (stderr,
+                    "after tp: %Mx\n"
+                    "expected: %Mx\n",
+                    hgcd_tp[hgcd_scratch], marker[3]);
+
+      abort ();
+    }
+
+  if (!hgcd_appr_valid_p (a, b, res[0], &ref, ref_r0, ref_r1,
+                         res[1], &hgcd))
+    {
+      fprintf (stderr, "ERROR in test %d\n", i);
+      fprintf (stderr, "Invalid results for hgcd and hgcd_ref\n");
+      fprintf (stderr, "op1=");                 debug_mp (a, -16);
+      fprintf (stderr, "op2=");                 debug_mp (b, -16);
+      fprintf (stderr, "hgcd_ref: %ld\n", (long) res[0]);
+      fprintf (stderr, "mpn_hgcd_appr: %ld\n", (long) res[1]);
+      abort ();
+    }
+
+  /* The allocations were offset by one limb; undo that before freeing.  */
+  refmpn_free_limbs (hgcd_init_tp - 1);
+  refmpn_free_limbs (hgcd_tp - 1);
+  hgcd_ref_clear (&ref);
+  mpz_clear (ref_r0);
+  mpz_clear (ref_r1);
+  mpz_clear (hgcd_r0);
+  mpz_clear (hgcd_r1);
+
+  return res[0];
+}
+
+/* Initialize each of the four matrix entries of HGCD with mpz_init.  */
+static void
+hgcd_ref_init (struct hgcd_ref *hgcd)
+{
+  unsigned row, col;
+
+  for (row = 0; row < 2; row++)
+    for (col = 0; col < 2; col++)
+      mpz_init (hgcd->m[row][col]);
+}
+
+/* Release each of the four matrix entries of HGCD with mpz_clear.  */
+static void
+hgcd_ref_clear (struct hgcd_ref *hgcd)
+{
+  unsigned row, col;
+
+  for (row = 0; row < 2; row++)
+    for (col = 0; col < 2; col++)
+      mpz_clear (hgcd->m[row][col]);
+}
+
+/* Floor-divide A by B, storing the quotient in Q and the remainder in R.
+   If the remainder occupies at most S limbs, back off one step: add B to
+   R and subtract one from Q.  Returns non-zero iff the final Q is
+   positive.  */
+static int
+sdiv_qr (mpz_t q, mpz_t r, mp_size_t s, const mpz_t a, const mpz_t b)
+{
+  mpz_fdiv_qr (q, r, a, b);
+
+  /* Remainder is still larger than S limbs: keep the full quotient.  */
+  if (mpz_size (r) > s)
+    return mpz_sgn (q) > 0;
+
+  mpz_add (r, r, b);
+  mpz_sub_ui (q, q, 1);
+
+  return mpz_sgn (q) > 0;
+}
+
+/* Reference reduction on mpz operands.  With n the larger limb count of
+   A and B and s = n/2 + 1, A and B are reduced in place by subtraction
+   and division steps while both stay larger than s limbs, and the
+   matrix in HGCD accumulates the quotients.  Returns 0, possibly after
+   one subtraction, when no reduction step can be recorded, and 1
+   otherwise.  */
+static int
+hgcd_ref (struct hgcd_ref *hgcd, mpz_t a, mpz_t b)
+{
+  mp_size_t n = MAX (mpz_size (a), mpz_size (b));
+  mp_size_t s = n/2 + 1;
+  mpz_t q;
+  int order;
+  int reduced;
+
+  if (mpz_size (a) <= s || mpz_size (b) <= s)
+    return 0;
+
+  order = mpz_cmp (a, b);
+  if (order == 0)
+    return 0;
+
+  /* First step is a plain subtraction of the smaller operand from the
+     larger one; the matrix is only set if the result is still large
+     enough.  */
+  if (order < 0)
+    {
+      mpz_sub (b, b, a);
+      if (mpz_size (b) <= s)
+        return 0;
+
+      mpz_set_ui (hgcd->m[0][0], 1);
+      mpz_set_ui (hgcd->m[0][1], 0);
+      mpz_set_ui (hgcd->m[1][0], 1);
+      mpz_set_ui (hgcd->m[1][1], 1);
+    }
+  else
+    {
+      mpz_sub (a, a, b);
+      if (mpz_size (a) <= s)
+        return 0;
+
+      mpz_set_ui (hgcd->m[0][0], 1);
+      mpz_set_ui (hgcd->m[0][1], 1);
+      mpz_set_ui (hgcd->m[1][0], 0);
+      mpz_set_ui (hgcd->m[1][1], 1);
+    }
+
+  mpz_init (q);
+
+  /* Keep dividing the larger operand by the smaller one until sdiv_qr
+     reports that no positive quotient is left.  */
+  do
+    {
+      ASSERT (mpz_size (a) > s);
+      ASSERT (mpz_size (b) > s);
+
+      if (mpz_cmp (a, b) > 0)
+        {
+          reduced = sdiv_qr (q, a, s, a, b);
+          if (reduced)
+            {
+              mpz_addmul (hgcd->m[0][1], q, hgcd->m[0][0]);
+              mpz_addmul (hgcd->m[1][1], q, hgcd->m[1][0]);
+            }
+        }
+      else
+        {
+          reduced = sdiv_qr (q, b, s, b, a);
+          if (reduced)
+            {
+              mpz_addmul (hgcd->m[0][0], q, hgcd->m[0][1]);
+              mpz_addmul (hgcd->m[1][0], q, hgcd->m[1][1]);
+            }
+        }
+    }
+  while (reduced);
+
+  mpz_clear (q);
+
+  return 1;
+}
+
+/* Return non-zero iff the size field and limbs of A match the limb
+   vector {BP, BSIZE} once BSIZE has been normalized with
+   MPN_NORMALIZE.  */
+static int
+mpz_mpn_equal (const mpz_t a, mp_srcptr bp, mp_size_t bsize)
+{
+  mp_size_t an = SIZ (a);
+
+  MPN_NORMALIZE (bp, bsize);
+  if (an != bsize)
+    return 0;
+
+  return mpn_cmp (PTR (a), bp, an) == 0;
+}
+
+/* Return 1 if all four matrix entries of A and B compare equal, and 0
+   at the first entry that differs.  */
+static int
+hgcd_ref_equal (const struct hgcd_ref *A, const struct hgcd_ref *B)
+{
+  unsigned k;
+
+  /* Visit the entries in the order (0,0), (0,1), (1,0), (1,1).  */
+  for (k = 0; k < 4; k++)
+    {
+      unsigned row = k / 2;
+      unsigned col = k % 2;
+
+      if (mpz_cmp (A->m[row][col], B->m[row][col]) != 0)
+        return 0;
+    }
+
+  return 1;
+}
+
+/* Check the result of mpn_hgcd_appr against the reference reduction.
+
+   A, B            the original operands.
+   RES0            return value of hgcd_ref.
+   REF             matrix computed by hgcd_ref.
+   REF_R0, REF_R1  the operands as reduced by hgcd_ref.
+   RES1            return value of mpn_hgcd_appr.
+   HGCD            matrix computed by mpn_hgcd_appr.
+
+   Returns 1 if the approximate result is acceptable, otherwise prints a
+   diagnostic to stderr and returns 0.  The checks, in order:
+     - if the reference made no reduction, neither may mpn_hgcd_appr;
+     - the reference remainders are larger than s limbs and their
+       difference fits in s limbs, where s = n/2 + 1;
+     - the remainders implied by HGCD (or A and B themselves if RES1 is
+       zero) are positive and larger than s limbs;
+     - the bit size of their difference is at least that of the
+       reference and at most s limbs plus a margin;
+     - continuing the reduction from the approximate state with sdiv_qr
+       arrives at exactly the reference remainders and matrix.  */
+static int
+hgcd_appr_valid_p (mpz_t a, mpz_t b, mp_size_t res0,
+                  struct hgcd_ref *ref, mpz_t ref_r0, mpz_t ref_r1,
+                  mp_size_t res1, struct hgcd_matrix *hgcd)
+{
+  mp_size_t n = MAX (mpz_size (a), mpz_size (b));
+  mp_size_t s = n/2 + 1;
+
+  mp_bitcnt_t dbits, abits, margin;
+  mpz_t appr_r0, appr_r1, t, q;
+  struct hgcd_ref appr;
+
+  if (!res0)
+    {
+      if (!res1)
+       return 1;
+
+      fprintf (stderr, "mpn_hgcd_appr returned 1 when no reduction possible.\n");
+      return 0;
+    }
+
+  /* NOTE: No *_clear calls on error return, since we're going to
+     abort anyway. */
+  mpz_init (t);
+  mpz_init (q);
+  hgcd_ref_init (&appr);
+  mpz_init (appr_r0);
+  mpz_init (appr_r1);
+
+  /* Sanity checks on the reference result itself.  */
+  if (mpz_size (ref_r0) <= s)
+    {
+      fprintf (stderr, "ref_r0 too small!!!: "); debug_mp (ref_r0, 16);
+      return 0;
+    }
+  if (mpz_size (ref_r1) <= s)
+    {
+      fprintf (stderr, "ref_r1 too small!!!: "); debug_mp (ref_r1, 16);
+      return 0;
+    }
+
+  mpz_sub (t, ref_r0, ref_r1);
+  dbits = mpz_sizeinbase (t, 2);
+  if (dbits > s*GMP_NUMB_BITS)
+    {
+      fprintf (stderr, "ref |r0 - r1| too large!!!: "); debug_mp (t, 16);
+      return 0;
+    }
+
+  if (!res1)
+    {
+      /* No reduction reported: the approximate state is the input.  */
+      mpz_set (appr_r0, a);
+      mpz_set (appr_r1, b);
+    }
+  else
+    {
+      unsigned i;
+
+      /* Copy the limb matrix of HGCD into the mpz matrix APPR.  */
+      for (i = 0; i<2; i++)
+       {
+         unsigned j;
+
+         for (j = 0; j<2; j++)
+           {
+             mp_size_t mn = hgcd->n;
+             MPN_NORMALIZE (hgcd->p[i][j], mn);
+             mpz_realloc (appr.m[i][j], mn);
+             MPN_COPY (PTR (appr.m[i][j]), hgcd->p[i][j], mn);
+             SIZ (appr.m[i][j]) = mn;
+           }
+       }
+      /* appr_r0 = m[1][1] * a - m[0][1] * b  */
+      mpz_mul (appr_r0, appr.m[1][1], a);
+      mpz_mul (t, appr.m[0][1], b);
+      mpz_sub (appr_r0, appr_r0, t);
+      if (mpz_sgn (appr_r0) <= 0
+         || mpz_size (appr_r0) <= s)
+       {
+         fprintf (stderr, "appr_r0 too small: "); debug_mp (appr_r0, 16);
+         return 0;
+       }
+
+      /* appr_r1 = m[0][0] * b - m[1][0] * a  */
+      mpz_mul (appr_r1, appr.m[1][0], a);
+      mpz_mul (t, appr.m[0][0], b);
+      mpz_sub (appr_r1, t, appr_r1);
+      if (mpz_sgn (appr_r1) <= 0
+         || mpz_size (appr_r1) <= s)
+       {
+         fprintf (stderr, "appr_r1 too small: "); debug_mp (appr_r1, 16);
+         return 0;
+       }
+    }
+
+  mpz_sub (t, appr_r0, appr_r1);
+  abits = mpz_sizeinbase (t, 2);
+  if (abits < dbits)
+    {
+      fprintf (stderr, "|r0 - r1| too small: "); debug_mp (t, 16);
+      return 0;
+    }
+
+  /* We lose one bit each time we discard the least significant limbs.
+     For the lehmer code, that can happen at most s * (GMP_NUMB_BITS)
+     / (GMP_NUMB_BITS - 1) times. For the dc code, we lose an entire
+     limb (or more?) for each level of recursion. */
+
+  margin = (n/2+1) * GMP_NUMB_BITS / (GMP_NUMB_BITS - 1);
+  {
+    mp_size_t rn;
+    for (rn = n; ABOVE_THRESHOLD (rn, HGCD_APPR_THRESHOLD); rn = (rn + 1)/2)
+      margin += GMP_NUMB_BITS;
+  }
+
+  /* The excess is printed with %d, so the whole difference must be cast
+     to int; casting only abits would pass a wider integer to %d.  */
+  if (verbose_flag && abits > dbits)
+    fprintf (stderr, "n = %u: sbits = %u: ref #(r0-r1): %u, appr #(r0-r1): %u excess: %d, margin: %u\n",
+            (unsigned) n, (unsigned) s*GMP_NUMB_BITS,
+            (unsigned) dbits, (unsigned) abits,
+            (int) (abits - s * GMP_NUMB_BITS), (unsigned) margin);
+
+  if (abits > s*GMP_NUMB_BITS + margin)
+    {
+      fprintf (stderr, "appr |r0 - r1| much larger than minimal (by %u bits, margin = %u bits)\n",
+              (unsigned) (abits - s*GMP_NUMB_BITS), (unsigned) margin);
+      return 0;
+    }
+
+  /* Continue reducing from the approximate state, in the same way as
+     hgcd_ref does, until neither remainder exceeds the reference or no
+     further step is possible.  */
+  while (mpz_cmp (appr_r0, ref_r0) > 0 || mpz_cmp (appr_r1, ref_r1) > 0)
+    {
+      ASSERT (mpz_size (appr_r0) > s);
+      ASSERT (mpz_size (appr_r1) > s);
+
+      if (mpz_cmp (appr_r0, appr_r1) > 0)
+       {
+         if (!sdiv_qr (q, appr_r0, s, appr_r0, appr_r1))
+           break;
+         mpz_addmul (appr.m[0][1], q, appr.m[0][0]);
+         mpz_addmul (appr.m[1][1], q, appr.m[1][0]);
+       }
+      else
+       {
+         if (!sdiv_qr (q, appr_r1, s, appr_r1, appr_r0))
+           break;
+         mpz_addmul (appr.m[0][0], q, appr.m[0][1]);
+         mpz_addmul (appr.m[1][0], q, appr.m[1][1]);
+       }
+    }
+
+  /* The completed reduction must agree exactly with the reference.  */
+  if (mpz_cmp (appr_r0, ref_r0) != 0
+      || mpz_cmp (appr_r1, ref_r1) != 0
+      || !hgcd_ref_equal (ref, &appr))
+    {
+      fprintf (stderr, "appr_r0: "); debug_mp (appr_r0, 16);
+      fprintf (stderr, "ref_r0: "); debug_mp (ref_r0, 16);
+
+      fprintf (stderr, "appr_r1: "); debug_mp (appr_r1, 16);
+      fprintf (stderr, "ref_r1: "); debug_mp (ref_r1, 16);
+
+      return 0;
+    }
+  mpz_clear (t);
+  mpz_clear (q);
+  hgcd_ref_clear (&appr);
+  mpz_clear (appr_r0);
+  mpz_clear (appr_r1);
+
+  return 1;
+}
diff --git a/tests/mpn/t-instrument.c b/tests/mpn/t-instrument.c

index a3d296958e68a58b13d2a059904c14643586d491..cf0aae1542c857427caf8e5066d51ece75aab10f 100644 (file)
--- a/tests/mpn/t-instrument.c
+++ b/tests/mpn/t-instrument.c
@@ -2,20 +2,20 @@
  
  Copyright 2002, 2003 Free Software Foundation, Inc.
  
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
  
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
  
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
  
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see http://www.gnu.org/licenses/.  */
  
  #include <stdio.h>
  #include <stdlib.h>
@@ -50,8 +50,8 @@ struct {
  int  ncall;
  
  
-void __cyg_profile_func_enter __GMP_PROTO ((void *this_fn, void *call_site))
-     __attribute__ ((no_instrument_function));
+void __cyg_profile_func_enter (void *, void *)
+  __attribute__ ((no_instrument_function));
  
  void
  __cyg_profile_func_enter (void *this_fn, void *call_site)
@@ -74,8 +74,8 @@ __cyg_profile_func_enter (void *this_fn, void *call_site)
    ncall++;
  }
  
-void __cyg_profile_func_exit __GMP_PROTO ((void *this_fn, void *call_site))
-     __attribute__ ((no_instrument_function));
+void __cyg_profile_func_exit (void *, void *)
+  __attribute__ ((no_instrument_function));
  
  void
  __cyg_profile_func_exit  (void *this_fn, void *call_site)
diff --git a/tests/mpn/t-invert.c b/tests/mpn/t-invert.c

index 7f747513ff7bd9d9ae93e618404d5173c70ca8e1..0bd8d102a4f4ceb04448d6f4712fe9076d150545 100644 (file)
--- a/tests/mpn/t-invert.c
+++ b/tests/mpn/t-invert.c
@@ -4,29 +4,29 @@
  
  Copyright 2009 Free Software Foundation, Inc.
  
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
  
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
  
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
  
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see http://www.gnu.org/licenses/.  */
  
  
+#include <stdlib.h>
+#include <stdio.h>
+
  #include "gmp.h"
  #include "gmp-impl.h"
  #include "tests.h"
  
-#include <stdlib.h>
-#include <stdio.h>
-
  /* Sizes are up to 2^SIZE_LOG limbs */
  #ifndef SIZE_LOG
  #define SIZE_LOG 12
diff --git a/tests/mpn/t-iord_u.c b/tests/mpn/t-iord_u.c

index 3b472fd616beb12798dd73ffc9f0c4ef5efdde13..17188d272a3e644a4a30c28614ac5e2ee2a53be9 100644 (file)
--- a/tests/mpn/t-iord_u.c
+++ b/tests/mpn/t-iord_u.c
@@ -2,20 +2,20 @@
  
  Copyright 2001, 2002 Free Software Foundation, Inc.
  
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
  
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
  
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
  
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see http://www.gnu.org/licenses/.  */
  
  #include <stdio.h>
  #include <stdlib.h>
diff --git a/tests/mpn/t-matrix22.c b/tests/mpn/t-matrix22.c

index 7521df0e1e1f0261fd2ad981cfdbae71782c8603..44832e4e7d47bb39dd05c11936064790be2bb187 100644 (file)
--- a/tests/mpn/t-matrix22.c
+++ b/tests/mpn/t-matrix22.c
@@ -2,20 +2,20 @@
  
  Copyright 2008 Free Software Foundation, Inc.
  
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
  
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
  
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
  
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see http://www.gnu.org/licenses/.  */
  
  #include <stdio.h>
  #include <stdlib.h>
diff --git a/tests/mpn/t-mod_1.c b/tests/mpn/t-mod_1.c

index 2f86ba277890bb62ed796fef71357a127ce9d878..100211a076ae98353f39b7b25bdc27dd0afe9b80 100644 (file)
--- a/tests/mpn/t-mod_1.c
+++ b/tests/mpn/t-mod_1.c
@@ -1,21 +1,21 @@
  /* Test mpn_mod_1 variants.
  
-Copyright 2010 Free Software Foundation, Inc.
+Copyright 2010, 2013 Free Software Foundation, Inc.
  
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
  
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
  
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
  
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see http://www.gnu.org/licenses/.  */
  
  #include <stdio.h>
  #include <stdlib.h>
@@ -52,6 +52,17 @@ check_one (mp_srcptr ap, mp_size_t n, mp_limb_t b)
           goto fail;
         }
      }
+  if (b <= GMP_NUMB_MASK / 3)
+    {
+      mp_limb_t pre[6];
+      mpn_mod_1s_3p_cps (pre, b);
+      r = mpn_mod_1s_3p (ap, n, b << pre[1], pre);
+      if (r != r_ref)
+       {
+         printf ("mpn_mod_1s_3p failed\n");
+         goto fail;
+       }
+    }
    if (b <= GMP_NUMB_MASK / 4)
      {
        mp_limb_t pre[7];
diff --git a/tests/mpn/t-mp_bases.c b/tests/mpn/t-mp_bases.c

index 17950ecb4eee13d873048d2ce97d186c971239fc..c7863a0cc244adece1535f8e23f372b870ec40a2 100644 (file)
--- a/tests/mpn/t-mp_bases.c
+++ b/tests/mpn/t-mp_bases.c
@@ -2,20 +2,20 @@
  
  Copyright 2002 Free Software Foundation, Inc.
  
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
  
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
  
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
  
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see http://www.gnu.org/licenses/.  */
  
  #include <stdio.h>
  #include <stdlib.h>
diff --git a/tests/mpn/t-mul.c b/tests/mpn/t-mul.c

index 79d3a38272743bd04d745523b3f41a1a9780bea6..63664ad55b46be63fd5086a1715ccc74fa04d813 100644 (file)
--- a/tests/mpn/t-mul.c
+++ b/tests/mpn/t-mul.c
@@ -2,20 +2,20 @@
  
  Copyright 2011, 2012 Free Software Foundation, Inc.
  
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
  
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
  
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
  
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see http://www.gnu.org/licenses/.  */
  
  
  #include <stdlib.h>
diff --git a/tests/mpn/t-mullo.c b/tests/mpn/t-mullo.c

index b44e23615162174067b7771b80d0689fcb2109ff..1a3172898b0b8ad2689e9ce8f86de492b3f30fd3 100644 (file)
--- a/tests/mpn/t-mullo.c
+++ b/tests/mpn/t-mullo.c
@@ -2,29 +2,29 @@
  
  Copyright 2009 Free Software Foundation, Inc.
  
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
  
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
  
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
  
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see http://www.gnu.org/licenses/.  */
  
  
+#include <stdlib.h>
+#include <stdio.h>
+
  #include "gmp.h"
  #include "gmp-impl.h"
  #include "tests.h"
  
-#include <stdlib.h>
-#include <stdio.h>
-
  /* Sizes are up to 2^SIZE_LOG limbs */
  #ifndef SIZE_LOG
  #define SIZE_LOG 10
diff --git a/tests/mpn/t-mulmid.c b/tests/mpn/t-mulmid.c

new file mode 100644 (file)

index 0000000..be29f77
--- /dev/null
+++ b/tests/mpn/t-mulmid.c
@@ -0,0 +1,93 @@
+/* Test for mulmid function.
+
+Copyright 2011 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see http://www.gnu.org/licenses/.  */
+
+
+#include <stdlib.h>
+#include <stdio.h>
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "tests.h"
+
+/* Sizes are up to 2^SIZE_LOG limbs */
+#ifndef SIZE_LOG
+#define SIZE_LOG 9
+#endif
+
+#ifndef COUNT
+#define COUNT 5000
+#endif
+
+#define MAX_N (1L << SIZE_LOG)
+
+int
+main (int argc, char **argv)
+{
+  mp_ptr ap, bp, rp, refp;
+  gmp_randstate_ptr rands;
+  int test;
+  TMP_DECL;
+  TMP_MARK;
+  /* Check mpn_mulmid against refmpn_mulmid on COUNT random inputs.  */
+  tests_start ();
+  rands = RANDS;
+
+  ap = TMP_ALLOC_LIMBS (MAX_N);
+  bp = TMP_ALLOC_LIMBS (MAX_N);
+  rp = TMP_ALLOC_LIMBS (MAX_N + 2);    /* room for rn limbs, see below */
+  refp = TMP_ALLOC_LIMBS (MAX_N + 2);
+
+  for (test = 0; test < COUNT; test++)
+    {
+      mp_size_t an, bn, rn;
+      unsigned size_log;
+
+      size_log = 1 + gmp_urandomm_ui (rands, SIZE_LOG);
+      an = 1 + gmp_urandomm_ui(rands, 1L << size_log);
+
+      size_log = 1 + gmp_urandomm_ui (rands, SIZE_LOG);
+      bn = 1 + gmp_urandomm_ui(rands, 1L << size_log);
+
+      /* Make sure an >= bn */
+      if (an < bn)
+       MP_SIZE_T_SWAP (an, bn);
+
+      mpn_random2 (ap, an);
+      mpn_random2 (bp, bn);
+
+      refmpn_mulmid (refp, ap, an, bp, bn);
+      mpn_mulmid (rp, ap, an, bp, bn);
+
+      rn = an + 3 - bn;        /* limbs compared; <= MAX_N + 2 as bn >= 1 */
+      if (mpn_cmp (refp, rp, rn))
+       {
+         printf ("ERROR in test %d, an = %d, bn = %d, rn = %d\n",
+                 test, (int) an, (int) bn, (int) rn); /* mp_size_t -> %d */
+         printf("a: "); mpn_dump (ap, an);
+         printf("b: "); mpn_dump (bp, bn);
+         printf("r:   "); mpn_dump (rp, rn);
+         printf("ref: "); mpn_dump (refp, rn);
+
+         abort();
+       }
+    }
+  TMP_FREE;
+  tests_end ();
+  return 0;
+}
diff --git a/tests/mpn/t-mulmod_bnm1.c b/tests/mpn/t-mulmod_bnm1.c

index 87a201360f6a30025bfafce103a0baf4ea13c3ad..469a9eb4b07f69bfb214645af50320a0a7277f76 100644 (file)
--- a/tests/mpn/t-mulmod_bnm1.c
+++ b/tests/mpn/t-mulmod_bnm1.c
@@ -4,29 +4,29 @@
  
  Copyright 2009 Free Software Foundation, Inc.
  
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
  
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
  
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
  
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see http://www.gnu.org/licenses/.  */
  
  
+#include <stdlib.h>
+#include <stdio.h>
+
  #include "gmp.h"
  #include "gmp-impl.h"
  #include "tests.h"
  
-#include <stdlib.h>
-#include <stdio.h>
-
  /* Sizes are up to 2^SIZE_LOG limbs */
  #ifndef SIZE_LOG
  #define SIZE_LOG 11
diff --git a/tests/mpn/t-perfsqr.c b/tests/mpn/t-perfsqr.c

index 6afe708deb8026ebfbce0ceec908dfa1d4a6622a..0a585a0869422a2eecd5db0ab32aaa7730bc91ec 100644 (file)
--- a/tests/mpn/t-perfsqr.c
+++ b/tests/mpn/t-perfsqr.c
@@ -2,20 +2,20 @@
  
  Copyright 2002 Free Software Foundation, Inc.
  
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
  
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
  
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
  
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see http://www.gnu.org/licenses/.  */
  
  #include <stdio.h>
  #include <stdlib.h>
diff --git a/tests/mpn/t-scan.c b/tests/mpn/t-scan.c

index fc8d3cc650ea76ce1cd2480c131f82739138623c..ba96846471513b49ba1e8eee034d7fcaeeeb7bc2 100644 (file)
--- a/tests/mpn/t-scan.c
+++ b/tests/mpn/t-scan.c
@@ -2,20 +2,20 @@
  
  Copyright 2002 Free Software Foundation, Inc.
  
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
  
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
  
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
  
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see http://www.gnu.org/licenses/.  */
  
  #include <stdio.h>
  #include <stdlib.h>
diff --git a/tests/mpn/t-sqrmod_bnm1.c b/tests/mpn/t-sqrmod_bnm1.c

index 8dfd5324ff13d95b443ca3f7e05de8208142ad5c..d3ee32a152f42eaa0fc683a6e2de15009ea7922a 100644 (file)
--- a/tests/mpn/t-sqrmod_bnm1.c
+++ b/tests/mpn/t-sqrmod_bnm1.c
@@ -4,29 +4,29 @@
  
  Copyright 2009 Free Software Foundation, Inc.
  
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
  
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
  
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
  
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see http://www.gnu.org/licenses/.  */
  
  
+#include <stdlib.h>
+#include <stdio.h>
+
  #include "gmp.h"
  #include "gmp-impl.h"
  #include "tests.h"
  
-#include <stdlib.h>
-#include <stdio.h>
-
  /* Sizes are up to 2^SIZE_LOG limbs */
  #ifndef SIZE_LOG
  #define SIZE_LOG 12
diff --git a/tests/mpn/t-toom2-sqr.c b/tests/mpn/t-toom2-sqr.c

new file mode 100644 (file)

index 0000000..a5cdcb5
--- /dev/null
+++ b/tests/mpn/t-toom2-sqr.c
@@ -0,0 +1,6 @@
+#define mpn_toomN_sqr mpn_toom2_sqr
+#define mpn_toomN_sqr_itch mpn_toom2_sqr_itch
+#define MIN_AN MPN_TOOM2_SQR_MINSIZE
+#define MAX_AN SQR_TOOM3_THRESHOLD
+
+#include "toom-sqr-shared.h"
diff --git a/tests/mpn/t-toom3-sqr.c b/tests/mpn/t-toom3-sqr.c

new file mode 100644 (file)

index 0000000..ccc3b99
--- /dev/null
+++ b/tests/mpn/t-toom3-sqr.c
@@ -0,0 +1,6 @@
+#define mpn_toomN_sqr mpn_toom3_sqr
+#define mpn_toomN_sqr_itch mpn_toom3_sqr_itch
+#define MIN_AN MAX(SQR_TOOM3_THRESHOLD,MPN_TOOM3_SQR_MINSIZE)
+#define MAX_AN SQR_TOOM4_THRESHOLD
+
+#include "toom-sqr-shared.h"
diff --git a/tests/mpn/t-toom4-sqr.c b/tests/mpn/t-toom4-sqr.c

new file mode 100644 (file)

index 0000000..ca14ab1
--- /dev/null
+++ b/tests/mpn/t-toom4-sqr.c
@@ -0,0 +1,6 @@
+#define mpn_toomN_sqr mpn_toom4_sqr
+#define mpn_toomN_sqr_itch mpn_toom4_sqr_itch
+#define MIN_AN MAX(SQR_TOOM3_THRESHOLD,MAX(SQR_TOOM4_THRESHOLD,MPN_TOOM4_SQR_MINSIZE))
+#define MAX_AN SQR_TOOM6_THRESHOLD
+
+#include "toom-sqr-shared.h"
diff --git a/tests/mpn/t-toom54.c b/tests/mpn/t-toom54.c

new file mode 100644 (file)

index 0000000..52a2bee
--- /dev/null
+++ b/tests/mpn/t-toom54.c
@@ -0,0 +1,8 @@
+#define mpn_toomMN_mul mpn_toom54_mul
+#define mpn_toomMN_mul_itch mpn_toom54_mul_itch
+
+#define MIN_AN 31
+#define MIN_BN(an) ((3*(an) + 32) / (size_t) 5)                /* 3/5 */
+#define MAX_BN(an) ((an) - 6)                          /* 1/1 */
+
+#include "toom-shared.h"
diff --git a/tests/mpn/t-toom6-sqr.c b/tests/mpn/t-toom6-sqr.c

new file mode 100644 (file)

index 0000000..479779f
--- /dev/null
+++ b/tests/mpn/t-toom6-sqr.c
@@ -0,0 +1,6 @@
+#define mpn_toomN_sqr mpn_toom6_sqr
+#define mpn_toomN_sqr_itch mpn_toom6_sqr_itch
+#define MIN_AN MAX(SQR_TOOM3_THRESHOLD,MAX(SQR_TOOM4_THRESHOLD,MAX(SQR_TOOM6_THRESHOLD,MPN_TOOM6_SQR_MINSIZE)))
+#define MAX_AN SQR_TOOM8_THRESHOLD
+
+#include "toom-sqr-shared.h"
diff --git a/tests/mpn/t-toom6h.c b/tests/mpn/t-toom6h.c

index fc5df5d00d5ebf1d1744999f7b4ac385319d4504..5cca9fc90e4d0cdaa6ccb2881a1f5363a13856e9 100644 (file)
--- a/tests/mpn/t-toom6h.c
+++ b/tests/mpn/t-toom6h.c
@@ -5,7 +5,7 @@
  
  /* Smaller sizes not supported; may lead to recursive calls to
     toom22_mul, toom33_mul, or toom44_mul with invalid input size. */
-#define MIN_AN MUL_TOOM6H_THRESHOLD
+#define MIN_AN MUL_TOOM6H_MIN
  #define MIN_BN(an) (MAX ((an*3)>>3, 46))
  
  #define COUNT 1000
diff --git a/tests/mpn/t-toom8-sqr.c b/tests/mpn/t-toom8-sqr.c

new file mode 100644 (file)

index 0000000..80df955
--- /dev/null
+++ b/tests/mpn/t-toom8-sqr.c
@@ -0,0 +1,6 @@
+#define mpn_toomN_sqr mpn_toom8_sqr
+#define mpn_toomN_sqr_itch mpn_toom8_sqr_itch
+#define MIN_AN MAX(SQR_TOOM3_THRESHOLD,MAX(SQR_TOOM4_THRESHOLD,MAX(SQR_TOOM6_THRESHOLD,MAX(SQR_TOOM8_THRESHOLD,MPN_TOOM8_SQR_MINSIZE))))
+#define MAX_AN SQR_FFT_THRESHOLD
+
+#include "toom-sqr-shared.h"
diff --git a/tests/mpn/t-toom8h.c b/tests/mpn/t-toom8h.c

index 33604603dffe222b7f8ef218c990d67dd1ae3cb0..aeeabb274b6140bf9f0172d26f937e9434fe0872 100644 (file)
--- a/tests/mpn/t-toom8h.c
+++ b/tests/mpn/t-toom8h.c
@@ -5,21 +5,13 @@
  
  /* Smaller sizes not supported; may lead to recursive calls to
     toom{22,33,44,6h}_mul with invalid input size. */
-#define MIN_AN MUL_TOOM8H_THRESHOLD
+#define MIN_AN MUL_TOOM8H_MIN
  
-#if GMP_NUMB_BITS <= 10*3
-#define MIN_BN(an) (MAX ((an*6)/10, 86) )
-#else
-#if GMP_NUMB_BITS <= 11*3
-#define MIN_BN(an) (MAX ((an*5)/11, 86) )
-#else
-#if GMP_NUMB_BITS <= 12*3
-#define MIN_BN(an) (MAX ((an*4)/12, 86) )
-#else
-#define MIN_BN(an) (MAX ((an*4)/13, 86) )
-#endif
-#endif
-#endif
+#define MIN_BN(an)                      \
+(MAX(GMP_NUMB_BITS <= 10*3 ? (an*6)/10 : \
+     GMP_NUMB_BITS <= 11*3 ? (an*5)/11 : \
+     GMP_NUMB_BITS <= 12*3 ? (an*4)/12 : \
+     (an*4)/13, 86) )
  
  #define COUNT 1000
  
diff --git a/tests/mpn/toom-shared.h b/tests/mpn/toom-shared.h

index 57b3181c11138feeb892085203c05036ff987cdb..a775f1fea3caec2da3b3ecd4402a5021206de74a 100644 (file)
--- a/tests/mpn/toom-shared.h
+++ b/tests/mpn/toom-shared.h
@@ -2,29 +2,29 @@
  
  Copyright 2009 Free Software Foundation, Inc.
  
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
  
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
  
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
  
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see http://www.gnu.org/licenses/.  */
  
  
+#include <stdlib.h>
+#include <stdio.h>
+
  #include "gmp.h"
  #include "gmp-impl.h"
  #include "tests.h"
  
-#include <stdlib.h>
-#include <stdio.h>
-
  /* Main file is expected to define mpn_toomMN_mul,
   * mpn_toomMN_mul_itch, MIN_AN, MIN_BN(an), MAX_BN(an) and then
   * include this file. */
diff --git a/tests/mpn/toom-sqr-shared.h b/tests/mpn/toom-sqr-shared.h

new file mode 100644 (file)

index 0000000..f08838f
--- /dev/null
+++ b/tests/mpn/toom-sqr-shared.h
@@ -0,0 +1,129 @@
+/* Test for various Toom squaring functions.
+
+Copyright 2009, 2012 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see http://www.gnu.org/licenses/.  */
+
+
+#include <stdlib.h>
+#include <stdio.h>
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "tests.h"
+
+/* Main file is expected to define mpn_toomN_sqr, mpn_toomN_sqr_itch,
+ * MIN_AN, MAX_AN and then include this file. */
+
+#ifndef COUNT
+#define COUNT 500
+#endif
+
+/* Test driver for a Toom squaring function.
+
+   Runs COUNT iterations (or argv[1] iterations when given).  Each
+   iteration squares a random operand with mpn_toomN_sqr, compares the
+   product with refmpn_mul, and verifies that the limbs just outside the
+   product and scratch areas were not modified.  Aborts on the first
+   mismatch; returns 1 on an invalid count argument, 0 otherwise.  */
+int
+main (int argc, char **argv)
+{
+  mp_ptr ap, refp, pp, scratch;
+  int count = COUNT;
+  int test;
+  gmp_randstate_ptr rands;
+  TMP_DECL;
+  TMP_MARK;
+
+  /* Optional first argument overrides the iteration count.  */
+  if (argc > 1)
+    {
+      char *end;
+      count = strtol (argv[1], &end, 0);
+      if (*end || count <= 0)
+        {
+          fprintf (stderr, "Invalid test count: %s.\n", argv[1]);
+          return 1;
+        }
+    }
+
+  tests_start ();
+
+  if (MAX_AN > MIN_AN) {
+    rands = RANDS;
+
+    ap = TMP_ALLOC_LIMBS (MAX_AN);
+    refp = TMP_ALLOC_LIMBS (MAX_AN * 2);
+    /* pp and scratch are allocated with two extra limbs and offset by
+       one, leaving a guard limb before and after the area in use.  */
+    pp = 1 + TMP_ALLOC_LIMBS (MAX_AN * 2 + 2);
+    scratch
+      = 1+TMP_ALLOC_LIMBS (mpn_toomN_sqr_itch (MAX_AN) + 2);
+
+    for (test = 0; test < count; test++)
+      {
+        mp_size_t an;
+        mp_size_t itch;
+        mp_limb_t p_before, p_after, s_before, s_after;
+
+        /* Operand size: MIN_AN plus a random offset drawn with bound
+           MAX_AN - MIN_AN.  */
+        an = MIN_AN
+          + gmp_urandomm_ui (rands, MAX_AN - MIN_AN);
+
+        /* Randomise the operand and the whole product area, guard
+           limbs included, then remember the guard values.  */
+        mpn_random2 (ap, an);
+        mpn_random2 (pp-1, an * 2 + 2);
+        p_before = pp[-1];
+        p_after = pp[an * 2];
+
+        /* Same treatment for the scratch area of the size requested
+           for this operand.  */
+        itch = mpn_toomN_sqr_itch (an);
+        ASSERT_ALWAYS (itch <= mpn_toomN_sqr_itch (MAX_AN));
+        mpn_random2 (scratch-1, itch+2);
+        s_before = scratch[-1];
+        s_after = scratch[itch];
+
+        mpn_toomN_sqr (pp, ap, an, scratch);
+        refmpn_mul (refp, ap, an, ap, an);
+        if (pp[-1] != p_before || pp[an * 2] != p_after
+            || scratch[-1] != s_before || scratch[itch] != s_after
+            || mpn_cmp (refp, pp, an * 2) != 0)
+          {
+            printf ("ERROR in test %d, an = %d\n",
+                    test, (int) an);
+            /* Report every guard limb that was overwritten.  */
+            if (pp[-1] != p_before)
+              {
+                printf ("before pp:"); mpn_dump (pp -1, 1);
+                printf ("keep:   "); mpn_dump (&p_before, 1);
+              }
+            if (pp[an * 2] != p_after)
+              {
+                printf ("after pp:"); mpn_dump (pp + an * 2, 1);
+                printf ("keep:   "); mpn_dump (&p_after, 1);
+              }
+            if (scratch[-1] != s_before)
+              {
+                printf ("before scratch:"); mpn_dump (scratch-1, 1);
+                printf ("keep:   "); mpn_dump (&s_before, 1);
+              }
+            if (scratch[itch] != s_after)
+              {
+                printf ("after scratch:"); mpn_dump (scratch + itch, 1);
+                printf ("keep:   "); mpn_dump (&s_after, 1);
+              }
+            /* Operand, computed product, reference product.  */
+            mpn_dump (ap, an);
+            mpn_dump (pp, an * 2);
+            mpn_dump (refp, an * 2);
+
+            abort();
+          }
+      }
+    TMP_FREE;
+  }
+
+  tests_end ();
+  return 0;
+}
diff --git a/tests/mpq/Makefile.am b/tests/mpq/Makefile.am

index 57c6817f95aaf45aedad62463b431bb65630c809..e84f685da90ade53faad58965287323b8761acb0 100644 (file)
--- a/tests/mpq/Makefile.am
+++ b/tests/mpq/Makefile.am
@@ -1,28 +1,28 @@
  ## Process this file with automake to generate Makefile.in
  
-# Copyright 1996, 1999, 2000, 2001, 2002 Free Software Foundation, Inc.
+# Copyright 1996, 1999, 2000, 2001, 2002, 2012 Free Software Foundation, Inc.
  #
-# This file is part of the GNU MP Library.
+# This file is part of the GNU MP Library test suite.
  #
-# The GNU MP Library is free software; you can redistribute it and/or modify
-# it under the terms of the GNU Lesser General Public License as published by
-# the Free Software Foundation; either version 3 of the License, or (at your
-# option) any later version.
+# The GNU MP Library test suite is free software; you can redistribute it
+# and/or modify it under the terms of the GNU General Public License as
+# published by the Free Software Foundation; either version 3 of the License,
+# or (at your option) any later version.
  #
-# The GNU MP Library is distributed in the hope that it will be useful, but
-# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-# License for more details.
+# The GNU MP Library test suite is distributed in the hope that it will be
+# useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+# Public License for more details.
  #
-# You should have received a copy of the GNU Lesser General Public License
-# along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+# You should have received a copy of the GNU General Public License along with
+# the GNU MP Library test suite.  If not, see http://www.gnu.org/licenses/.
  
  
  INCLUDES = -I$(top_srcdir) -I$(top_srcdir)/tests
  LDADD = $(top_builddir)/tests/libtests.la $(top_builddir)/libgmp.la
  
  check_PROGRAMS = t-aors t-cmp t-cmp_ui t-cmp_si t-equal t-get_d t-get_str \
-  t-inp_str t-md_2exp t-set_f t-set_str
+  t-inp_str t-inv t-md_2exp t-set_f t-set_str io reuse
  TESTS = $(check_PROGRAMS)
  
  # Temporary files used by the tests.  Removed automatically if the tests
diff --git a/tests/mpq/Makefile.in b/tests/mpq/Makefile.in

index 05ba2efbb3ec252b24ccd9f46a8b1fcfc7fa455e..0edd0c490346acfaefbbf64207119dea99b7e790 100644 (file)
--- a/tests/mpq/Makefile.in
+++ b/tests/mpq/Makefile.in
@@ -1,9 +1,9 @@
-# Makefile.in generated by automake 1.11.1 from Makefile.am.
+# Makefile.in generated by automake 1.11.6 from Makefile.am.
  # @configure_input@
  
  # Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
-# 2003, 2004, 2005, 2006, 2007, 2008, 2009  Free Software Foundation,
-# Inc.
+# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software
+# Foundation, Inc.
  # This Makefile.in is free software; the Free Software Foundation
  # gives unlimited permission to copy and/or distribute it,
  # with or without modifications, as long as this notice is preserved.
@@ -15,23 +15,40 @@
  
  @SET_MAKE@
  
-# Copyright 1996, 1999, 2000, 2001, 2002 Free Software Foundation, Inc.
+# Copyright 1996, 1999, 2000, 2001, 2002, 2012 Free Software Foundation, Inc.
  #
-# This file is part of the GNU MP Library.
+# This file is part of the GNU MP Library test suite.
  #
-# The GNU MP Library is free software; you can redistribute it and/or modify
-# it under the terms of the GNU Lesser General Public License as published by
-# the Free Software Foundation; either version 3 of the License, or (at your
-# option) any later version.
+# The GNU MP Library test suite is free software; you can redistribute it
+# and/or modify it under the terms of the GNU General Public License as
+# published by the Free Software Foundation; either version 3 of the License,
+# or (at your option) any later version.
  #
-# The GNU MP Library is distributed in the hope that it will be useful, but
-# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-# License for more details.
+# The GNU MP Library test suite is distributed in the hope that it will be
+# useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+# Public License for more details.
  #
-# You should have received a copy of the GNU Lesser General Public License
-# along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+# You should have received a copy of the GNU General Public License along with
+# the GNU MP Library test suite.  If not, see http://www.gnu.org/licenses/.
  VPATH = @srcdir@
+am__make_dryrun = \
+  { \
+    am__dry=no; \
+    case $$MAKEFLAGS in \
+      *\\[\ \  ]*) \
+        echo 'am--echo: ; @echo "AM"  OK' | $(MAKE) -f - 2>/dev/null \
+          | grep '^AM OK$$' >/dev/null || am__dry=yes;; \
+      *) \
+        for am__flg in $$MAKEFLAGS; do \
+          case $$am__flg in \
+            *=*|--*) ;; \
+            *n*) am__dry=yes; break;; \
+          esac; \
+        done;; \
+    esac; \
+    test $$am__dry = yes; \
+  }
  pkgdatadir = $(datadir)/@PACKAGE@
  pkgincludedir = $(includedir)/@PACKAGE@
  pkglibdir = $(libdir)/@PACKAGE@
@@ -50,74 +67,89 @@ PRE_UNINSTALL = :
  POST_UNINSTALL = :
  build_triplet = @build@
  host_triplet = @host@
-ANSI2KNR = $(top_builddir)/ansi2knr
  check_PROGRAMS = t-aors$(EXEEXT) t-cmp$(EXEEXT) t-cmp_ui$(EXEEXT) \
         t-cmp_si$(EXEEXT) t-equal$(EXEEXT) t-get_d$(EXEEXT) \
-       t-get_str$(EXEEXT) t-inp_str$(EXEEXT) t-md_2exp$(EXEEXT) \
-       t-set_f$(EXEEXT) t-set_str$(EXEEXT)
+       t-get_str$(EXEEXT) t-inp_str$(EXEEXT) t-inv$(EXEEXT) \
+       t-md_2exp$(EXEEXT) t-set_f$(EXEEXT) t-set_str$(EXEEXT) \
+       io$(EXEEXT) reuse$(EXEEXT)
  subdir = tests/mpq
  DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in
  ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
  am__aclocal_m4_deps = $(top_srcdir)/acinclude.m4 \
-       $(top_srcdir)/configure.in
+       $(top_srcdir)/configure.ac
  am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
         $(ACLOCAL_M4)
  mkinstalldirs = $(install_sh) -d
  CONFIG_HEADER = $(top_builddir)/config.h
  CONFIG_CLEAN_FILES =
  CONFIG_CLEAN_VPATH_FILES =
+io_SOURCES = io.c
+io_OBJECTS = io.$(OBJEXT)
+io_LDADD = $(LDADD)
+io_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
+       $(top_builddir)/libgmp.la
+reuse_SOURCES = reuse.c
+reuse_OBJECTS = reuse.$(OBJEXT)
+reuse_LDADD = $(LDADD)
+reuse_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
+       $(top_builddir)/libgmp.la
  t_aors_SOURCES = t-aors.c
-t_aors_OBJECTS = t-aors$U.$(OBJEXT)
+t_aors_OBJECTS = t-aors.$(OBJEXT)
  t_aors_LDADD = $(LDADD)
  t_aors_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
         $(top_builddir)/libgmp.la
  t_cmp_SOURCES = t-cmp.c
-t_cmp_OBJECTS = t-cmp$U.$(OBJEXT)
+t_cmp_OBJECTS = t-cmp.$(OBJEXT)
  t_cmp_LDADD = $(LDADD)
  t_cmp_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
         $(top_builddir)/libgmp.la
  t_cmp_si_SOURCES = t-cmp_si.c
-t_cmp_si_OBJECTS = t-cmp_si$U.$(OBJEXT)
+t_cmp_si_OBJECTS = t-cmp_si.$(OBJEXT)
  t_cmp_si_LDADD = $(LDADD)
  t_cmp_si_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
         $(top_builddir)/libgmp.la
  t_cmp_ui_SOURCES = t-cmp_ui.c
-t_cmp_ui_OBJECTS = t-cmp_ui$U.$(OBJEXT)
+t_cmp_ui_OBJECTS = t-cmp_ui.$(OBJEXT)
  t_cmp_ui_LDADD = $(LDADD)
  t_cmp_ui_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
         $(top_builddir)/libgmp.la
  t_equal_SOURCES = t-equal.c
-t_equal_OBJECTS = t-equal$U.$(OBJEXT)
+t_equal_OBJECTS = t-equal.$(OBJEXT)
  t_equal_LDADD = $(LDADD)
  t_equal_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
         $(top_builddir)/libgmp.la
  t_get_d_SOURCES = t-get_d.c
-t_get_d_OBJECTS = t-get_d$U.$(OBJEXT)
+t_get_d_OBJECTS = t-get_d.$(OBJEXT)
  t_get_d_LDADD = $(LDADD)
  t_get_d_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
         $(top_builddir)/libgmp.la
  t_get_str_SOURCES = t-get_str.c
-t_get_str_OBJECTS = t-get_str$U.$(OBJEXT)
+t_get_str_OBJECTS = t-get_str.$(OBJEXT)
  t_get_str_LDADD = $(LDADD)
  t_get_str_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
         $(top_builddir)/libgmp.la
  t_inp_str_SOURCES = t-inp_str.c
-t_inp_str_OBJECTS = t-inp_str$U.$(OBJEXT)
+t_inp_str_OBJECTS = t-inp_str.$(OBJEXT)
  t_inp_str_LDADD = $(LDADD)
  t_inp_str_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
         $(top_builddir)/libgmp.la
+t_inv_SOURCES = t-inv.c
+t_inv_OBJECTS = t-inv.$(OBJEXT)
+t_inv_LDADD = $(LDADD)
+t_inv_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
+       $(top_builddir)/libgmp.la
  t_md_2exp_SOURCES = t-md_2exp.c
-t_md_2exp_OBJECTS = t-md_2exp$U.$(OBJEXT)
+t_md_2exp_OBJECTS = t-md_2exp.$(OBJEXT)
  t_md_2exp_LDADD = $(LDADD)
  t_md_2exp_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
         $(top_builddir)/libgmp.la
  t_set_f_SOURCES = t-set_f.c
-t_set_f_OBJECTS = t-set_f$U.$(OBJEXT)
+t_set_f_OBJECTS = t-set_f.$(OBJEXT)
  t_set_f_LDADD = $(LDADD)
  t_set_f_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
         $(top_builddir)/libgmp.la
  t_set_str_SOURCES = t-set_str.c
-t_set_str_OBJECTS = t-set_str$U.$(OBJEXT)
+t_set_str_OBJECTS = t-set_str.$(OBJEXT)
  t_set_str_LDADD = $(LDADD)
  t_set_str_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
         $(top_builddir)/libgmp.la
@@ -133,11 +165,17 @@ CCLD = $(CC)
  LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \
         --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) $(AM_LDFLAGS) \
         $(LDFLAGS) -o $@
-SOURCES = t-aors.c t-cmp.c t-cmp_si.c t-cmp_ui.c t-equal.c t-get_d.c \
-       t-get_str.c t-inp_str.c t-md_2exp.c t-set_f.c t-set_str.c
-DIST_SOURCES = t-aors.c t-cmp.c t-cmp_si.c t-cmp_ui.c t-equal.c \
-       t-get_d.c t-get_str.c t-inp_str.c t-md_2exp.c t-set_f.c \
-       t-set_str.c
+SOURCES = io.c reuse.c t-aors.c t-cmp.c t-cmp_si.c t-cmp_ui.c \
+       t-equal.c t-get_d.c t-get_str.c t-inp_str.c t-inv.c \
+       t-md_2exp.c t-set_f.c t-set_str.c
+DIST_SOURCES = io.c reuse.c t-aors.c t-cmp.c t-cmp_si.c t-cmp_ui.c \
+       t-equal.c t-get_d.c t-get_str.c t-inp_str.c t-inv.c \
+       t-md_2exp.c t-set_f.c t-set_str.c
+am__can_run_installinfo = \
+  case $$AM_UPDATE_INFO_DIR in \
+    n|no|NO) false;; \
+    *) (install-info --version) >/dev/null 2>&1;; \
+  esac
  ETAGS = etags
  CTAGS = ctags
  am__tty_colors = \
@@ -241,8 +279,8 @@ SHELL = @SHELL@
  SPEED_CYCLECOUNTER_OBJ = @SPEED_CYCLECOUNTER_OBJ@
  STRIP = @STRIP@
  TAL_OBJECT = @TAL_OBJECT@
+TUNE_LIBS = @TUNE_LIBS@
  TUNE_SQR_OBJ = @TUNE_SQR_OBJ@
-U = @U@
  U_FOR_BUILD = @U_FOR_BUILD@
  VERSION = @VERSION@
  WITH_READLINE_01 = @WITH_READLINE_01@
@@ -289,7 +327,6 @@ mandir = @mandir@
  mkdir_p = @mkdir_p@
  mpn_objects = @mpn_objects@
  mpn_objs_in_libgmp = @mpn_objs_in_libgmp@
-mpn_objs_in_libmp = @mpn_objs_in_libmp@
  oldincludedir = @oldincludedir@
  pdfdir = @pdfdir@
  prefix = @prefix@
@@ -354,37 +391,46 @@ clean-checkPROGRAMS:
         list=`for p in $$list; do echo "$$p"; done | sed 's/$(EXEEXT)$$//'`; \
         echo " rm -f" $$list; \
         rm -f $$list
-t-aors$(EXEEXT): $(t_aors_OBJECTS) $(t_aors_DEPENDENCIES) 
+io$(EXEEXT): $(io_OBJECTS) $(io_DEPENDENCIES) $(EXTRA_io_DEPENDENCIES) 
+       @rm -f io$(EXEEXT)
+       $(LINK) $(io_OBJECTS) $(io_LDADD) $(LIBS)
+reuse$(EXEEXT): $(reuse_OBJECTS) $(reuse_DEPENDENCIES) $(EXTRA_reuse_DEPENDENCIES) 
+       @rm -f reuse$(EXEEXT)
+       $(LINK) $(reuse_OBJECTS) $(reuse_LDADD) $(LIBS)
+t-aors$(EXEEXT): $(t_aors_OBJECTS) $(t_aors_DEPENDENCIES) $(EXTRA_t_aors_DEPENDENCIES) 
         @rm -f t-aors$(EXEEXT)
         $(LINK) $(t_aors_OBJECTS) $(t_aors_LDADD) $(LIBS)
-t-cmp$(EXEEXT): $(t_cmp_OBJECTS) $(t_cmp_DEPENDENCIES) 
+t-cmp$(EXEEXT): $(t_cmp_OBJECTS) $(t_cmp_DEPENDENCIES) $(EXTRA_t_cmp_DEPENDENCIES) 
         @rm -f t-cmp$(EXEEXT)
         $(LINK) $(t_cmp_OBJECTS) $(t_cmp_LDADD) $(LIBS)
-t-cmp_si$(EXEEXT): $(t_cmp_si_OBJECTS) $(t_cmp_si_DEPENDENCIES) 
+t-cmp_si$(EXEEXT): $(t_cmp_si_OBJECTS) $(t_cmp_si_DEPENDENCIES) $(EXTRA_t_cmp_si_DEPENDENCIES) 
         @rm -f t-cmp_si$(EXEEXT)
         $(LINK) $(t_cmp_si_OBJECTS) $(t_cmp_si_LDADD) $(LIBS)
-t-cmp_ui$(EXEEXT): $(t_cmp_ui_OBJECTS) $(t_cmp_ui_DEPENDENCIES) 
+t-cmp_ui$(EXEEXT): $(t_cmp_ui_OBJECTS) $(t_cmp_ui_DEPENDENCIES) $(EXTRA_t_cmp_ui_DEPENDENCIES) 
         @rm -f t-cmp_ui$(EXEEXT)
         $(LINK) $(t_cmp_ui_OBJECTS) $(t_cmp_ui_LDADD) $(LIBS)
-t-equal$(EXEEXT): $(t_equal_OBJECTS) $(t_equal_DEPENDENCIES) 
+t-equal$(EXEEXT): $(t_equal_OBJECTS) $(t_equal_DEPENDENCIES) $(EXTRA_t_equal_DEPENDENCIES) 
         @rm -f t-equal$(EXEEXT)
         $(LINK) $(t_equal_OBJECTS) $(t_equal_LDADD) $(LIBS)
-t-get_d$(EXEEXT): $(t_get_d_OBJECTS) $(t_get_d_DEPENDENCIES) 
+t-get_d$(EXEEXT): $(t_get_d_OBJECTS) $(t_get_d_DEPENDENCIES) $(EXTRA_t_get_d_DEPENDENCIES) 
         @rm -f t-get_d$(EXEEXT)
         $(LINK) $(t_get_d_OBJECTS) $(t_get_d_LDADD) $(LIBS)
-t-get_str$(EXEEXT): $(t_get_str_OBJECTS) $(t_get_str_DEPENDENCIES) 
+t-get_str$(EXEEXT): $(t_get_str_OBJECTS) $(t_get_str_DEPENDENCIES) $(EXTRA_t_get_str_DEPENDENCIES) 
         @rm -f t-get_str$(EXEEXT)
         $(LINK) $(t_get_str_OBJECTS) $(t_get_str_LDADD) $(LIBS)
-t-inp_str$(EXEEXT): $(t_inp_str_OBJECTS) $(t_inp_str_DEPENDENCIES) 
+t-inp_str$(EXEEXT): $(t_inp_str_OBJECTS) $(t_inp_str_DEPENDENCIES) $(EXTRA_t_inp_str_DEPENDENCIES) 
         @rm -f t-inp_str$(EXEEXT)
         $(LINK) $(t_inp_str_OBJECTS) $(t_inp_str_LDADD) $(LIBS)
-t-md_2exp$(EXEEXT): $(t_md_2exp_OBJECTS) $(t_md_2exp_DEPENDENCIES) 
+t-inv$(EXEEXT): $(t_inv_OBJECTS) $(t_inv_DEPENDENCIES) $(EXTRA_t_inv_DEPENDENCIES) 
+       @rm -f t-inv$(EXEEXT)
+       $(LINK) $(t_inv_OBJECTS) $(t_inv_LDADD) $(LIBS)
+t-md_2exp$(EXEEXT): $(t_md_2exp_OBJECTS) $(t_md_2exp_DEPENDENCIES) $(EXTRA_t_md_2exp_DEPENDENCIES) 
         @rm -f t-md_2exp$(EXEEXT)
         $(LINK) $(t_md_2exp_OBJECTS) $(t_md_2exp_LDADD) $(LIBS)
-t-set_f$(EXEEXT): $(t_set_f_OBJECTS) $(t_set_f_DEPENDENCIES) 
+t-set_f$(EXEEXT): $(t_set_f_OBJECTS) $(t_set_f_DEPENDENCIES) $(EXTRA_t_set_f_DEPENDENCIES) 
         @rm -f t-set_f$(EXEEXT)
         $(LINK) $(t_set_f_OBJECTS) $(t_set_f_LDADD) $(LIBS)
-t-set_str$(EXEEXT): $(t_set_str_OBJECTS) $(t_set_str_DEPENDENCIES) 
+t-set_str$(EXEEXT): $(t_set_str_OBJECTS) $(t_set_str_DEPENDENCIES) $(EXTRA_t_set_str_DEPENDENCIES) 
         @rm -f t-set_str$(EXEEXT)
         $(LINK) $(t_set_str_OBJECTS) $(t_set_str_LDADD) $(LIBS)
  
@@ -393,11 +439,6 @@ mostlyclean-compile:
  
  distclean-compile:
         -rm -f *.tab.c
-$(top_builddir)/ansi2knr:
-       $(am__cd) $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) ./ansi2knr
-
-mostlyclean-kr:
-       -test "$U" = "" || rm -f *_.c
  
  .c.o:
         $(COMPILE) -c $<
@@ -407,34 +448,6 @@ mostlyclean-kr:
  
  .c.lo:
         $(LTCOMPILE) -c -o $@ $<
-t-aors_.c: t-aors.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-aors.c; then echo $(srcdir)/t-aors.c; else echo t-aors.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-cmp_.c: t-cmp.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-cmp.c; then echo $(srcdir)/t-cmp.c; else echo t-cmp.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-cmp_si_.c: t-cmp_si.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-cmp_si.c; then echo $(srcdir)/t-cmp_si.c; else echo t-cmp_si.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-cmp_ui_.c: t-cmp_ui.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-cmp_ui.c; then echo $(srcdir)/t-cmp_ui.c; else echo t-cmp_ui.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-equal_.c: t-equal.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-equal.c; then echo $(srcdir)/t-equal.c; else echo t-equal.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-get_d_.c: t-get_d.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-get_d.c; then echo $(srcdir)/t-get_d.c; else echo t-get_d.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-get_str_.c: t-get_str.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-get_str.c; then echo $(srcdir)/t-get_str.c; else echo t-get_str.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-inp_str_.c: t-inp_str.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-inp_str.c; then echo $(srcdir)/t-inp_str.c; else echo t-inp_str.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-md_2exp_.c: t-md_2exp.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-md_2exp.c; then echo $(srcdir)/t-md_2exp.c; else echo t-md_2exp.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-set_f_.c: t-set_f.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-set_f.c; then echo $(srcdir)/t-set_f.c; else echo t-set_f.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-set_str_.c: t-set_str.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-set_str.c; then echo $(srcdir)/t-set_str.c; else echo t-set_str.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-aors_.$(OBJEXT) t-aors_.lo t-cmp_.$(OBJEXT) t-cmp_.lo \
-t-cmp_si_.$(OBJEXT) t-cmp_si_.lo t-cmp_ui_.$(OBJEXT) t-cmp_ui_.lo \
-t-equal_.$(OBJEXT) t-equal_.lo t-get_d_.$(OBJEXT) t-get_d_.lo \
-t-get_str_.$(OBJEXT) t-get_str_.lo t-inp_str_.$(OBJEXT) t-inp_str_.lo \
-t-md_2exp_.$(OBJEXT) t-md_2exp_.lo t-set_f_.$(OBJEXT) t-set_f_.lo \
-t-set_str_.$(OBJEXT) t-set_str_.lo : $(ANSI2KNR)
  
  mostlyclean-libtool:
         -rm -f *.lo
@@ -575,14 +588,15 @@ check-TESTS: $(TESTS)
           fi; \
           dashes=`echo "$$dashes" | sed s/./=/g`; \
           if test "$$failed" -eq 0; then \
-           echo "$$grn$$dashes"; \
+           col="$$grn"; \
           else \
-           echo "$$red$$dashes"; \
+           col="$$red"; \
           fi; \
-         echo "$$banner"; \
-         test -z "$$skipped" || echo "$$skipped"; \
-         test -z "$$report" || echo "$$report"; \
-         echo "$$dashes$$std"; \
+         echo "$${col}$$dashes$${std}"; \
+         echo "$${col}$$banner$${std}"; \
+         test -z "$$skipped" || echo "$${col}$$skipped$${std}"; \
+         test -z "$$report" || echo "$${col}$$report$${std}"; \
+         echo "$${col}$$dashes$${std}"; \
           test "$$failed" -eq 0; \
         else :; fi
  
@@ -632,10 +646,15 @@ install-am: all-am
  
  installcheck: installcheck-am
  install-strip:
-       $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
-         install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
-         `test -z '$(STRIP)' || \
-           echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+       if test -z '$(STRIP)'; then \
+         $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+           install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+             install; \
+       else \
+         $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+           install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+           "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
+       fi
  mostlyclean-generic:
  
  clean-generic:
@@ -704,7 +723,7 @@ maintainer-clean-am: distclean-am maintainer-clean-generic
  
  mostlyclean: mostlyclean-am
  
-mostlyclean-am: mostlyclean-compile mostlyclean-generic mostlyclean-kr \
+mostlyclean-am: mostlyclean-compile mostlyclean-generic \
         mostlyclean-libtool
  
  pdf: pdf-am
@@ -717,7 +736,7 @@ ps-am:
  
  uninstall-am:
  
-.MAKE: $(top_builddir)/ansi2knr check-am install-am install-strip
+.MAKE: check-am install-am install-strip
  
  .PHONY: CTAGS GTAGS all all-am check check-TESTS check-am clean \
         clean-checkPROGRAMS clean-generic clean-libtool ctags \
@@ -730,8 +749,8 @@ uninstall-am:
         install-ps install-ps-am install-strip installcheck \
         installcheck-am installdirs maintainer-clean \
         maintainer-clean-generic mostlyclean mostlyclean-compile \
-       mostlyclean-generic mostlyclean-kr mostlyclean-libtool pdf \
-       pdf-am ps ps-am tags uninstall uninstall-am
+       mostlyclean-generic mostlyclean-libtool pdf pdf-am ps ps-am \
+       tags uninstall uninstall-am
  
  
  $(top_builddir)/tests/libtests.la:
diff --git a/tests/mpq/io.c b/tests/mpq/io.c

new file mode 100644 (file)

index 0000000..404cbe9
--- /dev/null
+++ b/tests/mpq/io.c
@@ -0,0 +1,137 @@
+/* Test conversion and I/O using mpq_out_str and mpq_inp_str.
+
+Copyright 1993, 1994, 1996, 2000, 2001, 2012 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "config.h"
+
+#include <stdio.h>
+#include <stdlib.h>
+#if HAVE_UNISTD_H
+#include <unistd.h>            /* for unlink */
+#endif
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "tests.h"
+
+#define FILENAME  "io.tmp"
+
+/* Write X to stdout in the given BASE, followed by a newline.  */
+void
+debug_mp (mpq_t x, int base)
+{
+  mpq_out_str (stdout, base, x);
+  putc ('\n', stdout);
+}
+
+/* Round-trip test for mpq_out_str / mpq_inp_str.
+
+   For each repetition (10000 by default, or argv[1] when exactly one
+   argument is given) a rational is generated, written to FILENAME in a
+   randomly chosen base, read back, and compared with the original.  The
+   character count returned by mpq_inp_str is also checked against the
+   stream position.  Any failure prints a diagnostic and aborts.  */
+int
+main (int argc, char **argv)
+{
+  mpq_t  op1, op2;
+  int i;
+  int reps = 10000;
+  FILE *fp;
+  int base;
+  gmp_randstate_ptr rands;
+  mpz_t bs;
+  unsigned long bsi, size_range;
+  size_t nread;
+  long pos;
+
+  tests_start ();
+  rands = RANDS;
+
+  mpz_init (bs);
+
+  if (argc == 2)
+    reps = atoi (argv[1]);
+
+  mpq_init (op1);
+  mpq_init (op2);
+
+  /* Without this check a failed open would hand a NULL stream to
+     rewind and mpq_out_str below.  */
+  fp = fopen (FILENAME, "w+");
+  if (fp == NULL)
+    {
+      printf ("Cannot create %s\n", FILENAME);
+      abort ();
+    }
+
+  for (i = 0; i < reps; i++)
+    {
+      /* The value drawn by the second call is not used; both draws are
+         kept so the random state consumed per iteration is unchanged.  */
+      mpz_urandomb (bs, rands, 32);
+      size_range = mpz_get_ui (bs) % 10 + 2;
+      mpz_urandomb (bs, rands, size_range);
+
+      mpz_errandomb (mpq_numref(op1), rands, 512L);
+      mpz_errandomb_nonzero (mpq_denref(op1), rands, 512L);
+      mpq_canonicalize (op1);
+
+      /* One random bit decides the sign.  */
+      mpz_urandomb (bs, rands, 1);
+      bsi = mpz_get_ui (bs);
+      if ((bsi & 1) != 0)
+        mpq_neg (op1, op1);
+
+      /* Pick a base in 2..36, with 0 substituted for 1.  */
+      mpz_urandomb (bs, rands, 16);
+      bsi = mpz_get_ui (bs);
+      base = bsi % 36 + 1;
+      if (base == 1)
+        base = 0;
+
+      /* Write the value followed by a blank, from the start of the
+         file.  */
+      rewind (fp);
+      if (mpq_out_str (fp, base, op1) == 0
+          || putc (' ', fp) == EOF
+          || fflush (fp) != 0)
+        {
+          printf ("mpq_out_str write error\n");
+          abort ();
+        }
+
+      rewind (fp);
+      nread = mpq_inp_str (op2, fp, base);
+      if (nread == 0)
+        {
+          if (ferror (fp))
+            printf ("mpq_inp_str stream read error\n");
+          else
+            printf ("mpq_inp_str data conversion error\n");
+          abort ();
+        }
+
+      /* Query the position once; a negative result (ftell failure)
+         is reported as a mismatch, as before.  */
+      pos = ftell (fp);
+      if (pos < 0 || nread != (size_t) pos)
+        {
+          printf ("mpq_inp_str nread doesn't match ftell\n");
+          printf ("  nread  %lu\n", (unsigned long) nread);
+          printf ("  ftell  %ld\n", pos);
+          abort ();
+        }
+
+      if (mpq_cmp (op1, op2))
+        {
+          printf ("ERROR\n");
+          printf ("op1  = "); debug_mp (op1, -16);
+          printf ("op2  = "); debug_mp (op2, -16);
+          printf ("base = %d\n", base);
+          abort ();
+        }
+    }
+
+  fclose (fp);
+
+  unlink (FILENAME);
+
+  mpz_clear (bs);
+  mpq_clear (op1);
+  mpq_clear (op2);
+
+  tests_end ();
+  exit (0);
+}
diff --git a/tests/mpq/reuse.c b/tests/mpq/reuse.c

new file mode 100644 (file)

index 0000000..a9bfb64
--- /dev/null
+++ b/tests/mpq/reuse.c
@@ -0,0 +1,230 @@
+/* Test that routines allow reusing a source variable as destination.
+
+Copyright 1996, 2000, 2001, 2002, 2012 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see http://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "tests.h"
+
+#if __GMP_LIBGMP_DLL
+
+/* FIXME: When linking to a DLL libgmp, mpq_add etc can't be used as
+   initializers for global variables because they're effectively global
+   variables (function pointers) themselves.  Perhaps calling a test
+   function successively with mpq_add etc would be better.  */
+
+/* Stub built when linking against a DLL libgmp: report that the test is
+   skipped and exit successfully.  */
+int
+main (void)
+{
+  printf ("Test suppressed for windows DLL\n");
+  return 0;
+}
+
+
+#else /* ! __GMP_LIBGMP_DLL */
+
+#ifndef SIZE
+#define SIZE 16
+#endif
+
+void dump_abort (const char *, mpq_t, mpq_t);
+
+/* Functions taking a destination and two mpq sources.  main relies on
+   mpq_div being entry 0 for its divide-by-zero guard.  */
+typedef void (*dss_func) (mpq_ptr, mpq_srcptr, mpq_srcptr);
+
+dss_func dss_funcs[] =
+{
+  mpq_div,
+  mpq_add,
+  mpq_mul,
+  mpq_sub,
+};
+
+/* Names printed on failure; same order as dss_funcs.  */
+const char *dss_func_names[] =
+{
+  "mpq_div",
+  "mpq_add",
+  "mpq_mul",
+  "mpq_sub",
+};
+
+/* Functions taking a destination and one mpq source.  */
+typedef void (*ds_func) (mpq_ptr, mpq_srcptr);
+
+ds_func ds_funcs[] =
+{
+  mpq_abs,
+  mpq_neg,
+};
+
+/* Names printed on failure; same order as ds_funcs.  */
+const char *ds_func_names[] =
+{
+  "mpq_abs",
+  "mpq_neg",
+};
+
+/* Functions taking a destination, an mpq source and an unsigned long.  */
+typedef void (*dsi_func) (mpq_ptr, mpq_srcptr, unsigned long int);
+
+dsi_func dsi_funcs[] =
+{
+  mpq_mul_2exp,
+  mpq_div_2exp,
+};
+
+/* Names printed on failure; same order as dsi_funcs.  */
+const char *dsi_func_names[] =
+{
+  "mpq_mul_2exp",
+  "mpq_div_2exp",
+};
+
+/* Number of entries in a function table.  */
+#define REUSE_COUNTOF(tab)  (sizeof (tab) / sizeof ((tab)[0]))
+
+/* If the low bit of *BITS is set, clear Q and initialise it again.  The
+   bit is shifted out in either case.  */
+static void
+reuse_maybe_reinit (mpq_t q, unsigned long int *bits)
+{
+  if ((*bits & 1) != 0)
+    {
+      mpq_clear (q);
+      mpq_init (q);
+    }
+  *bits >>= 1;
+}
+
+/* Give Q a fresh value: optionally re-initialise it, fill numerator and
+   denominator via mpz_errandomb / mpz_errandomb_nonzero, optionally
+   negate the numerator, then canonicalise.  Two bits of *BITS are
+   consumed.  */
+static void
+reuse_random_operand (mpq_t q, gmp_randstate_ptr rands,
+                      unsigned long int *bits)
+{
+  reuse_maybe_reinit (q, bits);
+  mpz_errandomb (mpq_numref (q), rands, 512L);
+  mpz_errandomb_nonzero (mpq_denref (q), rands, 512L);
+  if ((*bits & 1) != 0)
+    mpz_neg (mpq_numref (q), mpq_numref (q));
+  *bits >>= 1;
+  mpq_canonicalize (q);
+}
+
+/* For every function in the dss, ds and dsi tables, check that using a
+   source operand as the destination gives the same result as using a
+   separate destination.  Runs 100 passes, or argv[1] passes if given.  */
+int
+main (int argc, char **argv)
+{
+  mpq_t in1, in2, out1;
+  mpq_t res1, res2, res3;
+  gmp_randstate_ptr rands;
+  unsigned long int randbits, in2i;
+  int reps = 100;
+  int pass;
+  int i;
+
+  tests_start ();
+
+  if (argc > 1)
+    reps = strtol (argv[1], 0, 0);
+
+  rands = RANDS;
+
+  mpq_init (in1);
+  mpq_init (in2);
+  mpq_init (out1);
+  mpq_init (res1);
+  mpq_init (res2);
+  mpq_init (res3);
+
+  for (pass = 1; pass <= reps; pass++)
+    {
+      /* One word of random bits steers all the optional steps of this
+         pass.  */
+      randbits = urandom ();
+
+      reuse_random_operand (in1, rands, &randbits);
+      reuse_random_operand (in2, rands, &randbits);
+
+      /* dst = f (src, src): try dst aliased to each source in turn.  */
+      for (i = 0; i < REUSE_COUNTOF (dss_funcs); i++)
+        {
+          /* Don't divide by 0.  */
+          if (i == 0 && mpq_cmp_ui (in2, 0, 1) == 0)
+            continue;
+
+          reuse_maybe_reinit (res1, &randbits);
+
+          /* Reference result with a distinct destination.  */
+          (dss_funcs[i]) (res1, in1, in2);
+
+          /* Destination aliases the first source.  */
+          mpq_set (out1, in1);
+          (dss_funcs[i]) (out1, out1, in2);
+          mpq_set (res2, out1);
+
+          /* Destination aliases the second source.  */
+          mpq_set (out1, in2);
+          (dss_funcs[i]) (out1, in1, out1);
+          mpq_set (res3, out1);
+
+          if (mpq_cmp (res1, res2) != 0)
+            dump_abort (dss_func_names[i], res1, res2);
+          if (mpq_cmp (res1, res3) != 0)
+            dump_abort (dss_func_names[i], res1, res3);
+        }
+
+      /* dst = f (src).  */
+      for (i = 0; i < REUSE_COUNTOF (ds_funcs); i++)
+        {
+          reuse_maybe_reinit (res1, &randbits);
+
+          (ds_funcs[i]) (res1, in1);
+
+          mpq_set (out1, in1);
+          (ds_funcs[i]) (out1, out1);
+          mpq_set (res2, out1);
+
+          if (mpq_cmp (res1, res2) != 0)
+            dump_abort (ds_func_names[i], res1, res2);
+        }
+
+      /* dst = f (src, unsigned long), with the integer below 65536.  */
+      in2i = urandom () % 65536;
+      for (i = 0; i < REUSE_COUNTOF (dsi_funcs); i++)
+        {
+          reuse_maybe_reinit (res1, &randbits);
+
+          (dsi_funcs[i]) (res1, in1, in2i);
+
+          mpq_set (out1, in1);
+          (dsi_funcs[i]) (out1, out1, in2i);
+          mpq_set (res2, out1);
+
+          if (mpq_cmp (res1, res2) != 0)
+            dump_abort (dsi_func_names[i], res1, res2);
+        }
+    }
+
+  mpq_clear (in1);
+  mpq_clear (in2);
+  mpq_clear (out1);
+  mpq_clear (res1);
+  mpq_clear (res2);
+  mpq_clear (res3);
+
+  tests_end ();
+  exit (0);
+}
+
+/* Report a mismatch for the function called NAME: print a failure line,
+   dump RES1 and RES2 with mpq_trace, then abort.  Does not return.  */
+void
+dump_abort (const char *name, mpq_t res1, mpq_t res2)
+{
+  printf ("failure in %s:\n", name);
+  mpq_trace ("  res1  ", res1);
+  mpq_trace ("  res2  ", res2);
+  abort ();
+}
+
+#endif /* ! __GMP_LIBGMP_DLL */
diff --git a/tests/mpq/t-aors.c b/tests/mpq/t-aors.c

index df2dbe2148dda150413d0e171fbca69d3e846bb9..97aeeb392636579ddabd0e0f164654cbe98a7ff3 100644 (file)
--- a/tests/mpq/t-aors.c
+++ b/tests/mpq/t-aors.c
@@ -2,20 +2,20 @@
  
  Copyright 2001 Free Software Foundation, Inc.
  
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
  
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
  
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
  
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see http://www.gnu.org/licenses/.  */
  
  #include "config.h"
  
diff --git a/tests/mpq/t-cmp.c b/tests/mpq/t-cmp.c

index ac0dc72e7cca1367e1ef1d9346a68c460c4ae1c7..9aaed6afd108eb77d28ad6546eb25e343b8c46d9 100644 (file)
--- a/tests/mpq/t-cmp.c
+++ b/tests/mpq/t-cmp.c
@@ -2,20 +2,20 @@
  
  Copyright 1996, 2001 Free Software Foundation, Inc.
  
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
  
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
  
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
  
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see http://www.gnu.org/licenses/.  */
  
  #include <stdio.h>
  #include <stdlib.h>
@@ -24,9 +24,6 @@ along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
  #include "gmp-impl.h"
  #include "tests.h"
  
-#define NUM(x) (&((x)->_mp_num))
-#define DEN(x) (&((x)->_mp_den))
-
  #define SGN(x) ((x) < 0 ? -1 : (x) > 0 ? 1 : 0)
  
  int
diff --git a/tests/mpq/t-cmp_si.c b/tests/mpq/t-cmp_si.c

index 89b2239bb85c1a5d4a7b33ed319e5f8222821430..d12bd19680fe317c7cee88ab08beb2ec705d1984 100644 (file)
--- a/tests/mpq/t-cmp_si.c
+++ b/tests/mpq/t-cmp_si.c
@@ -2,20 +2,20 @@
  
  Copyright 2001 Free Software Foundation, Inc.
  
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
  
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
  
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
  
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see http://www.gnu.org/licenses/.  */
  
  #include <stdio.h>
  #include <stdlib.h>
diff --git a/tests/mpq/t-cmp_ui.c b/tests/mpq/t-cmp_ui.c

index 3768a77dd0678baae11ada0b7ce17d1397bcaa3b..fec46c384b06563271093874851a702343090b93 100644 (file)
--- a/tests/mpq/t-cmp_ui.c
+++ b/tests/mpq/t-cmp_ui.c
@@ -2,20 +2,20 @@
  
  Copyright 1996, 1997, 2001, 2002 Free Software Foundation, Inc.
  
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
  
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
  
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
  
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see http://www.gnu.org/licenses/.  */
  
  #include <stdio.h>
  #include <stdlib.h>
@@ -24,9 +24,6 @@ along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
  #include "gmp-impl.h"
  #include "tests.h"
  
-#define NUM(x) (&((x)->_mp_num))
-#define DEN(x) (&((x)->_mp_den))
-
  #define SGN(x) ((x) < 0 ? -1 : (x) > 0 ? 1 : 0)
  
  int
diff --git a/tests/mpq/t-equal.c b/tests/mpq/t-equal.c

index 982b143f67301d4c3fcf89c901cc84391a7db788..8d6d650e8e30e18940144db9b590c5a7a57c890c 100644 (file)
--- a/tests/mpq/t-equal.c
+++ b/tests/mpq/t-equal.c
@@ -2,20 +2,20 @@
  
  Copyright 2001 Free Software Foundation, Inc.
  
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
  
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
  
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
  
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see http://www.gnu.org/licenses/.  */
  
  #include <stdio.h>
  #include <stdlib.h>
@@ -125,6 +125,9 @@ check_various (void)
    SET4 (x, 1,1,2,3,4, 3,88,5,6,7);
    SET4 (y, 4,1,2,3,4, 3,99,5,6,7);
    check_all (x, y, 0);
+  SET4 (x, 4,1,2,3,4, 3,88,5,6,7);
+  SET4 (y, 4,1,2,3,4, 2,99,5,6,7);
+  check_all (x, y, 0);
  
    mpq_clear (x);
    mpq_clear (y);
diff --git a/tests/mpq/t-get_d.c b/tests/mpq/t-get_d.c

index f116189012a731f0d47f80e62c81743bcf12bf59..484001eaa48e0b61ff37387263bdae012160d517 100644 (file)
--- a/tests/mpq/t-get_d.c
+++ b/tests/mpq/t-get_d.c
@@ -1,22 +1,22 @@
  /* Test mpq_get_d and mpq_set_d
  
-Copyright 1991, 1993, 1994, 1996, 2000, 2001, 2002, 2003 Free Software
-Foundation, Inc.
+Copyright 1991, 1993, 1994, 1996, 2000, 2001, 2002, 2003, 2012, 2013 Free
+Software Foundation, Inc.
  
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
  
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
  
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
  
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see http://www.gnu.org/licenses/.  */
  
  #include <stdio.h>
  #include <stdlib.h>
@@ -31,7 +31,7 @@ along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
  
  /* VAX D floats only have an 8 bit signed exponent, so anything 2^128 or
     bigger will overflow, that being 4 limbs. */
-#if defined (__vax__) && SIZE > 4
+#if (defined (__vax) || defined (__vax__)) && SIZE > 4
  #undef SIZE
  #define SIZE 4
  #define EPSIZE 3
@@ -39,7 +39,7 @@ along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
  #define EPSIZE SIZE
  #endif
  
-void dump __GMP_PROTO ((mpq_t));
+void dump (mpq_t);
  
  void
  check_monotonic (int argc, char **argv)
@@ -153,40 +153,63 @@ my_ldexp (double d, int e)
      }
  }
  
+#define MAXEXP 500
+/* Bound on |exp| in check_random; VAX gets a much smaller one.  */
+#if defined (__vax) || defined (__vax__)
+#undef MAXEXP
+#define MAXEXP 30
+#endif
+/* Check mpq_set_d exactly on random doubles built as a * 2^exp, a up to 53 bits.  */
  void
  check_random (int argc, char **argv)
  {
-  double d, d2, nd, dd;
+  gmp_randstate_ptr rands = RANDS;
+
+  double d;
    mpq_t q;
-  mp_limb_t rp[LIMBS_PER_DOUBLE + 1];
+  mpz_t a, t;
+  int exp;
+
    int test, reps = 100000;
-  int i;
  
    if (argc == 2)
       reps = 100 * atoi (argv[1]);
  
    mpq_init (q);
+  mpz_init (a);
+  mpz_init (t);
  
    for (test = 0; test < reps; test++)
      {
-      mpn_random2 (rp, LIMBS_PER_DOUBLE + 1);
-      d = 0.0;
-      for (i = LIMBS_PER_DOUBLE - 1; i >= 0; i--)
-       d = d * MP_BASE_AS_DOUBLE + rp[i];
-      d = my_ldexp (d, (int) (rp[LIMBS_PER_DOUBLE] % 1000) - 500);
+      mpz_rrandomb (a, rands, 53);
+      mpz_urandomb (t, rands, 32);
+      exp = mpz_get_ui (t) % (2*MAXEXP) - MAXEXP;   /* intended: -MAXEXP <= exp < MAXEXP */
+
+      d = my_ldexp (mpz_get_d (a), exp);
        mpq_set_d (q, d);
+      /* With q = num/den, check that num/den = a * 2^exp exactly, i.e. that
+         den * a * 2^exp = num, using integer arithmetic only.  */
+      mpz_mul (t, a, mpq_denref (q));
+      if (exp > 0)
+       mpz_mul_2exp (t, t, exp);
+      else
+       {
+         if (!mpz_divisible_2exp_p (t, -exp))
+           goto fail;   /* den*a*2^exp is not an integer, so it cannot equal num */
+         mpz_div_2exp (t, t, -exp);
+       }
+      if (mpz_cmp (t, mpq_numref (q)) != 0)
         {
+       fail:
           printf ("ERROR (check_random test %d): bad mpq_set_d results\n", test);
           printf ("%.16g\n", d);
-         printf ("%.16g\n", d2);
+         gmp_printf ("%Qd\n", q);
           abort ();
         }
      }
    mpq_clear (q);
+  mpz_clear (t);
+  mpz_clear (a);
  }
  
  void
diff --git a/tests/mpq/t-get_str.c b/tests/mpq/t-get_str.c

index 3a13ffa7a6ec7d157343b3b8d4156a12735c4dcb..84325bbf65bfa0aec82fe767337331cd01566c56 100644 (file)
--- a/tests/mpq/t-get_str.c
+++ b/tests/mpq/t-get_str.c
@@ -2,20 +2,20 @@
  
  Copyright 2001 Free Software Foundation, Inc.
  
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
  
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
  
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
  
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see http://www.gnu.org/licenses/.  */
  
  #include <stdio.h>
  #include <stdlib.h>
diff --git a/tests/mpq/t-inp_str.c b/tests/mpq/t-inp_str.c

index 24d9642dcd0a6d88e055917b343aa24d03a7e6fe..699bc6747dd3daa6fe3f8a458142015a50bf138c 100644 (file)
--- a/tests/mpq/t-inp_str.c
+++ b/tests/mpq/t-inp_str.c
@@ -2,20 +2,20 @@
  
  Copyright 2001, 2002 Free Software Foundation, Inc.
  
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
  
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
  
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
  
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see http://www.gnu.org/licenses/.  */
  
  #include "config.h"
  
diff --git a/tests/mpq/t-inv.c b/tests/mpq/t-inv.c

new file mode 100644 (file)

index 0000000..868eba2
--- /dev/null
+++ b/tests/mpq/t-inv.c
@@ -0,0 +1,61 @@
+/* Test mpq_inv (and set/get_num/den).
+
+Copyright 2012 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "tests.h"
+/* Exercise mpq_inv with mpq_set_num/den and mpq_get_num/den on the value s/13.  */
+int
+main (int argc, char **argv)
+{
+  mpq_t a, b;
+  mpz_t m, n;
+  const char* s = "-420000000000000000000000";
+
+  tests_start ();
+
+  mpq_inits (a, b, (mpq_ptr)0);   /* the null pointer ends the argument list */
+  mpz_inits (m, n, (mpz_ptr)0);
+
+  mpz_set_ui (m, 13);
+  mpq_set_den (a, m);
+  mpz_set_str (m, s, 0);   /* base 0: base is inferred from the string */
+  mpq_set_num (a, m);   /* a = s/13; m keeps the value of s for the final check */
+  MPQ_CHECK_FORMAT (a);
+  mpq_inv (b, a);   /* b = 1/a, distinct source and destination */
+  MPQ_CHECK_FORMAT (b);
+  mpq_get_num (n, b);
+  ASSERT_ALWAYS (mpz_cmp_si (n, -13) == 0);   /* the sign must sit on the numerator */
+  mpq_neg (b, b);   /* b = -1/a, now positive */
+  mpq_inv (a, b);   /* a = -(original a) */
+  MPQ_CHECK_FORMAT (a);
+  mpq_inv (b, b);   /* same inversion, done in place */
+  MPQ_CHECK_FORMAT (b);
+  mpq_get_den (n, b);
+  ASSERT_ALWAYS (mpz_cmp_ui (n, 13) == 0);
+  mpq_get_num (n, a);
+  mpz_add (n, n, m);   /* num (a) should be -s, so adding s must give zero */
+  ASSERT_ALWAYS (mpz_sgn (n) == 0);
+
+  mpq_clears (a, b, (mpq_ptr)0);
+  mpz_clears (m, n, (mpz_ptr)0);
+
+  tests_end ();
+  return 0;
+}
diff --git a/tests/mpq/t-md_2exp.c b/tests/mpq/t-md_2exp.c

index 3c1f9f44baa4e31295a91f4b3e73797e8e1ebf40..fb894dc33718791f1f53d2272f9935498b98f14b 100644 (file)
--- a/tests/mpq/t-md_2exp.c
+++ b/tests/mpq/t-md_2exp.c
@@ -2,20 +2,20 @@
  
  Copyright 2000, 2001 Free Software Foundation, Inc.
  
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
  
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
  
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
  
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see http://www.gnu.org/licenses/.  */
  
  #include <stdio.h>
  #include <stdlib.h>
@@ -29,8 +29,69 @@ struct pair_t {
    const char     *den;
  };
  
+/* Random check: q * 2^shift must not be below q, and dividing it by 2^shift must give q back.  */
+void
+check_random (void)
+{
+  gmp_randstate_ptr rands = RANDS;
+  mpz_t bs;
+  unsigned long arg_size, size_range;
+  mpq_t q, r;
+  int i;
+  mp_bitcnt_t shift;
+  int reps = 10000;
+
+  mpz_init (bs);
+  mpq_init (q);
+  mpq_init (r);
+
+  for (i = 0; i < reps; i++)
+    {
+      mpz_urandomb (bs, rands, 32);
+      size_range = mpz_get_ui (bs) % 11 + 2; /* 0..4095 bit operands */
+
+      mpz_urandomb (bs, rands, size_range);
+      arg_size = mpz_get_ui (bs);
+      mpz_rrandomb (mpq_numref (q), rands, arg_size);
+      do
+       {
+         mpz_urandomb (bs, rands, size_range);
+         arg_size = mpz_get_ui (bs);
+         mpz_rrandomb (mpq_denref (q), rands, arg_size);
+       }
+      while (mpz_sgn (mpq_denref (q)) == 0);
+
+      /* We now have a random rational in q, albeit an unnormalised one.  The
+        lack of normalisation should not matter here, so let's save the time a
+        gcd would require.  */
+
+      mpz_urandomb (bs, rands, 32);
+      shift = mpz_get_ui (bs) % 4096;
+
+      mpq_mul_2exp (r, q, shift);
+
+      /* q >= 0 here, so multiplying by 2^shift must not decrease it.  */
+      if (mpq_cmp (r, q) < 0)
+       {
+         printf ("mpq_mul_2exp wrong on random\n");
+         abort ();
+       }
+
+      mpq_div_2exp (r, r, shift);
+
+      if (mpq_cmp (r, q) != 0)
+       {
+         printf ("mpq_mul_2exp or mpq_div_2exp wrong on random\n");
+         abort ();
+       }
+    }
+  mpq_clear (q);
+  mpq_clear (r);
+  mpz_clear (bs);
+}
+
  int
-main (void)
+main (int argc, char **argv)
  {
    static const struct {
      struct pair_t  left;
@@ -96,7 +157,7 @@ main (void)
      { {"1","0x10000000000000000"}, 3, {"1","0x2000000000000000"} },
    };
  
-  void (*fun) __GMP_PROTO ((mpq_ptr, mpq_srcptr, unsigned long));
+  void (*fun) (mpq_ptr, mpq_srcptr, unsigned long);
    const struct pair_t  *p_start, *p_want;
    const char  *name;
    mpq_t    sep, got, want;
@@ -173,6 +234,8 @@ main (void)
          }
      }
  
+  check_random ();
+
    mpq_clear (sep);
    mpq_clear (got);
    mpq_clear (want);
diff --git a/tests/mpq/t-set_f.c b/tests/mpq/t-set_f.c

index 2b30f5e00d03035d2588875e6aaffde5881ff7e8..3d0795acd8c6d5e937ab783d14d869bc9da63f7c 100644 (file)
--- a/tests/mpq/t-set_f.c
+++ b/tests/mpq/t-set_f.c
@@ -2,20 +2,20 @@
  
  Copyright 2000, 2001 Free Software Foundation, Inc.
  
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
  
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
  
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
  
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see http://www.gnu.org/licenses/.  */
  
  #include <stdio.h>
  #include <stdlib.h>
diff --git a/tests/mpq/t-set_str.c b/tests/mpq/t-set_str.c

index 9f95d263bf53c6c5993deb7e621f52cb96c993d7..c3494f7883cacd0aa59152d3aa41101f85bb45ea 100644 (file)
--- a/tests/mpq/t-set_str.c
+++ b/tests/mpq/t-set_str.c
@@ -2,20 +2,20 @@
  
  Copyright 2001 Free Software Foundation, Inc.
  
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
  
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
  
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
  
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see http://www.gnu.org/licenses/.  */
  
  #include <stdio.h>
  #include <stdlib.h>
diff --git a/tests/mpz/Makefile.am b/tests/mpz/Makefile.am

index f081bfb04ea808d0c07eea28cc8fff5f5cb83bee..c1e11e7f7703a66a70f4a81911960b825c814296 100644 (file)
--- a/tests/mpz/Makefile.am
+++ b/tests/mpz/Makefile.am
@@ -3,20 +3,20 @@
  # Copyright 1996, 1997, 1999, 2000, 2001, 2002, 2003, 2009, 2012 Free Software
  # Foundation, Inc.
  #
-# This file is part of the GNU MP Library.
+# This file is part of the GNU MP Library test suite.
  #
-# The GNU MP Library is free software; you can redistribute it and/or modify
-# it under the terms of the GNU Lesser General Public License as published by
-# the Free Software Foundation; either version 3 of the License, or (at your
-# option) any later version.
+# The GNU MP Library test suite is free software; you can redistribute it
+# and/or modify it under the terms of the GNU General Public License as
+# published by the Free Software Foundation; either version 3 of the License,
+# or (at your option) any later version.
  #
-# The GNU MP Library is distributed in the hope that it will be useful, but
-# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-# License for more details.
+# The GNU MP Library test suite is distributed in the hope that it will be
+# useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+# Public License for more details.
  #
-# You should have received a copy of the GNU Lesser General Public License
-# along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+# You should have received a copy of the GNU General Public License along with
+# the GNU MP Library test suite.  If not, see http://www.gnu.org/licenses/.
  
  
  INCLUDES = -I$(top_srcdir) -I$(top_srcdir)/tests
@@ -27,10 +27,10 @@ check_PROGRAMS = t-addsub t-cmp t-mul t-mul_i t-tdiv t-tdiv_ui t-fdiv   \
    convert io t-inp_str logic bit t-powm t-powm_ui t-pow t-div_2exp reuse   \
    t-root t-perfsqr t-perfpow t-jac t-bin t-get_d t-get_d_2exp t-get_si \
    t-set_d t-set_si                                                     \
-  t-fac_ui t-fib_ui t-lucnum_ui t-scan t-fits                           \
+  t-fac_ui t-mfac_uiui t-primorial_ui t-fib_ui t-lucnum_ui t-scan t-fits   \
    t-divis t-divis_2exp t-cong t-cong_2exp t-sizeinbase t-set_str        \
    t-aorsmul t-cmp_d t-cmp_si t-hamdist t-oddeven t-popcount t-set_f     \
-  t-io_raw t-import t-export t-pprime_p t-nextprime
+  t-io_raw t-import t-export t-pprime_p t-nextprime t-remove
  
  TESTS = $(check_PROGRAMS)
  
diff --git a/tests/mpz/Makefile.in b/tests/mpz/Makefile.in

index aa2349c6a44afe6d5828b040fb0f628adf26282a..46a23827507a276dc315699da8943141aa83f3aa 100644 (file)
--- a/tests/mpz/Makefile.in
+++ b/tests/mpz/Makefile.in
@@ -1,9 +1,9 @@
-# Makefile.in generated by automake 1.11.1 from Makefile.am.
+# Makefile.in generated by automake 1.11.6 from Makefile.am.
  # @configure_input@
  
  # Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
-# 2003, 2004, 2005, 2006, 2007, 2008, 2009  Free Software Foundation,
-# Inc.
+# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software
+# Foundation, Inc.
  # This Makefile.in is free software; the Free Software Foundation
  # gives unlimited permission to copy and/or distribute it,
  # with or without modifications, as long as this notice is preserved.
@@ -18,21 +18,38 @@
  # Copyright 1996, 1997, 1999, 2000, 2001, 2002, 2003, 2009, 2012 Free Software
  # Foundation, Inc.
  #
-# This file is part of the GNU MP Library.
+# This file is part of the GNU MP Library test suite.
  #
-# The GNU MP Library is free software; you can redistribute it and/or modify
-# it under the terms of the GNU Lesser General Public License as published by
-# the Free Software Foundation; either version 3 of the License, or (at your
-# option) any later version.
+# The GNU MP Library test suite is free software; you can redistribute it
+# and/or modify it under the terms of the GNU General Public License as
+# published by the Free Software Foundation; either version 3 of the License,
+# or (at your option) any later version.
  #
-# The GNU MP Library is distributed in the hope that it will be useful, but
-# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-# License for more details.
+# The GNU MP Library test suite is distributed in the hope that it will be
+# useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+# Public License for more details.
  #
-# You should have received a copy of the GNU Lesser General Public License
-# along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+# You should have received a copy of the GNU General Public License along with
+# the GNU MP Library test suite.  If not, see http://www.gnu.org/licenses/.
  VPATH = @srcdir@
+am__make_dryrun = \
+  { \
+    am__dry=no; \
+    case $$MAKEFLAGS in \
+      *\\[\ \  ]*) \
+        echo 'am--echo: ; @echo "AM"  OK' | $(MAKE) -f - 2>/dev/null \
+          | grep '^AM OK$$' >/dev/null || am__dry=yes;; \
+      *) \
+        for am__flg in $$MAKEFLAGS; do \
+          case $$am__flg in \
+            *=*|--*) ;; \
+            *n*) am__dry=yes; break;; \
+          esac; \
+        done;; \
+    esac; \
+    test $$am__dry = yes; \
+  }
  pkgdatadir = $(datadir)/@PACKAGE@
  pkgincludedir = $(includedir)/@PACKAGE@
  pkglibdir = $(libdir)/@PACKAGE@
@@ -51,7 +68,6 @@ PRE_UNINSTALL = :
  POST_UNINSTALL = :
  build_triplet = @build@
  host_triplet = @host@
-ANSI2KNR = $(top_builddir)/ansi2knr
  check_PROGRAMS = t-addsub$(EXEEXT) t-cmp$(EXEEXT) t-mul$(EXEEXT) \
         t-mul_i$(EXEEXT) t-tdiv$(EXEEXT) t-tdiv_ui$(EXEEXT) \
         t-fdiv$(EXEEXT) t-fdiv_ui$(EXEEXT) t-cdiv_ui$(EXEEXT) \
@@ -64,19 +80,20 @@ check_PROGRAMS = t-addsub$(EXEEXT) t-cmp$(EXEEXT) t-mul$(EXEEXT) \
         t-perfpow$(EXEEXT) t-jac$(EXEEXT) t-bin$(EXEEXT) \
         t-get_d$(EXEEXT) t-get_d_2exp$(EXEEXT) t-get_si$(EXEEXT) \
         t-set_d$(EXEEXT) t-set_si$(EXEEXT) t-fac_ui$(EXEEXT) \
-       t-fib_ui$(EXEEXT) t-lucnum_ui$(EXEEXT) t-scan$(EXEEXT) \
-       t-fits$(EXEEXT) t-divis$(EXEEXT) t-divis_2exp$(EXEEXT) \
-       t-cong$(EXEEXT) t-cong_2exp$(EXEEXT) t-sizeinbase$(EXEEXT) \
-       t-set_str$(EXEEXT) t-aorsmul$(EXEEXT) t-cmp_d$(EXEEXT) \
-       t-cmp_si$(EXEEXT) t-hamdist$(EXEEXT) t-oddeven$(EXEEXT) \
-       t-popcount$(EXEEXT) t-set_f$(EXEEXT) t-io_raw$(EXEEXT) \
-       t-import$(EXEEXT) t-export$(EXEEXT) t-pprime_p$(EXEEXT) \
-       t-nextprime$(EXEEXT)
+       t-mfac_uiui$(EXEEXT) t-primorial_ui$(EXEEXT) t-fib_ui$(EXEEXT) \
+       t-lucnum_ui$(EXEEXT) t-scan$(EXEEXT) t-fits$(EXEEXT) \
+       t-divis$(EXEEXT) t-divis_2exp$(EXEEXT) t-cong$(EXEEXT) \
+       t-cong_2exp$(EXEEXT) t-sizeinbase$(EXEEXT) t-set_str$(EXEEXT) \
+       t-aorsmul$(EXEEXT) t-cmp_d$(EXEEXT) t-cmp_si$(EXEEXT) \
+       t-hamdist$(EXEEXT) t-oddeven$(EXEEXT) t-popcount$(EXEEXT) \
+       t-set_f$(EXEEXT) t-io_raw$(EXEEXT) t-import$(EXEEXT) \
+       t-export$(EXEEXT) t-pprime_p$(EXEEXT) t-nextprime$(EXEEXT) \
+       t-remove$(EXEEXT)
  subdir = tests/mpz
  DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in
  ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
  am__aclocal_m4_deps = $(top_srcdir)/acinclude.m4 \
-       $(top_srcdir)/configure.in
+       $(top_srcdir)/configure.ac
  am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
         $(ACLOCAL_M4)
  mkinstalldirs = $(install_sh) -d
@@ -84,297 +101,312 @@ CONFIG_HEADER = $(top_builddir)/config.h
  CONFIG_CLEAN_FILES =
  CONFIG_CLEAN_VPATH_FILES =
  bit_SOURCES = bit.c
-bit_OBJECTS = bit$U.$(OBJEXT)
+bit_OBJECTS = bit.$(OBJEXT)
  bit_LDADD = $(LDADD)
  bit_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
         $(top_builddir)/libgmp.la
  convert_SOURCES = convert.c
-convert_OBJECTS = convert$U.$(OBJEXT)
+convert_OBJECTS = convert.$(OBJEXT)
  convert_LDADD = $(LDADD)
  convert_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
         $(top_builddir)/libgmp.la
  dive_SOURCES = dive.c
-dive_OBJECTS = dive$U.$(OBJEXT)
+dive_OBJECTS = dive.$(OBJEXT)
  dive_LDADD = $(LDADD)
  dive_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
         $(top_builddir)/libgmp.la
  dive_ui_SOURCES = dive_ui.c
-dive_ui_OBJECTS = dive_ui$U.$(OBJEXT)
+dive_ui_OBJECTS = dive_ui.$(OBJEXT)
  dive_ui_LDADD = $(LDADD)
  dive_ui_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
         $(top_builddir)/libgmp.la
  io_SOURCES = io.c
-io_OBJECTS = io$U.$(OBJEXT)
+io_OBJECTS = io.$(OBJEXT)
  io_LDADD = $(LDADD)
  io_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
         $(top_builddir)/libgmp.la
  logic_SOURCES = logic.c
-logic_OBJECTS = logic$U.$(OBJEXT)
+logic_OBJECTS = logic.$(OBJEXT)
  logic_LDADD = $(LDADD)
  logic_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
         $(top_builddir)/libgmp.la
  reuse_SOURCES = reuse.c
-reuse_OBJECTS = reuse$U.$(OBJEXT)
+reuse_OBJECTS = reuse.$(OBJEXT)
  reuse_LDADD = $(LDADD)
  reuse_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
         $(top_builddir)/libgmp.la
  t_addsub_SOURCES = t-addsub.c
-t_addsub_OBJECTS = t-addsub$U.$(OBJEXT)
+t_addsub_OBJECTS = t-addsub.$(OBJEXT)
  t_addsub_LDADD = $(LDADD)
  t_addsub_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
         $(top_builddir)/libgmp.la
  t_aorsmul_SOURCES = t-aorsmul.c
-t_aorsmul_OBJECTS = t-aorsmul$U.$(OBJEXT)
+t_aorsmul_OBJECTS = t-aorsmul.$(OBJEXT)
  t_aorsmul_LDADD = $(LDADD)
  t_aorsmul_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
         $(top_builddir)/libgmp.la
  t_bin_SOURCES = t-bin.c
-t_bin_OBJECTS = t-bin$U.$(OBJEXT)
+t_bin_OBJECTS = t-bin.$(OBJEXT)
  t_bin_LDADD = $(LDADD)
  t_bin_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
         $(top_builddir)/libgmp.la
  t_cdiv_ui_SOURCES = t-cdiv_ui.c
-t_cdiv_ui_OBJECTS = t-cdiv_ui$U.$(OBJEXT)
+t_cdiv_ui_OBJECTS = t-cdiv_ui.$(OBJEXT)
  t_cdiv_ui_LDADD = $(LDADD)
  t_cdiv_ui_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
         $(top_builddir)/libgmp.la
  t_cmp_SOURCES = t-cmp.c
-t_cmp_OBJECTS = t-cmp$U.$(OBJEXT)
+t_cmp_OBJECTS = t-cmp.$(OBJEXT)
  t_cmp_LDADD = $(LDADD)
  t_cmp_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
         $(top_builddir)/libgmp.la
  t_cmp_d_SOURCES = t-cmp_d.c
-t_cmp_d_OBJECTS = t-cmp_d$U.$(OBJEXT)
+t_cmp_d_OBJECTS = t-cmp_d.$(OBJEXT)
  t_cmp_d_LDADD = $(LDADD)
  t_cmp_d_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
         $(top_builddir)/libgmp.la
  t_cmp_si_SOURCES = t-cmp_si.c
-t_cmp_si_OBJECTS = t-cmp_si$U.$(OBJEXT)
+t_cmp_si_OBJECTS = t-cmp_si.$(OBJEXT)
  t_cmp_si_LDADD = $(LDADD)
  t_cmp_si_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
         $(top_builddir)/libgmp.la
  t_cong_SOURCES = t-cong.c
-t_cong_OBJECTS = t-cong$U.$(OBJEXT)
+t_cong_OBJECTS = t-cong.$(OBJEXT)
  t_cong_LDADD = $(LDADD)
  t_cong_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
         $(top_builddir)/libgmp.la
  t_cong_2exp_SOURCES = t-cong_2exp.c
-t_cong_2exp_OBJECTS = t-cong_2exp$U.$(OBJEXT)
+t_cong_2exp_OBJECTS = t-cong_2exp.$(OBJEXT)
  t_cong_2exp_LDADD = $(LDADD)
  t_cong_2exp_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
         $(top_builddir)/libgmp.la
  t_div_2exp_SOURCES = t-div_2exp.c
-t_div_2exp_OBJECTS = t-div_2exp$U.$(OBJEXT)
+t_div_2exp_OBJECTS = t-div_2exp.$(OBJEXT)
  t_div_2exp_LDADD = $(LDADD)
  t_div_2exp_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
         $(top_builddir)/libgmp.la
  t_divis_SOURCES = t-divis.c
-t_divis_OBJECTS = t-divis$U.$(OBJEXT)
+t_divis_OBJECTS = t-divis.$(OBJEXT)
  t_divis_LDADD = $(LDADD)
  t_divis_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
         $(top_builddir)/libgmp.la
  t_divis_2exp_SOURCES = t-divis_2exp.c
-t_divis_2exp_OBJECTS = t-divis_2exp$U.$(OBJEXT)
+t_divis_2exp_OBJECTS = t-divis_2exp.$(OBJEXT)
  t_divis_2exp_LDADD = $(LDADD)
  t_divis_2exp_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
         $(top_builddir)/libgmp.la
  t_export_SOURCES = t-export.c
-t_export_OBJECTS = t-export$U.$(OBJEXT)
+t_export_OBJECTS = t-export.$(OBJEXT)
  t_export_LDADD = $(LDADD)
  t_export_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
         $(top_builddir)/libgmp.la
  t_fac_ui_SOURCES = t-fac_ui.c
-t_fac_ui_OBJECTS = t-fac_ui$U.$(OBJEXT)
+t_fac_ui_OBJECTS = t-fac_ui.$(OBJEXT)
  t_fac_ui_LDADD = $(LDADD)
  t_fac_ui_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
         $(top_builddir)/libgmp.la
  t_fdiv_SOURCES = t-fdiv.c
-t_fdiv_OBJECTS = t-fdiv$U.$(OBJEXT)
+t_fdiv_OBJECTS = t-fdiv.$(OBJEXT)
  t_fdiv_LDADD = $(LDADD)
  t_fdiv_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
         $(top_builddir)/libgmp.la
  t_fdiv_ui_SOURCES = t-fdiv_ui.c
-t_fdiv_ui_OBJECTS = t-fdiv_ui$U.$(OBJEXT)
+t_fdiv_ui_OBJECTS = t-fdiv_ui.$(OBJEXT)
  t_fdiv_ui_LDADD = $(LDADD)
  t_fdiv_ui_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
         $(top_builddir)/libgmp.la
  t_fib_ui_SOURCES = t-fib_ui.c
-t_fib_ui_OBJECTS = t-fib_ui$U.$(OBJEXT)
+t_fib_ui_OBJECTS = t-fib_ui.$(OBJEXT)
  t_fib_ui_LDADD = $(LDADD)
  t_fib_ui_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
         $(top_builddir)/libgmp.la
  t_fits_SOURCES = t-fits.c
-t_fits_OBJECTS = t-fits$U.$(OBJEXT)
+t_fits_OBJECTS = t-fits.$(OBJEXT)
  t_fits_LDADD = $(LDADD)
  t_fits_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
         $(top_builddir)/libgmp.la
  t_gcd_SOURCES = t-gcd.c
-t_gcd_OBJECTS = t-gcd$U.$(OBJEXT)
+t_gcd_OBJECTS = t-gcd.$(OBJEXT)
  t_gcd_LDADD = $(LDADD)
  t_gcd_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
         $(top_builddir)/libgmp.la
  t_gcd_ui_SOURCES = t-gcd_ui.c
-t_gcd_ui_OBJECTS = t-gcd_ui$U.$(OBJEXT)
+t_gcd_ui_OBJECTS = t-gcd_ui.$(OBJEXT)
  t_gcd_ui_LDADD = $(LDADD)
  t_gcd_ui_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
         $(top_builddir)/libgmp.la
  t_get_d_SOURCES = t-get_d.c
-t_get_d_OBJECTS = t-get_d$U.$(OBJEXT)
+t_get_d_OBJECTS = t-get_d.$(OBJEXT)
  t_get_d_LDADD = $(LDADD)
  t_get_d_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
         $(top_builddir)/libgmp.la
  t_get_d_2exp_SOURCES = t-get_d_2exp.c
-t_get_d_2exp_OBJECTS = t-get_d_2exp$U.$(OBJEXT)
+t_get_d_2exp_OBJECTS = t-get_d_2exp.$(OBJEXT)
  t_get_d_2exp_LDADD = $(LDADD)
  t_get_d_2exp_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
         $(top_builddir)/libgmp.la
  t_get_si_SOURCES = t-get_si.c
-t_get_si_OBJECTS = t-get_si$U.$(OBJEXT)
+t_get_si_OBJECTS = t-get_si.$(OBJEXT)
  t_get_si_LDADD = $(LDADD)
  t_get_si_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
         $(top_builddir)/libgmp.la
  t_hamdist_SOURCES = t-hamdist.c
-t_hamdist_OBJECTS = t-hamdist$U.$(OBJEXT)
+t_hamdist_OBJECTS = t-hamdist.$(OBJEXT)
  t_hamdist_LDADD = $(LDADD)
  t_hamdist_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
         $(top_builddir)/libgmp.la
  t_import_SOURCES = t-import.c
-t_import_OBJECTS = t-import$U.$(OBJEXT)
+t_import_OBJECTS = t-import.$(OBJEXT)
  t_import_LDADD = $(LDADD)
  t_import_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
         $(top_builddir)/libgmp.la
  t_inp_str_SOURCES = t-inp_str.c
-t_inp_str_OBJECTS = t-inp_str$U.$(OBJEXT)
+t_inp_str_OBJECTS = t-inp_str.$(OBJEXT)
  t_inp_str_LDADD = $(LDADD)
  t_inp_str_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
         $(top_builddir)/libgmp.la
  t_invert_SOURCES = t-invert.c
-t_invert_OBJECTS = t-invert$U.$(OBJEXT)
+t_invert_OBJECTS = t-invert.$(OBJEXT)
  t_invert_LDADD = $(LDADD)
  t_invert_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
         $(top_builddir)/libgmp.la
  t_io_raw_SOURCES = t-io_raw.c
-t_io_raw_OBJECTS = t-io_raw$U.$(OBJEXT)
+t_io_raw_OBJECTS = t-io_raw.$(OBJEXT)
  t_io_raw_LDADD = $(LDADD)
  t_io_raw_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
         $(top_builddir)/libgmp.la
  t_jac_SOURCES = t-jac.c
-t_jac_OBJECTS = t-jac$U.$(OBJEXT)
+t_jac_OBJECTS = t-jac.$(OBJEXT)
  t_jac_LDADD = $(LDADD)
  t_jac_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
         $(top_builddir)/libgmp.la
  t_lcm_SOURCES = t-lcm.c
-t_lcm_OBJECTS = t-lcm$U.$(OBJEXT)
+t_lcm_OBJECTS = t-lcm.$(OBJEXT)
  t_lcm_LDADD = $(LDADD)
  t_lcm_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
         $(top_builddir)/libgmp.la
  t_lucnum_ui_SOURCES = t-lucnum_ui.c
-t_lucnum_ui_OBJECTS = t-lucnum_ui$U.$(OBJEXT)
+t_lucnum_ui_OBJECTS = t-lucnum_ui.$(OBJEXT)
  t_lucnum_ui_LDADD = $(LDADD)
  t_lucnum_ui_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
         $(top_builddir)/libgmp.la
+t_mfac_uiui_SOURCES = t-mfac_uiui.c
+t_mfac_uiui_OBJECTS = t-mfac_uiui.$(OBJEXT)
+t_mfac_uiui_LDADD = $(LDADD)
+t_mfac_uiui_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
+       $(top_builddir)/libgmp.la
  t_mul_SOURCES = t-mul.c
-t_mul_OBJECTS = t-mul$U.$(OBJEXT)
+t_mul_OBJECTS = t-mul.$(OBJEXT)
  t_mul_LDADD = $(LDADD)
  t_mul_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
         $(top_builddir)/libgmp.la
  t_mul_i_SOURCES = t-mul_i.c
-t_mul_i_OBJECTS = t-mul_i$U.$(OBJEXT)
+t_mul_i_OBJECTS = t-mul_i.$(OBJEXT)
  t_mul_i_LDADD = $(LDADD)
  t_mul_i_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
         $(top_builddir)/libgmp.la
  t_nextprime_SOURCES = t-nextprime.c
-t_nextprime_OBJECTS = t-nextprime$U.$(OBJEXT)
+t_nextprime_OBJECTS = t-nextprime.$(OBJEXT)
  t_nextprime_LDADD = $(LDADD)
  t_nextprime_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
         $(top_builddir)/libgmp.la
  t_oddeven_SOURCES = t-oddeven.c
-t_oddeven_OBJECTS = t-oddeven$U.$(OBJEXT)
+t_oddeven_OBJECTS = t-oddeven.$(OBJEXT)
  t_oddeven_LDADD = $(LDADD)
  t_oddeven_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
         $(top_builddir)/libgmp.la
  t_perfpow_SOURCES = t-perfpow.c
-t_perfpow_OBJECTS = t-perfpow$U.$(OBJEXT)
+t_perfpow_OBJECTS = t-perfpow.$(OBJEXT)
  t_perfpow_LDADD = $(LDADD)
  t_perfpow_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
         $(top_builddir)/libgmp.la
  t_perfsqr_SOURCES = t-perfsqr.c
-t_perfsqr_OBJECTS = t-perfsqr$U.$(OBJEXT)
+t_perfsqr_OBJECTS = t-perfsqr.$(OBJEXT)
  t_perfsqr_LDADD = $(LDADD)
  t_perfsqr_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
         $(top_builddir)/libgmp.la
  t_popcount_SOURCES = t-popcount.c
-t_popcount_OBJECTS = t-popcount$U.$(OBJEXT)
+t_popcount_OBJECTS = t-popcount.$(OBJEXT)
  t_popcount_LDADD = $(LDADD)
  t_popcount_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
         $(top_builddir)/libgmp.la
  t_pow_SOURCES = t-pow.c
-t_pow_OBJECTS = t-pow$U.$(OBJEXT)
+t_pow_OBJECTS = t-pow.$(OBJEXT)
  t_pow_LDADD = $(LDADD)
  t_pow_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
         $(top_builddir)/libgmp.la
  t_powm_SOURCES = t-powm.c
-t_powm_OBJECTS = t-powm$U.$(OBJEXT)
+t_powm_OBJECTS = t-powm.$(OBJEXT)
  t_powm_LDADD = $(LDADD)
  t_powm_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
         $(top_builddir)/libgmp.la
  t_powm_ui_SOURCES = t-powm_ui.c
-t_powm_ui_OBJECTS = t-powm_ui$U.$(OBJEXT)
+t_powm_ui_OBJECTS = t-powm_ui.$(OBJEXT)
  t_powm_ui_LDADD = $(LDADD)
  t_powm_ui_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
         $(top_builddir)/libgmp.la
  t_pprime_p_SOURCES = t-pprime_p.c
-t_pprime_p_OBJECTS = t-pprime_p$U.$(OBJEXT)
+t_pprime_p_OBJECTS = t-pprime_p.$(OBJEXT)
  t_pprime_p_LDADD = $(LDADD)
  t_pprime_p_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
         $(top_builddir)/libgmp.la
+t_primorial_ui_SOURCES = t-primorial_ui.c
+t_primorial_ui_OBJECTS = t-primorial_ui.$(OBJEXT)
+t_primorial_ui_LDADD = $(LDADD)
+t_primorial_ui_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
+       $(top_builddir)/libgmp.la
+t_remove_SOURCES = t-remove.c
+t_remove_OBJECTS = t-remove.$(OBJEXT)
+t_remove_LDADD = $(LDADD)
+t_remove_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
+       $(top_builddir)/libgmp.la
  t_root_SOURCES = t-root.c
-t_root_OBJECTS = t-root$U.$(OBJEXT)
+t_root_OBJECTS = t-root.$(OBJEXT)
  t_root_LDADD = $(LDADD)
  t_root_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
         $(top_builddir)/libgmp.la
  t_scan_SOURCES = t-scan.c
-t_scan_OBJECTS = t-scan$U.$(OBJEXT)
+t_scan_OBJECTS = t-scan.$(OBJEXT)
  t_scan_LDADD = $(LDADD)
  t_scan_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
         $(top_builddir)/libgmp.la
  t_set_d_SOURCES = t-set_d.c
-t_set_d_OBJECTS = t-set_d$U.$(OBJEXT)
+t_set_d_OBJECTS = t-set_d.$(OBJEXT)
  t_set_d_LDADD = $(LDADD)
  t_set_d_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
         $(top_builddir)/libgmp.la
  t_set_f_SOURCES = t-set_f.c
-t_set_f_OBJECTS = t-set_f$U.$(OBJEXT)
+t_set_f_OBJECTS = t-set_f.$(OBJEXT)
  t_set_f_LDADD = $(LDADD)
  t_set_f_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
         $(top_builddir)/libgmp.la
  t_set_si_SOURCES = t-set_si.c
-t_set_si_OBJECTS = t-set_si$U.$(OBJEXT)
+t_set_si_OBJECTS = t-set_si.$(OBJEXT)
  t_set_si_LDADD = $(LDADD)
  t_set_si_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
         $(top_builddir)/libgmp.la
  t_set_str_SOURCES = t-set_str.c
-t_set_str_OBJECTS = t-set_str$U.$(OBJEXT)
+t_set_str_OBJECTS = t-set_str.$(OBJEXT)
  t_set_str_LDADD = $(LDADD)
  t_set_str_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
         $(top_builddir)/libgmp.la
  t_sizeinbase_SOURCES = t-sizeinbase.c
-t_sizeinbase_OBJECTS = t-sizeinbase$U.$(OBJEXT)
+t_sizeinbase_OBJECTS = t-sizeinbase.$(OBJEXT)
  t_sizeinbase_LDADD = $(LDADD)
  t_sizeinbase_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
         $(top_builddir)/libgmp.la
  t_sqrtrem_SOURCES = t-sqrtrem.c
-t_sqrtrem_OBJECTS = t-sqrtrem$U.$(OBJEXT)
+t_sqrtrem_OBJECTS = t-sqrtrem.$(OBJEXT)
  t_sqrtrem_LDADD = $(LDADD)
  t_sqrtrem_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
         $(top_builddir)/libgmp.la
  t_tdiv_SOURCES = t-tdiv.c
-t_tdiv_OBJECTS = t-tdiv$U.$(OBJEXT)
+t_tdiv_OBJECTS = t-tdiv.$(OBJEXT)
  t_tdiv_LDADD = $(LDADD)
  t_tdiv_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
         $(top_builddir)/libgmp.la
  t_tdiv_ui_SOURCES = t-tdiv_ui.c
-t_tdiv_ui_OBJECTS = t-tdiv_ui$U.$(OBJEXT)
+t_tdiv_ui_OBJECTS = t-tdiv_ui.$(OBJEXT)
  t_tdiv_ui_LDADD = $(LDADD)
  t_tdiv_ui_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
         $(top_builddir)/libgmp.la
@@ -396,10 +428,11 @@ SOURCES = bit.c convert.c dive.c dive_ui.c io.c logic.c reuse.c \
         t-divis_2exp.c t-export.c t-fac_ui.c t-fdiv.c t-fdiv_ui.c \
         t-fib_ui.c t-fits.c t-gcd.c t-gcd_ui.c t-get_d.c \
         t-get_d_2exp.c t-get_si.c t-hamdist.c t-import.c t-inp_str.c \
-       t-invert.c t-io_raw.c t-jac.c t-lcm.c t-lucnum_ui.c t-mul.c \
-       t-mul_i.c t-nextprime.c t-oddeven.c t-perfpow.c t-perfsqr.c \
-       t-popcount.c t-pow.c t-powm.c t-powm_ui.c t-pprime_p.c \
-       t-root.c t-scan.c t-set_d.c t-set_f.c t-set_si.c t-set_str.c \
+       t-invert.c t-io_raw.c t-jac.c t-lcm.c t-lucnum_ui.c \
+       t-mfac_uiui.c t-mul.c t-mul_i.c t-nextprime.c t-oddeven.c \
+       t-perfpow.c t-perfsqr.c t-popcount.c t-pow.c t-powm.c \
+       t-powm_ui.c t-pprime_p.c t-primorial_ui.c t-remove.c t-root.c \
+       t-scan.c t-set_d.c t-set_f.c t-set_si.c t-set_str.c \
         t-sizeinbase.c t-sqrtrem.c t-tdiv.c t-tdiv_ui.c
  DIST_SOURCES = bit.c convert.c dive.c dive_ui.c io.c logic.c reuse.c \
         t-addsub.c t-aorsmul.c t-bin.c t-cdiv_ui.c t-cmp.c t-cmp_d.c \
@@ -407,11 +440,17 @@ DIST_SOURCES = bit.c convert.c dive.c dive_ui.c io.c logic.c reuse.c \
         t-divis_2exp.c t-export.c t-fac_ui.c t-fdiv.c t-fdiv_ui.c \
         t-fib_ui.c t-fits.c t-gcd.c t-gcd_ui.c t-get_d.c \
         t-get_d_2exp.c t-get_si.c t-hamdist.c t-import.c t-inp_str.c \
-       t-invert.c t-io_raw.c t-jac.c t-lcm.c t-lucnum_ui.c t-mul.c \
-       t-mul_i.c t-nextprime.c t-oddeven.c t-perfpow.c t-perfsqr.c \
-       t-popcount.c t-pow.c t-powm.c t-powm_ui.c t-pprime_p.c \
-       t-root.c t-scan.c t-set_d.c t-set_f.c t-set_si.c t-set_str.c \
+       t-invert.c t-io_raw.c t-jac.c t-lcm.c t-lucnum_ui.c \
+       t-mfac_uiui.c t-mul.c t-mul_i.c t-nextprime.c t-oddeven.c \
+       t-perfpow.c t-perfsqr.c t-popcount.c t-pow.c t-powm.c \
+       t-powm_ui.c t-pprime_p.c t-primorial_ui.c t-remove.c t-root.c \
+       t-scan.c t-set_d.c t-set_f.c t-set_si.c t-set_str.c \
         t-sizeinbase.c t-sqrtrem.c t-tdiv.c t-tdiv_ui.c
+am__can_run_installinfo = \
+  case $$AM_UPDATE_INFO_DIR in \
+    n|no|NO) false;; \
+    *) (install-info --version) >/dev/null 2>&1;; \
+  esac
  ETAGS = etags
  CTAGS = ctags
  am__tty_colors = \
@@ -515,8 +554,8 @@ SHELL = @SHELL@
  SPEED_CYCLECOUNTER_OBJ = @SPEED_CYCLECOUNTER_OBJ@
  STRIP = @STRIP@
  TAL_OBJECT = @TAL_OBJECT@
+TUNE_LIBS = @TUNE_LIBS@
  TUNE_SQR_OBJ = @TUNE_SQR_OBJ@
-U = @U@
  U_FOR_BUILD = @U_FOR_BUILD@
  VERSION = @VERSION@
  WITH_READLINE_01 = @WITH_READLINE_01@
@@ -563,7 +602,6 @@ mandir = @mandir@
  mkdir_p = @mkdir_p@
  mpn_objects = @mpn_objects@
  mpn_objs_in_libgmp = @mpn_objs_in_libgmp@
-mpn_objs_in_libmp = @mpn_objs_in_libmp@
  oldincludedir = @oldincludedir@
  pdfdir = @pdfdir@
  prefix = @prefix@
@@ -628,181 +666,190 @@ clean-checkPROGRAMS:
         list=`for p in $$list; do echo "$$p"; done | sed 's/$(EXEEXT)$$//'`; \
         echo " rm -f" $$list; \
         rm -f $$list
-bit$(EXEEXT): $(bit_OBJECTS) $(bit_DEPENDENCIES) 
+bit$(EXEEXT): $(bit_OBJECTS) $(bit_DEPENDENCIES) $(EXTRA_bit_DEPENDENCIES) 
         @rm -f bit$(EXEEXT)
         $(LINK) $(bit_OBJECTS) $(bit_LDADD) $(LIBS)
-convert$(EXEEXT): $(convert_OBJECTS) $(convert_DEPENDENCIES) 
+convert$(EXEEXT): $(convert_OBJECTS) $(convert_DEPENDENCIES) $(EXTRA_convert_DEPENDENCIES) 
         @rm -f convert$(EXEEXT)
         $(LINK) $(convert_OBJECTS) $(convert_LDADD) $(LIBS)
-dive$(EXEEXT): $(dive_OBJECTS) $(dive_DEPENDENCIES) 
+dive$(EXEEXT): $(dive_OBJECTS) $(dive_DEPENDENCIES) $(EXTRA_dive_DEPENDENCIES) 
         @rm -f dive$(EXEEXT)
         $(LINK) $(dive_OBJECTS) $(dive_LDADD) $(LIBS)
-dive_ui$(EXEEXT): $(dive_ui_OBJECTS) $(dive_ui_DEPENDENCIES) 
+dive_ui$(EXEEXT): $(dive_ui_OBJECTS) $(dive_ui_DEPENDENCIES) $(EXTRA_dive_ui_DEPENDENCIES) 
         @rm -f dive_ui$(EXEEXT)
         $(LINK) $(dive_ui_OBJECTS) $(dive_ui_LDADD) $(LIBS)
-io$(EXEEXT): $(io_OBJECTS) $(io_DEPENDENCIES) 
+io$(EXEEXT): $(io_OBJECTS) $(io_DEPENDENCIES) $(EXTRA_io_DEPENDENCIES) 
         @rm -f io$(EXEEXT)
         $(LINK) $(io_OBJECTS) $(io_LDADD) $(LIBS)
-logic$(EXEEXT): $(logic_OBJECTS) $(logic_DEPENDENCIES) 
+logic$(EXEEXT): $(logic_OBJECTS) $(logic_DEPENDENCIES) $(EXTRA_logic_DEPENDENCIES) 
         @rm -f logic$(EXEEXT)
         $(LINK) $(logic_OBJECTS) $(logic_LDADD) $(LIBS)
-reuse$(EXEEXT): $(reuse_OBJECTS) $(reuse_DEPENDENCIES) 
+reuse$(EXEEXT): $(reuse_OBJECTS) $(reuse_DEPENDENCIES) $(EXTRA_reuse_DEPENDENCIES) 
         @rm -f reuse$(EXEEXT)
         $(LINK) $(reuse_OBJECTS) $(reuse_LDADD) $(LIBS)
-t-addsub$(EXEEXT): $(t_addsub_OBJECTS) $(t_addsub_DEPENDENCIES) 
+t-addsub$(EXEEXT): $(t_addsub_OBJECTS) $(t_addsub_DEPENDENCIES) $(EXTRA_t_addsub_DEPENDENCIES) 
         @rm -f t-addsub$(EXEEXT)
         $(LINK) $(t_addsub_OBJECTS) $(t_addsub_LDADD) $(LIBS)
-t-aorsmul$(EXEEXT): $(t_aorsmul_OBJECTS) $(t_aorsmul_DEPENDENCIES) 
+t-aorsmul$(EXEEXT): $(t_aorsmul_OBJECTS) $(t_aorsmul_DEPENDENCIES) $(EXTRA_t_aorsmul_DEPENDENCIES) 
         @rm -f t-aorsmul$(EXEEXT)
         $(LINK) $(t_aorsmul_OBJECTS) $(t_aorsmul_LDADD) $(LIBS)
-t-bin$(EXEEXT): $(t_bin_OBJECTS) $(t_bin_DEPENDENCIES) 
+t-bin$(EXEEXT): $(t_bin_OBJECTS) $(t_bin_DEPENDENCIES) $(EXTRA_t_bin_DEPENDENCIES) 
         @rm -f t-bin$(EXEEXT)
         $(LINK) $(t_bin_OBJECTS) $(t_bin_LDADD) $(LIBS)
-t-cdiv_ui$(EXEEXT): $(t_cdiv_ui_OBJECTS) $(t_cdiv_ui_DEPENDENCIES) 
+t-cdiv_ui$(EXEEXT): $(t_cdiv_ui_OBJECTS) $(t_cdiv_ui_DEPENDENCIES) $(EXTRA_t_cdiv_ui_DEPENDENCIES) 
         @rm -f t-cdiv_ui$(EXEEXT)
         $(LINK) $(t_cdiv_ui_OBJECTS) $(t_cdiv_ui_LDADD) $(LIBS)
-t-cmp$(EXEEXT): $(t_cmp_OBJECTS) $(t_cmp_DEPENDENCIES) 
+t-cmp$(EXEEXT): $(t_cmp_OBJECTS) $(t_cmp_DEPENDENCIES) $(EXTRA_t_cmp_DEPENDENCIES) 
         @rm -f t-cmp$(EXEEXT)
         $(LINK) $(t_cmp_OBJECTS) $(t_cmp_LDADD) $(LIBS)
-t-cmp_d$(EXEEXT): $(t_cmp_d_OBJECTS) $(t_cmp_d_DEPENDENCIES) 
+t-cmp_d$(EXEEXT): $(t_cmp_d_OBJECTS) $(t_cmp_d_DEPENDENCIES) $(EXTRA_t_cmp_d_DEPENDENCIES) 
         @rm -f t-cmp_d$(EXEEXT)
         $(LINK) $(t_cmp_d_OBJECTS) $(t_cmp_d_LDADD) $(LIBS)
-t-cmp_si$(EXEEXT): $(t_cmp_si_OBJECTS) $(t_cmp_si_DEPENDENCIES) 
+t-cmp_si$(EXEEXT): $(t_cmp_si_OBJECTS) $(t_cmp_si_DEPENDENCIES) $(EXTRA_t_cmp_si_DEPENDENCIES) 
         @rm -f t-cmp_si$(EXEEXT)
         $(LINK) $(t_cmp_si_OBJECTS) $(t_cmp_si_LDADD) $(LIBS)
-t-cong$(EXEEXT): $(t_cong_OBJECTS) $(t_cong_DEPENDENCIES) 
+t-cong$(EXEEXT): $(t_cong_OBJECTS) $(t_cong_DEPENDENCIES) $(EXTRA_t_cong_DEPENDENCIES) 
         @rm -f t-cong$(EXEEXT)
         $(LINK) $(t_cong_OBJECTS) $(t_cong_LDADD) $(LIBS)
-t-cong_2exp$(EXEEXT): $(t_cong_2exp_OBJECTS) $(t_cong_2exp_DEPENDENCIES) 
+t-cong_2exp$(EXEEXT): $(t_cong_2exp_OBJECTS) $(t_cong_2exp_DEPENDENCIES) $(EXTRA_t_cong_2exp_DEPENDENCIES) 
         @rm -f t-cong_2exp$(EXEEXT)
         $(LINK) $(t_cong_2exp_OBJECTS) $(t_cong_2exp_LDADD) $(LIBS)
-t-div_2exp$(EXEEXT): $(t_div_2exp_OBJECTS) $(t_div_2exp_DEPENDENCIES) 
+t-div_2exp$(EXEEXT): $(t_div_2exp_OBJECTS) $(t_div_2exp_DEPENDENCIES) $(EXTRA_t_div_2exp_DEPENDENCIES) 
         @rm -f t-div_2exp$(EXEEXT)
         $(LINK) $(t_div_2exp_OBJECTS) $(t_div_2exp_LDADD) $(LIBS)
-t-divis$(EXEEXT): $(t_divis_OBJECTS) $(t_divis_DEPENDENCIES) 
+t-divis$(EXEEXT): $(t_divis_OBJECTS) $(t_divis_DEPENDENCIES) $(EXTRA_t_divis_DEPENDENCIES) 
         @rm -f t-divis$(EXEEXT)
         $(LINK) $(t_divis_OBJECTS) $(t_divis_LDADD) $(LIBS)
-t-divis_2exp$(EXEEXT): $(t_divis_2exp_OBJECTS) $(t_divis_2exp_DEPENDENCIES) 
+t-divis_2exp$(EXEEXT): $(t_divis_2exp_OBJECTS) $(t_divis_2exp_DEPENDENCIES) $(EXTRA_t_divis_2exp_DEPENDENCIES) 
         @rm -f t-divis_2exp$(EXEEXT)
         $(LINK) $(t_divis_2exp_OBJECTS) $(t_divis_2exp_LDADD) $(LIBS)
-t-export$(EXEEXT): $(t_export_OBJECTS) $(t_export_DEPENDENCIES) 
+t-export$(EXEEXT): $(t_export_OBJECTS) $(t_export_DEPENDENCIES) $(EXTRA_t_export_DEPENDENCIES) 
         @rm -f t-export$(EXEEXT)
         $(LINK) $(t_export_OBJECTS) $(t_export_LDADD) $(LIBS)
-t-fac_ui$(EXEEXT): $(t_fac_ui_OBJECTS) $(t_fac_ui_DEPENDENCIES) 
+t-fac_ui$(EXEEXT): $(t_fac_ui_OBJECTS) $(t_fac_ui_DEPENDENCIES) $(EXTRA_t_fac_ui_DEPENDENCIES) 
         @rm -f t-fac_ui$(EXEEXT)
         $(LINK) $(t_fac_ui_OBJECTS) $(t_fac_ui_LDADD) $(LIBS)
-t-fdiv$(EXEEXT): $(t_fdiv_OBJECTS) $(t_fdiv_DEPENDENCIES) 
+t-fdiv$(EXEEXT): $(t_fdiv_OBJECTS) $(t_fdiv_DEPENDENCIES) $(EXTRA_t_fdiv_DEPENDENCIES) 
         @rm -f t-fdiv$(EXEEXT)
         $(LINK) $(t_fdiv_OBJECTS) $(t_fdiv_LDADD) $(LIBS)
-t-fdiv_ui$(EXEEXT): $(t_fdiv_ui_OBJECTS) $(t_fdiv_ui_DEPENDENCIES) 
+t-fdiv_ui$(EXEEXT): $(t_fdiv_ui_OBJECTS) $(t_fdiv_ui_DEPENDENCIES) $(EXTRA_t_fdiv_ui_DEPENDENCIES) 
         @rm -f t-fdiv_ui$(EXEEXT)
         $(LINK) $(t_fdiv_ui_OBJECTS) $(t_fdiv_ui_LDADD) $(LIBS)
-t-fib_ui$(EXEEXT): $(t_fib_ui_OBJECTS) $(t_fib_ui_DEPENDENCIES) 
+t-fib_ui$(EXEEXT): $(t_fib_ui_OBJECTS) $(t_fib_ui_DEPENDENCIES) $(EXTRA_t_fib_ui_DEPENDENCIES) 
         @rm -f t-fib_ui$(EXEEXT)
         $(LINK) $(t_fib_ui_OBJECTS) $(t_fib_ui_LDADD) $(LIBS)
-t-fits$(EXEEXT): $(t_fits_OBJECTS) $(t_fits_DEPENDENCIES) 
+t-fits$(EXEEXT): $(t_fits_OBJECTS) $(t_fits_DEPENDENCIES) $(EXTRA_t_fits_DEPENDENCIES) 
         @rm -f t-fits$(EXEEXT)
         $(LINK) $(t_fits_OBJECTS) $(t_fits_LDADD) $(LIBS)
-t-gcd$(EXEEXT): $(t_gcd_OBJECTS) $(t_gcd_DEPENDENCIES) 
+t-gcd$(EXEEXT): $(t_gcd_OBJECTS) $(t_gcd_DEPENDENCIES) $(EXTRA_t_gcd_DEPENDENCIES) 
         @rm -f t-gcd$(EXEEXT)
         $(LINK) $(t_gcd_OBJECTS) $(t_gcd_LDADD) $(LIBS)
-t-gcd_ui$(EXEEXT): $(t_gcd_ui_OBJECTS) $(t_gcd_ui_DEPENDENCIES) 
+t-gcd_ui$(EXEEXT): $(t_gcd_ui_OBJECTS) $(t_gcd_ui_DEPENDENCIES) $(EXTRA_t_gcd_ui_DEPENDENCIES) 
         @rm -f t-gcd_ui$(EXEEXT)
         $(LINK) $(t_gcd_ui_OBJECTS) $(t_gcd_ui_LDADD) $(LIBS)
-t-get_d$(EXEEXT): $(t_get_d_OBJECTS) $(t_get_d_DEPENDENCIES) 
+t-get_d$(EXEEXT): $(t_get_d_OBJECTS) $(t_get_d_DEPENDENCIES) $(EXTRA_t_get_d_DEPENDENCIES) 
         @rm -f t-get_d$(EXEEXT)
         $(LINK) $(t_get_d_OBJECTS) $(t_get_d_LDADD) $(LIBS)
-t-get_d_2exp$(EXEEXT): $(t_get_d_2exp_OBJECTS) $(t_get_d_2exp_DEPENDENCIES) 
+t-get_d_2exp$(EXEEXT): $(t_get_d_2exp_OBJECTS) $(t_get_d_2exp_DEPENDENCIES) $(EXTRA_t_get_d_2exp_DEPENDENCIES) 
         @rm -f t-get_d_2exp$(EXEEXT)
         $(LINK) $(t_get_d_2exp_OBJECTS) $(t_get_d_2exp_LDADD) $(LIBS)
-t-get_si$(EXEEXT): $(t_get_si_OBJECTS) $(t_get_si_DEPENDENCIES) 
+t-get_si$(EXEEXT): $(t_get_si_OBJECTS) $(t_get_si_DEPENDENCIES) $(EXTRA_t_get_si_DEPENDENCIES) 
         @rm -f t-get_si$(EXEEXT)
         $(LINK) $(t_get_si_OBJECTS) $(t_get_si_LDADD) $(LIBS)
-t-hamdist$(EXEEXT): $(t_hamdist_OBJECTS) $(t_hamdist_DEPENDENCIES) 
+t-hamdist$(EXEEXT): $(t_hamdist_OBJECTS) $(t_hamdist_DEPENDENCIES) $(EXTRA_t_hamdist_DEPENDENCIES) 
         @rm -f t-hamdist$(EXEEXT)
         $(LINK) $(t_hamdist_OBJECTS) $(t_hamdist_LDADD) $(LIBS)
-t-import$(EXEEXT): $(t_import_OBJECTS) $(t_import_DEPENDENCIES) 
+t-import$(EXEEXT): $(t_import_OBJECTS) $(t_import_DEPENDENCIES) $(EXTRA_t_import_DEPENDENCIES) 
         @rm -f t-import$(EXEEXT)
         $(LINK) $(t_import_OBJECTS) $(t_import_LDADD) $(LIBS)
-t-inp_str$(EXEEXT): $(t_inp_str_OBJECTS) $(t_inp_str_DEPENDENCIES) 
+t-inp_str$(EXEEXT): $(t_inp_str_OBJECTS) $(t_inp_str_DEPENDENCIES) $(EXTRA_t_inp_str_DEPENDENCIES) 
         @rm -f t-inp_str$(EXEEXT)
         $(LINK) $(t_inp_str_OBJECTS) $(t_inp_str_LDADD) $(LIBS)
-t-invert$(EXEEXT): $(t_invert_OBJECTS) $(t_invert_DEPENDENCIES) 
+t-invert$(EXEEXT): $(t_invert_OBJECTS) $(t_invert_DEPENDENCIES) $(EXTRA_t_invert_DEPENDENCIES) 
         @rm -f t-invert$(EXEEXT)
         $(LINK) $(t_invert_OBJECTS) $(t_invert_LDADD) $(LIBS)
-t-io_raw$(EXEEXT): $(t_io_raw_OBJECTS) $(t_io_raw_DEPENDENCIES) 
+t-io_raw$(EXEEXT): $(t_io_raw_OBJECTS) $(t_io_raw_DEPENDENCIES) $(EXTRA_t_io_raw_DEPENDENCIES) 
         @rm -f t-io_raw$(EXEEXT)
         $(LINK) $(t_io_raw_OBJECTS) $(t_io_raw_LDADD) $(LIBS)
-t-jac$(EXEEXT): $(t_jac_OBJECTS) $(t_jac_DEPENDENCIES) 
+t-jac$(EXEEXT): $(t_jac_OBJECTS) $(t_jac_DEPENDENCIES) $(EXTRA_t_jac_DEPENDENCIES) 
         @rm -f t-jac$(EXEEXT)
         $(LINK) $(t_jac_OBJECTS) $(t_jac_LDADD) $(LIBS)
-t-lcm$(EXEEXT): $(t_lcm_OBJECTS) $(t_lcm_DEPENDENCIES) 
+t-lcm$(EXEEXT): $(t_lcm_OBJECTS) $(t_lcm_DEPENDENCIES) $(EXTRA_t_lcm_DEPENDENCIES) 
         @rm -f t-lcm$(EXEEXT)
         $(LINK) $(t_lcm_OBJECTS) $(t_lcm_LDADD) $(LIBS)
-t-lucnum_ui$(EXEEXT): $(t_lucnum_ui_OBJECTS) $(t_lucnum_ui_DEPENDENCIES) 
+t-lucnum_ui$(EXEEXT): $(t_lucnum_ui_OBJECTS) $(t_lucnum_ui_DEPENDENCIES) $(EXTRA_t_lucnum_ui_DEPENDENCIES) 
         @rm -f t-lucnum_ui$(EXEEXT)
         $(LINK) $(t_lucnum_ui_OBJECTS) $(t_lucnum_ui_LDADD) $(LIBS)
-t-mul$(EXEEXT): $(t_mul_OBJECTS) $(t_mul_DEPENDENCIES) 
+t-mfac_uiui$(EXEEXT): $(t_mfac_uiui_OBJECTS) $(t_mfac_uiui_DEPENDENCIES) $(EXTRA_t_mfac_uiui_DEPENDENCIES) 
+       @rm -f t-mfac_uiui$(EXEEXT)
+       $(LINK) $(t_mfac_uiui_OBJECTS) $(t_mfac_uiui_LDADD) $(LIBS)
+t-mul$(EXEEXT): $(t_mul_OBJECTS) $(t_mul_DEPENDENCIES) $(EXTRA_t_mul_DEPENDENCIES) 
         @rm -f t-mul$(EXEEXT)
         $(LINK) $(t_mul_OBJECTS) $(t_mul_LDADD) $(LIBS)
-t-mul_i$(EXEEXT): $(t_mul_i_OBJECTS) $(t_mul_i_DEPENDENCIES) 
+t-mul_i$(EXEEXT): $(t_mul_i_OBJECTS) $(t_mul_i_DEPENDENCIES) $(EXTRA_t_mul_i_DEPENDENCIES) 
         @rm -f t-mul_i$(EXEEXT)
         $(LINK) $(t_mul_i_OBJECTS) $(t_mul_i_LDADD) $(LIBS)
-t-nextprime$(EXEEXT): $(t_nextprime_OBJECTS) $(t_nextprime_DEPENDENCIES) 
+t-nextprime$(EXEEXT): $(t_nextprime_OBJECTS) $(t_nextprime_DEPENDENCIES) $(EXTRA_t_nextprime_DEPENDENCIES) 
         @rm -f t-nextprime$(EXEEXT)
         $(LINK) $(t_nextprime_OBJECTS) $(t_nextprime_LDADD) $(LIBS)
-t-oddeven$(EXEEXT): $(t_oddeven_OBJECTS) $(t_oddeven_DEPENDENCIES) 
+t-oddeven$(EXEEXT): $(t_oddeven_OBJECTS) $(t_oddeven_DEPENDENCIES) $(EXTRA_t_oddeven_DEPENDENCIES) 
         @rm -f t-oddeven$(EXEEXT)
         $(LINK) $(t_oddeven_OBJECTS) $(t_oddeven_LDADD) $(LIBS)
-t-perfpow$(EXEEXT): $(t_perfpow_OBJECTS) $(t_perfpow_DEPENDENCIES) 
+t-perfpow$(EXEEXT): $(t_perfpow_OBJECTS) $(t_perfpow_DEPENDENCIES) $(EXTRA_t_perfpow_DEPENDENCIES) 
         @rm -f t-perfpow$(EXEEXT)
         $(LINK) $(t_perfpow_OBJECTS) $(t_perfpow_LDADD) $(LIBS)
-t-perfsqr$(EXEEXT): $(t_perfsqr_OBJECTS) $(t_perfsqr_DEPENDENCIES) 
+t-perfsqr$(EXEEXT): $(t_perfsqr_OBJECTS) $(t_perfsqr_DEPENDENCIES) $(EXTRA_t_perfsqr_DEPENDENCIES) 
         @rm -f t-perfsqr$(EXEEXT)
         $(LINK) $(t_perfsqr_OBJECTS) $(t_perfsqr_LDADD) $(LIBS)
-t-popcount$(EXEEXT): $(t_popcount_OBJECTS) $(t_popcount_DEPENDENCIES) 
+t-popcount$(EXEEXT): $(t_popcount_OBJECTS) $(t_popcount_DEPENDENCIES) $(EXTRA_t_popcount_DEPENDENCIES) 
         @rm -f t-popcount$(EXEEXT)
         $(LINK) $(t_popcount_OBJECTS) $(t_popcount_LDADD) $(LIBS)
-t-pow$(EXEEXT): $(t_pow_OBJECTS) $(t_pow_DEPENDENCIES) 
+t-pow$(EXEEXT): $(t_pow_OBJECTS) $(t_pow_DEPENDENCIES) $(EXTRA_t_pow_DEPENDENCIES) 
         @rm -f t-pow$(EXEEXT)
         $(LINK) $(t_pow_OBJECTS) $(t_pow_LDADD) $(LIBS)
-t-powm$(EXEEXT): $(t_powm_OBJECTS) $(t_powm_DEPENDENCIES) 
+t-powm$(EXEEXT): $(t_powm_OBJECTS) $(t_powm_DEPENDENCIES) $(EXTRA_t_powm_DEPENDENCIES) 
         @rm -f t-powm$(EXEEXT)
         $(LINK) $(t_powm_OBJECTS) $(t_powm_LDADD) $(LIBS)
-t-powm_ui$(EXEEXT): $(t_powm_ui_OBJECTS) $(t_powm_ui_DEPENDENCIES) 
+t-powm_ui$(EXEEXT): $(t_powm_ui_OBJECTS) $(t_powm_ui_DEPENDENCIES) $(EXTRA_t_powm_ui_DEPENDENCIES) 
         @rm -f t-powm_ui$(EXEEXT)
         $(LINK) $(t_powm_ui_OBJECTS) $(t_powm_ui_LDADD) $(LIBS)
-t-pprime_p$(EXEEXT): $(t_pprime_p_OBJECTS) $(t_pprime_p_DEPENDENCIES) 
+t-pprime_p$(EXEEXT): $(t_pprime_p_OBJECTS) $(t_pprime_p_DEPENDENCIES) $(EXTRA_t_pprime_p_DEPENDENCIES) 
         @rm -f t-pprime_p$(EXEEXT)
         $(LINK) $(t_pprime_p_OBJECTS) $(t_pprime_p_LDADD) $(LIBS)
-t-root$(EXEEXT): $(t_root_OBJECTS) $(t_root_DEPENDENCIES) 
+t-primorial_ui$(EXEEXT): $(t_primorial_ui_OBJECTS) $(t_primorial_ui_DEPENDENCIES) $(EXTRA_t_primorial_ui_DEPENDENCIES) 
+       @rm -f t-primorial_ui$(EXEEXT)
+       $(LINK) $(t_primorial_ui_OBJECTS) $(t_primorial_ui_LDADD) $(LIBS)
+t-remove$(EXEEXT): $(t_remove_OBJECTS) $(t_remove_DEPENDENCIES) $(EXTRA_t_remove_DEPENDENCIES) 
+       @rm -f t-remove$(EXEEXT)
+       $(LINK) $(t_remove_OBJECTS) $(t_remove_LDADD) $(LIBS)
+t-root$(EXEEXT): $(t_root_OBJECTS) $(t_root_DEPENDENCIES) $(EXTRA_t_root_DEPENDENCIES) 
         @rm -f t-root$(EXEEXT)
         $(LINK) $(t_root_OBJECTS) $(t_root_LDADD) $(LIBS)
-t-scan$(EXEEXT): $(t_scan_OBJECTS) $(t_scan_DEPENDENCIES) 
+t-scan$(EXEEXT): $(t_scan_OBJECTS) $(t_scan_DEPENDENCIES) $(EXTRA_t_scan_DEPENDENCIES) 
         @rm -f t-scan$(EXEEXT)
         $(LINK) $(t_scan_OBJECTS) $(t_scan_LDADD) $(LIBS)
-t-set_d$(EXEEXT): $(t_set_d_OBJECTS) $(t_set_d_DEPENDENCIES) 
+t-set_d$(EXEEXT): $(t_set_d_OBJECTS) $(t_set_d_DEPENDENCIES) $(EXTRA_t_set_d_DEPENDENCIES) 
         @rm -f t-set_d$(EXEEXT)
         $(LINK) $(t_set_d_OBJECTS) $(t_set_d_LDADD) $(LIBS)
-t-set_f$(EXEEXT): $(t_set_f_OBJECTS) $(t_set_f_DEPENDENCIES) 
+t-set_f$(EXEEXT): $(t_set_f_OBJECTS) $(t_set_f_DEPENDENCIES) $(EXTRA_t_set_f_DEPENDENCIES) 
         @rm -f t-set_f$(EXEEXT)
         $(LINK) $(t_set_f_OBJECTS) $(t_set_f_LDADD) $(LIBS)
-t-set_si$(EXEEXT): $(t_set_si_OBJECTS) $(t_set_si_DEPENDENCIES) 
+t-set_si$(EXEEXT): $(t_set_si_OBJECTS) $(t_set_si_DEPENDENCIES) $(EXTRA_t_set_si_DEPENDENCIES) 
         @rm -f t-set_si$(EXEEXT)
         $(LINK) $(t_set_si_OBJECTS) $(t_set_si_LDADD) $(LIBS)
-t-set_str$(EXEEXT): $(t_set_str_OBJECTS) $(t_set_str_DEPENDENCIES) 
+t-set_str$(EXEEXT): $(t_set_str_OBJECTS) $(t_set_str_DEPENDENCIES) $(EXTRA_t_set_str_DEPENDENCIES) 
         @rm -f t-set_str$(EXEEXT)
         $(LINK) $(t_set_str_OBJECTS) $(t_set_str_LDADD) $(LIBS)
-t-sizeinbase$(EXEEXT): $(t_sizeinbase_OBJECTS) $(t_sizeinbase_DEPENDENCIES) 
+t-sizeinbase$(EXEEXT): $(t_sizeinbase_OBJECTS) $(t_sizeinbase_DEPENDENCIES) $(EXTRA_t_sizeinbase_DEPENDENCIES) 
         @rm -f t-sizeinbase$(EXEEXT)
         $(LINK) $(t_sizeinbase_OBJECTS) $(t_sizeinbase_LDADD) $(LIBS)
-t-sqrtrem$(EXEEXT): $(t_sqrtrem_OBJECTS) $(t_sqrtrem_DEPENDENCIES) 
+t-sqrtrem$(EXEEXT): $(t_sqrtrem_OBJECTS) $(t_sqrtrem_DEPENDENCIES) $(EXTRA_t_sqrtrem_DEPENDENCIES) 
         @rm -f t-sqrtrem$(EXEEXT)
         $(LINK) $(t_sqrtrem_OBJECTS) $(t_sqrtrem_LDADD) $(LIBS)
-t-tdiv$(EXEEXT): $(t_tdiv_OBJECTS) $(t_tdiv_DEPENDENCIES) 
+t-tdiv$(EXEEXT): $(t_tdiv_OBJECTS) $(t_tdiv_DEPENDENCIES) $(EXTRA_t_tdiv_DEPENDENCIES) 
         @rm -f t-tdiv$(EXEEXT)
         $(LINK) $(t_tdiv_OBJECTS) $(t_tdiv_LDADD) $(LIBS)
-t-tdiv_ui$(EXEEXT): $(t_tdiv_ui_OBJECTS) $(t_tdiv_ui_DEPENDENCIES) 
+t-tdiv_ui$(EXEEXT): $(t_tdiv_ui_OBJECTS) $(t_tdiv_ui_DEPENDENCIES) $(EXTRA_t_tdiv_ui_DEPENDENCIES) 
         @rm -f t-tdiv_ui$(EXEEXT)
         $(LINK) $(t_tdiv_ui_OBJECTS) $(t_tdiv_ui_LDADD) $(LIBS)
  
@@ -811,11 +858,6 @@ mostlyclean-compile:
  
  distclean-compile:
         -rm -f *.tab.c
-$(top_builddir)/ansi2knr:
-       $(am__cd) $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) ./ansi2knr
-
-mostlyclean-kr:
-       -test "$U" = "" || rm -f *_.c
  
  .c.o:
         $(COMPILE) -c $<
@@ -825,154 +867,6 @@ mostlyclean-kr:
  
  .c.lo:
         $(LTCOMPILE) -c -o $@ $<
-bit_.c: bit.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/bit.c; then echo $(srcdir)/bit.c; else echo bit.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-convert_.c: convert.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/convert.c; then echo $(srcdir)/convert.c; else echo convert.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-dive_.c: dive.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/dive.c; then echo $(srcdir)/dive.c; else echo dive.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-dive_ui_.c: dive_ui.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/dive_ui.c; then echo $(srcdir)/dive_ui.c; else echo dive_ui.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-io_.c: io.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/io.c; then echo $(srcdir)/io.c; else echo io.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-logic_.c: logic.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/logic.c; then echo $(srcdir)/logic.c; else echo logic.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-reuse_.c: reuse.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/reuse.c; then echo $(srcdir)/reuse.c; else echo reuse.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-addsub_.c: t-addsub.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-addsub.c; then echo $(srcdir)/t-addsub.c; else echo t-addsub.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-aorsmul_.c: t-aorsmul.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-aorsmul.c; then echo $(srcdir)/t-aorsmul.c; else echo t-aorsmul.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-bin_.c: t-bin.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-bin.c; then echo $(srcdir)/t-bin.c; else echo t-bin.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-cdiv_ui_.c: t-cdiv_ui.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-cdiv_ui.c; then echo $(srcdir)/t-cdiv_ui.c; else echo t-cdiv_ui.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-cmp_.c: t-cmp.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-cmp.c; then echo $(srcdir)/t-cmp.c; else echo t-cmp.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-cmp_d_.c: t-cmp_d.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-cmp_d.c; then echo $(srcdir)/t-cmp_d.c; else echo t-cmp_d.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-cmp_si_.c: t-cmp_si.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-cmp_si.c; then echo $(srcdir)/t-cmp_si.c; else echo t-cmp_si.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-cong_.c: t-cong.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-cong.c; then echo $(srcdir)/t-cong.c; else echo t-cong.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-cong_2exp_.c: t-cong_2exp.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-cong_2exp.c; then echo $(srcdir)/t-cong_2exp.c; else echo t-cong_2exp.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-div_2exp_.c: t-div_2exp.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-div_2exp.c; then echo $(srcdir)/t-div_2exp.c; else echo t-div_2exp.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-divis_.c: t-divis.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-divis.c; then echo $(srcdir)/t-divis.c; else echo t-divis.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-divis_2exp_.c: t-divis_2exp.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-divis_2exp.c; then echo $(srcdir)/t-divis_2exp.c; else echo t-divis_2exp.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-export_.c: t-export.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-export.c; then echo $(srcdir)/t-export.c; else echo t-export.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-fac_ui_.c: t-fac_ui.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-fac_ui.c; then echo $(srcdir)/t-fac_ui.c; else echo t-fac_ui.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-fdiv_.c: t-fdiv.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-fdiv.c; then echo $(srcdir)/t-fdiv.c; else echo t-fdiv.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-fdiv_ui_.c: t-fdiv_ui.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-fdiv_ui.c; then echo $(srcdir)/t-fdiv_ui.c; else echo t-fdiv_ui.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-fib_ui_.c: t-fib_ui.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-fib_ui.c; then echo $(srcdir)/t-fib_ui.c; else echo t-fib_ui.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-fits_.c: t-fits.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-fits.c; then echo $(srcdir)/t-fits.c; else echo t-fits.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-gcd_.c: t-gcd.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-gcd.c; then echo $(srcdir)/t-gcd.c; else echo t-gcd.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-gcd_ui_.c: t-gcd_ui.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-gcd_ui.c; then echo $(srcdir)/t-gcd_ui.c; else echo t-gcd_ui.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-get_d_.c: t-get_d.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-get_d.c; then echo $(srcdir)/t-get_d.c; else echo t-get_d.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-get_d_2exp_.c: t-get_d_2exp.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-get_d_2exp.c; then echo $(srcdir)/t-get_d_2exp.c; else echo t-get_d_2exp.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-get_si_.c: t-get_si.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-get_si.c; then echo $(srcdir)/t-get_si.c; else echo t-get_si.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-hamdist_.c: t-hamdist.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-hamdist.c; then echo $(srcdir)/t-hamdist.c; else echo t-hamdist.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-import_.c: t-import.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-import.c; then echo $(srcdir)/t-import.c; else echo t-import.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-inp_str_.c: t-inp_str.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-inp_str.c; then echo $(srcdir)/t-inp_str.c; else echo t-inp_str.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-invert_.c: t-invert.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-invert.c; then echo $(srcdir)/t-invert.c; else echo t-invert.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-io_raw_.c: t-io_raw.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-io_raw.c; then echo $(srcdir)/t-io_raw.c; else echo t-io_raw.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-jac_.c: t-jac.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-jac.c; then echo $(srcdir)/t-jac.c; else echo t-jac.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-lcm_.c: t-lcm.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-lcm.c; then echo $(srcdir)/t-lcm.c; else echo t-lcm.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-lucnum_ui_.c: t-lucnum_ui.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-lucnum_ui.c; then echo $(srcdir)/t-lucnum_ui.c; else echo t-lucnum_ui.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-mul_.c: t-mul.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-mul.c; then echo $(srcdir)/t-mul.c; else echo t-mul.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-mul_i_.c: t-mul_i.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-mul_i.c; then echo $(srcdir)/t-mul_i.c; else echo t-mul_i.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-nextprime_.c: t-nextprime.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-nextprime.c; then echo $(srcdir)/t-nextprime.c; else echo t-nextprime.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-oddeven_.c: t-oddeven.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-oddeven.c; then echo $(srcdir)/t-oddeven.c; else echo t-oddeven.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-perfpow_.c: t-perfpow.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-perfpow.c; then echo $(srcdir)/t-perfpow.c; else echo t-perfpow.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-perfsqr_.c: t-perfsqr.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-perfsqr.c; then echo $(srcdir)/t-perfsqr.c; else echo t-perfsqr.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-popcount_.c: t-popcount.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-popcount.c; then echo $(srcdir)/t-popcount.c; else echo t-popcount.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-pow_.c: t-pow.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-pow.c; then echo $(srcdir)/t-pow.c; else echo t-pow.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-powm_.c: t-powm.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-powm.c; then echo $(srcdir)/t-powm.c; else echo t-powm.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-powm_ui_.c: t-powm_ui.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-powm_ui.c; then echo $(srcdir)/t-powm_ui.c; else echo t-powm_ui.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-pprime_p_.c: t-pprime_p.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-pprime_p.c; then echo $(srcdir)/t-pprime_p.c; else echo t-pprime_p.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-root_.c: t-root.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-root.c; then echo $(srcdir)/t-root.c; else echo t-root.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-scan_.c: t-scan.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-scan.c; then echo $(srcdir)/t-scan.c; else echo t-scan.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-set_d_.c: t-set_d.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-set_d.c; then echo $(srcdir)/t-set_d.c; else echo t-set_d.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-set_f_.c: t-set_f.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-set_f.c; then echo $(srcdir)/t-set_f.c; else echo t-set_f.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-set_si_.c: t-set_si.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-set_si.c; then echo $(srcdir)/t-set_si.c; else echo t-set_si.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-set_str_.c: t-set_str.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-set_str.c; then echo $(srcdir)/t-set_str.c; else echo t-set_str.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-sizeinbase_.c: t-sizeinbase.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-sizeinbase.c; then echo $(srcdir)/t-sizeinbase.c; else echo t-sizeinbase.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-sqrtrem_.c: t-sqrtrem.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-sqrtrem.c; then echo $(srcdir)/t-sqrtrem.c; else echo t-sqrtrem.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-tdiv_.c: t-tdiv.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-tdiv.c; then echo $(srcdir)/t-tdiv.c; else echo t-tdiv.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-tdiv_ui_.c: t-tdiv_ui.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-tdiv_ui.c; then echo $(srcdir)/t-tdiv_ui.c; else echo t-tdiv_ui.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-bit_.$(OBJEXT) bit_.lo convert_.$(OBJEXT) convert_.lo dive_.$(OBJEXT) \
-dive_.lo dive_ui_.$(OBJEXT) dive_ui_.lo io_.$(OBJEXT) io_.lo \
-logic_.$(OBJEXT) logic_.lo reuse_.$(OBJEXT) reuse_.lo \
-t-addsub_.$(OBJEXT) t-addsub_.lo t-aorsmul_.$(OBJEXT) t-aorsmul_.lo \
-t-bin_.$(OBJEXT) t-bin_.lo t-cdiv_ui_.$(OBJEXT) t-cdiv_ui_.lo \
-t-cmp_.$(OBJEXT) t-cmp_.lo t-cmp_d_.$(OBJEXT) t-cmp_d_.lo \
-t-cmp_si_.$(OBJEXT) t-cmp_si_.lo t-cong_.$(OBJEXT) t-cong_.lo \
-t-cong_2exp_.$(OBJEXT) t-cong_2exp_.lo t-div_2exp_.$(OBJEXT) \
-t-div_2exp_.lo t-divis_.$(OBJEXT) t-divis_.lo t-divis_2exp_.$(OBJEXT) \
-t-divis_2exp_.lo t-export_.$(OBJEXT) t-export_.lo t-fac_ui_.$(OBJEXT) \
-t-fac_ui_.lo t-fdiv_.$(OBJEXT) t-fdiv_.lo t-fdiv_ui_.$(OBJEXT) \
-t-fdiv_ui_.lo t-fib_ui_.$(OBJEXT) t-fib_ui_.lo t-fits_.$(OBJEXT) \
-t-fits_.lo t-gcd_.$(OBJEXT) t-gcd_.lo t-gcd_ui_.$(OBJEXT) t-gcd_ui_.lo \
-t-get_d_.$(OBJEXT) t-get_d_.lo t-get_d_2exp_.$(OBJEXT) \
-t-get_d_2exp_.lo t-get_si_.$(OBJEXT) t-get_si_.lo t-hamdist_.$(OBJEXT) \
-t-hamdist_.lo t-import_.$(OBJEXT) t-import_.lo t-inp_str_.$(OBJEXT) \
-t-inp_str_.lo t-invert_.$(OBJEXT) t-invert_.lo t-io_raw_.$(OBJEXT) \
-t-io_raw_.lo t-jac_.$(OBJEXT) t-jac_.lo t-lcm_.$(OBJEXT) t-lcm_.lo \
-t-lucnum_ui_.$(OBJEXT) t-lucnum_ui_.lo t-mul_.$(OBJEXT) t-mul_.lo \
-t-mul_i_.$(OBJEXT) t-mul_i_.lo t-nextprime_.$(OBJEXT) t-nextprime_.lo \
-t-oddeven_.$(OBJEXT) t-oddeven_.lo t-perfpow_.$(OBJEXT) t-perfpow_.lo \
-t-perfsqr_.$(OBJEXT) t-perfsqr_.lo t-popcount_.$(OBJEXT) \
-t-popcount_.lo t-pow_.$(OBJEXT) t-pow_.lo t-powm_.$(OBJEXT) t-powm_.lo \
-t-powm_ui_.$(OBJEXT) t-powm_ui_.lo t-pprime_p_.$(OBJEXT) \
-t-pprime_p_.lo t-root_.$(OBJEXT) t-root_.lo t-scan_.$(OBJEXT) \
-t-scan_.lo t-set_d_.$(OBJEXT) t-set_d_.lo t-set_f_.$(OBJEXT) \
-t-set_f_.lo t-set_si_.$(OBJEXT) t-set_si_.lo t-set_str_.$(OBJEXT) \
-t-set_str_.lo t-sizeinbase_.$(OBJEXT) t-sizeinbase_.lo \
-t-sqrtrem_.$(OBJEXT) t-sqrtrem_.lo t-tdiv_.$(OBJEXT) t-tdiv_.lo \
-t-tdiv_ui_.$(OBJEXT) t-tdiv_ui_.lo : $(ANSI2KNR)
  
  mostlyclean-libtool:
         -rm -f *.lo
@@ -1113,14 +1007,15 @@ check-TESTS: $(TESTS)
           fi; \
           dashes=`echo "$$dashes" | sed s/./=/g`; \
           if test "$$failed" -eq 0; then \
-           echo "$$grn$$dashes"; \
+           col="$$grn"; \
           else \
-           echo "$$red$$dashes"; \
+           col="$$red"; \
           fi; \
-         echo "$$banner"; \
-         test -z "$$skipped" || echo "$$skipped"; \
-         test -z "$$report" || echo "$$report"; \
-         echo "$$dashes$$std"; \
+         echo "$${col}$$dashes$${std}"; \
+         echo "$${col}$$banner$${std}"; \
+         test -z "$$skipped" || echo "$${col}$$skipped$${std}"; \
+         test -z "$$report" || echo "$${col}$$report$${std}"; \
+         echo "$${col}$$dashes$${std}"; \
           test "$$failed" -eq 0; \
         else :; fi
  
@@ -1170,10 +1065,15 @@ install-am: all-am
  
  installcheck: installcheck-am
  install-strip:
-       $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
-         install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
-         `test -z '$(STRIP)' || \
-           echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+       if test -z '$(STRIP)'; then \
+         $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+           install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+             install; \
+       else \
+         $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+           install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+           "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
+       fi
  mostlyclean-generic:
  
  clean-generic:
@@ -1242,7 +1142,7 @@ maintainer-clean-am: distclean-am maintainer-clean-generic
  
  mostlyclean: mostlyclean-am
  
-mostlyclean-am: mostlyclean-compile mostlyclean-generic mostlyclean-kr \
+mostlyclean-am: mostlyclean-compile mostlyclean-generic \
         mostlyclean-libtool
  
  pdf: pdf-am
@@ -1255,7 +1155,7 @@ ps-am:
  
  uninstall-am:
  
-.MAKE: $(top_builddir)/ansi2knr check-am install-am install-strip
+.MAKE: check-am install-am install-strip
  
  .PHONY: CTAGS GTAGS all all-am check check-TESTS check-am clean \
         clean-checkPROGRAMS clean-generic clean-libtool ctags \
@@ -1268,8 +1168,8 @@ uninstall-am:
         install-ps install-ps-am install-strip installcheck \
         installcheck-am installdirs maintainer-clean \
         maintainer-clean-generic mostlyclean mostlyclean-compile \
-       mostlyclean-generic mostlyclean-kr mostlyclean-libtool pdf \
-       pdf-am ps ps-am tags uninstall uninstall-am
+       mostlyclean-generic mostlyclean-libtool pdf pdf-am ps ps-am \
+       tags uninstall uninstall-am
  
  
  $(top_builddir)/tests/libtests.la:
diff --git a/tests/mpz/bit.c b/tests/mpz/bit.c

index 9f63e039e1cd162b6ef1c96d2ea388174816c962..c13cd1cec9e100235056e7ecf6728dd4d986b1fc 100644 (file)
--- a/tests/mpz/bit.c
+++ b/tests/mpz/bit.c
@@ -1,21 +1,22 @@
  /* Test mpz_setbit, mpz_clrbit, mpz_tstbit.
  
-Copyright 1997, 2000, 2001, 2002, 2003 Free Software Foundation, Inc.
+Copyright 1997, 2000, 2001, 2002, 2003, 2012, 2013 Free Software
+Foundation, Inc.
  
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
  
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
  
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
  
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see http://www.gnu.org/licenses/.  */
  
  #include <stdio.h>
  #include <stdlib.h>
@@ -38,6 +39,7 @@ debug_mp (mpz_srcptr x, int base)
  
  /* exercise the case where mpz_clrbit or mpz_combit ends up extending a
     value like -2^(k*GMP_NUMB_BITS-1) when clearing bit k*GMP_NUMB_BITS-1.  */
+/* And vice-versa. */
  void
  check_clr_extend (void)
  {
@@ -53,7 +55,7 @@ check_clr_extend (void)
        for (f = 0; f <= 1; f++)
         {
           /* lots of 1 bits in _mp_d */
-         mpz_set_ui (got, 1L);
+         mpz_set_si (got, 1L);
           mpz_mul_2exp (got, got, 10*GMP_NUMB_BITS);
           mpz_sub_ui (got, got, 1L);
  
@@ -82,6 +84,28 @@ check_clr_extend (void)
               mpz_trace ("want", want);
               abort ();
             }
+
+         /* complement bit n, going back to ..11100..00 which is -2^(n-1) */
+         if (f == 0)
+           mpz_setbit (got, i*GMP_NUMB_BITS-1);
+         else
+           mpz_combit (got, i*GMP_NUMB_BITS-1);
+         MPZ_CHECK_FORMAT (got);
+
+         mpz_set_si (want, -1L);
+         mpz_mul_2exp (want, want, i*GMP_NUMB_BITS - 1);
+
+         if (mpz_cmp (got, want) != 0)
+           {
+             if (f == 0)
+               printf ("mpz_setbit: ");
+             else
+               printf ("mpz_combit: ");
+             printf ("wrong after shrinking\n");
+             mpz_trace ("got ", got);
+             mpz_trace ("want", want);
+             abort ();
+           }
         }
      }
  
@@ -203,7 +227,7 @@ check_single (void)
      {
        for (offset = (limb==0 ? 0 : -2); offset <= 2; offset++)
         {
-         for (initial = 0; initial >= -1; initial--)
+         for (initial = 1; initial >= -1; initial--)
             {
               mpz_set_si (x, (long) initial);
  
@@ -295,7 +319,7 @@ check_random (int argc, char *argv[])
  
        mpz_set (s2, x);
        bit2 = mpz_tstbit (x, bitindex);
-      mpz_setbit (x, bitindex);
+      mpz_combit (x, bitindex);
        MPZ_CHECK_FORMAT (x);
  
        mpz_set (s3, x);
@@ -323,16 +347,26 @@ check_random (int argc, char *argv[])
        if (mpz_cmp (s2, s3) == 0)
         abort ();
  
+      mpz_combit (x, bitindex);
+      MPZ_CHECK_FORMAT (x);
+      if (mpz_cmp (s2, x) != 0)
+       abort ();
+
+      mpz_clrbit (x, bitindex);
+      MPZ_CHECK_FORMAT (x);
+      if (mpz_cmp (s2, x) != 0)
+       abort ();
+
        mpz_ui_pow_ui (m, 2L, bitindex);
        MPZ_CHECK_FORMAT (m);
-      mpz_ior (x, s2, m);
+      mpz_ior (x, s0, m);
        MPZ_CHECK_FORMAT (x);
        if (mpz_cmp (x, s3) != 0)
         abort ();
  
        mpz_com (m, m);
        MPZ_CHECK_FORMAT (m);
-      mpz_and (x, s1, m);
+      mpz_and (x, s0, m);
        MPZ_CHECK_FORMAT (x);
        if (mpz_cmp (x, s2) != 0)
         abort ();
diff --git a/tests/mpz/convert.c b/tests/mpz/convert.c

index 2ab4205639201b27eee0580fb60a9e88bf224e14..987fb237f05b6175d76f84b84f2037ba11af4182 100644 (file)
--- a/tests/mpz/convert.c
+++ b/tests/mpz/convert.c
@@ -3,20 +3,20 @@
  Copyright 1993, 1994, 1996, 1999, 2000, 2001, 2002, 2006, 2007 Free Software
  Foundation, Inc.
  
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
  
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
  
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
  
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see http://www.gnu.org/licenses/.  */
  
  #include <stdio.h>
  #include <stdlib.h>
@@ -26,7 +26,7 @@ along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
  #include "gmp-impl.h"
  #include "tests.h"
  
-void debug_mp __GMP_PROTO ((mpz_t, int));
+void debug_mp (mpz_t, int);
  
  
  void
@@ -35,7 +35,7 @@ string_urandomb (char *bp, size_t len, int base, gmp_randstate_ptr rands)
    mpz_t bs;
    unsigned long bsi;
    int d, l;
-  char *collseq = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz";
+  const char *collseq = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz";
  
    mpz_init (bs);
  
@@ -129,7 +129,7 @@ main (int argc, char **argv)
        size_range = mpz_get_ui (bs) % 16 + 1;   /* 1..16 */
        mpz_urandomb (bs, rands, size_range);    /* 1..65536 digits */
        len = mpz_get_ui (bs) + 1;
-      buf = (*__gmp_allocate_func) (len + 1);
+      buf = (char *) (*__gmp_allocate_func) (len + 1);
        if (base == 0)
         base = 10;
        string_urandomb (buf, len, base, rands);
diff --git a/tests/mpz/dive.c b/tests/mpz/dive.c

index 9aaea57dce432846647b6a700a0b4e9f00897270..1b9e8be8e341cfe94169c9fa968db011e442ef94 100644 (file)
--- a/tests/mpz/dive.c
+++ b/tests/mpz/dive.c
@@ -2,20 +2,20 @@
  
  Copyright 1996, 2001, 2002 Free Software Foundation, Inc.
  
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
  
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
  
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
  
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see http://www.gnu.org/licenses/.  */
  
  #include <stdio.h>
  #include <stdlib.h>
diff --git a/tests/mpz/dive_ui.c b/tests/mpz/dive_ui.c

index 93f7a9b6e348f6cb9765684fae93670d40ea83b6..de6a7922ca9f95259b233a1d655015283b01fec1 100644 (file)
--- a/tests/mpz/dive_ui.c
+++ b/tests/mpz/dive_ui.c
@@ -2,20 +2,20 @@
  
  Copyright 1996, 2001 Free Software Foundation, Inc.
  
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
  
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
  
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
  
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see http://www.gnu.org/licenses/.  */
  
  #include <stdio.h>
  #include <stdlib.h>
diff --git a/tests/mpz/io.c b/tests/mpz/io.c

index d6bbbbabe603e1cc8f6eeb9c95cd8813b564ae37..172ca66b967b41c86257539e4f85ce977d15f0e7 100644 (file)
--- a/tests/mpz/io.c
+++ b/tests/mpz/io.c
@@ -1,21 +1,21 @@
  /* Test conversion and I/O using mpz_out_str and mpz_inp_str.
  
-Copyright 1993, 1994, 1996, 2000, 2001 Free Software Foundation, Inc.
+Copyright 1993, 1994, 1996, 2000, 2001, 2012 Free Software Foundation, Inc.
  
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
  
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
  
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
  
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see http://www.gnu.org/licenses/.  */
  
  #include "config.h"
  
@@ -45,7 +45,7 @@ main (int argc, char **argv)
    int i;
    int reps = 10000;
    FILE *fp;
-  int base;
+  int base, base_out;
    gmp_randstate_ptr rands;
    mpz_t bs;
    unsigned long bsi, size_range;
@@ -79,12 +79,17 @@ main (int argc, char **argv)
  
        mpz_urandomb (bs, rands, 16);
        bsi = mpz_get_ui (bs);
-      base = bsi % 36 + 1;
+      base = bsi % 62 + 1;
        if (base == 1)
         base = 0;
  
+      if (i % 2 == 0 && base <= 36)
+       base_out = -base;
+      else
+       base_out = base;
+
        rewind (fp);
-      if (mpz_out_str (fp, base, op1) == 0
+      if (mpz_out_str (fp, base_out, op1) == 0
           || putc (' ', fp) == EOF
           || fflush (fp) != 0)
         {
diff --git a/tests/mpz/logic.c b/tests/mpz/logic.c

index e5d3930235415fd243276917b57c508a8716dbbc..04f396c8c9412d9b7cb417c69590f2508a23295d 100644 (file)
--- a/tests/mpz/logic.c
+++ b/tests/mpz/logic.c
@@ -1,21 +1,21 @@
  /* Test mpz_com, mpz_and, mpz_ior, and mpz_xor.
  
-Copyright 1993, 1994, 1996, 1997, 2001 Free Software Foundation, Inc.
+Copyright 1993, 1994, 1996, 1997, 2001, 2013 Free Software Foundation, Inc.
  
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
  
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
  
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
  
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see http://www.gnu.org/licenses/.  */
  
  #include <stdio.h>
  #include <stdlib.h>
@@ -24,8 +24,8 @@ along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
  #include "gmp-impl.h"
  #include "tests.h"
  
-void dump_abort __GMP_PROTO (());
-void debug_mp __GMP_PROTO ((mpz_t, int));
+void dump_abort (void);
+void debug_mp (mpz_t, int);
  
  int
  main (int argc, char **argv)
@@ -55,6 +55,45 @@ main (int argc, char **argv)
    mpz_init (t2);
    mpz_init (t3);
  
+  mpz_set_si (x, -1);
+  mpz_set_ui (y, 0);
+  for (i = 0; i < 300; i++)
+    {
+      mpz_mul_2exp (x, x, 1);
+
+      mpz_and (r1, x, x);
+      MPZ_CHECK_FORMAT (r1);
+      if (mpz_cmp (r1, x) != 0)
+       dump_abort ();
+
+      mpz_ior (r2, x, x);
+      MPZ_CHECK_FORMAT (r2);
+      if (mpz_cmp (r2, x) != 0)
+       dump_abort ();
+
+      mpz_xor (t1, x, x);
+      MPZ_CHECK_FORMAT (t1);
+      if (mpz_cmp_si (t1, 0) != 0)
+       dump_abort ();
+
+      mpz_ior (t1, x, y);
+      MPZ_CHECK_FORMAT (t1);
+      if (mpz_cmp (t1, x) != 0)
+       dump_abort ();
+
+      mpz_xor (t2, x, y);
+      MPZ_CHECK_FORMAT (t2);
+      if (mpz_cmp (t2, x) != 0)
+       dump_abort ();
+
+      mpz_com (t2, x);
+      MPZ_CHECK_FORMAT (t2);
+      mpz_xor (t3, t2, x);
+      MPZ_CHECK_FORMAT (t3);
+      if (mpz_cmp_si (t3, -1) != 0)
+       dump_abort ();
+    }
+
    for (i = 0; i < reps; i++)
      {
        mpz_urandomb (bs, rands, 32);
diff --git a/tests/mpz/reuse.c b/tests/mpz/reuse.c

index ae13fe993b1b8619a075d00e06dfe59b351b8c61..dd33c0b06f6fe6ac5f98f3c66fbf17f96f3ca27e 100644 (file)
--- a/tests/mpz/reuse.c
+++ b/tests/mpz/reuse.c
@@ -6,26 +6,28 @@
         mpz_mul_si
         mpz_addmul_ui (should this really allow a+=a*c?)
  
-Copyright 1996, 1999, 2000, 2001, 2002, 2009 Free Software Foundation, Inc.
+Copyright 1996, 1999, 2000, 2001, 2002, 2009, 2012, 2013 Free Software
+Foundation, Inc.
  
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
  
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
  
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
  
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see http://www.gnu.org/licenses/.  */
  
  #include <stdio.h>
  #include <stdlib.h>
  #include <string.h>
+#include <unistd.h>
  
  #include "gmp.h"
  #include "gmp-impl.h"
@@ -48,14 +50,14 @@ main (void)
  
  #else /* ! DLL_EXPORT */
  
-void dump __GMP_PROTO ((char *, mpz_t, mpz_t, mpz_t));
+void dump (const char *, mpz_t, mpz_t, mpz_t);
  
-typedef void (*dss_func) __GMP_PROTO ((mpz_ptr, mpz_srcptr, mpz_srcptr));
-typedef void (*dsi_func) __GMP_PROTO ((mpz_ptr, mpz_srcptr, unsigned long int));
-typedef unsigned long int (*dsi_div_func) __GMP_PROTO ((mpz_ptr, mpz_srcptr, unsigned long int));
-typedef unsigned long int (*ddsi_div_func) __GMP_PROTO ((mpz_ptr, mpz_ptr, mpz_srcptr, unsigned long int));
-typedef void (*ddss_div_func) __GMP_PROTO ((mpz_ptr, mpz_ptr, mpz_srcptr, mpz_srcptr));
-typedef void (*ds_func) __GMP_PROTO ((mpz_ptr, mpz_srcptr));
+typedef void (*dss_func) (mpz_ptr, mpz_srcptr, mpz_srcptr);
+typedef void (*dsi_func) (mpz_ptr, mpz_srcptr, unsigned long int);
+typedef unsigned long int (*dsi_div_func) (mpz_ptr, mpz_srcptr, unsigned long int);
+typedef unsigned long int (*ddsi_div_func) (mpz_ptr, mpz_ptr, mpz_srcptr, unsigned long int);
+typedef void (*ddss_div_func) (mpz_ptr, mpz_ptr, mpz_srcptr, mpz_srcptr);
+typedef void (*ds_func) (mpz_ptr, mpz_srcptr);
  
  
  void
@@ -67,125 +69,140 @@ mpz_xinvert (mpz_ptr r, mpz_srcptr a, mpz_srcptr b)
      mpz_set_ui (r, 0);
  }
  
-dss_func dss_funcs[] =
-{
-  mpz_add, mpz_sub, mpz_mul,
-  mpz_cdiv_q, mpz_cdiv_r, mpz_fdiv_q, mpz_fdiv_r, mpz_tdiv_q, mpz_tdiv_r,
-  mpz_xinvert,
-  mpz_gcd, mpz_lcm, mpz_and, mpz_ior, mpz_xor
-};
-char *dss_func_names[] =
-{
-  "mpz_add", "mpz_sub", "mpz_mul",
-  "mpz_cdiv_q", "mpz_cdiv_r", "mpz_fdiv_q", "mpz_fdiv_r", "mpz_tdiv_q", "mpz_tdiv_r",
-  "mpz_xinvert",
-  "mpz_gcd", "mpz_lcm", "mpz_and", "mpz_ior", "mpz_xor"
-};
-char dss_func_division[] = {0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0};
-
-dsi_func dsi_funcs[] =
+struct {
+  dss_func fptr;
+  const char *fname;
+  int isdivision;
+  int isslow;
+} dss[] =
+  { { mpz_add,     "mpz_add",    0, 0 },
+    { mpz_sub,     "mpz_sub",    0, 0 },
+    { mpz_mul,     "mpz_mul",    0, 0 },
+    { mpz_cdiv_q,  "mpz_cdiv_q",  1, 0 },
+    { mpz_cdiv_r,  "mpz_cdiv_r",  1, 0 },
+    { mpz_fdiv_q,  "mpz_fdiv_q",  1, 0 },
+    { mpz_fdiv_r,  "mpz_fdiv_r",  1, 0 },
+    { mpz_tdiv_q,  "mpz_tdiv_q",  1, 0 },
+    { mpz_tdiv_r,  "mpz_tdiv_r",  1, 0 },
+    { mpz_mod,     "mpz_mod",    1, 0 },
+    { mpz_xinvert, "mpz_xinvert", 1, 1 },
+    { mpz_gcd,     "mpz_gcd",    0, 1 },
+    { mpz_lcm,     "mpz_lcm",    0, 1 },
+    { mpz_and,     "mpz_and",    0, 0 },
+    { mpz_ior,     "mpz_ior",    0, 0 },
+    { mpz_xor,     "mpz_xor",     0, 0 }
+  };
+
+
+struct {
+  dsi_func fptr;
+  const char *fname;
+  int mod;
+} dsi[] =
  {
    /* Don't change order here without changing the code in main(). */
-  mpz_add_ui, mpz_mul_ui, mpz_sub_ui,
-  mpz_fdiv_q_2exp, mpz_fdiv_r_2exp,
-  mpz_cdiv_q_2exp, mpz_cdiv_r_2exp,
-  mpz_tdiv_q_2exp, mpz_tdiv_r_2exp,
-  mpz_mul_2exp,
-  mpz_pow_ui
-};
-char *dsi_func_names[] =
-{
-  "mpz_add_ui", "mpz_mul_ui", "mpz_sub_ui",
-  "mpz_fdiv_q_2exp", "mpz_fdiv_r_2exp",
-  "mpz_cdiv_q_2exp", "mpz_cdiv_r_2exp",
-  "mpz_tdiv_q_2exp", "mpz_tdiv_r_2exp",
-  "mpz_mul_2exp",
-  "mpz_pow_ui"
+  { mpz_add_ui,         "mpz_add_ui",       0 },
+  { mpz_mul_ui,                "mpz_mul_ui",        0 },
+  { mpz_sub_ui,                "mpz_sub_ui",        0 },
+  { mpz_fdiv_q_2exp,    "mpz_fdiv_q_2exp",   0x1000 },
+  { mpz_fdiv_r_2exp,    "mpz_fdiv_r_2exp",   0x1000 },
+  { mpz_cdiv_q_2exp,    "mpz_cdiv_q_2exp",   0x1000 },
+  { mpz_cdiv_r_2exp,    "mpz_cdiv_r_2exp",   0x1000 },
+  { mpz_tdiv_q_2exp,    "mpz_tdiv_q_2exp",   0x1000 },
+  { mpz_tdiv_r_2exp,    "mpz_tdiv_r_2exp",   0x1000 },
+  { mpz_mul_2exp,      "mpz_mul_2exp",      0x100 },
+  { mpz_pow_ui,                "mpz_pow_ui",        0x10 }
  };
  
-dsi_div_func dsi_div_funcs[] =
+struct {
+  dsi_div_func fptr;
+  const char *fname;
+} dsi_div[] =
  {
-  mpz_cdiv_q_ui, mpz_cdiv_r_ui,
-  mpz_fdiv_q_ui, mpz_fdiv_r_ui,
-  mpz_tdiv_q_ui, mpz_tdiv_r_ui
-};
-char *dsi_div_func_names[] =
-{
-  "mpz_cdiv_q_ui", "mpz_cdiv_r_ui",
-  "mpz_fdiv_q_ui", "mpz_fdiv_r_ui",
-  "mpz_tdiv_q_ui", "mpz_tdiv_r_ui"
+  { mpz_cdiv_q_ui,       "mpz_cdiv_q_ui" },
+  { mpz_cdiv_r_ui,       "mpz_cdiv_r_ui" },
+  { mpz_fdiv_q_ui,       "mpz_fdiv_q_ui" },
+  { mpz_fdiv_r_ui,       "mpz_fdiv_r_ui" },
+  { mpz_tdiv_q_ui,       "mpz_tdiv_q_ui" },
+  { mpz_tdiv_r_ui,       "mpz_tdiv_r_ui" }
  };
  
-ddsi_div_func ddsi_div_funcs[] =
-{
-  mpz_cdiv_qr_ui,
-  mpz_fdiv_qr_ui,
-  mpz_tdiv_qr_ui
-};
-char *ddsi_div_func_names[] =
+struct {
+  ddsi_div_func fptr;
+  const char *fname;
+  int isslow;
+} ddsi_div[] =
  {
-  "mpz_cdiv_qr_ui",
-  "mpz_fdiv_qr_ui",
-  "mpz_tdiv_qr_ui"
+  { mpz_cdiv_qr_ui,     "mpz_cdiv_qr_ui",    0 },
+  { mpz_fdiv_qr_ui,     "mpz_fdiv_qr_ui",    0 },
+  { mpz_tdiv_qr_ui,     "mpz_tdiv_qr_ui",    0 },
  };
  
-ddss_div_func ddss_div_funcs[] =
-{
-  mpz_cdiv_qr,
-  mpz_fdiv_qr,
-  mpz_tdiv_qr
-};
-char *ddss_div_func_names[] =
-{
-  "mpz_cdiv_qr",
-  "mpz_fdiv_qr",
-  "mpz_tdiv_qr"
-};
  
-ds_func ds_funcs[] =
+struct {
+  ddss_div_func fptr;
+  const char *fname;
+  int isslow;
+} ddss_div[] =
  {
-  mpz_abs, mpz_com, mpz_neg, mpz_sqrt
+  { mpz_cdiv_qr,  "mpz_cdiv_qr",    0 },
+  { mpz_fdiv_qr,  "mpz_fdiv_qr",    0 },
+  { mpz_tdiv_qr,  "mpz_tdiv_qr",    0 },
  };
-char *ds_func_names[] =
+
+struct {
+  ds_func fptr;
+  const char *fname;
+  int nonneg;
+} ds[] =
  {
-  "mpz_abs", "mpz_com", "mpz_neg", "mpz_sqrt"
+  { mpz_abs,    "mpz_abs",    0 },
+  { mpz_com,    "mpz_com",    0 },
+  { mpz_neg,    "mpz_neg",    0 },
+  { mpz_sqrt,   "mpz_sqrt",   1 },
  };
  
+#define FAIL(class,indx,op1,op2,op3)                                   \
+  do {                                                                 \
+    dump (class[indx].fname, op1, op2, op3);                           \
+    exit (1);                                                          \
+  } while (0)
+
+#define FAIL2(fname,op1,op2,op3)                                       \
+  do {                                                                 \
+    dump (#fname, op1, op2, op3);                                      \
+    exit (1);                                                          \
+  } while (0)
+
  
-/* Really use `defined (__STDC__)' here; we want it to be true for Sun C */
-#if defined (__STDC__) || defined (__cplusplus)
-#define FAIL(class,indx,op1,op2,op3) \
+#define INVOKE_RRS(desc,r1,r2,i1)                                      \
    do {                                                                 \
-  class##_funcs[indx] = 0;                                             \
-  dump (class##_func_names[indx], op1, op2, op3);                      \
-  failures++;                                                          \
+    if (pass & 1) _mpz_realloc (r1, ABSIZ(r1));                                \
+    if (pass & 2) _mpz_realloc (r2, ABSIZ(r2));                                \
+    (desc).fptr (r1, r2, i1);                                          \
    } while (0)
-#define FAIL2(fname,op1,op2,op3) \
+#define INVOKE_RS(desc,r1,i1)                                          \
    do {                                                                 \
-  dump (#fname, op1, op2, op3);                                                \
-  failures++;                                                          \
+    if (pass & 1) _mpz_realloc (r1, ABSIZ(r1));                                \
+    (desc).fptr (r1, i1);                                              \
    } while (0)
-#else
-#define FAIL(class,indx,op1,op2,op3) \
+#define INVOKE_RRSS(desc,r1,r2,i1,i2)                                  \
    do {                                                                 \
-  class/**/_funcs[indx] = 0;                                           \
-  dump (class/**/_func_names[indx], op1, op2, op3);                    \
-  failures++;                                                          \
+    if (pass & 1) _mpz_realloc (r1, ABSIZ(r1));                                \
+    if (pass & 2) _mpz_realloc (r2, ABSIZ(r2));                                \
+    (desc).fptr (r1, r2, i1, i2);                                      \
    } while (0)
-#define FAIL2(fname,op1,op2,op3) \
+#define INVOKE_RSS(desc,r1,i1,i2)                                      \
    do {                                                                 \
-  dump ("fname", op1, op2, op3);                                       \
-  failures++;                                                          \
+    if (pass & 1) _mpz_realloc (r1, ABSIZ(r1));                                \
+    (desc).fptr (r1, i1, i2);                                          \
    } while (0)
-#endif
-
-
  
  int
  main (int argc, char **argv)
  {
    int i;
-  int pass, reps = 100;
+  int pass, reps = 400;
    mpz_t in1, in2, in3;
    unsigned long int in2i;
    mp_size_t size;
@@ -193,7 +210,6 @@ main (int argc, char **argv)
    mpz_t ref1, ref2, ref3;
    mpz_t t;
    unsigned long int r1, r2;
-  long failures = 0;
    gmp_randstate_ptr rands;
    mpz_t bs;
    unsigned long bsi, size_range;
@@ -218,106 +234,126 @@ main (int argc, char **argv)
  
    for (pass = 1; pass <= reps; pass++)
      {
-      mpz_urandomb (bs, rands, 32);
-      size_range = mpz_get_ui (bs) % 17 + 2;
-
-      mpz_urandomb (bs, rands, size_range);
-      size = mpz_get_ui (bs);
-      mpz_rrandomb (in1, rands, size);
+      if (isatty (fileno (stdout)))
+       {
+         printf ("\r%d/%d passes", pass, reps);
+         fflush (stdout);
+       }
  
-      mpz_urandomb (bs, rands, size_range);
-      size = mpz_get_ui (bs);
-      mpz_rrandomb (in2, rands, size);
+      mpz_urandomb (bs, rands, 32);
+      size_range = mpz_get_ui (bs) % 21 + 2;
  
-      mpz_urandomb (bs, rands, size_range);
-      size = mpz_get_ui (bs);
-      mpz_rrandomb (in3, rands, size);
+      if ((pass & 1) == 0)
+       {
+         /* Make all input operands have quite different sizes */
+         mpz_urandomb (bs, rands, 32);
+         size = mpz_get_ui (bs) % size_range;
+         mpz_rrandomb (in1, rands, size);
+
+         mpz_urandomb (bs, rands, 32);
+         size = mpz_get_ui (bs) % size_range;
+         mpz_rrandomb (in2, rands, size);
+
+         mpz_urandomb (bs, rands, 32);
+         size = mpz_get_ui (bs) % size_range;
+         mpz_rrandomb (in3, rands, size);
+       }
+      else
+       {
+         /* Make all input operands have about the same size */
+         mpz_urandomb (bs, rands, size_range);
+         size = mpz_get_ui (bs);
+         mpz_rrandomb (in1, rands, size);
+
+         mpz_urandomb (bs, rands, size_range);
+         size = mpz_get_ui (bs);
+         mpz_rrandomb (in2, rands, size);
+
+         mpz_urandomb (bs, rands, size_range);
+         size = mpz_get_ui (bs);
+         mpz_rrandomb (in3, rands, size);
+       }
  
        mpz_urandomb (bs, rands, 3);
        bsi = mpz_get_ui (bs);
        if ((bsi & 1) != 0)
         mpz_neg (in1, in1);
-      if ((bsi & 1) != 0)
+      if ((bsi & 2) != 0)
         mpz_neg (in2, in2);
-      if ((bsi & 1) != 0)
+      if ((bsi & 4) != 0)
         mpz_neg (in3, in3);
  
-      for (i = 0; i < sizeof (dss_funcs) / sizeof (dss_func); i++)
+      for (i = 0; i < numberof (dss); i++)
         {
-         if (dss_funcs[i] == 0)
+         if (dss[i].isdivision && mpz_sgn (in2) == 0)
             continue;
-         if (dss_func_division[i] && mpz_sgn (in2) == 0)
+         if (dss[i].isslow && size_range > 19)
             continue;
  
-         (dss_funcs[i]) (ref1, in1, in2);
+         (dss[i].fptr) (ref1, in1, in2);
           MPZ_CHECK_FORMAT (ref1);
  
           mpz_set (res1, in1);
-         (dss_funcs[i]) (res1, res1, in2);
+         INVOKE_RSS (dss[i], res1, res1, in2);
           MPZ_CHECK_FORMAT (res1);
           if (mpz_cmp (ref1, res1) != 0)
             FAIL (dss, i, in1, in2, NULL);
  
           mpz_set (res1, in2);
-         (dss_funcs[i]) (res1, in1, res1);
+         INVOKE_RSS (dss[i], res1, in1, res1);
           MPZ_CHECK_FORMAT (res1);
           if (mpz_cmp (ref1, res1) != 0)
             FAIL (dss, i, in1, in2, NULL);
         }
  
-      for (i = 0; i < sizeof (ddss_div_funcs) / sizeof (ddss_div_func); i++)
+      for (i = 0; i < numberof (ddss_div); i++)
         {
-         if (ddss_div_funcs[i] == 0)
-           continue;
           if (mpz_sgn (in2) == 0)
             continue;
  
-         (ddss_div_funcs[i]) (ref1, ref2, in1, in2);
+         (ddss_div[i].fptr) (ref1, ref2, in1, in2);
           MPZ_CHECK_FORMAT (ref1);
           MPZ_CHECK_FORMAT (ref2);
  
           mpz_set (res1, in1);
-         (ddss_div_funcs[i]) (res1, res2, res1, in2);
+         INVOKE_RRSS (ddss_div[i], res1, res2, res1, in2);
           MPZ_CHECK_FORMAT (res1);
           MPZ_CHECK_FORMAT (res2);
           if (mpz_cmp (ref1, res1) != 0 || mpz_cmp (ref2, res2) != 0)
             FAIL (ddss_div, i, in1, in2, NULL);
  
           mpz_set (res2, in1);
-         (ddss_div_funcs[i]) (res1, res2, res2, in2);
+         INVOKE_RRSS (ddss_div[i], res1, res2, res2, in2);
           MPZ_CHECK_FORMAT (res1);
           MPZ_CHECK_FORMAT (res2);
           if (mpz_cmp (ref1, res1) != 0 || mpz_cmp (ref2, res2) != 0)
             FAIL (ddss_div, i, in1, in2, NULL);
  
           mpz_set (res1, in2);
-         (ddss_div_funcs[i]) (res1, res2, in1, res1);
+         INVOKE_RRSS (ddss_div[i], res1, res2, in1, res1);
           MPZ_CHECK_FORMAT (res1);
           MPZ_CHECK_FORMAT (res2);
           if (mpz_cmp (ref1, res1) != 0 || mpz_cmp (ref2, res2) != 0)
             FAIL (ddss_div, i, in1, in2, NULL);
  
           mpz_set (res2, in2);
-         (ddss_div_funcs[i]) (res1, res2, in1, res2);
+         INVOKE_RRSS (ddss_div[i], res1, res2, in1, res2);
           MPZ_CHECK_FORMAT (res1);
           MPZ_CHECK_FORMAT (res2);
           if (mpz_cmp (ref1, res1) != 0 || mpz_cmp (ref2, res2) != 0)
             FAIL (ddss_div, i, in1, in2, NULL);
         }
  
-      for (i = 0; i < sizeof (ds_funcs) / sizeof (ds_func); i++)
+      for (i = 0; i < numberof (ds); i++)
         {
-         if (ds_funcs[i] == 0)
-           continue;
-         if (strcmp (ds_func_names[i], "mpz_sqrt") == 0
-             && mpz_sgn (in1) < 0)
+         if (ds[i].nonneg && mpz_sgn (in1) < 0)
             continue;
  
-         (ds_funcs[i]) (ref1, in1);
+         (ds[i].fptr) (ref1, in1);
           MPZ_CHECK_FORMAT (ref1);
  
           mpz_set (res1, in1);
-         (ds_funcs[i]) (res1, res1);
+         INVOKE_RS (ds[i], res1, res1);
           MPZ_CHECK_FORMAT (res1);
           if (mpz_cmp (ref1, res1) != 0)
             FAIL (ds, i, in1, in2, NULL);
@@ -325,27 +361,16 @@ main (int argc, char **argv)
  
        in2i = mpz_get_ui (in2);
  
-      for (i = 0; i < sizeof (dsi_funcs) / sizeof (dsi_func); i++)
+      for (i = 0; i < numberof (dsi); i++)
         {
-         if (dsi_funcs[i] == 0)
-           continue;
-         if (strcmp (dsi_func_names[i], "mpz_fdiv_q_2exp") == 0)
-           /* Limit exponent to something reasonable for the division
-              functions.  Without this, we'd  normally shift things off
-              the end and just generate the trivial values 1, 0, -1.  */
-           in2i %= 0x1000;
-         if (strcmp (dsi_func_names[i], "mpz_mul_2exp") == 0)
-           /* Limit exponent more for mpz_mul_2exp to save time.  */
-           in2i %= 0x100;
-         if (strcmp (dsi_func_names[i], "mpz_pow_ui") == 0)
-           /* Limit exponent yet more for mpz_pow_ui to save time.  */
-           in2i %= 0x10;
-
-         (dsi_funcs[i]) (ref1, in1, in2i);
+         if (dsi[i].mod != 0)
+           in2i = mpz_get_ui (in2) % dsi[i].mod;
+
+         (dsi[i].fptr) (ref1, in1, in2i);
           MPZ_CHECK_FORMAT (ref1);
  
           mpz_set (res1, in1);
-         (dsi_funcs[i]) (res1, res1, in2i);
+         INVOKE_RRS (dsi[i], res1, res1, in2i);
           MPZ_CHECK_FORMAT (res1);
           if (mpz_cmp (ref1, res1) != 0)
             FAIL (dsi, i, in1, in2, NULL);
@@ -353,31 +378,31 @@ main (int argc, char **argv)
  
        if (in2i != 0)     /* Don't divide by 0.  */
         {
-         for (i = 0; i < sizeof (dsi_div_funcs) / sizeof (dsi_div_funcs); i++)
+         for (i = 0; i < numberof (dsi_div); i++)
             {
-             r1 = (dsi_div_funcs[i]) (ref1, in1, in2i);
+             r1 = (dsi_div[i].fptr) (ref1, in1, in2i);
               MPZ_CHECK_FORMAT (ref1);
  
               mpz_set (res1, in1);
-             r2 = (dsi_div_funcs[i]) (res1, res1, in2i);
+             r2 = (dsi_div[i].fptr) (res1, res1, in2i);
               MPZ_CHECK_FORMAT (res1);
               if (mpz_cmp (ref1, res1) != 0 || r1 != r2)
                 FAIL (dsi_div, i, in1, in2, NULL);
             }
  
-         for (i = 0; i < sizeof (ddsi_div_funcs) / sizeof (ddsi_div_funcs); i++)
+         for (i = 0; i < numberof (ddsi_div); i++)
             {
-             r1 = (ddsi_div_funcs[i]) (ref1, ref2, in1, in2i);
+             r1 = (ddsi_div[i].fptr) (ref1, ref2, in1, in2i);
               MPZ_CHECK_FORMAT (ref1);
  
               mpz_set (res1, in1);
-             r2 = (ddsi_div_funcs[i]) (res1, res2, res1, in2i);
+             r2 = (ddsi_div[i].fptr) (res1, res2, res1, in2i);
               MPZ_CHECK_FORMAT (res1);
               if (mpz_cmp (ref1, res1) != 0 || mpz_cmp (ref2, res2) != 0 || r1 != r2)
                 FAIL (ddsi_div, i, in1, in2, NULL);
  
               mpz_set (res2, in1);
-             (ddsi_div_funcs[i]) (res1, res2, res2, in2i);
+             (ddsi_div[i].fptr) (res1, res2, res2, in2i);
               MPZ_CHECK_FORMAT (res1);
               if (mpz_cmp (ref1, res1) != 0 || mpz_cmp (ref2, res2) != 0 || r1 != r2)
                 FAIL (ddsi_div, i, in1, in2, NULL);
@@ -403,15 +428,21 @@ main (int argc, char **argv)
           MPZ_CHECK_FORMAT (res2);
           if (mpz_cmp (ref1, res1) != 0 || mpz_cmp (ref2, res2) != 0)
             FAIL2 (mpz_sqrtrem, in1, NULL, NULL);
+
+         mpz_set (res1, in1);
+         mpz_sqrtrem (res1, res1, res1);
+         MPZ_CHECK_FORMAT (res1);
+         if (mpz_cmp (ref2, res1) != 0)
+           FAIL2 (mpz_sqrtrem, in1, NULL, NULL);
         }
  
        if (mpz_sgn (in1) >= 0)
         {
-         mpz_root (ref1, in1, in2i % 0x1000 + 1);
+         mpz_root (ref1, in1, in2i % 0x100 + 1);
           MPZ_CHECK_FORMAT (ref1);
  
           mpz_set (res1, in1);
-         mpz_root (res1, res1, in2i % 0x1000 + 1);
+         mpz_root (res1, res1, in2i % 0x100 + 1);
           MPZ_CHECK_FORMAT (res1);
           if (mpz_cmp (ref1, res1) != 0)
             FAIL2 (mpz_root, in1, in2, NULL);
@@ -419,26 +450,26 @@ main (int argc, char **argv)
  
        if (mpz_sgn (in1) >= 0)
         {
-         mpz_rootrem (ref1, ref2, in1, in2i % 0x1000 + 1);
+         mpz_rootrem (ref1, ref2, in1, in2i % 0x100 + 1);
           MPZ_CHECK_FORMAT (ref1);
           MPZ_CHECK_FORMAT (ref2);
  
           mpz_set (res1, in1);
-         mpz_rootrem (res1, res2, res1, in2i % 0x1000 + 1);
+         mpz_rootrem (res1, res2, res1, in2i % 0x100 + 1);
           MPZ_CHECK_FORMAT (res1);
           MPZ_CHECK_FORMAT (res2);
           if (mpz_cmp (ref1, res1) != 0 || mpz_cmp (ref2, res2) != 0)
             FAIL2 (mpz_rootrem, in1, in2, NULL);
  
           mpz_set (res2, in1);
-         mpz_rootrem (res1, res2, res2, in2i % 0x1000 + 1);
+         mpz_rootrem (res1, res2, res2, in2i % 0x100 + 1);
           MPZ_CHECK_FORMAT (res1);
           MPZ_CHECK_FORMAT (res2);
           if (mpz_cmp (ref1, res1) != 0 || mpz_cmp (ref2, res2) != 0)
             FAIL2 (mpz_rootrem, in1, in2, NULL);
         }
  
-      if (pass < reps / 2)     /* run fewer tests since gcdext lots of time */
+      if (size_range < 18)     /* run fewer tests since gcdext takes lots of time */
         {
           mpz_gcdext (ref1, ref2, ref3, in1, in2);
           MPZ_CHECK_FORMAT (ref1);
@@ -533,7 +564,7 @@ main (int argc, char **argv)
         }
  
        /* Don't run mpz_powm for huge exponents or when undefined.  */
-      if (mpz_sizeinbase (in2, 2) < 250 && mpz_sgn (in3) != 0
+      if (size_range < 17 && mpz_sizeinbase (in2, 2) < 250 && mpz_sgn (in3) != 0
           && (mpz_sgn (in2) >= 0 || mpz_invert (t, in1, in3)))
         {
           mpz_powm (ref1, in1, in2, in3);
@@ -559,7 +590,7 @@ main (int argc, char **argv)
         }
  
        /* Don't run mpz_powm_ui when undefined.  */
-      if (mpz_sgn (in3) != 0)
+      if (size_range < 17 && mpz_sgn (in3) != 0)
         {
           mpz_powm_ui (ref1, in1, in2i, in3);
           MPZ_CHECK_FORMAT (ref1);
@@ -588,22 +619,23 @@ main (int argc, char **argv)
           FAIL2 (mpz_gcd_ui, in1, in2, NULL);
        }
  
-      if (mpz_cmp_ui (in2, 1L) > 0 && mpz_sgn (in1) != 0)
+      if (mpz_sgn (in2) != 0)
         {
           /* Test mpz_remove */
-         mpz_remove (ref1, in1, in2);
+         mp_bitcnt_t refretval, retval;
+         refretval = mpz_remove (ref1, in1, in2);
           MPZ_CHECK_FORMAT (ref1);
  
           mpz_set (res1, in1);
-         mpz_remove (res1, res1, in2);
+         retval = mpz_remove (res1, res1, in2);
           MPZ_CHECK_FORMAT (res1);
-         if (mpz_cmp (ref1, res1) != 0)
+         if (mpz_cmp (ref1, res1) != 0 || refretval != retval)
             FAIL2 (mpz_remove, in1, in2, NULL);
  
           mpz_set (res1, in2);
-         mpz_remove (res1, in1, res1);
+         retval = mpz_remove (res1, in1, res1);
           MPZ_CHECK_FORMAT (res1);
-         if (mpz_cmp (ref1, res1) != 0)
+         if (mpz_cmp (ref1, res1) != 0 || refretval != retval)
             FAIL2 (mpz_remove, in1, in2, NULL);
         }
  
@@ -648,11 +680,8 @@ main (int argc, char **argv)
         }
      }
  
-  if (failures != 0)
-    {
-      fprintf (stderr, "mpz/reuse: %ld error%s\n", failures, "s" + (failures == 1));
-      exit (1);
-    }
+  if (isatty (fileno (stdout)))
+    printf ("\r%20s", "");
  
    mpz_clear (bs);
    mpz_clear (in1);
@@ -666,24 +695,27 @@ main (int argc, char **argv)
    mpz_clear (res3);
    mpz_clear (t);
  
+  if (isatty (fileno (stdout)))
+    printf ("\r");
+
    tests_end ();
    exit (0);
  }
  
  void
-dump (char *name, mpz_t in1, mpz_t in2, mpz_t in3)
+dump (const char *name, mpz_t in1, mpz_t in2, mpz_t in3)
  {
    printf ("failure in %s (", name);
-  mpz_out_str (stdout, -16, in1);
+  0 && mpz_out_str (stdout, -16, in1);
    if (in2 != NULL)
      {
        printf (" ");
-      mpz_out_str (stdout, -16, in2);
+      0 && mpz_out_str (stdout, -16, in2);
      }
    if (in3 != NULL)
      {
        printf (" ");
-      mpz_out_str (stdout, -16, in3);
+      0 && mpz_out_str (stdout, -16, in3);
      }
    printf (")\n");
  }
diff --git a/tests/mpz/t-addsub.c b/tests/mpz/t-addsub.c

index 815a8397311730c2ddfee9c6691685a056943f2b..e19c21623deb27681604a322f14964539dff34a2 100644 (file)
--- a/tests/mpz/t-addsub.c
+++ b/tests/mpz/t-addsub.c
@@ -2,20 +2,20 @@
  
  Copyright 2002 Free Software Foundation, Inc.
  
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
  
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
  
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
  
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see http://www.gnu.org/licenses/.  */
  
  #include <stdio.h>
  #include <stdlib.h>
@@ -25,8 +25,8 @@ along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
  #include "longlong.h"
  #include "tests.h"
  
-void debug_mp __GMP_PROTO ((mpz_t, int));
-void dump_abort __GMP_PROTO ((int, char *, mpz_t, mpz_t));
+void debug_mp (mpz_t, int);
+void dump_abort (int, const char *, mpz_t, mpz_t);
  
  int
  main (int argc, char **argv)
@@ -107,7 +107,7 @@ main (int argc, char **argv)
  }
  
  void
-dump_abort (int i, char *s, mpz_t op1, mpz_t op2)
+dump_abort (int i, const char *s, mpz_t op1, mpz_t op2)
  {
    fprintf (stderr, "ERROR: %s in test %d\n", s, i);
    fprintf (stderr, "op1 = "); debug_mp (op1, -16);
diff --git a/tests/mpz/t-aorsmul.c b/tests/mpz/t-aorsmul.c

index ecec5d570b422189224407f05259baa1e04507bb..93573776c944f1d3f11f86f01215e3b30f392ca9 100644 (file)
--- a/tests/mpz/t-aorsmul.c
+++ b/tests/mpz/t-aorsmul.c
@@ -2,20 +2,20 @@
  
  Copyright 2001, 2002 Free Software Foundation, Inc.
  
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
  
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
  
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
  
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see http://www.gnu.org/licenses/.  */
  
  
  #include <stdio.h>
diff --git a/tests/mpz/t-bin.c b/tests/mpz/t-bin.c

index 6e9a34b8037e9adfb1af5432116d1c57d083497d..af6b4df3c4558cc6207b7ce03b87c363fe2734f4 100644 (file)
--- a/tests/mpz/t-bin.c
+++ b/tests/mpz/t-bin.c
@@ -1,21 +1,21 @@
  /* Exercise mpz_bin_ui and mpz_bin_uiui.
  
-Copyright 2000, 2001 Free Software Foundation, Inc.
+Copyright 2000, 2001, 2010, 2012 Free Software Foundation, Inc.
  
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
  
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
  
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
  
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see http://www.gnu.org/licenses/.  */
  
  #include <stdio.h>
  #include <stdlib.h>
@@ -23,6 +23,8 @@ along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
  #include "gmp-impl.h"
  #include "tests.h"
  
+/* Default number of generated tests. */
+#define COUNT 700
  
  void
  try_mpz_bin_ui (mpz_srcptr want, mpz_srcptr n, unsigned long k)
@@ -75,58 +77,11 @@ samples (void)
      const char     *want;
    } data[] = {
  
-    {   "0",  0, "1"   },
-    {   "0",  1, "0"   },
-    {   "0",  2, "0"   },
-    {   "0",  3, "0"   },
-    {   "0",  4, "0"   },
      {   "0", 123456, "0" },
-
-    {   "1",  0, "1"   },
-    {   "1",  1, "1"   },
-    {   "1",  2, "0"   },
-    {   "1",  3, "0"   },
-    {   "1",  4, "0"   },
-    {   "1", 123456, "0" },
-
-    {   "2",  0, "1"   },
-    {   "2",  1, "2"   },
-    {   "2",  2, "1"   },
-    {   "2",  3, "0"   },
-    {   "2",  4, "0"   },
-    {   "2", 123456, "0" },
-
-    {   "3",  0, "1"   },
-    {   "3",  1, "3"   },
-    {   "3",  2, "3"   },
-    {   "3",  3, "1"   },
-    {   "3",  4, "0"   },
-    {   "3",  5, "0"   },
-    {   "3", 123456, "0" },
-
-    {   "4",  0, "1"   },
-    {   "4",  1, "4"   },
-    {   "4",  2, "6"   },
-    {   "4",  3, "4"   },
-    {   "4",  4, "1"   },
-    {   "4",  5, "0"   },
-    {   "4",  6, "0"   },
-    {   "4", 123456, "0" },
-
-    {   "10",  0, "1"   },
-    {   "10",  1, "10"  },
-    {   "10",  2, "45"  },
-    {   "10",  3, "120" },
-    {   "10",  4, "210" },
-    {   "10",  5, "252" },
-    {   "10",  6, "210" },
-    {   "10",  7, "120" },
-    {   "10",  8, "45"  },
-    {   "10",  9, "10"  },
-    {   "10", 10, "1"   },
-    {   "10", 11,     "0" },
-    {   "10", 12,     "0" },
-    {   "10", 123456, "0" },
+    {   "1", 543210, "0" },
+    {   "2", 123321, "0" },
+    {   "3", 234567, "0" },
+    {   "10", 23456, "0" },
  
      /* negatives, using bin(-n,k)=bin(n+k-1,k) */
      {   "-1",  0,  "1"  },
@@ -151,8 +106,11 @@ samples (void)
      {   "-3",  5, "-21"  },
      {   "-3",  6,  "28"  },
  
-    {   "40", 20,  "137846528820" },
-    {   "60", 30,  "118264581564861424" },
+    /* A few random values */
+    {   "41", 20,  "269128937220" },
+    {   "62", 37,  "147405545359541742" },
+    {   "50", 18,  "18053528883775" },
+    {  "149", 21,  "19332950844468483467894649" },
    };
  
    mpz_t  n, want;
@@ -180,7 +138,7 @@ samples (void)
  /* Test some bin(2k,k) cases.  This produces some biggish numbers to
     exercise the limb accumulating code.  */
  void
-twos (void)
+twos (int count)
  {
    mpz_t          n, want;
    unsigned long  k;
@@ -189,7 +147,7 @@ twos (void)
    mpz_init (want);
  
    mpz_set_ui (want, (unsigned long) 2);
-  for (k = 1; k < 200; k++)
+  for (k = 1; k < count; k++)
      {
        mpz_set_ui (n, 2*k);
        try_mpz_bin_ui (want, n, k);
@@ -204,14 +162,106 @@ twos (void)
    mpz_clear (want);
  }
  
+/* Test bin(n,k) on a random walk over (n,k), updating the expected value
+   incrementally.  Produces biggish numbers for the limb accumulating code.  */
+void
+randomwalk (int count)
+{
+  mpz_t          n_z, want;
+  unsigned long  n, k, i, r;
+  int            tests;
+  gmp_randstate_ptr rands;
+
+  rands = RANDS;
+  mpz_init (n_z);
+  mpz_init (want);
+
+  k = 3;
+  n = 12;
+  mpz_set_ui (want, (unsigned long) 220); /* binomial(12,3) = 220 */
+
+  for (tests = 1; tests < count; tests++) /* count - 1 walk steps */
+    {
+      r = gmp_urandomm_ui (rands, 62) + 1; /* r in 1..62, so 6 bits */
+      for (i = r & 7; i > 0; i--) /* low 3 bits: moves raising n and k */
+       {
+         n++; k++;
+         mpz_mul_ui (want, want, n); /* bin(n,k) = bin(n-1,k-1) * n / k */
+         mpz_fdiv_q_ui (want, want, k);
+       }
+      for (i = r >> 3; i > 0; i--) /* high 3 bits: moves raising n only */
+       {
+         n++;
+         mpz_mul_ui (want, want, n); /* bin(n,k) = bin(n-1,k) * n / (n-k) */
+         mpz_fdiv_q_ui (want, want, n - k);
+       }
+
+      mpz_set_ui (n_z, n); /* mpz copy of n for the mpz_t-argument variant */
+      try_mpz_bin_ui (want, n_z, k);
+
+      try_mpz_bin_uiui (want, n, k);
+    }
+
+  mpz_clear (n_z);
+  mpz_clear (want);
+}
+
+
+/* Exhaustively test bin(n,k) for all 0 <= k <= n + 1 <= count.  */
+void
+smallexaustive (unsigned int count)
+{
+  mpz_t          n_z, want;
+  unsigned long  n, k, i, r; /* i and r are not used in this function */
+  int            tests; /* not used in this function */
+  gmp_randstate_ptr rands; /* not used in this function */
+
+  mpz_init (n_z);
+  mpz_init (want);
+
+  for (n = 0; n < count; n++)
+    {
+      mpz_set_ui (want, (unsigned long) 1); /* bin(n,0) = 1 */
+      mpz_set_ui (n_z, n);
+      for (k = 0; k <= n; k++)
+       {
+         try_mpz_bin_ui (want, n_z, k);
+         try_mpz_bin_uiui (want, n, k);
+         mpz_mul_ui (want, want, n - k); /* bin(n,k+1) = bin(n,k)*(n-k)/(k+1) */
+         mpz_fdiv_q_ui (want, want, k + 1);
+       }
+      try_mpz_bin_ui (want, n_z, k); /* here k = n + 1 and want = 0 */
+      try_mpz_bin_uiui (want, n, k);
+    }
+
+  mpz_clear (n_z);
+  mpz_clear (want);
+}
  
  int
-main (void)
+main (int argc, char **argv)
  {
+  int count;
+
+  if (argc > 1)
+    {
+      char *end;
+      count = strtol (argv[1], &end, 0);
+      if (*end || count <= 0)
+       {
+         fprintf (stderr, "Invalid test count: %s.\n", argv[1]);
+         return 1;
+       }
+    }
+  else
+    count = COUNT;
+
    tests_start ();
  
    samples ();
-  twos ();
+  smallexaustive (count >> 4);
+  twos (count >> 1);
+  randomwalk (count - (count >> 1));
  
    tests_end ();
    exit (0);
diff --git a/tests/mpz/t-cdiv_ui.c b/tests/mpz/t-cdiv_ui.c

index 1f6be7fe7a4f3c58346e8aa9a1e6dc0d0f480582..807d93bb1bf1282bae306e4f48ecce4fd283b4cd 100644 (file)
--- a/tests/mpz/t-cdiv_ui.c
+++ b/tests/mpz/t-cdiv_ui.c
@@ -3,20 +3,20 @@
  
  Copyright 1993, 1994, 1996, 2000, 2001, 2002 Free Software Foundation, Inc.
  
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
  
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
  
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
  
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see http://www.gnu.org/licenses/.  */
  
  #include <stdio.h>
  #include <stdlib.h>
@@ -25,8 +25,8 @@ along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
  #include "gmp-impl.h"
  #include "tests.h"
  
-void dump_abort __GMP_PROTO ((char *, mpz_t, unsigned long));
-void debug_mp __GMP_PROTO ((mpz_t, int));
+void dump_abort (const char *, mpz_t, unsigned long);
+void debug_mp (mpz_t, int);
  
  int
  main (int argc, char **argv)
@@ -144,7 +144,7 @@ main (int argc, char **argv)
  }
  
  void
-dump_abort (char *str, mpz_t dividend, unsigned long divisor)
+dump_abort (const char *str, mpz_t dividend, unsigned long divisor)
  {
    fprintf (stderr, "ERROR: %s\n", str);
    fprintf (stderr, "dividend = "); debug_mp (dividend, -16);
diff --git a/tests/mpz/t-cmp.c b/tests/mpz/t-cmp.c

index 40c54a1ff9814ffc87d94c7ba5034212573dc4da..277c19471040753c40d9c7ff5b346aef2ed61752 100644 (file)
--- a/tests/mpz/t-cmp.c
+++ b/tests/mpz/t-cmp.c
@@ -2,20 +2,20 @@
  
  Copyright 2001 Free Software Foundation, Inc.
  
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
  
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
  
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
  
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see http://www.gnu.org/licenses/.  */
  
  #include <stdio.h>
  #include <stdlib.h>
diff --git a/tests/mpz/t-cmp_d.c b/tests/mpz/t-cmp_d.c

index 9cefd20c14c706345142fef31cfa4fbccc3601cc..cc86340d0d89b118b1ceda4d4d839c47a3dc9507 100644 (file)
--- a/tests/mpz/t-cmp_d.c
+++ b/tests/mpz/t-cmp_d.c
@@ -2,20 +2,20 @@
  
  Copyright 2001, 2002, 2003, 2005 Free Software Foundation, Inc.
  
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
  
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
  
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
  
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see http://www.gnu.org/licenses/.  */
  
  #include <stdio.h>
  #include <stdlib.h>
@@ -84,11 +84,14 @@ check_data (void)
      {  "1",  0.0,  1,  1 },
      { "-1",  0.0, -1,  1 },
  
+    {  "1",  0.5,  1,  1 },
+    { "-1", -0.5, -1,  1 },
+
      {  "0",  1.0, -1, -1 },
      {  "0", -1.0,  1, -1 },
  
-    {  "0x1000000000000000000000000000000000000000000000000", 0.0,  1, 1 },
-    { "-0x1000000000000000000000000000000000000000000000000", 0.0, -1, 1 },
+    {  "0x1000000000000000000000000000000000000000000000000", 1.0,  1, 1 },
+    { "-0x1000000000000000000000000000000000000000000000000", 1.0, -1, 1 },
  
      {  "0",  1e100, -1, -1 },
      {  "0", -1e100,  1, -1 },
@@ -161,7 +164,7 @@ check_low_z_one (void)
    mpz_init (x);
  
    /* FIXME: It'd be better to base this on the float format. */
-#ifdef __vax
+#if defined (__vax) || defined (__vax__)
  #define LIM 127                        /* vax fp numbers have limited range */
  #else
  #define LIM 512
diff --git a/tests/mpz/t-cmp_si.c b/tests/mpz/t-cmp_si.c

index 25e8a3b381877f0f285cfbcddaf3c964743eeca8..ced5a3b059f673d14155ee54b86d061f9e15518e 100644 (file)
--- a/tests/mpz/t-cmp_si.c
+++ b/tests/mpz/t-cmp_si.c
@@ -2,20 +2,20 @@
  
  Copyright 2000, 2001 Free Software Foundation, Inc.
  
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
  
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
  
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
  
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see http://www.gnu.org/licenses/.  */
  
  #include <stdio.h>
  #include <stdlib.h>
diff --git a/tests/mpz/t-cong.c b/tests/mpz/t-cong.c

index f263f2496b282f2a3e8df0400adb0cce0b56379a..0f6a787c0bea3a4aea98a5cce51488aa72ec359a 100644 (file)
--- a/tests/mpz/t-cong.c
+++ b/tests/mpz/t-cong.c
@@ -1,21 +1,21 @@
  /* test mpz_congruent_p and mpz_congruent_ui_p
  
-Copyright 2001, 2002 Free Software Foundation, Inc.
+Copyright 2001, 2002, 2012 Free Software Foundation, Inc.
  
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
  
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
  
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
  
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see http://www.gnu.org/licenses/.  */
  
  #include <stdio.h>
  #include <stdlib.h>
@@ -34,38 +34,38 @@ check_one (mpz_srcptr a, mpz_srcptr c, mpz_srcptr d, int want)
      {
        got = (mpz_congruent_p (a, c, d) != 0);
        if (want != got)
-        {
-          printf ("mpz_congruent_p wrong\n");
-          printf ("   expected %d got %d\n", want, got);
-          mpz_trace ("   a", a);
-          mpz_trace ("   c", c);
-          mpz_trace ("   d", d);
-          mp_trace_base = -16;
-          mpz_trace ("   a", a);
-          mpz_trace ("   c", c);
-          mpz_trace ("   d", d);
-          abort ();
-        }
+       {
+         printf ("mpz_congruent_p wrong\n");
+         printf ("   expected %d got %d\n", want, got);
+         mpz_trace ("   a", a);
+         mpz_trace ("   c", c);
+         mpz_trace ("   d", d);
+         mp_trace_base = -16;
+         mpz_trace ("   a", a);
+         mpz_trace ("   c", c);
+         mpz_trace ("   d", d);
+         abort ();
+       }
  
        if (mpz_fits_ulong_p (c) && mpz_fits_ulong_p (d))
-        {
-          unsigned long  uc = mpz_get_ui (c);
-          unsigned long  ud = mpz_get_ui (d);
-          got = (mpz_congruent_ui_p (a, uc, ud) != 0);
-          if (want != got)
-            {
-              printf    ("mpz_congruent_ui_p wrong\n");
-              printf    ("   expected %d got %d\n", want, got);
-              mpz_trace ("   a", a);
-              printf    ("   c=%lu\n", uc);
-              printf    ("   d=%lu\n", ud);
-              mp_trace_base = -16;
-              mpz_trace ("   a", a);
-              printf    ("   c=0x%lX\n", uc);
-              printf    ("   d=0x%lX\n", ud);
-              abort ();
-            }
-        }
+       {
+         unsigned long  uc = mpz_get_ui (c);
+         unsigned long  ud = mpz_get_ui (d);
+         got = (mpz_congruent_ui_p (a, uc, ud) != 0);
+         if (want != got)
+           {
+             printf    ("mpz_congruent_ui_p wrong\n");
+             printf    ("   expected %d got %d\n", want, got);
+             mpz_trace ("   a", a);
+             printf    ("   c=%lu\n", uc);
+             printf    ("   d=%lu\n", ud);
+             mp_trace_base = -16;
+             mpz_trace ("   a", a);
+             printf    ("   c=0x%lX\n", uc);
+             printf    ("   d=0x%lX\n", ud);
+             abort ();
+           }
+       }
  
        MPZ_SRCPTR_SWAP (a, c);
      }
@@ -83,6 +83,11 @@ check_data (void)
  
    } data[] = {
  
+    /* strict equality mod 0 */
+    { "0", "0", "0", 1 },
+    { "11", "11", "0", 1 },
+    { "3", "11", "0", 0 },
+
      /* anything congruent mod 1 */
      { "0", "0", "1", 1 },
      { "1", "0", "1", 1 },
@@ -133,11 +138,15 @@ check_random (int argc, char *argv[])
    mpz_t   a, c, d, ra, rc;
    int     i;
    int     want;
-  int     reps = 50000;
+  int     reps = 10000;
+  mpz_t bs;
+  unsigned long size_range, size;
  
    if (argc >= 2)
      reps = atoi (argv[1]);
  
+  mpz_init (bs);
+
    mpz_init (a);
    mpz_init (c);
    mpz_init (d);
@@ -146,11 +155,30 @@ check_random (int argc, char *argv[])
  
    for (i = 0; i < reps; i++)
      {
-      mpz_errandomb (a, rands, 8*GMP_LIMB_BITS);
-      MPZ_CHECK_FORMAT (a);
-      mpz_errandomb (c, rands, 8*GMP_LIMB_BITS);
-      MPZ_CHECK_FORMAT (c);
-      mpz_errandomb_nonzero (d, rands, 8*GMP_LIMB_BITS);
+      mpz_urandomb (bs, rands, 32);
+      size_range = mpz_get_ui (bs) % 16 + 1; /* 0..65535 bit operands */
+
+      mpz_urandomb (bs, rands, size_range);
+      size = mpz_get_ui (bs);
+      mpz_rrandomb (a, rands, size);
+
+      mpz_urandomb (bs, rands, 32);
+      size_range = mpz_get_ui (bs) % 16 + 1; /* 0..65535 bit operands */
+
+      mpz_urandomb (bs, rands, size_range);
+      size = mpz_get_ui (bs);
+      mpz_rrandomb (c, rands, size);
+
+      do
+       {
+         mpz_urandomb (bs, rands, 32);
+         size_range = mpz_get_ui (bs) % 16 + 1; /* 0..65535 bit operands */
+
+         mpz_urandomb (bs, rands, size_range);
+         size = mpz_get_ui (bs);
+         mpz_rrandomb (d, rands, size);
+       }
+      while (SIZ(d) == 0);
  
        mpz_negrandom (a, rands);
        MPZ_CHECK_FORMAT (a);
@@ -171,11 +199,13 @@ check_random (int argc, char *argv[])
  
        if (! mpz_pow2abs_p (d))
          {
-          refmpz_combit (a, urandom() % (8*GMP_LIMB_BITS));
-          check_one (a, c, d, 0);
+         refmpz_combit (a, urandom() % (8*GMP_LIMB_BITS));
+         check_one (a, c, d, 0);
          }
      }
  
+  mpz_clear (bs);
+
    mpz_clear (a);
    mpz_clear (c);
    mpz_clear (d);
diff --git a/tests/mpz/t-cong_2exp.c b/tests/mpz/t-cong_2exp.c

index fedcdf91fe827117f49b2674e5d4376ead0699de..6f1719c23ecf1ab693e5293654bc8dd9c98890a9 100644 (file)
--- a/tests/mpz/t-cong_2exp.c
+++ b/tests/mpz/t-cong_2exp.c
@@ -3,20 +3,20 @@
  /*
  Copyright 2001 Free Software Foundation, Inc.
  
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
  
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
  
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
  
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see http://www.gnu.org/licenses/.  */
  
  #include <stdio.h>
  #include <stdlib.h>
diff --git a/tests/mpz/t-div_2exp.c b/tests/mpz/t-div_2exp.c

index 934ef18e4743ba61e098ac948dfb05e1061ada86..fcb31db2ce6177c64c4fb911ae36d1df01e37948 100644 (file)
--- a/tests/mpz/t-div_2exp.c
+++ b/tests/mpz/t-div_2exp.c
@@ -2,20 +2,20 @@
  
  Copyright 2001 Free Software Foundation, Inc.
  
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
  
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
  
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
  
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see http://www.gnu.org/licenses/.  */
  
  #include <stdio.h>
  #include <stdlib.h>
diff --git a/tests/mpz/t-divis.c b/tests/mpz/t-divis.c

index ba99a485048d1cd736dc4c9ca1d61a0264d84681..c34a0fae416df516eb79545430dad6d8d64c5064 100644 (file)
--- a/tests/mpz/t-divis.c
+++ b/tests/mpz/t-divis.c
@@ -2,20 +2,20 @@
  
  Copyright 2001, 2009 Free Software Foundation, Inc.
  
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
  
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
  
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
  
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see http://www.gnu.org/licenses/.  */
  
  #include <stdio.h>
  #include <stdlib.h>
@@ -71,6 +71,8 @@ check_data (void)
  
    } data[] = {
  
+    { "0",    "0", 1 },
+    { "17",   "0", 0 },
      { "0",    "1", 1 },
      { "123",  "1", 1 },
      { "-123", "1", 1 },
diff --git a/tests/mpz/t-divis_2exp.c b/tests/mpz/t-divis_2exp.c

index 88588a3d7e70910c04d140cc99b45faf0a0d9af5..11b5bc1c7206536ada5f3a0918674d20b1717ed3 100644 (file)
--- a/tests/mpz/t-divis_2exp.c
+++ b/tests/mpz/t-divis_2exp.c
@@ -3,20 +3,20 @@
  /*
  Copyright 2001 Free Software Foundation, Inc.
  
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
  
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
  
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
  
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see http://www.gnu.org/licenses/.  */
  
  #include <stdio.h>
  #include <stdlib.h>
diff --git a/tests/mpz/t-export.c b/tests/mpz/t-export.c

index f2fb3bd3a7b98616741cff2491a628246d331b2d..07d3382cb5c63fd81fc90fdebe52c265959ba964 100644 (file)
--- a/tests/mpz/t-export.c
+++ b/tests/mpz/t-export.c
@@ -2,20 +2,20 @@
  
  Copyright 2002, 2003 Free Software Foundation, Inc.
  
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
  
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
  
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
  
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see http://www.gnu.org/licenses/.  */
  
  #include <stdio.h>
  #include <stdlib.h>
diff --git a/tests/mpz/t-fac_ui.c b/tests/mpz/t-fac_ui.c

index 1b494d1afd01258d01e30e2fec7c7043552e7ab6..61e444774e2240027f6fc0676599c994ff65aa86 100644 (file)
--- a/tests/mpz/t-fac_ui.c
+++ b/tests/mpz/t-fac_ui.c
@@ -1,21 +1,21 @@
-/* Exercise mpz_fac_ui.
+/* Exercise mpz_fac_ui and mpz_2fac_ui.
  
-Copyright 2000, 2001, 2002 Free Software Foundation, Inc.
+Copyright 2000, 2001, 2002, 2012 Free Software Foundation, Inc.
  
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
  
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
  
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
  
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see http://www.gnu.org/licenses/.  */
  
  #include <stdio.h>
  #include <stdlib.h>
@@ -35,9 +35,9 @@ along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
  int
  main (int argc, char *argv[])
  {
-  unsigned long  n;
-  unsigned long  limit = 1500;
-  mpz_t          f, r;
+  unsigned long  n, m;
+  unsigned long  limit = 2222;
+  mpz_t          df[2], f, r;
  
    tests_start ();
  
@@ -49,10 +49,12 @@ main (int argc, char *argv[])
    /* for small limb testing */
    limit = MIN (limit, MP_LIMB_T_MAX);
  
+  mpz_init_set_ui (df[0], 1);  /* 0!! = 1 */
+  mpz_init_set_ui (df[1], 1);  /* (-1)!! = 1 */
    mpz_init_set_ui (f, 1);  /* 0! = 1 */
    mpz_init (r);
  
-  for (n = 0; n < limit; n++)
+  for (n = 0, m = 0; n < limit; n++)
      {
        mpz_fac_ui (r, n);
        MPZ_CHECK_FORMAT (r);
@@ -65,9 +67,36 @@ main (int argc, char *argv[])
            abort ();
          }
  
+      mpz_2fac_ui (r, n);
+      MPZ_CHECK_FORMAT (r);
+
+      if (mpz_cmp (df[m], r) != 0)
+        {
+          printf ("mpz_2fac_ui(%lu) wrong\n", n);
+          printf ("  got  "); mpz_out_str (stdout, 10, r); printf("\n");
+          printf ("  want "); mpz_out_str (stdout, 10, df[m]); printf("\n");
+          abort ();
+        }
+
+      m ^= 1;
+      mpz_mul_ui (df[m], df[m], n+1);  /* (n+1)!! = (n-1)!! * (n+1) */
        mpz_mul_ui (f, f, n+1);  /* (n+1)! = n! * (n+1) */
      }
  
+  n = 1048573; /* a prime */
+  if (n > MP_LIMB_T_MAX)
+    n = 65521; /* a smaller prime :-) */
+  mpz_fac_ui (f, n - 1);
+  m = mpz_fdiv_ui (f, n);
+  if ( m != n - 1)
+    {
+      printf ("mpz_fac_ui(%lu) wrong\n", n - 1);
+      printf ("  Wilson's theorem not verified: got %lu, expected %lu.\n",m ,n - 1);
+      abort ();
+    }
+
+  mpz_clear (df[0]);
+  mpz_clear (df[1]);
    mpz_clear (f);
    mpz_clear (r);
  
diff --git a/tests/mpz/t-fdiv.c b/tests/mpz/t-fdiv.c

index c4053fab900b1c8ca1395187d7089d295c2d1945..8a17f05672f8f080c4f61b2f5d42a8d6a5616444 100644 (file)
--- a/tests/mpz/t-fdiv.c
+++ b/tests/mpz/t-fdiv.c
@@ -3,20 +3,20 @@
  
  Copyright 1993, 1994, 1996, 2000, 2001 Free Software Foundation, Inc.
  
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
  
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
  
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
  
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see http://www.gnu.org/licenses/.  */
  
  #include <stdio.h>
  #include <stdlib.h>
@@ -25,8 +25,8 @@ along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
  #include "gmp-impl.h"
  #include "tests.h"
  
-void dump_abort __GMP_PROTO ((mpz_t, mpz_t));
-void debug_mp __GMP_PROTO ((mpz_t, int));
+void dump_abort (mpz_t, mpz_t);
+void debug_mp (mpz_t, int);
  
  int
  main (int argc, char **argv)
diff --git a/tests/mpz/t-fdiv_ui.c b/tests/mpz/t-fdiv_ui.c

index 3012d9b112a6d3440ee5e4e50be5fb5461df07de..09a413999f3da15820f985e11c54b3a200b384bc 100644 (file)
--- a/tests/mpz/t-fdiv_ui.c
+++ b/tests/mpz/t-fdiv_ui.c
@@ -3,20 +3,20 @@
  
  Copyright 1993, 1994, 1996, 2000, 2001, 2002 Free Software Foundation, Inc.
  
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
  
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
  
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
  
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see http://www.gnu.org/licenses/.  */
  
  #include <stdio.h>
  #include <stdlib.h>
@@ -25,8 +25,8 @@ along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
  #include "gmp-impl.h"
  #include "tests.h"
  
-void dump_abort __GMP_PROTO ((char *, mpz_t, unsigned long));
-void debug_mp __GMP_PROTO ((mpz_t, int));
+void dump_abort (const char *, mpz_t, unsigned long);
+void debug_mp (mpz_t, int);
  
  int
  main (int argc, char **argv)
@@ -144,7 +144,7 @@ main (int argc, char **argv)
  }
  
  void
-dump_abort (char *str, mpz_t dividend, unsigned long divisor)
+dump_abort (const char *str, mpz_t dividend, unsigned long divisor)
  {
    fprintf (stderr, "ERROR: %s\n", str);
    fprintf (stderr, "dividend = "); debug_mp (dividend, -16);
diff --git a/tests/mpz/t-fib_ui.c b/tests/mpz/t-fib_ui.c

index 77e52f5a3eca1568f5bcf1d3e6d95efdcbb7d10a..9b40a5fec4bebf6328481a919fee7b51968af402 100644 (file)
--- a/tests/mpz/t-fib_ui.c
+++ b/tests/mpz/t-fib_ui.c
@@ -2,20 +2,20 @@
  
  Copyright 2000, 2001, 2002 Free Software Foundation, Inc.
  
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
  
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
  
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
  
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see http://www.gnu.org/licenses/.  */
  
  #include <stdio.h>
  #include <stdlib.h>
diff --git a/tests/mpz/t-fits.c b/tests/mpz/t-fits.c

index 3f08802ad28c991258cbd1c62dba2b4e01adef49..de349fe4c7a6d0a1d86a66b915cbef32cc876d25 100644 (file)
--- a/tests/mpz/t-fits.c
+++ b/tests/mpz/t-fits.c
@@ -3,20 +3,20 @@
  /*
  Copyright 2001 Free Software Foundation, Inc.
  
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
  
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
  
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
  
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see http://www.gnu.org/licenses/.  */
  
  #include <stdio.h>
  #include <stdlib.h>
diff --git a/tests/mpz/t-gcd.c b/tests/mpz/t-gcd.c

index 687e0556c5689b32acfcad8b7166916112806b94..f0dd62853a1520ef1484abd3f887a03dc7653c23 100644 (file)
--- a/tests/mpz/t-gcd.c
+++ b/tests/mpz/t-gcd.c
@@ -1,22 +1,22 @@
  /* Test mpz_gcd, mpz_gcdext, and mpz_gcd_ui.
  
  Copyright 1991, 1993, 1994, 1996, 1997, 2000, 2001, 2002, 2003, 2004, 2005,
-2008, 2009 Free Software Foundation, Inc.
+2008, 2009, 2012 Free Software Foundation, Inc.
  
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
  
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
  
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
  
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see http://www.gnu.org/licenses/.  */
  
  #include <stdio.h>
  #include <stdlib.h>
@@ -25,10 +25,29 @@ along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
  #include "gmp-impl.h"
  #include "tests.h"
  
-void one_test __GMP_PROTO ((mpz_t, mpz_t, mpz_t, int));
-void debug_mp __GMP_PROTO ((mpz_t, int));
+void one_test (mpz_t, mpz_t, mpz_t, int);
+void debug_mp (mpz_t, int);
+
+static int gcdext_valid_p (const mpz_t, const mpz_t, const mpz_t, const mpz_t);
+
+/* Keep one_test's variables global, so that we don't need
+   to reinitialize them for each test.  */
+mpz_t gcd1, gcd2, s, temp1, temp2, temp3;
+
+#define MAX_SCHOENHAGE_THRESHOLD HGCD_REDUCE_THRESHOLD
+
+/* Define this to make all operands be large enough for Schoenhage gcd
+   to be used.  */
+#ifndef WHACK_SCHOENHAGE
+#define WHACK_SCHOENHAGE 0
+#endif
+
+#if WHACK_SCHOENHAGE
+#define MIN_OPERAND_BITSIZE (MAX_SCHOENHAGE_THRESHOLD * GMP_NUMB_BITS)
+#else
+#define MIN_OPERAND_BITSIZE 1
+#endif
  
-static int gcdext_valid_p __GMP_PROTO ((const mpz_t a, const mpz_t b, const mpz_t g, const mpz_t s));
  
  void
  check_data (void)
@@ -47,10 +66,7 @@ check_data (void)
    mpz_t  a, b, got, want;
    int    i;
  
-  mpz_init (a);
-  mpz_init (b);
-  mpz_init (got);
-  mpz_init (want);
+  mpz_inits (a, b, got, want, NULL);
  
    for (i = 0; i < numberof (data); i++)
      {
@@ -72,76 +88,162 @@ check_data (void)
         }
      }
  
-  mpz_clear (a);
-  mpz_clear (b);
-  mpz_clear (got);
-  mpz_clear (want);
+  mpz_clears (a, b, got, want, NULL);
  }
  
-/* Keep one_test's variables global, so that we don't need
-   to reinitialize them for each test.  */
-mpz_t gcd1, gcd2, s, t, temp1, temp2, temp3;
+void
+make_chain_operands (mpz_t ref, mpz_t a, mpz_t b, gmp_randstate_t rs, int nb1, int nb2, int chain_len)
+{
+  mpz_t bs, temp1, temp2;
+  int j;
  
-#if GCD_DC_THRESHOLD > GCDEXT_DC_THRESHOLD
-#define MAX_SCHOENHAGE_THRESHOLD GCD_DC_THRESHOLD
-#else
-#define MAX_SCHOENHAGE_THRESHOLD GCDEXT_DC_THRESHOLD
-#endif
+  mpz_inits (bs, temp1, temp2, NULL);
  
-/* Define this to make all operands be large enough for Schoenhage gcd
-   to be used.  */
-#ifndef WHACK_SCHOENHAGE
-#define WHACK_SCHOENHAGE 0
-#endif
+  /* Generate a division chain backwards, allowing otherwise unlikely huge
+     quotients.  */
  
-#if WHACK_SCHOENHAGE
-#define MIN_OPERAND_BITSIZE (MAX_SCHOENHAGE_THRESHOLD * GMP_NUMB_BITS)
-#else
-#define MIN_OPERAND_BITSIZE 1
-#endif
+  mpz_set_ui (a, 0);
+  mpz_urandomb (bs, rs, 32);
+  mpz_urandomb (bs, rs, mpz_get_ui (bs) % nb1 + 1);
+  mpz_rrandomb (b, rs, mpz_get_ui (bs));
+  mpz_add_ui (b, b, 1);
+  mpz_set (ref, b);
+
+  for (j = 0; j < chain_len; j++)
+    {
+      mpz_urandomb (bs, rs, 32);
+      mpz_urandomb (bs, rs, mpz_get_ui (bs) % nb2 + 1);
+      mpz_rrandomb (temp2, rs, mpz_get_ui (bs) + 1);
+      mpz_add_ui (temp2, temp2, 1);
+      mpz_mul (temp1, b, temp2);
+      mpz_add (a, a, temp1);
+
+      mpz_urandomb (bs, rs, 32);
+      mpz_urandomb (bs, rs, mpz_get_ui (bs) % nb2 + 1);
+      mpz_rrandomb (temp2, rs, mpz_get_ui (bs) + 1);
+      mpz_add_ui (temp2, temp2, 1);
+      mpz_mul (temp1, a, temp2);
+      mpz_add (b, b, temp1);
+    }
+
+  mpz_clears (bs, temp1, temp2, NULL);
+}
+
+/* Test operands from a table of seed data.  This variant creates the operands
+   using plain ol' mpz_rrandomb.  This is a hack for better coverage of the gcd
+   code; it relies on the random number generators producing exactly the
+   numbers we expect.  */
+void
+check_kolmo1 (void)
+{
+  static const struct {
+    unsigned int seed;
+    int nb;
+    const char *want;
+  } data[] = {
+    { 59618, 38208, "5"},
+    { 76521, 49024, "3"},
+    { 85869, 54976, "1"},
+    { 99449, 63680, "1"},
+    {112453, 72000, "1"}
+  };
+
+  gmp_randstate_t rs;
+  mpz_t  bs, a, b, want;
+  int    i, unb, vnb, nb;
+
+  gmp_randinit_default (rs);
+
+  mpz_inits (bs, a, b, want, NULL);
+
+  for (i = 0; i < numberof (data); i++)
+    {
+      nb = data[i].nb;
+
+      gmp_randseed_ui (rs, data[i].seed);
+
+      mpz_urandomb (bs, rs, 32);
+      unb = mpz_get_ui (bs) % nb;
+      mpz_urandomb (bs, rs, 32);
+      vnb = mpz_get_ui (bs) % nb;
+
+      mpz_rrandomb (a, rs, unb);
+      mpz_rrandomb (b, rs, vnb);
+
+      mpz_set_str_or_abort (want, data[i].want, 0);
+
+      one_test (a, b, want, -1);
+    }
+
+  mpz_clears (bs, a, b, want, NULL);
+  gmp_randclear (rs);
+}
+
+/* Test operands from a table of seed data.  This variant creates the operands
+   using a division chain.  This is a hack for better coverage of the gcd
+   code; it relies on the random number generators producing exactly the
+   numbers we expect.  */
+void
+check_kolmo2 (void)
+{
+  static const struct {
+    unsigned int seed;
+    int nb, chain_len;
+  } data[] = {
+    {  917, 15, 5 },
+    { 1032, 18, 6 },
+    { 1167, 18, 6 },
+    { 1174, 18, 6 },
+    { 1192, 18, 6 },
+  };
+
+  gmp_randstate_t rs;
+  mpz_t  bs, a, b, want;
+  int    i;
+
+  gmp_randinit_default (rs);
+
+  mpz_inits (bs, a, b, want, NULL);
+
+  for (i = 0; i < numberof (data); i++)
+    {
+      gmp_randseed_ui (rs, data[i].seed);
+      make_chain_operands (want, a, b, rs, data[i].nb, data[i].nb, data[i].chain_len);
+      one_test (a, b, want, -1);
+    }
+
+  mpz_clears (bs, a, b, want, NULL);
+  gmp_randclear (rs);
+}
  
  int
  main (int argc, char **argv)
  {
    mpz_t op1, op2, ref;
-  int i, j, chain_len;
+  int i, chain_len;
    gmp_randstate_ptr rands;
    mpz_t bs;
    unsigned long bsi, size_range;
-  int reps = 200;
+  long int reps = 200;
  
    tests_start ();
    TESTS_REPS (reps, argv, argc);
  
    rands = RANDS;
  
-  check_data ();
+  mpz_inits (bs, op1, op2, ref, gcd1, gcd2, temp1, temp2, temp3, s, NULL);
  
-  mpz_init (bs);
-  mpz_init (op1);
-  mpz_init (op2);
-  mpz_init (ref);
-  mpz_init (gcd1);
-  mpz_init (gcd2);
-  mpz_init (temp1);
-  mpz_init (temp2);
-  mpz_init (temp3);
-  mpz_init (s);
-  mpz_init (t);
+  check_data ();
+  check_kolmo1 ();
+  check_kolmo2 ();
  
    /* Testcase to exercise the u0 == u1 case in mpn_gcdext_lehmer_n. */
-  mpz_set_ui (op2, GMP_NUMB_MAX);
+  mpz_set_ui (op2, GMP_NUMB_MAX); /* FIXME: Huge limb doesn't always fit */
    mpz_mul_2exp (op1, op2, 100);
    mpz_add (op1, op1, op2);
    mpz_mul_ui (op2, op2, 2);
    one_test (op1, op2, NULL, -1);
  
-#if 0
-  mpz_set_str (op1, "4da8e405e0d2f70d6d679d3de08a5100a81ec2cff40f97b313ae75e1183f1df2b244e194ebb02a4ece50d943640a301f0f6cc7f539117b783c3f3a3f91649f8a00d2e1444d52722810562bce02fccdbbc8fe3276646e306e723dd3b", 16);
-  mpz_set_str (op2, "76429e12e4fdd8929d89c21657097fbac09d1dc08cf7f1323a34e78ca34226e1a7a29b86fee0fa7fe2cc2a183d46d50df1fe7029590974ad7da77605f35f902cb8b9b8d22dd881eaae5919675d49a337145a029c3b33fc2b0", 16);
-  one_test (op1, op2, NULL, -1);
-#endif
-
    for (i = 0; i < reps; i++)
      {
        /* Generate plain operands with unknown gcd.  These types of operands
@@ -176,58 +278,17 @@ main (int argc, char **argv)
        /* Generate a division chain backwards, allowing otherwise unlikely huge
          quotients.  */
  
-      mpz_set_ui (op1, 0);
        mpz_urandomb (bs, rands, 32);
-      mpz_urandomb (bs, rands, mpz_get_ui (bs) % 16 + 1);
-      mpz_rrandomb (op2, rands, mpz_get_ui (bs));
-      mpz_add_ui (op2, op2, 1);
-      mpz_set (ref, op2);
-
-#if WHACK_SCHOENHAGE
-      chain_len = 1000000;
-#else
+      chain_len = mpz_get_ui (bs) % LOG2C (GMP_NUMB_BITS * MAX_SCHOENHAGE_THRESHOLD);
        mpz_urandomb (bs, rands, 32);
-      chain_len = mpz_get_ui (bs) % (GMP_NUMB_BITS * MAX_SCHOENHAGE_THRESHOLD / 256);
-#endif
+      chain_len = mpz_get_ui (bs) % (1 << chain_len) / 32;
+
+      make_chain_operands (ref, op1, op2, rands, 16, 12, chain_len);
  
-      for (j = 0; j < chain_len; j++)
-       {
-         mpz_urandomb (bs, rands, 32);
-         mpz_urandomb (bs, rands, mpz_get_ui (bs) % 12 + 1);
-         mpz_rrandomb (temp2, rands, mpz_get_ui (bs) + 1);
-         mpz_add_ui (temp2, temp2, 1);
-         mpz_mul (temp1, op2, temp2);
-         mpz_add (op1, op1, temp1);
-
-         /* Don't generate overly huge operands.  */
-         if (SIZ (op1) > 3 * MAX_SCHOENHAGE_THRESHOLD)
-           break;
-
-         mpz_urandomb (bs, rands, 32);
-         mpz_urandomb (bs, rands, mpz_get_ui (bs) % 12 + 1);
-         mpz_rrandomb (temp2, rands, mpz_get_ui (bs) + 1);
-         mpz_add_ui (temp2, temp2, 1);
-         mpz_mul (temp1, op1, temp2);
-         mpz_add (op2, op2, temp1);
-
-         /* Don't generate overly huge operands.  */
-         if (SIZ (op2) > 3 * MAX_SCHOENHAGE_THRESHOLD)
-           break;
-       }
        one_test (op1, op2, ref, i);
      }
  
-  mpz_clear (bs);
-  mpz_clear (op1);
-  mpz_clear (op2);
-  mpz_clear (ref);
-  mpz_clear (gcd1);
-  mpz_clear (gcd2);
-  mpz_clear (temp1);
-  mpz_clear (temp2);
-  mpz_clear (temp3);
-  mpz_clear (s);
-  mpz_clear (t);
+  mpz_clears (bs, op1, op2, ref, gcd1, gcd2, temp1, temp2, temp3, s, NULL);
  
    tests_end ();
    exit (0);
@@ -243,7 +304,7 @@ void
  one_test (mpz_t op1, mpz_t op2, mpz_t ref, int i)
  {
    /*
-  printf ("%ld %ld %ld\n", SIZ (op1), SIZ (op2), SIZ (ref));
+  printf ("%d %d %d\n", SIZ (op1), SIZ (op2), ref != NULL ? SIZ (ref) : 0);
    fflush (stdout);
    */
  
@@ -370,7 +431,7 @@ gcdext_valid_p (const mpz_t a, const mpz_t b, const mpz_t g, const mpz_t s)
    if (mpz_cmpabs_ui (s, 1) > 0)
      {
        mpz_mul_2exp (temp3, s, 1);
-      if (mpz_cmpabs (temp3, temp2) > 0)
+      if (mpz_cmpabs (temp3, temp2) >= 0)
         return 0;
      }
  
@@ -386,7 +447,7 @@ gcdext_valid_p (const mpz_t a, const mpz_t b, const mpz_t g, const mpz_t s)
    if (mpz_cmpabs_ui (temp2, 1) > 0)
      {
        mpz_mul_2exp (temp2, temp2, 1);
-      if (mpz_cmpabs (temp2, temp1) > 0)
+      if (mpz_cmpabs (temp2, temp1) >= 0)
         return 0;
      }
    return 1;
diff --git a/tests/mpz/t-gcd_ui.c b/tests/mpz/t-gcd_ui.c

index ac6431ceb26c79adc5bae3bc64d44c50a4e1ea8d..d4de2ad262236a8fad522b8c4e14e38017cc4803 100644 (file)
--- a/tests/mpz/t-gcd_ui.c
+++ b/tests/mpz/t-gcd_ui.c
@@ -2,20 +2,20 @@
  
  Copyright 2003 Free Software Foundation, Inc.
  
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
  
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
  
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
  
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see http://www.gnu.org/licenses/.  */
  
  #include <stdio.h>
  #include <stdlib.h>
diff --git a/tests/mpz/t-get_d.c b/tests/mpz/t-get_d.c

index 41c0ba8d20c7cc8489e37f5890f1b61ebeb201da..c9f2a90d98c1c4afec25b4d51b150336ac6e82e1 100644 (file)
--- a/tests/mpz/t-get_d.c
+++ b/tests/mpz/t-get_d.c
@@ -1,21 +1,21 @@
  /* Test mpz_get_d.
  
-Copyright 2002 Free Software Foundation, Inc.
+Copyright 2002, 2012 Free Software Foundation, Inc.
  
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
  
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
  
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
  
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see http://www.gnu.org/licenses/.  */
  
  #include <stdio.h>
  #include <stdlib.h>
@@ -31,8 +31,8 @@ check_onebit (void)
    mpz_t   z;
    double  got, want;
    /* FIXME: It'd be better to base this on the float format. */
-#ifdef __vax
-  int     limit = 127;  /* vax fp numbers have limited range */
+#if defined (__vax) || defined (__vax__)
+  int     limit = 127 - 1;  /* vax fp numbers have limited range */
  #else
    int     limit = 512;
  #endif
diff --git a/tests/mpz/t-get_d_2exp.c b/tests/mpz/t-get_d_2exp.c

index 3b659f82c9834f778290052d3cad23775ae24a09..105e869728c61c9c1c56ba47b1c9a1ffff31bd22 100644 (file)
--- a/tests/mpz/t-get_d_2exp.c
+++ b/tests/mpz/t-get_d_2exp.c
@@ -1,21 +1,21 @@
  /* Test mpz_get_d_2exp.
  
-Copyright 2002, 2003 Free Software Foundation, Inc.
+Copyright 2002, 2003, 2012 Free Software Foundation, Inc.
  
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
  
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
  
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
  
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see http://www.gnu.org/licenses/.  */
  
  #include <stdio.h>
  #include <stdlib.h>
@@ -24,6 +24,32 @@ along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
  #include "tests.h"
  
  
+static void
+check_zero (void)
+{
+  mpz_t   z;
+  double  got, want;
+  long    got_exp, want_exp;
+
+  mpz_init_set_ui (z, 0);
+
+  want = 0.0;
+  want_exp = 0;
+  got = mpz_get_d_2exp (&got_exp, z);
+  if (got != want || got_exp != want_exp)
+    {
+      printf    ("mpz_get_d_2exp wrong on zero\n");
+      mpz_trace ("   z    ", z);
+      d_trace   ("   want ", want);
+      d_trace   ("   got  ", got);
+      printf    ("   want exp %ld\n", want_exp);
+      printf    ("   got exp  %ld\n", got_exp);
+      abort();
+    }
+
+  mpz_clear (z);
+}
+
  static void
  check_onebit (void)
  {
@@ -187,6 +213,7 @@ main (void)
    tests_start ();
    mp_trace_base = -16;
  
+  check_zero ();
    check_onebit ();
    check_round ();
    check_rand ();
diff --git a/tests/mpz/t-get_si.c b/tests/mpz/t-get_si.c

index 0a9739bf0efa90997761a10e760ec83b8accc841..37bab0b19e6ca9217557a4842c632443cdc930ba 100644 (file)
--- a/tests/mpz/t-get_si.c
+++ b/tests/mpz/t-get_si.c
@@ -2,20 +2,20 @@
  
  Copyright 2000, 2001 Free Software Foundation, Inc.
  
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
  
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
  
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
  
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see http://www.gnu.org/licenses/.  */
  
  #include <stdio.h>
  #include <stdlib.h>
diff --git a/tests/mpz/t-hamdist.c b/tests/mpz/t-hamdist.c

index 8109365966cf83896a7609dd1efe3575149d50f6..992034caa8a711d8c726c0d5b16fd80c5ac57129 100644 (file)
--- a/tests/mpz/t-hamdist.c
+++ b/tests/mpz/t-hamdist.c
@@ -2,20 +2,20 @@
  
  Copyright 2001, 2002 Free Software Foundation, Inc.
  
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
  
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
  
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
  
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see http://www.gnu.org/licenses/.  */
  
  #include <stdio.h>
  #include <stdlib.h>
diff --git a/tests/mpz/t-import.c b/tests/mpz/t-import.c

index 0c7b929e332f1c314db924d4955daf2bd84b4ccb..59d682d6bc74ee5213ecd60932aecb4a994d1f76 100644 (file)
--- a/tests/mpz/t-import.c
+++ b/tests/mpz/t-import.c
@@ -2,20 +2,20 @@
  
  Copyright 2002, 2003 Free Software Foundation, Inc.
  
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
  
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
  
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
  
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see http://www.gnu.org/licenses/.  */
  
  #include <stdio.h>
  #include <stdlib.h>
diff --git a/tests/mpz/t-inp_str.c b/tests/mpz/t-inp_str.c

index 7c0893752be248aedd642ddc5cc43f3f8febc6e8..f2656ac6aedbd2d9525c57588faac699ee12e07f 100644 (file)
--- a/tests/mpz/t-inp_str.c
+++ b/tests/mpz/t-inp_str.c
@@ -2,20 +2,20 @@
  
  Copyright 2001, 2002 Free Software Foundation, Inc.
  
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
  
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
  
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
  
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see http://www.gnu.org/licenses/.  */
  
  #include "config.h"
  
@@ -48,21 +48,31 @@ check_data (void)
      { "0",   10, "0", 1 },
  
      { "abc", 10, "0", 0 },
+    { "0xf", 10, "0", 1 },
      { "ghi", 16, "0", 0 },
+    { "100", 90, "0", 0 },
  
      {  "ff", 16,  "255", 2 },
      { "-ff", 16, "-255", 3 },
      {  "FF", 16,  "255", 2 },
      { "-FF", 16, "-255", 3 },
  
-    { "z", 36, "35", 1 },
-    { "Z", 36, "35", 1 },
+    {  "z", 36, "35", 1 },
+    {  "Z", 36, "35", 1 },
+    { "1B", 59, "70", 2 },
+    {  "a", 60, "36", 1 },
+    {  "A", 61, "10", 1 },
  
      {  "0x0",    0,   "0", 3 },
-    {  "0x10",   0,  "16", 4 },
-    { "-0x0",    0,   "0", 4 },
+    {  "0X10",   0,  "16", 4 },
+    { "-0X0",    0,   "0", 4 },
      { "-0x10",   0, "-16", 5 },
  
+    {  "0b0",    0,  "0", 3 },
+    {  "0B10",   0,  "2", 4 },
+    { "-0B0",    0,  "0", 4 },
+    { "-0b10",   0, "-2", 5 },
+
      {  "00",   0,  "0", 2 },
      {  "010",  0,  "8", 3 },
      { "-00",   0,  "0", 3 },
@@ -70,6 +80,7 @@ check_data (void)
  
      {  "0x",     0,   "0", 2 },
      {  "0",      0,   "0", 1 },
+    { " 030",   10,  "30", 4 },
    };
  
    mpz_t  got, want;
diff --git a/tests/mpz/t-invert.c b/tests/mpz/t-invert.c

index 842310de2be021dc369cda7e3d35fb72d4bc7cc5..223a743cb400212073ef7fda52ec5a519e214e92 100644 (file)
--- a/tests/mpz/t-invert.c
+++ b/tests/mpz/t-invert.c
@@ -3,20 +3,20 @@
  Copyright 1991, 1993, 1994, 1996, 1997, 2000, 2001, 2002, 2003, 2004, 2005,
  2008, 2009, 2012 Free Software Foundation, Inc.
  
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
  
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
  
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
  
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see http://www.gnu.org/licenses/.  */
  
  #include <stdio.h>
  #include <stdlib.h>
diff --git a/tests/mpz/t-io_raw.c b/tests/mpz/t-io_raw.c

index 433d7d64bd45446d1b6307b3a7d300e96f910118..4148eac32c67cb54bda49291f04025be753a003d 100644 (file)
--- a/tests/mpz/t-io_raw.c
+++ b/tests/mpz/t-io_raw.c
@@ -2,20 +2,20 @@
  
  Copyright 2001 Free Software Foundation, Inc.
  
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
  
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
  
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
  
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see http://www.gnu.org/licenses/.  */
  
  #include "config.h"
  
diff --git a/tests/mpz/t-jac.c b/tests/mpz/t-jac.c

index 1b3e092888aa64354961fd09d0e1d54a6c50da36..327a739a89532f37d334dbac19aa957ab3652546 100644 (file)
--- a/tests/mpz/t-jac.c
+++ b/tests/mpz/t-jac.c
@@ -2,20 +2,20 @@
  
  Copyright 1999, 2000, 2001, 2002, 2003, 2004 Free Software Foundation, Inc.
  
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
  
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
  
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
  
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see http://www.gnu.org/licenses/.  */
  
  
  /* With no arguments the various Kronecker/Jacobi symbol routines are
@@ -41,7 +41,6 @@ along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
  #include "gmp-impl.h"
  #include "tests.h"
  
-
  #ifdef _LONG_LONG_LIMB
  #define LL(l,ll)  ll
  #else
@@ -199,6 +198,10 @@ try_pari (mpz_srcptr a, mpz_srcptr b, int answer)
  void
  try_each (mpz_srcptr a, mpz_srcptr b, int answer)
  {
+#if 0
+  fprintf(stderr, "asize = %d, bsize = %d\n",
+         mpz_sizeinbase (a, 2), mpz_sizeinbase (b, 2));
+#endif
    if (option_pari)
      {
        try_pari (a, b, answer);
@@ -613,6 +616,33 @@ check_data (void)
      /* special values inducing a==b==1 at the end of jac_or_kron() */
      { "0x10000000000000000000000000000000000000000000000001",
        "0x10000000000000000000000000000000000000000000000003", 1 },
+
+    /* Test for previous bugs in jacobi_2. */
+    { "0x43900000000", "0x42400000439", -1 }, /* 32-bit limbs */
+    { "0x4390000000000000000", "0x4240000000000000439", -1 }, /* 64-bit limbs */
+
+    { "198158408161039063", "198158360916398807", -1 },
+
+    /* Some tests involving large quotients in the continued fraction
+       expansion. */
+    { "37200210845139167613356125645445281805",
+      "451716845976689892447895811408978421929", -1 },
+    { "67674091930576781943923596701346271058970643542491743605048620644676477275152701774960868941561652032482173612421015",
+      "4902678867794567120224500687210807069172039735", 0 },
+    { "2666617146103764067061017961903284334497474492754652499788571378062969111250584288683585223600172138551198546085281683283672592", "2666617146103764067061017961903284334497474492754652499788571378062969111250584288683585223600172138551198546085281683290481773", 1 },
+
+    /* Exercises the case asize == 1, btwos > 0 in mpz_jacobi. */
+    { "804609", "421248363205206617296534688032638102314410556521742428832362659824", 1 } ,
+    { "4190209", "2239744742177804210557442048984321017460028974602978995388383905961079286530650825925074203175536427000", 1 },
+
+    /* Exercises the case asize == 1, btwos = 63 in mpz_jacobi
+       (relevant when GMP_LIMB_BITS == 64). */
+    { "17311973299000934401", "1675975991242824637446753124775689449936871337036614677577044717424700351103148799107651171694863695242089956242888229458836426332300124417011114380886016", 1 },
+    { "3220569220116583677", "41859917623035396746", -1 },
+
+    /* Other test cases that triggered bugs during development. */
+    { "37200210845139167613356125645445281805", "340116213441272389607827434472642576514", -1 },
+    { "74400421690278335226712251290890563610", "451716845976689892447895811408978421929", -1 },
    };
  
    int    i;
@@ -652,7 +682,7 @@ check_squares_zi (void)
    for (i = 0; i < 50; i++)
      {
        mpz_urandomb (bs, rands, 32);
-      size_range = mpz_get_ui (bs) % 10 + 2;
+      size_range = mpz_get_ui (bs) % 10 + i/8 + 2;
  
        mpz_urandomb (bs, rands, size_range);
        an = mpz_get_ui (bs);
@@ -719,6 +749,242 @@ check_a_zero (void)
  }
  
  
+/* Reference value for (a|b), assuming that b = prod prime[k]^exp[k] for
+   0 <= k < nprime.  Only the factorization is consulted; b itself is not
+   read.  Entries with a zero exponent are skipped.  For the others, a zero
+   result from refmpz_legendre (a, prime[k]) makes the whole symbol 0, and
+   the result is multiplied in only when the exponent is odd.  */
+int
+ref_jacobi (mpz_srcptr a, mpz_srcptr b, unsigned nprime,
+           mpz_t prime[], unsigned *exp)
+{
+  int symbol = 1;
+  unsigned k;
+
+  for (k = 0; k < nprime; k++)
+    {
+      int l;
+
+      if (exp[k] == 0)
+       continue;               /* prime[k] does not occur in b */
+
+      l = refmpz_legendre (a, prime[k]);
+      if (l == 0)
+       return 0;
+      if ((exp[k] & 1) != 0)
+       symbol *= l;            /* even powers contribute l^2 = 1 */
+    }
+
+  return symbol;
+}
+
+/* Check (a|b) for b with a known factorization.  PRIME_N values are
+   generated once; then PRIME_B_COUNT values of b are built as products of
+   random powers of them, and for each b, PRIME_A_COUNT random values of a
+   are passed to try_all together with the answer computed by ref_jacobi
+   from the factorization.  */
+void
+check_jacobi_factored (void)
+{
+  /* PRIME_N           number of entries in prime[] and exp[]
+     PRIME_MAX_SIZE    bit size handed to mpz_rrandomb for each prime is
+                       (random % PRIME_MAX_SIZE) + 2
+     PRIME_MAX_EXP     exponents are random % PRIME_MAX_EXP
+     PRIME_A_COUNT     number of a values tried per b
+     PRIME_B_COUNT     number of b values constructed
+     PRIME_MAX_B_SIZE  no further factors are multiplied in once b has
+                       reached this many bits  */
+#define PRIME_N 10
+#define PRIME_MAX_SIZE 50
+#define PRIME_MAX_EXP 4
+#define PRIME_A_COUNT 10
+#define PRIME_B_COUNT 5
+#define PRIME_MAX_B_SIZE 2000
+
+  gmp_randstate_ptr rands = RANDS;
+  mpz_t prime[PRIME_N];
+  unsigned exp[PRIME_N];
+  mpz_t a, b, t, bs;
+  unsigned i;
+
+  mpz_init (a);
+  mpz_init (b);
+  mpz_init (t);
+  mpz_init (bs);
+
+  /* Generate primes */
+  for (i = 0; i < PRIME_N; i++)
+    {
+      mp_size_t size;
+      mpz_init (prime[i]);
+      mpz_urandomb (bs, rands, 32);
+      size = mpz_get_ui (bs) % PRIME_MAX_SIZE + 2;
+      mpz_rrandomb (prime[i], rands, size);
+      /* Anything <= 3 is replaced by 3; larger values are advanced with
+        mpz_nextprime.  */
+      if (mpz_cmp_ui (prime[i], 3) <= 0)
+       mpz_set_ui (prime[i], 3);
+      else
+       mpz_nextprime (prime[i], prime[i]);
+    }
+
+  for (i = 0; i < PRIME_B_COUNT; i++)
+    {
+      unsigned j, k;
+      mp_bitcnt_t bsize;
+
+      mpz_set_ui (b, 1);
+      bsize = 1;
+
+      /* Build b = prod prime[j]^exp[j].  The loop may stop early on the
+        size limit; j is left holding the number of exp[] entries that
+        were set for this b.  */
+      for (j = 0; j < PRIME_N && bsize < PRIME_MAX_B_SIZE; j++)
+       {
+         mpz_urandomb (bs, rands, 32);
+         exp[j] = mpz_get_ui (bs) % PRIME_MAX_EXP;
+         mpz_pow_ui (t, prime[j], exp[j]);
+         mpz_mul (b, b, t);
+         bsize = mpz_sizeinbase (b, 2);
+       }
+      for (k = 0; k < PRIME_A_COUNT; k++)
+       {
+         int answer;
+         mpz_rrandomb (a, rands, bsize + 2);
+         /* j, not PRIME_N, is passed as nprime so that only the exp[]
+            entries written for this b are consulted.  */
+         answer = ref_jacobi (a, b, j, prime, exp);
+         try_all (a, b, answer);
+       }
+    }
+  for (i = 0; i < PRIME_N; i++)
+    mpz_clear (prime[i]);
+
+  mpz_clear (a);
+  mpz_clear (b);
+  mpz_clear (t);
+  mpz_clear (bs);
+
+#undef PRIME_N
+#undef PRIME_MAX_SIZE
+#undef PRIME_MAX_EXP
+#undef PRIME_A_COUNT
+#undef PRIME_B_COUNT
+#undef PRIME_MAX_B_SIZE
+}
+
+/* These tests compute (a|n), where the quotient sequence includes
+   large quotients, and n has a known factorization. Such inputs are
+   generated as follows. First, construct a large n, as a power of a
+   prime p of moderate size.
+
+   Next, compute a matrix from factors (q,1;1,0), with q chosen with
+   uniformly distributed size. We must stop with matrix elements of
+   roughly half the size of n. Denote elements of M as M = (m00, m01;
+   m10, m11).
+
+   We now look for solutions to
+
+     n = m00 x + m01 y
+     a = m10 x + m11 y
+
+   with x,y > 0. Since n >= m00 * m01, there exists a positive
+   solution to the first equation. Find those x, y, and substitute in
+   the second equation to get a. Then the quotient sequence for (a|n)
+   is precisely the quotients used when constructing M, followed by
+   the quotient sequence for (x|y).
+
+   Numbers should also be large enough that we exercise hgcd_jacobi,
+   which means that they should be larger than
+
+     max (GCD_DC_THRESHOLD, 3 * HGCD_THRESHOLD)
+
+   With an n of roughly 40000 bits, this should hold on most machines.
+*/
+
+/* Run COUNT random instances of the construction described above.
+   n = p^PPOWER, where p is a PBITS-bit mpz_rrandomb value advanced by
+   mpz_nextprime; each constructed x is handed to try_zi_zi together with
+   refmpz_legendre (x, p) as the expected symbol.  */
+void
+check_large_quotients (void)
+{
+#define COUNT 50
+#define PBITS 200
+#define PPOWER 201
+#define MAX_QBITS 500
+
+  gmp_randstate_ptr rands = RANDS;
+
+  mpz_t p, n, q, g, s, t, x, y, bs;
+  mpz_t M[2][2];
+  mp_bitcnt_t nsize;
+  unsigned i;
+
+  mpz_init (p);
+  mpz_init (n);
+  mpz_init (q);
+  mpz_init (g);
+  mpz_init (s);
+  mpz_init (t);
+  mpz_init (x);
+  mpz_init (y);
+  mpz_init (bs);
+  mpz_init (M[0][0]);
+  mpz_init (M[0][1]);
+  mpz_init (M[1][0]);
+  mpz_init (M[1][1]);
+
+  /* First generate a number with known factorization, as a random
+     smallish prime raised to an odd power. Then (a|n) = (a|p). */
+  mpz_rrandomb (p, rands, PBITS);
+  mpz_nextprime (p, p);
+  mpz_pow_ui (n, p, PPOWER);
+
+  nsize = mpz_sizeinbase (n, 2);
+
+  for (i = 0; i < COUNT; i++)
+    {
+      int answer;
+      mp_bitcnt_t msize;
+
+      /* Start from the identity matrix.  */
+      mpz_set_ui (M[0][0], 1);
+      mpz_set_ui (M[0][1], 0);
+      mpz_set_ui (M[1][0], 0);
+      mpz_set_ui (M[1][1], 1);
+
+      /* Keep multiplying in quotient factors while one more factor of up
+        to MAX_QBITS bits still leaves the tracked element size (msize)
+        below about half the size of n.  */
+      for (msize = 1; 2*(msize + MAX_QBITS) + 1 < nsize ;)
+       {
+         unsigned row;
+         mpz_rrandomb (bs, rands, 32);
+         mpz_rrandomb (q, rands, 1 + mpz_get_ui (bs) % MAX_QBITS);
+
+         /* Multiply by (q, 1; 1,0) from the right */
+         for (row = 0; row < 2; row++)
+           {
+             mp_bitcnt_t size;
+             mpz_swap (M[row][0], M[row][1]);
+             mpz_addmul (M[row][0], M[row][1], q);
+             size = mpz_sizeinbase (M[row][0], 2);
+             if (size > msize)
+               msize = size;
+           }
+       }
+      /* s * M[0][0] + t * M[0][1] = g, and g must be 1 for the
+        construction below to work.  */
+      mpz_gcdext (g, s, t, M[0][0], M[0][1]);
+      ASSERT_ALWAYS (mpz_cmp_ui (g, 1) == 0);
+
+      /* Solve n = M[0][0] * x + M[0][1] * y */
+      if (mpz_sgn (s) > 0)
+       {
+         /* x = n*s mod M[0][1]; the floor quotient q is folded into y.  */
+         mpz_mul (x, n, s);
+         mpz_fdiv_qr (q, x, x, M[0][1]);
+         mpz_mul (y, q, M[0][0]);
+         mpz_addmul (y, t, n);
+         ASSERT_ALWAYS (mpz_sgn (y) > 0);
+       }
+      else
+       {
+         /* Same with the roles of x and y exchanged.  */
+         mpz_mul (y, n, t);
+         mpz_fdiv_qr (q, y, y, M[0][0]);
+         mpz_mul (x, q, M[0][1]);
+         mpz_addmul (x, s, n);
+         ASSERT_ALWAYS (mpz_sgn (x) > 0);
+       }
+      /* x <- M[1][0] * x + M[1][1] * y, the "a" of the construction.  */
+      mpz_mul (x, x, M[1][0]);
+      mpz_addmul (x, y, M[1][1]);
+
+      /* Now (x|n) has the selected large quotients */
+      answer = refmpz_legendre (x, p);
+      try_zi_zi (x, n, answer);
+    }
+  mpz_clear (p);
+  mpz_clear (n);
+  mpz_clear (q);
+  mpz_clear (g);
+  mpz_clear (s);
+  mpz_clear (t);
+  mpz_clear (x);
+  mpz_clear (y);
+  mpz_clear (bs);
+  mpz_clear (M[0][0]);
+  mpz_clear (M[0][1]);
+  mpz_clear (M[1][0]);
+  mpz_clear (M[1][1]);
+#undef COUNT
+#undef PBITS
+#undef PPOWER
+#undef MAX_QBITS
+}
+
  int
  main (int argc, char *argv[])
  {
@@ -741,7 +1007,8 @@ try(a,b,answer) =\n\
    check_data ();
    check_squares_zi ();
    check_a_zero ();
-
+  check_jacobi_factored ();
+  check_large_quotients ();
    tests_end ();
    exit (0);
  }
diff --git a/tests/mpz/t-lcm.c b/tests/mpz/t-lcm.c

index 195f0ee2f8249bae0751fafc09f0fec50032b9d0..c4342ba6d6c44d2e6e462c8f57e91f2fabcbd721 100644 (file)
--- a/tests/mpz/t-lcm.c
+++ b/tests/mpz/t-lcm.c
@@ -2,20 +2,20 @@
  
  Copyright 2001 Free Software Foundation, Inc.
  
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
  
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
  
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
  
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see http://www.gnu.org/licenses/.  */
  
  
  #include <stdio.h>
@@ -111,6 +111,13 @@ check_primes (void)
    mpz_init (x);
    mpz_init (y);
  
+  /* Check zeros. */
+  mpz_set_ui (want, 0);
+  mpz_set_ui (x, 1);
+  check_all (want, want, want);
+  check_all (want, want, x);
+  check_all (want, x, want);
+
    /* New prime each time. */
    mpz_set_ui (want, 1L);
    for (i = 0; i < numberof (prime); i++)
diff --git a/tests/mpz/t-lucnum_ui.c b/tests/mpz/t-lucnum_ui.c

index 814034f13ed5eef5a9ca616a2179a95c23a19b7c..e2fc2804ccea989842e15631c45eec5d91bfa55c 100644 (file)
--- a/tests/mpz/t-lucnum_ui.c
+++ b/tests/mpz/t-lucnum_ui.c
@@ -2,20 +2,20 @@
  
  Copyright 2001 Free Software Foundation, Inc.
  
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
  
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
  
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
  
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see http://www.gnu.org/licenses/.  */
  
  #include <stdio.h>
  #include <stdlib.h>
diff --git a/tests/mpz/t-mfac_uiui.c b/tests/mpz/t-mfac_uiui.c

new file mode 100644 (file)

index 0000000..932065b
--- /dev/null
+++ b/tests/mpz/t-mfac_uiui.c
@@ -0,0 +1,136 @@
+/* Exercise mpz_mfac_uiui.
+
+Copyright 2000, 2001, 2002, 2012 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see http://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "tests.h"
+
+
+/* Usage: t-mfac_uiui [x|num]
+
+   With no arguments testing goes up to the initial value of "limit" below.
+   With a number argument tests are carried that far, or with a literal "x"
+   tests are continued without limit (this being meant only for development
+   purposes).  */
+
+#define MULTIFAC_WHEEL (2*3*11)
+#define MULTIFAC_WHEEL2 (5*13)
+
+/* Compare mpz_mfac_uiui (res, n, m) for m = MULTIFAC_WHEEL and
+   m = MULTIFAC_WHEEL2 against running products kept in ref[] and ref2[],
+   for n from 0 up to "limit"; then cross-check m = 1 against mpz_fac_ui
+   and m = 2 against mpz_2fac_ui for the two values of n that follow.
+   Any mismatch prints the got/want values and aborts.  */
+int
+main (int argc, char *argv[])
+{
+  mpz_t ref[MULTIFAC_WHEEL], ref2[MULTIFAC_WHEEL2], res;
+  unsigned long n, j, m, m2;
+  unsigned long limit = 2222, step = 1;
+
+  tests_start ();
+
+  if (argc > 1 && argv[1][0] == 'x')
+    limit = ULONG_MAX;
+  else if (argc > 1)
+    limit = atoi (argv[1]);
+
+  /* for small limb testing */
+  limit = MIN (limit, MP_LIMB_T_MAX);
+
+  /* One reference slot per residue class modulo each wheel.  */
+  for (m = 0; m < MULTIFAC_WHEEL; m++)
+    mpz_init_set_ui(ref [m],1);
+  for (m2 = 0; m2 < MULTIFAC_WHEEL2; m2++)
+    mpz_init_set_ui(ref2 [m2],1);
+
+  mpz_init (res);
+
+  /* m and m2 are stepped together with n and wrapped at the wheel sizes,
+     so ref[m] and ref2[m2] are the slots for n's residue classes.  */
+  m = 0;
+  m2 = 0;
+  for (n = 0; n <= limit;)
+    {
+      mpz_mfac_uiui (res, n, MULTIFAC_WHEEL);
+      MPZ_CHECK_FORMAT (res);
+      if (mpz_cmp (ref[m], res) != 0)
+        {
+          printf ("mpz_mfac_uiui(%lu,%d) wrong\n", n, MULTIFAC_WHEEL);
+          printf ("  got  "); mpz_out_str (stdout, 10, res); printf("\n");
+          printf ("  want "); mpz_out_str (stdout, 10, ref[m]); printf("\n");
+          abort ();
+        }
+      mpz_mfac_uiui (res, n, MULTIFAC_WHEEL2);
+      MPZ_CHECK_FORMAT (res);
+      if (mpz_cmp (ref2[m2], res) != 0)
+        {
+          printf ("mpz_mfac_uiui(%lu,%d) wrong\n", n, MULTIFAC_WHEEL2);
+          printf ("  got  "); mpz_out_str (stdout, 10, res); printf("\n");
+          printf ("  want "); mpz_out_str (stdout, 10, ref2[m2]); printf("\n");
+          abort ();
+        }
+      if (n + step <= limit)
+       for (j = 0; j < step; j++) {
+         n++; m++; m2++;
+         if (m >= MULTIFAC_WHEEL) m -= MULTIFAC_WHEEL;
+         if (m2 >= MULTIFAC_WHEEL2) m2 -= MULTIFAC_WHEEL2;
+         mpz_mul_ui (ref[m], ref[m], n); /* Compute a reference, with current library */
+         mpz_mul_ui (ref2[m2], ref2[m2], n); /* Compute a reference, with current library */
+       }
+      else n += step;  /* no room for a full step: move n past limit to end the loop */
+    }
+
+  /* ref[0] is reused as scratch space for the remaining cross-checks.  */
+  mpz_fac_ui (ref[0], n);
+  mpz_mfac_uiui (res, n, 1);
+  MPZ_CHECK_FORMAT (res);
+  if (mpz_cmp (ref[0], res) != 0)
+    {
+      printf ("mpz_mfac_uiui(%lu,1) wrong\n", n);
+      printf ("  got  "); mpz_out_str (stdout, 10, res); printf("\n");
+      printf ("  want "); mpz_out_str (stdout, 10, ref[0]); printf("\n");
+      abort ();
+    }
+
+  mpz_2fac_ui (ref[0], n);
+  mpz_mfac_uiui (res, n, 2);
+  MPZ_CHECK_FORMAT (res);
+  if (mpz_cmp (ref[0], res) != 0)
+    {
+      /* The call under test uses m = 2, so report it as such.  */
+      printf ("mpz_mfac_uiui(%lu,2) wrong\n", n);
+      printf ("  got  "); mpz_out_str (stdout, 10, res); printf("\n");
+      printf ("  want "); mpz_out_str (stdout, 10, ref[0]); printf("\n");
+      abort ();
+    }
+
+  /* Repeat the m = 2 check for the next n, i.e. the other parity.  */
+  n++;
+  mpz_2fac_ui (ref[0], n);
+  mpz_mfac_uiui (res, n, 2);
+  MPZ_CHECK_FORMAT (res);
+  if (mpz_cmp (ref[0], res) != 0)
+    {
+      printf ("mpz_mfac_uiui(%lu,2) wrong\n", n);
+      printf ("  got  "); mpz_out_str (stdout, 10, res); printf("\n");
+      printf ("  want "); mpz_out_str (stdout, 10, ref[0]); printf("\n");
+      abort ();
+    }
+
+  for (m = 0; m < MULTIFAC_WHEEL; m++)
+    mpz_clear (ref[m]);
+  for (m2 = 0; m2 < MULTIFAC_WHEEL2; m2++)
+    mpz_clear (ref2[m2]);
+  mpz_clear (res);
+
+  tests_end ();
+
+  exit (0);
+}
diff --git a/tests/mpz/t-mul.c b/tests/mpz/t-mul.c

index fc718bb2622ceb2dfb0163920cc7d06af6581e6f..d496628fd9e477becddc11b6100f621fa52e6055 100644 (file)
--- a/tests/mpz/t-mul.c
+++ b/tests/mpz/t-mul.c
@@ -3,20 +3,20 @@
  Copyright 1991, 1993, 1994, 1996, 1997, 2000, 2001, 2002, 2003, 2004 Free
  Software Foundation, Inc.
  
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
  
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
  
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
  
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see http://www.gnu.org/licenses/.  */
  
  #include <stdio.h>
  #include <stdlib.h>
@@ -26,9 +26,9 @@ along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
  #include "longlong.h"
  #include "tests.h"
  
-void debug_mp __GMP_PROTO ((mpz_t));
-static void refmpz_mul __GMP_PROTO ((mpz_t, const mpz_t, const mpz_t));
-void dump_abort __GMP_PROTO ((int, char *, mpz_t, mpz_t, mpz_t, mpz_t));
+void debug_mp (mpz_t);
+static void refmpz_mul (mpz_t, const mpz_t, const mpz_t);
+void dump_abort (int, const char *, mpz_t, mpz_t, mpz_t, mpz_t);
  
  #define FFT_MIN_BITSIZE 100000
  
@@ -178,7 +178,7 @@ refmpz_mul (mpz_t w, const mpz_t u, const mpz_t v)
  }
  
  void
-dump_abort (int i, char *s,
+dump_abort (int i, const char *s,
              mpz_t op1, mpz_t op2, mpz_t product, mpz_t ref_product)
  {
    mp_size_t b, e;
diff --git a/tests/mpz/t-mul_i.c b/tests/mpz/t-mul_i.c

index bf95a153acf49096a65f46ff52ee63a8f82a21eb..dd6208f1f979e1f93ca0d597993bfe4a205905a9 100644 (file)
--- a/tests/mpz/t-mul_i.c
+++ b/tests/mpz/t-mul_i.c
@@ -2,20 +2,20 @@
  
  Copyright 2001, 2002 Free Software Foundation, Inc.
  
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
  
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
  
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
  
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see http://www.gnu.org/licenses/.  */
  
  #include <stdio.h>
  #include <stdlib.h>
diff --git a/tests/mpz/t-nextprime.c b/tests/mpz/t-nextprime.c

index 1734f6149678984ef00f67615b78aaac10b7204f..6799fc0cb9d9e908b7faea1e107a13827efa6e8f 100644 (file)
--- a/tests/mpz/t-nextprime.c
+++ b/tests/mpz/t-nextprime.c
@@ -2,20 +2,20 @@
  
  Copyright 2009 Free Software Foundation, Inc.
  
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
  
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
  
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
  
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see http://www.gnu.org/licenses/.  */
  
  
  #include <stdio.h>
@@ -34,7 +34,7 @@ refmpz_nextprime (mpz_ptr p, mpz_srcptr t)
  }
  
  void
-run (char *start, int reps, char *end, short diffs[])
+run (const char *start, int reps, const char *end, short diffs[])
  {
    mpz_t x, y;
    int i;
diff --git a/tests/mpz/t-oddeven.c b/tests/mpz/t-oddeven.c

index 09e2c938718290fd138830ae01b8cb7937f0f8f1..f7568e55806a4a43706f7ccc9585d2a37891a2da 100644 (file)
--- a/tests/mpz/t-oddeven.c
+++ b/tests/mpz/t-oddeven.c
@@ -2,20 +2,20 @@
  
  Copyright 2000, 2001 Free Software Foundation, Inc.
  
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
  
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
  
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
  
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see http://www.gnu.org/licenses/.  */
  
  #include <stdio.h>
  #include <stdlib.h>
diff --git a/tests/mpz/t-perfpow.c b/tests/mpz/t-perfpow.c

index f603089fa5861e3beaed39b8819011a18ed07067..14a3e2201f28845bcc0c336d6c18679c7d02a08d 100644 (file)
--- a/tests/mpz/t-perfpow.c
+++ b/tests/mpz/t-perfpow.c
@@ -4,20 +4,20 @@
  
  Copyright 2008, 2009, 2010 Free Software Foundation, Inc.
  
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
  
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
  
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
  
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see http://www.gnu.org/licenses/.  */
  
  #include <stdio.h>
  #include <stdlib.h>
@@ -28,7 +28,7 @@ along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
  
  struct
  {
-  char *num_as_str;
+  const char *num_as_str;
    char want;
  } tests[] =
    {
@@ -233,7 +233,7 @@ main (int argc, char **argv)
  
    check_tests ();
  
-  n_tests = 1000;
+  n_tests = 500;
    if (argc == 2)
      n_tests = atoi (argv[1]);
    check_random (n_tests);
diff --git a/tests/mpz/t-perfsqr.c b/tests/mpz/t-perfsqr.c

index f5fa15bfaa41c6628c64ce8580ae496aab308a32..92eff66198907a097c7d6b02b3efd46008c2658c 100644 (file)
--- a/tests/mpz/t-perfsqr.c
+++ b/tests/mpz/t-perfsqr.c
@@ -2,20 +2,20 @@
  
  Copyright 2000, 2001, 2002 Free Software Foundation, Inc.
  
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
  
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
  
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
  
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see http://www.gnu.org/licenses/.  */
  
  #include <stdio.h>
  #include <stdlib.h>
diff --git a/tests/mpz/t-popcount.c b/tests/mpz/t-popcount.c

index 313bc076b253f2076159f567134a58381dbb131d..81f2366e515ebf1e6e6796174ff553423cced487 100644 (file)
--- a/tests/mpz/t-popcount.c
+++ b/tests/mpz/t-popcount.c
@@ -2,20 +2,20 @@
  
  Copyright 2001, 2005 Free Software Foundation, Inc.
  
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
  
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
  
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
  
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see http://www.gnu.org/licenses/.  */
  
  #include <stdio.h>
  #include <stdlib.h>
@@ -84,7 +84,7 @@ check_data (void)
           printf ("         0x"); mpz_out_str (stdout, 16, n); printf ("\n");
           printf ("   got   %lu\n", got);
           printf ("   want  %lu\n", data[i].want);
-         abort();
+         abort ();
         }
      }
    mpz_clear (n);
@@ -147,7 +147,6 @@ check_random (void)
           printf ("         0x"); mpz_out_str (stdout, 16, arg); printf ("\n");
           printf ("   got   %lu\n", got);
           printf ("   want  %lu\n", ref);
-         abort();
           abort ();
         }
      }
diff --git a/tests/mpz/t-pow.c b/tests/mpz/t-pow.c

index d6373ea9af1a13332a71ed5449187ec1623c83c6..36418e4847b8e03c2aa25bf124a3a40711e64429 100644 (file)
--- a/tests/mpz/t-pow.c
+++ b/tests/mpz/t-pow.c
@@ -2,20 +2,20 @@
  
  Copyright 1997, 1999, 2000, 2001 Free Software Foundation, Inc.
  
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
  
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
  
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
  
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see http://www.gnu.org/licenses/.  */
  
  #include <stdio.h>
  #include <stdlib.h>
diff --git a/tests/mpz/t-powm.c b/tests/mpz/t-powm.c

index b14d0984ccad27e2e04b9149a4943adf796abee4..29c513ab63d1c05fb1d642fd51a1d2f47e96ec4d 100644 (file)
--- a/tests/mpz/t-powm.c
+++ b/tests/mpz/t-powm.c
@@ -3,20 +3,20 @@
  Copyright 1991, 1993, 1994, 1996, 1999, 2000, 2001, 2009, 2012 Free Software
  Foundation, Inc.
  
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
  
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
  
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
  
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see http://www.gnu.org/licenses/.  */
  
  #include <stdio.h>
  #include <stdlib.h>
@@ -26,14 +26,14 @@ along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
  #include "gmp-impl.h"
  #include "tests.h"
  
-void debug_mp __GMP_PROTO ((mpz_t, int));
+void debug_mp (mpz_t, int);
  
  #define SIZEM 13
  
  /* Check that all sizes up to just above MUL_TOOM22_THRESHOLD have been tested
     a few times.  FIXME: If SIZEM is set too low, this will never happen.  */
  int
-allsizes_seen (int *allsizes)
+allsizes_seen (unsigned int *allsizes)
  {
    mp_size_t i;
  
diff --git a/tests/mpz/t-powm_ui.c b/tests/mpz/t-powm_ui.c

index 224e60481efdfd1c61198b71073041cb80fdd06f..bfa7433c6011e78eb28aeb39eb8b3083a0c63a09 100644 (file)
--- a/tests/mpz/t-powm_ui.c
+++ b/tests/mpz/t-powm_ui.c
@@ -1,22 +1,22 @@
  /* Test mpz_powm_ui, mpz_mul, mpz_mod.
  
-Copyright 1991, 1993, 1994, 1996, 1997, 2000, 2001, 2002 Free Software
+Copyright 1991, 1993, 1994, 1996, 1997, 2000, 2001, 2002, 2013 Free Software
  Foundation, Inc.
  
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
  
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
  
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
  
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see http://www.gnu.org/licenses/.  */
  
  #include <stdio.h>
  #include <stdlib.h>
@@ -25,8 +25,7 @@ along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
  #include "gmp-impl.h"
  #include "tests.h"
  
-void dump_abort __GMP_PROTO ((mpz_t, mpz_t));
-void debug_mp __GMP_PROTO ((mpz_t, int));
+void debug_mp (mpz_t, int);
  
  int
  main (int argc, char **argv)
@@ -36,7 +35,7 @@ main (int argc, char **argv)
    mp_size_t base_size, exp_size, mod_size;
    unsigned long int exp2;
    int i;
-  int reps = 1000;
+  int reps = 100;
    gmp_randstate_ptr rands;
    mpz_t bs;
    unsigned long bsi, size_range;
@@ -59,7 +58,7 @@ main (int argc, char **argv)
    for (i = 0; i < reps; i++)
      {
        mpz_urandomb (bs, rands, 32);
-      size_range = mpz_get_ui (bs) % 13 + 2;
+      size_range = mpz_get_ui (bs) % 18 + 2;
  
        do  /* Loop until mathematically well-defined.  */
         {
@@ -145,15 +144,6 @@ main (int argc, char **argv)
    exit (0);
  }
  
-void
-dump_abort (mpz_t dividend, mpz_t divisor)
-{
-  fprintf (stderr, "ERROR\n");
-  fprintf (stderr, "dividend = "); debug_mp (dividend, -16);
-  fprintf (stderr, "divisor  = "); debug_mp (divisor, -16);
-  abort();
-}
-
  void
  debug_mp (mpz_t x, int base)
  {
diff --git a/tests/mpz/t-pprime_p.c b/tests/mpz/t-pprime_p.c

index 09073796815ee899a3fdb18b3620aae9f48bffed..7ef7c77cbbcecb767f4f67bfcf20b9f0f5ca3124 100644 (file)
--- a/tests/mpz/t-pprime_p.c
+++ b/tests/mpz/t-pprime_p.c
@@ -2,20 +2,20 @@
  
  Copyright 2002 Free Software Foundation, Inc.
  
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
  
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
  
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
  
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see http://www.gnu.org/licenses/.  */
  
  #include <stdio.h>
  #include <stdlib.h>
diff --git a/tests/mpz/t-primorial_ui.c b/tests/mpz/t-primorial_ui.c

new file mode 100644 (file)

index 0000000..9fa5527
--- /dev/null
+++ b/tests/mpz/t-primorial_ui.c
@@ -0,0 +1,97 @@
+/* Exercise mpz_primorial_ui.
+
+Copyright 2000, 2001, 2002, 2012 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see http://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "tests.h"
+
+
+/* Usage: t-primorial_ui [x|num]
+
+   With no arguments testing goes up to the initial value of "limit" below.
+   With a number argument tests are carried that far, or with a literal "x"
+   tests are continued without limit (this being meant only for development
+   purposes).  */
+
+static int isprime (unsigned long int t);
+
+int
+main (int argc, char *argv[])
+{
+  unsigned long  n = 0;
+  unsigned long  limit = 2222;
+  mpz_t          want, got;
+
+  tests_start ();
+
+  /* "x" selects the largest limit; any other argument is the new limit.  */
+  if (argc > 1)
+    limit = argv[1][0] == 'x' ? ULONG_MAX : (unsigned long) atoi (argv[1]);
+
+  /* for small limb testing */
+  limit = MIN (limit, MP_LIMB_T_MAX);
+
+  mpz_init_set_ui (want, 1);  /* 0# = 1 */
+  mpz_init (got);
+
+  while (n < limit)
+    {
+      mpz_primorial_ui (got, n);
+      MPZ_CHECK_FORMAT (got);
+      if (mpz_cmp (want, got) != 0)
+        {
+          printf ("mpz_primorial_ui(%lu) wrong\n", n);
+          printf ("  got  "); mpz_out_str (stdout, 10, got); printf("\n");
+          printf ("  want "); mpz_out_str (stdout, 10, want); printf("\n");
+          abort ();
+        }
+
+      /* Move on to n+1, extending the expected product when it is prime.  */
+      n++;
+      if (isprime (n))
+        mpz_mul_ui (want, want, n);  /* p# = (p-1)# * (p) */
+    }
+
+  mpz_clear (want);
+  mpz_clear (got);
+
+  tests_end ();
+
+  exit (0);
+}
+
+/* Return 1 if T is prime and 0 otherwise, using odd trial divisors.  */
+static int
+isprime (unsigned long int t)
+{
+  unsigned long int cand, quo;
+
+  if ((t & 1) == 0 || t < 3)
+    return t == 2;          /* among 0, 1 and the evens only 2 is prime */
+  for (cand = 3; ; cand += 2)
+    {
+      quo = t / cand;
+      if (quo < cand)       /* cand is past sqrt(t): no divisor exists */
+        return 1;
+      if (t % cand == 0)
+        return 0;
+    }
+}
diff --git a/tests/mpz/t-remove.c b/tests/mpz/t-remove.c

new file mode 100644 (file)

index 0000000..49635ee
--- /dev/null
+++ b/tests/mpz/t-remove.c
@@ -0,0 +1,147 @@
+/* Test mpz_remove.
+
+Copyright 1991, 1993, 1994, 1996, 1997, 2000, 2001, 2009, 2012, 2013
+Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see http://www.gnu.org/licenses/.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "tests.h"
+
+void debug_mp (mpz_t);
+unsigned long int mpz_refremove (mpz_t, const mpz_t, const mpz_t);
+
+int
+main (int argc, char **argv)
+{
+  unsigned long int exp;
+  mpz_t t, dest, refdest, dividend, divisor;
+  mp_size_t dividend_size, divisor_size;
+  int i;
+  int reps = 1000;
+  unsigned long int pwr, refpwr;
+  gmp_randstate_ptr rands;
+  mpz_t bs;
+  unsigned long size_range;
+
+  tests_start ();
+  rands = RANDS;
+  /* A single argument overrides the default repetition count.  */
+  if (argc == 2)
+    reps = atoi (argv[1]);
+
+  mpz_inits (bs, t, dest, refdest, dividend, divisor, NULL);
+  /* Compare mpz_remove with mpz_refremove on random * divisor^exp.  */
+  for (i = 0; i < reps; i++)
+    {
+      mpz_urandomb (bs, rands, 32);
+      size_range = mpz_get_ui (bs) % 18 + 1; /* 1..524288 bit operands */
+      /* Redraw until the divisor is nonzero.  */
+      do
+       {
+         mpz_urandomb (bs, rands, size_range);
+         divisor_size = mpz_get_ui (bs);
+         mpz_rrandomb (divisor, rands, divisor_size);
+       }
+      while (mpz_sgn (divisor) == 0);
+      /* Random cofactor; requested size is divisor_size plus a random amount.  */
+      mpz_urandomb (bs, rands, size_range);
+      dividend_size = mpz_get_ui (bs) + divisor_size;
+      mpz_rrandomb (dividend, rands, dividend_size);
+      /* Multiply in divisor^exp, negating divisor first when bs bit 1 is set.  */
+      mpz_urandomb (bs, rands, 32);
+      exp = mpz_get_ui (bs) % (5 + 10000 / mpz_sizeinbase (divisor, 2));
+      if (mpz_get_ui (bs) & 2)
+       mpz_neg (divisor, divisor);
+      mpz_pow_ui (t, divisor, exp);
+      mpz_mul (dividend, dividend, t);
+      /* Reference result first, then the function under test.  */
+      refpwr = mpz_refremove (refdest, dividend, divisor);
+      pwr = mpz_remove (dest, dividend, divisor);
+
+      if (refpwr != pwr || mpz_cmp (refdest, dest) != 0)
+       {
+         fprintf (stderr, "ERROR after %d tests\n", i);
+         fprintf (stderr, "refpower = %lu\n", refpwr);
+         fprintf (stderr, "   power = %lu\n", pwr);
+         fprintf (stderr, "    op1 = "); debug_mp (dividend);
+         fprintf (stderr, "    op2 = "); debug_mp (divisor);
+         fprintf (stderr, "refdest = "); debug_mp (refdest);
+         fprintf (stderr, "   dest = "); debug_mp (dest);
+         abort ();
+       }
+    }
+
+  mpz_clears (bs, t, dest, refdest, dividend, divisor, NULL);
+
+  tests_end ();
+  exit (0);
+}
+
+/* Reference mpz_remove: set DEST to SRC with every factor F divided out
+   and return how many were removed.  Nothing is removed (result 0) when
+   |F| <= 1, or when SRC is zero -- zero is divisible by F any number of
+   times, so the division loop below would otherwise never terminate.  */
+unsigned long int
+mpz_refremove (mpz_t dest, const mpz_t src, const mpz_t f)
+{
+  unsigned long int pwr = 0;
+
+  mpz_set (dest, src);
+  if (mpz_cmpabs_ui (f, 1) > 0 && mpz_sgn (dest) != 0)
+    {
+      mpz_t rem, x;
+
+      mpz_init (x);
+      mpz_init (rem);
+      for (;; pwr++)
+        {
+          mpz_tdiv_qr (x, rem, dest, f);   /* x = dest / f, rem = dest % f */
+          if (mpz_cmp_ui (rem, 0) != 0)
+            break;                         /* f no longer divides dest */
+          mpz_swap (dest, x);              /* keep the exact quotient */
+        }
+      mpz_clear (x);
+      mpz_clear (rem);
+    }
+
+  return pwr;
+}
+
+/* Print X in hex on stderr.  Values longer than 65 hex digits are cut to
+   the leading 25 digits, "...", the low 25 digits and the digit count.  */
+void
+debug_mp (mpz_t x)
+{
+  size_t ndigits = mpz_sizeinbase (x, 16);
+  mpz_t part;
+  if (ndigits <= 65)
+    {
+      gmp_fprintf (stderr, "%ZX\n", x);
+      return;
+    }
+
+  mpz_init (part);
+  mpz_tdiv_q_2exp (part, x, 4 * (ndigits - 25));   /* drop the low digits */
+  gmp_fprintf (stderr, "%ZX...", part);
+  mpz_tdiv_r_2exp (part, x, 4 * 25);               /* keep the low 25 digits */
+  gmp_fprintf (stderr, "%025ZX [%d]\n", part, (int) ndigits);
+  mpz_clear (part);
+}
diff --git a/tests/mpz/t-root.c b/tests/mpz/t-root.c

index c1a2b64330cd3765f44417be1616452c75d66dc7..9e6a5849e8307f8975e9d3eb575d493ba734efa2 100644 (file)
--- a/tests/mpz/t-root.c
+++ b/tests/mpz/t-root.c
@@ -2,20 +2,20 @@
  
  Copyright 1991, 1993, 1994, 1996, 2000, 2001, 2009 Free Software Foundation, Inc.
  
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
  
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
  
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
  
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see http://www.gnu.org/licenses/.  */
  
  #include <stdio.h>
  #include <stdlib.h>
@@ -24,7 +24,7 @@ along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
  #include "gmp-impl.h"
  #include "tests.h"
  
-void debug_mp __GMP_PROTO ((mpz_t, int));
+void debug_mp (mpz_t, int);
  
  void
  check_one (mpz_t root1, mpz_t x2, unsigned long nth, int i)
@@ -49,7 +49,7 @@ check_one (mpz_t root1, mpz_t x2, unsigned long nth, int i)
    mpz_add (temp2, temp, rem2);
  
    /* Is power of result > argument?  */
-  if (mpz_cmp (root1, root2) != 0 || mpz_cmp (x2, temp2) != 0 || mpz_cmp (temp, x2) > 0)
+  if (mpz_cmp (root1, root2) != 0 || mpz_cmp (x2, temp2) != 0 || mpz_cmpabs (temp, x2) > 0)
      {
        fprintf (stderr, "ERROR after test %d\n", i);
        debug_mp (x2, 10);
@@ -68,7 +68,7 @@ check_one (mpz_t root1, mpz_t x2, unsigned long nth, int i)
        abort ();
      }
  
-  if (nth <= 10000)            /* skip too expensive test */
+  if (nth <= 10000 && mpz_sgn(x2) > 0)         /* skip too expensive test */
      {
        mpz_add_ui (temp2, root1, 1L);
        mpz_pow_ui (temp2, temp2, nth);
@@ -150,6 +150,13 @@ main (int argc, char **argv)
         }
  
        check_one (root1, x2, nth, i);
+
+      if (((nth & 1) != 0) && ((bsi & 2) != 0))
+       {
+         mpz_neg (x2, x2);
+         mpz_neg (root1, root1);
+         check_one (root1, x2, nth, i);
+       }
      }
  
    mpz_clear (bs);
diff --git a/tests/mpz/t-scan.c b/tests/mpz/t-scan.c

index 8ae97cbea3a1b6c34951fd582557da325fb1e058..90901df9fb2c8b2c2812cee2fc56f72b6a6ace80 100644 (file)
--- a/tests/mpz/t-scan.c
+++ b/tests/mpz/t-scan.c
@@ -2,20 +2,20 @@
  
  Copyright 2000, 2001, 2002, 2003 Free Software Foundation, Inc.
  
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
  
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
  
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
  
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see http://www.gnu.org/licenses/.  */
  
  #include <stdio.h>
  #include <stdlib.h>
diff --git a/tests/mpz/t-set_d.c b/tests/mpz/t-set_d.c

index c4d646dad1ef0f785e9e0ef3942d5ddb3c9fbbb5..e5f2f9d996ac9a846855e33651ad2befb9c5bf32 100644 (file)
--- a/tests/mpz/t-set_d.c
+++ b/tests/mpz/t-set_d.c
@@ -2,20 +2,20 @@
  
  Copyright 2000, 2001, 2002, 2003, 2006 Free Software Foundation, Inc.
  
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
  
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
  
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
  
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see http://www.gnu.org/licenses/.  */
  
  #include <stdio.h>
  #include <stdlib.h>
diff --git a/tests/mpz/t-set_f.c b/tests/mpz/t-set_f.c

index 35cb61c87324528461ae34f2283472813a3d5a72..9c0699a7626d295c0a2af182ac6f4d59db43f01d 100644 (file)
--- a/tests/mpz/t-set_f.c
+++ b/tests/mpz/t-set_f.c
@@ -2,20 +2,20 @@
  
  Copyright 2001 Free Software Foundation, Inc.
  
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
  
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
  
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
  
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see http://www.gnu.org/licenses/.  */
  
  #include <stdio.h>
  #include <stdlib.h>
diff --git a/tests/mpz/t-set_si.c b/tests/mpz/t-set_si.c

index 7af40e1b727a28173b986dab2dcf04f7ed72ca53..e9da0c77e59b34b9555259f15135591ccbcc064f 100644 (file)
--- a/tests/mpz/t-set_si.c
+++ b/tests/mpz/t-set_si.c
@@ -2,20 +2,20 @@
  
  Copyright 2000, 2001, 2002 Free Software Foundation, Inc.
  
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
  
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
  
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
  
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see http://www.gnu.org/licenses/.  */
  
  #include <stdio.h>
  #include <stdlib.h>
diff --git a/tests/mpz/t-set_str.c b/tests/mpz/t-set_str.c

index cb589ca9da4a8b45e1cf62306304ec76cc453742..0eb27e747664ea099eefb5469ba3617cc4c88d7b 100644 (file)
--- a/tests/mpz/t-set_str.c
+++ b/tests/mpz/t-set_str.c
@@ -2,20 +2,20 @@
  
  Copyright 2001 Free Software Foundation, Inc.
  
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
  
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
  
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
  
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see http://www.gnu.org/licenses/.  */
  
  #include <stdio.h>
  #include <stdlib.h>
@@ -25,7 +25,7 @@ along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
  
  
  void
-check_one (mpz_srcptr want, int base, const char *str)
+check_one (mpz_srcptr want, int fail, int base, const char *str)
  {
    mpz_t   got;
  
@@ -34,7 +34,7 @@ check_one (mpz_srcptr want, int base, const char *str)
  
    mpz_init (got);
  
-  if (mpz_set_str (got, str, base) != 0)
+  if (mpz_set_str (got, str, base) != fail)
      {
        printf ("mpz_set_str unexpectedly failed\n");
        printf ("  base %d\n", base);
@@ -43,7 +43,7 @@ check_one (mpz_srcptr want, int base, const char *str)
      }
    MPZ_CHECK_FORMAT (got);
  
-  if (mpz_cmp (got, want) != 0)
+  if (fail == 0 && mpz_cmp (got, want) != 0)
      {
        printf ("mpz_set_str wrong\n");
        printf ("  base %d\n", base);
@@ -64,21 +64,35 @@ check_samples (void)
    mpz_init (z);
  
    mpz_set_ui (z, 0L);
-  check_one (z, 0, "0 ");
-  check_one (z, 0, "0    ");
-  check_one (z, 10, "0 ");
-  check_one (z, 10, "0    ");
-  check_one (z, 10, "0000000    ");
+  check_one (z, 0, 0, "0 ");
+  check_one (z, 0, 0, " 0 0 0 ");
+  check_one (z, 0, 0, " -0B 0 ");
+  check_one (z, 0, 0, "  0X 0 ");
+  check_one (z, 0, 10, "0 ");
+  check_one (z, 0, 10, "-0   ");
+  check_one (z, 0, 10, " 0 000 000    ");
  
    mpz_set_ui (z, 123L);
-  check_one (z, 0, "123 ");
-  check_one (z, 0, "123    ");
-  check_one (z, 10, "123 ");
-  check_one (z, 10, "123    ");
-  check_one (z, 0, " 123 ");
-  check_one (z, 0, "  123    ");
-  check_one (z, 10, "  0000123 ");
-  check_one (z, 10, "  123    ");
+  check_one (z, 0, 0, "123 ");
+  check_one (z, 0, 0, "123    ");
+  check_one (z, 0, 0, "0173   ");
+  check_one (z, 0, 0, " 0b 1 11 10 11  ");
+  check_one (z, 0, 0, " 0x 7b ");
+  check_one (z, 0, 0, "0x7B");
+  check_one (z, 0, 10, "123 ");
+  check_one (z, 0, 10, "123    ");
+  check_one (z, 0, 0, " 123 ");
+  check_one (z, 0, 0, "  123    ");
+  check_one (z, 0, 10, "  0000123 ");
+  check_one (z, 0, 10, "  123    ");
+  check_one (z,-1, 10, "1%");
+  check_one (z,-1, 0, "3!");
+  check_one (z,-1, 0, "0123456789");
+  check_one (z,-1, 0, "13579BDF");
+  check_one (z,-1, 0, "0b0102");
+  check_one (z,-1, 0, "0x010G");
+  check_one (z,-1, 37,"0x010G");
+  check_one (z,-1, 99,"0x010G");
  
    mpz_clear (z);
  }
diff --git a/tests/mpz/t-sizeinbase.c b/tests/mpz/t-sizeinbase.c

index c9d0f6c98c2f56fce1050fc4e02b072f1ade5977..a328a4a6205993219c77a76ca25e18a79cdef2d6 100644 (file)
--- a/tests/mpz/t-sizeinbase.c
+++ b/tests/mpz/t-sizeinbase.c
@@ -2,20 +2,20 @@
  
  Copyright 2001, 2002 Free Software Foundation, Inc.
  
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
  
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
  
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
  
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see http://www.gnu.org/licenses/.  */
  
  #include <stdio.h>
  #include <stdlib.h>
diff --git a/tests/mpz/t-sqrtrem.c b/tests/mpz/t-sqrtrem.c

index ce97532c728a914fa36908ebb9434c53b1a40eea..815887f96ad5f460fdf84cdc534113e90e56e6aa 100644 (file)
--- a/tests/mpz/t-sqrtrem.c
+++ b/tests/mpz/t-sqrtrem.c
@@ -3,20 +3,20 @@
  Copyright 1991, 1993, 1994, 1996, 2000, 2001, 2002 Free Software Foundation,
  Inc.
  
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
  
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
  
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
  
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see http://www.gnu.org/licenses/.  */
  
  #include <stdio.h>
  #include <stdlib.h>
@@ -25,8 +25,8 @@ along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
  #include "gmp-impl.h"
  #include "tests.h"
  
-void dump_abort __GMP_PROTO ((mpz_t, mpz_t, mpz_t));
-void debug_mp __GMP_PROTO ((mpz_t, int));
+void dump_abort (mpz_t, mpz_t, mpz_t);
+void debug_mp (mpz_t, int);
  
  int
  main (int argc, char **argv)
diff --git a/tests/mpz/t-tdiv.c b/tests/mpz/t-tdiv.c

index c94eff8d45cdac6190c263fd2eec1d5f1f600df6..f215f093999b556d0c5fbe68be9be786159cded8 100644 (file)
--- a/tests/mpz/t-tdiv.c
+++ b/tests/mpz/t-tdiv.c
@@ -3,20 +3,20 @@
  
  Copyright 1991, 1993, 1994, 1996, 1997, 2000, 2001 Free Software Foundation, Inc.
  
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
  
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
  
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
  
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see http://www.gnu.org/licenses/.  */
  
  #include <stdio.h>
  #include <stdlib.h>
@@ -25,8 +25,8 @@ along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
  #include "gmp-impl.h"
  #include "tests.h"
  
-void dump_abort __GMP_PROTO ((mpz_t, mpz_t));
-void debug_mp __GMP_PROTO ((mpz_t, int));
+void dump_abort (mpz_t, mpz_t);
+void debug_mp (mpz_t, int);
  
  int
  main (int argc, char **argv)
diff --git a/tests/mpz/t-tdiv_ui.c b/tests/mpz/t-tdiv_ui.c

index 8ceaac1e7c5156799eb0f6d4612eb0c507f640be..ccb5814650c2d01e9f9205b6c640cd69934eb9e7 100644 (file)
--- a/tests/mpz/t-tdiv_ui.c
+++ b/tests/mpz/t-tdiv_ui.c
@@ -3,20 +3,20 @@
  
  Copyright 1993, 1994, 1996, 2000, 2001, 2002 Free Software Foundation, Inc.
  
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
  
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
  
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
  
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see http://www.gnu.org/licenses/.  */
  
  #include <stdio.h>
  #include <stdlib.h>
@@ -25,8 +25,8 @@ along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
  #include "gmp-impl.h"
  #include "tests.h"
  
-void dump_abort __GMP_PROTO ((char *, mpz_t, unsigned long));
-void debug_mp __GMP_PROTO ((mpz_t, int));
+void dump_abort (const char *, mpz_t, unsigned long);
+void debug_mp (mpz_t, int);
  
  int
  main (int argc, char **argv)
@@ -144,7 +144,7 @@ main (int argc, char **argv)
  }
  
  void
-dump_abort (char *str, mpz_t dividend, unsigned long divisor)
+dump_abort (const char *str, mpz_t dividend, unsigned long divisor)
  {
    fprintf (stderr, "ERROR: %s\n", str);
    fprintf (stderr, "dividend = "); debug_mp (dividend, -16);
diff --git a/tests/rand/Makefile.am b/tests/rand/Makefile.am

index dd995db1b2281ef6e243b870ce389709ce121287..0f1371c90e8dc928682d7250d56d82321ed3dcb0 100644 (file)
--- a/tests/rand/Makefile.am
+++ b/tests/rand/Makefile.am
@@ -2,20 +2,20 @@
  
  # Copyright 2000, 2001, 2002, 2003 Free Software Foundation, Inc.
  #
-# This file is part of the GNU MP Library.
+# This file is part of the GNU MP Library test suite.
  #
-# The GNU MP Library is free software; you can redistribute it and/or modify
-# it under the terms of the GNU Lesser General Public License as published by
-# the Free Software Foundation; either version 3 of the License, or (at your
-# option) any later version.
+# The GNU MP Library test suite is free software; you can redistribute it
+# and/or modify it under the terms of the GNU General Public License as
+# published by the Free Software Foundation; either version 3 of the License,
+# or (at your option) any later version.
  #
-# The GNU MP Library is distributed in the hope that it will be useful, but
-# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-# License for more details.
+# The GNU MP Library test suite is distributed in the hope that it will be
+# useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+# Public License for more details.
  #
-# You should have received a copy of the GNU Lesser General Public License
-# along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+# You should have received a copy of the GNU General Public License along with
+# the GNU MP Library test suite.  If not, see http://www.gnu.org/licenses/.
  
  
  INCLUDES = -I$(top_srcdir) -I$(top_srcdir)/tests
diff --git a/tests/rand/Makefile.in b/tests/rand/Makefile.in

index 5fb32356d9c4a1247ee458091aaf29ed76f1ec82..6d160160dfcc26930d1411c4918358b1e144579e 100644 (file)
--- a/tests/rand/Makefile.in
+++ b/tests/rand/Makefile.in
@@ -1,9 +1,9 @@
-# Makefile.in generated by automake 1.11.1 from Makefile.am.
+# Makefile.in generated by automake 1.11.6 from Makefile.am.
  # @configure_input@
  
  # Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
-# 2003, 2004, 2005, 2006, 2007, 2008, 2009  Free Software Foundation,
-# Inc.
+# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software
+# Foundation, Inc.
  # This Makefile.in is free software; the Free Software Foundation
  # gives unlimited permission to copy and/or distribute it,
  # with or without modifications, as long as this notice is preserved.
@@ -17,21 +17,38 @@
  
  # Copyright 2000, 2001, 2002, 2003 Free Software Foundation, Inc.
  #
-# This file is part of the GNU MP Library.
+# This file is part of the GNU MP Library test suite.
  #
-# The GNU MP Library is free software; you can redistribute it and/or modify
-# it under the terms of the GNU Lesser General Public License as published by
-# the Free Software Foundation; either version 3 of the License, or (at your
-# option) any later version.
+# The GNU MP Library test suite is free software; you can redistribute it
+# and/or modify it under the terms of the GNU General Public License as
+# published by the Free Software Foundation; either version 3 of the License,
+# or (at your option) any later version.
  #
-# The GNU MP Library is distributed in the hope that it will be useful, but
-# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-# License for more details.
+# The GNU MP Library test suite is distributed in the hope that it will be
+# useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+# Public License for more details.
  #
-# You should have received a copy of the GNU Lesser General Public License
-# along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+# You should have received a copy of the GNU General Public License along with
+# the GNU MP Library test suite.  If not, see http://www.gnu.org/licenses/.
  VPATH = @srcdir@
+am__make_dryrun = \
+  { \
+    am__dry=no; \
+    case $$MAKEFLAGS in \
+      *\\[\ \  ]*) \
+        echo 'am--echo: ; @echo "AM"  OK' | $(MAKE) -f - 2>/dev/null \
+          | grep '^AM OK$$' >/dev/null || am__dry=yes;; \
+      *) \
+        for am__flg in $$MAKEFLAGS; do \
+          case $$am__flg in \
+            *=*|--*) ;; \
+            *n*) am__dry=yes; break;; \
+          esac; \
+        done;; \
+    esac; \
+    test $$am__dry = yes; \
+  }
  pkgdatadir = $(datadir)/@PACKAGE@
  pkgincludedir = $(includedir)/@PACKAGE@
  pkglibdir = $(libdir)/@PACKAGE@
@@ -50,7 +67,6 @@ PRE_UNINSTALL = :
  POST_UNINSTALL = :
  build_triplet = @build@
  host_triplet = @host@
-ANSI2KNR = $(top_builddir)/ansi2knr
  check_PROGRAMS = t-iset$(EXEEXT) t-lc2exp$(EXEEXT) t-mt$(EXEEXT) \
         t-rand$(EXEEXT) t-urbui$(EXEEXT) t-urmui$(EXEEXT) \
         t-urndmm$(EXEEXT)
@@ -60,7 +76,7 @@ subdir = tests/rand
  DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in
  ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
  am__aclocal_m4_deps = $(top_srcdir)/acinclude.m4 \
-       $(top_srcdir)/configure.in
+       $(top_srcdir)/configure.ac
  am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
         $(ACLOCAL_M4)
  mkinstalldirs = $(install_sh) -d
@@ -70,17 +86,17 @@ CONFIG_CLEAN_VPATH_FILES =
  am__DEPENDENCIES_1 =
  libstat_la_DEPENDENCIES = $(top_builddir)/libgmp.la \
         $(am__DEPENDENCIES_1)
-am_libstat_la_OBJECTS = statlib$U.lo zdiv_round$U.lo
+am_libstat_la_OBJECTS = statlib.lo zdiv_round.lo
  libstat_la_OBJECTS = $(am_libstat_la_OBJECTS)
  findlc_SOURCES = findlc.c
-findlc_OBJECTS = findlc$U.$(OBJEXT)
+findlc_OBJECTS = findlc.$(OBJEXT)
  findlc_DEPENDENCIES = libstat.la
  gen_SOURCES = gen.c
-gen_OBJECTS = gen$U.$(OBJEXT)
+gen_OBJECTS = gen.$(OBJEXT)
  gen_LDADD = $(LDADD)
  gen_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
         $(top_builddir)/libgmp.la
-am_gen_static_OBJECTS = gen$U.$(OBJEXT)
+am_gen_static_OBJECTS = gen.$(OBJEXT)
  gen_static_OBJECTS = $(am_gen_static_OBJECTS)
  gen_static_LDADD = $(LDADD)
  gen_static_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
@@ -89,43 +105,43 @@ gen_static_LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) \
         $(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \
         $(gen_static_LDFLAGS) $(LDFLAGS) -o $@
  spect_SOURCES = spect.c
-spect_OBJECTS = spect$U.$(OBJEXT)
+spect_OBJECTS = spect.$(OBJEXT)
  spect_DEPENDENCIES = libstat.la
  stat_SOURCES = stat.c
-stat_OBJECTS = stat$U.$(OBJEXT)
+stat_OBJECTS = stat.$(OBJEXT)
  stat_DEPENDENCIES = libstat.la
  t_iset_SOURCES = t-iset.c
-t_iset_OBJECTS = t-iset$U.$(OBJEXT)
+t_iset_OBJECTS = t-iset.$(OBJEXT)
  t_iset_LDADD = $(LDADD)
  t_iset_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
         $(top_builddir)/libgmp.la
  t_lc2exp_SOURCES = t-lc2exp.c
-t_lc2exp_OBJECTS = t-lc2exp$U.$(OBJEXT)
+t_lc2exp_OBJECTS = t-lc2exp.$(OBJEXT)
  t_lc2exp_LDADD = $(LDADD)
  t_lc2exp_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
         $(top_builddir)/libgmp.la
  t_mt_SOURCES = t-mt.c
-t_mt_OBJECTS = t-mt$U.$(OBJEXT)
+t_mt_OBJECTS = t-mt.$(OBJEXT)
  t_mt_LDADD = $(LDADD)
  t_mt_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
         $(top_builddir)/libgmp.la
  t_rand_SOURCES = t-rand.c
-t_rand_OBJECTS = t-rand$U.$(OBJEXT)
+t_rand_OBJECTS = t-rand.$(OBJEXT)
  t_rand_LDADD = $(LDADD)
  t_rand_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
         $(top_builddir)/libgmp.la
  t_urbui_SOURCES = t-urbui.c
-t_urbui_OBJECTS = t-urbui$U.$(OBJEXT)
+t_urbui_OBJECTS = t-urbui.$(OBJEXT)
  t_urbui_LDADD = $(LDADD)
  t_urbui_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
         $(top_builddir)/libgmp.la
  t_urmui_SOURCES = t-urmui.c
-t_urmui_OBJECTS = t-urmui$U.$(OBJEXT)
+t_urmui_OBJECTS = t-urmui.$(OBJEXT)
  t_urmui_LDADD = $(LDADD)
  t_urmui_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
         $(top_builddir)/libgmp.la
  t_urndmm_SOURCES = t-urndmm.c
-t_urndmm_OBJECTS = t-urndmm$U.$(OBJEXT)
+t_urndmm_OBJECTS = t-urndmm.$(OBJEXT)
  t_urndmm_LDADD = $(LDADD)
  t_urndmm_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
         $(top_builddir)/libgmp.la
@@ -147,6 +163,11 @@ SOURCES = $(libstat_la_SOURCES) findlc.c gen.c $(gen_static_SOURCES) \
  DIST_SOURCES = $(libstat_la_SOURCES) findlc.c gen.c \
         $(gen_static_SOURCES) spect.c stat.c t-iset.c t-lc2exp.c \
         t-mt.c t-rand.c t-urbui.c t-urmui.c t-urndmm.c
+am__can_run_installinfo = \
+  case $$AM_UPDATE_INFO_DIR in \
+    n|no|NO) false;; \
+    *) (install-info --version) >/dev/null 2>&1;; \
+  esac
  ETAGS = etags
  CTAGS = ctags
  am__tty_colors = \
@@ -250,8 +271,8 @@ SHELL = @SHELL@
  SPEED_CYCLECOUNTER_OBJ = @SPEED_CYCLECOUNTER_OBJ@
  STRIP = @STRIP@
  TAL_OBJECT = @TAL_OBJECT@
+TUNE_LIBS = @TUNE_LIBS@
  TUNE_SQR_OBJ = @TUNE_SQR_OBJ@
-U = @U@
  U_FOR_BUILD = @U_FOR_BUILD@
  VERSION = @VERSION@
  WITH_READLINE_01 = @WITH_READLINE_01@
@@ -298,7 +319,6 @@ mandir = @mandir@
  mkdir_p = @mkdir_p@
  mpn_objects = @mpn_objects@
  mpn_objs_in_libgmp = @mpn_objs_in_libgmp@
-mpn_objs_in_libmp = @mpn_objs_in_libmp@
  oldincludedir = @oldincludedir@
  pdfdir = @pdfdir@
  prefix = @prefix@
@@ -358,7 +378,7 @@ $(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps)
  $(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps)
         cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
  $(am__aclocal_m4_deps):
-libstat.la: $(libstat_la_OBJECTS) $(libstat_la_DEPENDENCIES) 
+libstat.la: $(libstat_la_OBJECTS) $(libstat_la_DEPENDENCIES) $(EXTRA_libstat_la_DEPENDENCIES) 
         $(LINK)  $(libstat_la_OBJECTS) $(libstat_la_LIBADD) $(LIBS)
  
  clean-checkPROGRAMS:
@@ -369,40 +389,40 @@ clean-checkPROGRAMS:
         list=`for p in $$list; do echo "$$p"; done | sed 's/$(EXEEXT)$$//'`; \
         echo " rm -f" $$list; \
         rm -f $$list
-findlc$(EXEEXT): $(findlc_OBJECTS) $(findlc_DEPENDENCIES) 
+findlc$(EXEEXT): $(findlc_OBJECTS) $(findlc_DEPENDENCIES) $(EXTRA_findlc_DEPENDENCIES) 
         @rm -f findlc$(EXEEXT)
         $(LINK) $(findlc_OBJECTS) $(findlc_LDADD) $(LIBS)
-gen$(EXEEXT): $(gen_OBJECTS) $(gen_DEPENDENCIES) 
+gen$(EXEEXT): $(gen_OBJECTS) $(gen_DEPENDENCIES) $(EXTRA_gen_DEPENDENCIES) 
         @rm -f gen$(EXEEXT)
         $(LINK) $(gen_OBJECTS) $(gen_LDADD) $(LIBS)
-gen.static$(EXEEXT): $(gen_static_OBJECTS) $(gen_static_DEPENDENCIES) 
+gen.static$(EXEEXT): $(gen_static_OBJECTS) $(gen_static_DEPENDENCIES) $(EXTRA_gen_static_DEPENDENCIES) 
         @rm -f gen.static$(EXEEXT)
         $(gen_static_LINK) $(gen_static_OBJECTS) $(gen_static_LDADD) $(LIBS)
-spect$(EXEEXT): $(spect_OBJECTS) $(spect_DEPENDENCIES) 
+spect$(EXEEXT): $(spect_OBJECTS) $(spect_DEPENDENCIES) $(EXTRA_spect_DEPENDENCIES) 
         @rm -f spect$(EXEEXT)
         $(LINK) $(spect_OBJECTS) $(spect_LDADD) $(LIBS)
-stat$(EXEEXT): $(stat_OBJECTS) $(stat_DEPENDENCIES) 
+stat$(EXEEXT): $(stat_OBJECTS) $(stat_DEPENDENCIES) $(EXTRA_stat_DEPENDENCIES) 
         @rm -f stat$(EXEEXT)
         $(LINK) $(stat_OBJECTS) $(stat_LDADD) $(LIBS)
-t-iset$(EXEEXT): $(t_iset_OBJECTS) $(t_iset_DEPENDENCIES) 
+t-iset$(EXEEXT): $(t_iset_OBJECTS) $(t_iset_DEPENDENCIES) $(EXTRA_t_iset_DEPENDENCIES) 
         @rm -f t-iset$(EXEEXT)
         $(LINK) $(t_iset_OBJECTS) $(t_iset_LDADD) $(LIBS)
-t-lc2exp$(EXEEXT): $(t_lc2exp_OBJECTS) $(t_lc2exp_DEPENDENCIES) 
+t-lc2exp$(EXEEXT): $(t_lc2exp_OBJECTS) $(t_lc2exp_DEPENDENCIES) $(EXTRA_t_lc2exp_DEPENDENCIES) 
         @rm -f t-lc2exp$(EXEEXT)
         $(LINK) $(t_lc2exp_OBJECTS) $(t_lc2exp_LDADD) $(LIBS)
-t-mt$(EXEEXT): $(t_mt_OBJECTS) $(t_mt_DEPENDENCIES) 
+t-mt$(EXEEXT): $(t_mt_OBJECTS) $(t_mt_DEPENDENCIES) $(EXTRA_t_mt_DEPENDENCIES) 
         @rm -f t-mt$(EXEEXT)
         $(LINK) $(t_mt_OBJECTS) $(t_mt_LDADD) $(LIBS)
-t-rand$(EXEEXT): $(t_rand_OBJECTS) $(t_rand_DEPENDENCIES) 
+t-rand$(EXEEXT): $(t_rand_OBJECTS) $(t_rand_DEPENDENCIES) $(EXTRA_t_rand_DEPENDENCIES) 
         @rm -f t-rand$(EXEEXT)
         $(LINK) $(t_rand_OBJECTS) $(t_rand_LDADD) $(LIBS)
-t-urbui$(EXEEXT): $(t_urbui_OBJECTS) $(t_urbui_DEPENDENCIES) 
+t-urbui$(EXEEXT): $(t_urbui_OBJECTS) $(t_urbui_DEPENDENCIES) $(EXTRA_t_urbui_DEPENDENCIES) 
         @rm -f t-urbui$(EXEEXT)
         $(LINK) $(t_urbui_OBJECTS) $(t_urbui_LDADD) $(LIBS)
-t-urmui$(EXEEXT): $(t_urmui_OBJECTS) $(t_urmui_DEPENDENCIES) 
+t-urmui$(EXEEXT): $(t_urmui_OBJECTS) $(t_urmui_DEPENDENCIES) $(EXTRA_t_urmui_DEPENDENCIES) 
         @rm -f t-urmui$(EXEEXT)
         $(LINK) $(t_urmui_OBJECTS) $(t_urmui_LDADD) $(LIBS)
-t-urndmm$(EXEEXT): $(t_urndmm_OBJECTS) $(t_urndmm_DEPENDENCIES) 
+t-urndmm$(EXEEXT): $(t_urndmm_OBJECTS) $(t_urndmm_DEPENDENCIES) $(EXTRA_t_urndmm_DEPENDENCIES) 
         @rm -f t-urndmm$(EXEEXT)
         $(LINK) $(t_urndmm_OBJECTS) $(t_urndmm_LDADD) $(LIBS)
  
@@ -411,11 +431,6 @@ mostlyclean-compile:
  
  distclean-compile:
         -rm -f *.tab.c
-$(top_builddir)/ansi2knr:
-       $(am__cd) $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) ./ansi2knr
-
-mostlyclean-kr:
-       -test "$U" = "" || rm -f *_.c
  
  .c.o:
         $(COMPILE) -c $<
@@ -425,39 +440,6 @@ mostlyclean-kr:
  
  .c.lo:
         $(LTCOMPILE) -c -o $@ $<
-findlc_.c: findlc.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/findlc.c; then echo $(srcdir)/findlc.c; else echo findlc.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-gen_.c: gen.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/gen.c; then echo $(srcdir)/gen.c; else echo gen.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-spect_.c: spect.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/spect.c; then echo $(srcdir)/spect.c; else echo spect.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-stat_.c: stat.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/stat.c; then echo $(srcdir)/stat.c; else echo stat.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-statlib_.c: statlib.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/statlib.c; then echo $(srcdir)/statlib.c; else echo statlib.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-iset_.c: t-iset.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-iset.c; then echo $(srcdir)/t-iset.c; else echo t-iset.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-lc2exp_.c: t-lc2exp.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-lc2exp.c; then echo $(srcdir)/t-lc2exp.c; else echo t-lc2exp.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-mt_.c: t-mt.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-mt.c; then echo $(srcdir)/t-mt.c; else echo t-mt.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-rand_.c: t-rand.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-rand.c; then echo $(srcdir)/t-rand.c; else echo t-rand.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-urbui_.c: t-urbui.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-urbui.c; then echo $(srcdir)/t-urbui.c; else echo t-urbui.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-urmui_.c: t-urmui.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-urmui.c; then echo $(srcdir)/t-urmui.c; else echo t-urmui.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-urndmm_.c: t-urndmm.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-urndmm.c; then echo $(srcdir)/t-urndmm.c; else echo t-urndmm.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-zdiv_round_.c: zdiv_round.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/zdiv_round.c; then echo $(srcdir)/zdiv_round.c; else echo zdiv_round.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-findlc_.$(OBJEXT) findlc_.lo gen_.$(OBJEXT) gen_.lo spect_.$(OBJEXT) \
-spect_.lo stat_.$(OBJEXT) stat_.lo statlib_.$(OBJEXT) statlib_.lo \
-t-iset_.$(OBJEXT) t-iset_.lo t-lc2exp_.$(OBJEXT) t-lc2exp_.lo \
-t-mt_.$(OBJEXT) t-mt_.lo t-rand_.$(OBJEXT) t-rand_.lo \
-t-urbui_.$(OBJEXT) t-urbui_.lo t-urmui_.$(OBJEXT) t-urmui_.lo \
-t-urndmm_.$(OBJEXT) t-urndmm_.lo zdiv_round_.$(OBJEXT) zdiv_round_.lo \
-: $(ANSI2KNR)
  
  mostlyclean-libtool:
         -rm -f *.lo
@@ -598,14 +580,15 @@ check-TESTS: $(TESTS)
           fi; \
           dashes=`echo "$$dashes" | sed s/./=/g`; \
           if test "$$failed" -eq 0; then \
-           echo "$$grn$$dashes"; \
+           col="$$grn"; \
           else \
-           echo "$$red$$dashes"; \
+           col="$$red"; \
           fi; \
-         echo "$$banner"; \
-         test -z "$$skipped" || echo "$$skipped"; \
-         test -z "$$report" || echo "$$report"; \
-         echo "$$dashes$$std"; \
+         echo "$${col}$$dashes$${std}"; \
+         echo "$${col}$$banner$${std}"; \
+         test -z "$$skipped" || echo "$${col}$$skipped$${std}"; \
+         test -z "$$report" || echo "$${col}$$report$${std}"; \
+         echo "$${col}$$dashes$${std}"; \
           test "$$failed" -eq 0; \
         else :; fi
  
@@ -655,10 +638,15 @@ install-am: all-am
  
  installcheck: installcheck-am
  install-strip:
-       $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
-         install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
-         `test -z '$(STRIP)' || \
-           echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+       if test -z '$(STRIP)'; then \
+         $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+           install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+             install; \
+       else \
+         $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+           install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+           "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
+       fi
  mostlyclean-generic:
  
  clean-generic:
@@ -727,7 +715,7 @@ maintainer-clean-am: distclean-am maintainer-clean-generic
  
  mostlyclean: mostlyclean-am
  
-mostlyclean-am: mostlyclean-compile mostlyclean-generic mostlyclean-kr \
+mostlyclean-am: mostlyclean-compile mostlyclean-generic \
         mostlyclean-libtool
  
  pdf: pdf-am
@@ -740,7 +728,7 @@ ps-am:
  
  uninstall-am:
  
-.MAKE: $(top_builddir)/ansi2knr check-am install-am install-strip
+.MAKE: check-am install-am install-strip
  
  .PHONY: CTAGS GTAGS all all-am check check-TESTS check-am clean \
         clean-checkPROGRAMS clean-generic clean-libtool ctags \
@@ -753,8 +741,8 @@ uninstall-am:
         install-ps install-ps-am install-strip installcheck \
         installcheck-am installdirs maintainer-clean \
         maintainer-clean-generic mostlyclean mostlyclean-compile \
-       mostlyclean-generic mostlyclean-kr mostlyclean-libtool pdf \
-       pdf-am ps ps-am tags uninstall uninstall-am
+       mostlyclean-generic mostlyclean-libtool pdf pdf-am ps ps-am \
+       tags uninstall uninstall-am
  
  
  allprogs: $(EXTRA_PROGRAMS)
diff --git a/tests/rand/findlc.c b/tests/rand/findlc.c

index c0279707c41e9aaa678dbef2e5c65f5dfbd58033..89a452c5aca05e038e854f11932887ca19090501 100644 (file)
--- a/tests/rand/findlc.c
+++ b/tests/rand/findlc.c
@@ -1,20 +1,20 @@
  /*
  Copyright 2000 Free Software Foundation, Inc.
  
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
  
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
  
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
  
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see http://www.gnu.org/licenses/.  */
  
  #include <stdio.h>
  #include <stdlib.h>
diff --git a/tests/rand/gen.c b/tests/rand/gen.c

index 511a3e2845f6ed03d3590bd06acf4a5ba7c4966b..ea6d0e75104162e81495eccdd2e381d170cae6f8 100644 (file)
--- a/tests/rand/gen.c
+++ b/tests/rand/gen.c
@@ -2,20 +2,20 @@
  
  Copyright 1999, 2000, 2002 Free Software Foundation, Inc.
  
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
  
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
  
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
  
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see http://www.gnu.org/licenses/.  */
  
  /* Examples:
  
diff --git a/tests/rand/gmpstat.h b/tests/rand/gmpstat.h

index 1e4d6afe7d6ffa7bd284af6fc49b2604a6cdc0b9..4b9af70be8083b22fcad86316d9029f03f598ce1 100644 (file)
--- a/tests/rand/gmpstat.h
+++ b/tests/rand/gmpstat.h
@@ -3,20 +3,20 @@
  /*
  Copyright 1999 Free Software Foundation, Inc.
  
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
  
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
  
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
  
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see http://www.gnu.org/licenses/.  */
  
  /* This file requires the following header files: gmp.h */
  
diff --git a/tests/rand/spect.c b/tests/rand/spect.c

index f6f872b09969030a5e52f53ecb18b7cf61319f10..545374daebe85afbbaaf78c78f879b56a3b31f01 100644 (file)
--- a/tests/rand/spect.c
+++ b/tests/rand/spect.c
@@ -3,20 +3,20 @@
  /*
  Copyright 1999 Free Software Foundation, Inc.
  
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
  
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
  
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
  
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see http://www.gnu.org/licenses/.  */
  
  /* T is upper dimension.  Z_A is the LC multiplier, which is
     relatively prime to Z_M, the LC modulus.  The result is put in
diff --git a/tests/rand/stat.c b/tests/rand/stat.c

index e6917d93540804a8980fcdbaee973be912ee0d43..a68486ac7d55ef50dfc27493134d3f9d176d16da 100644 (file)
--- a/tests/rand/stat.c
+++ b/tests/rand/stat.c
@@ -3,20 +3,20 @@
  /*
  Copyright 1999, 2000 Free Software Foundation, Inc.
  
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
  
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
  
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
  
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see http://www.gnu.org/licenses/.  */
  
  /* Examples:
  
diff --git a/tests/rand/statlib.c b/tests/rand/statlib.c

index 0bf22e39b33ea58df53830e47c37f0b5deaf766c..a5f1b3d206e8a67ce73d686b7f4060cf791962ec 100644 (file)
--- a/tests/rand/statlib.c
+++ b/tests/rand/statlib.c
@@ -4,20 +4,20 @@
  /*
  Copyright 1999, 2000 Free Software Foundation, Inc.
  
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
  
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
  
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
  
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see http://www.gnu.org/licenses/.  */
  
  /* The theories for these functions are taken from D. Knuth's "The Art
  of Computer Programming: Volume 2, Seminumerical Algorithms", Third
diff --git a/tests/rand/t-iset.c b/tests/rand/t-iset.c

index b1e51e4e1965645a394c831646271e9a4dd47eef..f84c3a63c46d44cb7773e31e8b78f08a4424a33d 100644 (file)
--- a/tests/rand/t-iset.c
+++ b/tests/rand/t-iset.c
@@ -2,20 +2,20 @@
  
  Copyright 2003 Free Software Foundation, Inc.
  
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
  
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
  
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
  
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see http://www.gnu.org/licenses/.  */
  
  #include <stdio.h>
  #include <stdlib.h>
diff --git a/tests/rand/t-lc2exp.c b/tests/rand/t-lc2exp.c

index ce7d4c49499fc7382dab55bf825b2ac9ffad1553..b2e3e13e80f19d9511d4a89b29cc387ef887b4ec 100644 (file)
--- a/tests/rand/t-lc2exp.c
+++ b/tests/rand/t-lc2exp.c
@@ -2,20 +2,20 @@
  
  Copyright 2002, 2011 Free Software Foundation, Inc.
  
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
  
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
  
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
  
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see http://www.gnu.org/licenses/.  */
  
  #include <stdio.h>
  #include <stdlib.h>
diff --git a/tests/rand/t-mt.c b/tests/rand/t-mt.c

index 71f1e11bb2dc0040194c382b7634c2966e8fb0e0..7bb0d4f69a01d653dc282b74ebfd16e42b047fc1 100644 (file)
--- a/tests/rand/t-mt.c
+++ b/tests/rand/t-mt.c
@@ -2,20 +2,20 @@
  
  Copyright 2002 Free Software Foundation, Inc.
  
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
  
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
  
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
  
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see http://www.gnu.org/licenses/.  */
  
  #include <stdio.h>
  #include "gmp.h"
diff --git a/tests/rand/t-rand.c b/tests/rand/t-rand.c

index d3e3c252fe0d9150a9069fee1cd5bb7fbc998ba3..1eb33c1efbe6c983981e080dd49d34e1e47e5ab1 100644 (file)
--- a/tests/rand/t-rand.c
+++ b/tests/rand/t-rand.c
@@ -3,20 +3,20 @@
  /*
  Copyright 2000, 2001 Free Software Foundation, Inc.
  
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
  
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
  
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
  
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see http://www.gnu.org/licenses/.  */
  
  #include <stdlib.h>
  #include <stdio.h>
@@ -29,57 +29,57 @@ along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
  
  /* These were generated by this very program.  Do not edit!  */
  /* Integers.  */
-char *z1[ENTS] = {"0", "1", "1", "1", "1", "0", "1", "1", "1", "1"};
-char *z2[ENTS] = {"0", "3", "1", "3", "3", "0", "3", "3", "3", "1"};
-char *z3[ENTS] = {"4", "3", "1", "7", "3", "0", "3", "3", "3", "1"};
-char *z4[ENTS] = {"c", "3", "1", "f", "b", "8", "3", "3", "3", "1"};
-char *z5[ENTS] = {"1c", "13", "11", "1f", "b", "18", "3", "13", "3", "1"};
+const char *z1[ENTS] = {"0", "1", "1", "1", "1", "0", "1", "1", "1", "1"};
+const char *z2[ENTS] = {"0", "3", "1", "3", "3", "0", "3", "3", "3", "1"};
+const char *z3[ENTS] = {"4", "3", "1", "7", "3", "0", "3", "3", "3", "1"};
+const char *z4[ENTS] = {"c", "3", "1", "f", "b", "8", "3", "3", "3", "1"};
+const char *z5[ENTS] = {"1c", "13", "11", "1f", "b", "18", "3", "13", "3", "1"};
  
-char *z10[ENTS] = {"29c", "213", "f1", "17f", "12b", "178", "383", "d3", "3a3", "281"};
+const char *z10[ENTS] = {"29c", "213", "f1", "17f", "12b", "178", "383", "d3", "3a3", "281"};
  
-char *z15[ENTS] = {"29c", "1a13", "74f1", "257f", "592b", "4978", "4783", "7cd3", "5ba3", "4681"};
-char *z16[ENTS] = {"29c", "9a13", "74f1", "a57f", "d92b", "4978", "c783", "fcd3", "5ba3", "c681"};
-char *z17[ENTS] = {"51e", "f17a", "54ff", "1a335", "cf65", "5d6f", "583f", "618f", "1bc6", "98ff"};
+const char *z15[ENTS] = {"29c", "1a13", "74f1", "257f", "592b", "4978", "4783", "7cd3", "5ba3", "4681"};
+const char *z16[ENTS] = {"29c", "9a13", "74f1", "a57f", "d92b", "4978", "c783", "fcd3", "5ba3", "c681"};
+const char *z17[ENTS] = {"51e", "f17a", "54ff", "1a335", "cf65", "5d6f", "583f", "618f", "1bc6", "98ff"};
  
-char *z31[ENTS] = {"3aecd515", "13ae8ec6", "518c8090", "81ca077", "70b7134", "7ee78d71", "323a7636", "2122cb1a", "19811941", "41fd605"};
-char *z32[ENTS] = {"baecd515", "13ae8ec6", "518c8090", "881ca077", "870b7134", "7ee78d71", "323a7636", "a122cb1a", "99811941", "841fd605"};
-char *z33[ENTS] = {"1faf4cca", "15d6ef83b", "9095fe72", "1b6a3dff6", "b17cbddd", "16e5209d4", "6f65b12c", "493bbbc6", "abf2a5d5", "6d491a3c"};
+const char *z31[ENTS] = {"3aecd515", "13ae8ec6", "518c8090", "81ca077", "70b7134", "7ee78d71", "323a7636", "2122cb1a", "19811941", "41fd605"};
+const char *z32[ENTS] = {"baecd515", "13ae8ec6", "518c8090", "881ca077", "870b7134", "7ee78d71", "323a7636", "a122cb1a", "99811941", "841fd605"};
+const char *z33[ENTS] = {"1faf4cca", "15d6ef83b", "9095fe72", "1b6a3dff6", "b17cbddd", "16e5209d4", "6f65b12c", "493bbbc6", "abf2a5d5", "6d491a3c"};
  
-char *z63[ENTS] = {"48a74f367fa7b5c8", "3ba9e9dc1b263076", "1e0ac84e7678e0fb", "11416581728b3e35", "36ab610523f0f1f7", "3e540e8e95c0eb4b", "439ae16057dbc9d3", "734fb260db243950", "7d3a317effc289bf", "1d80301fb3d1a0d1"};
-char *z64[ENTS] = {"48a74f367fa7b5c8", "bba9e9dc1b263076", "9e0ac84e7678e0fb", "11416581728b3e35", "b6ab610523f0f1f7", "be540e8e95c0eb4b", "439ae16057dbc9d3", "f34fb260db243950", "fd3a317effc289bf", "1d80301fb3d1a0d1"};
-char *z65[ENTS] = {"1ff77710d846d49f0", "1b1411701d709ee10", "31ffa81a208b6af4", "446638d431d3c681", "df5c569d5baa8b55", "197d99ea9bf28e5a0", "191ade09edd94cfae", "194acefa6dde5e18d", "1afc1167c56272d92", "d092994da72f206f"};
+const char *z63[ENTS] = {"48a74f367fa7b5c8", "3ba9e9dc1b263076", "1e0ac84e7678e0fb", "11416581728b3e35", "36ab610523f0f1f7", "3e540e8e95c0eb4b", "439ae16057dbc9d3", "734fb260db243950", "7d3a317effc289bf", "1d80301fb3d1a0d1"};
+const char *z64[ENTS] = {"48a74f367fa7b5c8", "bba9e9dc1b263076", "9e0ac84e7678e0fb", "11416581728b3e35", "b6ab610523f0f1f7", "be540e8e95c0eb4b", "439ae16057dbc9d3", "f34fb260db243950", "fd3a317effc289bf", "1d80301fb3d1a0d1"};
+const char *z65[ENTS] = {"1ff77710d846d49f0", "1b1411701d709ee10", "31ffa81a208b6af4", "446638d431d3c681", "df5c569d5baa8b55", "197d99ea9bf28e5a0", "191ade09edd94cfae", "194acefa6dde5e18d", "1afc1167c56272d92", "d092994da72f206f"};
  
-char *z127[ENTS] = {"2f66ba932aaf58a071fd8f0742a99a0c", "73cfa3c664c9c1753507ca60ec6b8425", "53ea074ca131dec12cd68b8aa8e20278", "3cf5ac8c343532f8a53cc0eb47581f73", "50c11d5869e208aa1b9aa317b8c2d0a9", "b23163c892876472b1ef19642eace09", "489f4c03d41f87509c8d6c90ce674f95", "2ab8748c96aa6762ea1932b44c9d7164", "98cb5591fc05ad31afbbc1d67b90edd", "77848bb991fd0be331adcf1457fbc672"};
-char *z128[ENTS] = {"af66ba932aaf58a071fd8f0742a99a0c", "73cfa3c664c9c1753507ca60ec6b8425", "53ea074ca131dec12cd68b8aa8e20278", "3cf5ac8c343532f8a53cc0eb47581f73", "50c11d5869e208aa1b9aa317b8c2d0a9", "8b23163c892876472b1ef19642eace09", "489f4c03d41f87509c8d6c90ce674f95", "aab8748c96aa6762ea1932b44c9d7164", "98cb5591fc05ad31afbbc1d67b90edd", "f7848bb991fd0be331adcf1457fbc672"};
+const char *z127[ENTS] = {"2f66ba932aaf58a071fd8f0742a99a0c", "73cfa3c664c9c1753507ca60ec6b8425", "53ea074ca131dec12cd68b8aa8e20278", "3cf5ac8c343532f8a53cc0eb47581f73", "50c11d5869e208aa1b9aa317b8c2d0a9", "b23163c892876472b1ef19642eace09", "489f4c03d41f87509c8d6c90ce674f95", "2ab8748c96aa6762ea1932b44c9d7164", "98cb5591fc05ad31afbbc1d67b90edd", "77848bb991fd0be331adcf1457fbc672"};
+const char *z128[ENTS] = {"af66ba932aaf58a071fd8f0742a99a0c", "73cfa3c664c9c1753507ca60ec6b8425", "53ea074ca131dec12cd68b8aa8e20278", "3cf5ac8c343532f8a53cc0eb47581f73", "50c11d5869e208aa1b9aa317b8c2d0a9", "8b23163c892876472b1ef19642eace09", "489f4c03d41f87509c8d6c90ce674f95", "aab8748c96aa6762ea1932b44c9d7164", "98cb5591fc05ad31afbbc1d67b90edd", "f7848bb991fd0be331adcf1457fbc672"};
  
  /* Floats.  */
-char *f1[ENTS] = {"0.@0", "0.8@0", "0.8@0", "0.8@0", "0.8@0", "0.@0", "0.8@0", "0.8@0", "0.8@0", "0.8@0"};
-char *f2[ENTS] = {"0.@0", "0.c@0", "0.4@0", "0.c@0", "0.c@0", "0.@0", "0.c@0", "0.c@0", "0.c@0", "0.4@0"};
-char *f3[ENTS] = {"0.8@0", "0.6@0", "0.2@0", "0.e@0", "0.6@0", "0.@0", "0.6@0", "0.6@0", "0.6@0", "0.2@0"};
-char *f4[ENTS] = {"0.c@0", "0.3@0", "0.1@0", "0.f@0", "0.b@0", "0.8@0", "0.3@0", "0.3@0", "0.3@0", "0.1@0"};
-char *f5[ENTS] = {"0.e@0", "0.98@0", "0.88@0", "0.f8@0", "0.58@0", "0.c@0", "0.18@0", "0.98@0", "0.18@0", "0.8@-1"};
+const char *f1[ENTS] = {"0.@0", "0.8@0", "0.8@0", "0.8@0", "0.8@0", "0.@0", "0.8@0", "0.8@0", "0.8@0", "0.8@0"};
+const char *f2[ENTS] = {"0.@0", "0.c@0", "0.4@0", "0.c@0", "0.c@0", "0.@0", "0.c@0", "0.c@0", "0.c@0", "0.4@0"};
+const char *f3[ENTS] = {"0.8@0", "0.6@0", "0.2@0", "0.e@0", "0.6@0", "0.@0", "0.6@0", "0.6@0", "0.6@0", "0.2@0"};
+const char *f4[ENTS] = {"0.c@0", "0.3@0", "0.1@0", "0.f@0", "0.b@0", "0.8@0", "0.3@0", "0.3@0", "0.3@0", "0.1@0"};
+const char *f5[ENTS] = {"0.e@0", "0.98@0", "0.88@0", "0.f8@0", "0.58@0", "0.c@0", "0.18@0", "0.98@0", "0.18@0", "0.8@-1"};
  
-char *f10[ENTS] = {"0.a7@0", "0.84c@0", "0.3c4@0", "0.5fc@0", "0.4ac@0", "0.5e@0", "0.e0c@0", "0.34c@0", "0.e8c@0", "0.a04@0"};
+const char *f10[ENTS] = {"0.a7@0", "0.84c@0", "0.3c4@0", "0.5fc@0", "0.4ac@0", "0.5e@0", "0.e0c@0", "0.34c@0", "0.e8c@0", "0.a04@0"};
  
-char *f15[ENTS] = {"0.538@-1", "0.3426@0", "0.e9e2@0", "0.4afe@0", "0.b256@0", "0.92f@0", "0.8f06@0", "0.f9a6@0", "0.b746@0", "0.8d02@0"};
-char *f16[ENTS] = {"0.29c@-1", "0.9a13@0", "0.74f1@0", "0.a57f@0", "0.d92b@0", "0.4978@0", "0.c783@0", "0.fcd3@0", "0.5ba3@0", "0.c681@0"};
-char *f17[ENTS] = {"0.28f@-1", "0.78bd@0", "0.2a7f8@0", "0.d19a8@0", "0.67b28@0", "0.2eb78@0", "0.2c1f8@0", "0.30c78@0", "0.de3@-1", "0.4c7f8@0"};
+const char *f15[ENTS] = {"0.538@-1", "0.3426@0", "0.e9e2@0", "0.4afe@0", "0.b256@0", "0.92f@0", "0.8f06@0", "0.f9a6@0", "0.b746@0", "0.8d02@0"};
+const char *f16[ENTS] = {"0.29c@-1", "0.9a13@0", "0.74f1@0", "0.a57f@0", "0.d92b@0", "0.4978@0", "0.c783@0", "0.fcd3@0", "0.5ba3@0", "0.c681@0"};
+const char *f17[ENTS] = {"0.28f@-1", "0.78bd@0", "0.2a7f8@0", "0.d19a8@0", "0.67b28@0", "0.2eb78@0", "0.2c1f8@0", "0.30c78@0", "0.de3@-1", "0.4c7f8@0"};
  
-char *f31[ENTS] = {"0.75d9aa2a@0", "0.275d1d8c@0", "0.a319012@0", "0.103940ee@0", "0.e16e268@-1", "0.fdcf1ae2@0", "0.6474ec6c@0", "0.42459634@0", "0.33023282@0", "0.83fac0a@-1"};
-char *f32[ENTS] = {"0.baecd515@0", "0.13ae8ec6@0", "0.518c809@0", "0.881ca077@0", "0.870b7134@0", "0.7ee78d71@0", "0.323a7636@0", "0.a122cb1a@0", "0.99811941@0", "0.841fd605@0"};
-char *f33[ENTS] = {"0.fd7a665@-1", "0.aeb77c1d8@0", "0.484aff39@0", "0.db51effb@0", "0.58be5eee8@0", "0.b72904ea@0", "0.37b2d896@0", "0.249ddde3@0", "0.55f952ea8@0", "0.36a48d1e@0"};
+const char *f31[ENTS] = {"0.75d9aa2a@0", "0.275d1d8c@0", "0.a319012@0", "0.103940ee@0", "0.e16e268@-1", "0.fdcf1ae2@0", "0.6474ec6c@0", "0.42459634@0", "0.33023282@0", "0.83fac0a@-1"};
+const char *f32[ENTS] = {"0.baecd515@0", "0.13ae8ec6@0", "0.518c809@0", "0.881ca077@0", "0.870b7134@0", "0.7ee78d71@0", "0.323a7636@0", "0.a122cb1a@0", "0.99811941@0", "0.841fd605@0"};
+const char *f33[ENTS] = {"0.fd7a665@-1", "0.aeb77c1d8@0", "0.484aff39@0", "0.db51effb@0", "0.58be5eee8@0", "0.b72904ea@0", "0.37b2d896@0", "0.249ddde3@0", "0.55f952ea8@0", "0.36a48d1e@0"};
  
-char *f63[ENTS] = {"0.914e9e6cff4f6b9@0", "0.7753d3b8364c60ec@0", "0.3c15909cecf1c1f6@0", "0.2282cb02e5167c6a@0", "0.6d56c20a47e1e3ee@0", "0.7ca81d1d2b81d696@0", "0.8735c2c0afb793a6@0", "0.e69f64c1b64872a@0", "0.fa7462fdff85137e@0", "0.3b00603f67a341a2@0"};
-char *f64[ENTS] = {"0.48a74f367fa7b5c8@0", "0.bba9e9dc1b263076@0", "0.9e0ac84e7678e0fb@0", "0.11416581728b3e35@0", "0.b6ab610523f0f1f7@0", "0.be540e8e95c0eb4b@0", "0.439ae16057dbc9d3@0", "0.f34fb260db24395@0", "0.fd3a317effc289bf@0", "0.1d80301fb3d1a0d1@0"};
-char *f65[ENTS] = {"0.ffbbb886c236a4f8@0", "0.d8a08b80eb84f708@0", "0.18ffd40d1045b57a@0", "0.22331c6a18e9e3408@0", "0.6fae2b4eadd545aa8@0", "0.cbeccf54df9472d@0", "0.c8d6f04f6eca67d7@0", "0.ca5677d36ef2f0c68@0", "0.d7e08b3e2b1396c9@0", "0.68494ca6d39790378@0"};
+const char *f63[ENTS] = {"0.914e9e6cff4f6b9@0", "0.7753d3b8364c60ec@0", "0.3c15909cecf1c1f6@0", "0.2282cb02e5167c6a@0", "0.6d56c20a47e1e3ee@0", "0.7ca81d1d2b81d696@0", "0.8735c2c0afb793a6@0", "0.e69f64c1b64872a@0", "0.fa7462fdff85137e@0", "0.3b00603f67a341a2@0"};
+const char *f64[ENTS] = {"0.48a74f367fa7b5c8@0", "0.bba9e9dc1b263076@0", "0.9e0ac84e7678e0fb@0", "0.11416581728b3e35@0", "0.b6ab610523f0f1f7@0", "0.be540e8e95c0eb4b@0", "0.439ae16057dbc9d3@0", "0.f34fb260db24395@0", "0.fd3a317effc289bf@0", "0.1d80301fb3d1a0d1@0"};
+const char *f65[ENTS] = {"0.ffbbb886c236a4f8@0", "0.d8a08b80eb84f708@0", "0.18ffd40d1045b57a@0", "0.22331c6a18e9e3408@0", "0.6fae2b4eadd545aa8@0", "0.cbeccf54df9472d@0", "0.c8d6f04f6eca67d7@0", "0.ca5677d36ef2f0c68@0", "0.d7e08b3e2b1396c9@0", "0.68494ca6d39790378@0"};
  
-char *f127[ENTS] = {"0.5ecd7526555eb140e3fb1e0e85533418@0", "0.e79f478cc99382ea6a0f94c1d8d7084a@0", "0.a7d40e994263bd8259ad171551c404f@0", "0.79eb5918686a65f14a7981d68eb03ee6@0", "0.a1823ab0d3c411543735462f7185a152@0", "0.16462c791250ec8e563de32c85d59c12@0", "0.913e9807a83f0ea1391ad9219cce9f2a@0", "0.5570e9192d54cec5d4326568993ae2c8@0", "0.13196ab23f80b5a635f7783acf721dba@0", "0.ef09177323fa17c6635b9e28aff78ce4@0"};
-char *f128[ENTS] = {"0.af66ba932aaf58a071fd8f0742a99a0c@0", "0.73cfa3c664c9c1753507ca60ec6b8425@0", "0.53ea074ca131dec12cd68b8aa8e20278@0", "0.3cf5ac8c343532f8a53cc0eb47581f73@0", "0.50c11d5869e208aa1b9aa317b8c2d0a9@0", "0.8b23163c892876472b1ef19642eace09@0", "0.489f4c03d41f87509c8d6c90ce674f95@0", "0.aab8748c96aa6762ea1932b44c9d7164@0", "0.98cb5591fc05ad31afbbc1d67b90edd@-1", "0.f7848bb991fd0be331adcf1457fbc672@0"};
+const char *f127[ENTS] = {"0.5ecd7526555eb140e3fb1e0e85533418@0", "0.e79f478cc99382ea6a0f94c1d8d7084a@0", "0.a7d40e994263bd8259ad171551c404f@0", "0.79eb5918686a65f14a7981d68eb03ee6@0", "0.a1823ab0d3c411543735462f7185a152@0", "0.16462c791250ec8e563de32c85d59c12@0", "0.913e9807a83f0ea1391ad9219cce9f2a@0", "0.5570e9192d54cec5d4326568993ae2c8@0", "0.13196ab23f80b5a635f7783acf721dba@0", "0.ef09177323fa17c6635b9e28aff78ce4@0"};
+const char *f128[ENTS] = {"0.af66ba932aaf58a071fd8f0742a99a0c@0", "0.73cfa3c664c9c1753507ca60ec6b8425@0", "0.53ea074ca131dec12cd68b8aa8e20278@0", "0.3cf5ac8c343532f8a53cc0eb47581f73@0", "0.50c11d5869e208aa1b9aa317b8c2d0a9@0", "0.8b23163c892876472b1ef19642eace09@0", "0.489f4c03d41f87509c8d6c90ce674f95@0", "0.aab8748c96aa6762ea1932b44c9d7164@0", "0.98cb5591fc05ad31afbbc1d67b90edd@-1", "0.f7848bb991fd0be331adcf1457fbc672@0"};
  
  
  struct rt
  {
-  char **s;
+  const char **s;
    int nbits;
  };
  
@@ -129,13 +129,7 @@ static struct rt farr[] =
  
  
  int
-#if __STDC__
  main (int argc, char *argv[])
-#else
-main (argc, argv)
-     int argc;
-     char *argv[];
-#endif
  {
    static char usage[] = "\
  usage: t-rand [function nbits]\n\
diff --git a/tests/rand/t-urbui.c b/tests/rand/t-urbui.c

index 0251f4c3356e43bcf213d6e3ea932a3390dd3dc2..23ef0a01dd06c682dbe90c010a9ee27e2f445ae6 100644 (file)
--- a/tests/rand/t-urbui.c
+++ b/tests/rand/t-urbui.c
@@ -2,20 +2,20 @@
  
  Copyright 2003, 2005 Free Software Foundation, Inc.
  
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
  
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
  
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
  
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see http://www.gnu.org/licenses/.  */
  
  #include <stdio.h>
  #include <stdlib.h>
diff --git a/tests/rand/t-urmui.c b/tests/rand/t-urmui.c

index 798286bc8e465d1d6ff44fdc04fcdea9529462fa..c32688225c6f103e60e2b3cc2f4eec39efa207a1 100644 (file)
--- a/tests/rand/t-urmui.c
+++ b/tests/rand/t-urmui.c
@@ -2,20 +2,20 @@
  
  Copyright 2003, 2005 Free Software Foundation, Inc.
  
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
  
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
  
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
  
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see http://www.gnu.org/licenses/.  */
  
  #include <stdio.h>
  #include <stdlib.h>
diff --git a/tests/rand/t-urndmm.c b/tests/rand/t-urndmm.c

index 11eeef261efe4b9bf380b1a3dbccfe6e1d29ee2f..99eba0d40e7fc50229bdf3a4656a9b8fa30e546e 100644 (file)
--- a/tests/rand/t-urndmm.c
+++ b/tests/rand/t-urndmm.c
@@ -2,20 +2,20 @@
  
  Copyright 2002 Free Software Foundation, Inc.
  
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
  
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
  
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
  
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see http://www.gnu.org/licenses/.  */
  
  #include <stdio.h>
  #include "gmp.h"
diff --git a/tests/rand/zdiv_round.c b/tests/rand/zdiv_round.c

index 362d19c11fa492051c52cbbc0b0c282f03dbe829..7a200515f9186d7a735efa685a08248da7983fab 100644 (file)
--- a/tests/rand/zdiv_round.c
+++ b/tests/rand/zdiv_round.c
@@ -3,20 +3,20 @@
  /*
  Copyright 1999 Free Software Foundation, Inc.
  
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
  
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
  
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
  
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see http://www.gnu.org/licenses/.  */
  
  #include "gmp.h"
  
diff --git a/tests/refmpf.c b/tests/refmpf.c

index 3dbbd8a6a6d0a739bcf3ef47d943cf010e4021c0..a48f30d6d40c809e3b4bc0de689b7f9e9219bbe3 100644 (file)
--- a/tests/refmpf.c
+++ b/tests/refmpf.c
@@ -2,20 +2,20 @@
  
  Copyright 1996, 2001, 2004, 2005 Free Software Foundation, Inc.
  
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
  
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
  
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
  
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see http://www.gnu.org/licenses/.  */
  
  #include <stdio.h>
  #include <stdlib.h>
diff --git a/tests/refmpn.c b/tests/refmpn.c

index 2f245e509607d2d86554e12f17601568f16b84e8..9106a81a9d457e70e89e4e89d1b920bf11344a62 100644 (file)
--- a/tests/refmpn.c
+++ b/tests/refmpn.c
@@ -2,22 +2,22 @@
     of the normal gmp code.  Speed isn't a consideration.
  
  Copyright 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006,
-2007, 2008, 2009, 2012 Free Software Foundation, Inc.
+2007, 2008, 2009, 2011, 2012, 2013 Free Software Foundation, Inc.
  
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
  
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
  
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
  
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see http://www.gnu.org/licenses/.  */
  
  
  /* Most routines have assertions representing what the mpn routines are
@@ -46,8 +46,8 @@ int
  byte_overlap_p (const void *v_xp, mp_size_t xsize,
                 const void *v_yp, mp_size_t ysize)
  {
-  const char *xp = v_xp;
-  const char *yp = v_yp;
+  const char *xp = (const char *) v_xp;
+  const char *yp = (const char *) v_yp;
  
    ASSERT (xsize >= 0);
    ASSERT (ysize >= 0);
@@ -533,7 +533,7 @@ sbb (mp_limb_t *w, mp_limb_t x, mp_limb_t y, mp_limb_t c)
  
  #define AORS_1(operation)                               \
    {                                                     \
-    mp_limb_t  i;                                       \
+    mp_size_t  i;                                       \
                                                         \
      ASSERT (refmpn_overlap_fullonly_p (rp, sp, size));  \
      ASSERT (size >= 1);                                 \
@@ -596,6 +596,201 @@ refmpn_sub_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size)
    return refmpn_sub_nc (rp, s1p, s2p, size, CNST_LIMB(0));
  }
  
+mp_limb_t
+refmpn_addcnd_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size, mp_limb_t cnd)
+{
+  if (cnd != 0)
+    return refmpn_add_n (rp, s1p, s2p, size);
+  else
+    {
+      refmpn_copyi (rp, s1p, size);
+      return 0;
+    }
+}
+mp_limb_t
+refmpn_subcnd_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size, mp_limb_t cnd)
+{
+  if (cnd != 0)
+    return refmpn_sub_n (rp, s1p, s2p, size);
+  else
+    {
+      refmpn_copyi (rp, s1p, size);
+      return 0;
+    }
+}
+
+
+#define AORS_ERR1_N(operation)                                         \
+  {                                                                     \
+    mp_size_t  i;                                                       \
+    mp_limb_t carry2;                                                  \
+                                                                       \
+    ASSERT (refmpn_overlap_fullonly_p (rp, s1p, size));                        \
+    ASSERT (refmpn_overlap_fullonly_p (rp, s2p, size));                        \
+    ASSERT (! refmpn_overlap_p (rp, size, yp, size));                  \
+    ASSERT (! refmpn_overlap_p (ep, 2, s1p, size));                    \
+    ASSERT (! refmpn_overlap_p (ep, 2, s2p, size));                    \
+    ASSERT (! refmpn_overlap_p (ep, 2, yp, size));                     \
+    ASSERT (! refmpn_overlap_p (ep, 2, rp, size));                     \
+                                                                       \
+    ASSERT (carry == 0 || carry == 1);                                 \
+    ASSERT (size >= 1);                                                        \
+    ASSERT_MPN (s1p, size);                                            \
+    ASSERT_MPN (s2p, size);                                            \
+    ASSERT_MPN (yp, size);                                             \
+                                                                       \
+    ep[0] = ep[1] = CNST_LIMB(0);                                      \
+                                                                       \
+    for (i = 0; i < size; i++)                                          \
+      {                                                                        \
+       carry = operation (&rp[i], s1p[i], s2p[i], carry);              \
+       if (carry == 1)                                                 \
+         {                                                             \
+           carry2 = ref_addc_limb (&ep[0], ep[0], yp[size - 1 - i]);   \
+           carry2 = ref_addc_limb (&ep[1], ep[1], carry2);             \
+           ASSERT (carry2 == 0);                                       \
+         }                                                             \
+      }                                                                        \
+    return carry;                                                       \
+  }
+
+mp_limb_t
+refmpn_add_err1_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p,
+                  mp_ptr ep, mp_srcptr yp,
+                  mp_size_t size, mp_limb_t carry)
+{
+  AORS_ERR1_N (adc);
+}
+mp_limb_t
+refmpn_sub_err1_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p,
+                  mp_ptr ep, mp_srcptr yp,
+                  mp_size_t size, mp_limb_t carry)
+{
+  AORS_ERR1_N (sbb);
+}
+
+
+#define AORS_ERR2_N(operation)                                         \
+  {                                                                     \
+    mp_size_t  i;                                                       \
+    mp_limb_t carry2;                                                  \
+                                                                       \
+    ASSERT (refmpn_overlap_fullonly_p (rp, s1p, size));                        \
+    ASSERT (refmpn_overlap_fullonly_p (rp, s2p, size));                        \
+    ASSERT (! refmpn_overlap_p (rp, size, y1p, size));                 \
+    ASSERT (! refmpn_overlap_p (rp, size, y2p, size));                 \
+    ASSERT (! refmpn_overlap_p (ep, 4, s1p, size));                    \
+    ASSERT (! refmpn_overlap_p (ep, 4, s2p, size));                    \
+    ASSERT (! refmpn_overlap_p (ep, 4, y1p, size));                    \
+    ASSERT (! refmpn_overlap_p (ep, 4, y2p, size));                    \
+    ASSERT (! refmpn_overlap_p (ep, 4, rp, size));                     \
+                                                                       \
+    ASSERT (carry == 0 || carry == 1);                                 \
+    ASSERT (size >= 1);                                                        \
+    ASSERT_MPN (s1p, size);                                            \
+    ASSERT_MPN (s2p, size);                                            \
+    ASSERT_MPN (y1p, size);                                            \
+    ASSERT_MPN (y2p, size);                                            \
+                                                                       \
+    ep[0] = ep[1] = CNST_LIMB(0);                                      \
+    ep[2] = ep[3] = CNST_LIMB(0);                                      \
+                                                                       \
+    for (i = 0; i < size; i++)                                          \
+      {                                                                        \
+       carry = operation (&rp[i], s1p[i], s2p[i], carry);              \
+       if (carry == 1)                                                 \
+         {                                                             \
+           carry2 = ref_addc_limb (&ep[0], ep[0], y1p[size - 1 - i]);  \
+           carry2 = ref_addc_limb (&ep[1], ep[1], carry2);             \
+           ASSERT (carry2 == 0);                                       \
+           carry2 = ref_addc_limb (&ep[2], ep[2], y2p[size - 1 - i]);  \
+           carry2 = ref_addc_limb (&ep[3], ep[3], carry2);             \
+           ASSERT (carry2 == 0);                                       \
+         }                                                             \
+      }                                                                        \
+    return carry;                                                       \
+  }
+
+mp_limb_t
+refmpn_add_err2_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p,
+                  mp_ptr ep, mp_srcptr y1p, mp_srcptr y2p,
+                  mp_size_t size, mp_limb_t carry)
+{
+  AORS_ERR2_N (adc);
+}
+mp_limb_t
+refmpn_sub_err2_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p,
+                  mp_ptr ep, mp_srcptr y1p, mp_srcptr y2p,
+                  mp_size_t size, mp_limb_t carry)
+{
+  AORS_ERR2_N (sbb);
+}
+
+
+#define AORS_ERR3_N(operation)                                         \
+  {                                                                     \
+    mp_size_t  i;                                                       \
+    mp_limb_t carry2;                                                  \
+                                                                       \
+    ASSERT (refmpn_overlap_fullonly_p (rp, s1p, size));                        \
+    ASSERT (refmpn_overlap_fullonly_p (rp, s2p, size));                        \
+    ASSERT (! refmpn_overlap_p (rp, size, y1p, size));                 \
+    ASSERT (! refmpn_overlap_p (rp, size, y2p, size));                 \
+    ASSERT (! refmpn_overlap_p (rp, size, y3p, size));                 \
+    ASSERT (! refmpn_overlap_p (ep, 6, s1p, size));                    \
+    ASSERT (! refmpn_overlap_p (ep, 6, s2p, size));                    \
+    ASSERT (! refmpn_overlap_p (ep, 6, y1p, size));                    \
+    ASSERT (! refmpn_overlap_p (ep, 6, y2p, size));                    \
+    ASSERT (! refmpn_overlap_p (ep, 6, y3p, size));                    \
+    ASSERT (! refmpn_overlap_p (ep, 6, rp, size));                     \
+                                                                       \
+    ASSERT (carry == 0 || carry == 1);                                 \
+    ASSERT (size >= 1);                                                        \
+    ASSERT_MPN (s1p, size);                                            \
+    ASSERT_MPN (s2p, size);                                            \
+    ASSERT_MPN (y1p, size);                                            \
+    ASSERT_MPN (y2p, size);                                            \
+    ASSERT_MPN (y3p, size);                                            \
+                                                                       \
+    ep[0] = ep[1] = CNST_LIMB(0);                                      \
+    ep[2] = ep[3] = CNST_LIMB(0);                                      \
+    ep[4] = ep[5] = CNST_LIMB(0);                                      \
+                                                                       \
+    for (i = 0; i < size; i++)                                          \
+      {                                                                        \
+       carry = operation (&rp[i], s1p[i], s2p[i], carry);              \
+       if (carry == 1)                                                 \
+         {                                                             \
+           carry2 = ref_addc_limb (&ep[0], ep[0], y1p[size - 1 - i]);  \
+           carry2 = ref_addc_limb (&ep[1], ep[1], carry2);             \
+           ASSERT (carry2 == 0);                                       \
+           carry2 = ref_addc_limb (&ep[2], ep[2], y2p[size - 1 - i]);  \
+           carry2 = ref_addc_limb (&ep[3], ep[3], carry2);             \
+           ASSERT (carry2 == 0);                                       \
+           carry2 = ref_addc_limb (&ep[4], ep[4], y3p[size - 1 - i]);  \
+           carry2 = ref_addc_limb (&ep[5], ep[5], carry2);             \
+           ASSERT (carry2 == 0);                                       \
+         }                                                             \
+      }                                                                        \
+    return carry;                                                       \
+  }
+
+mp_limb_t
+refmpn_add_err3_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p,
+                  mp_ptr ep, mp_srcptr y1p, mp_srcptr y2p, mp_srcptr y3p,
+                  mp_size_t size, mp_limb_t carry)
+{
+  AORS_ERR3_N (adc);
+}
+mp_limb_t
+refmpn_sub_err3_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p,
+                  mp_ptr ep, mp_srcptr y1p, mp_srcptr y2p, mp_srcptr y3p,
+                  mp_size_t size, mp_limb_t carry)
+{
+  AORS_ERR3_N (sbb);
+}
+
+
  mp_limb_t
  refmpn_addlsh_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp,
                  mp_size_t n, unsigned int s)
@@ -625,6 +820,58 @@ refmpn_addlsh2_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n)
  {
    return refmpn_addlsh_n (rp, up, vp, n, 2);
  }
+mp_limb_t
+refmpn_addlsh_n_ip1 (mp_ptr rp, mp_srcptr vp, mp_size_t n, unsigned int s)
+{
+  return refmpn_addlsh_n (rp, rp, vp, n, s);
+}
+mp_limb_t
+refmpn_addlsh1_n_ip1 (mp_ptr rp, mp_srcptr vp, mp_size_t n)
+{
+  return refmpn_addlsh_n (rp, rp, vp, n, 1);
+}
+mp_limb_t
+refmpn_addlsh2_n_ip1 (mp_ptr rp, mp_srcptr vp, mp_size_t n)
+{
+  return refmpn_addlsh_n (rp, rp, vp, n, 2);
+}
+mp_limb_t
+refmpn_addlsh_n_ip2 (mp_ptr rp, mp_srcptr vp, mp_size_t n, unsigned int s)
+{
+  return refmpn_addlsh_n (rp, vp, rp, n, s);
+}
+mp_limb_t
+refmpn_addlsh1_n_ip2 (mp_ptr rp, mp_srcptr vp, mp_size_t n)
+{
+  return refmpn_addlsh_n (rp, vp, rp, n, 1);
+}
+mp_limb_t
+refmpn_addlsh2_n_ip2 (mp_ptr rp, mp_srcptr vp, mp_size_t n)
+{
+  return refmpn_addlsh_n (rp, vp, rp, n, 2);
+}
+mp_limb_t
+refmpn_addlsh_nc (mp_ptr rp, mp_srcptr up, mp_srcptr vp,
+                mp_size_t n, unsigned int s, mp_limb_t carry)
+{
+  mp_limb_t cy;
+
+  ASSERT (carry >= 0 && carry <= (CNST_LIMB(1) << s));
+
+  cy = refmpn_addlsh_n (rp, up, vp, n, s);
+  cy += refmpn_add_1 (rp, rp, n, carry);
+  return cy;
+}
+mp_limb_t
+refmpn_addlsh1_nc (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n, mp_limb_t carry)
+{
+  return refmpn_addlsh_nc (rp, up, vp, n, 1, carry);
+}
+mp_limb_t
+refmpn_addlsh2_nc (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n, mp_limb_t carry)
+{
+  return refmpn_addlsh_nc (rp, up, vp, n, 2, carry);
+}
  
  mp_limb_t
  refmpn_sublsh_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp,
@@ -650,6 +897,63 @@ refmpn_sublsh1_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n)
  {
    return refmpn_sublsh_n (rp, up, vp, n, 1);
  }
+mp_limb_t
+refmpn_sublsh2_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n)
+{
+  return refmpn_sublsh_n (rp, up, vp, n, 2);
+}
+mp_limb_t
+refmpn_sublsh_n_ip1 (mp_ptr rp, mp_srcptr vp, mp_size_t n, unsigned int s)
+{
+  return refmpn_sublsh_n (rp, rp, vp, n, s);
+}
+mp_limb_t
+refmpn_sublsh1_n_ip1 (mp_ptr rp, mp_srcptr vp, mp_size_t n)
+{
+  return refmpn_sublsh_n (rp, rp, vp, n, 1);
+}
+mp_limb_t
+refmpn_sublsh2_n_ip1 (mp_ptr rp, mp_srcptr vp, mp_size_t n)
+{
+  return refmpn_sublsh_n (rp, rp, vp, n, 2);
+}
+mp_limb_t
+refmpn_sublsh_n_ip2 (mp_ptr rp, mp_srcptr vp, mp_size_t n, unsigned int s)
+{
+  return refmpn_sublsh_n (rp, vp, rp, n, s);
+}
+mp_limb_t
+refmpn_sublsh1_n_ip2 (mp_ptr rp, mp_srcptr vp, mp_size_t n)
+{
+  return refmpn_sublsh_n (rp, vp, rp, n, 1);
+}
+mp_limb_t
+refmpn_sublsh2_n_ip2 (mp_ptr rp, mp_srcptr vp, mp_size_t n)
+{
+  return refmpn_sublsh_n (rp, vp, rp, n, 2);
+}
+mp_limb_t
+refmpn_sublsh_nc (mp_ptr rp, mp_srcptr up, mp_srcptr vp,
+                mp_size_t n, unsigned int s, mp_limb_t carry)
+{
+  mp_limb_t cy;
+
+  ASSERT (carry >= 0 && carry <= (CNST_LIMB(1) << s));
+
+  cy = refmpn_sublsh_n (rp, up, vp, n, s);
+  cy += refmpn_sub_1 (rp, rp, n, carry);
+  return cy;
+}
+mp_limb_t
+refmpn_sublsh1_nc (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n, mp_limb_t carry)
+{
+  return refmpn_sublsh_nc (rp, up, vp, n, 1, carry);
+}
+mp_limb_t
+refmpn_sublsh2_nc (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n, mp_limb_t carry)
+{
+  return refmpn_sublsh_nc (rp, up, vp, n, 2, carry);
+}
  
  mp_limb_signed_t
  refmpn_rsblsh_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp,
@@ -680,6 +984,31 @@ refmpn_rsblsh2_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n)
  {
    return refmpn_rsblsh_n (rp, up, vp, n, 2);
  }
+mp_limb_signed_t
+refmpn_rsblsh_nc (mp_ptr rp, mp_srcptr up, mp_srcptr vp,
+                mp_size_t n, unsigned int s, mp_limb_signed_t carry)
+{
+  mp_limb_signed_t cy;
+
+  ASSERT (carry == -1 || (carry >> s) == 0);
+
+  cy = refmpn_rsblsh_n (rp, up, vp, n, s);
+  if (carry > 0)
+    cy += refmpn_add_1 (rp, rp, n, carry);
+  else
+    cy -= refmpn_sub_1 (rp, rp, n, -carry);
+  return cy;
+}
+mp_limb_signed_t
+refmpn_rsblsh1_nc (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n, mp_limb_signed_t carry)
+{
+  return refmpn_rsblsh_nc (rp, up, vp, n, 1, carry);
+}
+mp_limb_signed_t
+refmpn_rsblsh2_nc (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n, mp_limb_signed_t carry)
+{
+  return refmpn_rsblsh_nc (rp, up, vp, n, 2, carry);
+}
  
  mp_limb_t
  refmpn_rsh1add_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n)
@@ -868,6 +1197,16 @@ refmpn_mul_4 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_srcptr mult)
  {
    return refmpn_mul_N (rp, sp, size, mult, (mp_size_t) 4);
  }
+mp_limb_t
+refmpn_mul_5 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_srcptr mult)
+{
+  return refmpn_mul_N (rp, sp, size, mult, (mp_size_t) 5);  /* same forwarding as refmpn_mul_4, with 5 as the final argument */
+}
+mp_limb_t
+refmpn_mul_6 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_srcptr mult)
+{
+  return refmpn_mul_N (rp, sp, size, mult, (mp_size_t) 6);  /* same forwarding as refmpn_mul_4, with 6 as the final argument */
+}
  
  #define AORSMUL_1C(operation_n)                                 \
    {                                                             \
@@ -1459,8 +1798,63 @@ refmpn_mul_basecase (mp_ptr prodp,
      prodp[usize+i] = refmpn_addmul_1 (prodp+i, up, usize, vp[i]);
  }
  
+
+/* The same as mpn/generic/mulmid_basecase.c, but using refmpn functions. */
+void
+refmpn_mulmid_basecase (mp_ptr rp,
+                       mp_srcptr up, mp_size_t un,
+                       mp_srcptr vp, mp_size_t vn)
+{
+  mp_size_t k, width = un - vn + 1;
+  mp_limb_t cy;
+
+  ASSERT (un >= vn);
+  ASSERT (vn >= 1);
+  ASSERT (! refmpn_overlap_p (rp, un - vn + 3, up, un));
+  ASSERT (! refmpn_overlap_p (rp, un - vn + 3, vp, vn));
+  ASSERT_MPN (up, un);
+  ASSERT_MPN (vp, vn);
+  /* First row via mul_1; the two limbs above it take its carry-out and a zero.  */
+  rp[width] = refmpn_mul_1 (rp, up + vn - 1, width, vp[0]);
+  rp[width + 1] = CNST_LIMB (0);
+  for (k = 1; k < vn; k++)
+    {
+      cy = refmpn_addmul_1 (rp, up + (vn - 1 - k), width, vp[k]);
+      cy = ref_addc_limb (&rp[width], rp[width], cy);
+      cy = ref_addc_limb (&rp[width + 1], rp[width + 1], cy);
+      ASSERT (cy == 0);
+    }
+}
+
+void
+refmpn_toom42_mulmid (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n,
+                     mp_ptr scratch)
+{
+  refmpn_mulmid_basecase (rp, up, 2*n - 1, vp, n);  /* un = 2*n - 1, vn = n; scratch is not used here */
+}
+
+void
+refmpn_mulmid_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n)
+{
+  /* FIXME: this could be made faster by using refmpn_mul and then subtracting
+     off products near the middle product region boundary */
+  refmpn_mulmid_basecase (rp, up, 2*n - 1, vp, n);  /* un = 2*n - 1, vn = n */
+}
+
+void
+refmpn_mulmid (mp_ptr rp, mp_srcptr up, mp_size_t un,
+              mp_srcptr vp, mp_size_t vn)
+{
+  /* FIXME: this could be made faster by using refmpn_mul and then subtracting
+     off products near the middle product region boundary */
+  refmpn_mulmid_basecase (rp, up, un, vp, vn);  /* all arguments forwarded unchanged */
+}
+
+
+
  #define TOOM3_THRESHOLD (MAX (MUL_TOOM33_THRESHOLD, SQR_TOOM3_THRESHOLD))
  #define TOOM4_THRESHOLD (MAX (MUL_TOOM44_THRESHOLD, SQR_TOOM4_THRESHOLD))
+#define TOOM6_THRESHOLD (MAX (MUL_TOOM6H_THRESHOLD, SQR_TOOM6_THRESHOLD))
  #if WANT_FFT
  #define FFT_THRESHOLD (MAX (MUL_FFT_THRESHOLD, SQR_FFT_THRESHOLD))
  #else
@@ -1472,12 +1866,10 @@ refmpn_mul (mp_ptr wp, mp_srcptr up, mp_size_t un, mp_srcptr vp, mp_size_t vn)
  {
    mp_ptr tp;
    mp_size_t tn;
-  mp_limb_t cy;
  
    if (vn < TOOM3_THRESHOLD)
      {
-      /* In the mpn_mul_basecase and mpn_kara_mul_n range, use our own
-        mul_basecase.  */
+      /* In the mpn_mul_basecase and toom2 range, use our own mul_basecase.  */
        if (vn != 0)
         refmpn_mul_basecase (wp, up, un, vp, vn);
        else
@@ -1487,25 +1879,32 @@ refmpn_mul (mp_ptr wp, mp_srcptr up, mp_size_t un, mp_srcptr vp, mp_size_t vn)
  
    if (vn < TOOM4_THRESHOLD)
      {
-      /* In the mpn_toom33_mul range, use mpn_toom22_mul.  */
+      /* In the toom3 range, use mpn_toom22_mul.  */
        tn = 2 * vn + mpn_toom22_mul_itch (vn, vn);
        tp = refmpn_malloc_limbs (tn);
        mpn_toom22_mul (tp, up, vn, vp, vn, tp + 2 * vn);
      }
-  else if (vn < FFT_THRESHOLD)
+  else if (vn < TOOM6_THRESHOLD)
      {
-      /* In the mpn_toom44_mul range, use mpn_toom33_mul.  */
+      /* In the toom4 range, use mpn_toom33_mul.  */
        tn = 2 * vn + mpn_toom33_mul_itch (vn, vn);
        tp = refmpn_malloc_limbs (tn);
        mpn_toom33_mul (tp, up, vn, vp, vn, tp + 2 * vn);
      }
-  else
+  else if (vn < FFT_THRESHOLD)
      {
-      /* Finally, for the largest operands, use mpn_toom44_mul.  */
+      /* In the toom6 range, use mpn_toom44_mul.  */
        tn = 2 * vn + mpn_toom44_mul_itch (vn, vn);
        tp = refmpn_malloc_limbs (tn);
        mpn_toom44_mul (tp, up, vn, vp, vn, tp + 2 * vn);
      }
+  else
+    {
+      /* Finally, for the largest operands, use mpn_toom6h_mul.  */
+      tn = 2 * vn + mpn_toom6h_mul_itch (vn, vn);
+      tp = refmpn_malloc_limbs (tn);
+      mpn_toom6h_mul (tp, up, vn, vp, vn, tp + 2 * vn);
+    }
  
    if (un != vn)
      {
@@ -1515,7 +1914,7 @@ refmpn_mul (mp_ptr wp, mp_srcptr up, mp_size_t un, mp_srcptr vp, mp_size_t vn)
         refmpn_mul (wp + vn, up + vn, un - vn, vp, vn);
  
        MPN_COPY (wp, tp, vn);
-      cy = refmpn_add (wp + vn, wp + vn, un, tp + vn, vn);
+      ASSERT_NOCARRY (refmpn_add (wp + vn, wp + vn, un, tp + vn, vn));
      }
    else
      {
@@ -1920,7 +2319,7 @@ refmpn_tdiv_qr (mp_ptr qp, mp_ptr rp, mp_size_t qxn,
      }
  }
  
-void
+mp_limb_t
  refmpn_redc_1 (mp_ptr rp, mp_ptr up, mp_srcptr mp, mp_size_t n, mp_limb_t invm)
  {
    mp_size_t j;
@@ -1933,12 +2332,11 @@ refmpn_redc_1 (mp_ptr rp, mp_ptr up, mp_srcptr mp, mp_size_t n, mp_limb_t invm)
  
    for (j = n - 1; j >= 0; j--)
      {
-      up[0] = mpn_addmul_1 (up, mp, n, (up[0] * invm) & GMP_NUMB_MASK);
+      up[0] = refmpn_addmul_1 (up, mp, n, (up[0] * invm) & GMP_NUMB_MASK);
        up++;
      }
    cy = mpn_add_n (rp, up, up - n, n);
-  if (cy != 0)
-    mpn_sub_n (rp, rp, mp, n);
+  return cy;
  }
  
  size_t
diff --git a/tests/refmpq.c b/tests/refmpq.c

index 460a4801d74c3194ef305f8896a8905c8c238728..1d688a52f49f742420253feba7551fed87d66824 100644 (file)
--- a/tests/refmpq.c
+++ b/tests/refmpq.c
@@ -2,20 +2,20 @@
  
  Copyright 2001 Free Software Foundation, Inc.
  
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
  
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
  
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
  
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see http://www.gnu.org/licenses/.  */
  
  #include "gmp.h"
  #include "gmp-impl.h"
diff --git a/tests/refmpz.c b/tests/refmpz.c

index 29e0a16f861f274ebaa93ac4b49427e6dfe4b0a3..3903ec5939ae0eb0d07a56bfa13c29d03774b08f 100644 (file)
--- a/tests/refmpz.c
+++ b/tests/refmpz.c
@@ -2,20 +2,20 @@
  
  Copyright 1997, 1999, 2000, 2001, 2002 Free Software Foundation, Inc.
  
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
  
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
  
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
  
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see http://www.gnu.org/licenses/.  */
  
  /* always do assertion checking */
  #define WANT_ASSERT  1
@@ -184,17 +184,46 @@ refmpz_kronecker (mpz_srcptr a_orig, mpz_srcptr b_orig)
  int
  refmpz_jacobi (mpz_srcptr a, mpz_srcptr b)
  {
-  mpz_t  b_odd;
-  mpz_init_set (b_odd, b);
-  if (mpz_sgn (b_odd) != 0)
-    mpz_fdiv_q_2exp (b_odd, b_odd, mpz_scan1 (b_odd, 0L));
-  return refmpz_kronecker (a, b_odd);
+  ASSERT_ALWAYS (mpz_sgn (b) > 0);
+  ASSERT_ALWAYS (mpz_odd_p (b));
+  /* b is required to be positive and odd; the value is then taken from refmpz_kronecker.  */
+  return refmpz_kronecker (a, b);
  }
  
+/* Legendre symbol via powm: the sign of a^((p-1)/2) mod p. p must be an odd prime. */
  int
-refmpz_legendre (mpz_srcptr a, mpz_srcptr b)
+refmpz_legendre (mpz_srcptr a, mpz_srcptr p)
  {
-  return refmpz_jacobi (a, b);
+  int res;
+
+  mpz_t r;
+  mpz_t e;
+  /* Only the sign and parity of p are checked here; primality is not verified.  */
+  ASSERT_ALWAYS (mpz_sgn (p) > 0);
+  ASSERT_ALWAYS (mpz_odd_p (p));
+
+  mpz_init (r);
+  mpz_init (e);
+  /* r = a mod p, using floor division.  */
+  mpz_fdiv_r (r, a, p);
+  /* e = (p - 1) / 2, then r = r^e mod p.  */
+  mpz_set (e, p);
+  mpz_sub_ui (e, e, 1);
+  mpz_fdiv_q_2exp (e, e, 1);
+  mpz_powm (r, r, e, p);
+
+  /* Normalize to a more or less symmetric range around zero */
+  if (mpz_cmp (r, e) > 0)
+    mpz_sub (r, r, p);
+  /* r must now be -1, 0 or 1; its sign is the result.  */
+  ASSERT_ALWAYS (mpz_cmpabs_ui (r, 1) <= 0);
+
+  res = mpz_sgn (r);
+
+  mpz_clear (r);
+  mpz_clear (e);
+
+  return res;
  }
  
  
diff --git a/tests/spinner.c b/tests/spinner.c

index 343ecb5503ed7f29bc00f99c7cb6ff5f0077e91c..90b20aca2cd27390d2ecf1719bad660956f86440 100644 (file)
--- a/tests/spinner.c
+++ b/tests/spinner.c
@@ -3,20 +3,20 @@
  
  Copyright 1999, 2000, 2001 Free Software Foundation, Inc.
  
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
  
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
  
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
  
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see http://www.gnu.org/licenses/.  */
  
  #include "config.h"
  
diff --git a/tests/t-bswap.c b/tests/t-bswap.c

index 814ddfc5770d008ddd9dca7c71c54c7ecb652e5d..c26ac0258cd70690361eaf8bb2f34f93bbd8c818 100644 (file)
--- a/tests/t-bswap.c
+++ b/tests/t-bswap.c
@@ -2,20 +2,20 @@
  
  Copyright 2002 Free Software Foundation, Inc.
  
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
  
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
  
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
  
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see http://www.gnu.org/licenses/.  */
  
  #include <stdio.h>
  #include <stdlib.h>
diff --git a/tests/t-constants.c b/tests/t-constants.c

index 1c267e678e2e8648ba877616dd7db0575a28485d..d46299f8af36d7421a32eab6e1e923c780dd2c00 100644 (file)
--- a/tests/t-constants.c
+++ b/tests/t-constants.c
@@ -2,20 +2,20 @@
  
  Copyright 2000, 2001, 2002, 2003 Free Software Foundation, Inc.
  
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
  
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
  
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
  
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see http://www.gnu.org/licenses/.  */
  
  #include <stdio.h>
  #include <stdlib.h>
@@ -24,36 +24,36 @@ along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
  
  
  #ifdef ULONG_MAX
-char *ulong_max_def = "defined";
+const char *ulong_max_def = "defined";
  #else
-char *ulong_max_def = "not defined";
+const char *ulong_max_def = "not defined";
  #endif
  #ifdef LONG_MAX
-char *long_max_def = "defined";
+const char *long_max_def = "defined";
  #else
-char *long_max_def = "not defined";
+const char *long_max_def = "not defined";
  #endif
  
  #ifdef UINT_MAX
-char *uint_max_def = "defined";
+const char *uint_max_def = "defined";
  #else
-char *uint_max_def = "not defined";
+const char *uint_max_def = "not defined";
  #endif
  #ifdef INT_MAX
-char *int_max_def = "defined";
+const char *int_max_def = "defined";
  #else
-char *int_max_def = "not defined";
+const char *int_max_def = "not defined";
  #endif
  
  #ifdef USHRT_MAX
-char *ushrt_max_def = "defined";
+const char *ushrt_max_def = "defined";
  #else
-char *ushrt_max_def = "not defined";
+const char *ushrt_max_def = "not defined";
  #endif
  #ifdef SHRT_MAX
-char *shrt_max_def = "defined";
+const char *shrt_max_def = "defined";
  #else
-char *shrt_max_def = "not defined";
+const char *shrt_max_def = "not defined";
  #endif
  
  #include "gmp-impl.h"
diff --git a/tests/t-count_zeros.c b/tests/t-count_zeros.c

index 8dc34eb03be6e9876b48cae2d3321daef796558b..c63f8ecf0ed14fe9585b7518ed304c566e7db8d1 100644 (file)
--- a/tests/t-count_zeros.c
+++ b/tests/t-count_zeros.c
@@ -2,20 +2,20 @@
  
  Copyright 2001, 2002, 2003 Free Software Foundation, Inc.
  
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
  
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
  
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
  
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see http://www.gnu.org/licenses/.  */
  
  #include <stdio.h>
  #include <stdlib.h>
diff --git a/tests/t-gmpmax.c b/tests/t-gmpmax.c

index efe166f160582f0eb1402e81d343801c1b6312b9..b5c8a1a028e091ea29892c4b7d5c1e5cc5b5d30a 100644 (file)
--- a/tests/t-gmpmax.c
+++ b/tests/t-gmpmax.c
@@ -2,20 +2,20 @@
  
  Copyright 2001 Free Software Foundation, Inc.
  
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
  
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
  
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
  
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see http://www.gnu.org/licenses/.  */
  
  #include <stdio.h>
  #include <stdlib.h>
diff --git a/tests/t-hightomask.c b/tests/t-hightomask.c

index 9e6c109f0fc0e12d1edc415770e48280c5597289..7cb17cab2d7c4466f472f730aaeb9e8d6b47d71d 100644 (file)
--- a/tests/t-hightomask.c
+++ b/tests/t-hightomask.c
@@ -2,20 +2,20 @@
  
  Copyright 2004 Free Software Foundation, Inc.
  
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
  
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
  
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
  
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see http://www.gnu.org/licenses/.  */
  
  #include <stdio.h>
  #include <stdlib.h>
diff --git a/tests/t-modlinv.c b/tests/t-modlinv.c

index 2baf6c7effedcad60091309f7adf20b9fbba9c41..77090faa8287b1aefe84386635976b138604b1db 100644 (file)
--- a/tests/t-modlinv.c
+++ b/tests/t-modlinv.c
@@ -2,20 +2,20 @@
  
  Copyright 2000, 2001, 2002, 2003 Free Software Foundation, Inc.
  
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
  
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
  
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
  
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see http://www.gnu.org/licenses/.  */
  
  #include <stdio.h>
  #include <stdlib.h>
diff --git a/tests/t-parity.c b/tests/t-parity.c

index 922dd512a526a76b5d6090c7cab465af473d6255..601a4d39e5af2ec92b084e1f1765a03d4dd9bc4c 100644 (file)
--- a/tests/t-parity.c
+++ b/tests/t-parity.c
@@ -2,20 +2,20 @@
  
  Copyright 2002 Free Software Foundation, Inc.
  
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
  
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
  
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
  
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see http://www.gnu.org/licenses/.  */
  
  #include <stdio.h>
  #include <stdlib.h>
diff --git a/tests/t-popc.c b/tests/t-popc.c

index 7d0f4e0c88052a2e1c9188348881a11bb56335ea..7f36203827e3b3a1133df149b56f962fbe12de29 100644 (file)
--- a/tests/t-popc.c
+++ b/tests/t-popc.c
@@ -1,21 +1,21 @@
  /* Test popc_limb.
  
-Copyright 2002 Free Software Foundation, Inc.
+Copyright 2002, 2012 Free Software Foundation, Inc.
  
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
  
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
  
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
  
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see http://www.gnu.org/licenses/.  */
  
  #include <stdio.h>
  #include <stdlib.h>
@@ -51,6 +51,20 @@ main (void)
          }
      }
  
+  src = 0;
+  want = 0;
+  for (i = 0; i < GMP_LIMB_BITS; i++)
+    {
+      src += CNST_LIMB(1) << i;
+      want += 1;
+
+      popc_limb (got, src);
+      if (got != want)
+        {
+         goto error;
+        }
+    }
+
    for (i = 0; i < 100; i++)
      {
        mpn_random2 (&src, (mp_size_t) 1);
diff --git a/tests/t-sub.c b/tests/t-sub.c

index 5eb78c9fbc73156a8de2c236dd2c1ce9022a324f..42f02c569a1ad701f17bb4e4b09394ab7b90ec2d 100644 (file)
--- a/tests/t-sub.c
+++ b/tests/t-sub.c
@@ -2,20 +2,20 @@
  
  Copyright 2004 Free Software Foundation, Inc.
  
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
  
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
  
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
  
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see http://www.gnu.org/licenses/.  */
  
  #include <stdio.h>
  #include <stdlib.h>
diff --git a/tests/tests.h b/tests/tests.h

index b12b3d4d84489d5f8c0d1a47eb38a66a2440f5c7..c1762291dab56d9f61f28a7326b83b90cfd7162d 100644 (file)
--- a/tests/tests.h
+++ b/tests/tests.h
@@ -1,22 +1,22 @@
  /* Tests support prototypes etc.
  
-Copyright 2000, 2001, 2002, 2003, 2004, 2008, 2009 Free Software Foundation,
-Inc.
+Copyright 2000, 2001, 2002, 2003, 2004, 2008, 2009, 2010, 2011, 2012 Free
+Software Foundation, Inc.
  
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
  
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
  
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
  
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see http://www.gnu.org/licenses/.  */
  
  
  #ifndef __TESTS_H__
@@ -38,28 +38,28 @@ extern "C" {
  #endif
  
  
-void tests_start __GMP_PROTO ((void));
-void tests_end __GMP_PROTO ((void));
+void tests_start (void);
+void tests_end (void);
  
-void tests_memory_start __GMP_PROTO ((void));
-void tests_memory_end __GMP_PROTO ((void));
-void *tests_allocate __GMP_PROTO ((size_t size));
-void *tests_reallocate __GMP_PROTO ((void *ptr, size_t old_size, size_t new_size));
-void tests_free __GMP_PROTO ((void *ptr, size_t size));
-void tests_free_nosize __GMP_PROTO ((void *ptr));
-int tests_memory_valid __GMP_PROTO ((void *ptr));
+void tests_memory_start (void);
+void tests_memory_end (void);
+void *tests_allocate (size_t);
+void *tests_reallocate (void *, size_t, size_t);
+void tests_free (void *, size_t);
+void tests_free_nosize (void *);
+int tests_memory_valid (void *);
  
-void tests_rand_start __GMP_PROTO ((void));
-void tests_rand_end __GMP_PROTO ((void));
+void tests_rand_start (void);
+void tests_rand_end (void);
  
-double tests_infinity_d __GMP_PROTO (());
-int tests_hardware_getround __GMP_PROTO ((void));
-int tests_hardware_setround __GMP_PROTO ((int));
-int tests_isinf __GMP_PROTO ((double));
-int tests_dbl_mant_bits __GMP_PROTO ((void));
+double tests_infinity_d ();
+int tests_hardware_getround (void);
+int tests_hardware_setround (int);
+int tests_isinf (double);
+int tests_dbl_mant_bits (void);
  
-void x86_fldcw __GMP_PROTO ((unsigned short));
-unsigned short x86_fstcw __GMP_PROTO ((void));
+void x86_fldcw (unsigned short);
+unsigned short x86_fstcw (void);
  
  
  /* tests_setjmp_sigfpe is like a setjmp, establishing a trap for SIGFPE.
@@ -74,15 +74,15 @@ unsigned short x86_fstcw __GMP_PROTO ((void));
    (signal (SIGFPE, tests_sigfpe_handler),       \
     setjmp (tests_sigfpe_target))
  
-RETSIGTYPE tests_sigfpe_handler __GMP_PROTO ((int));
-void tests_sigfpe_done __GMP_PROTO ((void));
+RETSIGTYPE tests_sigfpe_handler (int);
+void tests_sigfpe_done (void);
  extern jmp_buf  tests_sigfpe_target;
  
  
  #if HAVE_CALLING_CONVENTIONS
-extern mp_limb_t (*calling_conventions_function) __GMP_PROTO ((ANYARGS));
-mp_limb_t calling_conventions __GMP_PROTO ((ANYARGS));
-int calling_conventions_check __GMP_PROTO ((void));
+extern mp_limb_t (*calling_conventions_function) (ANYARGS);
+mp_limb_t calling_conventions (ANYARGS);
+int calling_conventions_check (void);
  #define CALLING_CONVENTIONS(function) \
    (calling_conventions_function = (function), calling_conventions)
  #define CALLING_CONVENTIONS_CHECK()    (calling_conventions_check())
@@ -93,333 +93,276 @@ int calling_conventions_check __GMP_PROTO ((void));
  
  
  extern int mp_trace_base;
-void mp_limb_trace __GMP_PROTO ((const char *, mp_limb_t));
-void mpn_trace __GMP_PROTO ((const char *name, mp_srcptr ptr, mp_size_t size));
-void mpn_tracea __GMP_PROTO ((const char *name, const mp_ptr *a, int count,
-                 mp_size_t size));
-void mpn_tracen __GMP_PROTO ((const char *name, int num, mp_srcptr ptr,
-                 mp_size_t size));
-void mpn_trace_file __GMP_PROTO ((const char *filename,
-                             mp_srcptr ptr, mp_size_t size));
-void mpn_tracea_file __GMP_PROTO ((const char *filename,
-                              const mp_ptr *a, int count, mp_size_t size));
-void mpf_trace __GMP_PROTO ((const char *name, mpf_srcptr z));
-void mpq_trace __GMP_PROTO ((const char *name, mpq_srcptr q));
-void mpz_trace __GMP_PROTO ((const char *name, mpz_srcptr z));
-void mpz_tracen __GMP_PROTO ((const char *name, int num, mpz_srcptr z));
-void byte_trace __GMP_PROTO ((const char *, const void *, mp_size_t));
-void byte_tracen __GMP_PROTO ((const char *, int, const void *, mp_size_t));
-void d_trace __GMP_PROTO ((const char *, double));
-
-
-void spinner __GMP_PROTO ((void));
+void mp_limb_trace (const char *, mp_limb_t);
+void mpn_trace (const char *, mp_srcptr, mp_size_t);
+void mpn_tracea (const char *, const mp_ptr *, int, mp_size_t);
+void mpn_tracen (const char *, int, mp_srcptr, mp_size_t);
+void mpn_trace_file (const char *, mp_srcptr, mp_size_t);
+void mpn_tracea_file (const char *, const mp_ptr *, int, mp_size_t);
+void mpf_trace (const char *, mpf_srcptr);
+void mpq_trace (const char *, mpq_srcptr);
+void mpz_trace (const char *, mpz_srcptr);
+void mpz_tracen (const char *, int, mpz_srcptr);
+void byte_trace (const char *, const void *, mp_size_t);
+void byte_tracen (const char *, int, const void *, mp_size_t);
+void d_trace (const char *, double);
+
+
+void spinner (void);
  extern unsigned long  spinner_count;
  extern int  spinner_wanted;
  extern int  spinner_tick;
  
  
-void *align_pointer __GMP_PROTO ((void *p, size_t align));
-void *__gmp_allocate_func_aligned __GMP_PROTO ((size_t bytes, size_t align));
-void *__gmp_allocate_or_reallocate __GMP_PROTO ((void *ptr,
-                                          size_t oldsize, size_t newsize));
-char *__gmp_allocate_strdup __GMP_PROTO ((const char *s));
-char *strtoupper __GMP_PROTO ((char *s_orig));
-mp_limb_t urandom __GMP_PROTO ((void));
-void call_rand_algs __GMP_PROTO ((void (*func) (const char *, gmp_randstate_t)));
-
-
-void mpf_set_str_or_abort __GMP_PROTO ((mpf_ptr f, const char *str, int base));
-
-
-void mpq_set_str_or_abort __GMP_PROTO ((mpq_ptr q, const char *str, int base));
-
-
-void mpz_erandomb __GMP_PROTO ((mpz_ptr rop, gmp_randstate_t rstate,
-                           unsigned long nbits));
-void mpz_erandomb_nonzero __GMP_PROTO ((mpz_ptr rop, gmp_randstate_t rstate,
-                                   unsigned long nbits));
-void mpz_errandomb __GMP_PROTO ((mpz_ptr rop, gmp_randstate_t rstate,
-                            unsigned long nbits));
-void mpz_errandomb_nonzero __GMP_PROTO ((mpz_ptr rop, gmp_randstate_t rstate,
-                                    unsigned long nbits));
-void mpz_init_set_n __GMP_PROTO ((mpz_ptr z, mp_srcptr p, mp_size_t size));
-void mpz_negrandom __GMP_PROTO ((mpz_ptr rop, gmp_randstate_t rstate));
-int mpz_pow2abs_p __GMP_PROTO ((mpz_srcptr z)) __GMP_ATTRIBUTE_PURE;
-void mpz_set_n __GMP_PROTO ((mpz_ptr z, mp_srcptr p, mp_size_t size));
-void mpz_set_str_or_abort __GMP_PROTO ((mpz_ptr z, const char *str, int base));
-
-mp_size_t mpn_diff_highest __GMP_PROTO ((mp_srcptr p1, mp_srcptr p2, mp_size_t n)) __GMP_ATTRIBUTE_PURE;
-mp_size_t mpn_diff_lowest __GMP_PROTO ((mp_srcptr p1, mp_srcptr p2, mp_size_t n)) __GMP_ATTRIBUTE_PURE;
-mp_size_t byte_diff_highest __GMP_PROTO ((const void *p1, const void *p2, mp_size_t size)) __GMP_ATTRIBUTE_PURE;
-mp_size_t byte_diff_lowest __GMP_PROTO ((const void *p1, const void *p2, mp_size_t size)) __GMP_ATTRIBUTE_PURE;
-
-
-mp_limb_t ref_addc_limb __GMP_PROTO ((mp_limb_t *, mp_limb_t, mp_limb_t));
-mp_limb_t ref_bswap_limb __GMP_PROTO ((mp_limb_t src));
-unsigned long ref_popc_limb __GMP_PROTO ((mp_limb_t src));
-mp_limb_t ref_subc_limb __GMP_PROTO ((mp_limb_t *, mp_limb_t, mp_limb_t));
-
-
-void refmpf_add __GMP_PROTO ((mpf_ptr, mpf_srcptr, mpf_srcptr));
-void refmpf_add_ulp __GMP_PROTO ((mpf_ptr f));
-void refmpf_fill __GMP_PROTO ((mpf_ptr f, mp_size_t size, mp_limb_t value));
-void refmpf_normalize __GMP_PROTO ((mpf_ptr f));
-void refmpf_set_prec_limbs __GMP_PROTO ((mpf_ptr f, unsigned long prec));
-unsigned long refmpf_set_overlap __GMP_PROTO ((mpf_ptr dst, mpf_srcptr src));
-void refmpf_sub __GMP_PROTO ((mpf_ptr, mpf_srcptr, mpf_srcptr));
-int refmpf_validate __GMP_PROTO ((const char *name, mpf_srcptr got, mpf_srcptr want));
-int refmpf_validate_division __GMP_PROTO ((const char *name, mpf_srcptr got,
-                                           mpf_srcptr n, mpf_srcptr d));
-
-
-mp_limb_t refmpn_add __GMP_PROTO ((mp_ptr rp,
-                              mp_srcptr s1p, mp_size_t s1size,
-                              mp_srcptr s2p, mp_size_t s2size));
-mp_limb_t refmpn_add_1 __GMP_PROTO ((mp_ptr rp, mp_srcptr sp, mp_size_t size,
-                                mp_limb_t n));
-mp_limb_t refmpn_add_n __GMP_PROTO ((mp_ptr wp, mp_srcptr xp, mp_srcptr yp,
-                                mp_size_t size));
-mp_limb_t refmpn_add_nc __GMP_PROTO ((mp_ptr wp, mp_srcptr xp, mp_srcptr yp,
-                                 mp_size_t size, mp_limb_t carry));
-mp_limb_t refmpn_addlsh1_n __GMP_PROTO ((mp_ptr wp, mp_srcptr xp, mp_srcptr yp,
-                                mp_size_t size));
-mp_limb_t refmpn_addlsh2_n __GMP_PROTO ((mp_ptr wp, mp_srcptr xp, mp_srcptr yp,
-                                mp_size_t size));
-mp_limb_t refmpn_addlsh_n __GMP_PROTO ((mp_ptr wp, mp_srcptr xp, mp_srcptr yp,
-                                mp_size_t size, unsigned int));
-mp_limb_t refmpn_addmul_1 __GMP_PROTO ((mp_ptr wp, mp_srcptr xp, mp_size_t size,
-                                   mp_limb_t multiplier));
-mp_limb_t refmpn_addmul_1c __GMP_PROTO ((mp_ptr wp, mp_srcptr xp, mp_size_t size,
-                                    mp_limb_t multiplier, mp_limb_t carry));
-mp_limb_t refmpn_addmul_2 __GMP_PROTO ((mp_ptr dst, mp_srcptr src,
-                                        mp_size_t size, mp_srcptr mult));
-mp_limb_t refmpn_addmul_3 __GMP_PROTO ((mp_ptr dst, mp_srcptr src,
-                                        mp_size_t size, mp_srcptr mult));
-mp_limb_t refmpn_addmul_4 __GMP_PROTO ((mp_ptr dst, mp_srcptr src,
-                                        mp_size_t size, mp_srcptr mult));
-mp_limb_t refmpn_addmul_5 __GMP_PROTO ((mp_ptr dst, mp_srcptr src,
-                                        mp_size_t size, mp_srcptr mult));
-mp_limb_t refmpn_addmul_6 __GMP_PROTO ((mp_ptr dst, mp_srcptr src,
-                                        mp_size_t size, mp_srcptr mult));
-mp_limb_t refmpn_addmul_7 __GMP_PROTO ((mp_ptr dst, mp_srcptr src,
-                                        mp_size_t size, mp_srcptr mult));
-mp_limb_t refmpn_addmul_8 __GMP_PROTO ((mp_ptr dst, mp_srcptr src,
-                                        mp_size_t size, mp_srcptr mult));
-
-mp_limb_t refmpn_add_n_sub_n __GMP_PROTO ((mp_ptr r1p, mp_ptr r2p,
-                                   mp_srcptr s1p, mp_srcptr s2p,
-                                   mp_size_t size));
-mp_limb_t refmpn_add_n_sub_nc __GMP_PROTO ((mp_ptr r1p, mp_ptr r2p,
-                                    mp_srcptr s1p, mp_srcptr s2p,
-                                    mp_size_t size, mp_limb_t carry));
-
-void refmpn_and_n  __GMP_PROTO ((mp_ptr wp, mp_srcptr xp, mp_srcptr yp,
-                            mp_size_t size));
-void refmpn_andn_n __GMP_PROTO ((mp_ptr wp, mp_srcptr xp, mp_srcptr yp,
-                            mp_size_t size));
-
-mp_limb_t refmpn_big_base __GMP_PROTO ((int));
-
-int refmpn_chars_per_limb __GMP_PROTO ((int));
-void refmpn_clrbit __GMP_PROTO ((mp_ptr, unsigned long));
-int refmpn_cmp __GMP_PROTO ((mp_srcptr s1p, mp_srcptr s2p, mp_size_t size));
-int refmpn_cmp_allowzero __GMP_PROTO ((mp_srcptr, mp_srcptr, mp_size_t));
-int refmpn_cmp_twosizes __GMP_PROTO ((mp_srcptr xp, mp_size_t xsize,
-                                 mp_srcptr yp, mp_size_t ysize));
-
-void refmpn_com __GMP_PROTO ((mp_ptr rp, mp_srcptr sp, mp_size_t size));
-void refmpn_copy  __GMP_PROTO ((mp_ptr rp, mp_srcptr sp, mp_size_t size));
-void refmpn_copyi __GMP_PROTO ((mp_ptr rp, mp_srcptr sp, mp_size_t size));
-void refmpn_copyd __GMP_PROTO ((mp_ptr rp, mp_srcptr sp, mp_size_t size));
-void refmpn_copy_extend __GMP_PROTO ((mp_ptr wp, mp_size_t wsize, mp_srcptr xp, mp_size_t xsize));
-
-unsigned refmpn_count_leading_zeros __GMP_PROTO ((mp_limb_t x));
-unsigned refmpn_count_trailing_zeros __GMP_PROTO ((mp_limb_t x));
-
-mp_limb_t refmpn_divexact_by3 __GMP_PROTO ((mp_ptr rp, mp_srcptr sp,
-                                       mp_size_t size));
-mp_limb_t refmpn_divexact_by3c __GMP_PROTO ((mp_ptr rp, mp_srcptr sp,
-                                       mp_size_t size, mp_limb_t carry));
-
-mp_limb_t refmpn_divmod_1 __GMP_PROTO ((mp_ptr rp, mp_srcptr sp, mp_size_t size,
-                                   mp_limb_t divisor));
-mp_limb_t refmpn_divmod_1c __GMP_PROTO ((mp_ptr rp, mp_srcptr sp, mp_size_t size,
-                                    mp_limb_t divisor, mp_limb_t carry));
-mp_limb_t refmpn_divrem_1 __GMP_PROTO ((mp_ptr rp, mp_size_t xsize,
-                                   mp_srcptr sp, mp_size_t size,
-                                   mp_limb_t divisor));
-mp_limb_t refmpn_divrem_1c __GMP_PROTO ((mp_ptr rp, mp_size_t xsize,
-                                    mp_srcptr sp, mp_size_t size,
-                                    mp_limb_t divisor, mp_limb_t carry));
-mp_limb_t refmpn_divrem_2 __GMP_PROTO ((mp_ptr, mp_size_t, mp_ptr, mp_size_t,
-                                       mp_srcptr));
-
-int refmpn_equal_anynail __GMP_PROTO ((mp_srcptr, mp_srcptr, mp_size_t));
-
-void refmpn_fill __GMP_PROTO ((mp_ptr p, mp_size_t s, mp_limb_t v));
-
-mp_limb_t refmpn_gcd_1 __GMP_PROTO ((mp_srcptr xp, mp_size_t xsize, mp_limb_t y));
-mp_limb_t refmpn_gcd __GMP_PROTO ((mp_ptr gp, mp_ptr xp, mp_size_t xsize,
-                              mp_ptr yp, mp_size_t ysize));
-
-size_t refmpn_get_str __GMP_PROTO ((unsigned char *, int, mp_ptr, mp_size_t));
-
-unsigned long refmpn_hamdist __GMP_PROTO ((mp_srcptr s1p, mp_srcptr s2p,
-                                      mp_size_t size));
-
-mp_limb_t refmpn_invert_limb __GMP_PROTO ((mp_limb_t d));
-void refmpn_ior_n  __GMP_PROTO ((mp_ptr wp, mp_srcptr xp, mp_srcptr yp,
-                            mp_size_t size));
-void refmpn_iorn_n __GMP_PROTO ((mp_ptr wp, mp_srcptr xp, mp_srcptr yp,
-                            mp_size_t size));
-
-mp_limb_t refmpn_lshift __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, unsigned));
-mp_limb_t refmpn_lshift_or_copy __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, unsigned));
-mp_limb_t refmpn_lshift_or_copy_any __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, unsigned));
-mp_limb_t refmpn_lshiftc __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, unsigned));
-void refmpn_com __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t));
-
-mp_ptr refmpn_malloc_limbs __GMP_PROTO ((mp_size_t size));
-mp_ptr refmpn_malloc_limbs_aligned __GMP_PROTO ((mp_size_t n, size_t m));
-void refmpn_free_limbs __GMP_PROTO ((mp_ptr p));
-mp_limb_t refmpn_msbone __GMP_PROTO ((mp_limb_t x));
-mp_limb_t refmpn_msbone_mask __GMP_PROTO ((mp_limb_t x));
-mp_ptr refmpn_memdup_limbs __GMP_PROTO ((mp_srcptr ptr, mp_size_t size));
-
-mp_limb_t refmpn_mod_1 __GMP_PROTO ((mp_srcptr sp, mp_size_t size,
-                                mp_limb_t divisor));
-mp_limb_t refmpn_mod_1c __GMP_PROTO ((mp_srcptr sp, mp_size_t size,
-                                 mp_limb_t divisor, mp_limb_t carry));
-mp_limb_t refmpn_mod_34lsub1 __GMP_PROTO ((mp_srcptr p, mp_size_t n));
-
-mp_limb_t refmpn_mul_1 __GMP_PROTO ((mp_ptr wp, mp_srcptr xp, mp_size_t size,
-                                mp_limb_t multiplier));
-mp_limb_t refmpn_mul_1c __GMP_PROTO ((mp_ptr wp, mp_srcptr xp, mp_size_t size,
-                                 mp_limb_t multiplier, mp_limb_t carry));
-mp_limb_t refmpn_mul_2 __GMP_PROTO ((mp_ptr dst, mp_srcptr src, mp_size_t size,
-                                    mp_srcptr mult));
-mp_limb_t refmpn_mul_3 __GMP_PROTO ((mp_ptr dst, mp_srcptr src, mp_size_t size,
-                                    mp_srcptr mult));
-mp_limb_t refmpn_mul_4 __GMP_PROTO ((mp_ptr dst, mp_srcptr src, mp_size_t size,
-                                    mp_srcptr mult));
-
-void refmpn_mul_basecase __GMP_PROTO ((mp_ptr prodp,
-                                  mp_srcptr up, mp_size_t usize,
-                                  mp_srcptr vp, mp_size_t vsize));
-void refmpn_mullo_n __GMP_PROTO ((mp_ptr prodp,
-                                 mp_srcptr up, mp_srcptr vp, mp_size_t vsize));
-void refmpn_mul_any __GMP_PROTO ((mp_ptr prodp,
-                             mp_srcptr up, mp_size_t usize,
-                             mp_srcptr vp, mp_size_t vsize));
-void refmpn_mul_n __GMP_PROTO ((mp_ptr prodp, mp_srcptr up, mp_srcptr vp,
-                           mp_size_t size));
-void refmpn_mul __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t));
-
-void refmpn_nand_n __GMP_PROTO ((mp_ptr wp, mp_srcptr xp, mp_srcptr yp,
-                            mp_size_t size));
-void refmpn_nior_n __GMP_PROTO ((mp_ptr wp, mp_srcptr xp, mp_srcptr yp,
-                            mp_size_t size));
-mp_limb_t refmpn_neg __GMP_PROTO ((mp_ptr dst, mp_srcptr src, mp_size_t size));
-mp_size_t refmpn_normalize __GMP_PROTO ((mp_srcptr, mp_size_t));
-
-unsigned long refmpn_popcount __GMP_PROTO ((mp_srcptr sp, mp_size_t size));
-mp_limb_t refmpn_preinv_divrem_1 __GMP_PROTO ((mp_ptr rp, mp_size_t xsize,
-                                          mp_srcptr sp, mp_size_t size,
-                                          mp_limb_t divisor,
-                                          mp_limb_t inverse, unsigned shift));
-mp_limb_t refmpn_preinv_mod_1 __GMP_PROTO ((mp_srcptr sp, mp_size_t size,
-                                       mp_limb_t divisor,
-                                       mp_limb_t divisor_inverse));
-
-void refmpn_random __GMP_PROTO ((mp_ptr, mp_size_t));
-void refmpn_random2 __GMP_PROTO ((mp_ptr, mp_size_t));
-mp_limb_t refmpn_random_limb __GMP_PROTO ((void));
-
-mp_limb_t refmpn_rsh1add_n __GMP_PROTO ((mp_ptr wp, mp_srcptr xp, mp_srcptr yp,
-                                mp_size_t size));
-mp_limb_t refmpn_rsh1sub_n __GMP_PROTO ((mp_ptr wp, mp_srcptr xp, mp_srcptr yp,
-                                mp_size_t size));
-mp_limb_t refmpn_rshift __GMP_PROTO ((mp_ptr wp, mp_srcptr xp, mp_size_t size,
-                                 unsigned shift));
-mp_limb_t refmpn_rshift_or_copy __GMP_PROTO ((mp_ptr wp,
-                                         mp_srcptr xp, mp_size_t size,
-                                         unsigned shift));
-mp_limb_t refmpn_rshift_or_copy_any __GMP_PROTO ((mp_ptr wp,
-                                                  mp_srcptr xp, mp_size_t size,
-                                                  unsigned shift));
-
-mp_limb_t refmpn_sb_div_qr __GMP_PROTO ((mp_ptr,
-                                        mp_ptr, mp_size_t,
-                                        mp_srcptr, mp_size_t));
-unsigned long refmpn_scan0 __GMP_PROTO ((mp_srcptr, unsigned long));
-unsigned long refmpn_scan1 __GMP_PROTO ((mp_srcptr, unsigned long));
-void refmpn_setbit __GMP_PROTO ((mp_ptr, unsigned long));
-void refmpn_sqr __GMP_PROTO ((mp_ptr dst, mp_srcptr src, mp_size_t size));
-mp_size_t refmpn_sqrtrem __GMP_PROTO ((mp_ptr, mp_ptr, mp_srcptr, mp_size_t));
-
-void refmpn_sub_ddmmss __GMP_PROTO ((mp_limb_t *, mp_limb_t *,
-                                     mp_limb_t, mp_limb_t,
-                                     mp_limb_t, mp_limb_t));
-mp_limb_t refmpn_sub __GMP_PROTO ((mp_ptr rp,
-                              mp_srcptr s1p, mp_size_t s1size,
-                              mp_srcptr s2p, mp_size_t s2size));
-mp_limb_t refmpn_sub_1 __GMP_PROTO ((mp_ptr rp, mp_srcptr sp, mp_size_t size,
-                                mp_limb_t n));
-mp_limb_t refmpn_sub_n __GMP_PROTO ((mp_ptr wp, mp_srcptr xp, mp_srcptr yp,
-                                mp_size_t size));
-mp_limb_t refmpn_sub_nc __GMP_PROTO ((mp_ptr wp, mp_srcptr xp, mp_srcptr yp,
-                                 mp_size_t size, mp_limb_t carry));
-mp_limb_t refmpn_sublsh1_n __GMP_PROTO ((mp_ptr wp, mp_srcptr xp, mp_srcptr yp,
-                                mp_size_t size));
-mp_limb_t refmpn_sublsh_n __GMP_PROTO ((mp_ptr wp, mp_srcptr xp, mp_srcptr yp,
-                                mp_size_t size, unsigned int));
-mp_limb_t refmpn_submul_1 __GMP_PROTO ((mp_ptr wp, mp_srcptr xp, mp_size_t size,
-                                   mp_limb_t multiplier));
-mp_limb_t refmpn_submul_1c __GMP_PROTO ((mp_ptr wp, mp_srcptr xp, mp_size_t size,
-                                    mp_limb_t multiplier, mp_limb_t carry));
-
-mp_limb_signed_t refmpn_rsblsh1_n __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
-mp_limb_signed_t refmpn_rsblsh2_n __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
-mp_limb_signed_t refmpn_rsblsh_n __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, unsigned int));
-
-void refmpn_tdiv_qr __GMP_PROTO ((mp_ptr qp, mp_ptr rp, mp_size_t qxn,
-                             mp_ptr np, mp_size_t nsize,
-                             mp_srcptr dp, mp_size_t dsize));
-int refmpn_tstbit __GMP_PROTO ((mp_srcptr, unsigned long));
-
-mp_limb_t refmpn_udiv_qrnnd __GMP_PROTO ((mp_limb_t *, mp_limb_t, mp_limb_t, mp_limb_t));
-mp_limb_t refmpn_udiv_qrnnd_r __GMP_PROTO ((mp_limb_t, mp_limb_t, mp_limb_t, mp_limb_t *));
-mp_limb_t refmpn_umul_ppmm __GMP_PROTO ((mp_limb_t *, mp_limb_t, mp_limb_t));
-mp_limb_t refmpn_umul_ppmm_r __GMP_PROTO ((mp_limb_t, mp_limb_t, mp_limb_t *));
-
-void refmpn_xnor_n __GMP_PROTO ((mp_ptr wp, mp_srcptr xp, mp_srcptr yp,
-                            mp_size_t size));
-void refmpn_xor_n  __GMP_PROTO ((mp_ptr wp, mp_srcptr xp, mp_srcptr yp,
-                            mp_size_t size));
-
-void refmpn_zero __GMP_PROTO ((mp_ptr p, mp_size_t s));
-void refmpn_zero_extend __GMP_PROTO ((mp_ptr, mp_size_t, mp_size_t));
-int refmpn_zero_p __GMP_PROTO ((mp_srcptr ptr, mp_size_t size));
-
-void refmpn_binvert __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_ptr));
-void refmpn_invert __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_ptr));
-
-
-void refmpq_add __GMP_PROTO ((mpq_ptr w, mpq_srcptr x, mpq_srcptr y));
-void refmpq_sub __GMP_PROTO ((mpq_ptr w, mpq_srcptr x, mpq_srcptr y));
-
-
-void refmpz_combit __GMP_PROTO ((mpz_ptr r, unsigned long bit));
-unsigned long refmpz_hamdist __GMP_PROTO ((mpz_srcptr x, mpz_srcptr y));
-int refmpz_kronecker __GMP_PROTO ((mpz_srcptr a_orig, mpz_srcptr b_orig));
-int refmpz_jacobi __GMP_PROTO ((mpz_srcptr a_orig, mpz_srcptr b_orig));
-int refmpz_legendre __GMP_PROTO ((mpz_srcptr a_orig, mpz_srcptr b_orig));
-int refmpz_kronecker_si __GMP_PROTO ((mpz_srcptr, long));
-int refmpz_kronecker_ui __GMP_PROTO ((mpz_srcptr, unsigned long));
-int refmpz_si_kronecker __GMP_PROTO ((long, mpz_srcptr));
-int refmpz_ui_kronecker __GMP_PROTO ((unsigned long, mpz_srcptr));
-
-void refmpz_pow_ui __GMP_PROTO ((mpz_ptr w, mpz_srcptr b, unsigned long e));
+void *align_pointer (void *, size_t);
+void *__gmp_allocate_func_aligned (size_t, size_t);
+void *__gmp_allocate_or_reallocate (void *, size_t, size_t);
+char *__gmp_allocate_strdup (const char *);
+char *strtoupper (char *);
+mp_limb_t urandom (void);
+void call_rand_algs (void (*func) (const char *, gmp_randstate_t));
+
+
+void mpf_set_str_or_abort (mpf_ptr, const char *, int);
+
+
+void mpq_set_str_or_abort (mpq_ptr, const char *, int);
+
+
+void mpz_erandomb (mpz_ptr, gmp_randstate_t, unsigned long);
+void mpz_erandomb_nonzero (mpz_ptr, gmp_randstate_t, unsigned long);
+void mpz_errandomb (mpz_ptr, gmp_randstate_t, unsigned long);
+void mpz_errandomb_nonzero (mpz_ptr, gmp_randstate_t, unsigned long);
+void mpz_init_set_n (mpz_ptr, mp_srcptr, mp_size_t);
+void mpz_negrandom (mpz_ptr, gmp_randstate_t);
+int mpz_pow2abs_p (mpz_srcptr) __GMP_ATTRIBUTE_PURE;
+void mpz_set_n (mpz_ptr, mp_srcptr, mp_size_t);
+void mpz_set_str_or_abort (mpz_ptr, const char *, int);
+
+mp_size_t mpn_diff_highest (mp_srcptr, mp_srcptr, mp_size_t) __GMP_ATTRIBUTE_PURE;
+mp_size_t mpn_diff_lowest (mp_srcptr, mp_srcptr, mp_size_t) __GMP_ATTRIBUTE_PURE;
+mp_size_t byte_diff_highest (const void *, const void *, mp_size_t) __GMP_ATTRIBUTE_PURE;
+mp_size_t byte_diff_lowest (const void *, const void *, mp_size_t) __GMP_ATTRIBUTE_PURE;
+
+
+mp_limb_t ref_addc_limb (mp_limb_t *, mp_limb_t, mp_limb_t);
+mp_limb_t ref_bswap_limb (mp_limb_t);
+unsigned long ref_popc_limb (mp_limb_t);
+mp_limb_t ref_subc_limb (mp_limb_t *, mp_limb_t, mp_limb_t);
+
+
+void refmpf_add (mpf_ptr, mpf_srcptr, mpf_srcptr);
+void refmpf_add_ulp (mpf_ptr );
+void refmpf_fill (mpf_ptr, mp_size_t, mp_limb_t);
+void refmpf_normalize (mpf_ptr);
+void refmpf_set_prec_limbs (mpf_ptr, unsigned long);
+unsigned long refmpf_set_overlap (mpf_ptr, mpf_srcptr);
+void refmpf_sub (mpf_ptr, mpf_srcptr, mpf_srcptr);
+int refmpf_validate (const char *, mpf_srcptr, mpf_srcptr);
+int refmpf_validate_division (const char *, mpf_srcptr, mpf_srcptr, mpf_srcptr);
+
+
+mp_limb_t refmpn_addcnd_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t);
+mp_limb_t refmpn_subcnd_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t);
+
+mp_limb_t refmpn_add (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t);
+mp_limb_t refmpn_add_1 (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t);
+mp_limb_t refmpn_add_err1_n (mp_ptr, mp_srcptr, mp_srcptr, mp_ptr, mp_srcptr, mp_size_t, mp_limb_t);
+mp_limb_t refmpn_add_err2_n (mp_ptr, mp_srcptr, mp_srcptr, mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t);
+mp_limb_t refmpn_add_err3_n (mp_ptr, mp_srcptr, mp_srcptr, mp_ptr, mp_srcptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t);
+mp_limb_t refmpn_add_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);
+mp_limb_t refmpn_add_nc (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t);
+mp_limb_t refmpn_addlsh1_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);
+mp_limb_t refmpn_addlsh2_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);
+mp_limb_t refmpn_addlsh_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, unsigned);
+mp_limb_t refmpn_addlsh1_n_ip1 (mp_ptr, mp_srcptr, mp_size_t);
+mp_limb_t refmpn_addlsh2_n_ip1 (mp_ptr, mp_srcptr, mp_size_t);
+mp_limb_t refmpn_addlsh_n_ip1 (mp_ptr, mp_srcptr, mp_size_t, unsigned);
+mp_limb_t refmpn_addlsh1_n_ip2 (mp_ptr, mp_srcptr, mp_size_t);
+mp_limb_t refmpn_addlsh2_n_ip2 (mp_ptr, mp_srcptr, mp_size_t);
+mp_limb_t refmpn_addlsh_n_ip2 (mp_ptr, mp_srcptr, mp_size_t, unsigned);
+mp_limb_t refmpn_addlsh1_nc (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t);
+mp_limb_t refmpn_addlsh2_nc (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t);
+mp_limb_t refmpn_addlsh_nc (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, unsigned, mp_limb_t);
+mp_limb_t refmpn_addmul_1 (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t);
+mp_limb_t refmpn_addmul_1c (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t);
+mp_limb_t refmpn_addmul_2 (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr);
+mp_limb_t refmpn_addmul_3 (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr);
+mp_limb_t refmpn_addmul_4 (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr);
+mp_limb_t refmpn_addmul_5 (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr);
+mp_limb_t refmpn_addmul_6 (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr);
+mp_limb_t refmpn_addmul_7 (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr);
+mp_limb_t refmpn_addmul_8 (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr);
+
+mp_limb_t refmpn_add_n_sub_n (mp_ptr, mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);
+mp_limb_t refmpn_add_n_sub_nc (mp_ptr, mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t);
+
+void refmpn_and_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);
+void refmpn_andn_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);
+
+mp_limb_t refmpn_big_base (int);
+
+int refmpn_chars_per_limb (int);
+void refmpn_clrbit (mp_ptr, unsigned long);
+int refmpn_cmp (mp_srcptr, mp_srcptr, mp_size_t);
+int refmpn_cmp_allowzero (mp_srcptr, mp_srcptr, mp_size_t);
+int refmpn_cmp_twosizes (mp_srcptr, mp_size_t, mp_srcptr, mp_size_t);
+
+void refmpn_com (mp_ptr, mp_srcptr, mp_size_t);
+void refmpn_copy (mp_ptr, mp_srcptr, mp_size_t);
+void refmpn_copyi (mp_ptr, mp_srcptr, mp_size_t);
+void refmpn_copyd (mp_ptr, mp_srcptr, mp_size_t);
+void refmpn_copy_extend (mp_ptr, mp_size_t, mp_srcptr, mp_size_t);
+
+unsigned refmpn_count_leading_zeros (mp_limb_t);
+unsigned refmpn_count_trailing_zeros (mp_limb_t);
+
+mp_limb_t refmpn_divexact_by3 (mp_ptr, mp_srcptr, mp_size_t);
+mp_limb_t refmpn_divexact_by3c (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t);
+
+mp_limb_t refmpn_divmod_1 (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t);
+mp_limb_t refmpn_divmod_1c (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t);
+mp_limb_t refmpn_divrem_1 (mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t);
+mp_limb_t refmpn_divrem_1c (mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t);
+mp_limb_t refmpn_divrem_2 (mp_ptr, mp_size_t, mp_ptr, mp_size_t, mp_srcptr);
+
+int refmpn_equal_anynail (mp_srcptr, mp_srcptr, mp_size_t);
+
+void refmpn_fill (mp_ptr, mp_size_t, mp_limb_t);
+
+mp_limb_t refmpn_gcd_1 (mp_srcptr, mp_size_t, mp_limb_t);
+mp_limb_t refmpn_gcd (mp_ptr, mp_ptr, mp_size_t, mp_ptr, mp_size_t);
+
+size_t refmpn_get_str (unsigned char *, int, mp_ptr, mp_size_t);
+
+unsigned long refmpn_hamdist (mp_srcptr, mp_srcptr, mp_size_t);
+
+mp_limb_t refmpn_invert_limb (mp_limb_t);
+void refmpn_ior_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);
+void refmpn_iorn_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);
+
+mp_limb_t refmpn_lshift (mp_ptr, mp_srcptr, mp_size_t, unsigned);
+mp_limb_t refmpn_lshift_or_copy (mp_ptr, mp_srcptr, mp_size_t, unsigned);
+mp_limb_t refmpn_lshift_or_copy_any (mp_ptr, mp_srcptr, mp_size_t, unsigned);
+mp_limb_t refmpn_lshiftc (mp_ptr, mp_srcptr, mp_size_t, unsigned);
+void refmpn_com (mp_ptr, mp_srcptr, mp_size_t);
+
+mp_ptr refmpn_malloc_limbs (mp_size_t);
+mp_ptr refmpn_malloc_limbs_aligned (mp_size_t, size_t);
+void refmpn_free_limbs (mp_ptr);
+mp_limb_t refmpn_msbone (mp_limb_t);
+mp_limb_t refmpn_msbone_mask (mp_limb_t);
+mp_ptr refmpn_memdup_limbs (mp_srcptr, mp_size_t);
+
+mp_limb_t refmpn_mod_1 (mp_srcptr, mp_size_t, mp_limb_t);
+mp_limb_t refmpn_mod_1c (mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t);
+mp_limb_t refmpn_mod_34lsub1 (mp_srcptr, mp_size_t);
+
+mp_limb_t refmpn_mul_1 (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t);
+mp_limb_t refmpn_mul_1c (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t);
+mp_limb_t refmpn_mul_2 (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr);
+mp_limb_t refmpn_mul_3 (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr);
+mp_limb_t refmpn_mul_4 (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr);
+mp_limb_t refmpn_mul_5 (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr);
+mp_limb_t refmpn_mul_6 (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr);
+
+void refmpn_mul_basecase (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t);
+void refmpn_mulmid_basecase (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t);
+void refmpn_toom42_mulmid (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_ptr);
+void refmpn_mulmid_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);
+void refmpn_mulmid (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t);
+void refmpn_mullo_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);
+void refmpn_mul_any (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t);
+void refmpn_mul_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);
+void refmpn_mul (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t);
+
+void refmpn_nand_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);
+void refmpn_nior_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);
+mp_limb_t refmpn_neg (mp_ptr, mp_srcptr, mp_size_t);
+mp_size_t refmpn_normalize (mp_srcptr, mp_size_t);
+
+unsigned long refmpn_popcount (mp_srcptr, mp_size_t);
+mp_limb_t refmpn_preinv_divrem_1 (mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t, unsigned);
+mp_limb_t refmpn_preinv_mod_1 (mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t);
+
+void refmpn_random (mp_ptr, mp_size_t);
+void refmpn_random2 (mp_ptr, mp_size_t);
+mp_limb_t refmpn_random_limb (void);
+
+mp_limb_t refmpn_rsh1add_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);
+mp_limb_t refmpn_rsh1sub_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);
+mp_limb_t refmpn_rshift (mp_ptr, mp_srcptr, mp_size_t, unsigned);
+mp_limb_t refmpn_rshift_or_copy (mp_ptr, mp_srcptr, mp_size_t, unsigned);
+mp_limb_t refmpn_rshift_or_copy_any (mp_ptr, mp_srcptr, mp_size_t, unsigned);
+
+mp_limb_t refmpn_sb_div_qr (mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t);
+unsigned long refmpn_scan0 (mp_srcptr, unsigned long);
+unsigned long refmpn_scan1 (mp_srcptr, unsigned long);
+void refmpn_setbit (mp_ptr, unsigned long);
+void refmpn_sqr (mp_ptr, mp_srcptr, mp_size_t);
+mp_size_t refmpn_sqrtrem (mp_ptr, mp_ptr, mp_srcptr, mp_size_t);
+
+void refmpn_sub_ddmmss (mp_limb_t *, mp_limb_t *, mp_limb_t, mp_limb_t, mp_limb_t, mp_limb_t);
+mp_limb_t refmpn_sub (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t);
+mp_limb_t refmpn_sub_1 (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t);
+mp_limb_t refmpn_sub_err1_n (mp_ptr, mp_srcptr, mp_srcptr, mp_ptr, mp_srcptr, mp_size_t, mp_limb_t);
+mp_limb_t refmpn_sub_err2_n (mp_ptr, mp_srcptr, mp_srcptr, mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t);
+mp_limb_t refmpn_sub_err3_n (mp_ptr, mp_srcptr, mp_srcptr, mp_ptr, mp_srcptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t);
+mp_limb_t refmpn_sub_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);
+mp_limb_t refmpn_sub_nc (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t);
+mp_limb_t refmpn_sublsh1_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);
+mp_limb_t refmpn_sublsh2_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);
+mp_limb_t refmpn_sublsh_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, unsigned int);
+mp_limb_t refmpn_sublsh1_n_ip1 (mp_ptr, mp_srcptr, mp_size_t);
+mp_limb_t refmpn_sublsh2_n_ip1 (mp_ptr, mp_srcptr, mp_size_t);
+mp_limb_t refmpn_sublsh_n_ip1 (mp_ptr, mp_srcptr, mp_size_t, unsigned int);
+mp_limb_t refmpn_sublsh1_nc (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t);
+mp_limb_t refmpn_sublsh2_nc (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t);
+mp_limb_t refmpn_sublsh_nc (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, unsigned int, mp_limb_t);
+mp_limb_t refmpn_submul_1 (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t);
+mp_limb_t refmpn_submul_1c (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t);
+
+mp_limb_signed_t refmpn_rsblsh1_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);
+mp_limb_signed_t refmpn_rsblsh2_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);
+mp_limb_signed_t refmpn_rsblsh_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, unsigned int);
+mp_limb_signed_t refmpn_rsblsh1_nc (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_signed_t);
+mp_limb_signed_t refmpn_rsblsh2_nc (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_signed_t);
+mp_limb_signed_t refmpn_rsblsh_nc (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, unsigned int, mp_limb_signed_t);
+
+void refmpn_tdiv_qr (mp_ptr, mp_ptr, mp_size_t, mp_ptr, mp_size_t, mp_srcptr, mp_size_t);
+int refmpn_tstbit (mp_srcptr, unsigned long);
+
+mp_limb_t refmpn_udiv_qrnnd (mp_limb_t *, mp_limb_t, mp_limb_t, mp_limb_t);
+mp_limb_t refmpn_udiv_qrnnd_r (mp_limb_t, mp_limb_t, mp_limb_t, mp_limb_t *);
+mp_limb_t refmpn_umul_ppmm (mp_limb_t *, mp_limb_t, mp_limb_t);
+mp_limb_t refmpn_umul_ppmm_r (mp_limb_t, mp_limb_t, mp_limb_t *);
+
+void refmpn_xnor_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);
+void refmpn_xor_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);
+
+void refmpn_zero (mp_ptr, mp_size_t);
+void refmpn_zero_extend (mp_ptr, mp_size_t, mp_size_t);
+int refmpn_zero_p (mp_srcptr, mp_size_t);
+
+void refmpn_binvert (mp_ptr, mp_srcptr, mp_size_t, mp_ptr);
+void refmpn_invert (mp_ptr, mp_srcptr, mp_size_t, mp_ptr);
+
+
+void refmpq_add (mpq_ptr, mpq_srcptr, mpq_srcptr);
+void refmpq_sub (mpq_ptr, mpq_srcptr, mpq_srcptr);
+
+
+void refmpz_combit (mpz_ptr, unsigned long);
+unsigned long refmpz_hamdist (mpz_srcptr, mpz_srcptr);
+int refmpz_kronecker (mpz_srcptr, mpz_srcptr);
+int refmpz_jacobi (mpz_srcptr, mpz_srcptr);
+int refmpz_legendre (mpz_srcptr, mpz_srcptr);
+int refmpz_kronecker_si (mpz_srcptr, long);
+int refmpz_kronecker_ui (mpz_srcptr, unsigned long);
+int refmpz_si_kronecker (long, mpz_srcptr);
+int refmpz_ui_kronecker (unsigned long, mpz_srcptr);
+
+void refmpz_pow_ui (mpz_ptr, mpz_srcptr, unsigned long);
  
  
  #if defined (__cplusplus)
@@ -435,7 +378,7 @@ void refmpz_pow_ui __GMP_PROTO ((mpz_ptr w, mpz_srcptr b, unsigned long e));
     full implementation, just enough for our purposes.  */
  
  #ifdef __cplusplus
-#if HAVE_SSTREAM
+#if 1 || HAVE_SSTREAM
  #include <sstream>
  #else /* ! HAVE_SSTREAM */
  #include <string>
@@ -464,7 +407,8 @@ istringstream : public std::istrstream {
  #define TESTS_REPS(count, argv, argc)                                  \
    do {                                                                 \
    char *envval, *end;                                                  \
-  long repfactor;                                                      \
+  double repfactor;                                                    \
+  int reps_nondefault = 0;                                             \
    if (argc > 1)                                                                \
      {                                                                  \
        count = strtol (argv[1], &end, 0);                               \
@@ -475,18 +419,23 @@ istringstream : public std::istrstream {
         }                                                               \
        argv++;                                                          \
        argc--;                                                          \
+      reps_nondefault = 1;                                             \
      }                                                                  \
    envval = getenv ("GMP_CHECK_REPFACTOR");                             \
    if (envval != NULL)                                                  \
      {                                                                  \
-      repfactor = strtol (envval, &end, 0);                            \
+      repfactor = strtod (envval, &end);                               \
        if (*end || repfactor <= 0)                                      \
         {                                                               \
-         fprintf (stderr, "Invalid repfactor: %ld.\n", repfactor);     \
+         fprintf (stderr, "Invalid repfactor: %f.\n", repfactor);      \
           exit (1);                                                     \
         }                                                               \
        count *= repfactor;                                              \
+      reps_nondefault = 1;                                             \
      }                                                                  \
+  if (reps_nondefault)                                                 \
+    printf ("Running test with %ld repetitions (include this in bug reports)\n",\
+           (long) count);                                              \
    } while (0)
  
  
diff --git a/tests/trace.c b/tests/trace.c

index 93ee7351a7cdfcd3ed18fa79550bcf15ddd42a6f..9ac3028b4ba9dd2964b33867bd709a97902d4a1b 100644 (file)
--- a/tests/trace.c
+++ b/tests/trace.c
@@ -3,20 +3,20 @@
  Copyright 1999, 2000, 2001, 2002, 2003, 2004, 2005 Free Software Foundation,
  Inc.
  
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
  
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
  
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
  
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see http://www.gnu.org/licenses/.  */
  
  
  /* Future: Would like commas printed between limbs in hex or binary, but
@@ -265,7 +265,7 @@ mpn_tracea_file (const char *filename,
  void
  byte_trace (const char *name, const void *ptr, mp_size_t size)
  {
-  char       *fmt;
+  const char *fmt;
    mp_size_t  i;
  
    mp_trace_start (name);
diff --git a/tests/x86call.asm b/tests/x86call.asm

index 47af42b7267e4b5dc3966df53abb1c2c5495cf4b..e5fbcb1718cfc0486aa0048c73b6f8d49fe4c397 100644 (file)
--- a/tests/x86call.asm
+++ b/tests/x86call.asm
@@ -1,22 +1,26 @@
  dnl  x86 calling conventions checking.
  
-dnl  Copyright 2000, 2003 Free Software Foundation, Inc.
-dnl
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or
-dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  Copyright 2000, 2003, 2010 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library test suite.
+
+dnl  The GNU MP Library test suite is free software; you can redistribute it
+dnl  and/or modify it under the terms of the GNU General Public License as
  dnl  published by the Free Software Foundation; either version 3 of the
  dnl  License, or (at your option) any later version.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful,
-dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
-dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-dnl  Lesser General Public License for more details.
-dnl
-dnl  You should have received a copy of the GNU Lesser General Public License
-dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
  
+dnl  The GNU MP Library test suite is distributed in the hope that it will be
+dnl  useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+dnl  Public License for more details.
+
+dnl  You should have received a copy of the GNU General Public License along
+dnl  with the GNU MP Library test suite.  If not, see
+dnl  http://www.gnu.org/licenses/.
+
+
+dnl  The current version of the code attempts to keep the call/return
+dnl  prediction stack valid, by matching calls and returns.
  
  include(`../config.m4')
  
@@ -26,8 +30,8 @@ C
  C Execute an fldcw, setting the x87 control word to cw.
  
  PROLOGUE(x86_fldcw)
-        fldcw   4(%esp)
-        ret
+       fldcw   4(%esp)
+       ret
  EPILOGUE()
  
  
@@ -36,18 +40,18 @@ C
  C Execute an fstcw, returning the current x87 control word.
  
  PROLOGUE(x86_fstcw)
-        xorl    %eax, %eax
-        pushl   %eax
-        fstcw   (%esp)
-        popl    %eax
-        ret
+       xorl    %eax, %eax
+       pushl   %eax
+       fstcw   (%esp)
+       popl    %eax
+       ret
  EPILOGUE()
  
  
-dnl  Instrumented profiling doesn't come out quite right below, since we
-dnl  don't do an actual "ret".  There's only a few instructions here, so
-dnl  there's no great need to get them separately accounted, just let them
-dnl  get attributed to the caller.
+dnl  Instrumented profiling doesn't come out quite right below, since we don't
+dnl  do an actual "ret".  There's only a few instructions here, so there's no
+dnl  great need to get them separately accounted, just let them get attributed
+dnl  to the caller.  FIXME this comment might no longer be true.
  
  ifelse(WANT_PROFILING,instrument,
  `define(`WANT_PROFILING',no)')
@@ -61,55 +65,89 @@ C
  C Perhaps the finit should be done only if the tags word isn't clear, but
  C nothing uses the rounding mode or anything at the moment.
  
-define(G,
-m4_assert_numargs(1)
-`GSYM_PREFIX`'$1')
+define(`WANT_EBX', eval(4*0)($1))
+define(`WANT_EBP', eval(4*1)($1))
+define(`WANT_ESI', eval(4*2)($1))
+define(`WANT_EDI', eval(4*3)($1))
  
-       .text
-       ALIGN(8)
-PROLOGUE(calling_conventions)
-       movl    (%esp), %eax
-       movl    %eax, G(calling_conventions_retaddr)
+define(`JUNK_EAX', eval(4*4)($1))
+define(`JUNK_ECX', eval(4*5)($1))
+define(`JUNK_EDX', eval(4*6)($1))
  
-       movl    $L(return), (%esp)
+define(`SAVE_EBX', eval(4*7)($1))
+define(`SAVE_EBP', eval(4*8)($1))
+define(`SAVE_ESI', eval(4*9)($1))
+define(`SAVE_EDI', eval(4*10)($1))
  
-       movl    %ebx, G(calling_conventions_save_ebx)
-       movl    %esi, G(calling_conventions_save_esi)
-       movl    %edi, G(calling_conventions_save_edi)
-       movl    %ebp, G(calling_conventions_save_ebp)
+define(`RETADDR',  eval(4*11)($1))
  
-       movl    $0x01234567, %ebx
-       movl    $0x89ABCDEF, %esi
-       movl    $0xFEDCBA98, %edi
-       movl    $0x76543210, %ebp
+define(`EBX',     eval(4*12)($1))
+define(`EBP',     eval(4*13)($1))
+define(`ESI',     eval(4*14)($1))
+define(`EDI',     eval(4*15)($1))
+define(`EFLAGS',   eval(4*16)($1))
  
-       C try to provoke a problem by starting with junk in the registers,
-       C especially in %eax and %edx which will be return values
-       movl    $0x70246135, %eax
-       movl    $0x8ACE9BDF, %ecx
-       movl    $0xFDB97531, %edx
  
-       jmp     *G(calling_conventions_function)
+define(G,
+m4_assert_numargs(1)
+`GSYM_PREFIX`'$1')
  
-L(return):
-       movl    %ebx, G(calling_conventions_ebx)
-       movl    %esi, G(calling_conventions_esi)
-       movl    %edi, G(calling_conventions_edi)
-       movl    %ebp, G(calling_conventions_ebp)
+       TEXT
+       ALIGN(8)
+PROLOGUE(calling_conventions)
+       LEA(    G(calling_conventions_values), %ecx)
+       popl    RETADDR(%ecx)
+
+       movl    %ebx, SAVE_EBX(%ecx)
+       movl    %ebp, SAVE_EBP(%ecx)
+       movl    %esi, SAVE_ESI(%ecx)
+       movl    %edi, SAVE_EDI(%ecx)
+
+       C Values we expect to see unchanged, as per x86check.c
+       movl    WANT_EBX(%ecx), %ebx
+       movl    WANT_EBP(%ecx), %ebp
+       movl    WANT_ESI(%ecx), %esi
+       movl    WANT_EDI(%ecx), %edi
+
+       C Try to provoke a problem by starting with junk in the caller-saves
+       C registers, especially in %eax and %edx which will be return values
+       movl    JUNK_EAX(%ecx), %eax
+       movl    JUNK_EDX(%ecx), %edx
+C      movl    JUNK_ECX(%ecx), %ecx
+
+ifdef(`PIC',`
+       LEA(    G(calling_conventions_function), %ecx)
+       call    *(%ecx)
+',`
+       call    *G(calling_conventions_function)
+')
+
+       LEA(    G(calling_conventions_values), %ecx)
+
+       movl    %ebx, EBX(%ecx)
+       movl    %ebp, EBP(%ecx)
+       movl    %esi, ESI(%ecx)
+       movl    %edi, EDI(%ecx)
  
         pushf
         popl    %ebx
-       movl    %ebx, G(calling_conventions_eflags)
+       movl    %ebx, EFLAGS(%ecx)
  
+       movl    SAVE_EBX(%ecx), %ebx
+       movl    SAVE_ESI(%ecx), %esi
+       movl    SAVE_EDI(%ecx), %edi
+       movl    SAVE_EBP(%ecx), %ebp
+
+       pushl   RETADDR(%ecx)
+
+ifdef(`PIC',`
+       LEA(    G(calling_conventions_fenv), %ecx)
+       fstenv  (%ecx)
+',`
         fstenv  G(calling_conventions_fenv)
+')
         finit
  
-       movl    G(calling_conventions_save_ebx), %ebx
-       movl    G(calling_conventions_save_esi), %esi
-       movl    G(calling_conventions_save_edi), %edi
-       movl    G(calling_conventions_save_ebp), %ebp
-
-       jmp     *G(calling_conventions_retaddr)
+       ret
  
  EPILOGUE()
-
diff --git a/tests/x86check.c b/tests/x86check.c

index dfebd7a98ce8779331f48c73c6590eaa273e6108..8fa0f06ca723dfc45db3b55a9ea2fc0b54dd937b 100644 (file)
--- a/tests/x86check.c
+++ b/tests/x86check.c
@@ -1,22 +1,22 @@
  /* x86 calling conventions checking. */
  
  /*
-Copyright 2000, 2001 Free Software Foundation, Inc.
+Copyright 2000, 2001, 2010 Free Software Foundation, Inc.
  
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
  
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
  
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
  
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see http://www.gnu.org/licenses/.  */
  
  #include <stdio.h>
  #include "gmp.h"
@@ -24,13 +24,30 @@ along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
  #include "tests.h"
  
  
-/* temporaries */
-int  calling_conventions_save_ebx;
-int  calling_conventions_save_esi;
-int  calling_conventions_save_edi;
-int  calling_conventions_save_ebp;
-int  calling_conventions_retaddr;
-int  calling_conventions_retval;
+/* Vector of constants and register values.  We use one vector to allow access
+   via a base pointer, very beneficial for the PIC-enabled amd64call.asm.  */
+mp_limb_t calling_conventions_values[17] =
+{
+  CNST_LIMB(0x12345678),       /* want_ebx */
+  CNST_LIMB(0x89ABCDEF),       /* want_ebp */
+  CNST_LIMB(0xDEADBEEF),       /* want_esi */
+  CNST_LIMB(0xFFEEDDCC),       /* want_edi */
+
+  CNST_LIMB(0xFEEDABBA),       /* JUNK_EAX */
+  CNST_LIMB(0xAB78DE89),       /* JUNK_ECX */
+  CNST_LIMB(0x12389018)                /* JUNK_EDX */
+
+  /* rest of array used for dynamic values.  */
+};
+
+/* Index starts for various regions in above vector.  */
+#define WANT   0
+#define JUNK   4
+#define SAVE   7
+#define RETADDR        11
+#define VAL    12
+#define EFLAGS 16
+
  
  /* values to check */
  struct {
@@ -39,11 +56,6 @@ struct {
    unsigned  tag;
    unsigned  other[4];
  } calling_conventions_fenv;
-int  calling_conventions_ebx;
-int  calling_conventions_esi;
-int  calling_conventions_edi;
-int  calling_conventions_ebp;
-int  calling_conventions_eflags;
  
  /* expected values, as per x86call.asm */
  #define VALUE_EBX   0x01234567
@@ -51,6 +63,9 @@ int  calling_conventions_eflags;
  #define VALUE_EDI   0xFEDCBA98
  #define VALUE_EBP   0x76543210
  
+
+const char *regname[] = {"ebx", "ebp", "esi", "edi"};
+
  #define DIR_BIT(eflags)   (((eflags) & (1<<10)) != 0)
  
  
@@ -61,6 +76,7 @@ calling_conventions_check (void)
  {
    const char  *header = "Violated calling conventions:\n";
    int  ret = 1;
+  int i;
  
  #define CHECK(callreg, regstr, value)                   \
    if (callreg != value)                                 \
@@ -71,15 +87,15 @@ calling_conventions_check (void)
        ret = 0;                                          \
      }
  
-  CHECK (calling_conventions_ebx, "ebx", VALUE_EBX);
-  CHECK (calling_conventions_esi, "esi", VALUE_ESI);
-  CHECK (calling_conventions_edi, "edi", VALUE_EDI);
-  CHECK (calling_conventions_ebp, "ebp", VALUE_EBP);
+  for (i = 0; i < 4; i++)
+    {
+      CHECK (calling_conventions_values[VAL+i], regname[i], calling_conventions_values[WANT+i]);
+    }
  
-  if (DIR_BIT (calling_conventions_eflags) != 0)
+  if (DIR_BIT (calling_conventions_values[EFLAGS]) != 0)
      {
        printf ("%s   eflags dir bit  got %d want 0\n",
-              header, DIR_BIT (calling_conventions_eflags));
+              header, DIR_BIT (calling_conventions_values[EFLAGS]));
        header = "";
        ret = 0;
      }
diff --git a/tune/Makefile.am b/tune/Makefile.am

index 022aa7c17df584d8e080eb3c8134a6aa77aeead6..07d2808fd51932f9567c62d10d3e935a14b94f5a 100644 (file)
--- a/tune/Makefile.am
+++ b/tune/Makefile.am
@@ -1,6 +1,7 @@
  ## Process this file with automake to generate Makefile.in
  
-# Copyright 2000, 2001, 2002, 2003, 2005 Free Software Foundation, Inc.
+# Copyright 2000, 2001, 2002, 2003, 2005, 2006, 2007, 2008, 2009,
+# 2010, 2011 Free Software Foundation, Inc.
  #
  # This file is part of the GNU MP Library.
  #
@@ -43,8 +44,9 @@ libspeed_la_SOURCES =                                                 \
    common.c divrem1div.c divrem1inv.c divrem2div.c divrem2inv.c         \
    freq.c                                                               \
    gcdext_single.c gcdext_double.c gcdextod.c gcdextos.c                        \
-  jacbase1.c jacbase2.c jacbase3.c                                     \
-  mod_1_div.c mod_1_inv.c modlinv.c                                    \
+  hgcd_lehmer.c hgcd_appr_lehmer.c hgcd_reduce_1.c hgcd_reduce_2.c     \
+  jacbase1.c jacbase2.c jacbase3.c jacbase4.c                          \
+  mod_1_div.c mod_1_inv.c mod_1_1-1.c mod_1_1-2.c modlinv.c            \
    noop.c powm_mod.c powm_redc.c pre_divrem_1.c                         \
    set_strb.c set_strs.c set_strp.c time.c
  
@@ -68,10 +70,10 @@ $(top_builddir)/tests/libtests.la:
  # program.  This can always be forced with "make speed_LDFLAGS=-all-static
  # ..." if desired, see tune/README.
  
-EXTRA_PROGRAMS = speed speed-dynamic speed-ext tuneup
+EXTRA_PROGRAMS = speed speed-dynamic speed-ext tuneup tune-gcd-p
  
  DEPENDENCIES = libspeed.la
-LDADD = $(DEPENDENCIES)
+LDADD = $(DEPENDENCIES) $(TUNE_LIBS)
  
  speed_SOURCES = speed.c
  speed_LDFLAGS = $(STATIC)
@@ -82,11 +84,15 @@ speed_ext_SOURCES = speed-ext.c
  speed_ext_LDFLAGS = $(STATIC)
  
  tuneup_SOURCES = tuneup.c
-nodist_tuneup_SOURCES = sqr_basecase.c $(TUNE_MPN_SRCS)
+nodist_tuneup_SOURCES = sqr_basecase.c fac_ui.c $(TUNE_MPN_SRCS)
  tuneup_DEPENDENCIES = $(TUNE_SQR_OBJ) libspeed.la
-tuneup_LDADD = $(tuneup_DEPENDENCIES)
+tuneup_LDADD = $(tuneup_DEPENDENCIES) $(TUNE_LIBS)
  tuneup_LDFLAGS = $(STATIC)
  
+tune_gcd_p_SOURCES = tune-gcd-p.c
+tune_gcd_p_DEPENDENCIES = ../mpn/gcd.c
+tune_gcd_p_LDFLAGS = $(STATIC)
+
  
  tune:
         $(MAKE) $(AM_MAKEFLAGS) tuneup$(EXEEXT)
@@ -96,7 +102,7 @@ allprogs: $(EXTRA_PROGRAMS)
  
  # $(MANY_CLEAN) and $(MANY_DISTCLEAN) are hooks for many.pl
  CLEANFILES = $(EXTRA_PROGRAMS) $(EXTRA_LTLIBRARIES) \
-       $(TUNE_MPN_SRCS) sqr_asm.asm \
+       $(TUNE_MPN_SRCS) fac_ui.c sqr_asm.asm \
         stg.gnuplot stg.data \
         mtg.gnuplot mtg.data \
         fibg.gnuplot fibg.data \
@@ -122,11 +128,14 @@ DISTCLEANFILES = sqr_basecase.c  $(MANY_DISTCLEAN)
  # recompiled object will be rebuilt if that file changes.
  
  TUNE_MPN_SRCS = $(TUNE_MPN_SRCS_BASIC) divrem_1.c mod_1.c
-TUNE_MPN_SRCS_BASIC = bdiv_q.c bdiv_qr.c                               \
+TUNE_MPN_SRCS_BASIC = div_qr_2.c bdiv_q.c bdiv_qr.c                    \
    dcpi1_div_qr.c dcpi1_divappr_q.c dcpi1_bdiv_qr.c dcpi1_bdiv_q.c      \
    invertappr.c invert.c binvert.c divrem_2.c gcd.c gcdext.c            \
-  get_str.c set_str.c matrix22_mul.c hgcd.c mul_n.c sqr.c              \
+  get_str.c set_str.c matrix22_mul.c                                   \
+  hgcd.c hgcd_appr.c hgcd_reduce.c                                     \
+  mul_n.c sqr.c powm_sec.c                                             \
    mullo_n.c mul_fft.c mul.c tdiv_qr.c mulmod_bnm1.c sqrmod_bnm1.c      \
+  mulmid.c mulmid_n.c toom42_mulmid.c                                  \
    nussbaumer_mul.c toom6h_mul.c toom8h_mul.c toom6_sqr.c toom8_sqr.c   \
    toom22_mul.c toom2_sqr.c toom33_mul.c toom3_sqr.c toom44_mul.c toom4_sqr.c
  
@@ -150,5 +159,12 @@ sqr_asm.asm: $(top_builddir)/mpn/sqr_basecase.asm
         echo 'define(SQR_TOOM2_THRESHOLD_OVERRIDE,SQR_TOOM2_THRESHOLD_MAX)' >sqr_asm.asm
         echo 'include(../mpn/sqr_basecase.asm)' >>sqr_asm.asm
  
+# FIXME: Should it depend on $(top_builddir)/fac_ui.h too?
+fac_ui.c: $(top_builddir)/mpz/fac_ui.c
+       echo "#define TUNE_PROGRAM_BUILD 1"          >fac_ui.c
+       echo "#define __gmpz_fac_ui mpz_fac_ui_tune" >>fac_ui.c
+       echo "#define __gmpz_oddfac_1 mpz_oddfac_1_tune" >>fac_ui.c
+       echo "#include \"mpz/oddfac_1.c\""           >>fac_ui.c
+       echo "#include \"mpz/fac_ui.c\""             >>fac_ui.c
  
  include ../mpn/Makeasm.am
diff --git a/tune/Makefile.in b/tune/Makefile.in

index 64f177eab5b3529f7f2eba47dba1f872693f926a..6c339b31a292ac06251270a5f817393d46a1c239 100644 (file)
--- a/tune/Makefile.in
+++ b/tune/Makefile.in
@@ -1,9 +1,9 @@
-# Makefile.in generated by automake 1.11.1 from Makefile.am.
+# Makefile.in generated by automake 1.11.6 from Makefile.am.
  # @configure_input@
  
  # Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
-# 2003, 2004, 2005, 2006, 2007, 2008, 2009  Free Software Foundation,
-# Inc.
+# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software
+# Foundation, Inc.
  # This Makefile.in is free software; the Free Software Foundation
  # gives unlimited permission to copy and/or distribute it,
  # with or without modifications, as long as this notice is preserved.
@@ -15,7 +15,8 @@
  
  @SET_MAKE@
  
-# Copyright 2000, 2001, 2002, 2003, 2005 Free Software Foundation, Inc.
+# Copyright 2000, 2001, 2002, 2003, 2005, 2006, 2007, 2008, 2009,
+# 2010, 2011 Free Software Foundation, Inc.
  #
  # This file is part of the GNU MP Library.
  #
@@ -51,6 +52,23 @@
  # along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
  
  VPATH = @srcdir@
+am__make_dryrun = \
+  { \
+    am__dry=no; \
+    case $$MAKEFLAGS in \
+      *\\[\ \  ]*) \
+        echo 'am--echo: ; @echo "AM"  OK' | $(MAKE) -f - 2>/dev/null \
+          | grep '^AM OK$$' >/dev/null || am__dry=yes;; \
+      *) \
+        for am__flg in $$MAKEFLAGS; do \
+          case $$am__flg in \
+            *=*|--*) ;; \
+            *n*) am__dry=yes; break;; \
+          esac; \
+        done;; \
+    esac; \
+    test $$am__dry = yes; \
+  }
  pkgdatadir = $(datadir)/@PACKAGE@
  pkgincludedir = $(includedir)/@PACKAGE@
  pkglibdir = $(libdir)/@PACKAGE@
@@ -69,15 +87,14 @@ PRE_UNINSTALL = :
  POST_UNINSTALL = :
  build_triplet = @build@
  host_triplet = @host@
-ANSI2KNR = $(top_builddir)/ansi2knr
  EXTRA_PROGRAMS = speed$(EXEEXT) speed-dynamic$(EXEEXT) \
-       speed-ext$(EXEEXT) tuneup$(EXEEXT)
+       speed-ext$(EXEEXT) tuneup$(EXEEXT) tune-gcd-p$(EXEEXT)
  DIST_COMMON = README $(noinst_HEADERS) $(srcdir)/../mpn/Makeasm.am \
         $(srcdir)/Makefile.am $(srcdir)/Makefile.in
  subdir = tune
  ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
  am__aclocal_m4_deps = $(top_srcdir)/acinclude.m4 \
-       $(top_srcdir)/configure.in
+       $(top_srcdir)/configure.ac
  am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
         $(ACLOCAL_M4)
  mkinstalldirs = $(install_sh) -d
@@ -87,54 +104,61 @@ CONFIG_CLEAN_VPATH_FILES =
  am__DEPENDENCIES_1 =
  am__DEPENDENCIES_2 = $(am__DEPENDENCIES_1) \
         $(top_builddir)/tests/libtests.la $(top_builddir)/libgmp.la
-am_libspeed_la_OBJECTS = common$U.lo divrem1div$U.lo divrem1inv$U.lo \
-       divrem2div$U.lo divrem2inv$U.lo freq$U.lo gcdext_single$U.lo \
-       gcdext_double$U.lo gcdextod$U.lo gcdextos$U.lo jacbase1$U.lo \
-       jacbase2$U.lo jacbase3$U.lo mod_1_div$U.lo mod_1_inv$U.lo \
-       modlinv$U.lo noop$U.lo powm_mod$U.lo powm_redc$U.lo \
-       pre_divrem_1$U.lo set_strb$U.lo set_strs$U.lo set_strp$U.lo \
-       time$U.lo
+am_libspeed_la_OBJECTS = common.lo divrem1div.lo divrem1inv.lo \
+       divrem2div.lo divrem2inv.lo freq.lo gcdext_single.lo \
+       gcdext_double.lo gcdextod.lo gcdextos.lo hgcd_lehmer.lo \
+       hgcd_appr_lehmer.lo hgcd_reduce_1.lo hgcd_reduce_2.lo \
+       jacbase1.lo jacbase2.lo jacbase3.lo jacbase4.lo mod_1_div.lo \
+       mod_1_inv.lo mod_1_1-1.lo mod_1_1-2.lo modlinv.lo noop.lo \
+       powm_mod.lo powm_redc.lo pre_divrem_1.lo set_strb.lo \
+       set_strs.lo set_strp.lo time.lo
  libspeed_la_OBJECTS = $(am_libspeed_la_OBJECTS)
  libspeed_la_LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) \
         $(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \
         $(libspeed_la_LDFLAGS) $(LDFLAGS) -o $@
-am_speed_OBJECTS = speed$U.$(OBJEXT)
+am_speed_OBJECTS = speed.$(OBJEXT)
  speed_OBJECTS = $(am_speed_OBJECTS)
  speed_LDADD = $(LDADD)
-speed_DEPENDENCIES = $(DEPENDENCIES)
+speed_DEPENDENCIES = $(DEPENDENCIES) $(am__DEPENDENCIES_1)
  speed_LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \
         --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) $(speed_LDFLAGS) \
         $(LDFLAGS) -o $@
-am_speed_dynamic_OBJECTS = speed$U.$(OBJEXT)
+am_speed_dynamic_OBJECTS = speed.$(OBJEXT)
  speed_dynamic_OBJECTS = $(am_speed_dynamic_OBJECTS)
  speed_dynamic_LDADD = $(LDADD)
-speed_dynamic_DEPENDENCIES = $(DEPENDENCIES)
-am_speed_ext_OBJECTS = speed-ext$U.$(OBJEXT)
+speed_dynamic_DEPENDENCIES = $(DEPENDENCIES) $(am__DEPENDENCIES_1)
+am_speed_ext_OBJECTS = speed-ext.$(OBJEXT)
  speed_ext_OBJECTS = $(am_speed_ext_OBJECTS)
  speed_ext_LDADD = $(LDADD)
-speed_ext_DEPENDENCIES = $(DEPENDENCIES)
+speed_ext_DEPENDENCIES = $(DEPENDENCIES) $(am__DEPENDENCIES_1)
  speed_ext_LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) \
         $(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \
         $(speed_ext_LDFLAGS) $(LDFLAGS) -o $@
-am_tuneup_OBJECTS = tuneup$U.$(OBJEXT)
-am__objects_1 = bdiv_q$U.$(OBJEXT) bdiv_qr$U.$(OBJEXT) \
-       dcpi1_div_qr$U.$(OBJEXT) dcpi1_divappr_q$U.$(OBJEXT) \
-       dcpi1_bdiv_qr$U.$(OBJEXT) dcpi1_bdiv_q$U.$(OBJEXT) \
-       invertappr$U.$(OBJEXT) invert$U.$(OBJEXT) binvert$U.$(OBJEXT) \
-       divrem_2$U.$(OBJEXT) gcd$U.$(OBJEXT) gcdext$U.$(OBJEXT) \
-       get_str$U.$(OBJEXT) set_str$U.$(OBJEXT) \
-       matrix22_mul$U.$(OBJEXT) hgcd$U.$(OBJEXT) mul_n$U.$(OBJEXT) \
-       sqr$U.$(OBJEXT) mullo_n$U.$(OBJEXT) mul_fft$U.$(OBJEXT) \
-       mul$U.$(OBJEXT) tdiv_qr$U.$(OBJEXT) mulmod_bnm1$U.$(OBJEXT) \
-       sqrmod_bnm1$U.$(OBJEXT) nussbaumer_mul$U.$(OBJEXT) \
-       toom6h_mul$U.$(OBJEXT) toom8h_mul$U.$(OBJEXT) \
-       toom6_sqr$U.$(OBJEXT) toom8_sqr$U.$(OBJEXT) \
-       toom22_mul$U.$(OBJEXT) toom2_sqr$U.$(OBJEXT) \
-       toom33_mul$U.$(OBJEXT) toom3_sqr$U.$(OBJEXT) \
-       toom44_mul$U.$(OBJEXT) toom4_sqr$U.$(OBJEXT)
-am__objects_2 = $(am__objects_1) divrem_1$U.$(OBJEXT) \
-       mod_1$U.$(OBJEXT)
-nodist_tuneup_OBJECTS = sqr_basecase$U.$(OBJEXT) $(am__objects_2)
+am_tune_gcd_p_OBJECTS = tune-gcd-p.$(OBJEXT)
+tune_gcd_p_OBJECTS = $(am_tune_gcd_p_OBJECTS)
+tune_gcd_p_LDADD = $(LDADD)
+tune_gcd_p_LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) \
+       $(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \
+       $(tune_gcd_p_LDFLAGS) $(LDFLAGS) -o $@
+am_tuneup_OBJECTS = tuneup.$(OBJEXT)
+am__objects_1 = div_qr_2.$(OBJEXT) bdiv_q.$(OBJEXT) bdiv_qr.$(OBJEXT) \
+       dcpi1_div_qr.$(OBJEXT) dcpi1_divappr_q.$(OBJEXT) \
+       dcpi1_bdiv_qr.$(OBJEXT) dcpi1_bdiv_q.$(OBJEXT) \
+       invertappr.$(OBJEXT) invert.$(OBJEXT) binvert.$(OBJEXT) \
+       divrem_2.$(OBJEXT) gcd.$(OBJEXT) gcdext.$(OBJEXT) \
+       get_str.$(OBJEXT) set_str.$(OBJEXT) matrix22_mul.$(OBJEXT) \
+       hgcd.$(OBJEXT) hgcd_appr.$(OBJEXT) hgcd_reduce.$(OBJEXT) \
+       mul_n.$(OBJEXT) sqr.$(OBJEXT) powm_sec.$(OBJEXT) \
+       mullo_n.$(OBJEXT) mul_fft.$(OBJEXT) mul.$(OBJEXT) \
+       tdiv_qr.$(OBJEXT) mulmod_bnm1.$(OBJEXT) sqrmod_bnm1.$(OBJEXT) \
+       mulmid.$(OBJEXT) mulmid_n.$(OBJEXT) toom42_mulmid.$(OBJEXT) \
+       nussbaumer_mul.$(OBJEXT) toom6h_mul.$(OBJEXT) \
+       toom8h_mul.$(OBJEXT) toom6_sqr.$(OBJEXT) toom8_sqr.$(OBJEXT) \
+       toom22_mul.$(OBJEXT) toom2_sqr.$(OBJEXT) toom33_mul.$(OBJEXT) \
+       toom3_sqr.$(OBJEXT) toom44_mul.$(OBJEXT) toom4_sqr.$(OBJEXT)
+am__objects_2 = $(am__objects_1) divrem_1.$(OBJEXT) mod_1.$(OBJEXT)
+nodist_tuneup_OBJECTS = sqr_basecase.$(OBJEXT) fac_ui.$(OBJEXT) \
+       $(am__objects_2)
  tuneup_OBJECTS = $(am_tuneup_OBJECTS) $(nodist_tuneup_OBJECTS)
  am__DEPENDENCIES_3 = $(am__DEPENDENCIES_1) libspeed.la
  tuneup_LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \
@@ -154,10 +178,16 @@ LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \
         $(LDFLAGS) -o $@
  SOURCES = $(libspeed_la_SOURCES) $(speed_SOURCES) \
         $(speed_dynamic_SOURCES) $(speed_ext_SOURCES) \
-       $(tuneup_SOURCES) $(nodist_tuneup_SOURCES)
+       $(tune_gcd_p_SOURCES) $(tuneup_SOURCES) \
+       $(nodist_tuneup_SOURCES)
  DIST_SOURCES = $(libspeed_la_SOURCES) $(speed_SOURCES) \
         $(speed_dynamic_SOURCES) $(speed_ext_SOURCES) \
-       $(tuneup_SOURCES)
+       $(tune_gcd_p_SOURCES) $(tuneup_SOURCES)
+am__can_run_installinfo = \
+  case $$AM_UPDATE_INFO_DIR in \
+    n|no|NO) false;; \
+    *) (install-info --version) >/dev/null 2>&1;; \
+  esac
  HEADERS = $(noinst_HEADERS)
  ETAGS = etags
  CTAGS = ctags
@@ -260,8 +290,8 @@ SHELL = @SHELL@
  SPEED_CYCLECOUNTER_OBJ = @SPEED_CYCLECOUNTER_OBJ@
  STRIP = @STRIP@
  TAL_OBJECT = @TAL_OBJECT@
+TUNE_LIBS = @TUNE_LIBS@
  TUNE_SQR_OBJ = @TUNE_SQR_OBJ@
-U = @U@
  U_FOR_BUILD = @U_FOR_BUILD@
  VERSION = @VERSION@
  WITH_READLINE_01 = @WITH_READLINE_01@
@@ -308,7 +338,6 @@ mandir = @mandir@
  mkdir_p = @mkdir_p@
  mpn_objects = @mpn_objects@
  mpn_objs_in_libgmp = @mpn_objs_in_libgmp@
-mpn_objs_in_libmp = @mpn_objs_in_libmp@
  oldincludedir = @oldincludedir@
  pdfdir = @pdfdir@
  prefix = @prefix@
@@ -341,8 +370,9 @@ libspeed_la_SOURCES = \
    common.c divrem1div.c divrem1inv.c divrem2div.c divrem2inv.c         \
    freq.c                                                               \
    gcdext_single.c gcdext_double.c gcdextod.c gcdextos.c                        \
-  jacbase1.c jacbase2.c jacbase3.c                                     \
-  mod_1_div.c mod_1_inv.c modlinv.c                                    \
+  hgcd_lehmer.c hgcd_appr_lehmer.c hgcd_reduce_1.c hgcd_reduce_2.c     \
+  jacbase1.c jacbase2.c jacbase3.c jacbase4.c                          \
+  mod_1_div.c mod_1_inv.c mod_1_1-1.c mod_1_1-2.c modlinv.c            \
    noop.c powm_mod.c powm_redc.c pre_divrem_1.c                         \
    set_strb.c set_strs.c set_strp.c time.c
  
@@ -352,21 +382,24 @@ libspeed_la_DEPENDENCIES = $(SPEED_CYCLECOUNTER_OBJ) \
  libspeed_la_LIBADD = $(libspeed_la_DEPENDENCIES) $(LIBM)
  libspeed_la_LDFLAGS = $(STATIC)
  DEPENDENCIES = libspeed.la
-LDADD = $(DEPENDENCIES)
+LDADD = $(DEPENDENCIES) $(TUNE_LIBS)
  speed_SOURCES = speed.c
  speed_LDFLAGS = $(STATIC)
  speed_dynamic_SOURCES = speed.c
  speed_ext_SOURCES = speed-ext.c
  speed_ext_LDFLAGS = $(STATIC)
  tuneup_SOURCES = tuneup.c
-nodist_tuneup_SOURCES = sqr_basecase.c $(TUNE_MPN_SRCS)
+nodist_tuneup_SOURCES = sqr_basecase.c fac_ui.c $(TUNE_MPN_SRCS)
  tuneup_DEPENDENCIES = $(TUNE_SQR_OBJ) libspeed.la
-tuneup_LDADD = $(tuneup_DEPENDENCIES)
+tuneup_LDADD = $(tuneup_DEPENDENCIES) $(TUNE_LIBS)
  tuneup_LDFLAGS = $(STATIC)
+tune_gcd_p_SOURCES = tune-gcd-p.c
+tune_gcd_p_DEPENDENCIES = ../mpn/gcd.c
+tune_gcd_p_LDFLAGS = $(STATIC)
  
  # $(MANY_CLEAN) and $(MANY_DISTCLEAN) are hooks for many.pl
  CLEANFILES = $(EXTRA_PROGRAMS) $(EXTRA_LTLIBRARIES) \
-       $(TUNE_MPN_SRCS) sqr_asm.asm \
+       $(TUNE_MPN_SRCS) fac_ui.c sqr_asm.asm \
         stg.gnuplot stg.data \
         mtg.gnuplot mtg.data \
         fibg.gnuplot fibg.data \
@@ -391,11 +424,14 @@ DISTCLEANFILES = sqr_basecase.c  $(MANY_DISTCLEAN)
  # FIXME: Would like say mul_n.c to depend on $(top_builddir)/mul_n.c so the
  # recompiled object will be rebuilt if that file changes.
  TUNE_MPN_SRCS = $(TUNE_MPN_SRCS_BASIC) divrem_1.c mod_1.c
-TUNE_MPN_SRCS_BASIC = bdiv_q.c bdiv_qr.c                               \
+TUNE_MPN_SRCS_BASIC = div_qr_2.c bdiv_q.c bdiv_qr.c                    \
    dcpi1_div_qr.c dcpi1_divappr_q.c dcpi1_bdiv_qr.c dcpi1_bdiv_q.c      \
    invertappr.c invert.c binvert.c divrem_2.c gcd.c gcdext.c            \
-  get_str.c set_str.c matrix22_mul.c hgcd.c mul_n.c sqr.c              \
+  get_str.c set_str.c matrix22_mul.c                                   \
+  hgcd.c hgcd_appr.c hgcd_reduce.c                                     \
+  mul_n.c sqr.c powm_sec.c                                             \
    mullo_n.c mul_fft.c mul.c tdiv_qr.c mulmod_bnm1.c sqrmod_bnm1.c      \
+  mulmid.c mulmid_n.c toom42_mulmid.c                                  \
    nussbaumer_mul.c toom6h_mul.c toom8h_mul.c toom6_sqr.c toom8_sqr.c   \
    toom22_mul.c toom2_sqr.c toom33_mul.c toom3_sqr.c toom44_mul.c toom4_sqr.c
  
@@ -453,6 +489,7 @@ Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
             echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \
             cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \
         esac;
+$(srcdir)/../mpn/Makeasm.am:
  
  $(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)
         cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
@@ -462,18 +499,21 @@ $(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps)
  $(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps)
         cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
  $(am__aclocal_m4_deps):
-libspeed.la: $(libspeed_la_OBJECTS) $(libspeed_la_DEPENDENCIES) 
+libspeed.la: $(libspeed_la_OBJECTS) $(libspeed_la_DEPENDENCIES) $(EXTRA_libspeed_la_DEPENDENCIES) 
         $(libspeed_la_LINK)  $(libspeed_la_OBJECTS) $(libspeed_la_LIBADD) $(LIBS)
-speed$(EXEEXT): $(speed_OBJECTS) $(speed_DEPENDENCIES) 
+speed$(EXEEXT): $(speed_OBJECTS) $(speed_DEPENDENCIES) $(EXTRA_speed_DEPENDENCIES) 
         @rm -f speed$(EXEEXT)
         $(speed_LINK) $(speed_OBJECTS) $(speed_LDADD) $(LIBS)
-speed-dynamic$(EXEEXT): $(speed_dynamic_OBJECTS) $(speed_dynamic_DEPENDENCIES) 
+speed-dynamic$(EXEEXT): $(speed_dynamic_OBJECTS) $(speed_dynamic_DEPENDENCIES) $(EXTRA_speed_dynamic_DEPENDENCIES) 
         @rm -f speed-dynamic$(EXEEXT)
         $(LINK) $(speed_dynamic_OBJECTS) $(speed_dynamic_LDADD) $(LIBS)
-speed-ext$(EXEEXT): $(speed_ext_OBJECTS) $(speed_ext_DEPENDENCIES) 
+speed-ext$(EXEEXT): $(speed_ext_OBJECTS) $(speed_ext_DEPENDENCIES) $(EXTRA_speed_ext_DEPENDENCIES) 
         @rm -f speed-ext$(EXEEXT)
         $(speed_ext_LINK) $(speed_ext_OBJECTS) $(speed_ext_LDADD) $(LIBS)
-tuneup$(EXEEXT): $(tuneup_OBJECTS) $(tuneup_DEPENDENCIES) 
+tune-gcd-p$(EXEEXT): $(tune_gcd_p_OBJECTS) $(tune_gcd_p_DEPENDENCIES) $(EXTRA_tune_gcd_p_DEPENDENCIES) 
+       @rm -f tune-gcd-p$(EXEEXT)
+       $(tune_gcd_p_LINK) $(tune_gcd_p_OBJECTS) $(tune_gcd_p_LDADD) $(LIBS)
+tuneup$(EXEEXT): $(tuneup_OBJECTS) $(tuneup_DEPENDENCIES) $(EXTRA_tuneup_DEPENDENCIES) 
         @rm -f tuneup$(EXEEXT)
         $(tuneup_LINK) $(tuneup_OBJECTS) $(tuneup_LDADD) $(LIBS)
  
@@ -482,11 +522,6 @@ mostlyclean-compile:
  
  distclean-compile:
         -rm -f *.tab.c
-$(top_builddir)/ansi2knr:
-       $(am__cd) $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) ./ansi2knr
-
-mostlyclean-kr:
-       -test "$U" = "" || rm -f *_.c
  
  .c.o:
         $(COMPILE) -c $<
@@ -496,172 +531,6 @@ mostlyclean-kr:
  
  .c.lo:
         $(LTCOMPILE) -c -o $@ $<
-bdiv_q_.c: bdiv_q.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/bdiv_q.c; then echo $(srcdir)/bdiv_q.c; else echo bdiv_q.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-bdiv_qr_.c: bdiv_qr.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/bdiv_qr.c; then echo $(srcdir)/bdiv_qr.c; else echo bdiv_qr.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-binvert_.c: binvert.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/binvert.c; then echo $(srcdir)/binvert.c; else echo binvert.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-common_.c: common.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/common.c; then echo $(srcdir)/common.c; else echo common.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-dcpi1_bdiv_q_.c: dcpi1_bdiv_q.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/dcpi1_bdiv_q.c; then echo $(srcdir)/dcpi1_bdiv_q.c; else echo dcpi1_bdiv_q.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-dcpi1_bdiv_qr_.c: dcpi1_bdiv_qr.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/dcpi1_bdiv_qr.c; then echo $(srcdir)/dcpi1_bdiv_qr.c; else echo dcpi1_bdiv_qr.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-dcpi1_div_qr_.c: dcpi1_div_qr.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/dcpi1_div_qr.c; then echo $(srcdir)/dcpi1_div_qr.c; else echo dcpi1_div_qr.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-dcpi1_divappr_q_.c: dcpi1_divappr_q.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/dcpi1_divappr_q.c; then echo $(srcdir)/dcpi1_divappr_q.c; else echo dcpi1_divappr_q.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-divrem1div_.c: divrem1div.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/divrem1div.c; then echo $(srcdir)/divrem1div.c; else echo divrem1div.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-divrem1inv_.c: divrem1inv.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/divrem1inv.c; then echo $(srcdir)/divrem1inv.c; else echo divrem1inv.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-divrem2div_.c: divrem2div.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/divrem2div.c; then echo $(srcdir)/divrem2div.c; else echo divrem2div.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-divrem2inv_.c: divrem2inv.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/divrem2inv.c; then echo $(srcdir)/divrem2inv.c; else echo divrem2inv.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-divrem_1_.c: divrem_1.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/divrem_1.c; then echo $(srcdir)/divrem_1.c; else echo divrem_1.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-divrem_2_.c: divrem_2.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/divrem_2.c; then echo $(srcdir)/divrem_2.c; else echo divrem_2.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-freq_.c: freq.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/freq.c; then echo $(srcdir)/freq.c; else echo freq.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-gcd_.c: gcd.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/gcd.c; then echo $(srcdir)/gcd.c; else echo gcd.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-gcdext_.c: gcdext.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/gcdext.c; then echo $(srcdir)/gcdext.c; else echo gcdext.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-gcdext_double_.c: gcdext_double.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/gcdext_double.c; then echo $(srcdir)/gcdext_double.c; else echo gcdext_double.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-gcdext_single_.c: gcdext_single.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/gcdext_single.c; then echo $(srcdir)/gcdext_single.c; else echo gcdext_single.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-gcdextod_.c: gcdextod.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/gcdextod.c; then echo $(srcdir)/gcdextod.c; else echo gcdextod.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-gcdextos_.c: gcdextos.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/gcdextos.c; then echo $(srcdir)/gcdextos.c; else echo gcdextos.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-get_str_.c: get_str.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/get_str.c; then echo $(srcdir)/get_str.c; else echo get_str.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-hgcd_.c: hgcd.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/hgcd.c; then echo $(srcdir)/hgcd.c; else echo hgcd.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-invert_.c: invert.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/invert.c; then echo $(srcdir)/invert.c; else echo invert.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-invertappr_.c: invertappr.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/invertappr.c; then echo $(srcdir)/invertappr.c; else echo invertappr.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-jacbase1_.c: jacbase1.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/jacbase1.c; then echo $(srcdir)/jacbase1.c; else echo jacbase1.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-jacbase2_.c: jacbase2.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/jacbase2.c; then echo $(srcdir)/jacbase2.c; else echo jacbase2.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-jacbase3_.c: jacbase3.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/jacbase3.c; then echo $(srcdir)/jacbase3.c; else echo jacbase3.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-matrix22_mul_.c: matrix22_mul.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/matrix22_mul.c; then echo $(srcdir)/matrix22_mul.c; else echo matrix22_mul.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-mod_1_.c: mod_1.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/mod_1.c; then echo $(srcdir)/mod_1.c; else echo mod_1.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-mod_1_div_.c: mod_1_div.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/mod_1_div.c; then echo $(srcdir)/mod_1_div.c; else echo mod_1_div.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-mod_1_inv_.c: mod_1_inv.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/mod_1_inv.c; then echo $(srcdir)/mod_1_inv.c; else echo mod_1_inv.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-modlinv_.c: modlinv.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/modlinv.c; then echo $(srcdir)/modlinv.c; else echo modlinv.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-mul_.c: mul.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/mul.c; then echo $(srcdir)/mul.c; else echo mul.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-mul_fft_.c: mul_fft.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/mul_fft.c; then echo $(srcdir)/mul_fft.c; else echo mul_fft.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-mul_n_.c: mul_n.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/mul_n.c; then echo $(srcdir)/mul_n.c; else echo mul_n.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-mullo_n_.c: mullo_n.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/mullo_n.c; then echo $(srcdir)/mullo_n.c; else echo mullo_n.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-mulmod_bnm1_.c: mulmod_bnm1.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/mulmod_bnm1.c; then echo $(srcdir)/mulmod_bnm1.c; else echo mulmod_bnm1.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-noop_.c: noop.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/noop.c; then echo $(srcdir)/noop.c; else echo noop.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-nussbaumer_mul_.c: nussbaumer_mul.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/nussbaumer_mul.c; then echo $(srcdir)/nussbaumer_mul.c; else echo nussbaumer_mul.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-powm_mod_.c: powm_mod.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/powm_mod.c; then echo $(srcdir)/powm_mod.c; else echo powm_mod.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-powm_redc_.c: powm_redc.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/powm_redc.c; then echo $(srcdir)/powm_redc.c; else echo powm_redc.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-pre_divrem_1_.c: pre_divrem_1.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/pre_divrem_1.c; then echo $(srcdir)/pre_divrem_1.c; else echo pre_divrem_1.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-set_str_.c: set_str.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/set_str.c; then echo $(srcdir)/set_str.c; else echo set_str.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-set_strb_.c: set_strb.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/set_strb.c; then echo $(srcdir)/set_strb.c; else echo set_strb.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-set_strp_.c: set_strp.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/set_strp.c; then echo $(srcdir)/set_strp.c; else echo set_strp.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-set_strs_.c: set_strs.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/set_strs.c; then echo $(srcdir)/set_strs.c; else echo set_strs.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-speed_.c: speed.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/speed.c; then echo $(srcdir)/speed.c; else echo speed.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-speed-ext_.c: speed-ext.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/speed-ext.c; then echo $(srcdir)/speed-ext.c; else echo speed-ext.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-sqr_.c: sqr.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/sqr.c; then echo $(srcdir)/sqr.c; else echo sqr.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-sqr_basecase_.c: sqr_basecase.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/sqr_basecase.c; then echo $(srcdir)/sqr_basecase.c; else echo sqr_basecase.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-sqrmod_bnm1_.c: sqrmod_bnm1.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/sqrmod_bnm1.c; then echo $(srcdir)/sqrmod_bnm1.c; else echo sqrmod_bnm1.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-tdiv_qr_.c: tdiv_qr.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/tdiv_qr.c; then echo $(srcdir)/tdiv_qr.c; else echo tdiv_qr.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-time_.c: time.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/time.c; then echo $(srcdir)/time.c; else echo time.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-toom22_mul_.c: toom22_mul.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/toom22_mul.c; then echo $(srcdir)/toom22_mul.c; else echo toom22_mul.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-toom2_sqr_.c: toom2_sqr.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/toom2_sqr.c; then echo $(srcdir)/toom2_sqr.c; else echo toom2_sqr.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-toom33_mul_.c: toom33_mul.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/toom33_mul.c; then echo $(srcdir)/toom33_mul.c; else echo toom33_mul.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-toom3_sqr_.c: toom3_sqr.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/toom3_sqr.c; then echo $(srcdir)/toom3_sqr.c; else echo toom3_sqr.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-toom44_mul_.c: toom44_mul.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/toom44_mul.c; then echo $(srcdir)/toom44_mul.c; else echo toom44_mul.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-toom4_sqr_.c: toom4_sqr.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/toom4_sqr.c; then echo $(srcdir)/toom4_sqr.c; else echo toom4_sqr.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-toom6_sqr_.c: toom6_sqr.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/toom6_sqr.c; then echo $(srcdir)/toom6_sqr.c; else echo toom6_sqr.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-toom6h_mul_.c: toom6h_mul.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/toom6h_mul.c; then echo $(srcdir)/toom6h_mul.c; else echo toom6h_mul.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-toom8_sqr_.c: toom8_sqr.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/toom8_sqr.c; then echo $(srcdir)/toom8_sqr.c; else echo toom8_sqr.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-toom8h_mul_.c: toom8h_mul.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/toom8h_mul.c; then echo $(srcdir)/toom8h_mul.c; else echo toom8h_mul.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-tuneup_.c: tuneup.c $(ANSI2KNR)
-       $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/tuneup.c; then echo $(srcdir)/tuneup.c; else echo tuneup.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-bdiv_q_.$(OBJEXT) bdiv_q_.lo bdiv_qr_.$(OBJEXT) bdiv_qr_.lo \
-binvert_.$(OBJEXT) binvert_.lo common_.$(OBJEXT) common_.lo \
-dcpi1_bdiv_q_.$(OBJEXT) dcpi1_bdiv_q_.lo dcpi1_bdiv_qr_.$(OBJEXT) \
-dcpi1_bdiv_qr_.lo dcpi1_div_qr_.$(OBJEXT) dcpi1_div_qr_.lo \
-dcpi1_divappr_q_.$(OBJEXT) dcpi1_divappr_q_.lo divrem1div_.$(OBJEXT) \
-divrem1div_.lo divrem1inv_.$(OBJEXT) divrem1inv_.lo \
-divrem2div_.$(OBJEXT) divrem2div_.lo divrem2inv_.$(OBJEXT) \
-divrem2inv_.lo divrem_1_.$(OBJEXT) divrem_1_.lo divrem_2_.$(OBJEXT) \
-divrem_2_.lo freq_.$(OBJEXT) freq_.lo gcd_.$(OBJEXT) gcd_.lo \
-gcdext_.$(OBJEXT) gcdext_.lo gcdext_double_.$(OBJEXT) \
-gcdext_double_.lo gcdext_single_.$(OBJEXT) gcdext_single_.lo \
-gcdextod_.$(OBJEXT) gcdextod_.lo gcdextos_.$(OBJEXT) gcdextos_.lo \
-get_str_.$(OBJEXT) get_str_.lo hgcd_.$(OBJEXT) hgcd_.lo \
-invert_.$(OBJEXT) invert_.lo invertappr_.$(OBJEXT) invertappr_.lo \
-jacbase1_.$(OBJEXT) jacbase1_.lo jacbase2_.$(OBJEXT) jacbase2_.lo \
-jacbase3_.$(OBJEXT) jacbase3_.lo matrix22_mul_.$(OBJEXT) \
-matrix22_mul_.lo mod_1_.$(OBJEXT) mod_1_.lo mod_1_div_.$(OBJEXT) \
-mod_1_div_.lo mod_1_inv_.$(OBJEXT) mod_1_inv_.lo modlinv_.$(OBJEXT) \
-modlinv_.lo mul_.$(OBJEXT) mul_.lo mul_fft_.$(OBJEXT) mul_fft_.lo \
-mul_n_.$(OBJEXT) mul_n_.lo mullo_n_.$(OBJEXT) mullo_n_.lo \
-mulmod_bnm1_.$(OBJEXT) mulmod_bnm1_.lo noop_.$(OBJEXT) noop_.lo \
-nussbaumer_mul_.$(OBJEXT) nussbaumer_mul_.lo powm_mod_.$(OBJEXT) \
-powm_mod_.lo powm_redc_.$(OBJEXT) powm_redc_.lo \
-pre_divrem_1_.$(OBJEXT) pre_divrem_1_.lo set_str_.$(OBJEXT) \
-set_str_.lo set_strb_.$(OBJEXT) set_strb_.lo set_strp_.$(OBJEXT) \
-set_strp_.lo set_strs_.$(OBJEXT) set_strs_.lo speed_.$(OBJEXT) \
-speed_.lo speed-ext_.$(OBJEXT) speed-ext_.lo sqr_.$(OBJEXT) sqr_.lo \
-sqr_basecase_.$(OBJEXT) sqr_basecase_.lo sqrmod_bnm1_.$(OBJEXT) \
-sqrmod_bnm1_.lo tdiv_qr_.$(OBJEXT) tdiv_qr_.lo time_.$(OBJEXT) \
-time_.lo toom22_mul_.$(OBJEXT) toom22_mul_.lo toom2_sqr_.$(OBJEXT) \
-toom2_sqr_.lo toom33_mul_.$(OBJEXT) toom33_mul_.lo \
-toom3_sqr_.$(OBJEXT) toom3_sqr_.lo toom44_mul_.$(OBJEXT) \
-toom44_mul_.lo toom4_sqr_.$(OBJEXT) toom4_sqr_.lo toom6_sqr_.$(OBJEXT) \
-toom6_sqr_.lo toom6h_mul_.$(OBJEXT) toom6h_mul_.lo \
-toom8_sqr_.$(OBJEXT) toom8_sqr_.lo toom8h_mul_.$(OBJEXT) \
-toom8h_mul_.lo tuneup_.$(OBJEXT) tuneup_.lo : $(ANSI2KNR)
  
  mostlyclean-libtool:
         -rm -f *.lo
@@ -765,10 +634,15 @@ install-am: all-am
  
  installcheck: installcheck-am
  install-strip:
-       $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
-         install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
-         `test -z '$(STRIP)' || \
-           echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+       if test -z '$(STRIP)'; then \
+         $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+           install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+             install; \
+       else \
+         $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+           install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+           "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
+       fi
  mostlyclean-generic:
  
  clean-generic:
@@ -837,7 +711,7 @@ maintainer-clean-am: distclean-am maintainer-clean-generic
  
  mostlyclean: mostlyclean-am
  
-mostlyclean-am: mostlyclean-compile mostlyclean-generic mostlyclean-kr \
+mostlyclean-am: mostlyclean-compile mostlyclean-generic \
         mostlyclean-libtool
  
  pdf: pdf-am
@@ -850,7 +724,7 @@ ps-am:
  
  uninstall-am:
  
-.MAKE: $(top_builddir)/ansi2knr install-am install-strip
+.MAKE: install-am install-strip
  
  .PHONY: CTAGS GTAGS all all-am check check-am clean clean-generic \
         clean-libtool ctags distclean distclean-compile \
@@ -862,8 +736,8 @@ uninstall-am:
         install-pdf-am install-ps install-ps-am install-strip \
         installcheck installcheck-am installdirs maintainer-clean \
         maintainer-clean-generic mostlyclean mostlyclean-compile \
-       mostlyclean-generic mostlyclean-kr mostlyclean-libtool pdf \
-       pdf-am ps ps-am tags uninstall uninstall-am
+       mostlyclean-generic mostlyclean-libtool pdf pdf-am ps ps-am \
+       tags uninstall uninstall-am
  
  
  $(top_builddir)/tests/libtests.la:
@@ -895,6 +769,14 @@ sqr_asm.asm: $(top_builddir)/mpn/sqr_basecase.asm
         echo 'define(SQR_TOOM2_THRESHOLD_OVERRIDE,SQR_TOOM2_THRESHOLD_MAX)' >sqr_asm.asm
         echo 'include(../mpn/sqr_basecase.asm)' >>sqr_asm.asm
  
+# FIXME: Should it depend on $(top_builddir)/fac_ui.h too?
+fac_ui.c: $(top_builddir)/mpz/fac_ui.c
+       echo "#define TUNE_PROGRAM_BUILD 1"          >fac_ui.c
+       echo "#define __gmpz_fac_ui mpz_fac_ui_tune" >>fac_ui.c
+       echo "#define __gmpz_oddfac_1 mpz_oddfac_1_tune" >>fac_ui.c
+       echo "#include \"mpz/oddfac_1.c\""           >>fac_ui.c
+       echo "#include \"mpz/fac_ui.c\""             >>fac_ui.c
+
  # .s assembler, no preprocessing.
  #
  .s.o:
diff --git a/tune/README b/tune/README

index b6e41eda0fe4af8a630109dd47beea01f499ba7e..80acd7b1e4fd836532d16e79c05c964030695f87 100644 (file)
--- a/tune/README
+++ b/tune/README
@@ -287,10 +287,16 @@ mpn_divrem_1, using division by 32 as an example.
  
  EXAMPLE COMPARISONS - MULTIPLICATION
  
-mul_basecase takes a ".<r>" parameter which is the first (larger) size
-parameter.  For example to show speeds for 20x1 up to 20x15 in cycles,
+mul_basecase takes a ".<r>" parameter. If positive, it gives the second
+(smaller) operand size.  For example to show speeds for 3x3 up to 20x3 in
+cycles,
  
-        ./speed -s 1-15 -c mpn_mul_basecase.20
+        ./speed -s 3-20 -c mpn_mul_basecase.3
+
+A negative ".<-r>" parameter fixes the size of the product to the absolute
+value r.  For example to show speeds for 10x10 up to 19x1 in cycles,
+
+        ./speed -s 10-19 -c mpn_mul_basecase.-20
  
  mul_basecase with no parameter does an NxN multiply, so for example to show
  speeds in cycles for 1x1, 2x2, 3x3, etc, up to 20x20, in cycles,
diff --git a/tune/common.c b/tune/common.c

index 4c6629120c71156ce8d52297c0a82483e24d902a..daac5f10941d6bc5863f7899621612af84809960 100644 (file)
--- a/tune/common.c
+++ b/tune/common.c
@@ -1,7 +1,7 @@
  /* Shared speed subroutines.
  
-Copyright 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2008, 2009, 2010
-Free Software Foundation, Inc.
+Copyright 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2008, 2009, 2010,
+2011, 2012 Free Software Foundation, Inc.
  
  This file is part of the GNU MP Library.
  
@@ -41,6 +41,7 @@ along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
  
  int   speed_option_addrs = 0;
  int   speed_option_verbose = 0;
+int   speed_option_cycles_broken = 0;
  
  
  /* Provide __clz_tab even if it's not required, for the benefit of new code
@@ -121,10 +122,9 @@ double_cmp_ptr (const double *p, const double *q)
     s->r, -1.0 should be returned.  See the various base routines below.  */
  
  double
-speed_measure (double (*fun) __GMP_PROTO ((struct speed_params *s)),
-              struct speed_params *s)
+speed_measure (double (*fun) (struct speed_params *s), struct speed_params *s)
  {
-#define TOLERANCE    1.005  /* 0.5% */
+#define TOLERANCE    1.01  /* 1% */
    const int max_zeros = 10;
  
    struct speed_params  s_dummy;
@@ -366,6 +366,10 @@ speed_option_set (const char *s)
      {
        speed_option_verbose = n;
      }
+  else if (strcmp (s, "cycles-broken") == 0)
+    {
+      speed_option_cycles_broken = 1;
+    }
    else
      {
        printf ("Unrecognised -o option: %s\n", s);
@@ -456,6 +460,11 @@ speed_mpn_com (struct speed_params *s)
  {
    SPEED_ROUTINE_MPN_COPY (mpn_com);
  }
+double
+speed_mpn_tabselect (struct speed_params *s)
+{
+  SPEED_ROUTINE_MPN_TABSELECT (mpn_tabselect);
+}
  
  
  double
@@ -551,6 +560,20 @@ speed_mpn_mul_4 (struct speed_params *s)
    SPEED_ROUTINE_MPN_UNARY_4 (mpn_mul_4);
  }
  #endif
+#if HAVE_NATIVE_mpn_mul_5
+double
+speed_mpn_mul_5 (struct speed_params *s)
+{
+  SPEED_ROUTINE_MPN_UNARY_5 (mpn_mul_5);
+}
+#endif
+#if HAVE_NATIVE_mpn_mul_6
+double
+speed_mpn_mul_6 (struct speed_params *s)
+{
+  SPEED_ROUTINE_MPN_UNARY_6 (mpn_mul_6);
+}
+#endif
  
  
  double
@@ -662,6 +685,17 @@ speed_mpn_divrem_2_inv (struct speed_params *s)
    SPEED_ROUTINE_MPN_DIVREM_2 (mpn_divrem_2_inv);
  }
  
+double
+speed_mpn_div_qr_2n (struct speed_params *s)
+{
+  SPEED_ROUTINE_MPN_DIV_QR_2 (mpn_div_qr_2, 1);
+}
+double
+speed_mpn_div_qr_2u (struct speed_params *s)
+{
+  SPEED_ROUTINE_MPN_DIV_QR_2 (mpn_div_qr_2, 0);
+}
+
  double
  speed_mpn_mod_1 (struct speed_params *s)
  {
@@ -685,6 +719,16 @@ speed_mpn_mod_1_1 (struct speed_params *s)
    SPEED_ROUTINE_MPN_MOD_1_1 (mpn_mod_1_1p,mpn_mod_1_1p_cps);
  }
  double
+speed_mpn_mod_1_1_1 (struct speed_params *s)
+{
+  SPEED_ROUTINE_MPN_MOD_1_1 (mpn_mod_1_1p_1,mpn_mod_1_1p_cps_1);
+}
+double
+speed_mpn_mod_1_1_2 (struct speed_params *s)
+{
+  SPEED_ROUTINE_MPN_MOD_1_1 (mpn_mod_1_1p_2,mpn_mod_1_1p_cps_2);
+}
+double
  speed_mpn_mod_1_2 (struct speed_params *s)
  {
    SPEED_ROUTINE_MPN_MOD_1_N (mpn_mod_1s_2p,mpn_mod_1s_2p_cps,2);
@@ -822,6 +866,22 @@ speed_mpn_mu_bdiv_qr (struct speed_params *s)
    SPEED_ROUTINE_MPN_MU_BDIV_QR (mpn_mu_bdiv_qr, mpn_mu_bdiv_qr_itch);
  }
  
+double
+speed_mpn_broot (struct speed_params *s)
+{
+  SPEED_ROUTINE_MPN_BROOT (mpn_broot);
+}
+double
+speed_mpn_broot_invm1 (struct speed_params *s)
+{
+  SPEED_ROUTINE_MPN_BROOT (mpn_broot_invm1);
+}
+double
+speed_mpn_brootinv (struct speed_params *s)
+{
+  SPEED_ROUTINE_MPN_BROOTINV (mpn_brootinv, 5*s->size);
+}
+
  double
  speed_mpn_binvert (struct speed_params *s)
  {
@@ -886,6 +946,38 @@ speed_mpn_sub_n (struct speed_params *s)
  SPEED_ROUTINE_MPN_BINARY_N (mpn_sub_n);
  }
  
+double
+speed_mpn_add_err1_n (struct speed_params *s)
+{
+  SPEED_ROUTINE_MPN_BINARY_ERR1_N (mpn_add_err1_n);
+}
+double
+speed_mpn_sub_err1_n (struct speed_params *s)
+{
+  SPEED_ROUTINE_MPN_BINARY_ERR1_N (mpn_sub_err1_n);
+}
+double
+speed_mpn_add_err2_n (struct speed_params *s)
+{
+  SPEED_ROUTINE_MPN_BINARY_ERR2_N (mpn_add_err2_n);
+}
+double
+speed_mpn_sub_err2_n (struct speed_params *s)
+{
+  SPEED_ROUTINE_MPN_BINARY_ERR2_N (mpn_sub_err2_n);
+}
+double
+speed_mpn_add_err3_n (struct speed_params *s)
+{
+  SPEED_ROUTINE_MPN_BINARY_ERR3_N (mpn_add_err3_n);
+}
+double
+speed_mpn_sub_err3_n (struct speed_params *s)
+{
+  SPEED_ROUTINE_MPN_BINARY_ERR3_N (mpn_sub_err3_n);
+}
+
+
  #if HAVE_NATIVE_mpn_add_n_sub_n
  double
  speed_mpn_add_n_sub_n (struct speed_params *s)
@@ -908,6 +1000,27 @@ speed_mpn_sublsh1_n (struct speed_params *s)
    SPEED_ROUTINE_MPN_BINARY_N (mpn_sublsh1_n);
  }
  #endif
+#if HAVE_NATIVE_mpn_addlsh1_n_ip1
+double
+speed_mpn_addlsh1_n_ip1 (struct speed_params *s)
+{
+  SPEED_ROUTINE_MPN_COPY (mpn_addlsh1_n_ip1);
+}
+#endif
+#if HAVE_NATIVE_mpn_addlsh1_n_ip2
+double
+speed_mpn_addlsh1_n_ip2 (struct speed_params *s)
+{
+  SPEED_ROUTINE_MPN_COPY (mpn_addlsh1_n_ip2);
+}
+#endif
+#if HAVE_NATIVE_mpn_sublsh1_n_ip1
+double
+speed_mpn_sublsh1_n_ip1 (struct speed_params *s)
+{
+  SPEED_ROUTINE_MPN_COPY (mpn_sublsh1_n_ip1);
+}
+#endif
  #if HAVE_NATIVE_mpn_rsblsh1_n
  double
  speed_mpn_rsblsh1_n (struct speed_params *s)
@@ -929,6 +1042,27 @@ speed_mpn_sublsh2_n (struct speed_params *s)
    SPEED_ROUTINE_MPN_BINARY_N (mpn_sublsh2_n);
  }
  #endif
+#if HAVE_NATIVE_mpn_addlsh2_n_ip1
+double
+speed_mpn_addlsh2_n_ip1 (struct speed_params *s)
+{
+  SPEED_ROUTINE_MPN_COPY (mpn_addlsh2_n_ip1);
+}
+#endif
+#if HAVE_NATIVE_mpn_addlsh2_n_ip2
+double
+speed_mpn_addlsh2_n_ip2 (struct speed_params *s)
+{
+  SPEED_ROUTINE_MPN_COPY (mpn_addlsh2_n_ip2);
+}
+#endif
+#if HAVE_NATIVE_mpn_sublsh2_n_ip1
+double
+speed_mpn_sublsh2_n_ip1 (struct speed_params *s)
+{
+  SPEED_ROUTINE_MPN_COPY (mpn_sublsh2_n_ip1);
+}
+#endif
  #if HAVE_NATIVE_mpn_rsblsh2_n
  double
  speed_mpn_rsblsh2_n (struct speed_params *s)
@@ -936,6 +1070,48 @@ speed_mpn_rsblsh2_n (struct speed_params *s)
    SPEED_ROUTINE_MPN_BINARY_N (mpn_rsblsh2_n);
  }
  #endif
+#if HAVE_NATIVE_mpn_addlsh_n
+double
+speed_mpn_addlsh_n (struct speed_params *s)
+{
+  SPEED_ROUTINE_MPN_BINARY_N_CALL (mpn_addlsh_n (wp, xp, yp, s->size, 7));
+}
+#endif
+#if HAVE_NATIVE_mpn_sublsh_n
+double
+speed_mpn_sublsh_n (struct speed_params *s)
+{
+  SPEED_ROUTINE_MPN_BINARY_N_CALL (mpn_sublsh_n (wp, xp, yp, s->size, 7));
+}
+#endif
+#if HAVE_NATIVE_mpn_addlsh_n_ip1
+double
+speed_mpn_addlsh_n_ip1 (struct speed_params *s)
+{
+  SPEED_ROUTINE_MPN_UNARY_1_CALL (mpn_addlsh_n_ip1 (wp, s->xp, s->size, 7));
+}
+#endif
+#if HAVE_NATIVE_mpn_addlsh_n_ip2
+double
+speed_mpn_addlsh_n_ip2 (struct speed_params *s)
+{
+  SPEED_ROUTINE_MPN_UNARY_1_CALL (mpn_addlsh_n_ip2 (wp, s->xp, s->size, 7));
+}
+#endif
+#if HAVE_NATIVE_mpn_sublsh_n_ip1
+double
+speed_mpn_sublsh_n_ip1 (struct speed_params *s)
+{
+  SPEED_ROUTINE_MPN_UNARY_1_CALL (mpn_sublsh_n_ip1 (wp, s->xp, s->size, 7));
+}
+#endif
+#if HAVE_NATIVE_mpn_rsblsh_n
+double
+speed_mpn_rsblsh_n (struct speed_params *s)
+{
+  SPEED_ROUTINE_MPN_BINARY_N_CALL (mpn_rsblsh_n (wp, xp, yp, s->size, 7));
+}
+#endif
  #if HAVE_NATIVE_mpn_rsh1add_n
  double
  speed_mpn_rsh1add_n (struct speed_params *s)
@@ -951,47 +1127,58 @@ speed_mpn_rsh1sub_n (struct speed_params *s)
  }
  #endif
  
+double
+speed_mpn_addcnd_n (struct speed_params *s)
+{
+  SPEED_ROUTINE_MPN_BINARY_N_CALL (mpn_addcnd_n (wp, xp, yp, s->size, 1));
+}
+double
+speed_mpn_subcnd_n (struct speed_params *s)
+{
+  SPEED_ROUTINE_MPN_BINARY_N_CALL (mpn_subcnd_n (wp, xp, yp, s->size, 1));
+}
+
  /* mpn_and_n etc can be macros and so have to be handled with
     SPEED_ROUTINE_MPN_BINARY_N_CALL forms */
  double
  speed_mpn_and_n (struct speed_params *s)
  {
-  SPEED_ROUTINE_MPN_BINARY_N_CALL (mpn_and_n (wp, s->xp, s->yp, s->size));
+  SPEED_ROUTINE_MPN_BINARY_N_CALL (mpn_and_n (wp, xp, yp, s->size));
  }
  double
  speed_mpn_andn_n (struct speed_params *s)
  {
-SPEED_ROUTINE_MPN_BINARY_N_CALL (mpn_andn_n (wp, s->xp, s->yp, s->size));
+  SPEED_ROUTINE_MPN_BINARY_N_CALL (mpn_andn_n (wp, xp, yp, s->size));
  }
  double
  speed_mpn_nand_n (struct speed_params *s)
  {
-  SPEED_ROUTINE_MPN_BINARY_N_CALL (mpn_nand_n (wp, s->xp, s->yp, s->size));
+  SPEED_ROUTINE_MPN_BINARY_N_CALL (mpn_nand_n (wp, xp, yp, s->size));
  }
  double
  speed_mpn_ior_n (struct speed_params *s)
  {
-SPEED_ROUTINE_MPN_BINARY_N_CALL (mpn_ior_n (wp, s->xp, s->yp, s->size));
+  SPEED_ROUTINE_MPN_BINARY_N_CALL (mpn_ior_n (wp, xp, yp, s->size));
  }
  double
  speed_mpn_iorn_n (struct speed_params *s)
  {
-  SPEED_ROUTINE_MPN_BINARY_N_CALL (mpn_iorn_n (wp, s->xp, s->yp, s->size));
+  SPEED_ROUTINE_MPN_BINARY_N_CALL (mpn_iorn_n (wp, xp, yp, s->size));
  }
  double
  speed_mpn_nior_n (struct speed_params *s)
  {
-  SPEED_ROUTINE_MPN_BINARY_N_CALL (mpn_nior_n (wp, s->xp, s->yp, s->size));
+  SPEED_ROUTINE_MPN_BINARY_N_CALL (mpn_nior_n (wp, xp, yp, s->size));
  }
  double
  speed_mpn_xor_n (struct speed_params *s)
  {
-  SPEED_ROUTINE_MPN_BINARY_N_CALL (mpn_xor_n (wp, s->xp, s->yp, s->size));
+  SPEED_ROUTINE_MPN_BINARY_N_CALL (mpn_xor_n (wp, xp, yp, s->size));
  }
  double
  speed_mpn_xnor_n (struct speed_params *s)
  {
-  SPEED_ROUTINE_MPN_BINARY_N_CALL (mpn_xnor_n (wp, s->xp, s->yp, s->size));
+  SPEED_ROUTINE_MPN_BINARY_N_CALL (mpn_xnor_n (wp, xp, yp, s->size));
  }
  
  
@@ -1036,6 +1223,14 @@ speed_mpn_sqr_diagonal (struct speed_params *s)
  }
  #endif
  
+#if HAVE_NATIVE_mpn_sqr_diag_addlsh1
+double
+speed_mpn_sqr_diag_addlsh1 (struct speed_params *s)
+{
+  SPEED_ROUTINE_MPN_SQR_DIAG_ADDLSH1_CALL (mpn_sqr_diag_addlsh1 (wp, tp, s->xp, s->size));
+}
+#endif
+
  double
  speed_mpn_toom2_sqr (struct speed_params *s)
  {
@@ -1137,6 +1332,16 @@ speed_mpn_toom53_for_toom42_mul (struct speed_params *s)
  {
    SPEED_ROUTINE_MPN_TOOM53_FOR_TOOM42_MUL (mpn_toom53_mul);
  }
+double
+speed_mpn_toom43_for_toom54_mul (struct speed_params *s)
+{
+  SPEED_ROUTINE_MPN_TOOM43_FOR_TOOM54_MUL (mpn_toom43_mul);
+}
+double
+speed_mpn_toom54_for_toom43_mul (struct speed_params *s)
+{
+  SPEED_ROUTINE_MPN_TOOM54_FOR_TOOM43_MUL (mpn_toom54_mul);
+}
  
  double
  speed_mpn_nussbaumer_mul (struct speed_params *s)
@@ -1244,6 +1449,30 @@ speed_mpn_mullo_basecase (struct speed_params *s)
    SPEED_ROUTINE_MPN_MULLO_BASECASE (mpn_mullo_basecase);
  }
  
+double
+speed_mpn_mulmid_basecase (struct speed_params *s)
+{
+  SPEED_ROUTINE_MPN_MULMID (mpn_mulmid_basecase);
+}
+
+double
+speed_mpn_mulmid (struct speed_params *s)
+{
+  SPEED_ROUTINE_MPN_MULMID (mpn_mulmid);
+}
+
+double
+speed_mpn_mulmid_n (struct speed_params *s)
+{
+  SPEED_ROUTINE_MPN_MULMID_N (mpn_mulmid_n);
+}
+
+double
+speed_mpn_toom42_mulmid (struct speed_params *s)
+{
+  SPEED_ROUTINE_MPN_TOOM42_MULMID (mpn_toom42_mulmid);
+}
+
  double
  speed_mpn_mulmod_bnm1 (struct speed_params *s)
  {
@@ -1324,91 +1553,41 @@ speed_mpn_matrix22_mul (struct speed_params *s)
  double
  speed_mpn_hgcd (struct speed_params *s)
  {
-  mp_ptr wp;
-  mp_size_t hgcd_init_scratch = MPN_HGCD_MATRIX_INIT_ITCH (s->size);
-  mp_size_t hgcd_scratch = mpn_hgcd_itch (s->size);
-  mp_ptr ap;
-  mp_ptr bp;
-  mp_ptr tmp1;
-
-  struct hgcd_matrix hgcd;
-  int res;
-  unsigned i;
-  double t;
-  TMP_DECL;
-
-  if (s->size < 2)
-    return -1;
-
-  TMP_MARK;
-
-  SPEED_TMP_ALLOC_LIMBS (ap, s->size + 1, s->align_xp);
-  SPEED_TMP_ALLOC_LIMBS (bp, s->size + 1, s->align_yp);
-
-  s->xp[s->size - 1] |= 1;
-  s->yp[s->size - 1] |= 1;
-
-  SPEED_TMP_ALLOC_LIMBS (tmp1, hgcd_init_scratch, s->align_wp);
-  SPEED_TMP_ALLOC_LIMBS (wp, hgcd_scratch, s->align_wp);
-
-  speed_starttime ();
-  i = s->reps;
-  do
-    {
-      MPN_COPY (ap, s->xp, s->size);
-      MPN_COPY (bp, s->yp, s->size);
-      mpn_hgcd_matrix_init (&hgcd, s->size, tmp1);
-      res = mpn_hgcd (ap, bp, s->size, &hgcd, wp);
-    }
-  while (--i != 0);
-  t = speed_endtime ();
-  TMP_FREE;
-  return t;
+  SPEED_ROUTINE_MPN_HGCD_CALL (mpn_hgcd, mpn_hgcd_itch);
  }
  
  double
  speed_mpn_hgcd_lehmer (struct speed_params *s)
  {
-  mp_ptr wp;
-  mp_size_t hgcd_init_scratch = MPN_HGCD_MATRIX_INIT_ITCH (s->size);
-  mp_size_t hgcd_scratch = MPN_HGCD_LEHMER_ITCH (s->size);
-  mp_ptr ap;
-  mp_ptr bp;
-  mp_ptr tmp1;
-
-  struct hgcd_matrix hgcd;
-  int res;
-  unsigned i;
-  double t;
-  TMP_DECL;
-
-  if (s->size < 2)
-    return -1;
-
-  TMP_MARK;
-
-  SPEED_TMP_ALLOC_LIMBS (ap, s->size + 1, s->align_xp);
-  SPEED_TMP_ALLOC_LIMBS (bp, s->size + 1, s->align_yp);
+  SPEED_ROUTINE_MPN_HGCD_CALL (mpn_hgcd_lehmer, mpn_hgcd_lehmer_itch);
+}
  
-  s->xp[s->size - 1] |= 1;
-  s->yp[s->size - 1] |= 1;
+double
+speed_mpn_hgcd_appr (struct speed_params *s)
+{
+  SPEED_ROUTINE_MPN_HGCD_CALL (mpn_hgcd_appr, mpn_hgcd_appr_itch);
+}
  
-  SPEED_TMP_ALLOC_LIMBS (tmp1, hgcd_init_scratch, s->align_wp);
-  SPEED_TMP_ALLOC_LIMBS (wp, hgcd_scratch, s->align_wp);
+double
+speed_mpn_hgcd_appr_lehmer (struct speed_params *s)
+{
+  SPEED_ROUTINE_MPN_HGCD_CALL (mpn_hgcd_appr_lehmer, mpn_hgcd_appr_lehmer_itch);
+}
  
-  speed_starttime ();
-  i = s->reps;
-  do
-    {
-      MPN_COPY (ap, s->xp, s->size);
-      MPN_COPY (bp, s->yp, s->size);
-      mpn_hgcd_matrix_init (&hgcd, s->size, tmp1);
-      res = mpn_hgcd_lehmer (ap, bp, s->size, &hgcd, wp);
-    }
-  while (--i != 0);
-  t = speed_endtime ();
-  TMP_FREE;
-  return t;
+double
+speed_mpn_hgcd_reduce (struct speed_params *s)
+{
+  SPEED_ROUTINE_MPN_HGCD_REDUCE_CALL (mpn_hgcd_reduce, mpn_hgcd_reduce_itch);
+}
+double
+speed_mpn_hgcd_reduce_1 (struct speed_params *s)
+{
+  SPEED_ROUTINE_MPN_HGCD_REDUCE_CALL (mpn_hgcd_reduce_1, mpn_hgcd_reduce_1_itch);
+}
+double
+speed_mpn_hgcd_reduce_2 (struct speed_params *s)
+{
+  SPEED_ROUTINE_MPN_HGCD_REDUCE_CALL (mpn_hgcd_reduce_2, mpn_hgcd_reduce_2_itch);
  }
  
  double
@@ -1486,6 +1665,11 @@ speed_mpn_jacobi_base_3 (struct speed_params *s)
  {
    SPEED_ROUTINE_MPN_JACBASE (mpn_jacobi_base_3);
  }
+double
+speed_mpn_jacobi_base_4 (struct speed_params *s)
+{
+  SPEED_ROUTINE_MPN_JACBASE (mpn_jacobi_base_4);
+}
  
  
  double
@@ -1551,6 +1735,11 @@ speed_mpz_powm_redc (struct speed_params *s)
    SPEED_ROUTINE_MPZ_POWM (mpz_powm_redc);
  }
  double
+speed_mpz_powm_sec (struct speed_params *s)
+{
+  SPEED_ROUTINE_MPZ_POWM (mpz_powm_sec);
+}
+double
  speed_mpz_powm_ui (struct speed_params *s)
  {
    SPEED_ROUTINE_MPZ_POWM_UI (mpz_powm_ui);
@@ -1781,6 +1970,40 @@ speed_mpz_bin_uiui (struct speed_params *s)
    return t;
  }
  
+/* If r==0, calculate binomial(2^size,size),
+   otherwise calculate binomial(2^size,r). */
+
+double
+speed_mpz_bin_ui (struct speed_params *s)
+{
+  mpz_t          w, x;
+  unsigned long  k;
+  unsigned  i;
+  double    t;
+
+  mpz_init (w);
+  mpz_init_set_ui (x, 0);
+
+  mpz_setbit (x, s->size);
+
+  if (s->r != 0)
+    k = s->r;
+  else
+    k = s->size;
+
+  speed_starttime ();
+  i = s->reps;
+  do
+    {
+      mpz_bin_ui (w, x, k);
+    }
+  while (--i != 0);
+  t = speed_endtime ();
+
+  mpz_clear (w);
+  mpz_clear (x);
+  return t;
+}
  
  /* The multiplies are successively dependent so the latency is measured, not
     the issue rate.  There's only 10 per loop so the code doesn't get too big
@@ -2033,44 +2256,6 @@ speed_udiv_qrnnd (struct speed_params *s)
    SPEED_ROUTINE_UDIV_QRNND_B;
  }
  
-double
-speed_udiv_qrnnd_preinv1 (struct speed_params *s)
-{
-  SPEED_ROUTINE_UDIV_QRNND_A (1);
-  {
-    udiv_qrnnd_preinv1 (q, r, r, q, d, dinv);
-     udiv_qrnnd_preinv1 (q, r, r, q, d, dinv);
-     udiv_qrnnd_preinv1 (q, r, r, q, d, dinv);
-    udiv_qrnnd_preinv1 (q, r, r, q, d, dinv);
-     udiv_qrnnd_preinv1 (q, r, r, q, d, dinv);
-     udiv_qrnnd_preinv1 (q, r, r, q, d, dinv);
-    udiv_qrnnd_preinv1 (q, r, r, q, d, dinv);
-     udiv_qrnnd_preinv1 (q, r, r, q, d, dinv);
-     udiv_qrnnd_preinv1 (q, r, r, q, d, dinv);
-    udiv_qrnnd_preinv1 (q, r, r, q, d, dinv);
-  }
-  SPEED_ROUTINE_UDIV_QRNND_B;
-}
-
-double
-speed_udiv_qrnnd_preinv2 (struct speed_params *s)
-{
-  SPEED_ROUTINE_UDIV_QRNND_A (1);
-  {
-    udiv_qrnnd_preinv2 (q, r, r, q, d, dinv);
-     udiv_qrnnd_preinv2 (q, r, r, q, d, dinv);
-     udiv_qrnnd_preinv2 (q, r, r, q, d, dinv);
-    udiv_qrnnd_preinv2 (q, r, r, q, d, dinv);
-     udiv_qrnnd_preinv2 (q, r, r, q, d, dinv);
-     udiv_qrnnd_preinv2 (q, r, r, q, d, dinv);
-    udiv_qrnnd_preinv2 (q, r, r, q, d, dinv);
-     udiv_qrnnd_preinv2 (q, r, r, q, d, dinv);
-     udiv_qrnnd_preinv2 (q, r, r, q, d, dinv);
-    udiv_qrnnd_preinv2 (q, r, r, q, d, dinv);
-  }
-  SPEED_ROUTINE_UDIV_QRNND_B;
-}
-
  double
  speed_udiv_qrnnd_c (struct speed_params *s)
  {
diff --git a/tune/hgcd_appr_lehmer.c b/tune/hgcd_appr_lehmer.c

new file mode 100644 (file)

index 0000000..18123e9
--- /dev/null
+++ b/tune/hgcd_appr_lehmer.c
@@ -0,0 +1,29 @@
+/* mpn/generic/hgcd_appr.c forced to use Lehmer's quadratic algorithm. */
+
+/*
+Copyright 2010, 2011 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+#undef  HGCD_APPR_THRESHOLD
+#define HGCD_APPR_THRESHOLD MP_SIZE_T_MAX
+#define __gmpn_hgcd_appr  mpn_hgcd_appr_lehmer
+#define __gmpn_hgcd_appr_itch mpn_hgcd_appr_lehmer_itch
+
+#include "../mpn/generic/hgcd_appr.c"
diff --git a/tune/hgcd_lehmer.c b/tune/hgcd_lehmer.c

new file mode 100644 (file)

index 0000000..309d2c5
--- /dev/null
+++ b/tune/hgcd_lehmer.c
@@ -0,0 +1,29 @@
+/* mpn/generic/hgcd.c forced to use Lehmer's quadratic algorithm. */
+
+/*
+Copyright 2010 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+#undef  HGCD_THRESHOLD
+#define HGCD_THRESHOLD MP_SIZE_T_MAX
+#define __gmpn_hgcd  mpn_hgcd_lehmer
+#define __gmpn_hgcd_itch mpn_hgcd_lehmer_itch
+
+#include "../mpn/generic/hgcd.c"
diff --git a/tune/hgcd_reduce_1.c b/tune/hgcd_reduce_1.c

new file mode 100644 (file)

index 0000000..9963624
--- /dev/null
+++ b/tune/hgcd_reduce_1.c
@@ -0,0 +1,30 @@
+/* mpn/generic/hgcd_reduce.c forced to use hgcd. */
+
+/*
+Copyright 2010 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+#undef  HGCD_REDUCE_THRESHOLD
+#define HGCD_REDUCE_THRESHOLD MP_SIZE_T_MAX
+#define __gmpn_hgcd_reduce  mpn_hgcd_reduce_1
+#define __gmpn_hgcd_reduce_itch  mpn_hgcd_reduce_1_itch
+
+
+#include "../mpn/generic/hgcd_reduce.c"
diff --git a/tune/hgcd_reduce_2.c b/tune/hgcd_reduce_2.c

new file mode 100644 (file)

index 0000000..1eed4ba
--- /dev/null
+++ b/tune/hgcd_reduce_2.c
@@ -0,0 +1,29 @@
+/* mpn/generic/hgcd_reduce.c forced to use hgcd_appr. */
+
+/*
+Copyright 2010 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+#undef  HGCD_REDUCE_THRESHOLD
+#define HGCD_REDUCE_THRESHOLD 0
+#define __gmpn_hgcd_reduce mpn_hgcd_reduce_2
+#define __gmpn_hgcd_reduce_itch mpn_hgcd_reduce_2_itch
+
+#include "../mpn/generic/hgcd_reduce.c"
diff --git a/tune/jacbase4.c b/tune/jacbase4.c

new file mode 100644 (file)

index 0000000..19fe251
--- /dev/null
+++ b/tune/jacbase4.c
@@ -0,0 +1,27 @@
+/* mpn/generic/jacbase.c method 4.
+
+Copyright 2002, 2010 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+#undef JACOBI_BASE_METHOD
+#define JACOBI_BASE_METHOD 4
+#define __gmpn_jacobi_base mpn_jacobi_base_4
+
+#include "mpn/generic/jacbase.c"
diff --git a/tune/mod_1_1-1.c b/tune/mod_1_1-1.c

new file mode 100644 (file)

index 0000000..763d591
--- /dev/null
+++ b/tune/mod_1_1-1.c
@@ -0,0 +1,30 @@
+/* mpn/generic/mod_1_1.c method 1.
+
+Copyright 2011 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+#undef MOD_1_1P_METHOD
+#define MOD_1_1P_METHOD 1
+#undef mpn_mod_1_1p
+#undef mpn_mod_1_1p_cps
+#define mpn_mod_1_1p mpn_mod_1_1p_1
+#define mpn_mod_1_1p_cps mpn_mod_1_1p_cps_1
+
+#include "mpn/generic/mod_1_1.c"
diff --git a/tune/mod_1_1-2.c b/tune/mod_1_1-2.c

new file mode 100644 (file)

index 0000000..10d7bed
--- /dev/null
+++ b/tune/mod_1_1-2.c
@@ -0,0 +1,30 @@
+/* mpn/generic/mod_1_1.c method 2.
+
+Copyright 2011 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+#undef MOD_1_1P_METHOD
+#define MOD_1_1P_METHOD 2
+#undef mpn_mod_1_1p
+#undef mpn_mod_1_1p_cps
+#define mpn_mod_1_1p mpn_mod_1_1p_2
+#define mpn_mod_1_1p_cps mpn_mod_1_1p_cps_2
+
+#include "mpn/generic/mod_1_1.c"
diff --git a/tune/speed-ext.c b/tune/speed-ext.c

index 2035fa80db8d1c7cdb2e3c0b6af081d94443b866..08899120f0ed407582b9534db08f4c0226db8dba 100644 (file)
--- a/tune/speed-ext.c
+++ b/tune/speed-ext.c
@@ -57,9 +57,9 @@ along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
  
  
  #define SPEED_EXTRA_PROTOS                                              \
-  double speed_mean_calls __GMP_PROTO ((struct speed_params *s));       \
-  double speed_mean_open  __GMP_PROTO ((struct speed_params *s));       \
-  double speed_mean_open2 __GMP_PROTO ((struct speed_params *s));
+  double speed_mean_calls (struct speed_params *s);                    \
+  double speed_mean_open  (struct speed_params *s);                    \
+  double speed_mean_open2 (struct speed_params *s);
  
  #define SPEED_EXTRA_ROUTINES            \
    { "mean_calls",  speed_mean_calls  }, \
diff --git a/tune/speed.c b/tune/speed.c

index 2ead1bec85fdbc642a88425a780c10d61c2d3bf4..fe8254d04d1007634cd8e11f829e7885e7b44c05 100644 (file)
--- a/tune/speed.c
+++ b/tune/speed.c
@@ -1,7 +1,7 @@
  /* Speed measuring program.
  
-Copyright 1999, 2000, 2001, 2002, 2003, 2005, 2006, 2008, 2009, 2010 Free
-Software Foundation, Inc.
+Copyright 1999, 2000, 2001, 2002, 2003, 2005, 2006, 2008, 2009, 2010,
+2011, 2012 Free Software Foundation, Inc.
  
  This file is part of the GNU MP Library.
  
@@ -153,6 +153,13 @@ const struct routine_t {
    { "mpn_add_n",         speed_mpn_add_n,     FLAG_R_OPTIONAL },
    { "mpn_sub_n",         speed_mpn_sub_n,     FLAG_R_OPTIONAL },
  
+  { "mpn_add_err1_n",    speed_mpn_add_err1_n    },
+  { "mpn_add_err2_n",    speed_mpn_add_err2_n    },
+  { "mpn_add_err3_n",    speed_mpn_add_err3_n    },
+  { "mpn_sub_err1_n",    speed_mpn_sub_err1_n    },
+  { "mpn_sub_err2_n",    speed_mpn_sub_err2_n    },
+  { "mpn_sub_err3_n",    speed_mpn_sub_err3_n    },
+
  #if HAVE_NATIVE_mpn_add_n_sub_n
    { "mpn_add_n_sub_n",      speed_mpn_add_n_sub_n,     FLAG_R_OPTIONAL },
  #endif
@@ -191,6 +198,12 @@ const struct routine_t {
  #if HAVE_NATIVE_mpn_mul_4
    { "mpn_mul_4",         speed_mpn_mul_4,     FLAG_R_OPTIONAL },
  #endif
+#if HAVE_NATIVE_mpn_mul_5
+  { "mpn_mul_5",         speed_mpn_mul_5,     FLAG_R_OPTIONAL },
+#endif
+#if HAVE_NATIVE_mpn_mul_6
+  { "mpn_mul_6",         speed_mpn_mul_6,     FLAG_R_OPTIONAL },
+#endif
  
    { "mpn_divrem_1",      speed_mpn_divrem_1,  FLAG_R },
    { "mpn_divrem_1f",     speed_mpn_divrem_1f, FLAG_R },
@@ -198,18 +211,20 @@ const struct routine_t {
    { "mpn_divrem_1c",     speed_mpn_divrem_1c, FLAG_R },
    { "mpn_divrem_1cf",    speed_mpn_divrem_1cf,FLAG_R },
  #endif
-  { "mpn_mod_1",         speed_mpn_mod_1,     FLAG_R_OPTIONAL },
+  { "mpn_mod_1",         speed_mpn_mod_1,     FLAG_R },
  #if HAVE_NATIVE_mpn_mod_1c
-  { "mpn_mod_1c",        speed_mpn_mod_1c,    FLAG_R_OPTIONAL },
+  { "mpn_mod_1c",        speed_mpn_mod_1c,    FLAG_R },
  #endif
    { "mpn_preinv_divrem_1",  speed_mpn_preinv_divrem_1,  FLAG_R },
    { "mpn_preinv_divrem_1f", speed_mpn_preinv_divrem_1f, FLAG_R },
    { "mpn_preinv_mod_1",  speed_mpn_preinv_mod_1, FLAG_R },
  
-  { "mpn_mod_1_1",       speed_mpn_mod_1_1,       FLAG_R_OPTIONAL },
-  { "mpn_mod_1s_2",      speed_mpn_mod_1_2,       FLAG_R_OPTIONAL },
-  { "mpn_mod_1s_3",      speed_mpn_mod_1_3,       FLAG_R_OPTIONAL },
-  { "mpn_mod_1s_4",      speed_mpn_mod_1_4,       FLAG_R_OPTIONAL },
+  { "mpn_mod_1_1",       speed_mpn_mod_1_1,       FLAG_R },
+  { "mpn_mod_1_1_1",     speed_mpn_mod_1_1_1,     FLAG_R },
+  { "mpn_mod_1_1_2",     speed_mpn_mod_1_1_2,     FLAG_R },
+  { "mpn_mod_1s_2",      speed_mpn_mod_1_2,       FLAG_R },
+  { "mpn_mod_1s_3",      speed_mpn_mod_1_3,       FLAG_R },
+  { "mpn_mod_1s_4",      speed_mpn_mod_1_4,       FLAG_R },
  
    { "mpn_divrem_1_div",  speed_mpn_divrem_1_div,  FLAG_R },
    { "mpn_divrem_1_inv",  speed_mpn_divrem_1_inv,  FLAG_R },
@@ -222,10 +237,13 @@ const struct routine_t {
    { "mpn_divrem_2_div",  speed_mpn_divrem_2_div,    },
    { "mpn_divrem_2_inv",  speed_mpn_divrem_2_inv,    },
  
+  { "mpn_div_qr_2n",     speed_mpn_div_qr_2n,       },
+  { "mpn_div_qr_2u",     speed_mpn_div_qr_2u,       },
+
    { "mpn_divexact_1",    speed_mpn_divexact_1,    FLAG_R },
    { "mpn_divexact_by3",  speed_mpn_divexact_by3          },
  
-  { "mpn_bdiv_q_1",      speed_mpn_bdiv_q_1,      FLAG_R_OPTIONAL },
+  { "mpn_bdiv_q_1",      speed_mpn_bdiv_q_1,      FLAG_R },
    { "mpn_pi1_bdiv_q_1",  speed_mpn_pi1_bdiv_q_1,  FLAG_R_OPTIONAL },
    { "mpn_bdiv_dbm1c",    speed_mpn_bdiv_dbm1c,    FLAG_R_OPTIONAL },
  
@@ -259,16 +277,17 @@ const struct routine_t {
  
    { "mpn_hgcd",          speed_mpn_hgcd             },
    { "mpn_hgcd_lehmer",   speed_mpn_hgcd_lehmer      },
+  { "mpn_hgcd_appr",     speed_mpn_hgcd_appr        },
+  { "mpn_hgcd_appr_lehmer", speed_mpn_hgcd_appr_lehmer },
+
+  { "mpn_hgcd_reduce",   speed_mpn_hgcd_reduce      },
+  { "mpn_hgcd_reduce_1", speed_mpn_hgcd_reduce_1    },
+  { "mpn_hgcd_reduce_2", speed_mpn_hgcd_reduce_2    },
  
    { "mpn_gcd_1",         speed_mpn_gcd_1,  FLAG_R_OPTIONAL },
    { "mpn_gcd_1N",        speed_mpn_gcd_1N, FLAG_R_OPTIONAL },
  
    { "mpn_gcd",           speed_mpn_gcd                    },
-#if 0
-  { "mpn_gcd_binary",    speed_mpn_gcd_binary             },
-  { "mpn_gcd_accel",     speed_mpn_gcd_accel              },
-  { "find_a",            speed_find_a,        FLAG_NODATA },
-#endif
  
    { "mpn_gcdext",            speed_mpn_gcdext            },
    { "mpn_gcdext_single",     speed_mpn_gcdext_single     },
@@ -283,6 +302,7 @@ const struct routine_t {
    { "mpn_jacobi_base_1", speed_mpn_jacobi_base_1    },
    { "mpn_jacobi_base_2", speed_mpn_jacobi_base_2    },
    { "mpn_jacobi_base_3", speed_mpn_jacobi_base_3    },
+  { "mpn_jacobi_base_4", speed_mpn_jacobi_base_4    },
  
    { "mpn_mul",           speed_mpn_mul,         FLAG_R_OPTIONAL },
    { "mpn_mul_basecase",  speed_mpn_mul_basecase,FLAG_R_OPTIONAL },
@@ -290,6 +310,9 @@ const struct routine_t {
  #if HAVE_NATIVE_mpn_sqr_diagonal
    { "mpn_sqr_diagonal",  speed_mpn_sqr_diagonal     },
  #endif
+#if HAVE_NATIVE_mpn_sqr_diag_addlsh1
+  { "mpn_sqr_diag_addlsh1", speed_mpn_sqr_diag_addlsh1 },
+#endif
  
    { "mpn_mul_n",         speed_mpn_mul_n            },
    { "mpn_sqr",           speed_mpn_sqr              },
@@ -320,6 +343,11 @@ const struct routine_t {
    { "mpn_mullo_n",        speed_mpn_mullo_n         },
    { "mpn_mullo_basecase", speed_mpn_mullo_basecase  },
  
+  { "mpn_mulmid_basecase",  speed_mpn_mulmid_basecase, FLAG_R_OPTIONAL },
+  { "mpn_toom42_mulmid",    speed_mpn_toom42_mulmid },
+  { "mpn_mulmid_n",         speed_mpn_mulmid_n },
+  { "mpn_mulmid",           speed_mpn_mulmid, FLAG_R_OPTIONAL },
+
    { "mpn_bc_mulmod_bnm1",      speed_mpn_bc_mulmod_bnm1      },
    { "mpn_mulmod_bnm1",         speed_mpn_mulmod_bnm1         },
    { "mpn_mulmod_bnm1_rounded", speed_mpn_mulmod_bnm1_rounded },
@@ -342,6 +370,10 @@ const struct routine_t {
    { "mpn_sbpi1_bdiv_q",        speed_mpn_sbpi1_bdiv_q        },
    { "mpn_dcpi1_bdiv_q",        speed_mpn_dcpi1_bdiv_q        },
  
+  { "mpn_broot",               speed_mpn_broot,    FLAG_R },
+  { "mpn_broot_invm1",         speed_mpn_broot_invm1, FLAG_R },
+  { "mpn_brootinv",            speed_mpn_brootinv, FLAG_R },
+
    { "mpn_get_str",          speed_mpn_get_str,     FLAG_R_OPTIONAL },
    { "mpn_set_str",          speed_mpn_set_str,     FLAG_R_OPTIONAL },
    { "mpn_set_str_basecase", speed_mpn_bc_set_str,  FLAG_R_OPTIONAL },
@@ -357,10 +389,12 @@ const struct routine_t {
  
    { "mpz_add",           speed_mpz_add              },
    { "mpz_bin_uiui",      speed_mpz_bin_uiui, FLAG_NODATA | FLAG_R_OPTIONAL },
+  { "mpz_bin_ui",        speed_mpz_bin_ui,   FLAG_NODATA | FLAG_R_OPTIONAL },
    { "mpz_fac_ui",        speed_mpz_fac_ui,   FLAG_NODATA   },
    { "mpz_powm",          speed_mpz_powm             },
    { "mpz_powm_mod",      speed_mpz_powm_mod         },
    { "mpz_powm_redc",     speed_mpz_powm_redc        },
+  { "mpz_powm_sec",      speed_mpz_powm_sec        },
    { "mpz_powm_ui",       speed_mpz_powm_ui,  FLAG_R_OPTIONAL },
  
    { "mpz_mod",           speed_mpz_mod              },
@@ -378,31 +412,71 @@ const struct routine_t {
  #if HAVE_NATIVE_mpn_copyd
    { "mpn_copyd",         speed_mpn_copyd            },
  #endif
+  { "mpn_tabselect",     speed_mpn_tabselect, FLAG_R_OPTIONAL },
  #if HAVE_NATIVE_mpn_addlsh1_n
-  { "mpn_addlsh1_n",     speed_mpn_addlsh1_n        },
+  { "mpn_addlsh1_n",     speed_mpn_addlsh1_n, FLAG_R_OPTIONAL },
  #endif
  #if HAVE_NATIVE_mpn_sublsh1_n
-  { "mpn_sublsh1_n",     speed_mpn_sublsh1_n        },
+  { "mpn_sublsh1_n",     speed_mpn_sublsh1_n, FLAG_R_OPTIONAL },
+#endif
+#if HAVE_NATIVE_mpn_addlsh1_n_ip1
+  { "mpn_addlsh1_n_ip1", speed_mpn_addlsh1_n_ip1    },
+#endif
+#if HAVE_NATIVE_mpn_addlsh1_n_ip2
+  { "mpn_addlsh1_n_ip2", speed_mpn_addlsh1_n_ip2    },
+#endif
+#if HAVE_NATIVE_mpn_sublsh1_n_ip1
+  { "mpn_sublsh1_n_ip1", speed_mpn_sublsh1_n_ip1    },
  #endif
  #if HAVE_NATIVE_mpn_rsblsh1_n
-  { "mpn_rsblsh1_n",     speed_mpn_rsblsh1_n        },
+  { "mpn_rsblsh1_n",     speed_mpn_rsblsh1_n, FLAG_R_OPTIONAL },
  #endif
  #if HAVE_NATIVE_mpn_addlsh2_n
-  { "mpn_addlsh2_n",     speed_mpn_addlsh2_n        },
+  { "mpn_addlsh2_n",     speed_mpn_addlsh2_n, FLAG_R_OPTIONAL },
  #endif
  #if HAVE_NATIVE_mpn_sublsh2_n
-  { "mpn_sublsh2_n",     speed_mpn_sublsh2_n        },
+  { "mpn_sublsh2_n",     speed_mpn_sublsh2_n, FLAG_R_OPTIONAL },
+#endif
+#if HAVE_NATIVE_mpn_addlsh2_n_ip1
+  { "mpn_addlsh2_n_ip1", speed_mpn_addlsh2_n_ip1    },
+#endif
+#if HAVE_NATIVE_mpn_addlsh2_n_ip2
+  { "mpn_addlsh2_n_ip2", speed_mpn_addlsh2_n_ip2    },
+#endif
+#if HAVE_NATIVE_mpn_sublsh2_n_ip1
+  { "mpn_sublsh2_n_ip1", speed_mpn_sublsh2_n_ip1    },
  #endif
  #if HAVE_NATIVE_mpn_rsblsh2_n
-  { "mpn_rsblsh2_n",     speed_mpn_rsblsh2_n        },
+  { "mpn_rsblsh2_n",     speed_mpn_rsblsh2_n, FLAG_R_OPTIONAL },
+#endif
+#if HAVE_NATIVE_mpn_addlsh_n
+  { "mpn_addlsh_n",     speed_mpn_addlsh_n, FLAG_R_OPTIONAL },
+#endif
+#if HAVE_NATIVE_mpn_sublsh_n
+  { "mpn_sublsh_n",     speed_mpn_sublsh_n, FLAG_R_OPTIONAL },
+#endif
+#if HAVE_NATIVE_mpn_addlsh_n_ip1
+  { "mpn_addlsh_n_ip1", speed_mpn_addlsh_n_ip1    },
+#endif
+#if HAVE_NATIVE_mpn_addlsh_n_ip2
+  { "mpn_addlsh_n_ip2", speed_mpn_addlsh_n_ip2    },
+#endif
+#if HAVE_NATIVE_mpn_sublsh_n_ip1
+  { "mpn_sublsh_n_ip1", speed_mpn_sublsh_n_ip1    },
+#endif
+#if HAVE_NATIVE_mpn_rsblsh_n
+  { "mpn_rsblsh_n",     speed_mpn_rsblsh_n, FLAG_R_OPTIONAL },
  #endif
  #if HAVE_NATIVE_mpn_rsh1add_n
-  { "mpn_rsh1add_n",     speed_mpn_rsh1add_n        },
+  { "mpn_rsh1add_n",     speed_mpn_rsh1add_n, FLAG_R_OPTIONAL },
  #endif
  #if HAVE_NATIVE_mpn_rsh1sub_n
-  { "mpn_rsh1sub_n",     speed_mpn_rsh1sub_n        },
+  { "mpn_rsh1sub_n",     speed_mpn_rsh1sub_n, FLAG_R_OPTIONAL },
  #endif
  
+  { "mpn_addcnd_n",     speed_mpn_addcnd_n, FLAG_R_OPTIONAL },
+  { "mpn_subcnd_n",     speed_mpn_subcnd_n, FLAG_R_OPTIONAL },
+
    { "MPN_ZERO",          speed_MPN_ZERO             },
  
    { "binvert_limb",       speed_binvert_limb,       FLAG_NODATA },
@@ -432,8 +506,6 @@ const struct routine_t {
    { "count_trailing_zeros", speed_count_trailing_zeros, FLAG_NODATA | FLAG_R_OPTIONAL },
  
    { "udiv_qrnnd",             speed_udiv_qrnnd,             FLAG_R_OPTIONAL },
-  { "udiv_qrnnd_preinv1",     speed_udiv_qrnnd_preinv1,     FLAG_R_OPTIONAL },
-  { "udiv_qrnnd_preinv2",     speed_udiv_qrnnd_preinv2,     FLAG_R_OPTIONAL },
    { "udiv_qrnnd_c",           speed_udiv_qrnnd_c,           FLAG_R_OPTIONAL },
  #if HAVE_NATIVE_mpn_udiv_qrnnd
    { "mpn_udiv_qrnnd",         speed_mpn_udiv_qrnnd,         FLAG_R_OPTIONAL },
@@ -796,7 +868,7 @@ run_gnuplot (int argc, char *argv[])
    fprintf (fp, "set key left\n");
  
    /* designed to make it possible to see crossovers easily */
-  fprintf (fp, "set data style lines\n");
+  fprintf (fp, "set style data lines\n");
  
    fprintf (fp, "plot ");
    for (i = 0; i < num_choices; i++)
diff --git a/tune/speed.h b/tune/speed.h

index ac004cef3ddf5ca5b8337081fb2f18a3c2e580b9..3ab12d63db6620b97636f14fd27982ba6f37c5e9 100644 (file)
--- a/tune/speed.h
+++ b/tune/speed.h
@@ -1,7 +1,7 @@
  /* Header for speed and threshold things.
  
-Copyright 1999, 2000, 2001, 2002, 2003, 2005, 2006, 2008, 2009, 2010 Free
-Software Foundation, Inc.
+Copyright 1999, 2000, 2001, 2002, 2003, 2005, 2006, 2008, 2009, 2010, 2011,
+2012 Free Software Foundation, Inc.
  
  This file is part of the GNU MP Library.
  
@@ -87,13 +87,13 @@ extern double  speed_unittime;
  extern double  speed_cycletime;
  extern int     speed_precision;
  extern char    speed_time_string[];
-void speed_time_init __GMP_PROTO ((void));
-void speed_cycletime_fail __GMP_PROTO ((const char *str));
-void speed_cycletime_init __GMP_PROTO ((void));
-void speed_cycletime_need_cycles __GMP_PROTO ((void));
-void speed_cycletime_need_seconds __GMP_PROTO ((void));
-void speed_starttime __GMP_PROTO ((void));
-double speed_endtime __GMP_PROTO ((void));
+void speed_time_init (void);
+void speed_cycletime_fail (const char *str);
+void speed_cycletime_init (void);
+void speed_cycletime_need_cycles (void);
+void speed_cycletime_need_seconds (void);
+void speed_starttime (void);
+double speed_endtime (void);
  
  
  struct speed_params {
@@ -117,250 +117,296 @@ struct speed_params {
    struct {
      mp_ptr    ptr;
      mp_size_t size;
-  } src[3], dst[3];
+  } src[5], dst[4];
  };
  
-typedef double (*speed_function_t) __GMP_PROTO ((struct speed_params *s));
+typedef double (*speed_function_t) (struct speed_params *);
  
-double speed_measure __GMP_PROTO ((speed_function_t fun, struct speed_params *s));
+double speed_measure (speed_function_t fun, struct speed_params *);
  
  /* Prototypes for speed measuring routines */
  
-double speed_back_to_back __GMP_PROTO ((struct speed_params *s));
-double speed_count_leading_zeros __GMP_PROTO ((struct speed_params *s));
-double speed_count_trailing_zeros __GMP_PROTO ((struct speed_params *s));
-double speed_find_a __GMP_PROTO ((struct speed_params *s));
-double speed_gmp_allocate_free __GMP_PROTO ((struct speed_params *s));
-double speed_gmp_allocate_reallocate_free __GMP_PROTO ((struct speed_params *s));
-double speed_invert_limb __GMP_PROTO ((struct speed_params *s));
-double speed_malloc_free __GMP_PROTO ((struct speed_params *s));
-double speed_malloc_realloc_free __GMP_PROTO ((struct speed_params *s));
-double speed_memcpy __GMP_PROTO ((struct speed_params *s));
-double speed_binvert_limb __GMP_PROTO ((struct speed_params *s));
-double speed_binvert_limb_mul1 __GMP_PROTO ((struct speed_params *s));
-double speed_binvert_limb_loop __GMP_PROTO ((struct speed_params *s));
-double speed_binvert_limb_cond __GMP_PROTO ((struct speed_params *s));
-double speed_binvert_limb_arith __GMP_PROTO ((struct speed_params *s));
-
-double speed_mpf_init_clear __GMP_PROTO ((struct speed_params *s));
-
-double speed_mpn_add_n __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_addlsh1_n __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_addlsh2_n __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_add_n_sub_n __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_and_n __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_andn_n __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_addmul_1 __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_addmul_2 __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_addmul_3 __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_addmul_4 __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_addmul_5 __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_addmul_6 __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_addmul_7 __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_addmul_8 __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_com __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_copyd __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_copyi __GMP_PROTO ((struct speed_params *s));
-double speed_MPN_COPY __GMP_PROTO ((struct speed_params *s));
-double speed_MPN_COPY_DECR __GMP_PROTO ((struct speed_params *s));
-double speed_MPN_COPY_INCR __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_divexact_1 __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_divexact_by3 __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_bdiv_q_1 __GMP_PROTO ((struct speed_params *));
-double speed_mpn_pi1_bdiv_q_1 __GMP_PROTO ((struct speed_params *));
-double speed_mpn_bdiv_dbm1c __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_divrem_1 __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_divrem_1f __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_divrem_1c __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_divrem_1cf __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_divrem_1_div __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_divrem_1f_div __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_divrem_1_inv __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_divrem_1f_inv __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_divrem_2 __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_divrem_2_div __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_divrem_2_inv __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_fib2_ui __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_matrix22_mul __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_hgcd __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_hgcd_lehmer __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_gcd __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_gcd_1 __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_gcd_1N __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_gcdext __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_gcdext_double __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_gcdext_one_double __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_gcdext_one_single __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_gcdext_single __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_get_str __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_hamdist __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_ior_n __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_iorn_n __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_jacobi_base __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_jacobi_base_1 __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_jacobi_base_2 __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_jacobi_base_3 __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_lshift __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_lshiftc __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_mod_1 __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_mod_1c __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_mod_1_div __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_mod_1_inv __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_mod_1_1 __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_mod_1_2 __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_mod_1_3 __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_mod_1_4 __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_mod_34lsub1 __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_modexact_1_odd __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_modexact_1c_odd __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_mul_1 __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_mul_1_inplace __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_mul_2 __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_mul_3 __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_mul_4 __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_mul __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_mul_basecase __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_mul_fft __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_mul_fft_sqr __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_fft_mul __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_fft_sqr __GMP_PROTO ((struct speed_params *s));
+double speed_back_to_back (struct speed_params *);
+double speed_count_leading_zeros (struct speed_params *);
+double speed_count_trailing_zeros (struct speed_params *);
+double speed_find_a (struct speed_params *);
+double speed_gmp_allocate_free (struct speed_params *);
+double speed_gmp_allocate_reallocate_free (struct speed_params *);
+double speed_invert_limb (struct speed_params *);
+double speed_malloc_free (struct speed_params *);
+double speed_malloc_realloc_free (struct speed_params *);
+double speed_memcpy (struct speed_params *);
+double speed_binvert_limb (struct speed_params *);
+double speed_binvert_limb_mul1 (struct speed_params *);
+double speed_binvert_limb_loop (struct speed_params *);
+double speed_binvert_limb_cond (struct speed_params *);
+double speed_binvert_limb_arith (struct speed_params *);
+
+double speed_mpf_init_clear (struct speed_params *);
+
+double speed_mpn_add_n (struct speed_params *);
+double speed_mpn_add_err1_n (struct speed_params *);
+double speed_mpn_add_err2_n (struct speed_params *);
+double speed_mpn_add_err3_n (struct speed_params *);
+double speed_mpn_addcnd_n (struct speed_params *);
+double speed_mpn_addlsh_n (struct speed_params *);
+double speed_mpn_addlsh1_n (struct speed_params *);
+double speed_mpn_addlsh2_n (struct speed_params *);
+double speed_mpn_addlsh_n_ip1 (struct speed_params *);
+double speed_mpn_addlsh1_n_ip1 (struct speed_params *);
+double speed_mpn_addlsh2_n_ip1 (struct speed_params *);
+double speed_mpn_addlsh_n_ip2 (struct speed_params *);
+double speed_mpn_addlsh1_n_ip2 (struct speed_params *);
+double speed_mpn_addlsh2_n_ip2 (struct speed_params *);
+double speed_mpn_add_n_sub_n (struct speed_params *);
+double speed_mpn_and_n (struct speed_params *);
+double speed_mpn_andn_n (struct speed_params *);
+double speed_mpn_addmul_1 (struct speed_params *);
+double speed_mpn_addmul_2 (struct speed_params *);
+double speed_mpn_addmul_3 (struct speed_params *);
+double speed_mpn_addmul_4 (struct speed_params *);
+double speed_mpn_addmul_5 (struct speed_params *);
+double speed_mpn_addmul_6 (struct speed_params *);
+double speed_mpn_addmul_7 (struct speed_params *);
+double speed_mpn_addmul_8 (struct speed_params *);
+double speed_mpn_com (struct speed_params *);
+double speed_mpn_copyd (struct speed_params *);
+double speed_mpn_copyi (struct speed_params *);
+double speed_MPN_COPY (struct speed_params *);
+double speed_MPN_COPY_DECR (struct speed_params *);
+double speed_MPN_COPY_INCR (struct speed_params *);
+double speed_mpn_tabselect (struct speed_params *);
+double speed_mpn_divexact_1 (struct speed_params *);
+double speed_mpn_divexact_by3 (struct speed_params *);
+double speed_mpn_bdiv_q_1 (struct speed_params *);
+double speed_mpn_pi1_bdiv_q_1 (struct speed_params *);
+double speed_mpn_bdiv_dbm1c (struct speed_params *);
+double speed_mpn_divrem_1 (struct speed_params *);
+double speed_mpn_divrem_1f (struct speed_params *);
+double speed_mpn_divrem_1c (struct speed_params *);
+double speed_mpn_divrem_1cf (struct speed_params *);
+double speed_mpn_divrem_1_div (struct speed_params *);
+double speed_mpn_divrem_1f_div (struct speed_params *);
+double speed_mpn_divrem_1_inv (struct speed_params *);
+double speed_mpn_divrem_1f_inv (struct speed_params *);
+double speed_mpn_divrem_2 (struct speed_params *);
+double speed_mpn_divrem_2_div (struct speed_params *);
+double speed_mpn_divrem_2_inv (struct speed_params *);
+double speed_mpn_div_qr_2n (struct speed_params *);
+double speed_mpn_div_qr_2u (struct speed_params *);
+double speed_mpn_fib2_ui (struct speed_params *);
+double speed_mpn_matrix22_mul (struct speed_params *);
+double speed_mpn_hgcd (struct speed_params *);
+double speed_mpn_hgcd_lehmer (struct speed_params *);
+double speed_mpn_hgcd_appr (struct speed_params *);
+double speed_mpn_hgcd_appr_lehmer (struct speed_params *);
+double speed_mpn_hgcd_reduce (struct speed_params *);
+double speed_mpn_hgcd_reduce_1 (struct speed_params *);
+double speed_mpn_hgcd_reduce_2 (struct speed_params *);
+double speed_mpn_gcd (struct speed_params *);
+double speed_mpn_gcd_1 (struct speed_params *);
+double speed_mpn_gcd_1N (struct speed_params *);
+double speed_mpn_gcdext (struct speed_params *);
+double speed_mpn_gcdext_double (struct speed_params *);
+double speed_mpn_gcdext_one_double (struct speed_params *);
+double speed_mpn_gcdext_one_single (struct speed_params *);
+double speed_mpn_gcdext_single (struct speed_params *);
+double speed_mpn_get_str (struct speed_params *);
+double speed_mpn_hamdist (struct speed_params *);
+double speed_mpn_ior_n (struct speed_params *);
+double speed_mpn_iorn_n (struct speed_params *);
+double speed_mpn_jacobi_base (struct speed_params *);
+double speed_mpn_jacobi_base_1 (struct speed_params *);
+double speed_mpn_jacobi_base_2 (struct speed_params *);
+double speed_mpn_jacobi_base_3 (struct speed_params *);
+double speed_mpn_jacobi_base_4 (struct speed_params *);
+double speed_mpn_lshift (struct speed_params *);
+double speed_mpn_lshiftc (struct speed_params *);
+double speed_mpn_mod_1 (struct speed_params *);
+double speed_mpn_mod_1c (struct speed_params *);
+double speed_mpn_mod_1_div (struct speed_params *);
+double speed_mpn_mod_1_inv (struct speed_params *);
+double speed_mpn_mod_1_1 (struct speed_params *);
+double speed_mpn_mod_1_1_1 (struct speed_params *);
+double speed_mpn_mod_1_1_2 (struct speed_params *);
+double speed_mpn_mod_1_2 (struct speed_params *);
+double speed_mpn_mod_1_3 (struct speed_params *);
+double speed_mpn_mod_1_4 (struct speed_params *);
+double speed_mpn_mod_34lsub1 (struct speed_params *);
+double speed_mpn_modexact_1_odd (struct speed_params *);
+double speed_mpn_modexact_1c_odd (struct speed_params *);
+double speed_mpn_mul_1 (struct speed_params *);
+double speed_mpn_mul_1_inplace (struct speed_params *);
+double speed_mpn_mul_2 (struct speed_params *);
+double speed_mpn_mul_3 (struct speed_params *);
+double speed_mpn_mul_4 (struct speed_params *);
+double speed_mpn_mul_5 (struct speed_params *);
+double speed_mpn_mul_6 (struct speed_params *);
+double speed_mpn_mul (struct speed_params *);
+double speed_mpn_mul_basecase (struct speed_params *);
+double speed_mpn_mulmid (struct speed_params *);
+double speed_mpn_mulmid_basecase (struct speed_params *);
+double speed_mpn_mul_fft (struct speed_params *);
+double speed_mpn_mul_fft_sqr (struct speed_params *);
+double speed_mpn_fft_mul (struct speed_params *);
+double speed_mpn_fft_sqr (struct speed_params *);
  #if WANT_OLD_FFT_FULL
-double speed_mpn_mul_fft_full __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_mul_fft_full_sqr __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_mul_fft_full (struct speed_params *);
+double speed_mpn_mul_fft_full_sqr (struct speed_params *);
  #endif
-double speed_mpn_nussbaumer_mul __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_nussbaumer_mul_sqr __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_mul_n __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_mul_n_sqr __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_mullo_n __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_mullo_basecase __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_nand_n __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_nior_n __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_popcount __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_preinv_divrem_1 __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_preinv_divrem_1f __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_preinv_mod_1 __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_sbpi1_div_qr __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_dcpi1_div_qr __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_sbpi1_divappr_q __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_dcpi1_divappr_q __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_mu_div_qr __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_mu_divappr_q __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_mupi_div_qr __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_mu_div_q __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_sbpi1_bdiv_qr __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_dcpi1_bdiv_qr __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_sbpi1_bdiv_q __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_dcpi1_bdiv_q __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_mu_bdiv_q __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_mu_bdiv_qr __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_invert __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_invertappr __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_ni_invertappr __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_binvert __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_redc_1 __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_redc_2 __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_redc_n __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_rsblsh1_n __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_rsblsh2_n __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_rsh1add_n __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_rsh1sub_n __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_rshift __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_sb_divrem_m3 __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_sb_divrem_m3_div __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_sb_divrem_m3_inv __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_set_str __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_bc_set_str __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_dc_set_str __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_set_str_pre __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_sqr_basecase __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_sqr_diagonal __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_sqr __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_sqrtrem __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_rootrem __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_sub_n __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_sublsh1_n __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_sublsh2_n __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_submul_1 __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_toom2_sqr __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_toom3_sqr __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_toom4_sqr __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_toom6_sqr __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_toom8_sqr __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_toom22_mul __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_toom33_mul __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_toom44_mul __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_toom6h_mul __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_toom8h_mul __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_toom32_mul __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_toom42_mul __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_toom43_mul __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_toom63_mul __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_toom32_for_toom43_mul __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_toom43_for_toom32_mul __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_toom32_for_toom53_mul __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_toom53_for_toom32_mul __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_toom42_for_toom53_mul __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_toom53_for_toom42_mul __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_mulmod_bnm1 __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_bc_mulmod_bnm1 __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_mulmod_bnm1_rounded __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_sqrmod_bnm1 __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_udiv_qrnnd __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_udiv_qrnnd_r __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_umul_ppmm __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_umul_ppmm_r __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_xnor_n __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_xor_n __GMP_PROTO ((struct speed_params *s));
-double speed_MPN_ZERO __GMP_PROTO ((struct speed_params *s));
-
-double speed_mpq_init_clear __GMP_PROTO ((struct speed_params *s));
-
-double speed_mpz_add __GMP_PROTO ((struct speed_params *s));
-double speed_mpz_bin_uiui __GMP_PROTO ((struct speed_params *s));
-double speed_mpz_fac_ui __GMP_PROTO ((struct speed_params *s));
-double speed_mpz_fib_ui __GMP_PROTO ((struct speed_params *s));
-double speed_mpz_fib2_ui __GMP_PROTO ((struct speed_params *s));
-double speed_mpz_init_clear __GMP_PROTO ((struct speed_params *s));
-double speed_mpz_init_realloc_clear __GMP_PROTO ((struct speed_params *s));
-double speed_mpz_jacobi __GMP_PROTO ((struct speed_params *s));
-double speed_mpz_lucnum_ui __GMP_PROTO ((struct speed_params *s));
-double speed_mpz_lucnum2_ui __GMP_PROTO ((struct speed_params *s));
-double speed_mpz_mod __GMP_PROTO ((struct speed_params *s));
-double speed_mpz_powm __GMP_PROTO ((struct speed_params *s));
-double speed_mpz_powm_mod __GMP_PROTO ((struct speed_params *s));
-double speed_mpz_powm_redc __GMP_PROTO ((struct speed_params *s));
-double speed_mpz_powm_ui __GMP_PROTO ((struct speed_params *s));
-double speed_mpz_urandomb __GMP_PROTO ((struct speed_params *s));
-
-double speed_gmp_randseed __GMP_PROTO ((struct speed_params *s));
-double speed_gmp_randseed_ui __GMP_PROTO ((struct speed_params *s));
-
-double speed_noop __GMP_PROTO ((struct speed_params *s));
-double speed_noop_wxs __GMP_PROTO ((struct speed_params *s));
-double speed_noop_wxys __GMP_PROTO ((struct speed_params *s));
-
-double speed_operator_div __GMP_PROTO ((struct speed_params *s));
-double speed_operator_mod __GMP_PROTO ((struct speed_params *s));
-
-double speed_udiv_qrnnd __GMP_PROTO ((struct speed_params *s));
-double speed_udiv_qrnnd_preinv1 __GMP_PROTO ((struct speed_params *s));
-double speed_udiv_qrnnd_preinv2 __GMP_PROTO ((struct speed_params *s));
-double speed_udiv_qrnnd_c __GMP_PROTO ((struct speed_params *s));
-double speed_umul_ppmm __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_nussbaumer_mul (struct speed_params *);
+double speed_mpn_nussbaumer_mul_sqr (struct speed_params *);
+double speed_mpn_mul_n (struct speed_params *);
+double speed_mpn_mul_n_sqr (struct speed_params *);
+double speed_mpn_mulmid_n (struct speed_params *);
+double speed_mpn_mullo_n (struct speed_params *);
+double speed_mpn_mullo_basecase (struct speed_params *);
+double speed_mpn_nand_n (struct speed_params *);
+double speed_mpn_nior_n (struct speed_params *);
+double speed_mpn_popcount (struct speed_params *);
+double speed_mpn_preinv_divrem_1 (struct speed_params *);
+double speed_mpn_preinv_divrem_1f (struct speed_params *);
+double speed_mpn_preinv_mod_1 (struct speed_params *);
+double speed_mpn_sbpi1_div_qr (struct speed_params *);
+double speed_mpn_dcpi1_div_qr (struct speed_params *);
+double speed_mpn_sbpi1_divappr_q (struct speed_params *);
+double speed_mpn_dcpi1_divappr_q (struct speed_params *);
+double speed_mpn_mu_div_qr (struct speed_params *);
+double speed_mpn_mu_divappr_q (struct speed_params *);
+double speed_mpn_mupi_div_qr (struct speed_params *);
+double speed_mpn_mu_div_q (struct speed_params *);
+double speed_mpn_sbpi1_bdiv_qr (struct speed_params *);
+double speed_mpn_dcpi1_bdiv_qr (struct speed_params *);
+double speed_mpn_sbpi1_bdiv_q (struct speed_params *);
+double speed_mpn_dcpi1_bdiv_q (struct speed_params *);
+double speed_mpn_mu_bdiv_q (struct speed_params *);
+double speed_mpn_mu_bdiv_qr (struct speed_params *);
+double speed_mpn_broot (struct speed_params *);
+double speed_mpn_broot_invm1 (struct speed_params *);
+double speed_mpn_brootinv (struct speed_params *);
+double speed_mpn_invert (struct speed_params *);
+double speed_mpn_invertappr (struct speed_params *);
+double speed_mpn_ni_invertappr (struct speed_params *);
+double speed_mpn_binvert (struct speed_params *);
+double speed_mpn_redc_1 (struct speed_params *);
+double speed_mpn_redc_2 (struct speed_params *);
+double speed_mpn_redc_n (struct speed_params *);
+double speed_mpn_rsblsh_n (struct speed_params *);
+double speed_mpn_rsblsh1_n (struct speed_params *);
+double speed_mpn_rsblsh2_n (struct speed_params *);
+double speed_mpn_rsh1add_n (struct speed_params *);
+double speed_mpn_rsh1sub_n (struct speed_params *);
+double speed_mpn_rshift (struct speed_params *);
+double speed_mpn_sb_divrem_m3 (struct speed_params *);
+double speed_mpn_sb_divrem_m3_div (struct speed_params *);
+double speed_mpn_sb_divrem_m3_inv (struct speed_params *);
+double speed_mpn_set_str (struct speed_params *);
+double speed_mpn_bc_set_str (struct speed_params *);
+double speed_mpn_dc_set_str (struct speed_params *);
+double speed_mpn_set_str_pre (struct speed_params *);
+double speed_mpn_sqr_basecase (struct speed_params *);
+double speed_mpn_sqr_diag_addlsh1 (struct speed_params *);
+double speed_mpn_sqr_diagonal (struct speed_params *);
+double speed_mpn_sqr (struct speed_params *);
+double speed_mpn_sqrtrem (struct speed_params *);
+double speed_mpn_rootrem (struct speed_params *);
+double speed_mpn_sub_n (struct speed_params *);
+double speed_mpn_sub_err1_n (struct speed_params *);
+double speed_mpn_sub_err2_n (struct speed_params *);
+double speed_mpn_sub_err3_n (struct speed_params *);
+double speed_mpn_subcnd_n (struct speed_params *);
+double speed_mpn_sublsh_n (struct speed_params *);
+double speed_mpn_sublsh1_n (struct speed_params *);
+double speed_mpn_sublsh2_n (struct speed_params *);
+double speed_mpn_sublsh_n_ip1 (struct speed_params *);
+double speed_mpn_sublsh1_n_ip1 (struct speed_params *);
+double speed_mpn_sublsh2_n_ip1 (struct speed_params *);
+double speed_mpn_submul_1 (struct speed_params *);
+double speed_mpn_toom2_sqr (struct speed_params *);
+double speed_mpn_toom3_sqr (struct speed_params *);
+double speed_mpn_toom4_sqr (struct speed_params *);
+double speed_mpn_toom6_sqr (struct speed_params *);
+double speed_mpn_toom8_sqr (struct speed_params *);
+double speed_mpn_toom22_mul (struct speed_params *);
+double speed_mpn_toom33_mul (struct speed_params *);
+double speed_mpn_toom44_mul (struct speed_params *);
+double speed_mpn_toom6h_mul (struct speed_params *);
+double speed_mpn_toom8h_mul (struct speed_params *);
+double speed_mpn_toom32_mul (struct speed_params *);
+double speed_mpn_toom42_mul (struct speed_params *);
+double speed_mpn_toom43_mul (struct speed_params *);
+double speed_mpn_toom63_mul (struct speed_params *);
+double speed_mpn_toom32_for_toom43_mul (struct speed_params *);
+double speed_mpn_toom43_for_toom32_mul (struct speed_params *);
+double speed_mpn_toom32_for_toom53_mul (struct speed_params *);
+double speed_mpn_toom53_for_toom32_mul (struct speed_params *);
+double speed_mpn_toom42_for_toom53_mul (struct speed_params *);
+double speed_mpn_toom53_for_toom42_mul (struct speed_params *);
+double speed_mpn_toom43_for_toom54_mul (struct speed_params *);
+double speed_mpn_toom54_for_toom43_mul (struct speed_params *);
+double speed_mpn_toom42_mulmid (struct speed_params *);
+double speed_mpn_mulmod_bnm1 (struct speed_params *);
+double speed_mpn_bc_mulmod_bnm1 (struct speed_params *);
+double speed_mpn_mulmod_bnm1_rounded (struct speed_params *);
+double speed_mpn_sqrmod_bnm1 (struct speed_params *);
+double speed_mpn_udiv_qrnnd (struct speed_params *);
+double speed_mpn_udiv_qrnnd_r (struct speed_params *);
+double speed_mpn_umul_ppmm (struct speed_params *);
+double speed_mpn_umul_ppmm_r (struct speed_params *);
+double speed_mpn_xnor_n (struct speed_params *);
+double speed_mpn_xor_n (struct speed_params *);
+double speed_MPN_ZERO (struct speed_params *);
+
+double speed_mpq_init_clear (struct speed_params *);
+
+double speed_mpz_add (struct speed_params *);
+double speed_mpz_bin_uiui (struct speed_params *);
+double speed_mpz_bin_ui (struct speed_params *);
+double speed_mpz_fac_ui (struct speed_params *);
+double speed_mpz_fib_ui (struct speed_params *);
+double speed_mpz_fib2_ui (struct speed_params *);
+double speed_mpz_init_clear (struct speed_params *);
+double speed_mpz_init_realloc_clear (struct speed_params *);
+double speed_mpz_jacobi (struct speed_params *);
+double speed_mpz_lucnum_ui (struct speed_params *);
+double speed_mpz_lucnum2_ui (struct speed_params *);
+double speed_mpz_mod (struct speed_params *);
+double speed_mpz_powm (struct speed_params *);
+double speed_mpz_powm_mod (struct speed_params *);
+double speed_mpz_powm_redc (struct speed_params *);
+double speed_mpz_powm_sec (struct speed_params *);
+double speed_mpz_powm_ui (struct speed_params *);
+double speed_mpz_urandomb (struct speed_params *);
+
+double speed_gmp_randseed (struct speed_params *);
+double speed_gmp_randseed_ui (struct speed_params *);
+
+double speed_noop (struct speed_params *);
+double speed_noop_wxs (struct speed_params *);
+double speed_noop_wxys (struct speed_params *);
+
+double speed_operator_div (struct speed_params *);
+double speed_operator_mod (struct speed_params *);
+
+double speed_udiv_qrnnd (struct speed_params *);
+double speed_udiv_qrnnd_preinv1 (struct speed_params *);
+double speed_udiv_qrnnd_preinv2 (struct speed_params *);
+double speed_udiv_qrnnd_preinv3 (struct speed_params *);
+double speed_udiv_qrnnd_c (struct speed_params *);
+double speed_umul_ppmm (struct speed_params *);
  
  /* Prototypes for other routines */
  
  /* low 32-bits in p[0], high 32-bits in p[1] */
-void speed_cyclecounter __GMP_PROTO ((unsigned p[2]));
+void speed_cyclecounter (unsigned p[2]);
  
-void mftb_function __GMP_PROTO ((unsigned p[2]));
+void mftb_function (unsigned p[2]);
  
  /* In i386 gcc -fPIC, ebx is a fixed register and can't be declared a dummy
     output or a clobber for the cpuid, hence an explicit save and restore.  A
     clobber as such doesn't provoke an error unfortunately (gcc 3.0), so use
     the dummy output style in non-PIC, so there's an error if somehow -fPIC
-   is used without a -DPIC to tell us about it.         */
+   is used without a -DPIC to tell us about it.  */
  #if defined(__GNUC__) && ! defined (NO_ASM)    \
    && (defined (__i386__) || defined (__i486__))
  #if defined (PIC) || defined (__APPLE_CC__)
@@ -392,67 +438,77 @@ void mftb_function __GMP_PROTO ((unsigned p[2]));
  #endif
  #endif
  
-double speed_cyclecounter_diff __GMP_PROTO ((const unsigned [2], const unsigned [2]));
-int gettimeofday_microseconds_p __GMP_PROTO ((void));
-int getrusage_microseconds_p __GMP_PROTO ((void));
-int cycles_works_p __GMP_PROTO ((void));
-long clk_tck __GMP_PROTO ((void));
-double freq_measure __GMP_PROTO ((const char *, double (*)(void)));
-
-int double_cmp_ptr __GMP_PROTO ((const double *, const double *));
-void pentium_wbinvd __GMP_PROTO ((void));
-typedef int (*qsort_function_t) __GMP_PROTO ((const void *, const void *));
-
-void noop __GMP_PROTO ((void));
-void noop_1 __GMP_PROTO ((mp_limb_t));
-void noop_wxs __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t));
-void noop_wxys __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
-void mpn_cache_fill __GMP_PROTO ((mp_srcptr, mp_size_t));
-void mpn_cache_fill_dummy __GMP_PROTO ((mp_limb_t));
-void speed_cache_fill __GMP_PROTO ((struct speed_params *));
-void speed_operand_src __GMP_PROTO ((struct speed_params *, mp_ptr, mp_size_t));
-void speed_operand_dst __GMP_PROTO ((struct speed_params *, mp_ptr, mp_size_t));
+double speed_cyclecounter_diff (const unsigned [2], const unsigned [2]);
+int gettimeofday_microseconds_p (void);
+int getrusage_microseconds_p (void);
+int cycles_works_p (void);
+long clk_tck (void);
+double freq_measure (const char *, double (*)(void));
+
+int double_cmp_ptr (const double *, const double *);
+void pentium_wbinvd (void);
+typedef int (*qsort_function_t) (const void *, const void *);
+
+void noop (void);
+void noop_1 (mp_limb_t);
+void noop_wxs (mp_ptr, mp_srcptr, mp_size_t);
+void noop_wxys (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);
+void mpn_cache_fill (mp_srcptr, mp_size_t);
+void mpn_cache_fill_dummy (mp_limb_t);
+void speed_cache_fill (struct speed_params *);
+void speed_operand_src (struct speed_params *, mp_ptr, mp_size_t);
+void speed_operand_dst (struct speed_params *, mp_ptr, mp_size_t);
  
  extern int  speed_option_addrs;
  extern int  speed_option_verbose;
-void speed_option_set __GMP_PROTO((const char *));
+extern int  speed_option_cycles_broken;
+void speed_option_set (const char *);
  
-mp_limb_t mpn_divrem_1_div __GMP_PROTO ((mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t));
-mp_limb_t mpn_divrem_1_inv __GMP_PROTO ((mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t));
-mp_limb_t mpn_divrem_2_div __GMP_PROTO ((mp_ptr, mp_size_t, mp_ptr, mp_size_t, mp_srcptr));
-mp_limb_t mpn_divrem_2_inv __GMP_PROTO ((mp_ptr, mp_size_t, mp_ptr, mp_size_t, mp_srcptr));
+mp_limb_t mpn_divrem_1_div (mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t);
+mp_limb_t mpn_divrem_1_inv (mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t);
+mp_limb_t mpn_divrem_2_div (mp_ptr, mp_size_t, mp_ptr, mp_size_t, mp_srcptr);
+mp_limb_t mpn_divrem_2_inv (mp_ptr, mp_size_t, mp_ptr, mp_size_t, mp_srcptr);
  
-int mpn_jacobi_base_1 __GMP_PROTO ((mp_limb_t, mp_limb_t, int));
-int mpn_jacobi_base_2 __GMP_PROTO ((mp_limb_t, mp_limb_t, int));
-int mpn_jacobi_base_3 __GMP_PROTO ((mp_limb_t, mp_limb_t, int));
+int mpn_jacobi_base_1 (mp_limb_t, mp_limb_t, int);
+int mpn_jacobi_base_2 (mp_limb_t, mp_limb_t, int);
+int mpn_jacobi_base_3 (mp_limb_t, mp_limb_t, int);
+int mpn_jacobi_base_4 (mp_limb_t, mp_limb_t, int);
  
-mp_limb_t mpn_mod_1_div __GMP_PROTO ((mp_srcptr, mp_size_t, mp_limb_t));
-mp_limb_t mpn_mod_1_inv __GMP_PROTO ((mp_srcptr, mp_size_t, mp_limb_t));
+mp_limb_t mpn_mod_1_div (mp_srcptr, mp_size_t, mp_limb_t);
+mp_limb_t mpn_mod_1_inv (mp_srcptr, mp_size_t, mp_limb_t);
  
-mp_size_t mpn_gcd_binary
-  __GMP_PROTO ((mp_ptr, mp_ptr, mp_size_t, mp_ptr, mp_size_t));
-mp_size_t mpn_gcd_accel
-  __GMP_PROTO ((mp_ptr, mp_ptr, mp_size_t, mp_ptr, mp_size_t));
-mp_size_t mpn_gcdext_one_double
-  __GMP_PROTO ((mp_ptr, mp_ptr, mp_size_t *, mp_ptr, mp_size_t, mp_ptr, mp_size_t));
-mp_size_t mpn_gcdext_one_single
-  __GMP_PROTO ((mp_ptr, mp_ptr, mp_size_t *, mp_ptr, mp_size_t, mp_ptr, mp_size_t));
-mp_size_t mpn_gcdext_single
-  __GMP_PROTO ((mp_ptr, mp_ptr, mp_size_t *, mp_ptr, mp_size_t, mp_ptr, mp_size_t));
-mp_size_t mpn_gcdext_double
-  __GMP_PROTO ((mp_ptr, mp_ptr, mp_size_t *, mp_ptr, mp_size_t, mp_ptr, mp_size_t));
+mp_limb_t mpn_mod_1_1p_1 (mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t [4]);
+mp_limb_t mpn_mod_1_1p_2 (mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t [4]);
  
-mp_limb_t mpn_sb_divrem_mn_div __GMP_PROTO ((mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t));
-mp_limb_t mpn_sb_divrem_mn_inv __GMP_PROTO ((mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t));
+void mpn_mod_1_1p_cps_1 (mp_limb_t [4], mp_limb_t);
+void mpn_mod_1_1p_cps_2 (mp_limb_t [4], mp_limb_t);
  
-mp_size_t mpn_set_str_basecase __GMP_PROTO ((mp_ptr, const unsigned char *, size_t, int));
-void mpn_pre_set_str __GMP_PROTO ((mp_ptr, unsigned char *, size_t, powers_t *, mp_ptr));
+mp_size_t mpn_gcdext_one_double (mp_ptr, mp_ptr, mp_size_t *, mp_ptr, mp_size_t, mp_ptr, mp_size_t);
+mp_size_t mpn_gcdext_one_single (mp_ptr, mp_ptr, mp_size_t *, mp_ptr, mp_size_t, mp_ptr, mp_size_t);
+mp_size_t mpn_gcdext_single (mp_ptr, mp_ptr, mp_size_t *, mp_ptr, mp_size_t, mp_ptr, mp_size_t);
+mp_size_t mpn_gcdext_double (mp_ptr, mp_ptr, mp_size_t *, mp_ptr, mp_size_t, mp_ptr, mp_size_t);
+mp_size_t mpn_hgcd_lehmer (mp_ptr, mp_ptr, mp_size_t, struct hgcd_matrix *, mp_ptr);
+mp_size_t mpn_hgcd_lehmer_itch (mp_size_t);
  
-void mpz_powm_mod __GMP_PROTO ((mpz_ptr, mpz_srcptr, mpz_srcptr, mpz_srcptr));
-void mpz_powm_redc __GMP_PROTO ((mpz_ptr, mpz_srcptr, mpz_srcptr, mpz_srcptr));
+mp_size_t mpn_hgcd_appr_lehmer (mp_ptr, mp_ptr, mp_size_t, struct hgcd_matrix *, mp_ptr);
+mp_size_t mpn_hgcd_appr_lehmer_itch (mp_size_t);
  
-int speed_routine_count_zeros_setup
-  __GMP_PROTO ((struct speed_params *, mp_ptr, int, int));
+mp_size_t mpn_hgcd_reduce_1 (struct hgcd_matrix *, mp_ptr, mp_ptr, mp_size_t, mp_size_t, mp_ptr);
+mp_size_t mpn_hgcd_reduce_1_itch (mp_size_t, mp_size_t);
+
+mp_size_t mpn_hgcd_reduce_2 (struct hgcd_matrix *, mp_ptr, mp_ptr, mp_size_t, mp_size_t, mp_ptr);
+mp_size_t mpn_hgcd_reduce_2_itch (mp_size_t, mp_size_t);
+
+mp_limb_t mpn_sb_divrem_mn_div (mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t);
+mp_limb_t mpn_sb_divrem_mn_inv (mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t);
+
+mp_size_t mpn_set_str_basecase (mp_ptr, const unsigned char *, size_t, int);
+void mpn_pre_set_str (mp_ptr, unsigned char *, size_t, powers_t *, mp_ptr);
+
+void mpz_powm_mod (mpz_ptr, mpz_srcptr, mpz_srcptr, mpz_srcptr);
+void mpz_powm_redc (mpz_ptr, mpz_srcptr, mpz_srcptr, mpz_srcptr);
+
+int speed_routine_count_zeros_setup (struct speed_params *, mp_ptr, int, int);
  
  
  /* "get" is called repeatedly until it ticks over, just in case on a fast
@@ -549,7 +605,7 @@ int speed_routine_count_zeros_setup
  #define SPEED_RESTRICT_COND(cond)   if (!(cond)) return -1.0;
  
  /* For mpn_copy or similar. */
-#define SPEED_ROUTINE_MPN_COPY(function)                               \
+#define SPEED_ROUTINE_MPN_COPY_CALL(call)                              \
    {                                                                    \
      mp_ptr    wp;                                                      \
      unsigned  i;                                                       \
@@ -568,13 +624,18 @@ int speed_routine_count_zeros_setup
      speed_starttime ();                                                        \
      i = s->reps;                                                       \
      do                                                                 \
-      function (wp, s->xp, s->size);                                   \
+      call;                                                            \
      while (--i != 0);                                                  \
      t = speed_endtime ();                                              \
                                                                         \
      TMP_FREE;                                                          \
      return t;                                                          \
    }
+#define SPEED_ROUTINE_MPN_COPY(function)                               \
+  SPEED_ROUTINE_MPN_COPY_CALL (function (wp, s->xp, s->size))
+
+#define SPEED_ROUTINE_MPN_TABSELECT(function)                          \
+  SPEED_ROUTINE_MPN_COPY_CALL (function (wp, s->xp, s->size, 1, s->r))
  
  #define SPEED_ROUTINE_MPN_COPYC(function)                              \
    {                                                                    \
@@ -604,7 +665,7 @@ int speed_routine_count_zeros_setup
    }
  
  /* s->size is still in limbs, and it's limbs which are copied, but
-   "function" takes a size in bytes not limbs. */
+   "function" takes a size in bytes not limbs.  */
  #define SPEED_ROUTINE_MPN_COPY_BYTES(function)                         \
    {                                                                    \
      mp_ptr    wp;                                                      \
@@ -680,6 +741,72 @@ int speed_routine_count_zeros_setup
      return t;                                                          \
    }
  
+
+/* For mpn_aors_errK_n, where 1 <= K <= 3. */
+#define SPEED_ROUTINE_MPN_BINARY_ERR_N_CALL(call, K)                   \
+  {                                                                    \
+    mp_ptr     wp;                                                     \
+    mp_ptr     xp, yp;                                                 \
+    mp_ptr     zp[K];                                                  \
+    mp_limb_t  ep[2*K];                                                        \
+    unsigned   i;                                                      \
+    double     t;                                                      \
+    TMP_DECL;                                                          \
+                                                                       \
+    SPEED_RESTRICT_COND (s->size >= 1);                                        \
+                                                                       \
+    TMP_MARK;                                                          \
+    SPEED_TMP_ALLOC_LIMBS (wp, s->size, s->align_wp);                  \
+                                                                       \
+    /* (don't have a mechanism to specify zp alignments) */            \
+    for (i = 0; i < K; i++)                                            \
+      SPEED_TMP_ALLOC_LIMBS (zp[i], s->size, 0);                       \
+                                                                       \
+    xp = s->xp;                                                                \
+    yp = s->yp;                                                                \
+                                                                       \
+    if (s->r == 0)     ;                                               \
+    else if (s->r == 1) { xp = wp;         }                           \
+    else if (s->r == 2) {         yp = wp; }                           \
+    else if (s->r == 3) { xp = wp; yp = wp; }                          \
+    else if (s->r == 4) {     yp = xp;     }                           \
+    else               {                                               \
+      TMP_FREE;                                                                \
+      return -1.0;                                                     \
+    }                                                                  \
+                                                                       \
+    /* initialize wp if operand overlap */                             \
+    if (xp == wp || yp == wp)                                          \
+      MPN_COPY (wp, s->xp, s->size);                                   \
+                                                                       \
+    speed_operand_src (s, xp, s->size);                                        \
+    speed_operand_src (s, yp, s->size);                                        \
+    for (i = 0; i < K; i++)                                            \
+      speed_operand_src (s, zp[i], s->size);                           \
+    speed_operand_dst (s, wp, s->size);                                        \
+    speed_cache_fill (s);                                              \
+                                                                       \
+    speed_starttime ();                                                        \
+    i = s->reps;                                                       \
+    do                                                                 \
+      call;                                                            \
+    while (--i != 0);                                                  \
+    t = speed_endtime ();                                              \
+                                                                       \
+    TMP_FREE;                                                          \
+    return t;                                                          \
+  }
+
+#define SPEED_ROUTINE_MPN_BINARY_ERR1_N(function)                      \
+  SPEED_ROUTINE_MPN_BINARY_ERR_N_CALL ((*function) (wp, xp, yp, ep, zp[0], s->size, 0), 1)
+
+#define SPEED_ROUTINE_MPN_BINARY_ERR2_N(function)                      \
+  SPEED_ROUTINE_MPN_BINARY_ERR_N_CALL ((*function) (wp, xp, yp, ep, zp[0], zp[1], s->size, 0), 2)
+
+#define SPEED_ROUTINE_MPN_BINARY_ERR3_N(function)                      \
+  SPEED_ROUTINE_MPN_BINARY_ERR_N_CALL ((*function) (wp, xp, yp, ep, zp[0], zp[1], zp[2], s->size, 0), 3)
+
+
  /* For mpn_add_n, mpn_sub_n, or similar. */
  #define SPEED_ROUTINE_MPN_ADDSUB_N_CALL(call)                          \
    {                                                                    \
@@ -895,30 +1022,30 @@ int speed_routine_count_zeros_setup
  /* For mpn_mul, mpn_mul_basecase, xsize=r, ysize=s->size. */
  #define SPEED_ROUTINE_MPN_MUL(function)                                        \
    {                                                                    \
-    mp_ptr    wp, xp;                                                  \
+    mp_ptr    wp;                                                      \
      mp_size_t size1;                                                   \
      unsigned  i;                                                       \
      double    t;                                                       \
      TMP_DECL;                                                          \
                                                                         \
      size1 = (s->r == 0 ? s->size : s->r);                              \
+    if (size1 < 0) size1 = -size1 - s->size;                           \
                                                                         \
-    SPEED_RESTRICT_COND (s->size >= 1);                                        \
-    SPEED_RESTRICT_COND (size1 >= s->size);                            \
+    SPEED_RESTRICT_COND (size1 >= 1);                                  \
+    SPEED_RESTRICT_COND (s->size >= size1);                            \
                                                                         \
      TMP_MARK;                                                          \
      SPEED_TMP_ALLOC_LIMBS (wp, size1 + s->size, s->align_wp);          \
-    SPEED_TMP_ALLOC_LIMBS (xp, size1, s->align_xp);                    \
                                                                         \
-    speed_operand_src (s, xp, size1);                                  \
-    speed_operand_src (s, s->yp, s->size);                             \
+    speed_operand_src (s, s->xp, s->size);                             \
+    speed_operand_src (s, s->yp, size1);                               \
      speed_operand_dst (s, wp, size1 + s->size);                                \
      speed_cache_fill (s);                                              \
                                                                         \
      speed_starttime ();                                                        \
      i = s->reps;                                                       \
      do                                                                 \
-      function (wp, xp, size1, s->yp, s->size);                                \
+      function (wp, s->xp, s->size, s->yp, size1);                     \
      while (--i != 0);                                                  \
      t = speed_endtime ();                                              \
                                                                         \
@@ -1018,6 +1145,106 @@ int speed_routine_count_zeros_setup
      return t;                                                          \
    }
  
+/* For mpn_mulmid, mpn_mulmid_basecase, xsize=r, ysize=s->size. */
+#define SPEED_ROUTINE_MPN_MULMID(function)                             \
+  {                                                                    \
+    mp_ptr    wp, xp;                                                  \
+    mp_size_t size1;                                                   \
+    unsigned  i;                                                       \
+    double    t;                                                       \
+    TMP_DECL;                                                          \
+                                                                       \
+    size1 = (s->r == 0 ? (2 * s->size - 1) : s->r);                    \
+                                                                       \
+    SPEED_RESTRICT_COND (s->size >= 1);                                        \
+    SPEED_RESTRICT_COND (size1 >= s->size);                            \
+                                                                       \
+    TMP_MARK;                                                          \
+    SPEED_TMP_ALLOC_LIMBS (wp, size1 - s->size + 3, s->align_wp);      \
+    SPEED_TMP_ALLOC_LIMBS (xp, size1, s->align_xp);                    \
+                                                                       \
+    speed_operand_src (s, xp, size1);                                  \
+    speed_operand_src (s, s->yp, s->size);                             \
+    speed_operand_dst (s, wp, size1 - s->size + 3);                    \
+    speed_cache_fill (s);                                              \
+                                                                       \
+    speed_starttime ();                                                        \
+    i = s->reps;                                                       \
+    do                                                                 \
+      function (wp, xp, size1, s->yp, s->size);                                \
+    while (--i != 0);                                                  \
+    t = speed_endtime ();                                              \
+                                                                       \
+    TMP_FREE;                                                          \
+    return t;                                                          \
+  }
+
+#define SPEED_ROUTINE_MPN_MULMID_N(function)                           \
+  {                                                                    \
+    mp_ptr    wp, xp;                                                  \
+    mp_size_t size1;                                                   \
+    unsigned  i;                                                       \
+    double    t;                                                       \
+    TMP_DECL;                                                          \
+                                                                       \
+    size1 = 2 * s->size - 1;                                           \
+                                                                       \
+    SPEED_RESTRICT_COND (s->size >= 1);                                        \
+                                                                       \
+    TMP_MARK;                                                          \
+    SPEED_TMP_ALLOC_LIMBS (wp, size1 - s->size + 3, s->align_wp);      \
+    SPEED_TMP_ALLOC_LIMBS (xp, size1, s->align_xp);                    \
+                                                                       \
+    speed_operand_src (s, xp, size1);                                  \
+    speed_operand_src (s, s->yp, s->size);                             \
+    speed_operand_dst (s, wp, size1 - s->size + 3);                    \
+    speed_cache_fill (s);                                              \
+                                                                       \
+    speed_starttime ();                                                        \
+    i = s->reps;                                                       \
+    do                                                                 \
+      function (wp, xp, s->yp, s->size);                               \
+    while (--i != 0);                                                  \
+    t = speed_endtime ();                                              \
+                                                                       \
+    TMP_FREE;                                                          \
+    return t;                                                          \
+  }
+
+#define SPEED_ROUTINE_MPN_TOOM42_MULMID(function)                      \
+  {                                                                    \
+    mp_ptr    wp, xp, scratch;                                         \
+    mp_size_t size1, scratch_size;                                     \
+    unsigned  i;                                                       \
+    double    t;                                                       \
+    TMP_DECL;                                                          \
+                                                                       \
+    size1 = 2 * s->size - 1;                                           \
+                                                                       \
+    SPEED_RESTRICT_COND (s->size >= 1);                                        \
+                                                                       \
+    TMP_MARK;                                                          \
+    SPEED_TMP_ALLOC_LIMBS (wp, size1 - s->size + 3, s->align_wp);      \
+    SPEED_TMP_ALLOC_LIMBS (xp, size1, s->align_xp);                    \
+    scratch_size = mpn_toom42_mulmid_itch (s->size);                   \
+    SPEED_TMP_ALLOC_LIMBS (scratch, scratch_size, 0);                  \
+                                                                       \
+    speed_operand_src (s, xp, size1);                                  \
+    speed_operand_src (s, s->yp, s->size);                             \
+    speed_operand_dst (s, wp, size1 - s->size + 3);                    \
+    speed_cache_fill (s);                                              \
+                                                                       \
+    speed_starttime ();                                                        \
+    i = s->reps;                                                       \
+    do                                                                 \
+      function (wp, xp, s->yp, s->size, scratch);                      \
+    while (--i != 0);                                                  \
+    t = speed_endtime ();                                              \
+                                                                       \
+    TMP_FREE;                                                          \
+    return t;                                                          \
+  }
+
  #define SPEED_ROUTINE_MPN_MULMOD_BNM1_CALL(call)                       \
    {                                                                    \
      mp_ptr    wp, tp;                                                  \
@@ -1201,6 +1428,17 @@ int speed_routine_count_zeros_setup
       mpn_toom53_mul_itch (s->size, 11*s->size/20),                     \
       MPN_TOOM53_MUL_MINSIZE)
  
+#define SPEED_ROUTINE_MPN_TOOM43_FOR_TOOM54_MUL(function)              \
+  SPEED_ROUTINE_MPN_MUL_N_TSPACE                                       \
+    (function (wp, s->xp, s->size, s->yp, 5*s->size/6, tspace),        \
+     mpn_toom42_mul_itch (s->size, 5*s->size/6),                       \
+     MPN_TOOM54_MUL_MINSIZE)
+#define SPEED_ROUTINE_MPN_TOOM54_FOR_TOOM43_MUL(function)              \
+  SPEED_ROUTINE_MPN_MUL_N_TSPACE                                       \
+    (function (wp, s->xp, s->size, s->yp, 5*s->size/6, tspace),        \
+     mpn_toom54_mul_itch (s->size, 5*s->size/6),                       \
+     MPN_TOOM54_MUL_MINSIZE)
+
  
  
  #define SPEED_ROUTINE_MPN_SQR_CALL(call)                               \
@@ -1233,9 +1471,34 @@ int speed_routine_count_zeros_setup
  #define SPEED_ROUTINE_MPN_SQR(function)                                        \
    SPEED_ROUTINE_MPN_SQR_CALL (function (wp, s->xp, s->size))
  
-#define SPEED_ROUTINE_MPN_SQR_DIAGONAL(function)                       \
-  SPEED_ROUTINE_MPN_SQR (function)
-
+#define SPEED_ROUTINE_MPN_SQR_DIAG_ADDLSH1_CALL(call)                  \
+  {                                                                    \
+    mp_ptr    wp, tp;                                                  \
+    unsigned  i;                                                       \
+    double    t;                                                       \
+    TMP_DECL;                                                          \
+                                                                       \
+    SPEED_RESTRICT_COND (s->size >= 2);                                        \
+                                                                       \
+    TMP_MARK;                                                          \
+    SPEED_TMP_ALLOC_LIMBS (tp, 2 * s->size, s->align_wp);              \
+    SPEED_TMP_ALLOC_LIMBS (wp, 2 * s->size, s->align_wp);              \
+                                                                       \
+    speed_operand_src (s, s->xp, s->size);                             \
+    speed_operand_src (s, tp, 2 * s->size);                            \
+    speed_operand_dst (s, wp, 2 * s->size);                            \
+    speed_cache_fill (s);                                              \
+                                                                       \
+    speed_starttime ();                                                        \
+    i = s->reps;                                                       \
+    do                                                                 \
+      call;                                                            \
+    while (--i != 0);                                                  \
+    t = speed_endtime () / 2;                                          \
+                                                                       \
+    TMP_FREE;                                                          \
+    return t;                                                          \
+  }
  
  #define SPEED_ROUTINE_MPN_SQR_TSPACE(call, tsize, minsize)             \
    {                                                                    \
@@ -1361,7 +1624,7 @@ int speed_routine_count_zeros_setup
      i = s->reps;                                                       \
      do {                                                               \
        pfunc (inv, s->r);                                               \
-      function (s->xp, s->size, s->r, inv);                            \
+      function (s->xp, s->size, s->r << inv[1], inv);                          \
      } while (--i != 0);                                                        \
                                                                         \
      return speed_endtime ();                                           \
@@ -1606,9 +1869,9 @@ int speed_routine_count_zeros_setup
  #define SPEED_ROUTINE_MPN_MUPI_DIV_QR(function,itchfn)                 \
    {                                                                    \
      unsigned   i;                                                      \
-    mp_ptr     dp, tp, qp, rp, ip, scratch;                            \
+    mp_ptr     dp, tp, qp, rp, ip, scratch, tmp;                       \
      double     t;                                                      \
-    mp_size_t size1, itch;                                             \
+    mp_size_t  size1, itch;                                            \
      TMP_DECL;                                                          \
                                                                         \
      size1 = (s->r == 0 ? 2 * s->size : s->r);                          \
@@ -1635,7 +1898,8 @@ int speed_routine_count_zeros_setup
      dp[s->size-1] |= GMP_NUMB_HIGHBIT;                                 \
      tp[size1 - 1] = dp[s->size-1] - 1;                                 \
                                                                         \
-    mpn_invert (ip, dp, s->size, NULL);                                        \
+    tmp = TMP_ALLOC_LIMBS (mpn_invert_itch (s->size));                 \
+    mpn_invert (ip, dp, s->size, tmp);                                 \
                                                                         \
      speed_operand_dst (s, qp, size1 - s->size);                                \
      speed_operand_dst (s, rp, s->size);                                        \
@@ -1815,6 +2079,42 @@ int speed_routine_count_zeros_setup
      return t;                                                          \
    }
  
+#define SPEED_ROUTINE_MPN_BROOT(function)      \
+  {                                            \
+    SPEED_RESTRICT_COND (s->r & 1);            \
+    s->xp[0] |= 1;                             \
+    SPEED_ROUTINE_MPN_UNARY_1_CALL             \
+      ((*function) (wp, s->xp, s->size, s->r));        \
+  }
+
+#define SPEED_ROUTINE_MPN_BROOTINV(function, itch)     \
+  {                                                    \
+    mp_ptr    wp, tp;                                  \
+    unsigned  i;                                       \
+    double    t;                                       \
+    TMP_DECL;                                          \
+    TMP_MARK;                                          \
+    SPEED_RESTRICT_COND (s->size >= 1);                        \
+    SPEED_RESTRICT_COND (s->r & 1);                    \
+    wp = TMP_ALLOC_LIMBS (s->size);                    \
+    tp = TMP_ALLOC_LIMBS ( (itch));                    \
+    s->xp[0] |= 1;                                     \
+                                                       \
+    speed_operand_src (s, s->xp, s->size);             \
+    speed_operand_dst (s, wp, s->size);                        \
+    speed_cache_fill (s);                              \
+                                                       \
+    speed_starttime ();                                        \
+    i = s->reps;                                       \
+    do                                                 \
+      (*function) (wp, s->xp, s->size, s->r, tp);      \
+    while (--i != 0);                                  \
+    t = speed_endtime ();                              \
+                                                       \
+    TMP_FREE;                                          \
+    return t;                                          \
+  }
+
  #define SPEED_ROUTINE_MPN_INVERT(function,itchfn)                      \
    {                                                                    \
      long  i;                                                           \
@@ -2421,6 +2721,107 @@ int speed_routine_count_zeros_setup
       function (px[j-1], py[j-1], 0))
  
  
+#define SPEED_ROUTINE_MPN_HGCD_CALL(func, itchfunc)                    \
+  { /* Time s->reps rounds of func (ap, bp, s->size, &hgcd, wp).  */   \
+    mp_size_t hgcd_init_itch, hgcd_itch;                               \
+    mp_ptr ap, bp, wp, tmp1;                                           \
+    struct hgcd_matrix hgcd;                                           \
+    int res;                      /* assigned below, never read */     \
+    unsigned i;                                                                \
+    double t;                                                          \
+    TMP_DECL;                                                          \
+                                                                       \
+    if (s->size < 2)              /* sizes below 2 limbs yield -1 */   \
+      return -1;                                                       \
+                                                                       \
+    TMP_MARK;                                                          \
+                                                                       \
+    SPEED_TMP_ALLOC_LIMBS (ap, s->size + 1, s->align_xp);              \
+    SPEED_TMP_ALLOC_LIMBS (bp, s->size + 1, s->align_yp);              \
+                                                                       \
+    s->xp[s->size - 1] |= 1;      /* make both top limbs nonzero */    \
+    s->yp[s->size - 1] |= 1;                                           \
+                                                                       \
+    hgcd_init_itch = MPN_HGCD_MATRIX_INIT_ITCH (s->size);              \
+    hgcd_itch = itchfunc (s->size);                                    \
+    /* tmp1 backs the matrix, wp is handed to func; both use align_wp.  */ \
+    SPEED_TMP_ALLOC_LIMBS (tmp1, hgcd_init_itch, s->align_wp);         \
+    SPEED_TMP_ALLOC_LIMBS (wp, hgcd_itch, s->align_wp);                        \
+                                                                       \
+    speed_operand_src (s, s->xp, s->size);                             \
+    speed_operand_src (s, s->yp, s->size);                             \
+    speed_operand_dst (s, ap, s->size + 1);                            \
+    speed_operand_dst (s, bp, s->size + 1);                            \
+    speed_operand_dst (s, wp, hgcd_itch);                              \
+    speed_operand_dst (s, tmp1, hgcd_init_itch);                       \
+    speed_cache_fill (s);                                              \
+                                                                       \
+    speed_starttime ();                                                        \
+    i = s->reps;                                                       \
+    do                                                                 \
+      { /* the copies and the matrix init are inside the timed region */ \
+       MPN_COPY (ap, s->xp, s->size);                                  \
+       MPN_COPY (bp, s->yp, s->size);                                  \
+       mpn_hgcd_matrix_init (&hgcd, s->size, tmp1);                    \
+       res = func (ap, bp, s->size, &hgcd, wp);                        \
+      }                                                                        \
+    while (--i != 0);                                                  \
+    t = speed_endtime ();                                              \
+    TMP_FREE;                                                          \
+    return t;                                                          \
+  }
+
+#define SPEED_ROUTINE_MPN_HGCD_REDUCE_CALL(func, itchfunc)             \
+  { /* Time s->reps rounds of func (&hgcd, ap, bp, s->size, p, wp).  */        \
+    mp_size_t hgcd_init_itch, hgcd_step_itch;                          \
+    mp_ptr ap, bp, wp, tmp1;                                           \
+    struct hgcd_matrix hgcd;                                           \
+    mp_size_t p = s->size/2;      /* fixed at half the operand size */ \
+    int res;                      /* assigned below, never read */     \
+    unsigned i;                                                                \
+    double t;                                                          \
+    TMP_DECL;                                                          \
+                                                                       \
+    if (s->size < 2)              /* sizes below 2 limbs yield -1 */   \
+      return -1;                                                       \
+                                                                       \
+    TMP_MARK;                                                          \
+                                                                       \
+    SPEED_TMP_ALLOC_LIMBS (ap, s->size + 1, s->align_xp);              \
+    SPEED_TMP_ALLOC_LIMBS (bp, s->size + 1, s->align_yp);              \
+                                                                       \
+    s->xp[s->size - 1] |= 1;      /* make both top limbs nonzero */    \
+    s->yp[s->size - 1] |= 1;                                           \
+                                                                       \
+    hgcd_init_itch = MPN_HGCD_MATRIX_INIT_ITCH (s->size);              \
+    hgcd_step_itch = itchfunc (s->size, p);                            \
+    /* tmp1 backs the matrix, wp is handed to func; both use align_wp.  */ \
+    SPEED_TMP_ALLOC_LIMBS (tmp1, hgcd_init_itch, s->align_wp);         \
+    SPEED_TMP_ALLOC_LIMBS (wp, hgcd_step_itch, s->align_wp);                   \
+                                                                       \
+    speed_operand_src (s, s->xp, s->size);                             \
+    speed_operand_src (s, s->yp, s->size);                             \
+    speed_operand_dst (s, ap, s->size + 1);                            \
+    speed_operand_dst (s, bp, s->size + 1);                            \
+    speed_operand_dst (s, wp, hgcd_step_itch);                         \
+    speed_operand_dst (s, tmp1, hgcd_init_itch);                       \
+    speed_cache_fill (s);                                              \
+                                                                       \
+    speed_starttime ();                                                        \
+    i = s->reps;                                                       \
+    do                                                                 \
+      { /* the copies and the matrix init are inside the timed region */ \
+       MPN_COPY (ap, s->xp, s->size);                                  \
+       MPN_COPY (bp, s->yp, s->size);                                  \
+       mpn_hgcd_matrix_init (&hgcd, s->size, tmp1);                    \
+       res = func (&hgcd, ap, bp, s->size, p, wp);                     \
+      }                                                                        \
+    while (--i != 0);                                                  \
+    t = speed_endtime ();                                              \
+    TMP_FREE;                                                          \
+    return t;                                                          \
+  }
+
  /* Run some GCDs of s->size limbs each.  The number of different data values
     is decreased as s->size**2, since GCD is a quadratic algorithm.
     SPEED_ROUTINE_MPN_GCD runs more times than SPEED_ROUTINE_MPN_GCDEXT
@@ -2684,6 +3085,46 @@ int speed_routine_count_zeros_setup
      return t;                                                          \
    }
  
+#define SPEED_ROUTINE_MPN_DIV_QR_2(function, norm)                     \
+  { /* Time s->reps calls of function (wp, rp, s->xp, s->size, yp).  */        \
+    mp_ptr    wp, xp;             /* xp is not referenced below */     \
+    mp_limb_t yp[2];              /* two-limb divisor */               \
+    mp_limb_t rp[2];              /* two-limb destination */           \
+    unsigned  i;                                                       \
+    double    t;                                                       \
+    TMP_DECL;                                                          \
+                                                                       \
+    SPEED_RESTRICT_COND (s->size >= 2);                                        \
+                                                                       \
+    TMP_MARK;                                                          \
+    SPEED_TMP_ALLOC_LIMBS (wp, s->size, s->align_wp);                  \
+                                                                       \
+    /* norm: set the divisor's top bit; else clear it, keep yp[1] != 0 */ \
+    MPN_COPY (yp, s->yp_block, 2);                                     \
+    if (norm)                                                          \
+      yp[1] |= GMP_NUMB_HIGHBIT;                                       \
+    else                                                               \
+      {                                                                        \
+       yp[1] &= ~GMP_NUMB_HIGHBIT;                                     \
+       if (yp[1] == 0)                                                 \
+         yp[1] = 1;                                                    \
+      }                                                                        \
+    speed_operand_src (s, s->xp, s->size);                             \
+    speed_operand_src (s, yp, 2);                                      \
+    speed_operand_dst (s, wp, s->size);                                        \
+    speed_operand_dst (s, rp, 2);                                      \
+    speed_cache_fill (s);                                              \
+                                                                       \
+    speed_starttime ();                                                        \
+    i = s->reps;                                                       \
+    do                                                                 \
+      function (wp, rp, s->xp, s->size, yp);                           \
+    while (--i != 0);                                                  \
+    t = speed_endtime ();                                              \
+                                                                       \
+    TMP_FREE;                                                          \
+    return t;  /* the value speed_endtime () reported */               \
+  }
  
  #define SPEED_ROUTINE_MODLIMB_INVERT(function)                         \
    {                                                                    \
@@ -2852,8 +3293,7 @@ int speed_routine_count_zeros_setup
      for (i = 0; i < s->size; i++)                                      \
        xp[i] = s->xp[i] % base;                                         \
                                                                         \
-    wn = ((mp_size_t) (s->size / mp_bases[base].chars_per_bit_exactly)) \
-      / GMP_LIMB_BITS + 2;                                             \
+    LIMBS_PER_DIGIT_IN_BASE (wn, s->size, base);                       \
      SPEED_TMP_ALLOC_LIMBS (wp, wn, s->align_wp);                       \
                                                                         \
      /* use this during development to check wn is big enough */                \
@@ -2877,7 +3317,7 @@ int speed_routine_count_zeros_setup
    }
  
  
-/* Run an accel gcd find_a() function over various data values.         A set of
+/* Run an accel gcd find_a() function over various data values.  A set of
     values is used in case some run particularly fast or slow.  The size
     parameter is ignored, the amount of data tested is fixed.  */
  
@@ -3013,9 +3453,6 @@ int speed_routine_count_zeros_setup
    }
  
  
-#endif
-
-
  #define SPEED_ROUTINE_MPN_BACK_TO_BACK(function)                       \
    {                                                                    \
      unsigned  i;                                                       \
@@ -3055,3 +3492,6 @@ int speed_routine_count_zeros_setup
  
  #define SPEED_ROUTINE_MPN_ZERO(function)                               \
    SPEED_ROUTINE_MPN_ZERO_CALL (function (wp, s->size))
+
+
+#endif
diff --git a/tune/time.c b/tune/time.c

index 613f2ae964156f02866366a94d20c592d14a51cb..1670f6ecd3d825108bc6960bc69ff9694babc2d3 100644 (file)
--- a/tune/time.c
+++ b/tune/time.c
@@ -1,6 +1,7 @@
  /* Time routines for speed measurments.
  
-Copyright 1999, 2000, 2001, 2002, 2003, 2004 Free Software Foundation, Inc.
+Copyright 1999, 2000, 2001, 2002, 2003, 2004, 2010, 2011, 2012 Free Software
+Foundation, Inc.
  
  This file is part of the GNU MP Library.
  
@@ -255,7 +256,7 @@ static const int  use_stck = 1;  /* always use when available */
  typedef uint64_t  stck_t; /* gcc for s390 is quite new, always has uint64_t */
  #define STCK(timestamp)                 \
    do {                                  \
-    asm ("stck %0" : "=m" (timestamp)); \
+    asm ("stck %0" : "=Q" (timestamp)); \
    } while (0)
  #else
  static const int  have_stck = 0;
@@ -456,9 +457,22 @@ cycles_works_p (void)
    if (result != -1)
      goto done;
  
+  /* FIXME: On linux, the cycle counter is not saved and restored over
+   * context switches, making it almost useless for precise cputime
+   * measurements. When available, it's better to use clock_gettime,
+   * which seems to have reasonable accuracy (tested on x86_32,
+   * linux-2.6.26, glibc-2.7). However, there are also some linux
+   * systems where clock_gettime is broken in one way or the other,
+   * like CLOCK_PROCESS_CPUTIME_ID not implemented (easy case) or
+   * kind-of implemented but broken (needs code to detect that), and
+   * on those systems a wall-clock cycle counter is the least bad
+   * fallback.
+   *
+   * So we need some code to disable the cycle counter on some but not
+   * all linux systems. */
  #ifdef SIGILL
    {
-    RETSIGTYPE (*old_handler) __GMP_PROTO ((int));
+    RETSIGTYPE (*old_handler) (int);
      unsigned  cycles[2];
  
      old_handler = signal (SIGILL, cycles_works_handler);
@@ -671,8 +685,8 @@ getrusage_backwards_p (void)
           if (speed_option_verbose)
             printf ("getrusage went backwards (attempt %d: %ld.%06ld -> %ld.%06ld)\n",
                     i,
-                   prev.ru_utime.tv_sec, prev.ru_utime.tv_usec,
-                   next.ru_utime.tv_sec, next.ru_utime.tv_usec);
+                   (long) prev.ru_utime.tv_sec, (long) prev.ru_utime.tv_usec,
+                   (long) next.ru_utime.tv_sec, (long) next.ru_utime.tv_usec);
           result = 1;
           break;
         }
@@ -709,6 +723,8 @@ const int  have_cgt_id = 0;
  # define CGT_ID       (ASSERT_FAIL (CGT_ID not determined), -1)
  #endif
  
+#define CGT_DELAY_COUNT 1000  /* rounds of the delay loop timed in cgt_works_p */
+
  int
  cgt_works_p (void)
  {
@@ -750,6 +766,44 @@ cgt_works_p (void)
    cgt_unittime = unit.tv_sec + unit.tv_nsec * 1e-9;
    printf ("clock_gettime is %s accurate\n",
           unittime_string (cgt_unittime));
+
+  if (cgt_unittime < 10e-9)
+    {
+      /* Do we believe this? */
+      struct timespec start, end;
+      static volatile int counter;
+      double duration;
+      if (clock_gettime (CGT_ID, &start))
+       {
+         if (speed_option_verbose)
+           printf ("clock_gettime id=%d error: %s\n", CGT_ID, strerror (errno));
+         result = 0;
+         return result;
+       }
+      /* Loop of at least 1000 memory accesses, ought to take at
+        least 100 ns*/
+      for (counter = 0; counter < CGT_DELAY_COUNT; counter++)
+       ;
+      if (clock_gettime (CGT_ID, &end))
+       {
+         if (speed_option_verbose)
+           printf ("clock_gettime id=%d error: %s\n", CGT_ID, strerror (errno));
+         result = 0;
+         return result;
+       }
+      duration = (end.tv_sec + end.tv_nsec * 1e-9
+                 - start.tv_sec - start.tv_nsec * 1e-9);
+      if (speed_option_verbose)
+       printf ("delay loop of %d rounds took %s (according to clock_get_time)\n",
+               CGT_DELAY_COUNT, unittime_string (duration));
+      if (duration < 100e-9)
+       {
+         if (speed_option_verbose)
+           printf ("clock_gettime id=%d not believable\n", CGT_ID);
+         result = 0;
+         return result;
+       }
+    }
    result = 1;
    return result;
  }
@@ -779,7 +833,7 @@ int
  mftb_works_p (void)
  {
    unsigned   a[2];
-  RETSIGTYPE (*old_handler) __GMP_PROTO ((int));
+  RETSIGTYPE (*old_handler) (int);
    double     cycletime;
  
    /* suppress a warning about a[] unused */
@@ -941,7 +995,7 @@ speed_time_init (void)
  
    speed_cycletime_init ();
  
-  if (have_cycles && cycles_works_p ())
+  if (!speed_option_cycles_broken && have_cycles && cycles_works_p ())
      {
        use_cycles = 1;
        DEFAULT (speed_cycletime, 1.0);
@@ -1072,7 +1126,7 @@ speed_time_init (void)
        use_cgt = 1;
        speed_unittime = cgt_unittime;
        DEFAULT (speed_precision, (cgt_unittime <= 0.1e-6 ? 10000 : 1000));
-      strcpy (speed_time_string, "microsecond accurate getrusage()");
+      strcpy (speed_time_string, "microsecond accurate clock_gettime()");
      }
    else if (have_times && clk_tck() > 1000000)
      {
diff --git a/tune/tune-gcd-p.c b/tune/tune-gcd-p.c

new file mode 100644 (file)

index 0000000..6d88631
--- /dev/null
+++ b/tune/tune-gcd-p.c
@@ -0,0 +1,214 @@
+/* tune-gcd-p
+
+   Tune the choice for splitting p in divide-and-conquer gcd.
+
+Copyright 2008, 2010, 2011 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#define TUNE_GCD_P 1
+
+#include "../mpn/gcd.c"
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+
+#include "speed.h"
+
+/* Return the x in [start, end] with the smallest f(ctx, x) seen, storing that
+   value in *minp.  FIXME: Implement golden-section / fibonacci search.  */
+static int
+search (double *minp, double (*f)(void *, int), void *ctx, int start, int end)
+{
+  int x[4];     /* sample points; x[0] and x[3] are the current interval ends */
+  double y[4];  /* y[i] holds f(ctx, x[i]) */
+
+  int best_i;
+
+  x[0] = start;
+  x[3] = end;
+
+  y[0] = f(ctx, x[0]);
+  y[3] = f(ctx, x[3]);
+
+  for (;;)
+    {
+      int i;
+      int length = x[3] - x[0];
+
+      x[1] = x[0] + length/3;      /* interior samples at 1/3 and 2/3 */
+      x[2] = x[0] + 2*length/3;
+
+      y[1] = f(ctx, x[1]);         /* both interior points are re-measured */
+      y[2] = f(ctx, x[2]);         /* on every pass */
+
+#if 0
+      printf("%d: %f, %d: %f, %d:, %f %d: %f\n",
+            x[0], y[0], x[1], y[1], x[2], y[2], x[3], y[3]);
+#endif
+      for (best_i = 0, i = 1; i < 4; i++)  /* index of the smallest y[] */
+       if (y[i] < y[best_i])
+         best_i = i;
+
+      if (length <= 4)             /* interval short enough: stop here */
+       break;
+
+      if (best_i >= 2)             /* best sample in upper part: drop [x0, x1) */
+       {
+         x[0] = x[1];
+         y[0] = y[1];
+       }
+      else                         /* otherwise drop (x2, x3] */
+       {
+         x[3] = x[2];
+         y[3] = y[2];
+       }
+    }
+  *minp = y[best_i];  /* NOTE(review): narrowing assumes one minimum; unchecked */
+  return x[best_i];
+}
+/* qsort-style comparator: -1, 0 or 1 as *ap is below, equal to or above *bp.  */
+static int
+compare_double(const void *ap, const void *bp)
+{
+  double a = * (const double *) ap;
+  double b = * (const double *) bp;
+
+  if (a < b)
+    return -1;
+  else if (a > b)
+    return 1;
+  else
+    return 0;   /* also reached when neither comparison holds */
+}
+/* Sort v[0..n-1] in place and return v[n/2]; n must be at least 1.  */
+static double
+median (double *v, size_t n)
+{
+  qsort(v, n, sizeof(*v), compare_double);
+
+  return v[n/2];   /* for even n this is the upper of the two middle elements */
+}
+/* Run code five times, timing each run; res gets the median of the times.  */
+#define TIME(res, code) do {                           \
+  double time_measurement[5];                          \
+  unsigned time_i;                                     \
+                                                       \
+  for (time_i = 0; time_i < 5; time_i++)               \
+    {                                                  \
+      speed_starttime();                               \
+      code;                                            \
+      time_measurement[time_i] = speed_endtime();      \
+    }                                                  \
+  res = median(time_measurement, 5);                   \
+} while (0)
+/* Operands and work areas that bench_gcd receives through its ctx pointer.  */
+struct bench_data
+{
+  mp_size_t n;   /* size passed to MPN_COPY and mpn_gcd */
+  mp_ptr ap;     /* first input, copied to up before each gcd */
+  mp_ptr bp;     /* second input, copied to vp before each gcd */
+  mp_ptr up;     /* working copy of ap handed to mpn_gcd */
+  mp_ptr vp;     /* working copy of bp handed to mpn_gcd */
+  mp_ptr gp;     /* first argument of mpn_gcd (result area) */
+};
+/* Time mpn_gcd at size data->n with p_table[data->n] set to p; ctx is the data.  */
+static double
+bench_gcd (void *ctx, int p)
+{
+  struct bench_data *data = ctx;
+  double t;
+
+  p_table[data->n] = p;   /* left in place after the measurement */
+  TIME(t, {
+      MPN_COPY (data->up, data->ap, data->n);
+      MPN_COPY (data->vp, data->bp, data->n);
+      mpn_gcd (data->gp, data->up, data->n, data->vp, data->n);
+    });
+
+  return t;   /* median of the five runs made by TIME */
+}
+/* For each size n in [100, P_TABLE_SIZE), print the fastest gcd split point.  */
+int
+main(int argc, char **argv)
+{
+  gmp_randstate_t rands;
+  struct bench_data data;
+  mp_size_t n;
+  TMP_DECL;
+
+  /* Unbuffered so if output is redirected to a file it isn't lost if the
+     program is killed part way through.  */
+  setbuf (stdout, NULL);
+  setbuf (stderr, NULL);
+
+  gmp_randinit_default (rands);
+
+  TMP_MARK;
+
+  data.ap = TMP_ALLOC_LIMBS (P_TABLE_SIZE);
+  data.bp = TMP_ALLOC_LIMBS (P_TABLE_SIZE);
+  data.up = TMP_ALLOC_LIMBS (P_TABLE_SIZE);
+  data.vp = TMP_ALLOC_LIMBS (P_TABLE_SIZE);
+  data.gp = TMP_ALLOC_LIMBS (P_TABLE_SIZE);
+
+  mpn_random (data.ap, P_TABLE_SIZE);
+  mpn_random (data.bp, P_TABLE_SIZE);
+
+  memset (p_table, 0, sizeof(p_table));
+
+  for (n = 100; n < P_TABLE_SIZE; n++)
+    {
+      mp_size_t best_p;
+      double best_time;
+      double lehmer_time;
+
+      if (data.ap[n-1] == 0)    /* keep the top limb of each operand nonzero */
+        data.ap[n-1] = 1;
+
+      if (data.bp[n-1] == 0)
+        data.bp[n-1] = 1;
+
+      data.n = n;
+
+      lehmer_time = bench_gcd (&data, 0);   /* p = 0 is the baseline timing */
+
+      best_p = search (&best_time, bench_gcd, &data, n/5, 4*n/5);
+      if (best_time > lehmer_time)
+        best_p = 0;
+
+      /* mp_size_t need not be int, so convert explicitly to match %ld.  */
+      printf("%6ld %6ld %5.3g", (long) n, (long) best_p, (double) best_p / n);
+      if (best_p > 0)
+        {
+          double speedup = 100 * (lehmer_time - best_time) / lehmer_time;
+          printf(" %5.3g%%", speedup);
+          if (speedup < 1.0)    /* gains under 1% are treated as noise */
+            {
+              printf(" (ignored)");
+              best_p = 0;
+            }
+        }
+      printf("\n");
+
+      p_table[n] = best_p;      /* later, larger sizes run with this choice */
+    }
+  TMP_FREE;
+  gmp_randclear(rands);
+  return 0;
+}
diff --git a/tune/tuneup.c b/tune/tuneup.c

index 54827c1426b1863a028cb4773520d6425f3eee0e..20f9161e6d7963b2b5d7b1723d2c35c083cc1c3d 100644 (file)
--- a/tune/tuneup.c
+++ b/tune/tuneup.c
@@ -1,7 +1,7 @@
  /* Create tuned thresholds for various algorithms.
  
-Copyright 1999, 2000, 2001, 2002, 2003, 2005, 2006, 2008, 2009, 2010, 2011
-Free Software Foundation, Inc.
+Copyright 1999, 2000, 2001, 2002, 2003, 2005, 2006, 2008, 2009, 2010,
+2011, 2012 Free Software Foundation, Inc.
  
  This file is part of the GNU MP Library.
  
@@ -156,6 +156,7 @@ mp_size_t  mul_toom32_to_toom43_threshold = MP_SIZE_T_MAX;
  mp_size_t  mul_toom32_to_toom53_threshold = MP_SIZE_T_MAX;
  mp_size_t  mul_toom42_to_toom53_threshold = MP_SIZE_T_MAX;
  mp_size_t  mul_toom42_to_toom63_threshold = MP_SIZE_T_MAX;
+mp_size_t  mul_toom43_to_toom54_threshold = MP_SIZE_T_MAX;
  mp_size_t  mul_fft_threshold            = MP_SIZE_T_MAX;
  mp_size_t  mul_fft_modf_threshold       = MP_SIZE_T_MAX;
  mp_size_t  sqr_basecase_threshold       = MP_SIZE_T_MAX;
@@ -170,8 +171,10 @@ mp_size_t  sqr_fft_modf_threshold       = MP_SIZE_T_MAX;
  mp_size_t  mullo_basecase_threshold     = MP_SIZE_T_MAX;
  mp_size_t  mullo_dc_threshold           = MP_SIZE_T_MAX;
  mp_size_t  mullo_mul_n_threshold        = MP_SIZE_T_MAX;
+mp_size_t  mulmid_toom42_threshold      = MP_SIZE_T_MAX;
  mp_size_t  mulmod_bnm1_threshold        = MP_SIZE_T_MAX;
  mp_size_t  sqrmod_bnm1_threshold        = MP_SIZE_T_MAX;
+mp_size_t  div_qr_2_pi2_threshold       = MP_SIZE_T_MAX;
  mp_size_t  dc_div_qr_threshold          = MP_SIZE_T_MAX;
  mp_size_t  dc_divappr_q_threshold       = MP_SIZE_T_MAX;
  mp_size_t  mu_div_qr_threshold          = MP_SIZE_T_MAX;
@@ -189,15 +192,17 @@ mp_size_t  binv_newton_threshold        = MP_SIZE_T_MAX;
  mp_size_t  redc_1_to_redc_2_threshold   = MP_SIZE_T_MAX;
  mp_size_t  redc_1_to_redc_n_threshold   = MP_SIZE_T_MAX;
  mp_size_t  redc_2_to_redc_n_threshold   = MP_SIZE_T_MAX;
-mp_size_t  powm_threshold               = MP_SIZE_T_MAX;
  mp_size_t  matrix22_strassen_threshold  = MP_SIZE_T_MAX;
  mp_size_t  hgcd_threshold               = MP_SIZE_T_MAX;
+mp_size_t  hgcd_appr_threshold          = MP_SIZE_T_MAX;
+mp_size_t  hgcd_reduce_threshold        = MP_SIZE_T_MAX;
  mp_size_t  gcd_dc_threshold             = MP_SIZE_T_MAX;
  mp_size_t  gcdext_dc_threshold          = MP_SIZE_T_MAX;
  mp_size_t  divrem_1_norm_threshold      = MP_SIZE_T_MAX;
  mp_size_t  divrem_1_unnorm_threshold    = MP_SIZE_T_MAX;
  mp_size_t  mod_1_norm_threshold         = MP_SIZE_T_MAX;
  mp_size_t  mod_1_unnorm_threshold       = MP_SIZE_T_MAX;
+int       mod_1_1p_method              = 0;
  mp_size_t  mod_1n_to_mod_1_1_threshold  = MP_SIZE_T_MAX;
  mp_size_t  mod_1u_to_mod_1_1_threshold  = MP_SIZE_T_MAX;
  mp_size_t  mod_1_1_to_mod_1_2_threshold = MP_SIZE_T_MAX;
@@ -208,6 +213,8 @@ mp_size_t  get_str_dc_threshold         = MP_SIZE_T_MAX;
  mp_size_t  get_str_precompute_threshold = MP_SIZE_T_MAX;
  mp_size_t  set_str_dc_threshold         = MP_SIZE_T_MAX;
  mp_size_t  set_str_precompute_threshold = MP_SIZE_T_MAX;
+mp_size_t  fac_odd_threshold            = 0;
+mp_size_t  fac_dsc_threshold            = FAC_DSC_THRESHOLD_LIMIT;
  
  mp_size_t  fft_modf_sqr_threshold = MP_SIZE_T_MAX;
  mp_size_t  fft_modf_mul_threshold = MP_SIZE_T_MAX;
@@ -252,6 +259,9 @@ struct param_t {
  #ifndef HAVE_NATIVE_mpn_mod_1
  #define HAVE_NATIVE_mpn_mod_1 0
  #endif
+#ifndef HAVE_NATIVE_mpn_mod_1_1p
+#define HAVE_NATIVE_mpn_mod_1_1p 0
+#endif
  #ifndef HAVE_NATIVE_mpn_modexact_1_odd
  #define HAVE_NATIVE_mpn_modexact_1_odd 0
  #endif
@@ -357,12 +367,12 @@ analyze_dat (int final)
  }
  
  
-/* Measuring for recompiled mpn/generic/divrem_1.c and mpn/generic/mod_1.c */
+/* Measuring for recompiled mpn/generic/divrem_1.c, mpn/generic/mod_1.c
+ * and mpz/fac_ui.c */
  
-mp_limb_t mpn_divrem_1_tune
-  __GMP_PROTO ((mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t));
-mp_limb_t mpn_mod_1_tune
-   __GMP_PROTO ((mp_srcptr, mp_size_t, mp_limb_t));
+mp_limb_t mpn_divrem_1_tune (mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t);
+mp_limb_t mpn_mod_1_tune (mp_srcptr, mp_size_t, mp_limb_t);
+void mpz_fac_ui_tune (mpz_ptr, unsigned long);
  
  double
  speed_mpn_mod_1_tune (struct speed_params *s)
@@ -374,6 +384,11 @@ speed_mpn_divrem_1_tune (struct speed_params *s)
  {
    SPEED_ROUTINE_MPN_DIVREM_1 (mpn_divrem_1_tune);
  }
+double
+speed_mpz_fac_ui_tune (struct speed_params *s)
+{
+  SPEED_ROUTINE_MPZ_FAC_UI (mpz_fac_ui_tune);  /* presumably supplies the timing loop and return; macro defined elsewhere */
+}
  
  
  double
@@ -1157,6 +1172,8 @@ void
  tune_mul_n (void)
  {
    static struct param_t  param;
+  mp_size_t next_toom_start;
+  int something_changed;
  
    param.function = speed_mpn_mul_n;
  
@@ -1165,25 +1182,84 @@ tune_mul_n (void)
    param.max_size = MUL_TOOM22_THRESHOLD_LIMIT-1;
    one (&mul_toom22_threshold, &param);
  
-  param.name = "MUL_TOOM33_THRESHOLD";
-  param.min_size = MAX (mul_toom22_threshold, MPN_TOOM33_MUL_MINSIZE);
-  param.max_size = MUL_TOOM33_THRESHOLD_LIMIT-1;
-  one (&mul_toom33_threshold, &param);
+  param.noprint = 1;
+
+  /* Threshold sequence loop.  Disable functions that would be used in a very
+     narrow range, re-measuring things when that happens.  */
+  something_changed = 1;
+  while (something_changed)
+    {
+      something_changed = 0;
+
+       next_toom_start = mul_toom22_threshold;
+
+       if (mul_toom33_threshold != 0)
+         {
+           param.name = "MUL_TOOM33_THRESHOLD";
+           param.min_size = MAX (next_toom_start, MPN_TOOM33_MUL_MINSIZE);
+           param.max_size = MUL_TOOM33_THRESHOLD_LIMIT-1;
+           one (&mul_toom33_threshold, &param);
+
+           if (next_toom_start * 1.05 >= mul_toom33_threshold)
+             {
+               mul_toom33_threshold = 0;
+               something_changed = 1;
+             }
+         }
+
+       next_toom_start = MAX (next_toom_start, mul_toom33_threshold);
+
+       if (mul_toom44_threshold != 0)
+         {
+           param.name = "MUL_TOOM44_THRESHOLD";
+           param.min_size = MAX (next_toom_start, MPN_TOOM44_MUL_MINSIZE);
+           param.max_size = MUL_TOOM44_THRESHOLD_LIMIT-1;
+           one (&mul_toom44_threshold, &param);
+
+           if (next_toom_start * 1.05 >= mul_toom44_threshold)
+             {
+               mul_toom44_threshold = 0;
+               something_changed = 1;
+             }
+         }
+
+       next_toom_start = MAX (next_toom_start, mul_toom44_threshold);
  
-  param.name = "MUL_TOOM44_THRESHOLD";
-  param.min_size = MAX (mul_toom33_threshold, MPN_TOOM44_MUL_MINSIZE);
-  param.max_size = MUL_TOOM44_THRESHOLD_LIMIT-1;
-  one (&mul_toom44_threshold, &param);
+       if (mul_toom6h_threshold != 0)
+         {
+           param.name = "MUL_TOOM6H_THRESHOLD";
+           param.min_size = MAX (next_toom_start, MPN_TOOM6H_MUL_MINSIZE);
+           param.max_size = MUL_TOOM6H_THRESHOLD_LIMIT-1;
+           one (&mul_toom6h_threshold, &param);
+
+           if (next_toom_start * 1.05 >= mul_toom6h_threshold)
+             {
+               mul_toom6h_threshold = 0;
+               something_changed = 1;
+             }
+         }
  
-  param.name = "MUL_TOOM6H_THRESHOLD";
-  param.min_size = MAX (mul_toom44_threshold, MPN_TOOM6H_MUL_MINSIZE);
-  param.max_size = MUL_TOOM6H_THRESHOLD_LIMIT-1;
-  one (&mul_toom6h_threshold, &param);
+       next_toom_start = MAX (next_toom_start, mul_toom6h_threshold);
+
+       if (mul_toom8h_threshold != 0)
+         {
+           param.name = "MUL_TOOM8H_THRESHOLD";
+           param.min_size = MAX (next_toom_start, MPN_TOOM8H_MUL_MINSIZE);
+           param.max_size = MUL_TOOM8H_THRESHOLD_LIMIT-1;
+           one (&mul_toom8h_threshold, &param);
+
+           if (next_toom_start * 1.05 >= mul_toom8h_threshold)
+             {
+               mul_toom8h_threshold = 0;
+               something_changed = 1;
+             }
+         }
+    }
  
-  param.name = "MUL_TOOM8H_THRESHOLD";
-  param.min_size = MAX (mul_toom6h_threshold, MPN_TOOM8H_MUL_MINSIZE);
-  param.max_size = MUL_TOOM8H_THRESHOLD_LIMIT-1;
-  one (&mul_toom8h_threshold, &param);
+    print_define ("MUL_TOOM33_THRESHOLD", MUL_TOOM33_THRESHOLD);
+    print_define ("MUL_TOOM44_THRESHOLD", MUL_TOOM44_THRESHOLD);
+    print_define ("MUL_TOOM6H_THRESHOLD", MUL_TOOM6H_THRESHOLD);
+    print_define ("MUL_TOOM8H_THRESHOLD", MUL_TOOM8H_THRESHOLD);
  
    /* disabled until tuned */
    MUL_FFT_THRESHOLD = MP_SIZE_T_MAX;
@@ -1200,34 +1276,43 @@ tune_mul (void)
    param.function = speed_mpn_toom32_for_toom43_mul;
    param.function2 = speed_mpn_toom43_for_toom32_mul;
    param.name = "MUL_TOOM32_TO_TOOM43_THRESHOLD";
-  param.min_size = MPN_TOOM43_MUL_MINSIZE;
+  param.min_size = MPN_TOOM43_MUL_MINSIZE * 24 / 17;
    one (&thres, &param);
-  mul_toom32_to_toom43_threshold = 17*thres/24;
+  mul_toom32_to_toom43_threshold = thres * 17 / 24;
    print_define ("MUL_TOOM32_TO_TOOM43_THRESHOLD", mul_toom32_to_toom43_threshold);
  
    param.function = speed_mpn_toom32_for_toom53_mul;
    param.function2 = speed_mpn_toom53_for_toom32_mul;
    param.name = "MUL_TOOM32_TO_TOOM53_THRESHOLD";
-  param.min_size = MPN_TOOM53_MUL_MINSIZE;
+  param.min_size = MPN_TOOM53_MUL_MINSIZE * 30 / 19;
    one (&thres, &param);
-  mul_toom32_to_toom53_threshold = 19*thres/30;
+  mul_toom32_to_toom53_threshold = thres * 19 / 30;
    print_define ("MUL_TOOM32_TO_TOOM53_THRESHOLD", mul_toom32_to_toom53_threshold);
  
    param.function = speed_mpn_toom42_for_toom53_mul;
    param.function2 = speed_mpn_toom53_for_toom42_mul;
    param.name = "MUL_TOOM42_TO_TOOM53_THRESHOLD";
-  param.min_size = MPN_TOOM53_MUL_MINSIZE;
+  param.min_size = MPN_TOOM53_MUL_MINSIZE * 20 / 11;
    one (&thres, &param);
-  mul_toom42_to_toom53_threshold = 11*thres/20;
+  mul_toom42_to_toom53_threshold = thres * 11 / 20;
    print_define ("MUL_TOOM42_TO_TOOM53_THRESHOLD", mul_toom42_to_toom53_threshold);
  
    param.function = speed_mpn_toom42_mul;
    param.function2 = speed_mpn_toom63_mul;
    param.name = "MUL_TOOM42_TO_TOOM63_THRESHOLD";
-  param.min_size = MPN_TOOM63_MUL_MINSIZE;
+  param.min_size = MPN_TOOM63_MUL_MINSIZE * 2;
    one (&thres, &param);
-  mul_toom42_to_toom63_threshold = thres/2;
+  mul_toom42_to_toom63_threshold = thres / 2;
    print_define ("MUL_TOOM42_TO_TOOM63_THRESHOLD", mul_toom42_to_toom63_threshold);
+
+  /* Use ratio 5/6 when measuring, the middle of the range 2/3 to 1. */
+  param.function = speed_mpn_toom43_for_toom54_mul;
+  param.function2 = speed_mpn_toom54_for_toom43_mul;
+  param.name = "MUL_TOOM43_TO_TOOM54_THRESHOLD";
+  param.min_size = MPN_TOOM54_MUL_MINSIZE * 6 / 5;
+  one (&thres, &param);
+  mul_toom43_to_toom54_threshold = thres * 5 / 6;
+  print_define ("MUL_TOOM43_TO_TOOM54_THRESHOLD", mul_toom43_to_toom54_threshold);
  }
  
  
@@ -1276,6 +1361,18 @@ tune_mullo (void)
  #endif
  }
  
+void
+tune_mulmid (void)
+{
+  static struct param_t  param;
+
+  param.name = "MULMID_TOOM42_THRESHOLD";
+  param.function = speed_mpn_mulmid_n;
+  param.min_size = 4;
+  param.max_size = 100;
+  one (&mulmid_toom42_threshold, &param);
+}
+
  void
  tune_mulmod_bnm1 (void)
  {
@@ -1365,29 +1462,83 @@ tune_sqr (void)
  
    {
      static struct param_t  param;
-    mp_size_t toom3_start = MAX (sqr_toom2_threshold, sqr_basecase_threshold);
+    mp_size_t next_toom_start;
+    int something_changed;
  
      param.function = speed_mpn_sqr;
+    param.noprint = 1;
+
+  /* Threshold sequence loop.  Disable functions that would be used in a very
+     narrow range, re-measuring things when that happens.  */
+    something_changed = 1;
+    while (something_changed)
+      {
+       something_changed = 0;
+
+       next_toom_start = MAX (sqr_toom2_threshold, sqr_basecase_threshold);
+
+       sqr_toom3_threshold = SQR_TOOM3_THRESHOLD_LIMIT;
+       param.name = "SQR_TOOM3_THRESHOLD";
+       param.min_size = MAX (next_toom_start, MPN_TOOM3_SQR_MINSIZE);
+       param.max_size = SQR_TOOM3_THRESHOLD_LIMIT-1;
+       one (&sqr_toom3_threshold, &param);
+
+       next_toom_start = MAX (next_toom_start, sqr_toom3_threshold);
+
+       if (sqr_toom4_threshold != 0)
+         {
+           param.name = "SQR_TOOM4_THRESHOLD";
+           sqr_toom4_threshold = SQR_TOOM4_THRESHOLD_LIMIT;
+           param.min_size = MAX (next_toom_start, MPN_TOOM4_SQR_MINSIZE);
+           param.max_size = SQR_TOOM4_THRESHOLD_LIMIT-1;
+           one (&sqr_toom4_threshold, &param);
+
+           if (next_toom_start * 1.05 >= sqr_toom4_threshold)
+             {
+               sqr_toom4_threshold = 0;
+               something_changed = 1;
+             }
+         }
+
+       next_toom_start = MAX (next_toom_start, sqr_toom4_threshold);
+
+       if (sqr_toom6_threshold != 0)
+         {
+           param.name = "SQR_TOOM6_THRESHOLD";
+           sqr_toom6_threshold = SQR_TOOM6_THRESHOLD_LIMIT;
+           param.min_size = MAX (next_toom_start, MPN_TOOM6_SQR_MINSIZE);
+           param.max_size = SQR_TOOM6_THRESHOLD_LIMIT-1;
+           one (&sqr_toom6_threshold, &param);
+
+           if (next_toom_start * 1.05 >= sqr_toom6_threshold)
+             {
+               sqr_toom6_threshold = 0;
+               something_changed = 1;
+             }
+         }
+
+       next_toom_start = MAX (next_toom_start, sqr_toom6_threshold);
+
+       if (sqr_toom8_threshold != 0)
+         {
+           param.name = "SQR_TOOM8_THRESHOLD";
+           sqr_toom8_threshold = SQR_TOOM8_THRESHOLD_LIMIT;
+           param.min_size = MAX (next_toom_start, MPN_TOOM8_SQR_MINSIZE);
+           param.max_size = SQR_TOOM8_THRESHOLD_LIMIT-1;
+           one (&sqr_toom8_threshold, &param);
+
+           if (next_toom_start * 1.05 >= sqr_toom8_threshold)
+             {
+               sqr_toom8_threshold = 0;
+               something_changed = 1;
+             }
+         }
+      }
  
-    param.name = "SQR_TOOM3_THRESHOLD";
-    param.min_size = MAX (toom3_start, MPN_TOOM3_SQR_MINSIZE);
-    param.max_size = SQR_TOOM3_THRESHOLD_LIMIT-1;
-    one (&sqr_toom3_threshold, &param);
-
-    param.name = "SQR_TOOM4_THRESHOLD";
-    param.min_size = MAX (sqr_toom3_threshold, MPN_TOOM4_SQR_MINSIZE);
-    param.max_size = SQR_TOOM4_THRESHOLD_LIMIT-1;
-    one (&sqr_toom4_threshold, &param);
-
-    param.name = "SQR_TOOM6_THRESHOLD";
-    param.min_size = MAX (sqr_toom4_threshold, MPN_TOOM6_SQR_MINSIZE);
-    param.max_size = SQR_TOOM6_THRESHOLD_LIMIT-1;
-    one (&sqr_toom6_threshold, &param);
-
-    param.name = "SQR_TOOM8_THRESHOLD";
-    param.min_size = MAX (sqr_toom6_threshold, MPN_TOOM8_SQR_MINSIZE);
-    param.max_size = SQR_TOOM8_THRESHOLD_LIMIT-1;
-    one (&sqr_toom8_threshold, &param);
+    print_define ("SQR_TOOM3_THRESHOLD", SQR_TOOM3_THRESHOLD);
+    print_define ("SQR_TOOM4_THRESHOLD", SQR_TOOM4_THRESHOLD);
+    print_define ("SQR_TOOM6_THRESHOLD", SQR_TOOM6_THRESHOLD);
+    print_define ("SQR_TOOM8_THRESHOLD", SQR_TOOM8_THRESHOLD);
    }
  }
  
@@ -1432,7 +1583,7 @@ tune_mu_div (void)
      param.name = "MU_DIV_QR_THRESHOLD";
      param.function = speed_mpn_dcpi1_div_qr;
      param.function2 = speed_mpn_mu_div_qr;
-    param.min_size = 6;
+    param.min_size = mul_toom22_threshold;
      param.max_size = 5000;
      param.step_factor = 0.02;
      one (&mu_div_qr_threshold, &param);
@@ -1442,7 +1593,7 @@ tune_mu_div (void)
      param.name = "MU_DIVAPPR_Q_THRESHOLD";
      param.function = speed_mpn_dcpi1_divappr_q;
      param.function2 = speed_mpn_mu_divappr_q;
-    param.min_size = 6;
+    param.min_size = mul_toom22_threshold;
      param.max_size = 5000;
      param.step_factor = 0.02;
      one (&mu_divappr_q_threshold, &param);
@@ -1491,7 +1642,7 @@ tune_mu_bdiv (void)
      param.name = "MU_BDIV_QR_THRESHOLD";
      param.function = speed_mpn_dcpi1_bdiv_qr;
      param.function2 = speed_mpn_mu_bdiv_qr;
-    param.min_size = 4;
+    param.min_size = mul_toom22_threshold;
      param.max_size = 5000;
      param.step_factor = 0.02;
      one (&mu_bdiv_qr_threshold, &param);
@@ -1501,7 +1652,7 @@ tune_mu_bdiv (void)
      param.name = "MU_BDIV_Q_THRESHOLD";
      param.function = speed_mpn_dcpi1_bdiv_q;
      param.function2 = speed_mpn_mu_bdiv_q;
-    param.min_size = 4;
+    param.min_size = mul_toom22_threshold;
      param.max_size = 5000;
      param.step_factor = 0.02;
      one (&mu_bdiv_q_threshold, &param);
@@ -1564,6 +1715,7 @@ tune_redc (void)
      param.min_is_always = 1;
      param.max_size = TUNE_REDC_2_MAX;
      param.noprint = 1;
+    param.stop_factor = 1.5;
      one (&redc_1_to_redc_2_threshold, &param);
    }
    {
@@ -1575,17 +1727,24 @@ tune_redc (void)
      param.noprint = 1;
      one (&redc_2_to_redc_n_threshold, &param);
    }
-  if (redc_1_to_redc_2_threshold >= TUNE_REDC_2_MAX - 1)
-    {
-      /* Disable REDC_2.  This is not supposed to happen.  */
-      print_define ("REDC_1_TO_REDC_2_THRESHOLD", REDC_2_TO_REDC_N_THRESHOLD);
-      print_define_remark ("REDC_2_TO_REDC_N_THRESHOLD", 0, "anomaly: never REDC_2");
-    }
-  else
+  if (redc_1_to_redc_2_threshold >= redc_2_to_redc_n_threshold)
      {
-      print_define ("REDC_1_TO_REDC_2_THRESHOLD", REDC_1_TO_REDC_2_THRESHOLD);
-      print_define ("REDC_2_TO_REDC_N_THRESHOLD", REDC_2_TO_REDC_N_THRESHOLD);
+      redc_2_to_redc_n_threshold = 0;  /* disable redc_2 */
+
+      /* Never use redc_2, measure redc_1 -> redc_n cutoff, store result as
+        REDC_1_TO_REDC_2_THRESHOLD.  */
+      {
+       static struct param_t  param;
+       param.name = "REDC_1_TO_REDC_2_THRESHOLD";
+       param.function = speed_mpn_redc_1;
+       param.function2 = speed_mpn_redc_n;
+       param.min_size = 16;
+       param.noprint = 1;
+       one (&redc_1_to_redc_2_threshold, &param);
+      }
      }
+  print_define ("REDC_1_TO_REDC_2_THRESHOLD", REDC_1_TO_REDC_2_THRESHOLD);
+  print_define ("REDC_2_TO_REDC_N_THRESHOLD", REDC_2_TO_REDC_N_THRESHOLD);
  #else
    {
      static struct param_t  param;
@@ -1619,6 +1778,30 @@ tune_hgcd (void)
    one (&hgcd_threshold, &param);
  }
  
+void
+tune_hgcd_appr (void)
+{
+  static struct param_t  param;
+  param.name = "HGCD_APPR_THRESHOLD";
+  param.function = speed_mpn_hgcd_appr;
+  /* We seem to get strange results for small sizes */
+  param.min_size = 50;
+  param.stop_since_change = 150;
+  one (&hgcd_appr_threshold, &param);
+}
+
+void
+tune_hgcd_reduce (void)
+{
+  static struct param_t  param;
+  param.name = "HGCD_REDUCE_THRESHOLD";
+  param.function = speed_mpn_hgcd_reduce;
+  param.min_size = 30;
+  param.max_size = 7000;
+  param.step_factor = 0.04;
+  one (&hgcd_reduce_threshold, &param);
+}
+
  void
  tune_gcd_dc (void)
  {
@@ -1643,6 +1826,134 @@ tune_gcdext_dc (void)
    one (&gcdext_dc_threshold, &param);
  }
  
+/* In tune_powm_sec we compute the table used by the win_size function.  The
+   cutoff points are in exponent bits, disregarding other operand sizes.  It is
+   not possible to use the one framework since it currently uses a granularity
+   of full limbs.
+*/
+
+/* This win_size replaces the variant in the powm code, allowing us to
+   control k in the k-ary algorithms.  */
+int winsize;
+int
+win_size (mp_bitcnt_t eb)
+{
+  return winsize;
+}
+
+void
+tune_powm_sec (void)
+{
+  mp_size_t n;
+  int k, i;
+  mp_size_t itch;
+  mp_bitcnt_t nbits, nbits_next, possible_nbits_cutoff;
+  const int n_max = 3000 / GMP_NUMB_BITS;
+  const int n_measurements = 5;
+  mp_ptr rp, bp, ep, mp, tp;
+  double ttab[n_measurements], tk, tkp1;
+  TMP_DECL;
+  TMP_MARK;
+
+  possible_nbits_cutoff = 0;
+
+  k = 1;
+
+  winsize = 10;                        /* the itch function needs this */
+  itch = mpn_powm_sec_itch (n_max, n_max, n_max);
+
+  rp = TMP_ALLOC_LIMBS (n_max);
+  bp = TMP_ALLOC_LIMBS (n_max);
+  ep = TMP_ALLOC_LIMBS (n_max);
+  mp = TMP_ALLOC_LIMBS (n_max);
+  tp = TMP_ALLOC_LIMBS (itch);
+
+  mpn_random (bp, n_max);
+  mpn_random (mp, n_max);
+  mp[0] |= 1;
+
+/* How about taking the M operand size into account?
+
+   An operation R=powm(B,E,N) will take time O(log(E)*M(log(N))) (assuming
+   B = O(M)).
+
+   Using k-ary and no sliding window, the precomputation will need time
+   O(2^(k-1)*M(log(N))) and the main computation will need O(log(E)*S(N)) +
+   O(log(E)/k*M(N)), for the squarings, multiplications, respectively.
+
+   An operation R=powm_sec(B,E,N) will take time like powm.
+
+   Using k-ary, the precomputation will need time O(2^k*M(log(N))) and the
+   main computation will need O(log(E)*S(N)) + O(log(E)/k*M(N)) +
+   O(log(E)/k*2^k*log(N)), for the squarings, multiplications, and full
+   table reads, respectively.  */
+
+  printf ("#define POWM_SEC_TABLE  ");
+
+  for (nbits = 1; nbits <= n_max * GMP_NUMB_BITS; )
+    {
+      n = (nbits - 1) / GMP_NUMB_BITS + 1;
+
+      /* Generate E such that sliding-window for k and k+1 works equally
+        well/poorly (but sliding is not used in powm_sec, of course). */
+      for (i = 0; i < n; i++)
+       ep[i] = ~CNST_LIMB(0);
+
+      /* Truncate E to be exactly nbits large.  */
+      if (nbits % GMP_NUMB_BITS != 0)
+       mpn_rshift (ep, ep, n, GMP_NUMB_BITS - nbits % GMP_NUMB_BITS);
+      ep[n - 1] |= CNST_LIMB(1) << (nbits - 1) % GMP_NUMB_BITS;
+
+      winsize = k;
+      for (i = 0; i < n_measurements; i++)
+       {
+         speed_starttime ();
+         mpn_powm_sec (rp, bp, n, ep, n, mp, n, tp);
+         ttab[i] = speed_endtime ();
+       }
+      tk = median (ttab, n_measurements);
+
+      winsize = k + 1;
+      speed_starttime ();
+      for (i = 0; i < n_measurements; i++)
+       {
+         speed_starttime ();
+         mpn_powm_sec (rp, bp, n, ep, n, mp, n, tp);
+         ttab[i] = speed_endtime ();
+       }
+      tkp1 = median (ttab, n_measurements);
+/*
+      printf ("testing: %ld, %d", nbits, k, ep[n-1]);
+      printf ("   %10.5f  %10.5f\n", tk, tkp1);
+*/
+      if (tkp1 < tk)
+       {
+         if (possible_nbits_cutoff)
+           {
+             /* Two consecutive sizes indicate k increase, obey.  */
+             if (k > 1)
+               printf (",");
+             printf ("%ld", (long) possible_nbits_cutoff);
+             k++;
+             possible_nbits_cutoff = 0;
+           }
+         else
+           {
+             /* One measurement indicates k increase, save nbits for further
+                consideration.  */
+             possible_nbits_cutoff = nbits;
+           }
+       }
+      else
+       possible_nbits_cutoff = 0;
+
+      nbits_next = nbits * 65 / 64;
+      nbits = nbits_next + (nbits_next == nbits);
+    }
+  printf ("\n");
+  TMP_FREE;
+}
+
  
  /* size_extra==1 reflects the fact that with high<divisor one division is
     always skipped.  Forcing high<divisor while testing ensures consistency
@@ -1662,7 +1973,7 @@ tune_gcdext_dc (void)
    param.stop_factor = 2.0;
  
  
-double (*tuned_speed_mpn_divrem_1) __GMP_PROTO ((struct speed_params *));
+double (*tuned_speed_mpn_divrem_1) (struct speed_params *);
  
  void
  tune_divrem_1 (void)
@@ -1734,6 +2045,27 @@ tune_mod_1 (void)
        return;
      }
  
+  if (!HAVE_NATIVE_mpn_mod_1_1p)
+    {
+      static struct param_t  param;
+      double   t1, t2;
+
+      s.size = 10;
+      s.r = randlimb_half ();
+
+      t1 = tuneup_measure (speed_mpn_mod_1_1_1, &param, &s);
+      t2 = tuneup_measure (speed_mpn_mod_1_1_2, &param, &s);
+
+      if (t1 == -1.0 || t2 == -1.0)
+       {
+         printf ("Oops, can't measure all mpn_mod_1_1 methods at %ld\n",
+                 (long) s.size);
+         abort ();
+       }
+      mod_1_1p_method = (t1 < t2) ? 1 : 2;
+      print_define ("MOD_1_1P_METHOD", mod_1_1p_method);
+    }
+
    if (UDIV_PREINV_ALWAYS)
      {
        print_define ("MOD_1_NORM_THRESHOLD", 0L);
@@ -1775,7 +2107,7 @@ tune_mod_1 (void)
      static struct param_t  param;
  
      param.check_size = 256;
-    s.r = randlimb_norm () / 5;
+    s.r = randlimb_half ();
      param.noprint = 1;
  
      param.function = speed_mpn_mod_1_1;
@@ -1954,6 +2286,16 @@ tune_divrem_2 (void)
    one (&divrem_2_threshold, &param);
  }
  
+void
+tune_div_qr_2 (void)
+{
+  static struct param_t  param;
+  param.name = "DIV_QR_2_PI2_THRESHOLD";
+  param.function = speed_mpn_div_qr_2n;
+  param.check_size = 500;
+  param.min_size = 4;
+  one (&div_qr_2_pi2_threshold, &param);
+}
  
  /* mpn_divexact_1 is vaguely expected to be used on smallish divisors, so
     tune for that.  Its speed can differ on odd or even divisor, so take an
@@ -2097,7 +2439,7 @@ void
  tune_jacobi_base (void)
  {
    static struct param_t  param;
-  double   t1, t2, t3;
+  double   t1, t2, t3, t4;
    int      method;
  
    s.size = GMP_LIMB_BITS * 3 / 4;
@@ -2114,19 +2456,25 @@ tune_jacobi_base (void)
    if (option_trace >= 1)
      printf ("size=%ld, mpn_jacobi_base_3 %.9f\n", (long) s.size, t3);
  
-  if (t1 == -1.0 || t2 == -1.0 || t3 == -1.0)
+  t4 = tuneup_measure (speed_mpn_jacobi_base_4, &param, &s);
+  if (option_trace >= 1)
+    printf ("size=%ld, mpn_jacobi_base_4 %.9f\n", (long) s.size, t4);
+
+  if (t1 == -1.0 || t2 == -1.0 || t3 == -1.0 || t4 == -1.0)
      {
        printf ("Oops, can't measure all mpn_jacobi_base methods at %ld\n",
                (long) s.size);
        abort ();
      }
  
-  if (t1 < t2 && t1 < t3)
+  if (t1 < t2 && t1 < t3 && t1 < t4)
      method = 1;
-  else if (t2 < t3)
+  else if (t2 < t3 && t2 < t4)
      method = 2;
-  else
+  else if (t3 < t4)
      method = 3;
+  else
+    method = 4;
  
    print_define ("JACOBI_BASE_METHOD", method);
  }
@@ -2184,8 +2532,7 @@ speed_mpn_pre_set_str (struct speed_params *s)
    for (i = 0; i < s->size; i++)
      str[i] = s->xp[i] % base;
  
-  wn = ((mp_size_t) (s->size / mp_bases[base].chars_per_bit_exactly))
-    / GMP_LIMB_BITS + 2;
+  LIMBS_PER_DIGIT_IN_BASE (wn, s->size, base);
    SPEED_TMP_ALLOC_LIMBS (wp, wn, s->align_wp);
  
    /* use this during development to check wn is big enough */
@@ -2287,6 +2634,25 @@ tune_fft_sqr (void)
    fft (&param);
  }
  
+void
+tune_fac_ui (void)
+{
+  static struct param_t  param;
+
+  param.function = speed_mpz_fac_ui_tune;
+
+  param.name = "FAC_DSC_THRESHOLD";
+  param.min_size = 70;
+  param.max_size = FAC_DSC_THRESHOLD_LIMIT;
+  one (&fac_dsc_threshold, &param);
+
+  param.name = "FAC_ODD_THRESHOLD";
+  param.min_size = 22;
+  param.stop_factor = 1.7;
+  param.min_is_always = 1;
+  one (&fac_odd_threshold, &param);
+}
+
  void
  all (void)
  {
@@ -2356,7 +2722,10 @@ all (void)
    tune_divrem_1 ();
    tune_mod_1 ();
    tune_preinv_divrem_1 ();
+#if 0
    tune_divrem_2 ();
+#endif
+  tune_div_qr_2 ();
    tune_divexact_1 ();
    tune_modexact_1_odd ();
    printf("\n");
@@ -2370,6 +2739,9 @@ all (void)
    tune_sqr ();
    printf("\n");
  
+  tune_mulmid ();
+  printf("\n");
+
    tune_mulmod_bnm1 ();
    tune_sqrmod_bnm1 ();
    printf("\n");
@@ -2399,8 +2771,13 @@ all (void)
    tune_mu_bdiv ();
    printf("\n");
  
+  tune_powm_sec ();
+  printf("\n");
+
    tune_matrix22_mul ();
    tune_hgcd ();
+  tune_hgcd_appr ();
+  tune_hgcd_reduce();
    tune_gcd_dc ();
    tune_gcdext_dc ();
    tune_jacobi_base ();
@@ -2410,6 +2787,9 @@ all (void)
    tune_set_str ();
    printf("\n");
  
+  tune_fac_ui ();
+  printf("\n");
+
    time (&end_time);
    printf ("/* Tuneup completed successfully, took %ld seconds */\n",
            (long) (end_time - start_time));
author	Junfeng Dong <junfeng.dong@intel.com>
	Mon, 16 Dec 2013 10:29:55 +0000 (18:29 +0800)
committer	Junfeng Dong <junfeng.dong@intel.com>
	Mon, 16 Dec 2013 10:29:55 +0000 (18:29 +0800)